├── .gitignore
├── LICENSE
├── README.md
├── borip
    ├── client.go
    ├── example
    │   └── rx.go
    └── packet.go
├── boripserver
    ├── devices.go
    └── main.go
├── dsp
    ├── ax25
    │   ├── constants.go
    │   ├── crc.go
    │   └── hdlc.go
    ├── biquad.go
    ├── conversion.go
    ├── conversion_386.s
    ├── conversion_amd64.s
    ├── conversion_arm.s
    ├── conversion_arm64.s
    ├── conversion_avo_amd64.go
    ├── conversion_avo_amd64.s
    ├── conversion_test.go
    ├── cpu_amd64.go
    ├── cpu_arm.go
    ├── cpu_arm64.go
    ├── cpu_arm64_test.go
    ├── cpu_arm_test.go
    ├── cpu_x86_test.go
    ├── demod.go
    ├── demod_386.s
    ├── demod_amd64.s
    ├── demod_arm.s
    ├── demod_arm64.s
    ├── demod_test.go
    ├── downsample.go
    ├── downsample_386.s
    ├── downsample_amd64.s
    ├── downsample_arm.s
    ├── downsample_arm64.s
    ├── downsample_test.go
    ├── dtmf
    │   └── dtmf.go
    ├── filter.go
    ├── fuzz.go
    ├── goertzel.go
    ├── goertzel_test.go
    ├── iaca.h
    ├── internal
    │   └── cpu
    │   │   ├── cpu.go
    │   │   ├── cpu.s
    │   │   ├── cpu_386.go
    │   │   ├── cpu_amd64.go
    │   │   ├── cpu_arm.go
    │   │   ├── cpu_arm64.go
    │   │   ├── cpu_arm64.s
    │   │   ├── cpu_arm64_android.go
    │   │   ├── cpu_arm64_darwin.go
    │   │   ├── cpu_arm64_freebsd.go
    │   │   ├── cpu_arm64_hwcap.go
    │   │   ├── cpu_arm64_linux.go
    │   │   ├── cpu_arm64_other.go
    │   │   ├── cpu_mips.go
    │   │   ├── cpu_mips64x.go
    │   │   ├── cpu_mipsle.go
    │   │   ├── cpu_no_name.go
    │   │   ├── cpu_ppc64x.go
    │   │   ├── cpu_ppc64x_aix.go
    │   │   ├── cpu_ppc64x_linux.go
    │   │   ├── cpu_riscv64.go
    │   │   ├── cpu_s390x.go
    │   │   ├── cpu_s390x.s
    │   │   ├── cpu_s390x_test.go
    │   │   ├── cpu_test.go
    │   │   ├── cpu_wasm.go
    │   │   ├── cpu_x86.go
    │   │   ├── cpu_x86.s
    │   │   ├── cpu_x86_test.go
    │   │   └── export_test.go
    ├── interpolate.go
    ├── math32.go
    ├── math32_386.s
    ├── math32_amd64.s
    ├── math32_arm.s
    ├── math32_arm64.s
    ├── math32_test.go
    ├── mathfixed.go
    ├── mathfixed_arm.s
    ├── mathfixed_test.go
    ├── sdft.go
    ├── stub_windows.go
    ├── util.go
    ├── util_386.s
    ├── util_amd64.s
    ├── util_arm.s
    ├── util_arm64.s
    ├── util_test.go
    └── window.go
└── examples
    ├── ax25.go
    ├── dtmf_file.go
    └── dtmf_live.go


/.gitignore:
--------------------------------------------------------------------------------
 1 | # Compiled Object files, Static and Dynamic libs (Shared Objects)
 2 | *.o
 3 | *.a
 4 | *.so
 5 | 
 6 | # Folders
 7 | _obj
 8 | _test
 9 | 
10 | # Architecture specific extensions/prefixes
11 | *.[568vq]
12 | [568vq].out
13 | 
14 | *.cgo1.go
15 | *.cgo2.c
16 | _cgo_defun.c
17 | _cgo_gotypes.go
18 | _cgo_export.*
19 | 
20 | _testmain.go
21 | 
22 | *.exe
23 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | The MIT License (MIT)
 2 | 
 3 | Copyright (c) 2013 Samuel Stauffer
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of
 6 | this software and associated documentation files (the "Software"), to deal in
 7 | the Software without restriction, including without limitation the rights to
 8 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
 9 | the Software, and to permit persons to whom the Software is furnished to do so,
10 | subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
17 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
18 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
19 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | Software Defined Radio (SDR) package and tools for Go
 2 | -----------------------------------------------------
 3 | 
 4 | This repo is a collection of packages and tools for working with SDR in Go.
 5 | 
 6 | It also includes ARM assembly optimized filters and conversions which
 7 | allow real-time FM demodulation on the Raspberry Pi.
 8 | 
 9 | ### Demodulators
10 | 
 11 | * FM (polar discriminator)
12 | * AFSK
13 | 
14 | ### Decoders
15 | 
16 | * AX.25
17 | * DTMF
18 | 
19 | ### Other Algorithms
20 | 
21 | * Goertzel
22 | * Sliding DFT
23 | 
24 | ### Clients & Servers
25 | 
26 | * borip compatible client and server
27 | 
28 | ## Filter design
29 | 
30 | Go packages for filter design:
31 | 
32 | - [Parks-McClellan aka Remez](https://github.com/samuel/go-remez)
33 | 
34 | ## SDR Hardware Interfaces
35 | 
36 | Go packages to utilize SDR hardware:
37 | 
38 | - [RTL-SDR](https://github.com/samuel/go-rtlsdr)
39 | - [HackRF](https://github.com/samuel/go-hackrf)
40 | 


--------------------------------------------------------------------------------
/borip/client.go:
--------------------------------------------------------------------------------
  1 | package borip
  2 | 
  3 | import (
  4 | 	"bufio"
  5 | 	"errors"
  6 | 	"fmt"
  7 | 	"log"
  8 | 	"net"
  9 | 	"strconv"
 10 | 	"strings"
 11 | )
 12 | 
 13 | var ErrUnexpectedResponse = errors.New("borip: unexpected resposne from server")
 14 | 
 15 | type ErrResponse struct {
 16 | 	errorType string
 17 | 	msg       string
 18 | }
 19 | 
 20 | func (e ErrResponse) Error() string {
 21 | 	return fmt.Sprintf("borip: %s %s", e.errorType, e.msg)
 22 | }
 23 | 
 24 | func makeErrorResponse(parts []string) ErrResponse {
 25 | 	if len(parts) == 1 {
 26 | 		return ErrResponse{parts[0], ""}
 27 | 	}
 28 | 	return ErrResponse{parts[0], strings.Join(parts[1:], " ")}
 29 | }
 30 | 
 31 | type Device struct {
 32 | 	Name, Serial               string
 33 | 	MinGain, MaxGain, GainStep float64
 34 | 	FPGAFreq                   float64 // Hz
 35 | 	SamplesPerPacket           int     // complex 4-byte samples (16-bit I/Q) per packet
 36 | 	ValidAntennas              []string
 37 | }
 38 | 
 39 | func parseDeviceString(st string) (*Device, error) {
 40 | 	// Terratec NOXON (rev 3)|-5.000000|30.000000|1.000000|3200000.000000|16256|(Default)|Terratec NOXON (rev 3)
 41 | 	deviceParts := strings.Split(st, "|")
 42 | 	if len(deviceParts) < 8 {
 43 | 		return nil, ErrUnexpectedResponse
 44 | 	}
 45 | 	d := &Device{
 46 | 		Name:   deviceParts[0],
 47 | 		Serial: deviceParts[7],
 48 | 	}
 49 | 	var err error
 50 | 	if d.MinGain, err = strconv.ParseFloat(deviceParts[1], 64); err != nil {
 51 | 		return nil, err
 52 | 	}
 53 | 	if d.MaxGain, err = strconv.ParseFloat(deviceParts[2], 64); err != nil {
 54 | 		return nil, err
 55 | 	}
 56 | 	if d.GainStep, err = strconv.ParseFloat(deviceParts[3], 64); err != nil {
 57 | 		return nil, err
 58 | 	}
 59 | 	if d.FPGAFreq, err = strconv.ParseFloat(deviceParts[4], 64); err != nil {
 60 | 		return nil, err
 61 | 	}
 62 | 	if val, err := strconv.ParseInt(deviceParts[5], 10, 32); err != nil {
 63 | 		return nil, err
 64 | 	} else {
 65 | 		d.SamplesPerPacket = int(val)
 66 | 	}
 67 | 	d.ValidAntennas = strings.Split(deviceParts[6], ",")
 68 | 	return d, nil
 69 | }
 70 | 
// BorIP is a client session with a BorIP server. Instances are created by
// Dial; commands are written through wr and replies are read line by line
// from rd.
type BorIP struct {
	// conn is the connection returned by net.Dial in Dial.
	conn    net.Conn
	// rd and wr are buffered wrappers around conn.
	rd      *bufio.Reader
	wr      *bufio.Writer
	running bool // After a successful "GO" is sent to the server

	// device is the device most recently returned by SelectDevice, or nil
	// when no device is selected.
	device *Device
}
 79 | 
 80 | func Dial(addr string) (*BorIP, error) {
 81 | 	conn, err := net.Dial("tcp", addr)
 82 | 	if err != nil {
 83 | 		return nil, err
 84 | 	}
 85 | 	bor := &BorIP{
 86 | 		conn: conn,
 87 | 		rd:   bufio.NewReader(conn),
 88 | 		wr:   bufio.NewWriter(conn),
 89 | 	}
 90 | 	line, err := bor.rd.ReadString('\n')
 91 | 	if err != nil {
 92 | 		return nil, err
 93 | 	}
 94 | 	line = strings.TrimSpace(line)
 95 | 	if line != "DEVICE -" {
 96 | 		log.Printf("Unexpected hello from server: %s", line)
 97 | 	}
 98 | 	return bor, nil
 99 | }
100 | 
101 | func (bor *BorIP) SelectDevice(hint string) (*Device, error) {
102 | 	res, err := bor.command("DEVICE", hint)
103 | 	if err != nil {
104 | 		return nil, err
105 | 	}
106 | 	resParts := strings.SplitN(res, " ", 2)
107 | 	if resParts[0] != "DEVICE" || len(resParts) < 2 {
108 | 		return nil, ErrUnexpectedResponse
109 | 	}
110 | 	if resParts[1][0] == '-' {
111 | 		if len(resParts[1]) == 1 {
112 | 			// Probably selected ! which is not an error. Just deselects the device.
113 | 			bor.device = nil
114 | 			return nil, nil
115 | 		}
116 | 		return nil, errors.New("borip: " + strings.TrimSpace(resParts[1][1:]))
117 | 	}
118 | 	dev, err := parseDeviceString(resParts[1])
119 | 	if err != nil {
120 | 		return nil, err
121 | 	}
122 | 	bor.device = dev
123 | 	return dev, nil
124 | }
125 | 
// Device returns the device stored by the last successful SelectDevice call,
// or nil if no device is currently selected.
func (bor *BorIP) Device() *Device {
	return bor.device
}
129 | 
130 | func (bor *BorIP) SetFrequency(freq float64) (targetIF, actualIF, targetDDC, actualDDC float64, err error) {
131 | 	res, e := bor.command("FREQ", strconv.FormatFloat(freq, 'f', -1, 64))
132 | 	if e != nil {
133 | 		err = e
134 | 		return
135 | 	}
136 | 	resParts, e := parseAndCheck(res, 1)
137 | 	if e != nil {
138 | 		err = e
139 | 		return
140 | 	}
141 | 	if len(resParts) >= 3 {
142 | 		targetIF, _ = strconv.ParseFloat(resParts[2], 64)
143 | 	}
144 | 	if len(resParts) >= 4 {
145 | 		actualIF, _ = strconv.ParseFloat(resParts[3], 64)
146 | 	}
147 | 	if len(resParts) >= 5 {
148 | 		targetDDC, _ = strconv.ParseFloat(resParts[4], 64)
149 | 	}
150 | 	if len(resParts) >= 6 {
151 | 		actualDDC, _ = strconv.ParseFloat(resParts[5], 64)
152 | 	}
153 | 	return
154 | }
155 | 
156 | func (bor *BorIP) Frequency() (float64, error) {
157 | 	res, err := bor.command("FREQ")
158 | 	if err != nil {
159 | 		return 0.0, err
160 | 	}
161 | 	if len(res) < 6 {
162 | 		return 0.0, ErrUnexpectedResponse
163 | 	}
164 | 	return strconv.ParseFloat(res[5:], 64)
165 | }
166 | 
167 | func (bor *BorIP) SetAntenna(ant string) error {
168 | 	res, err := bor.command("ANTENNA", ant)
169 | 	if err != nil {
170 | 		return err
171 | 	}
172 | 	_, err = parseAndCheck(res, 1)
173 | 	return err
174 | }
175 | 
176 | func (bor *BorIP) Antenna() (string, error) {
177 | 	res, err := bor.command("ANTENNA")
178 | 	if err != nil {
179 | 		return "", err
180 | 	}
181 | 	if len(res) < 8 {
182 | 		return "", ErrUnexpectedResponse
183 | 	}
184 | 	return res[8:], nil
185 | }
186 | 
187 | // Return the actual sampling rate (closest)
188 | func (bor *BorIP) SetRate(rate float64) (float64, error) {
189 | 	res, err := bor.command("RATE", strconv.FormatFloat(rate, 'f', -1, 64))
190 | 	if err != nil {
191 | 		return 0.0, err
192 | 	}
193 | 	parts, err := parseAndCheck(res, 1)
194 | 	if err != nil {
195 | 		return 0.0, err
196 | 	}
197 | 	return strconv.ParseFloat(parts[2], 64)
198 | }
199 | 
200 | func (bor *BorIP) Rate() (float64, error) {
201 | 	res, err := bor.command("RATE")
202 | 	if err != nil {
203 | 		return 0.0, err
204 | 	}
205 | 	if len(res) < 6 {
206 | 		return 0.0, ErrUnexpectedResponse
207 | 	}
208 | 	return strconv.ParseFloat(res[5:], 64)
209 | }
210 | 
211 | // Return the actual gain (closest)
212 | func (bor *BorIP) SetGain(rate float64) error {
213 | 	res, err := bor.command("GAIN", strconv.FormatFloat(rate, 'f', -1, 64))
214 | 	if err != nil {
215 | 		return err
216 | 	}
217 | 	_, err = parseAndCheck(res, 1)
218 | 	return err
219 | }
220 | 
221 | func (bor *BorIP) Gain() (float64, error) {
222 | 	res, err := bor.command("GAIN")
223 | 	if err != nil {
224 | 		return 0.0, err
225 | 	}
226 | 	if len(res) < 6 {
227 | 		return 0.0, ErrUnexpectedResponse
228 | 	}
229 | 	return strconv.ParseFloat(res[5:], 64)
230 | }
231 | 
232 | func (bor *BorIP) SetDestination(dest string) (string, error) {
233 | 	res, err := bor.command("DEST", dest)
234 | 	if err != nil {
235 | 		return "", err
236 | 	}
237 | 	parts, err := parseAndCheck(res, 2)
238 | 	if err != nil {
239 | 		return "", err
240 | 	}
241 | 	return parts[2], nil
242 | }
243 | 
244 | func (bor *BorIP) Destination() (string, error) {
245 | 	res, err := bor.command("DEST")
246 | 	if err != nil {
247 | 		return "", err
248 | 	}
249 | 	if len(res) < 5 {
250 | 		return "", ErrUnexpectedResponse
251 | 	}
252 | 	return res[5:], nil
253 | }
254 | 
255 | func (bor *BorIP) SetHeaderEnabled(enabled bool) error {
256 | 	enabledStr := "OFF"
257 | 	if enabled {
258 | 		enabledStr = "ON"
259 | 	}
260 | 	res, err := bor.command("HEADER", enabledStr)
261 | 	if err != nil {
262 | 		return err
263 | 	}
264 | 	_, err = parseAndCheck(res, 1)
265 | 	return err
266 | }
267 | 
268 | func (bor *BorIP) HeaderEnabled() (bool, error) {
269 | 	res, err := bor.command("HEADER")
270 | 	if err != nil {
271 | 		return false, err
272 | 	}
273 | 	if len(res) < 8 {
274 | 		return false, ErrUnexpectedResponse
275 | 	}
276 | 	switch res[7:] {
277 | 	case "ON":
278 | 		return true, nil
279 | 	case "OFF":
280 | 		return false, nil
281 | 	}
282 | 	return false, ErrUnexpectedResponse
283 | }
284 | 
285 | func (bor *BorIP) Go() error {
286 | 	res, err := bor.command("GO")
287 | 	if err != nil {
288 | 		return err
289 | 	}
290 | 	_, err = parseAndCheck(res, 1)
291 | 	if err == nil {
292 | 		bor.running = true
293 | 	}
294 | 	return err
295 | }
296 | 
297 | func (bor *BorIP) Stop() error {
298 | 	res, err := bor.command("STOP")
299 | 	if err != nil {
300 | 		return err
301 | 	}
302 | 	_, err = parseAndCheck(res, 1)
303 | 	if err == nil {
304 | 		bor.running = false
305 | 	}
306 | 	return err
307 | }
308 | 
309 | func parseAndCheck(res string, minArgs int) ([]string, error) {
310 | 	resParts := strings.Split(res, " ")
311 | 	if len(resParts) < 2 {
312 | 		return nil, ErrUnexpectedResponse
313 | 	}
314 | 	if resParts[1] != "OK" {
315 | 		return nil, makeErrorResponse(resParts[1:])
316 | 	}
317 | 	if len(resParts) < 1+minArgs {
318 | 		return nil, ErrUnexpectedResponse
319 | 	}
320 | 	return resParts, nil
321 | }
322 | 
323 | func (bor *BorIP) command(cmd string, args ...string) (string, error) {
324 | 	argString := ""
325 | 	if len(args) > 0 {
326 | 		argString = " " + strings.Join(args, " ")
327 | 	}
328 | 	if _, err := bor.wr.WriteString(cmd + argString + "\n"); err != nil {
329 | 		return "", err
330 | 	}
331 | 	if err := bor.wr.Flush(); err != nil {
332 | 		return "", err
333 | 	}
334 | 	line, err := bor.rd.ReadString('\n')
335 | 	return strings.TrimSpace(line), err
336 | }
337 | 
338 | func (bor *BorIP) Close() {
339 | 	if bor.running {
340 | 		bor.Stop()
341 | 	}
342 | 	if bor.device != nil {
343 | 		bor.SelectDevice("!")
344 | 	}
345 | 	bor.conn.Close()
346 | }
347 | 


--------------------------------------------------------------------------------
/borip/example/rx.go:
--------------------------------------------------------------------------------
  1 | package main
  2 | 
  3 | import (
  4 | 	"encoding/binary"
  5 | 	"fmt"
  6 | 	"log"
  7 | 	"math"
  8 | 	"net"
  9 | 	"os"
 10 | 	"time"
 11 | 
 12 | 	"github.com/samuel/go-dsp/borip"
 13 | )
 14 | 
 15 | func polarDiscriminant(a, b complex128) float64 {
 16 | 	c := a * complex(real(b), -imag(b))
 17 | 	angle := math.Atan2(imag(c), real(c))
 18 | 	return angle / math.Pi
 19 | }
 20 | 
 21 | func main() {
 22 | 	bor, err := borip.Dial("127.0.0.1:28888")
 23 | 	if err != nil {
 24 | 		log.Fatal(err)
 25 | 	}
 26 | 	defer bor.Close()
 27 | 
 28 | 	dev, err := bor.SelectDevice("RTL")
 29 | 	if err != nil {
 30 | 		log.Fatal(err)
 31 | 	}
 32 | 	fmt.Fprintf(os.Stderr, "%+v\n", dev)
 33 | 
 34 | 	freq := 484.7e6
 35 | 	// freq := 162.4e6
 36 | 	// freq := 92.7e6
 37 | 
 38 | 	targetIF, actualIF, _, _, err := bor.SetFrequency(freq)
 39 | 	if err != nil {
 40 | 		log.Fatal(err)
 41 | 	}
 42 | 	queryFreq, err := bor.Frequency()
 43 | 	if err != nil {
 44 | 		log.Fatal(err)
 45 | 	}
 46 | 	fmt.Fprintf(os.Stderr, "Freq: %f %f (%f)\n", targetIF, actualIF, queryFreq)
 47 | 	if err := bor.SetAntenna(dev.ValidAntennas[0]); err != nil {
 48 | 		log.Fatal(err)
 49 | 	}
 50 | 	// ant, err := bor.Antenna()
 51 | 	// if err != nil {
 52 | 	// 	log.Fatal(err)
 53 | 	// }
 54 | 	// fmt.Printf("Antenna: %s\n", ant)
 55 | 
 56 | 	rate := 1.0e6
 57 | 	actualRate, err := bor.SetRate(rate)
 58 | 	if err != nil {
 59 | 		log.Fatal(err)
 60 | 	}
 61 | 	fmt.Fprintf(os.Stderr, "Actual rate: %f\n", actualRate)
 62 | 	// actualRate, err = bor.Rate()
 63 | 	// if err != nil {
 64 | 	// 	log.Fatal(err)
 65 | 	// }
 66 | 	// fmt.Printf("Actual rate: %f\n", actualRate)
 67 | 
 68 | 	// err = bor.SetGain(-1.0)
 69 | 	// if err != nil {
 70 | 	// 	log.Fatal(err)
 71 | 	// }
 72 | 	// gain, err := bor.Gain()
 73 | 	// if err != nil {
 74 | 	// 	log.Fatal(err)
 75 | 	// }
 76 | 	// fmt.Printf("Gain: %f\n", gain)
 77 | 	// dest, err := bor.Destination()
 78 | 	// if err != nil {
 79 | 	// 	log.Fatal(err)
 80 | 	// }
 81 | 	// fmt.Printf("Destination: %s\n", dest)
 82 | 	// if err := bor.SetHeaderEnabled(true); err != nil {
 83 | 	// 	log.Fatal(err)
 84 | 	// }
 85 | 	headers, err := bor.HeaderEnabled()
 86 | 	if err != nil {
 87 | 		log.Fatal(err)
 88 | 	}
 89 | 	fmt.Fprintf(os.Stderr, "Header enabled: %+v\n", headers)
 90 | 
 91 | 	//
 92 | 
 93 | 	dest, err := bor.SetDestination("127.0.0.1:2288")
 94 | 	if err != nil {
 95 | 		log.Fatal(err)
 96 | 	}
 97 | 	fmt.Fprintf(os.Stderr, "Destination: %s\n", dest)
 98 | 
 99 | 	addr, err := net.ResolveUDPAddr("udp", dest)
100 | 	if err != nil {
101 | 		log.Fatal(err)
102 | 	}
103 | 	conn, err := net.ListenUDP("udp", addr)
104 | 	if err != nil {
105 | 		log.Fatal(err)
106 | 	}
107 | 	defer conn.Close()
108 | 
109 | 	// rd := borip.NewPacketReader(conn, headers)
110 | 	// go func() {
111 | 	// 	samples := make([]complex128, 65536)
112 | 	// 	// lastIQ := complex(float64(0.0), float64(0.0))
113 | 	// 	lastT := time.Now()
114 | 	// 	for {
115 | 	// 		n, err := rd.ReadSamples(samples)
116 | 	// 		if err != nil {
117 | 	// 			log.Fatal(err)
118 | 	// 		}
119 | 	// 		if headers {
120 | 	// 			fmt.Fprintf(os.Stderr, "%+v\n", rd.Header())
121 | 	// 		}
122 | 	// 		t := time.Now()
123 | 	// 		rate := float64(n) / (float64(t.Sub(lastT).Nanoseconds()) / 1e9)
124 | 	// 		fmt.Printf("Actual rate: %f\n", rate)
125 | 	// 		lastT = t
126 | 
127 | 	// 		// for _, iq := range samples {
128 | 	// 		// 	pcm := polarDiscriminant(iq, lastIQ)
129 | 	// 		// 	lastIQ = iq
130 | 	// 		// 	_ = pcm
131 | 	// 		// 	// fmt.Printf(" %f", pcm)
132 | 	// 		// 	pcm16 := int16(pcm * 16384)
133 | 	// 		// 	binary.Write(os.Stdout, binary.LittleEndian, pcm16)
134 | 	// 		// }
135 | 	// 		// fmt.Println()
136 | 
137 | 	// 		// fm->pre_r = fm->signal[fm->signal_len - 2];
138 | 	// 		// fm->pre_j = fm->signal[fm->signal_len - 1];
139 | 	// 		// fm->signal2_len = fm->signal_len/2;
140 | 	// 		// fmt.Printf("%d:", n)
141 | 	// 		// if n > 4 {
142 | 	// 		// 	n = 4
143 | 	// 		// }
144 | 	// 		// for i := 0; i < n; i++ {
145 | 	// 		// 	fmt.Printf(" %+v", samples[i])
146 | 	// 		// }
147 | 	// 		// fmt.Println()
148 | 	// 	}
149 | 	// }()
150 | 
151 | 	rd := borip.NewPacketReader(conn, headers)
152 | 	go func() {
153 | 		wr, err := os.Create("samples.bin")
154 | 		if err != nil {
155 | 			log.Fatal(err)
156 | 		}
157 | 		defer wr.Close()
158 | 		samples := make([]complex128, 65536)
159 | 		for {
160 | 			n, err := rd.ReadSamples(samples)
161 | 			if err != nil {
162 | 				log.Fatal(err)
163 | 			}
164 | 			for i := 0; i < n; i++ {
165 | 				if err := binary.Write(wr, binary.LittleEndian, float32(real(samples[i]))); err != nil {
166 | 					log.Fatal(err)
167 | 				}
168 | 				if err := binary.Write(wr, binary.LittleEndian, float32(imag(samples[i]))); err != nil {
169 | 					log.Fatal(err)
170 | 				}
171 | 			}
172 | 			if err := wr.Sync(); err != nil {
173 | 				log.Fatal(err)
174 | 			}
175 | 		}
176 | 	}()
177 | 
178 | 	bor.Go()
179 | 
180 | 	time.Sleep(time.Second * 30)
181 | 
182 | 	bor.Close()
183 | }
184 | 


--------------------------------------------------------------------------------
/borip/packet.go:
--------------------------------------------------------------------------------
 1 | package borip
 2 | 
 3 | import (
 4 | 	"encoding/binary"
 5 | 	"errors"
 6 | 	"net"
 7 | )
 8 | 
const (
	// defaultBufferSize is the size in bytes of the receive buffer that
	// NewPacketReader allocates.
	defaultBufferSize = 256 * 1024
	// packetHeaderSize is the minimum packet length ReadSamples accepts when
	// headers are enabled; shorter packets yield ErrShortPacket.
	packetHeaderSize  = 4
)
13 | 
// ErrShortPacket is returned by ReadSamples when headers are enabled and a
// received packet is shorter than packetHeaderSize bytes.
var ErrShortPacket = errors.New("borip: short packet")
15 | 
// Flag bit values. Presumably these are the bits carried in
// PacketHeader.Flags; confirm against the BorIP protocol description.
const (
	FlagNone            = 0x00
	FlagHardwareOverrun = 0x01 // Used at hardware interface
	FlagNetworkOverrun  = 0x02 // Used at client (network too slow)
	FlagBufferOverrun   = 0x04 // Used at client (client consumer too slow)
	FlagEmptyPayload    = 0x08 // Reserved
	FlagStreamStart     = 0x10 // Used for first packet of newly started stream
	FlagStreamEnd       = 0x20 // Reserved (TO DO: Server sends BF_EMPTY_PAYLOAD | BF_STREAM_END)
	FlagBufferUnderrun  = 0x40 // Used at hardware interface
	FlagHardwareTimeout = 0x80 // Used at hardware interface

	// FlagStremEnd is the original, misspelled name of FlagStreamEnd. It is
	// kept so that existing callers continue to compile.
	//
	// Deprecated: use FlagStreamEnd.
	FlagStremEnd = FlagStreamEnd
)
27 | 
// PacketHeader is the 4-byte header that ReadSamples decodes from the start
// of each packet when headers are enabled: byte 0 is Flags, byte 1 is
// Notification and bytes 2-3 are Idx in little-endian order.
type PacketHeader struct {
	Flags        byte
	Notification byte   // Reserved (currently 0)
	Idx          uint16 // Sequence number (incremented each time a packet is sent, used by client to count dropped packets)
}
33 | 
// PacketReader reads packets from a net.PacketConn and decodes them into
// complex samples (see ReadSamples).
type PacketReader struct {
	// conn is the source of packets.
	conn        net.PacketConn
	// buf holds the most recently received packet.
	buf         []byte
	// bufI is the read offset into buf; bufN is the number of bytes received
	// into buf by the last read.
	bufI, bufN  int
	// withHeaders selects whether each packet starts with a PacketHeader.
	withHeaders bool
	// header is the header of the most recently received packet; it is only
	// updated when withHeaders is true.
	header      PacketHeader
}
41 | 
42 | func NewPacketReader(conn net.PacketConn, withHeaders bool) *PacketReader {
43 | 	return &PacketReader{
44 | 		conn:        conn,
45 | 		buf:         make([]byte, defaultBufferSize),
46 | 		withHeaders: withHeaders,
47 | 	}
48 | }
49 | 
// Header returns the header decoded from the most recently received packet.
// It is the zero PacketHeader until a packet has been read with headers
// enabled.
func (rd *PacketReader) Header() PacketHeader {
	return rd.header
}
53 | 
54 | func (rd *PacketReader) ReadSamples(samples []complex128) (int, error) {
55 | 	if rd.bufI >= rd.bufN {
56 | 		n, _, err := rd.conn.ReadFrom(rd.buf)
57 | 		if err != nil {
58 | 			return 0, err
59 | 		}
60 | 		rd.bufI = 0
61 | 		rd.bufN = n
62 | 		if rd.withHeaders {
63 | 			if n < packetHeaderSize {
64 | 				return 0, ErrShortPacket
65 | 			}
66 | 			rd.header.Flags = rd.buf[rd.bufI]
67 | 			rd.header.Notification = rd.buf[rd.bufI+1]
68 | 			rd.header.Idx = binary.LittleEndian.Uint16(rd.buf[2:4])
69 | 			rd.bufI += 4
70 | 		}
71 | 		// rd.bufN = n - (n & 7)
72 | 	}
73 | 	idx := 0
74 | 	for rd.bufI < rd.bufN {
75 | 		iReal := int16(binary.LittleEndian.Uint16(rd.buf[rd.bufI : rd.bufI+2]))
76 | 		qQmag := int16(binary.LittleEndian.Uint16(rd.buf[rd.bufI+2 : rd.bufI+4]))
77 | 		samples[idx] = complex(float64(iReal), float64(qQmag))
78 | 		idx++
79 | 		rd.bufI += 4
80 | 	}
81 | 	return idx, nil
82 | }
83 | 


--------------------------------------------------------------------------------
/boripserver/devices.go:
--------------------------------------------------------------------------------
 1 | package main
 2 | 
 3 | // TODO:
 4 | // * keep device list up to date (handle inserting/removing new devices)
 5 | 
 6 | import (
 7 | 	"errors"
 8 | 	"log"
 9 | 	"sync"
10 | 
11 | 	"github.com/samuel/go-rtlsdr/rtl"
12 | )
13 | 
var (
	// ErrDeviceNotAvailable is returned by (*device).open when the device is
	// already marked as in use.
	ErrDeviceNotAvailable = errors.New("device not available")
)
17 | 
// device is one RTL device discovered by init.
type device struct {
	// name is the name reported by rtl.GetDeviceName; rtlIndex is the index
	// later passed to rtl.Open.
	name     string
	rtlIndex int

	// mutex is held by open, close and deviceList while they touch rtlDev
	// and inUse.
	mutex         sync.RWMutex
	// rtlDev is the open handle, or nil while the device is closed.
	rtlDev        *rtl.Device
	// inUse is true between a successful open and the matching close.
	inUse         bool
	// NOTE(review): sendCloseChan is not used in this file; presumably it
	// signals a sender goroutine to stop. Confirm against main.go.
	sendCloseChan chan bool
}
27 | 
var (
	// defaultDevice is the name of the first device with a non-blank name
	// found by init, or "" when none was found.
	defaultDevice string

	// devicesMutex is read-locked by deviceList while it iterates devices.
	devicesMutex sync.RWMutex
	// devices maps a device name to its entry; it is populated by init.
	devices      map[string]*device
)
34 | 
35 | func init() {
36 | 	devices = make(map[string]*device)
37 | 
38 | 	count := rtl.GetDeviceCount()
39 | 	for i := 0; i < count; i++ {
40 | 		name := rtl.GetDeviceName(i)
41 | 		if name == "" {
42 | 			log.Printf("RTL returned a blank name for index %d", i)
43 | 		} else {
44 | 			// TODO: handle non-unique device names
45 | 			if defaultDevice == "" {
46 | 				defaultDevice = name
47 | 			}
48 | 			devices[name] = &device{
49 | 				name:     name,
50 | 				rtlIndex: i,
51 | 			}
52 | 		}
53 | 	}
54 | }
55 | 
56 | func deviceList() []*device {
57 | 	devicesMutex.RLock()
58 | 	defer devicesMutex.RUnlock()
59 | 
60 | 	devs := make([]*device, 0, len(devices))
61 | 	for _, dev := range devices {
62 | 		dev.mutex.Lock()
63 | 		if !dev.inUse {
64 | 			devs = append(devs, dev)
65 | 		}
66 | 		dev.mutex.Unlock()
67 | 	}
68 | 	return devs
69 | }
70 | 
71 | func (dev *device) open() error {
72 | 	dev.mutex.Lock()
73 | 	defer dev.mutex.Unlock()
74 | 	if dev.inUse {
75 | 		return ErrDeviceNotAvailable
76 | 	}
77 | 	rdev, err := rtl.Open(dev.rtlIndex)
78 | 	if err != nil {
79 | 		return err
80 | 	}
81 | 	dev.inUse = true
82 | 	dev.rtlDev = rdev
83 | 	return nil
84 | }
85 | 
86 | func (dev *device) close() {
87 | 	dev.mutex.Lock()
88 | 	defer dev.mutex.Unlock()
89 | 	if dev.rtlDev == nil {
90 | 		return
91 | 	}
92 | 	dev.rtlDev.Close()
93 | 	dev.rtlDev = nil
94 | 	dev.inUse = false
95 | }
96 | 


--------------------------------------------------------------------------------
/dsp/ax25/constants.go:
--------------------------------------------------------------------------------
  1 | package ax25
  2 | 
  3 | import (
  4 | 	"fmt"
  5 | )
  6 | 
  7 | type PID byte
  8 | 
  9 | const (
 10 | 	ISO8208CCITTX25PLP     PID = 0x01 // ISO 8208/CCITT X.25 PLP
 11 | 	CompressedTCPIP        PID = 0x06 // Compressed TCP/IP. RFC 1144
 12 | 	UncompressedTCPIP      PID = 0x07 // Uncompressed TCP/IP
 13 | 	SegmentationFragment   PID = 0x08 // Segmentation fragment
 14 | 	TEXNETDatagramProtocol PID = 0xc3 // TEXNET database protocol
 15 | 	LinkQualityProtocol    PID = 0xc4 // Link Quality Protocol
 16 | 	AppleTalk              PID = 0xca // Appletalk
 17 | 	AppletalkARP           PID = 0xcb // Appletalk ARP
 18 | 	ARPAInternetProtocol   PID = 0xcc // ARPA Internet Protocol
 19 | 	ARPAAddressResolution  PID = 0xcd // ARPA Address Resolution
 20 | 	FlexNet                PID = 0xce // FlexNet
 21 | 	NETROM                 PID = 0xcf // NET/ROM
 22 | 	NoLayer3Protocol       PID = 0xf0 // No Layer 3 Protocol Implemented
 23 | )
 24 | 
 25 | var pidToString = map[PID]string{
 26 | 	ISO8208CCITTX25PLP:     "ISO 8208/CCITT X.25 PLP",
 27 | 	CompressedTCPIP:        "Compressed TCP/IP. RFC 1144",
 28 | 	UncompressedTCPIP:      "Uncompressed TCP/IP",
 29 | 	SegmentationFragment:   "Segmentation fragment",
 30 | 	TEXNETDatagramProtocol: "TEXNET database protocol",
 31 | 	LinkQualityProtocol:    "Link Quality Protocol",
 32 | 	AppleTalk:              "Appletalk",
 33 | 	AppletalkARP:           "Appletalk ARP",
 34 | 	ARPAInternetProtocol:   "ARPA Internet Protocol",
 35 | 	ARPAAddressResolution:  "ARPA Address Resolution",
 36 | 	FlexNet:                "FlexNet",
 37 | 	NETROM:                 "NET/ROM",
 38 | 	NoLayer3Protocol:       "No Layer 3 Protocol Implemented",
 39 | }
 40 | 
 41 | func (pid PID) String() string {
 42 | 	if s := pidToString[pid]; s != "" {
 43 | 		return s
 44 | 	}
 45 | 	return fmt.Sprintf("%02x", int(pid))
 46 | }
 47 | 
 48 | type FrameType byte
 49 | 
 50 | const (
 51 | 	IFrame FrameType = 0 // Information frame
 52 | 	SFrame FrameType = 1 // Supervisory frame
 53 | 	UFrame FrameType = 2 // Unnumbered frame
 54 | )
 55 | 
 56 | func (t FrameType) String() string {
 57 | 	switch t {
 58 | 	case IFrame:
 59 | 		return "I"
 60 | 	case SFrame:
 61 | 		return "S"
 62 | 	case UFrame:
 63 | 		return "U"
 64 | 	}
 65 | 	return fmt.Sprintf("%02x", int(t))
 66 | }
 67 | 
 68 | type UnnumberedType byte
 69 | 
 70 | const (
 71 | 	SABME UnnumberedType = 0x6f // Set Async Balanced Mode
 72 | 	SABM  UnnumberedType = 0x2f // Set Async Balanced Mode
 73 | 	DISC  UnnumberedType = 0x43 // Disconnect
 74 | 	DM    UnnumberedType = 0x0f // Disconnect Mode
 75 | 	UA    UnnumberedType = 0x63 // Unnumbered Acknowledge
 76 | 	FRMR  UnnumberedType = 0x87 // Frame Reject
 77 | 	UI    UnnumberedType = 0x03 // Unnumbered Information
 78 | 	XID   UnnumberedType = 0xaf // Exchange Identification
 79 | 	TEST  UnnumberedType = 0xe3 // Test
 80 | )
 81 | 
 82 | var (
 83 | 	UnnumberedTypeName = map[UnnumberedType]string{
 84 | 		SABME: "SABME",
 85 | 		SABM:  "SABM",
 86 | 		DISC:  "DISC",
 87 | 		DM:    "DM",
 88 | 		UA:    "UA",
 89 | 		FRMR:  "FRMR",
 90 | 		UI:    "UI",
 91 | 		XID:   "XID",
 92 | 		TEST:  "TEST",
 93 | 	}
 94 | )
 95 | 
 96 | func (t UnnumberedType) String() string {
 97 | 	if s := UnnumberedTypeName[t]; s != "" {
 98 | 		return s
 99 | 	}
100 | 	return fmt.Sprintf("%02x", int(t))
101 | }
102 | 
// SupervisoryType identifies the kind of a supervisory (S) frame.
type SupervisoryType byte

const (
	RR   SupervisoryType = 0x1 // Receive Ready
	RNR  SupervisoryType = 0x5 // Receive Not Ready
	REJ  SupervisoryType = 0x9 // Reject
	SREJ SupervisoryType = 0xd // Selective Reject
)

// String returns the mnemonic of a known supervisory type, or the value as
// two lowercase hex digits for anything else.
func (t SupervisoryType) String() string {
	if t == RR {
		return "RR"
	}
	if t == RNR {
		return "RNR"
	}
	if t == REJ {
		return "REJ"
	}
	if t == SREJ {
		return "SREJ"
	}
	return fmt.Sprintf("%02x", int(t))
}
125 | 


--------------------------------------------------------------------------------
/dsp/ax25/crc.go:
--------------------------------------------------------------------------------
 1 | package ax25
 2 | 
 3 | var crcCcittTable = []uint16{
 4 | 	0x0000, 0x1189, 0x2312, 0x329b, 0x4624, 0x57ad, 0x6536, 0x74bf,
 5 | 	0x8c48, 0x9dc1, 0xaf5a, 0xbed3, 0xca6c, 0xdbe5, 0xe97e, 0xf8f7,
 6 | 	0x1081, 0x0108, 0x3393, 0x221a, 0x56a5, 0x472c, 0x75b7, 0x643e,
 7 | 	0x9cc9, 0x8d40, 0xbfdb, 0xae52, 0xdaed, 0xcb64, 0xf9ff, 0xe876,
 8 | 	0x2102, 0x308b, 0x0210, 0x1399, 0x6726, 0x76af, 0x4434, 0x55bd,
 9 | 	0xad4a, 0xbcc3, 0x8e58, 0x9fd1, 0xeb6e, 0xfae7, 0xc87c, 0xd9f5,
10 | 	0x3183, 0x200a, 0x1291, 0x0318, 0x77a7, 0x662e, 0x54b5, 0x453c,
11 | 	0xbdcb, 0xac42, 0x9ed9, 0x8f50, 0xfbef, 0xea66, 0xd8fd, 0xc974,
12 | 	0x4204, 0x538d, 0x6116, 0x709f, 0x0420, 0x15a9, 0x2732, 0x36bb,
13 | 	0xce4c, 0xdfc5, 0xed5e, 0xfcd7, 0x8868, 0x99e1, 0xab7a, 0xbaf3,
14 | 	0x5285, 0x430c, 0x7197, 0x601e, 0x14a1, 0x0528, 0x37b3, 0x263a,
15 | 	0xdecd, 0xcf44, 0xfddf, 0xec56, 0x98e9, 0x8960, 0xbbfb, 0xaa72,
16 | 	0x6306, 0x728f, 0x4014, 0x519d, 0x2522, 0x34ab, 0x0630, 0x17b9,
17 | 	0xef4e, 0xfec7, 0xcc5c, 0xddd5, 0xa96a, 0xb8e3, 0x8a78, 0x9bf1,
18 | 	0x7387, 0x620e, 0x5095, 0x411c, 0x35a3, 0x242a, 0x16b1, 0x0738,
19 | 	0xffcf, 0xee46, 0xdcdd, 0xcd54, 0xb9eb, 0xa862, 0x9af9, 0x8b70,
20 | 	0x8408, 0x9581, 0xa71a, 0xb693, 0xc22c, 0xd3a5, 0xe13e, 0xf0b7,
21 | 	0x0840, 0x19c9, 0x2b52, 0x3adb, 0x4e64, 0x5fed, 0x6d76, 0x7cff,
22 | 	0x9489, 0x8500, 0xb79b, 0xa612, 0xd2ad, 0xc324, 0xf1bf, 0xe036,
23 | 	0x18c1, 0x0948, 0x3bd3, 0x2a5a, 0x5ee5, 0x4f6c, 0x7df7, 0x6c7e,
24 | 	0xa50a, 0xb483, 0x8618, 0x9791, 0xe32e, 0xf2a7, 0xc03c, 0xd1b5,
25 | 	0x2942, 0x38cb, 0x0a50, 0x1bd9, 0x6f66, 0x7eef, 0x4c74, 0x5dfd,
26 | 	0xb58b, 0xa402, 0x9699, 0x8710, 0xf3af, 0xe226, 0xd0bd, 0xc134,
27 | 	0x39c3, 0x284a, 0x1ad1, 0x0b58, 0x7fe7, 0x6e6e, 0x5cf5, 0x4d7c,
28 | 	0xc60c, 0xd785, 0xe51e, 0xf497, 0x8028, 0x91a1, 0xa33a, 0xb2b3,
29 | 	0x4a44, 0x5bcd, 0x6956, 0x78df, 0x0c60, 0x1de9, 0x2f72, 0x3efb,
30 | 	0xd68d, 0xc704, 0xf59f, 0xe416, 0x90a9, 0x8120, 0xb3bb, 0xa232,
31 | 	0x5ac5, 0x4b4c, 0x79d7, 0x685e, 0x1ce1, 0x0d68, 0x3ff3, 0x2e7a,
32 | 	0xe70e, 0xf687, 0xc41c, 0xd595, 0xa12a, 0xb0a3, 0x8238, 0x93b1,
33 | 	0x6b46, 0x7acf, 0x4854, 0x59dd, 0x2d62, 0x3ceb, 0x0e70, 0x1ff9,
34 | 	0xf78f, 0xe606, 0xd49d, 0xc514, 0xb1ab, 0xa022, 0x92b9, 0x8330,
35 | 	0x7bc7, 0x6a4e, 0x58d5, 0x495c, 0x3de3, 0x2c6a, 0x1ef1, 0x0f78,
36 | }
37 | 
38 | func checkCrcCcitt(buf []byte) bool {
39 | 	var crc uint16 = 0xffff
40 | 
41 | 	for _, b := range buf {
42 | 		crc = (crc >> 8) ^ crcCcittTable[(crc^uint16(b))&0xff]
43 | 	}
44 | 	return (crc & 0xffff) == 0xf0b8
45 | }
46 | 


--------------------------------------------------------------------------------
/dsp/ax25/hdlc.go:
--------------------------------------------------------------------------------
  1 | package ax25
  2 | 
  3 | import (
  4 | 	"fmt"
  5 | )
  6 | 
  7 | type Address struct {
  8 | 	Callsign string
  9 | 	SSID     int // Secondary Station ID
 10 | }
 11 | 
 12 | func (a Address) String() string {
 13 | 	return fmt.Sprintf("%s-%d", a.Callsign, a.SSID)
 14 | }
 15 | 
 16 | type Frame struct {
 17 | 	Source           Address
 18 | 	Destination      Address
 19 | 	Repeaters        []Address
 20 | 	V1               bool
 21 | 	Command          bool // command=true, response=false
 22 | 	Type             FrameType
 23 | 	SendSeq, RecvSeq int
 24 | 	PollFinal        bool // P/F of 1 is true, 0 is false
 25 | 	UnnumberedType   UnnumberedType
 26 | 	SupervisoryType  SupervisoryType
 27 | 	PID              PID // Protocol Identifier
 28 | 	Info             []byte
 29 | }
 30 | 
 31 | type AX25 struct {
 32 | 	bitstream     byte
 33 | 	inFrame       bool
 34 | 	rxBitI        int
 35 | 	rxBits        byte
 36 | 	rxBuf         []byte
 37 | 	maxBufferSize int
 38 | }
 39 | 
 40 | func NewDecoder() *AX25 {
 41 | 	return &AX25{
 42 | 		rxBuf:         make([]byte, 0, 512),
 43 | 		maxBufferSize: 512,
 44 | 	}
 45 | }
 46 | 
 47 | func parseAddress(buf []byte) Address {
 48 | 	i := 0
 49 | 	for ; i < 6; i++ {
 50 | 		buf[i] >>= 1
 51 | 		if buf[i] == 0x20 {
 52 | 			break
 53 | 		}
 54 | 	}
 55 | 	return Address{Callsign: string(buf[:i]), SSID: int((buf[6] >> 1) & 0xf)}
 56 | }
 57 | 
 58 | func (ax *AX25) processFrame() *Frame {
 59 | 	if len(ax.rxBuf) < 10 {
 60 | 		return nil
 61 | 	}
 62 | 
 63 | 	if !checkCrcCcitt(ax.rxBuf) {
 64 | 		return nil
 65 | 	}
 66 | 
 67 | 	buf := ax.rxBuf[:len(ax.rxBuf)-2]
 68 | 
 69 | 	// for i, b := range buf {
 70 | 	// 	fmt.Printf("%d %08b\n", i, b)
 71 | 	// }
 72 | 
 73 | 	frame := Frame{
 74 | 		V1:      true,
 75 | 		Command: false, // command (true) or response (false)
 76 | 	}
 77 | 
 78 | 	if buf[1]&1 > 0 {
 79 | 		// FlexNet Header Compression
 80 | 		frame.V1 = false
 81 | 		frame.Command = (buf[1] & 2) != 0
 82 | 		var dest []byte
 83 | 		if i := (buf[2] >> 2) & 0x2f; i != 0 {
 84 | 			dest = append(dest, i+0x20)
 85 | 		}
 86 | 		if i := (buf[2] << 4) | ((buf[3]>>4)&0xf)&0x3f; i != 0 {
 87 | 			dest = append(dest, i+0x20)
 88 | 		}
 89 | 		if i := (buf[3] << 2) | ((buf[4]>>6)&3)&0x3f; i != 0 {
 90 | 			dest = append(dest, i+0x20)
 91 | 		}
 92 | 		if i := buf[4] & 0x3f; i != 0 {
 93 | 			dest = append(dest, i+0x20)
 94 | 		}
 95 | 		if i := (buf[5] >> 2) & 0x3f; i != 0 {
 96 | 			dest = append(dest, i+0x20)
 97 | 		}
 98 | 		if i := ((buf[5] << 4) | ((buf[6] >> 4) & 0xf)) & 0x3f; i != 0 {
 99 | 			dest = append(dest, i+0x20)
100 | 		}
101 | 		if dest != nil {
102 | 			frame.Destination = Address{
103 | 				Callsign: string(dest),
104 | 				SSID:     int(buf[6] & 0xf),
105 | 			}
106 | 		}
107 | 		// TODO
108 | 		// fmt.Printf("%s QSO Nr %u", frame.Destination, (buf[0]<<6)|(buf[1]>>2))
109 | 		buf = buf[7:]
110 | 	} else {
111 | 		// Normal Header
112 | 		if len(buf) < 15 {
113 | 			return nil
114 | 		}
115 | 
116 | 		// 6.1.2. Command/Response Procedure
117 | 		// dest SSID high bit : buf[6]&0x80 -> C bit of AX.25 frame
118 | 		// src SSID high bit : buf[13]&0x80 -> C bit of LA PA frame
119 | 		if buf[6]&0x80 != buf[13]&0x80 {
120 | 			frame.V1 = false
121 | 			frame.Command = int(ax.rxBuf[6]&0x80) != 0
122 | 		}
123 | 
124 | 		frame.Destination = parseAddress(buf[:7])
125 | 		frame.Source = parseAddress(buf[7:14])
126 | 
127 | 		o := 14
128 | 		for ; buf[o-1]&1 == 0 && len(buf)-o > 7; o += 7 {
129 | 			frame.Repeaters = append(frame.Repeaters, parseAddress(buf[o:]))
130 | 		}
131 | 		buf = buf[o:]
132 | 	}
133 | 
134 | 	if len(buf) == 0 {
135 | 		return &frame
136 | 	}
137 | 
138 | 	// 4.2 Control-Field
139 | 
140 | 	controlField := buf[0]
141 | 	buf = buf[1:]
142 | 
143 | 	// 4.2.1 & 6.2 Poll/Final bit
144 | 	frame.PollFinal = controlField&0x10 != 0
145 | 
146 | 	if controlField&1 == 0 {
147 | 		// Info frame
148 | 		frame.Type = IFrame
149 | 		// 0  : 0
150 | 		// 1-3: N(S)
151 | 		// 4  : P
152 | 		// 5-7: N(R)
153 | 		frame.SendSeq = int((controlField >> 1) & 7)
154 | 		frame.RecvSeq = int((controlField >> 5) & 7)
155 | 	} else if controlField&2 != 0 {
156 | 		// Unnumbered frame
157 | 		frame.Type = UFrame
158 | 		// 4.3.3 Unnumbered Frame Control Fields
159 | 		frame.UnnumberedType = UnnumberedType(controlField & ^byte(0x10))
160 | 	} else {
161 | 		// Supervisory frame
162 | 		frame.Type = SFrame
163 | 		frame.SupervisoryType = SupervisoryType(controlField & 0x0f)
164 | 		frame.RecvSeq = int((controlField >> 5) & 7)
165 | 	}
166 | 	if len(buf) == 0 {
167 | 		return &frame
168 | 	}
169 | 
170 | 	if frame.Type == IFrame || (frame.Type == UFrame && frame.UnnumberedType == UI) {
171 | 		frame.PID = PID(buf[0])
172 | 		frame.Info = buf[1:]
173 | 	}
174 | 	return &frame
175 | }
176 | 
177 | // Return a frame when a full one has been received. Otherwise return nil.
178 | func (ax *AX25) Feed(bit int) *Frame {
179 | 	ax.bitstream <<= 1
180 | 	ax.bitstream |= byte(bit)
181 | 	// Watch for flag
182 | 	if ax.bitstream&0xff == 0x7e {
183 | 		var frame *Frame
184 | 		if ax.inFrame && len(ax.rxBuf) > 2 {
185 | 			frame = ax.processFrame()
186 | 		}
187 | 		ax.inFrame = true
188 | 		ax.rxBuf = ax.rxBuf[:0]
189 | 		ax.rxBits = 0
190 | 		ax.rxBitI = 0
191 | 		return frame
192 | 	}
193 | 	// Frame abort
194 | 	if ax.bitstream&0x7f == 0x7f {
195 | 		ax.inFrame = false
196 | 		return nil
197 | 	}
198 | 	if !ax.inFrame {
199 | 		return nil
200 | 	}
201 | 	// Stuffed bit
202 | 	if ax.bitstream&0x3f == 0x3e {
203 | 		return nil
204 | 	}
205 | 	ax.rxBits >>= 1
206 | 	if bit != 0 {
207 | 		ax.rxBits |= 0x80
208 | 	}
209 | 	ax.rxBitI++
210 | 	if ax.rxBitI == 8 {
211 | 		if len(ax.rxBuf) >= ax.maxBufferSize {
212 | 			ax.inFrame = false
213 | 			// TODO: return an error?
214 | 			return nil
215 | 		}
216 | 		ax.rxBuf = append(ax.rxBuf, ax.rxBits)
217 | 		ax.rxBits = 0
218 | 		ax.rxBitI = 0
219 | 	}
220 | 	return nil
221 | }
222 | 


--------------------------------------------------------------------------------
/dsp/biquad.go:
--------------------------------------------------------------------------------
  1 | package dsp
  2 | 
  3 | import "math"
  4 | 
  5 | // http://www.musicdsp.org/files/Audio-EQ-Cookbook.txt
  6 | 
  7 | type BiQuadFilter struct {
  8 | 	B0, B1, B2      float64
  9 | 	A0, A1, A2      float64
 10 | 	prevIn, prevOut [2]float64
 11 | }
 12 | 
 13 | func (f *BiQuadFilter) Filter(input, output []float64) {
 14 | 	b0a0 := f.B0 / f.A0
 15 | 	b1a0 := f.B1 / f.A0
 16 | 	b2a0 := f.B2 / f.A0
 17 | 	a1a0 := f.A1 / f.A0
 18 | 	a2a0 := f.A2 / f.A0
 19 | 	for i, s := range input {
 20 | 		newSample := b0a0*s + b1a0*f.prevIn[0] + b2a0*f.prevIn[1] - a1a0*f.prevOut[0] - a2a0*f.prevOut[1]
 21 | 		f.prevOut[1] = f.prevOut[0]
 22 | 		f.prevOut[0] = newSample
 23 | 		f.prevIn[1] = f.prevIn[0]
 24 | 		f.prevIn[0] = s
 25 | 		output[i] = newSample
 26 | 	}
 27 | }
 28 | 
 29 | func (f *BiQuadFilter) FilterF32(input, output []float32) {
 30 | 	b0a0 := f.B0 / f.A0
 31 | 	b1a0 := f.B1 / f.A0
 32 | 	b2a0 := f.B2 / f.A0
 33 | 	a1a0 := f.A1 / f.A0
 34 | 	a2a0 := f.A2 / f.A0
 35 | 	for i, s := range input {
 36 | 		newSample := b0a0*float64(s) + b1a0*f.prevIn[0] + b2a0*f.prevIn[1] - a1a0*f.prevOut[0] - a2a0*f.prevOut[1]
 37 | 		f.prevOut[1] = f.prevOut[0]
 38 | 		f.prevOut[0] = newSample
 39 | 		f.prevIn[1] = f.prevIn[0]
 40 | 		f.prevIn[0] = float64(s)
 41 | 		output[i] = float32(newSample)
 42 | 	}
 43 | }
 44 | 
 45 | // H(s) = 1 / (s^2 + s/Q + 1)
 46 | func NewLowPassBiQuadFilter(sampleRate, cutoffFreq, q float64) *BiQuadFilter {
 47 | 	w0 := 2 * math.Pi * cutoffFreq / sampleRate
 48 | 	sinW0 := math.Sin(w0)
 49 | 	cosW0 := math.Cos(w0)
 50 | 	alpha := sinW0 / (2 * q)
 51 | 	return &BiQuadFilter{
 52 | 		B0: (1 - cosW0) / 2,
 53 | 		B1: 1 - cosW0,
 54 | 		B2: (1 - cosW0) / 2,
 55 | 		A0: 1 + alpha,
 56 | 		A1: -2 * cosW0,
 57 | 		A2: 1 - alpha,
 58 | 	}
 59 | }
 60 | 
 61 | // H(s) = s^2 / (s^2 + s/Q + 1)
 62 | func NewHighPassBiQuadFilter(sampleRate, cutoffFreq, q float64) *BiQuadFilter {
 63 | 	w0 := 2 * math.Pi * cutoffFreq / sampleRate
 64 | 	sinW0 := math.Sin(w0)
 65 | 	cosW0 := math.Cos(w0)
 66 | 	alpha := sinW0 / (2 * q)
 67 | 	return &BiQuadFilter{
 68 | 		B0: (1 + cosW0) / 2,
 69 | 		B1: -(1 + cosW0),
 70 | 		B2: (1 + cosW0) / 2,
 71 | 		A0: 1 + alpha,
 72 | 		A1: -2 * cosW0,
 73 | 		A2: 1 - alpha,
 74 | 	}
 75 | }
 76 | 
 77 | // H(s) = s / (s^2 + s/Q + 1)  (constant skirt gain, peak gain = Q)
 78 | func NewBandPassConstantSkirtGainBiQuadFilter(sampleRate, centreFrequency, q float64) *BiQuadFilter {
 79 | 	w0 := 2 * math.Pi * centreFrequency / sampleRate
 80 | 	sinW0 := math.Sin(w0)
 81 | 	cosW0 := math.Cos(w0)
 82 | 	alpha := sinW0 / (2 * q)
 83 | 	return &BiQuadFilter{
 84 | 		B0: sinW0 / 2, // = Q*alpha
 85 | 		B1: 0,
 86 | 		B2: -sinW0 / 2, // = -Q*alpha
 87 | 		A0: 1 + alpha,
 88 | 		A1: -2 * cosW0,
 89 | 		A2: 1 - alpha,
 90 | 	}
 91 | }
 92 | 
 93 | // H(s) = (s/Q) / (s^2 + s/Q + 1)      (constant 0 dB peak gain)
 94 | func NewBandPassConstantPeakGainBiQuadFilter(sampleRate, centreFrequency, q float64) *BiQuadFilter {
 95 | 	w0 := 2 * math.Pi * centreFrequency / sampleRate
 96 | 	sinW0 := math.Sin(w0)
 97 | 	cosW0 := math.Cos(w0)
 98 | 	alpha := sinW0 / (2 * q)
 99 | 	return &BiQuadFilter{
100 | 		B0: alpha,
101 | 		B1: 0,
102 | 		B2: -alpha,
103 | 		A0: 1 + alpha,
104 | 		A1: -2 * cosW0,
105 | 		A2: 1 - alpha,
106 | 	}
107 | }
108 | 
109 | // H(s) = (s^2 + 1) / (s^2 + s/Q + 1)
110 | func NotchBiQuadFilter(sampleRate, centreFrequency, q float64) *BiQuadFilter {
111 | 	w0 := 2 * math.Pi * centreFrequency / sampleRate
112 | 	sinW0 := math.Sin(w0)
113 | 	cosW0 := math.Cos(w0)
114 | 	alpha := sinW0 / (2 * q)
115 | 	return &BiQuadFilter{
116 | 		B0: 1,
117 | 		B1: -2 * cosW0,
118 | 		B2: 1,
119 | 		A0: 1 + alpha,
120 | 		A1: -2 * cosW0,
121 | 		A2: 1 - alpha,
122 | 	}
123 | }
124 | 
125 | // H(s) = (s^2 - s/Q + 1) / (s^2 + s/Q + 1)
126 | func AllPassBiQuadFilter(sampleRate, centreFrequency, q float64) *BiQuadFilter {
127 | 	w0 := 2 * math.Pi * centreFrequency / sampleRate
128 | 	sinW0 := math.Sin(w0)
129 | 	cosW0 := math.Cos(w0)
130 | 	alpha := sinW0 / (2 * q)
131 | 	return &BiQuadFilter{
132 | 		B0: 1 - alpha,
133 | 		B1: -2 * cosW0,
134 | 		B2: 1 + alpha,
135 | 		A0: 1 + alpha,
136 | 		A1: -2 * cosW0,
137 | 		A2: 1 - alpha,
138 | 	}
139 | }
140 | 
141 | // H(s) = (s^2 + s*(A/Q) + 1) / (s^2 + s/(A*Q) + 1)
142 | func PeakingEQBiQuadFilter(sampleRate, centreFrequency, q, dbGain float64) *BiQuadFilter {
143 | 	w0 := 2 * math.Pi * centreFrequency / sampleRate
144 | 	sinW0 := math.Sin(w0)
145 | 	cosW0 := math.Cos(w0)
146 | 	alpha := sinW0 / (2 * q)
147 | 	a := math.Pow(10, dbGain/40) // TODO: should we square root this value?
148 | 	return &BiQuadFilter{
149 | 		B0: 1 + alpha*a,
150 | 		B1: -2 * cosW0,
151 | 		B2: 1 - alpha*a,
152 | 		A0: 1 + alpha/a,
153 | 		A1: -2 * cosW0,
154 | 		A2: 1 - alpha/a,
155 | 	}
156 | }
157 | 
158 | // H(s) = A * (s^2 + (sqrt(A)/Q)*s + A)/(A*s^2 + (sqrt(A)/Q)*s + 1)
159 | // shelfSlope: a "shelf slope" parameter (for shelving EQ only).
160 | // When S = 1, the shelf slope is as steep as it can be and remain monotonically
161 | // increasing or decreasing gain with frequency.  The shelf slope, in dB/octave,
162 | // remains proportional to S for all other values for a fixed f0/Fs and dBgain.</param>
163 | // dbGain: Gain in decibels
164 | func LowShelfBiQuadFilter(sampleRate, cutoffFrequency, shelfSlope, dbGain float64) *BiQuadFilter {
165 | 	w0 := 2 * math.Pi * cutoffFrequency / sampleRate
166 | 	sinW0 := math.Sin(w0)
167 | 	cosW0 := math.Cos(w0)
168 | 	a := math.Pow(10, dbGain/40.0) // TODO: should we square root this value?
169 | 	alpha := sinW0 / 2 * math.Sqrt((a+1/a)*(1/shelfSlope-1)+2)
170 | 	temp := 2 * math.Sqrt(a) * alpha
171 | 	return &BiQuadFilter{
172 | 		B0: a * ((a + 1) - (a-1)*cosW0 + temp),
173 | 		B1: 2 * a * ((a - 1) - (a+1)*cosW0),
174 | 		B2: a * ((a + 1) - (a-1)*cosW0 - temp),
175 | 		A0: (a + 1) + (a-1)*cosW0 + temp,
176 | 		A1: -2 * ((a - 1) + (a+1)*cosW0),
177 | 		A2: (a + 1) + (a-1)*cosW0 - temp,
178 | 	}
179 | }
180 | 
181 | // H(s) = A * (A*s^2 + (sqrt(A)/Q)*s + 1)/(s^2 + (sqrt(A)/Q)*s + A)
182 | func HighShelfBiQuadFilter(sampleRate, cutoffFrequency, shelfSlope, dbGain float64) *BiQuadFilter {
183 | 	w0 := 2 * math.Pi * cutoffFrequency / sampleRate
184 | 	sinW0 := math.Sin(w0)
185 | 	cosW0 := math.Cos(w0)
186 | 	a := math.Pow(10, dbGain/40) // TODO: should we square root this value?
187 | 	alpha := sinW0 / 2 * math.Sqrt((a+1/a)*(1/shelfSlope-1)+2)
188 | 	temp := 2 * math.Sqrt(a) * alpha
189 | 	return &BiQuadFilter{
190 | 		B0: a * ((a + 1) + (a-1)*cosW0 + temp),
191 | 		B1: -2 * a * ((a - 1) + (a+1)*cosW0),
192 | 		B2: a * ((a + 1) + (a-1)*cosW0 - temp),
193 | 		A0: (a + 1) - (a-1)*cosW0 + temp,
194 | 		A1: 2 * ((a - 1) - (a+1)*cosW0),
195 | 		A2: (a + 1) - (a-1)*cosW0 - temp,
196 | 	}
197 | }
198 | 


--------------------------------------------------------------------------------
/dsp/conversion.go:
--------------------------------------------------------------------------------
  1 | package dsp
  2 | 
  3 | import (
  4 | 	"encoding/binary"
  5 | 	"math"
  6 | )
  7 | 
  8 | //go:generate go run conversion_avo_amd64.go -out conversion_avo_amd64.s -stubs stub_windows.go
  9 | 
 10 | // Ui8toi16 converts and scales unsigned 8-bit samples to 16-bit signed samples.
 11 | func Ui8toi16(input []byte, output []int16)
 12 | func ui8toi16(input []byte, output []int16) {
 13 | 	n := len(output)
 14 | 	if len(input) < n {
 15 | 		n = len(input)
 16 | 	}
 17 | 	for i, v := range input[:n] {
 18 | 		v -= 128
 19 | 		v16 := int16((uint16(v) << 8) | uint16(v))
 20 | 		output[i] = v16
 21 | 	}
 22 | }
 23 | 
 24 | // Ui8toi16b converts and scales unsigned 8-bit samples to 16-bit signed samples.
 25 | func Ui8toi16b(input, output []byte)
 26 | func ui8toi16b(input, output []byte) {
 27 | 	n := len(output) / 2
 28 | 	if len(input) < n {
 29 | 		n = len(input)
 30 | 	}
 31 | 	for i, v := range input[:n] {
 32 | 		v -= 128
 33 | 		output[i*2] = v
 34 | 		output[i*2+1] = v
 35 | 	}
 36 | }
 37 | 
 38 | // Ui8tof32 converts unsigned 8-bit samples to 32-bit float.
 39 | // It does not scale the samples.
 40 | func Ui8tof32(input []byte, output []float32)
 41 | func ui8tof32(input []byte, output []float32) {
 42 | 	n := len(input)
 43 | 	if len(output) < n {
 44 | 		n = len(output)
 45 | 	}
 46 | 	_ = output[n-1] // eliminate bounds check
 47 | 	for i, v := range input[:n] {
 48 | 		output[i] = float32(int(v) - 128)
 49 | 	}
 50 | }
 51 | 
 52 | // I8tof32 converts signed 8-bit samples to 32-bit float.
 53 | // It does not scale the samples.
 54 | func I8tof32(input []byte, output []float32)
 55 | func i8tof32(input []byte, output []float32) {
 56 | 	n := len(input)
 57 | 	if len(output) < n {
 58 | 		n = len(output)
 59 | 	}
 60 | 	for i, v := range input[:n] {
 61 | 		output[i] = float32(int8(v))
 62 | 	}
 63 | }
 64 | 
 65 | // Ui8toc64 converts unsigned 8-bit interleaved complex samples to 64-bit complex (32-bit real and imaginary parts).
 66 | // It does not scale the samples.
 67 | func Ui8toc64(input []byte, output []complex64)
 68 | func ui8toc64(input []byte, output []complex64) {
 69 | 	n := len(input) / 2
 70 | 	if len(output) < n {
 71 | 		n = len(output)
 72 | 	}
 73 | 	for i := 0; i < n; i++ {
 74 | 		output[i] = complex(
 75 | 			float32(int(input[i*2])-128),
 76 | 			float32(int(input[i*2+1])-128),
 77 | 		)
 78 | 	}
 79 | }
 80 | 
 81 | // I8toc64 converts signed 8-bit interleaved complex samples to 64-bit complex (32-bit real and imaginary parts).
 82 | // It does not scale the samples.
 83 | func I8toc64(input []int8, output []complex64) {
 84 | 	// func i8toc64(input []int8, output []complex64) {
 85 | 	n := len(input) / 2
 86 | 	if len(output) < n {
 87 | 		n = len(output)
 88 | 	}
 89 | 	for i := 0; i < n; i++ {
 90 | 		output[i] = complex(
 91 | 			float32(input[i*2]),
 92 | 			float32(input[i*2+1]),
 93 | 		)
 94 | 	}
 95 | }
 96 | 
 97 | // C64toi8 converts 64-bit complex samples to signed 8-bit interleaved.
 98 | // It does not scale the samples.
 99 | func C64toi8(input []complex64, output []int8) {
100 | 	// func c64toi8(input []complex64, output []int8) {
101 | 	n := len(output) / 2
102 | 	if len(input) < n {
103 | 		n = len(input)
104 | 	}
105 | 	for i, s := range input[:n] {
106 | 		output[i*2] = int8(real(s))
107 | 		output[i*2+1] = int8(imag(s))
108 | 	}
109 | }
110 | 
111 | // F32toi16 converts scaled 32-bit floats to 16-bit integers.
112 | func F32toi16(input []float32, output []int16, scale float32)
// f32toi16 is the portable Go implementation of F32toi16. Each sample is
// multiplied by scale and converted to int16. It converts
// min(len(input), len(output)) samples.
func f32toi16(input []float32, output []int16, scale float32) {
	count := len(input)
	if len(output) < count {
		count = len(output)
	}
	for i := 0; i < count; i++ {
		output[i] = int16(input[i] * scale)
	}
}
122 | 
123 | // F32toi16ble converts float32 to int16 stored in a byte slice. The values
124 | // are stored in little-endian.
125 | func F32toi16ble(input []float32, output []byte, scale float32)
// f32toi16ble is the portable Go implementation of F32toi16ble. Each sample
// is multiplied by scale, converted to int16 and written low byte first.
// It converts min(len(input), len(output)/2) samples.
func f32toi16ble(input []float32, output []byte, scale float32) {
	count := len(input)
	if room := len(output) / 2; room < count {
		count = room
	}
	for i := 0; i < count; i++ {
		s := int16(input[i] * scale)
		output[2*i] = byte(s)
		output[2*i+1] = byte(s >> 8)
	}
}
137 | 
138 | // I16ToBLE converts int16 values to little endian bytes.
139 | func I16ToBLE(input []int16, output []byte)
// i16ToBLE is the portable Go implementation of I16ToBLE. Each int16 is
// written as two bytes, low byte first. It converts
// min(len(input), len(output)/2) values.
func i16ToBLE(input []int16, output []byte) {
	count := len(output) / 2
	if len(input) < count {
		count = len(input)
	}
	for i := 0; i < count; i++ {
		w := uint16(input[i])
		output[2*i] = byte(w)
		output[2*i+1] = byte(w >> 8)
	}
}
150 | 
151 | // I16bleToF64 converts int16 stored in a byte slice as little endian to float64.
152 | func I16bleToF64(input []byte, output []float64, scale float64)
// i16bleToF64 is the portable Go implementation of I16bleToF64. Each pair of
// input bytes is read as a little-endian int16, converted to float64 and
// multiplied by scale. A trailing odd byte is ignored.
func i16bleToF64(input []byte, output []float64, scale float64) {
	count := len(input) / 2
	if len(output) < count {
		count = len(output)
	}
	for i := 0; i < count; i++ {
		lo := uint16(input[2*i])
		hi := uint16(input[2*i+1])
		output[i] = float64(int16(lo|hi<<8)) * scale
	}
}
162 | 
163 | // I16bleToF32 converts int16 stored in a byte slice as little endian to float32.
164 | func I16bleToF32(input []byte, output []float32, scale float32)
// i16bleToF32 is the portable Go implementation of I16bleToF32. Each pair of
// input bytes is read as a little-endian int16, converted to float32 and
// multiplied by scale. A trailing odd byte is ignored.
func i16bleToF32(input []byte, output []float32, scale float32) {
	count := len(input) / 2
	if len(output) < count {
		count = len(output)
	}
	for i := 0; i < count; i++ {
		lo := uint16(input[2*i])
		hi := uint16(input[2*i+1])
		output[i] = float32(int16(lo|hi<<8)) * scale
	}
}
174 | 
// I32bleToF32 converts int32 values stored in a byte slice as little endian
// to float32, multiplying each by scale. It converts
// min(len(input)/4, len(output)) values; trailing bytes that do not form a
// full word are ignored.
func I32bleToF32(input []byte, output []float32, scale float32) {
	count := len(input) / 4
	if len(output) < count {
		count = len(output)
	}
	for i := 0; i < count; i++ {
		word := input[4*i : 4*i+4]
		raw := uint32(word[0]) |
			uint32(word[1])<<8 |
			uint32(word[2])<<16 |
			uint32(word[3])<<24
		output[i] = float32(int32(raw)) * scale
	}
}
191 | 
// F32Tof32ble converts a float32 slice to a byte slice of little-endian
// float32 values (IEEE 754 bit patterns). It writes
// min(len(input), len(output)/4) values.
func F32Tof32ble(input []float32, output []byte) {
	count := len(input)
	if room := len(output) / 4; room < count {
		count = room
	}
	for i := 0; i < count; i++ {
		bits := math.Float32bits(input[i])
		binary.LittleEndian.PutUint32(output[4*i:], bits)
	}
}
203 | 


--------------------------------------------------------------------------------
/dsp/conversion_386.s:
--------------------------------------------------------------------------------
 1 | TEXT ·Ui8toi16(SB), 7, $0
 2 | 	JMP ·ui8toi16(SB)
 3 | 
 4 | TEXT ·Ui8toi16b(SB), 7, $0
 5 | 	JMP ·ui8toi16b(SB)
 6 | 
 7 | TEXT ·Ui8tof32(SB), 7, $0
 8 | 	JMP ·ui8tof32(SB)
 9 | 
10 | TEXT ·I8tof32(SB), 7, $0
11 | 	JMP ·i8tof32(SB)
12 | 
13 | TEXT ·Ui8toc64(SB), 7, $0
14 | 	JMP ·ui8toc64(SB)
15 | 
16 | TEXT ·F32toi16(SB), 7, $0
17 | 	JMP ·f32toi16(SB)
18 | 
19 | TEXT ·F32toi16ble(SB), 7, $0
20 | 	JMP ·f32toi16ble(SB)
21 | 
22 | TEXT ·I16bleToF64(SB), 7, $0
23 | 	JMP ·i16bleToF64(SB)
24 | 
25 | TEXT ·I16bleToF32(SB), 7, $0
26 | 	JMP ·i16bleToF32(SB)
27 | 


--------------------------------------------------------------------------------
/dsp/conversion_arm.s:
--------------------------------------------------------------------------------
  1 | #include "textflag.h"
  2 | 
// func Ui8toi16(input []byte, output []int16)
//
// Converts unsigned 8-bit samples to 16-bit samples. For each of the first
// min(len(input), len(output)) input bytes, 128 is subtracted and the low
// byte of the result is stored to both bytes of the output sample.
TEXT ·Ui8toi16(SB), NOSPLIT, $0
	MOVW input+0(FP), R1
	MOVW input_len+4(FP), R2
	MOVW output+12(FP), R3
	MOVW output_len+16(FP), R4

	// Choose the shortest length
	CMP     R2, R4
	MOVW.LT R4, R2

	// If no input then skip loop
	TEQ $0, R2
	BEQ ui8toi16_done
	ADD R1, R2 // R2 = one past the last input byte to convert

ui8toi16_loop:
	MOVBU 0(R1), R0
	ADD   $1, R1
	SUB   $128, R0
	MOVBU R0, 0(R3) // same byte goes to both halves of the output sample
	MOVBU R0, 1(R3)
	ADD   $2, R3
	CMP   R2, R1
	BLT   ui8toi16_loop

ui8toi16_done:
	RET
 30 | 
// func Ui8toi16b(input, output []byte)
//
// Same conversion as Ui8toi16 with the output given as bytes. The output
// length argument is halved in place (bytes -> 16-bit samples) and control
// then branches to Ui8toi16, which reads the same argument frame.
TEXT ·Ui8toi16b(SB), NOSPLIT, $0
	MOVW output_len+16(FP), R4
	MOVW R4>>1, R4
	MOVW R4, output_len+16(FP)
	B    ·Ui8toi16(SB)
 36 | 
// func Ui8tof32(input []byte, output []float32)
//
// Converts min(len(input), len(output)) unsigned bytes to float32 after
// subtracting 128. Three code paths share one tail loop:
//   - a VFP path handling 4 bytes per iteration, used when ·HaveNEON is zero;
//   - a NEON path used when ·HaveNEON is non-zero;
//   - a one-byte-at-a-time tail loop for whatever the block loops leave over.
// Most instructions are emitted as raw WORDs; the mnemonic in each trailing
// comment is the intended encoding.
TEXT ·Ui8tof32(SB), NOSPLIT, $0
	MOVW input+0(FP), R1
	MOVW input_len+4(FP), R2
	MOVW output+12(FP), R3
	MOVW output_len+16(FP), R0

	// Choose the shortest length
	CMP     R2, R0
	MOVW.LT R0, R2

	// If no input then skip loop
	CMP $0, R2
	BEQ ui8tof32_done

	MOVBU ·HaveNEON+0(SB), R0
	CMP   $0, R0
	BNE   ui8tof32_neon

	// R0 = count rounded down to a multiple of 4, R2 = end of input
	AND $(~3), R2, R0
	ADD R1, R2
	TEQ $0, R0
	BEQ ui8tof32_tail
	ADD R1, R0 // R0 = end of the part of the input handled 4 bytes at a time

	MOVW $0x80808080, R8 // 128 in every byte lane, subtracted by usub8 below

ui8tof32_loop:
	// This is slower on Raspberry Pi but faster on Udoo Quad (which uses NEON anyway)
	// MOVBU	0(R1), R4
	// MOVBU	1(R1), R5
	// MOVBU	2(R1), R6
	// MOVBU	3(R1), R7
	// ADD	$4, R1
	// SUB	$128, R4
	// SUB	$128, R5
	// SUB	$128, R6
	// SUB	$128, R7

	// This is faster on Raspberry Pi but slower on Udoo Quad (which uses NEON anyway)
	MOVW (R1), R4
	ADD  $4, R1
	WORD $0xe6544ff8 // usub8 r4, r4, r8
	WORD $0xe6af5474 // sxtb r5, r4, ror #8
	WORD $0xe6af6874 // sxtb r6, r4, ror #16
	WORD $0xe6af7c74 // sxtb r7, r4, ror #24
	WORD $0xe6af4074 // sxtb r4, r4

	WORD $0xec454a1e // vmov s28, s29, r4, r5
	WORD $0xec476a1f // vmov s30, s31, r6, r7
	WORD $0xeeb80ace // vcvt.f32.s32 s0, s28
	WORD $0xeef80aee // vcvt.f32.s32 s1, s29
	WORD $0xeeb81acf // vcvt.f32.s32 s2, s30
	WORD $0xeef81aef // vcvt.f32.s32 s3, s31

	WORD $0xeca30a04 // vstmia r3!, {s0, s1, s2, s3}
	CMP  R0, R1
	BLT  ui8tof32_loop

	B ui8tof32_tail

	////////////// Neon ////////////

ui8tof32_neon:
	MOVW $128, R0
	WORD $0xeee00b10 // vdup.8 q0, r0

	// R4 = count rounded down to a multiple of 64, R2 = end of input.
	// Per the mnemonics, each loop iteration loads four D registers
	// (32 bytes) and stores sixteen (32 floats).
	AND $(~(16*4-1)), R2, R4
	ADD R1, R2
	TEQ $0, R4
	BEQ ui8tof32_tail
	ADD R1, R4 // R4 = end of the part of the input handled by the NEON loop

ui8tof32_neon_loop:
	WORD $0xf461c28d // vld1.32 {d28, d29, d30, d31}, [r1]!

	// WORD	$0xf461c2bd // vld1.32 {d28, d29, d30, d31}, [r1:256]!
	WORD $0xf3cc4280 // vsubl.u8 q10, d28, d0
	WORD $0xf3cd6280 // vsubl.u8 q11, d29, d0
	WORD $0xf3ce8280 // vsubl.u8 q12, d30, d0
	WORD $0xf3cfa280 // vsubl.u8 q13, d31, d0
	WORD $0xf2902a34 // vmovl.s16 q1, d20
	WORD $0xf2904a35 // vmovl.s16 q2, d21
	WORD $0xf2906a36 // vmovl.s16 q3, d22
	WORD $0xf2908a37 // vmovl.s16 q4, d23
	WORD $0xf290aa38 // vmovl.s16 q5, d24
	WORD $0xf290ca39 // vmovl.s16 q6, d25
	WORD $0xf290ea3a // vmovl.s16 q7, d26
	WORD $0xf2d00a3b // vmovl.s16 q8, d27
	WORD $0xf3bb2642 // vcvt.f32.s32 q1, q1
	WORD $0xf3bb4644 // vcvt.f32.s32 q2, q2
	WORD $0xf3bb6646 // vcvt.f32.s32 q3, q3
	WORD $0xf3bb8648 // vcvt.f32.s32 q4, q4
	WORD $0xf403228d // vst1.32 {d2, d3, d4, d5}, [r3]!
	WORD $0xf3bba64a // vcvt.f32.s32 q5, q5
	WORD $0xf3bbc64c // vcvt.f32.s32 q6, q6
	WORD $0xf403628d // vst1.32 {d6, d7, d8, d9}, [r3]!
	WORD $0xf3bbe64e // vcvt.f32.s32 q7, q7
	WORD $0xf3fb0660 // vcvt.f32.s32 q8, q8
	WORD $0xf403a28d // vst1.32 {d10, d11, d12, d13}, [r3]!
	WORD $0xf403e28d // vst1.32 {d14, d15, d16, d17}, [r3]!

	CMP R4, R1
	BLT ui8tof32_neon_loop

ui8tof32_tail:
	// Done if the block loops consumed everything
	CMP R1, R2
	BEQ ui8tof32_done

ui8tof32_tail_loop:
	MOVBU 0(R1), R4
	SUB   $128, R4
	MOVWF R4, F0
	ADD   $1, R1
	WORD  $0xeca30a01        // vstmia     r3!, {s0}
	CMP   R2, R1
	BLT   ui8tof32_tail_loop

ui8tof32_done:
	RET
156 | 
// func I8tof32(input []byte, output []float32)
//
// TODO: no hand-written ARM version yet; this branches to the Go
// implementation i8tof32.
TEXT ·I8tof32(SB), NOSPLIT, $0
	B ·i8tof32(SB)
160 | 
// func Ui8toc64(input []byte, output []complex64)
//
// Reuses Ui8tof32 by rewriting the length arguments in place: the input
// length is rounded down to an even number of bytes and the output length is
// doubled (complex64 elements -> float32 elements). Control then branches to
// Ui8tof32, which reads the same argument frame.
TEXT ·Ui8toc64(SB), NOSPLIT, $0
	MOVW input_len+4(FP), R2
	AND  $(~1), R2
	MOVW R2, input_len+4(FP)
	MOVW output_len+16(FP), R0
	MOVW R0<<1, R0
	MOVW R0, output_len+16(FP)
	B    ·Ui8tof32(SB)
169 | 
// func F32toi16(input []float32, output []int16, scale float32)
//
// Multiplies min(len(input), len(output)) float32 samples by scale and stores
// them as 16-bit integers. Two block loops share one tail loop:
//   - a VFP scalar loop handling 4 samples per iteration;
//   - a VFP short-vector loop handling 8 samples per iteration, used when
//     ·UseVector is non-zero;
//   - a one-sample-at-a-time tail loop for the remainder.
// Most instructions are emitted as raw WORDs; the mnemonic in each trailing
// comment is the intended encoding.
TEXT ·F32toi16(SB), NOSPLIT, $0
	MOVW input+0(FP), R1
	MOVW input_len+4(FP), R2
	MOVW output+12(FP), R3
	MOVW output_len+16(FP), R0
	MOVF scale+24(FP), F0

	// Choose the shortest length
	CMP     R2, R0
	MOVW.LT R0, R2

	// If no input then we are done
	TEQ $0, R2
	BEQ f32toi16_done

	MOVW R2, R7
	ADD  R2<<2, R1, R2

	// R1 = input
	// R2 = end of input (input + 4*count)
	// R3 = output
	// R7 = count

	MOVBU ·UseVector+0(SB), R0
	TEQ   $0, R0
	BNE   f32toi16_vector

	//////////////// VFP Scalar /////////////

	AND $(~3), R7
	TEQ $0, R7
	BEQ f32toi16_tail
	ADD R7<<2, R1, R7 // R7 = end of input truncated to block size

f32toi16_scalar_loop:
	WORD $0xecb11a04 // vldmia r1!, {s2, s3, s4, s5}
	WORD $0xee211a00 // vmul.f32 s2, s2, s0
	WORD $0xee611a80 // vmul.f32 s3, s3, s0
	WORD $0xee222a00 // vmul.f32 s4, s4, s0
	WORD $0xee622a80 // vmul.f32 s5, s5, s0
	WORD $0xeebd1ac1 // vcvt.s32.f32 s2, s2
	WORD $0xeefd1ae1 // vcvt.s32.f32 s3, s3
	WORD $0xeebd2ac2 // vcvt.s32.f32 s4, s4
	WORD $0xeefd2ae2 // vcvt.s32.f32 s5, s5
	WORD $0xec540a11 // vmov r0, r4, s2, s3
	MOVH R0, 0(R3)
	MOVH R4, 2(R3)
	WORD $0xec5b8a12 // vmov r8, r11, s4, s5
	MOVH R8, 4(R3)
	MOVH R11, 6(R3)
	ADD  $8, R3

	CMP R7, R1
	BLT f32toi16_scalar_loop

	B f32toi16_tail

	///////////// VFP Vector //////////////

f32toi16_vector:
	AND $(~7), R7
	TEQ $0, R7
	BEQ f32toi16_tail
	ADD R7<<2, R1, R7 // R7 = end of input truncated to block size

	PLD (R1)
	PLD 64(R1)
	PLD (2*64)(R1)
	PLD (3*64)(R1)

	// Set vector length to 8
	WORD $0xeef10a10            // vmrs r0, fpscr
	BIC  $((7<<16)|(3<<20)), R0
	ORR  $((7<<16)|(0<<20)), R0
	WORD $0xeee10a10            // fmxr fpscr, r0

f32toi16_vector_loop:
	PLD  (4*64)(R1)
	WORD $0xecb14a08 // vldmia r1!, {s8-s15}
	// NOTE(review): presumably this single vmul scales all of s8-s15 because
	// the vector length was set to 8 above — confirm against the VFP manual.
	WORD $0xee244a00 // vmul.f32 s8, s8, s0
	WORD $0xeebd4ac4 // vcvt.s32.f32 s8, s8
	WORD $0xeefd4ae4 // vcvt.s32.f32 s9, s9
	WORD $0xeebd5ac5 // vcvt.s32.f32 s10, s10
	WORD $0xeefd5ae5 // vcvt.s32.f32 s11, s11
	WORD $0xec540a14 // vmov r0, r4, s8, s9
	WORD $0xec5b8a15 // vmov r8, r11, s10, s11
	MOVH R0, 0(R3)
	MOVH R4, 2(R3)
	MOVH R8, 4(R3)
	MOVH R11, 6(R3)
	WORD $0xeebd6ac6 // vcvt.s32.f32 s12, s12
	WORD $0xeefd6ae6 // vcvt.s32.f32 s13, s13
	WORD $0xeebd7ac7 // vcvt.s32.f32 s14, s14
	WORD $0xeefd7ae7 // vcvt.s32.f32 s15, s15
	WORD $0xec540a16 // vmov r0, r4, s12, s13
	WORD $0xec5b8a17 // vmov r8, r11, s14, s15
	MOVH R0, 8(R3)
	MOVH R4, 10(R3)
	MOVH R8, 12(R3)
	MOVH R11, 14(R3)
	ADD  $16, R3

	CMP R7, R1
	BLT f32toi16_vector_loop

	// Clear vector mode
	WORD $0xeef10a10            // vmrs r0, fpscr
	BIC  $((7<<16)|(3<<20)), R0
	WORD $0xeee10a10            // fmxr fpscr, r0

f32toi16_tail:
	// Done if the block loops consumed everything
	CMP R1, R2
	BEQ f32toi16_done

f32toi16_tail_loop:
	MOVF  0(R1), F1
	ADD   $4, R1
	MULF  F0, F1
	MOVFW F1, R0
	MOVHU R0, (R3)
	ADD   $2, R3
	CMP   R2, R1
	BLT   f32toi16_tail_loop

f32toi16_done:
	RET
296 | 
// func F32toi16ble(input []float32, output []byte, scale float32)
//
// Reuses F32toi16: the output length is halved in place (bytes -> 16-bit
// samples) and control branches to F32toi16, which reads the same argument
// frame and stores each sample with a native halfword store.
// TODO: detect endianness and use non-native order writes on big-endian
TEXT ·F32toi16ble(SB), NOSPLIT, $0
	MOVW output_len+16(FP), R0
	MOVW R0>>1, R0
	MOVW R0, output_len+16(FP)
	B    ·F32toi16(SB)
303 | 
// The remaining conversions have no hand-written ARM version; each exported
// symbol branches to the Go implementation of the same name in conversion.go.

// I16ToBLE is declared without a body in conversion.go, so it needs a symbol
// here as well or callers fail to link on ARM.
TEXT ·I16ToBLE(SB), NOSPLIT, $0
	B ·i16ToBLE(SB)

TEXT ·I16bleToF64(SB), NOSPLIT, $0
	B ·i16bleToF64(SB)

TEXT ·I16bleToF32(SB), NOSPLIT, $0
	B ·i16bleToF32(SB)
309 | 


--------------------------------------------------------------------------------
/dsp/conversion_arm64.s:
--------------------------------------------------------------------------------
  1 | #include "textflag.h"
  2 | 
// Ui8toi16 and Ui8toi16b have no hand-written arm64 version; both branch to
// the Go implementations in conversion.go.
TEXT ·Ui8toi16(SB), NOSPLIT, $0
    B ·ui8toi16(SB)

TEXT ·Ui8toi16b(SB), NOSPLIT, $0
	B ·ui8toi16b(SB)
  8 | 
// func Ui8tof32(input []byte, output []float32)
//
// Converts min(len(input), len(output)) unsigned bytes to float32 after
// subtracting 128. A SIMD loop handles BLOCK_SIZE (32) bytes per iteration
// while at least that many remain; a scalar loop converts the rest one byte
// at a time. SIMD instructions are emitted as raw WORDs; the mnemonic in each
// trailing comment is the intended encoding.
TEXT ·Ui8tof32(SB), NOSPLIT, $0
	MOVD	input(FP), R0
	MOVD	input_len+8(FP), R1
	MOVD	output+24(FP), R2
	MOVD	output_len+32(FP), R3

    // R1 = min(len(input), len(output))
    CMP     R3, R1
    BLT     ui8tof32_min_len
    MOVD    R3, R1
ui8tof32_min_len:

#define BLOCK_SIZE 32

	// v0 = 0x80 in every byte lane
	MOVW	$0x80, R3
	WORD	$0x4e010c60 // dup v0.16b, w3

    CMP     $BLOCK_SIZE, R1
    BLT     ui8tof32_scalar

ui8tof32_simd_loop:
    WORD    $0xad400801 // ldp q1, q2, [x0]
    ADD     $BLOCK_SIZE, R0
	WORD	$0x6e208421 // sub v1.16b, v1.16b, v0.16b
	WORD	$0x6e208442 // sub v2.16b, v2.16b, v0.16b
	WORD    $0xf9804001 // prfm pldl1strm, [x0, 128]
	WORD	$0x0f08a42a // sxtl v10.8h, v1.8b
	WORD	$0x4f08a42b // sxtl2 v11.8h, v1.16b
	WORD	$0x0f08a44c // sxtl v12.8h, v2.8b
	WORD	$0x4f08a44d // sxtl2 v13.8h, v2.16b
	WORD	$0x0f10a554 // sxtl v20.4s, v10.4h
	WORD	$0x4f10a555 // sxtl2 v21.4s, v10.8h
	WORD	$0x0f10a576 // sxtl v22.4s, v11.4h
	WORD	$0x4f10a577 // sxtl2 v23.4s, v11.8h
	WORD	$0x0f10a598 // sxtl v24.4s, v12.4h
	WORD	$0x4f10a599 // sxtl2 v25.4s, v12.8h
	WORD	$0x0f10a5ba // sxtl v26.4s, v13.4h
	WORD	$0x4f10a5bb // sxtl2 v27.4s, v13.8h
	//WORD	$0x4e21da81 // scvtf v1.4s, v20.4s
	//WORD	$0x4e21daa2 // scvtf v2.4s, v21.4s
	WORD	$0x4e21da89 // scvtf v9.4s, v20.4s
	WORD	$0x4e21daaa // scvtf v10.4s, v21.4s
	WORD	$0x4e21dac3 // scvtf v3.4s, v22.4s
	WORD	$0x4e21dae4 // scvtf v4.4s, v23.4s
    //WORD    $0xad000841 // stp q1, q2, [x2]
	WORD    $0xad002849 // stp q9, q10, [x2]
	WORD	$0x4e21db05 // scvtf v5.4s, v24.4s
	WORD	$0x4e21db26 // scvtf v6.4s, v25.4s
    WORD    $0xad011043 // stp q3, q4, [x2,32]
	WORD	$0x4e21db47 // scvtf v7.4s, v26.4s
	WORD	$0x4e21db68 // scvtf v8.4s, v27.4s
	WORD    $0xad021845 // stp q5, q6, [x2,64]
	//WORD    $0xad032047 // stp q7, q8, [x2,96]
    // The output pointer is advanced before the last store, which therefore
    // uses a negative offset to reach the final 32 bytes of this block.
    ADD     $(BLOCK_SIZE*4), R2
    SUB     $BLOCK_SIZE, R1
	WORD    $0xad3f2047 // stp q7, q8, [x2,-32]
    CMP     $BLOCK_SIZE, R1
    BGE     ui8tof32_simd_loop

ui8tof32_scalar:
    // R1 = samples still to convert
    CMP     ZR, R1
    BEQ     ui8tof32_done

ui8tof32_scalar_loop:
	MOVBU	(R0), R5
	SUB		$128, R5, R5
	SCVTFS	R5, F0
	FMOVS	F0, (R2)
	ADD		$1, R0
	ADD		$4, R2
    SUBS    $1, R1
	BNE     ui8tof32_scalar_loop
ui8tof32_done:
	RET
 82 | 
 83 | TEXT ·I8tof32(SB), NOSPLIT, $0 // this and the stubs below have no arm64 assembly body: each branches to the lower-case function of the same name
 84 | 	B ·i8tof32(SB)
 85 | 
 86 | TEXT ·Ui8toc64(SB), NOSPLIT, $0 // forwards to ·ui8toc64
 87 | 	B ·ui8toc64(SB)
 88 | 
 89 | TEXT ·F32toi16(SB), NOSPLIT, $0 // forwards to ·f32toi16
 90 |     B ·f32toi16(SB)
 91 | 
 92 | TEXT ·F32toi16ble(SB), NOSPLIT, $0 // forwards to ·f32toi16ble
 93 | 	B ·f32toi16ble(SB)
 94 | 
 95 | TEXT ·I16bleToF64(SB), NOSPLIT, $0 // forwards to ·i16bleToF64
 96 | 	B ·i16bleToF64(SB)
 97 | 
 98 | TEXT ·I16bleToF32(SB), NOSPLIT, $0 // forwards to ·i16bleToF32
 99 | 	B ·i16bleToF32(SB)
100 | 


--------------------------------------------------------------------------------
/dsp/conversion_avo_amd64.go:
--------------------------------------------------------------------------------
  1 | //go:build ignore
  2 | // +build ignore
  3 | 
  4 | package main
  5 | 
  6 | import (
  7 | 	. "github.com/mmcloughlin/avo/build"
  8 | 	. "github.com/mmcloughlin/avo/operand"
  9 | 	. "github.com/mmcloughlin/avo/reg"
 10 | )
 11 | // main emits the amd64 assembly for Ui8tof32 through avo; Generate writes the result.
 12 | func main() {
 13 | 	TEXT("Ui8tof32", NOSPLIT, "func(input []byte, output []float32)")
 14 | 	Doc("Ui8tof32 converts unsigned 8-bit samples to 32-bit float.")
 15 | 	inputPtr := Load(Param("input").Base(), GP64())
 16 | 	inputLen := Load(Param("input").Len(), GP64())
 17 | 	outputPtr := Load(Param("output").Base(), GP64())
 18 | 	outputLen := Load(Param("output").Len(), GP64())
 19 | 
 20 | 	Comment("Pick shortest length")
 21 | 	CMPQ(outputLen, inputLen)
 22 | 	JGE(LabelRef("ui8tof32_min_len"))
 23 | 	MOVQ(outputLen, inputLen)
 24 | 	Label("ui8tof32_min_len")
 25 | 
 26 | 	index := GP64() // number of samples converted so far
 27 | 	MOVQ(U64(0), index)
 28 | 
 29 | 	t64 := GP64()
 30 | 
 31 | 	Comment("If input is too short to optimize (less than 32 bytes) then single step")
 32 | 	MOVQ(U64(32), t64)
 33 | 	CMPQ(t64, inputLen)
 34 | 	JGE(LabelRef("ui8tof32_stepper"))
 35 | 
 36 | 	Comment("Align output to 16-byte boundary")
 37 | 	MOVQ(outputPtr, t64)
 38 | 	ANDQ(Imm(0xf), t64)
 39 | 	SHRQ(Imm(2), t64) // divide by 4 to convert bytes to 32-bit blocks
 40 | 	JZ(LabelRef("ui8tof32_aligned"))
 41 | 
 42 | 	t2 := GP64() // samples to convert one by one until the output pointer is aligned
 43 | 	MOVQ(U64(4), t2)
 44 | 	SUBQ(t64, t2)
 45 | 	ui8tof32Step(inputPtr, outputPtr, index, t2, "ui8tof32_align")
 46 | 
 47 | 	Label("ui8tof32_aligned")
 48 | 	// n = index + ((inputLen-index) &^ 15). Rounding inputLen alone would let the
 49 | 	// last 16-sample pass run past both slices when alignment left index at 1..3.
 50 | 	n := GP64()
 51 | 	MOVQ(inputLen, n)
 52 | 	SUBQ(index, n)
 53 | 	ANDQ(U32(^uint32(15)), n)
 54 | 	ADDQ(index, n)
 55 | 	CMPQ(index, n)
 56 | 	JGE(LabelRef("ui8tof32_stepper"))
 57 | 
 58 | 	CMPB(NewDataAddr(Symbol{Name: "·useSSE4"}, 0), Imm(1))
 59 | 	JNE(LabelRef("ui8tof32_nosse4"))
 60 | 	ui8tof32SSE4(inputPtr, outputPtr, index, n)
 61 | 	JMP(LabelRef("ui8tof32_stepper"))
 62 | 
 63 | 	Label("ui8tof32_nosse4")
 64 | 	ui8tof32SSE2(inputPtr, outputPtr, index, n)
 65 | 
 66 | 	Comment("TODO: work increasingly smaller blocks")
 67 | 
 68 | 	Label("ui8tof32_stepper")
 69 | 	CMPQ(index, inputLen)
 70 | 	JGE(LabelRef("ui8tof32_done"))
 71 | 
 72 | 	ui8tof32Step(inputPtr, outputPtr, index, inputLen, "ui8tof32_step")
 73 | 
 74 | 	Label("ui8tof32_done")
 75 | 	RET()
 76 | 
 77 | 	Generate()
 78 | }
 79 | // ui8tof32Step emits a one-sample-at-a-time loop under label: convert (*inputPtr - 128) to float32, store it at outputPtr, advance both pointers and index, repeat while index < maxIndex. The body always runs at least once.
 80 | func ui8tof32Step(inputPtr, outputPtr, index, maxIndex Register, label string) {
 81 | 	Label(label)
 82 | 	x0 := XMM()
 83 | 	t64 := GP64()
 84 | 	MOVBQZX(Mem{Base: inputPtr}, t64) // zero-extend one input byte
 85 | 	INCQ(inputPtr)
 86 | 	SUBQ(Imm(128), t64) // recentre 0..255 to -128..127
 87 | 	CVTSQ2SS(t64, x0)
 88 | 	MOVSS(x0, Mem{Base: outputPtr})
 89 | 	ADDQ(Imm(4), outputPtr)
 90 | 	INCQ(index)
 91 | 	CMPQ(index, maxIndex)
 92 | 	JLT(LabelRef(label))
 93 | }
 94 | // ui8tof32SSE4 emits the SSE4.1 loop: 16 samples per pass, at least one pass, repeating while index < maxIndex. Stores use MOVAPS, so the caller is expected to have 16-byte aligned outputPtr and to guarantee 16 samples for every pass.
 95 | func ui8tof32SSE4(inputPtr, outputPtr, index, maxIndex Register) {
 96 | 	t32 := GP32()
 97 | 	x0 := XMM()
 98 | 	x1 := XMM()
 99 | 	toSub := XMM()
100 | 
101 | 	MOVL(U32(0x80808080), t32) // 0x80 in every byte lane
102 | 	MOVD(t32, toSub)
103 | 	PSHUFL(Imm(0), toSub, toSub) // copy the low dword to all four dwords
104 | 
105 | 	Label("ui8tof32_sse4_loop")
106 | 	Comment("Load 16 unsigned 8-bit values")
107 | 	MOVOU(Mem{Base: inputPtr}, x0)
108 | 	Comment("Make the values signed")
109 | 	PSUBB(toSub, x0)
110 | 
111 | 	Comment("Lowest 4 values (bytes 0-3)")
112 | 	PMOVSXBD(x0, x1) // sign-extend the low four bytes to four 32-bit ints
113 | 	Comment("Convert 32-bit signed integers to 32-bit float")
114 | 	CVTPL2PS(x1, x1)
115 | 	MOVAPS(x1, Mem{Base: outputPtr})
116 | 
117 | 	Comment("Next 4 values (bytes 4-7)")
118 | 	PSHUFL(Imm(1), x0, x1) // move dword 1 of x0 into the low dword of x1
119 | 	PMOVSXBD(x1, x1)
120 | 	Comment("Convert 32-bit signed integers to 32-bit float")
121 | 	CVTPL2PS(x1, x1)
122 | 	MOVAPS(x1, Mem{Base: outputPtr, Disp: 16})
123 | 
124 | 	Comment("Next 4 values (bytes 8-11)")
125 | 	PSHUFL(Imm(2), x0, x1)
126 | 	PMOVSXBD(x1, x1)
127 | 	Comment("Convert 32-bit signed integers to 32-bit float")
128 | 	CVTPL2PS(x1, x1)
129 | 	MOVAPS(x1, Mem{Base: outputPtr, Disp: 32})
130 | 
131 | 	Comment("Next 4 values (bytes 12-15)")
132 | 	PSHUFL(Imm(3), x0, x1)
133 | 	PMOVSXBD(x1, x1)
134 | 	Comment("Convert 32-bit signed integers to 32-bit float")
135 | 	CVTPL2PS(x1, x1)
136 | 	MOVAPS(x1, Mem{Base: outputPtr, Disp: 48})
137 | 
138 | 	ADDQ(Imm(16), index)
139 | 	ADDQ(Imm(16), inputPtr)
140 | 	ADDQ(Imm(64), outputPtr) // 16 samples * 4 bytes
141 | 	CMPQ(index, maxIndex)
142 | 	JLT(LabelRef("ui8tof32_sse4_loop"))
143 | }
144 | // ui8tof32SSE2 emits the SSE2-only loop with the same contract as the SSE4 one: 16 samples per pass, at least one pass, repeating while index < maxIndex, with MOVAPS stores. Bytes are widened by unpacking each register with itself and shifting right arithmetically by 24.
145 | func ui8tof32SSE2(inputPtr, outputPtr, index, maxIndex Register) {
146 | 	t32 := GP32()
147 | 	x0 := XMM()
148 | 	x1 := XMM()
149 | 	x2 := XMM()
150 | 	toSub := XMM()
151 | 
152 | 	MOVL(U32(0x80808080), t32) // 0x80 in every byte lane
153 | 	MOVD(t32, toSub)
154 | 	PSHUFL(Imm(0), toSub, toSub)
155 | 
156 | 	Label("ui8tof32_sse2_loop")
157 | 	Comment("Load 16 unsigned 8-bit values")
158 | 	MOVOU(Mem{Base: inputPtr}, x0)
159 | 	Comment("Make the values signed")
160 | 	PSUBB(toSub, x0)
161 | 	MOVO(x0, x1)
162 | 
163 | 	Comment("Lowest 4 values (bytes 0-3)")
164 | 	PUNPCKLBW(x1, x1) // bytes 0-7, each duplicated into a 16-bit word
165 | 	MOVO(x1, x2) // keep a copy for bytes 4-7
166 | 	PUNPCKLWL(x1, x1) // each source byte now fills the top byte of a dword
167 | 	PSRAL(Imm(24), x1) // arithmetic shift leaves the sign-extended byte
168 | 	CVTPL2PS(x1, x1)
169 | 	MOVAPS(x1, Mem{Base: outputPtr})
170 | 
171 | 	Comment("Next 4 values (bytes 4-7)")
172 | 	PUNPCKHWL(x2, x2)
173 | 	PSRAL(Imm(24), x2)
174 | 	CVTPL2PS(x2, x2)
175 | 	MOVAPS(x2, Mem{Base: outputPtr, Disp: 16})
176 | 
177 | 	Comment("Next 4 values (bytes 8-11)")
178 | 	PUNPCKHBW(x0, x0) // bytes 8-15, each duplicated into a 16-bit word
179 | 	MOVO(x0, x2) // keep a copy for bytes 12-15
180 | 	PUNPCKLWL(x0, x0)
181 | 	PSRAL(Imm(24), x0)
182 | 	CVTPL2PS(x0, x0)
183 | 	MOVAPS(x0, Mem{Base: outputPtr, Disp: 32})
184 | 
185 | 	Comment("Next 4 values (bytes 12-15)")
186 | 	PUNPCKHWL(x2, x2)
187 | 	PSRAL(Imm(24), x2)
188 | 	CVTPL2PS(x2, x2)
189 | 	MOVAPS(x2, Mem{Base: outputPtr, Disp: 48})
190 | 
191 | 	ADDQ(Imm(16), index)
192 | 	ADDQ(Imm(16), inputPtr)
193 | 	ADDQ(Imm(64), outputPtr) // 16 samples * 4 bytes
194 | 	CMPQ(index, maxIndex)
195 | 	JLT(LabelRef("ui8tof32_sse2_loop"))
196 | }
197 | 


--------------------------------------------------------------------------------
/dsp/conversion_avo_amd64.s:
--------------------------------------------------------------------------------
  1 | // Code generated by command: go run conversion_avo_amd64.go -out conversion_avo_amd64.s -stubs stub_windows.go. DO NOT EDIT.
  2 | 
  3 | #include "textflag.h"
  4 | 
  5 | // func Ui8tof32(input []byte, output []float32)
  6 | // Requires: SSE, SSE2, SSE4.1
  7 | TEXT ·Ui8tof32(SB), NOSPLIT, $0-48
  8 | 	MOVQ input_base+0(FP), AX
  9 | 	MOVQ input_len+8(FP), CX
 10 | 	MOVQ output_base+24(FP), DX
 11 | 	MOVQ output_len+32(FP), BX
 12 | 
 13 | 	// Pick shortest length
 14 | 	CMPQ BX, CX
 15 | 	JGE  ui8tof32_min_len
 16 | 	MOVQ BX, CX
 17 | 
 18 | ui8tof32_min_len:
 19 | 	MOVQ $0x0000000000000000, BX // BX = index of the next sample to convert
 20 | 
 21 | 	// If input is too short to optimize (less than 32 bytes) then single step
 22 | 	MOVQ $0x0000000000000020, SI
 23 | 	CMPQ SI, CX
 24 | 	JGE  ui8tof32_stepper
 25 | 
 26 | 	// Align output to 16-byte boundary
 27 | 	MOVQ DX, SI
 28 | 	ANDQ $0x0f, SI
 29 | 	SHRQ $0x02, SI
 30 | 	JZ   ui8tof32_aligned
 31 | 	MOVQ $0x0000000000000004, DI
 32 | 	SUBQ SI, DI
 33 | 
 34 | ui8tof32_align:
 35 | 	MOVBQZX  (AX), SI
 36 | 	INCQ     AX
 37 | 	SUBQ     $0x80, SI
 38 | 	CVTSQ2SS SI, X0
 39 | 	MOVSS    X0, (DX)
 40 | 	ADDQ     $0x04, DX
 41 | 	INCQ     BX
 42 | 	CMPQ     BX, DI
 43 | 	JLT      ui8tof32_align
 44 | 
 45 | ui8tof32_aligned:
 46 | 	MOVQ   CX, SI
 47 | 	SUBQ   BX, SI // SI = samples still unconverted after the alignment steps
 48 | 	ANDQ   $0xfffffff0, SI // ... rounded down to whole 16-sample blocks
 49 | 	ADDQ   BX, SI // SI = index where the SIMD loop must stop, so no pass overruns the slices
 50 | 	CMPQ   BX, SI
 51 | 	JGE    ui8tof32_stepper
 52 | 	CMPB   ·useSSE4+0(SB), $0x01
 53 | 	JNE    ui8tof32_nosse4
 54 | 	MOVL   $0x80808080, DI
 55 | 	MOVD   DI, X3
 56 | 	PSHUFL $0x00, X3, X3
 57 | 
 58 | ui8tof32_sse4_loop:
 59 | 	// Load 16 unsigned 8-bit values
 60 | 	MOVOU (AX), X0
 61 | 	// Make the values signed
 62 | 	PSUBB X3, X0
 63 | 	// Lowest 4 values (bytes 0-3)
 64 | 	PMOVSXBD X0, X2
 65 | 
 66 | 	// Convert 32-bit signed integers to 32-bit float
 67 | 	CVTPL2PS X2, X2
 68 | 	MOVAPS   X2, (DX)
 69 | 
 70 | 	// Next 4 values (bytes 4-7)
 71 | 	PSHUFL   $0x01, X0, X2
 72 | 	PMOVSXBD X2, X2
 73 | 
 74 | 	// Convert 32-bit signed integers to 32-bit float
 75 | 	CVTPL2PS X2, X2
 76 | 	MOVAPS   X2, 16(DX)
 77 | 
 78 | 	// Next 4 values (bytes 8-11)
 79 | 	PSHUFL   $0x02, X0, X2
 80 | 	PMOVSXBD X2, X2
 81 | 
 82 | 	// Convert 32-bit signed integers to 32-bit float
 83 | 	CVTPL2PS X2, X2
 84 | 	MOVAPS   X2, 32(DX)
 85 | 
 86 | 	// Next 4 values (bytes 12-15)
 87 | 	PSHUFL   $0x03, X0, X2
 88 | 	PMOVSXBD X2, X2
 89 | 
 90 | 	// Convert 32-bit signed integers to 32-bit float
 91 | 	CVTPL2PS X2, X2
 92 | 	MOVAPS   X2, 48(DX)
 93 | 	ADDQ     $0x10, BX
 94 | 	ADDQ     $0x10, AX
 95 | 	ADDQ     $0x40, DX
 96 | 	CMPQ     BX, SI
 97 | 	JLT      ui8tof32_sse4_loop
 98 | 	JMP      ui8tof32_stepper
 99 | 
100 | ui8tof32_nosse4:
101 | 	MOVL   $0x80808080, DI
102 | 	MOVD   DI, X4
103 | 	PSHUFL $0x00, X4, X4
104 | 
105 | ui8tof32_sse2_loop:
106 | 	// Load 16 unsigned 8-bit values
107 | 	MOVOU (AX), X0
108 | 
109 | 	// Make the values signed
110 | 	PSUBB X4, X0
111 | 	MOVO  X0, X2
112 | 
113 | 	// Lowest 4 values (bytes 0-3)
114 | 	PUNPCKLBW X2, X2
115 | 	MOVO      X2, X3
116 | 	PUNPCKLWL X2, X2
117 | 	PSRAL     $0x18, X2
118 | 	CVTPL2PS  X2, X2
119 | 	MOVAPS    X2, (DX)
120 | 
121 | 	// Next 4 values (bytes 4-7)
122 | 	PUNPCKHWL X3, X3
123 | 	PSRAL     $0x18, X3
124 | 	CVTPL2PS  X3, X3
125 | 	MOVAPS    X3, 16(DX)
126 | 
127 | 	// Next 4 values (bytes 8-11)
128 | 	PUNPCKHBW X0, X0
129 | 	MOVO      X0, X3
130 | 	PUNPCKLWL X0, X0
131 | 	PSRAL     $0x18, X0
132 | 	CVTPL2PS  X0, X0
133 | 	MOVAPS    X0, 32(DX)
134 | 
135 | 	// Next 4 values (bytes 12-15)
136 | 	PUNPCKHWL X3, X3
137 | 	PSRAL     $0x18, X3
138 | 	CVTPL2PS  X3, X3
139 | 	MOVAPS    X3, 48(DX)
140 | 	ADDQ      $0x10, BX
141 | 	ADDQ      $0x10, AX
142 | 	ADDQ      $0x40, DX
143 | 	CMPQ      BX, SI
144 | 	JLT       ui8tof32_sse2_loop
145 | 
146 | 	// TODO: work increasingly smaller blocks
147 | ui8tof32_stepper:
148 | 	CMPQ BX, CX
149 | 	JGE  ui8tof32_done
150 | 
151 | ui8tof32_step:
152 | 	MOVBQZX  (AX), SI
153 | 	INCQ     AX
154 | 	SUBQ     $0x80, SI
155 | 	CVTSQ2SS SI, X1
156 | 	MOVSS    X1, (DX)
157 | 	ADDQ     $0x04, DX
158 | 	INCQ     BX
159 | 	CMPQ     BX, CX
160 | 	JLT      ui8tof32_step
161 | 
162 | ui8tof32_done:
163 | 	RET
164 | 


--------------------------------------------------------------------------------
/dsp/cpu_amd64.go:
--------------------------------------------------------------------------------
 1 | package dsp
 2 | 
 3 | import (
 4 | 	"github.com/samuel/go-dsp/dsp/internal/cpu"
 5 | )
 6 | 
 7 | var (
 8 | 	useSSE4 bool // set from cpu.X86.HasSSE41 in init; the Ui8tof32 assembly reads it to choose its SIMD loop
 9 | 	useAVX2 bool // set in init from the detected CPU features
10 | 	useSSE2 bool // set from cpu.X86.HasSSE2 in init
11 | )
12 | // init caches the CPU feature flags that select between SIMD code paths.
13 | func init() {
14 | 	useSSE4 = cpu.X86.HasSSE41
15 | 	useAVX2 = cpu.X86.HasAVX2 // AVX2 has its own feature bit; HasAVX alone does not imply it
16 | 	useSSE2 = cpu.X86.HasSSE2
17 | }
18 | 


--------------------------------------------------------------------------------
/dsp/cpu_arm.go:
--------------------------------------------------------------------------------
 1 | package dsp
 2 | 
 3 | import (
 4 | 	"io"
 5 | 	"io/ioutil"
 6 | 	"log"
 7 | 	"os"
 8 | 	"regexp"
 9 | 	"strconv"
10 | )
11 | 
12 | var (
13 | 	// HaveNEON is true if ARM NEON SIMD instructions are available (set by init from /proc/cpuinfo; stays false if that file cannot be read)
14 | 	HaveNEON bool
15 | 	// UseVector is true if VFP vector ops should be used (set by init; the ARMVECTOR environment variable overrides the automatic choice)
16 | 	UseVector bool
17 | )
18 | 
19 | var (
20 | 	// neonRE matches /proc/cpuinfo if the neon instruction set is available: a "Features" line containing "neon" or "asimd"
21 | 	neonRE = regexp.MustCompile(`(?m)^Features.*(neon|asimd).*$`)
22 | 	// rpi1RE matches /proc/cpuinfo for Raspberry Pi 1: a "Hardware" line containing "BCM2708"
23 | 	rpi1RE = regexp.MustCompile(`(?m)^Hardware.*BCM2708.*$`)
24 | )
25 | 
26 | func init() {
27 | 	// User-space code on ARM cannot query the CPU's feature set directly,
28 | 	// so the answer has to come from the kernel via /proc/cpuinfo.
29 | 	// Ref: Cortex-A Series Programmer's Guide Section 20.1.7 Detecting NEON
30 | 	cpuinfo, openErr := os.Open("/proc/cpuinfo")
31 | 	if openErr != nil {
32 | 		return
33 | 	}
34 | 	defer cpuinfo.Close()
35 | 	// Only the first 2048 bytes of the report are examined.
36 | 	info, readErr := ioutil.ReadAll(io.LimitReader(cpuinfo, 2048))
37 | 	if readErr != nil {
38 | 		log.Printf("Failed to read cpuinfo: %s", readErr.Error())
39 | 		return
40 | 	}
41 | 
42 | 	// Vector ops are considerably slower on more recent ARM (ARM8, ARM9),
43 | 	// which generally have NEON anyway, so VFP vector use is only enabled
44 | 	// automatically on a Raspberry Pi 1 without NEON.
45 | 	HaveNEON = neonRE.Match(info)
46 | 	UseVector = !HaveNEON && rpi1RE.Match(info)
47 | 	// ARMVECTOR, when it parses as a boolean, overrides the automatic choice.
48 | 	if force, parseErr := strconv.ParseBool(os.Getenv("ARMVECTOR")); parseErr == nil {
49 | 		UseVector = force
50 | 	}
51 | }
52 | 


--------------------------------------------------------------------------------
/dsp/cpu_arm64.go:
--------------------------------------------------------------------------------
1 | package dsp
2 | // HaveNEON is hard-coded to true on arm64. It is a variable rather than a constant so that tests can switch it off temporarily.
3 | var HaveNEON = true
4 | 


--------------------------------------------------------------------------------
/dsp/cpu_arm64_test.go:
--------------------------------------------------------------------------------
 1 | package dsp
 2 | 
 3 | import "testing"
 4 | // simdTest runs fn as subtest "neon" and again as "noneon" with HaveNEON forced off (restored afterwards); without NEON the "neon" subtest is reported as skipped.
 5 | func simdTest(t *testing.T, fn func(t *testing.T)) {
 6 | 	if !HaveNEON {
 7 | 		t.Run("neon", func(t *testing.T) { t.Skip("NEON not available") })
 8 | 		t.Run("noneon", fn)
 9 | 		return
10 | 	}
11 | 	t.Run("neon", fn)
12 | 	HaveNEON = false
13 | 	t.Run("noneon", fn)
14 | 	HaveNEON = true
15 | }
16 | 


--------------------------------------------------------------------------------
/dsp/cpu_arm_test.go:
--------------------------------------------------------------------------------
 1 | //go:build arm
 2 | // +build arm
 3 | 
 4 | package dsp
 5 | 
 6 | import "testing"
 7 | // simdTest runs fn as subtest "neon" and again as "noneon" with HaveNEON forced off (restored afterwards); without NEON the "neon" subtest is reported as skipped.
 8 | func simdTest(t *testing.T, fn func(t *testing.T)) {
 9 | 	if !HaveNEON {
10 | 		t.Run("neon", func(t *testing.T) { t.Skip("NEON not available") })
11 | 		t.Run("noneon", fn)
12 | 		return
13 | 	}
14 | 	t.Run("neon", fn)
15 | 	HaveNEON = false
16 | 	t.Run("noneon", fn)
17 | 	HaveNEON = true
18 | }
19 | 


--------------------------------------------------------------------------------
/dsp/cpu_x86_test.go:
--------------------------------------------------------------------------------
 1 | //go:build 386 || amd64
 2 | // +build 386 amd64
 3 | 
 4 | package dsp
 5 | 
 6 | import (
 7 | 	"testing"
 8 | )
 9 | // simdTest runs fn as subtest "sse4" and again as "nosse4" with useSSE4 forced off (restored afterwards); without SSE4 the "sse4" subtest is reported as skipped.
10 | func simdTest(t *testing.T, fn func(t *testing.T)) {
11 | 	if !useSSE4 {
12 | 		t.Run("sse4", func(t *testing.T) { t.Skip("sse4 not available") })
13 | 		t.Run("nosse4", fn)
14 | 		return
15 | 	}
16 | 	t.Run("sse4", fn)
17 | 	useSSE4 = false
18 | 	t.Run("nosse4", fn)
19 | 	useSSE4 = true
20 | }
21 | 


--------------------------------------------------------------------------------
/dsp/demod.go:
--------------------------------------------------------------------------------
 1 | package dsp
 2 | 
 3 | import "math/cmplx"
 4 | // PolarDiscriminator returns the phase angle between two complex vectors,
 5 | // i.e. arg(a * conj(b)). The returned angle is in the range [-Pi, Pi].
 6 | func PolarDiscriminator(a, b complex128) float64 {
 7 | 	product := a * cmplx.Conj(b)
 8 | 	return cmplx.Phase(product)
 9 | }
10 | // PolarDiscriminator32 returns the phase angle between two complex vectors,
11 | // i.e. arg(a * conj(b)), computed with FastAtan2. The angle is in [-Pi, Pi].
12 | func PolarDiscriminator32(a, b complex64) float32 {
13 | 	y, x := imag(a)*real(b)-real(a)*imag(b), real(a)*real(b)+imag(a)*imag(b)
14 | 	return FastAtan2(y, x)
15 | }
16 | 
17 | // FMDemodFilter is an FM demodulator filter using a polar discriminator.
18 | //
19 | // 	x(n)─────▶○───────────────────▶(X)──────────────────▶arctan──▶
20 | // 	          │                     ▲  y(n)=x(n)x*(n-1)
21 | // 	          │   ┌───┐     ┌───┐   │
22 | // 	          └──▶│z⁻¹├────▶│z^*├───┘
23 | // 	              └───┘     └───┘
24 | type FMDemodFilter struct {
25 | 	pre complex64 // previous input sample x(n-1), carried over between Demodulate calls
26 | }
27 | // Demodulate passes input and output to fmDemodulateAsm and returns its result; see fmDemodulate for the reference behaviour (one angle written to output per input sample, len(input) returned).
28 | func (fi *FMDemodFilter) Demodulate(input []complex64, output []float32) int {
29 | 	return fmDemodulateAsm(fi, input, output)
30 | }
31 | // fmDemodulateAsm has no Go body; it is defined per architecture in demod_*.s. The 386, amd64 and arm64 versions jump straight to fmDemodulate, while the arm version is hand-written assembly.
32 | func fmDemodulateAsm(fi *FMDemodFilter, input []complex64, output []float32) int
33 | // fmDemodulate is the portable demodulator: output[n] = FastAtan2 of x(n)*conj(x(n-1)), with x(-1) taken from fi.pre; the last sample is saved back to fi.pre and len(input) is returned.
34 | func fmDemodulate(fi *FMDemodFilter, input []complex64, output []float32) int {
35 | 	last := fi.pre
36 | 	for n := 0; n < len(input); n++ {
37 | 		re, im := real(input[n]), imag(input[n])
38 | 		output[n] = FastAtan2(im*real(last)-re*imag(last), re*real(last)+im*imag(last))
39 | 		last = input[n]
40 | 	}
41 | 	fi.pre = last
42 | 	return len(input)
43 | }
44 | 


--------------------------------------------------------------------------------
/dsp/demod_386.s:
--------------------------------------------------------------------------------
1 | TEXT ·fmDemodulateAsm(SB), 7, $0 // 386 has no assembly version; flag 7 is NOPROF|DUPOK|NOSPLIT written numerically (textflag.h is not included in this file)
2 | 	JMP ·fmDemodulate(SB) // jump straight to the Go implementation
3 | 


--------------------------------------------------------------------------------
/dsp/demod_amd64.s:
--------------------------------------------------------------------------------
1 | #include "textflag.h"
2 | 
3 | TEXT ·fmDemodulateAsm(SB), NOSPLIT, $0 // amd64 has no assembly version of the demodulator
4 | 	JMP ·fmDemodulate(SB) // jump straight to the Go implementation
5 | 


--------------------------------------------------------------------------------
/dsp/demod_arm.s:
--------------------------------------------------------------------------------
 1 | #include "textflag.h"
 2 | 
 3 | TEXT ·fmDemodulateAsm(SB), NOSPLIT, $0 // 32-bit ARM version of fmDemodulate with the FastAtan2 computation written inline
 4 | 	MOVW input+4(FP), R1 // R1 = input pointer
 5 | 	MOVW input_len+8(FP), R2 // R2 = len(input)
 6 | 	MOVW output+16(FP), R3 // R3 = output pointer
 7 | 	MOVW output_len+20(FP), R4 // R4 = len(output)
 8 | 
 9 | 	// Choose the shortest length
10 | 	CMP     R2, R4
11 | 	MOVW.LT R4, R2
12 | 
13 | 	// If no input then skip loop
14 | 	TEQ $0, R2
15 | 	BEQ fmDemod_done
16 | 
17 | 	MOVW fi+0(FP), R0
18 | 	MOVF 0(R0), F5    // real(pre)
19 | 	MOVF 4(R0), F1    // imag(pre)
20 | 
21 | fmDemod_loop:
22 | 	MOVF 0(R1), F2 // real(inp)
23 | 	MOVF 4(R1), F3 // imag(inp)
24 | 	ADD  $8, R1
25 | 
26 | 	MULF F3, F5, F6 // imag(inp)*real(pre)
27 | 	MULF F2, F1, F0 // real(inp)*imag(pre)
28 | 	MULF F2, F5, F4 // real(inp)*real(pre)
29 | 	MULF F3, F1, F7 // imag(inp)*imag(pre)
30 | 	SUBF F0, F6 // y = imag(inp)*real(pre) - real(inp)*imag(pre)
31 | 	ADDF F7, F4 // x = real(inp)*real(pre) + imag(inp)*imag(pre)
32 | 
33 | 	MOVF F2, F5 // real(pre) = real(inp)
34 | 	MOVF F3, F1 // imag(pre) = imag(inp)
35 | 
36 | 	// FastAtan2(y=F6, x=F4)
37 | 
38 | 	ABSF F6, F2 // F2 = |y|
39 | 	MOVF $1e-20, F0
40 | 	ADDF F0, F2 // F2 = |y| + 1e-20 (presumably keeps the divisor below non-zero)
41 | 	WORD $0xeeb54ac0            // vcmpe.f32 s8, #0x0
42 | 	WORD $0xeef1fa10            // vmrs APSR_nzcv, fpscr
43 | 	BEQ  fmDemod_atan_zero_x // x == 0 (the compare above is on x; s8 appears to alias F4)
44 | 	BGT  fmDemod_atan_pos_x // x > 0; falling through handles x < 0
45 | 	ADDF F2, F4, F7             // x + abs(y)
46 | 	SUBF F4, F2, F4             // abs(y) - x
47 | 	MOVF $2.356194496154785, F3 // pi * 3/4
48 | 	B    fmDemod_atan_1
49 | 
50 | fmDemod_atan_pos_x:
51 | 	SUBF F2, F4, F7              // x - abs(y)
52 | 	ADDF F2, F4, F4              // abs(y) + x
53 | 	MOVF $0.7853981852531433, F3 // pi * 1/4
54 | 
55 | fmDemod_atan_1:
56 | 	DIVF F4, F7, F2 // r = F7 / F4
57 | 	MOVF $0.1963, F7
58 | 	MULF F2, F7
59 | 	MULF F2, F7 // F7 = 0.1963 * r * r
60 | 	MOVF $0.9817, F0
61 | 	SUBF F0, F7
62 | 	MULF F2, F7
63 | 	ADDF F3, F7 // angle = (0.1963*r*r - 0.9817)*r + F3
64 | 	WORD $0xeeb56ac0       // vcmpe.f32 s12, #0x0
65 | 	WORD $0xeef1fa10       // vmrs APSR_nzcv, fpscr
66 | 	WORD $0xbeb17a47       // vneglt.f32 s14, s14
67 | 	MOVF F7, 0(R3) // store the angle (negated by the conditional instruction above when the compare was "less than")
68 | 	B    fmDemod_atan_done
69 | 
70 | fmDemod_atan_zero_x:
71 | 	WORD    $0xeeb56ac0                                              // vcmpe.f32 s12, #0x0
72 | 	WORD    $0xeef1fa10                                              // vmrs APSR_nzcv, fpscr
73 | 	MOVF.LT $-1.570796326794896557998981734272092580795288085938, F6
74 | 	MOVF.GT $1.570796326794896557998981734272092580795288085938, F6
75 | 	MOVF    F6, 0(R3) // store -Pi/2 or +Pi/2; when neither condition held F6 is still y itself
76 | 
77 | fmDemod_atan_done:
78 | 
79 | 	//
80 | 
81 | 	ADD $4, R3 // next output slot
82 | 
83 | 	SUB $1, R2 // one fewer sample remaining
84 | 	TEQ $0, R2
85 | 	BNE fmDemod_loop
86 | 
87 | 	MOVF F5, 0(R0) // real(pre)
88 | 	MOVF F1, 4(R0) // imag(pre)
89 | 
90 | fmDemod_done:
91 | 	MOVW input_len+8(FP), R0 // the result is len(input) as passed in, not the clamped count in R2
92 | 	MOVW R0, output_len+28(FP) // write the int result at frame offset 28
93 | 	RET
94 | 


--------------------------------------------------------------------------------
/dsp/demod_arm64.s:
--------------------------------------------------------------------------------
1 | #include "textflag.h"
2 | 
3 | TEXT ·fmDemodulateAsm(SB), NOSPLIT, $0 // arm64 has no assembly version of the demodulator
4 |     B ·fmDemodulate(SB) // branch straight to the Go implementation
5 | 


--------------------------------------------------------------------------------
/dsp/demod_test.go:
--------------------------------------------------------------------------------
 1 | package dsp
 2 | 
 3 | import (
 4 | 	"math/rand"
 5 | 	"testing"
 6 | )
 7 | // demodBenchSamples is the shared benchmark input, filled by init with benchSize pseudo-random samples.
 8 | var demodBenchSamples []complex64
 9 | // init fills demodBenchSamples from a generator seeded with 0, so every run sees the same data.
10 | func init() {
11 | 	rng := rand.New(rand.NewSource(0))
12 | 	demodBenchSamples = make([]complex64, benchSize)
13 | 	for i := range demodBenchSamples {
14 | 		demodBenchSamples[i] = complex(rng.Float32(), rng.Float32())
15 | 	}
16 | }
17 | // TestFMDemodulation checks that fmDemodulateAsm produces exactly the output of the Go fmDemodulate.
18 | func TestFMDemodulation(t *testing.T) {
19 | 	input := []complex64{complex(0.0, 2.0), complex(1.0, 2.0), complex(-3.0, 7.0), complex(4.0, -9.0)}
20 | 	filter := &FMDemodFilter{}
21 | 	// Assembly entry point first, starting from a zeroed filter state.
22 | 	filter.pre = 0.0
23 | 	output := make([]float32, len(input))
24 | 	if n := fmDemodulateAsm(filter, input, output); n != len(input) {
25 | 		t.Fatalf("Expected n %d instead of %d", len(input), n)
26 | 	}
27 | 	// Then the Go reference, again from a zeroed state.
28 | 	filter.pre = 0.0
29 | 	expected := make([]float32, len(input))
30 | 	if n := fmDemodulate(filter, input, expected); n != len(input) {
31 | 		t.Fatalf("Expected n %d instead of %d", len(input), n)
32 | 	}
33 | 	if len(output) != len(expected) {
34 | 		t.Fatalf("Output doesn't match expected:\n%+v\n%+v", output, expected)
35 | 	}
36 | 	for i, got := range output {
37 | 		if got != expected[i] {
38 | 			t.Fatalf("Output doesn't match expected:\n%+v\n%+v", output, expected)
39 | 		}
40 | 	}
41 | }
40 | 
41 | // func TestPolarDiscriminator32(t *testing.T) {
42 | // 	for i := 0; i < 1000; i++ {
43 | // 		x := complex(rand.Float32()-0.5, rand.Float32()-0.5)
44 | // 		y := complex(rand.Float32()-0.5, rand.Float32()-0.5)
45 | // 		expected := polarDiscriminator32(x, y)
46 | // 		output := PolarDiscriminator32(x, y)
47 | // 		if expected != output {
48 | // 			t.Fatalf("Output differs: %f != %f", output, expected)
49 | // 		}
50 | // 	}
51 | // }
52 | // BenchmarkPolarDiscriminator32 times PolarDiscriminator32 on one fixed pair of samples.
53 | func BenchmarkPolarDiscriminator32(b *testing.B) {
54 | 	x := complex64(complex(1, 2))
55 | 	y := complex64(complex(-3, 9))
56 | 	for n := 0; n < b.N; n++ {
57 | 		_ = PolarDiscriminator32(x, y)
58 | 	}
59 | }
60 | 
61 | func BenchmarkFMDemodulation(b *testing.B) {
62 | 	filter := &FMDemodFilter{}
63 | 	output := make([]float32, benchSize)
64 | 	b.SetBytes(benchSize)
65 | 	b.ResetTimer()
66 | 	for i := 0; i < b.N; i++ {
67 | 		_ = fmDemodulateAsm(filter, demodBenchSamples, output)
68 | 	}
69 | }
70 | 
71 | func BenchmarkFMDemodulation_Go(b *testing.B) {
72 | 	filter := &FMDemodFilter{}
73 | 	output := make([]float32, benchSize)
74 | 	b.SetBytes(benchSize)
75 | 	b.ResetTimer()
76 | 	for i := 0; i < b.N; i++ {
77 | 		_ = fmDemodulate(filter, demodBenchSamples, output)
78 | 	}
79 | }
80 | 


--------------------------------------------------------------------------------
/dsp/downsample.go:
--------------------------------------------------------------------------------
 1 | package dsp
 2 | // LowPassDownsampleComplexFilter replaces every Downsample consecutive complex samples by their sum (the sum is not divided by Downsample). Its state carries over between Filter calls.
 3 | type LowPassDownsampleComplexFilter struct {
 4 | 	Downsample int // number of input samples accumulated into each output sample
 5 | 	// NOTE: downsample_arm.s reads these fields by byte offset (0, 4, 8, 12); keep the layout in step with it.
 6 | 	now       complex64 // running sum of the samples seen since the last output
 7 | 	prevIndex int // how many samples have been added to now
 8 | }
 9 | // Filter hands samples to lowPassDownsampleComplexFilterAsm and returns its result; in the Go reference the outputs overwrite the front of samples and the returned slice is that prefix.
10 | func (fi *LowPassDownsampleComplexFilter) Filter(samples []complex64) []complex64 {
11 | 	return lowPassDownsampleComplexFilterAsm(fi, samples)
12 | }
13 | // lowPassDownsampleComplexFilterAsm has no Go body; it is defined in downsample_*.s. The 386, amd64 and arm64 versions jump to lowPassDownsampleComplexFilter, while arm has a hand-written loop.
14 | func lowPassDownsampleComplexFilterAsm(fi *LowPassDownsampleComplexFilter, samples []complex64) []complex64
15 | // lowPassDownsampleComplexFilter is the portable implementation behind Filter.
16 | // It adds each sample to fi.now and, once fi.Downsample samples have been
17 | // accumulated, emits the (unscaled) sum and clears the accumulator. Results
18 | // overwrite the front of samples, and the returned slice is that prefix;
19 | // fi.now and fi.prevIndex carry any partial sum into the next call.
20 | func lowPassDownsampleComplexFilter(fi *LowPassDownsampleComplexFilter, samples []complex64) []complex64 {
21 | 	out := samples[:0] // shares samples' backing array and never outruns the read position
22 | 	for i := range samples {
23 | 		fi.now += samples[i]
24 | 		fi.prevIndex++
25 | 		if fi.prevIndex >= fi.Downsample {
26 | 			out = append(out, fi.now)
27 | 			fi.now, fi.prevIndex = 0, 0
28 | 		}
29 | 	}
30 | 	return out
31 | }
32 | // LowPassDownsampleRationalFilter downsamples by the ratio Slow/Fast: each input advances a counter by Slow, and an output (the running sum scaled by Slow/Fast) is produced whenever the counter reaches Fast.
33 | type LowPassDownsampleRationalFilter struct {
34 | 	Fast, Slow int // counter threshold and per-sample counter step; downsample_arm.s reads them at byte offsets 0 and 4
35 | 
36 | 	sum       float32 // running sum of the samples seen since the last output
37 | 	prevIndex int // counter carried between samples and between Filter calls
38 | }
39 | // Filter hands samples to lowPassDownsampleRationalFilterAsm and returns its result; in the Go reference the outputs overwrite the front of samples and the returned slice is that prefix.
40 | func (fi *LowPassDownsampleRationalFilter) Filter(samples []float32) []float32 {
41 | 	return lowPassDownsampleRationalFilterAsm(fi, samples)
42 | }
43 | // lowPassDownsampleRationalFilterAsm has no Go body; it is defined in downsample_*.s. The 386, amd64 and arm64 versions jump to lowPassDownsampleRationalFilter, while arm has a hand-written loop.
44 | func lowPassDownsampleRationalFilterAsm(fi *LowPassDownsampleRationalFilter, samples []float32) []float32
45 | // lowPassDownsampleRationalFilter is the portable implementation behind Filter.
46 | // Each sample is added to fi.sum and advances fi.prevIndex by fi.Slow; when
47 | // fi.prevIndex reaches fi.Fast, sum*(Slow/Fast) is written over the front of samples.
48 | func lowPassDownsampleRationalFilter(fi *LowPassDownsampleRationalFilter, samples []float32) []float32 {
49 | 	scale := float32(fi.Slow) / float32(fi.Fast)
50 | 	out := samples[:0]
51 | 	for i := range samples {
52 | 		fi.sum += samples[i]
53 | 		fi.prevIndex += fi.Slow
54 | 		if fi.prevIndex >= fi.Fast {
55 | 			out = append(out, fi.sum*scale)
56 | 			fi.prevIndex -= fi.Fast
57 | 			fi.sum = 0.0
58 | 		}
59 | 	}
60 | 	return out
61 | }
62 | 


--------------------------------------------------------------------------------
/dsp/downsample_386.s:
--------------------------------------------------------------------------------
1 | TEXT ·lowPassDownsampleComplexFilterAsm(SB), 7, $0 // 386 has no assembly version; flag 7 is NOPROF|DUPOK|NOSPLIT written numerically
2 | 	JMP ·lowPassDownsampleComplexFilter(SB) // jump straight to the Go implementation
3 | 
4 | TEXT ·lowPassDownsampleRationalFilterAsm(SB), 7, $0 // same forwarding stub for the rational filter
5 | 	JMP ·lowPassDownsampleRationalFilter(SB)
6 | 


--------------------------------------------------------------------------------
/dsp/downsample_amd64.s:
--------------------------------------------------------------------------------
1 | #include "textflag.h"
2 | 
3 | TEXT ·lowPassDownsampleComplexFilterAsm(SB), NOSPLIT, $0 // amd64 has no assembly version of this filter
4 | 	JMP ·lowPassDownsampleComplexFilter(SB) // jump straight to the Go implementation
5 | 
6 | TEXT ·lowPassDownsampleRationalFilterAsm(SB), NOSPLIT, $0 // same forwarding stub for the rational filter
7 | 	JMP ·lowPassDownsampleRationalFilter(SB)
8 | 


--------------------------------------------------------------------------------
/dsp/downsample_arm.s:
--------------------------------------------------------------------------------
  1 | #include "textflag.h"
  2 | 
  3 | TEXT ·lowPassDownsampleComplexFilterAsm(SB), NOSPLIT, $0 // 32-bit ARM version of lowPassDownsampleComplexFilter; struct fields are read by byte offset
  4 | 	MOVW fi+0(FP), R3
  5 | 	MOVW 0(R3), R8              // fi.Downsample
  6 | 	MOVW 12(R3), R7             // fi.prevIndex
  7 | 	MOVW samples_len+8(FP), R2
  8 | 	MOVW samples_data+4(FP), R5 // input
  9 | 	MOVW R5, R6                 // output
 10 | 	MOVF 4(R3), F0              // real(fi.now)
 11 | 	MOVF 8(R3), F1              // imag(fi.now)
 12 | 	B    complexLoopStart
 13 | 
 14 | complexLoop:
 15 | 	SUB $1, R2 // one fewer sample remaining
 16 | 
 17 | complexLoopStart:
 18 | 	TEQ $0, R2 // no samples left: write the state back and return
 19 | 	BEQ complexLoopEnd
 20 | 
 21 | 	// samples[i]
 22 | 	MOVF 0(R5), F2 // real
 23 | 	MOVF 4(R5), F3 // imag
 24 | 	ADD  $8, R5
 25 | 
 26 | 	// fi.now += samples[i]
 27 | 	ADDF F2, F0
 28 | 	ADDF F3, F1
 29 | 
 30 | 	// fi.prevIndex++
 31 | 	ADD $1, R7
 32 | 
 33 | 	// if prevIndex < downsample: continue
 34 | 	CMP R8, R7
 35 | 	BLT complexLoop
 36 | 
 37 | 	// samples[i2] = fi.now
 38 | 	MOVF F0, 0(R6)
 39 | 	MOVF F1, 4(R6)
 40 | 	ADD  $8, R6
 41 | 
 42 | 	// fi.prevIndex = 0
 43 | 	MOVW $0, R7
 44 | 
 45 | 	// fi.now = 0.0
 46 | 	MOVF $0.0, F0
 47 | 	MOVF $0.0, F1
 48 | 
 49 | 	B complexLoop
 50 | 
 51 | complexLoopEnd:
 52 | 	MOVW R7, 12(R3) // fi.prevIndex
 53 | 	MOVF F0, 4(R3)  // real(fi.now)
 54 | 	MOVF F1, 8(R3)  // imag(fi.now)
 55 | 
 56 | 	MOVW samples_data+4(FP), R0
 57 | 	SUB  R0, R6 // bytes written = output cursor - start of samples
 58 | 	MOVW R6>>3, R6 // 8 bytes per complex64 gives the output length
 59 | 	MOVW R6, ret_len+20(FP)
 60 | 	MOVW samples_cap+12(FP), R4 // the result keeps the capacity of samples
 61 | 	MOVW R4, ret_cap+24(FP)
 62 | 	MOVW samples_data+4(FP), R0 // the result starts at the same address as samples
 63 | 	MOVW R0, ret_data+16(FP)
 64 | 	RET
 65 | 
 66 | TEXT ·lowPassDownsampleRationalFilterAsm(SB), NOSPLIT, $0 // 32-bit ARM version of lowPassDownsampleRationalFilter; struct fields are read by byte offset
 67 | 	MOVW fi+0(FP), R4 // fi
 68 | 
 69 | 	MOVW  4(R4), R7 // fi.Slow
 70 | 	MOVW  R7, F4
 71 | 	MOVWF F4, F4 // integer -> float32
 72 | 
 73 | 	MOVW  0(R4), R8 // fi.Fast
 74 | 	MOVW  R8, F3
 75 | 	MOVWF F3, F3 // integer -> float32
 76 | 
 77 | 	DIVF F3, F4 // fi.Slow / fi.Fast
 78 | 
 79 | 	MOVF 8(R4), F3  // fi.sum
 80 | 	MOVW 12(R4), R2 // fi.prevIndex
 81 | 
 82 | 	MOVW samples_ptr+4(FP), R5 // input
 83 | 	MOVW R5, R6                // output
 84 | 	MOVW samples_len+8(FP), R3
 85 | 	ADD  R3<<2, R5, R3         // end of input
 86 | 
 87 | rationalLoop:
 88 | 	CMP R5, R3 // stop once the read cursor reaches the end of input
 89 | 	BLE rationalLoopEnd
 90 | 
 91 | 	MOVF (R5), F0 // samples[i]
 92 | 	ADD  $4, R5
 93 | 
 94 | 	ADDF F0, F3 // fi.sum += samples[i]
 95 | 	ADD  R7, R2 // fi.prevIndex += fi.Slow
 96 | 
 97 | 	CMP R8, R2 // prevIndex < Fast: keep accumulating
 98 | 	BLT rationalLoop
 99 | 
100 | 	MULF F4, F3 // fi.sum * (Slow/Fast)
101 | 
102 | 	MOVF F3, (R6) // samples[i2] = scaled sum
103 | 	ADD  $4, R6
104 | 
105 | 	SUB  R8, R2   // fi.prevIndex -= fi.Fast
106 | 	MOVF $0.0, F3 // fi.sum = 0.0
107 | 
108 | 	B rationalLoop
109 | 
110 | rationalLoopEnd:
111 | 	MOVW R2, 12(R4) // fi.prevIndex
112 | 	MOVF F3, 8(R4)  // fi.sum
113 | 
114 | 	MOVW samples_ptr+4(FP), R0
115 | 	SUB  R0, R6 // bytes written = output cursor - start of samples
116 | 	MOVW R6>>2, R6 // 4 bytes per float32 gives the output length
117 | 	MOVW R6, res_len+20(FP)
118 | 	MOVW samples_cap+12(FP), R4 // the result keeps the capacity of samples
119 | 	MOVW R4, res_cap+24(FP)
120 | 	MOVW samples_ptr+4(FP), R0 // the result starts at the same address as samples
121 | 	MOVW R0, res_ptr+16(FP)
122 | 	RET
123 | 


--------------------------------------------------------------------------------
/dsp/downsample_arm64.s:
--------------------------------------------------------------------------------
1 | #include "textflag.h"
2 | 
3 | TEXT ·lowPassDownsampleComplexFilterAsm(SB), NOSPLIT, $0 // arm64 has no assembly version of this filter
4 |     B ·lowPassDownsampleComplexFilter(SB) // branch straight to the Go implementation
5 | 
6 | TEXT ·lowPassDownsampleRationalFilterAsm(SB), NOSPLIT, $0 // same forwarding stub for the rational filter
7 |     B ·lowPassDownsampleRationalFilter(SB)
8 | 


--------------------------------------------------------------------------------
/dsp/downsample_test.go:
--------------------------------------------------------------------------------
  1 | package dsp
  2 | 
  3 | import "testing"
  4 | // TestLowPassDownsampleComplexFilter checks the assembly entry point against the Go implementation.
  5 | func TestLowPassDownsampleComplexFilter(t *testing.T) {
  6 | 	filter := &LowPassDownsampleComplexFilter{Downsample: 2}
  7 | 	input := []complex64{complex(0.0, 2.0), complex(1.0, 2.0), complex(-3.0, 7.0), complex(4.0, -9.0)}
  8 | 
  9 | 	// run resets the filter state and feeds impl a fresh 256-sample buffer
 10 | 	// holding input followed by zeros.
 11 | 	run := func(impl func(*LowPassDownsampleComplexFilter, []complex64) []complex64) []complex64 {
 12 | 		buf := make([]complex64, 256)
 13 | 		copy(buf, input)
 14 | 		filter.now = 0.0
 15 | 		filter.prevIndex = 0
 16 | 		return impl(filter, buf)
 17 | 	}
 18 | 	output := run(lowPassDownsampleComplexFilterAsm)
 19 | 	expected := run(lowPassDownsampleComplexFilter)
 20 | 
 21 | 	if len(output) != len(expected) {
 22 | 		t.Fatalf("Output doesn't match expected:\n%+v\n%+v", output, expected)
 23 | 	}
 24 | 	for i, got := range output {
 25 | 		if got != expected[i] {
 26 | 			t.Fatalf("Output doesn't match expected:\n%+v\n%+v", output, expected)
 27 | 		}
 28 | 	}
 29 | }
 30 | // TestLowPassDownsampleRationalFilter checks the assembly entry point against the Go implementation.
 31 | func TestLowPassDownsampleRationalFilter(t *testing.T) {
 32 | 	filter := &LowPassDownsampleRationalFilter{Fast: 3, Slow: 2}
 33 | 	input := make([]float32, 256)
 34 | 	for i := range input {
 35 | 		input[i] = float32(i - 128)
 36 | 	}
 37 | 
 38 | 	// run resets the filter state and feeds impl a private copy of input,
 39 | 	// because the filters overwrite the buffer they are given.
 40 | 	run := func(impl func(*LowPassDownsampleRationalFilter, []float32) []float32) []float32 {
 41 | 		buf := make([]float32, 256)
 42 | 		copy(buf, input)
 43 | 		filter.prevIndex = 0
 44 | 		filter.sum = 0.0
 45 | 		return impl(filter, buf)
 46 | 	}
 47 | 	output := run(lowPassDownsampleRationalFilterAsm)
 48 | 	expected := run(lowPassDownsampleRationalFilter)
 49 | 
 50 | 	if len(output) != len(expected) {
 51 | 		t.Fatalf("Output doesn't match expected: %+v != %+v", output, expected)
 52 | 	}
 53 | 	for i, got := range output {
 54 | 		if got != expected[i] {
 55 | 			t.Fatalf("Output doesn't match expected: %+v != %+v", output, expected)
 56 | 		}
 57 | 	}
 58 | }
 59 | 
 60 | func BenchmarkLowPassDownsampleComplexFilter(b *testing.B) {
 61 | 	filter := &LowPassDownsampleComplexFilter{Downsample: 2}
 62 | 	input := make([]complex64, 256)
 63 | 	for i := 0; i < 256; i++ {
 64 | 		input[i] = complex(float32(i)-128.0, -(float32(i) - 128.0))
 65 | 	}
 66 | 	for i := 0; i < b.N; i++ {
 67 | 		_ = lowPassDownsampleComplexFilterAsm(filter, input)
 68 | 	}
 69 | }
 70 | 
 71 | func BenchmarkLowPassDownsampleComplexFilter_Go(b *testing.B) {
 72 | 	filter := &LowPassDownsampleComplexFilter{Downsample: 2}
 73 | 	input := make([]complex64, 256)
 74 | 	for i := 0; i < 256; i++ {
 75 | 		input[i] = complex(float32(i)-128.0, -(float32(i) - 128.0))
 76 | 	}
 77 | 	for i := 0; i < b.N; i++ {
 78 | 		_ = lowPassDownsampleComplexFilter(filter, input)
 79 | 	}
 80 | }
 81 | 
 82 | func BenchmarkLowPassDownsampleRationalFilter(b *testing.B) {
 83 | 	filter := &LowPassDownsampleRationalFilter{Fast: 3, Slow: 2}
 84 | 	input := make([]float32, 256)
 85 | 	for i := 0; i < 256; i++ {
 86 | 		input[i] = float32(i) - 128.0
 87 | 	}
 88 | 	for i := 0; i < b.N; i++ {
 89 | 		_ = lowPassDownsampleRationalFilterAsm(filter, input)
 90 | 	}
 91 | }
 92 | 
 93 | func BenchmarkLowPassDownsampleRationalFilter_Go(b *testing.B) {
 94 | 	filter := &LowPassDownsampleRationalFilter{Fast: 3, Slow: 2}
 95 | 	input := make([]float32, 256)
 96 | 	for i := 0; i < 256; i++ {
 97 | 		input[i] = float32(i) - 128.0
 98 | 	}
 99 | 	for i := 0; i < b.N; i++ {
100 | 		_ = lowPassDownsampleRationalFilter(filter, input)
101 | 	}
102 | }
103 | 


--------------------------------------------------------------------------------
/dsp/dtmf/dtmf.go:
--------------------------------------------------------------------------------
 1 | package dtmf
 2 | 
 3 | import (
 4 | 	"github.com/samuel/go-dsp/dsp"
 5 | )
 6 | 
var (
	// Keypad lists the key symbols of a 4x4 DTMF keypad in row-major order.
	// With the standard frequency sets below (four high frequencies), the
	// key number returned by (*DTMF).Feed, row*4+col, indexes this slice.
	Keypad = []rune{
		'1', '2', '3', 'A',
		'4', '5', '6', 'B',
		'7', '8', '9', 'C',
		'*', '0', '#', 'D',
	}
	// StdLowFreq holds the low-group (row) tone frequencies used by
	// NewStandard, in the same unit as the sample rate passed to New.
	StdLowFreq  = []uint64{697, 770, 852, 941}
	// StdHighFreq holds the high-group (column) tone frequencies used by
	// NewStandard, in the same unit as the sample rate passed to New.
	StdHighFreq = []uint64{1209, 1336, 1477, 1633}
)
17 | 
// DTMF detects which low/high frequency pair is strongest in a block of
// samples, using one Goertzel bank per frequency group.
type DTMF struct {
	// lowFreq measures the low-group (row) frequencies.
	lowFreq   *dsp.Goertzel32
	// highFreq measures the high-group (column) frequencies.
	highFreq  *dsp.Goertzel32
	// nHigh is the number of high-group frequencies; Feed uses it as the
	// row stride when computing the key number.
	nHigh     int
	// blockSize is the maximum number of samples Feed analyses per call.
	blockSize int
	// w holds blockSize window coefficients multiplied into the samples
	// before analysis.
	w         []float32
}
25 | 
26 | func New(lowFreq, highFreq []uint64, sampleRate, blockSize int, windowFunc func([]float32)) *DTMF {
27 | 	w := make([]float32, blockSize)
28 | 	if windowFunc != nil {
29 | 		windowFunc(w)
30 | 	} else {
31 | 		dsp.HammingWindowF32(w)
32 | 	}
33 | 	return &DTMF{
34 | 		lowFreq:   dsp.NewGoertzel32(lowFreq, sampleRate, blockSize),
35 | 		highFreq:  dsp.NewGoertzel32(highFreq, sampleRate, blockSize),
36 | 		nHigh:     len(highFreq),
37 | 		blockSize: blockSize,
38 | 		w:         w,
39 | 	}
40 | }
41 | 
// NewStandard returns a detector for the StdLowFreq/StdHighFreq frequency
// sets, windowed with dsp.HammingWindowF32.
func NewStandard(sampleRate, blockSize int) *DTMF {
	return New(StdLowFreq, StdHighFreq, sampleRate, blockSize, dsp.HammingWindowF32)
}
45 | 
46 | // Return key number (lowFreqIndex * numHighFreq + highFreqIndex) and minimum magnitude
47 | func (d *DTMF) Feed(samples []float32) (int, float32) {
48 | 	if len(samples) > d.blockSize {
49 | 		samples = samples[:d.blockSize]
50 | 	}
51 | 	for i, s := range samples {
52 | 		samples[i] = s * d.w[i]
53 | 	}
54 | 	d.lowFreq.Reset()
55 | 	d.highFreq.Reset()
56 | 	d.lowFreq.Feed(samples)
57 | 	d.highFreq.Feed(samples)
58 | 	row, thresh1 := max(d.lowFreq.Magnitude())
59 | 	col, thresh2 := max(d.highFreq.Magnitude())
60 | 	if thresh2 < thresh1 {
61 | 		thresh1 = thresh2
62 | 	}
63 | 	return row*d.nHigh + col, thresh1
64 | }
65 | 
// max returns the index and value of the largest element of val that is
// strictly greater than zero; the first such element wins ties. If no element
// is greater than zero (including an empty slice) it returns (0, 0).
func max(val []float32) (int, float32) {
	best, peak := 0, float32(0.0)
	for i := range val {
		if val[i] > peak {
			best, peak = i, val[i]
		}
	}
	return best, peak
}
77 | 


--------------------------------------------------------------------------------
/dsp/filter.go:
--------------------------------------------------------------------------------
  1 | package dsp
  2 | 
// IIRFilter is a recursive filter over float64 samples that keeps its state
// between calls to Filter.
type IIRFilter struct {
	// bCoef weights the current and previous inputs; aCoef[1:] weights the
	// previous outputs (aCoef[0] is not read by Filter).
	bCoef, aCoef []float64
	// pIn and pOut hold the previous inputs and outputs, most recent first.
	pIn, pOut    []float64
}
  7 | 
// ComplexIIRFilter32 is a recursive filter over complex64 samples that keeps
// its state between calls to Filter.
type ComplexIIRFilter32 struct {
	// bCoef weights the current and previous inputs; aCoef[1:] weights the
	// previous outputs (aCoef[0] is not read by Filter).
	bCoef, aCoef []complex64
	// pIn and pOut hold the previous inputs and outputs, most recent first.
	pIn, pOut    []complex64
}
 12 | 
// ComplexIIRFilter is a recursive filter over complex128 samples that keeps
// its state between calls to Filter.
type ComplexIIRFilter struct {
	// bCoef weights the current and previous inputs; aCoef[1:] weights the
	// previous outputs (aCoef[0] is not read by Filter).
	bCoef, aCoef []complex128
	// pIn and pOut hold the previous inputs and outputs, most recent first.
	pIn, pOut    []complex128
}
 17 | 
 18 | func NewIIRFilter(bCoef, aCoef []float64) *IIRFilter {
 19 | 	if len(bCoef) != len(aCoef) || len(bCoef) == 0 {
 20 | 		panic("IIR filter must have len(b)==len(a) and len(b) > 0")
 21 | 	}
 22 | 	for i, c := range bCoef {
 23 | 		bCoef[i] = c / aCoef[0]
 24 | 	}
 25 | 	for i, c := range aCoef[1:] {
 26 | 		aCoef[i+1] = c / aCoef[0]
 27 | 	}
 28 | 	return &IIRFilter{
 29 | 		bCoef: bCoef,
 30 | 		aCoef: aCoef,
 31 | 		pIn:   make([]float64, len(bCoef)-1),
 32 | 		pOut:  make([]float64, len(bCoef)-1),
 33 | 	}
 34 | }
 35 | 
 36 | func NewComplexIIRFilter32(bCoef, aCoef []float32) *ComplexIIRFilter32 {
 37 | 	if len(bCoef) != len(aCoef) || len(bCoef) == 0 {
 38 | 		panic("IIR filter must have len(b)==len(a) and len(b) > 0")
 39 | 	}
 40 | 	for i, c := range bCoef {
 41 | 		bCoef[i] = c / aCoef[0]
 42 | 	}
 43 | 	for i, c := range aCoef[1:] {
 44 | 		aCoef[i+1] = c / aCoef[0]
 45 | 	}
 46 | 	return &ComplexIIRFilter32{
 47 | 		bCoef: rtoc32(bCoef),
 48 | 		aCoef: rtoc32(aCoef),
 49 | 		pIn:   make([]complex64, len(bCoef)-1),
 50 | 		pOut:  make([]complex64, len(bCoef)-1),
 51 | 	}
 52 | }
 53 | 
 54 | func NewComplexIIRFilter(bCoef, aCoef []float64) *ComplexIIRFilter {
 55 | 	if len(bCoef) != len(aCoef) || len(bCoef) == 0 {
 56 | 		panic("IIR filter must have len(b)==len(a) and len(b) > 0")
 57 | 	}
 58 | 	for i, c := range bCoef {
 59 | 		bCoef[i] = c / aCoef[0]
 60 | 	}
 61 | 	for i, c := range aCoef[1:] {
 62 | 		aCoef[i+1] = c / aCoef[0]
 63 | 	}
 64 | 	return &ComplexIIRFilter{
 65 | 		bCoef: rtoc(bCoef),
 66 | 		aCoef: rtoc(aCoef),
 67 | 		pIn:   make([]complex128, len(bCoef)-1),
 68 | 		pOut:  make([]complex128, len(bCoef)-1),
 69 | 	}
 70 | }
 71 | 
 72 | func (f *IIRFilter) Filter(input, output []float64) {
 73 | 	for i, s := range input {
 74 | 		sum := f.bCoef[0] * s
 75 | 		for j, p := range f.pIn {
 76 | 			sum += f.bCoef[j+1]*p - f.aCoef[j+1]*f.pOut[j]
 77 | 		}
 78 | 		for i := len(f.pIn) - 1; i > 0; i-- {
 79 | 			f.pIn[i] = f.pIn[i-1]
 80 | 			f.pOut[i] = f.pOut[i-1]
 81 | 		}
 82 | 		f.pIn[0] = s
 83 | 		f.pOut[0] = sum
 84 | 		output[i] = sum
 85 | 	}
 86 | }
 87 | 
 88 | func (f *ComplexIIRFilter32) Filter(input, output []complex64) {
 89 | 	for i, s := range input {
 90 | 		sum := f.bCoef[0] * s
 91 | 		for j, p := range f.pIn {
 92 | 			sum += f.bCoef[j+1]*p - f.aCoef[j+1]*f.pOut[j]
 93 | 		}
 94 | 		for i := len(f.pIn) - 1; i > 0; i-- {
 95 | 			f.pIn[i] = f.pIn[i-1]
 96 | 			f.pOut[i] = f.pOut[i-1]
 97 | 		}
 98 | 		f.pIn[0] = s
 99 | 		f.pOut[0] = sum
100 | 		output[i] = sum
101 | 	}
102 | }
103 | 
104 | func (f *ComplexIIRFilter) Filter(input, output []complex128) {
105 | 	for i, s := range input {
106 | 		sum := f.bCoef[0] * s
107 | 		for j, p := range f.pIn {
108 | 			sum += f.bCoef[j+1]*p - f.aCoef[j+1]*f.pOut[j]
109 | 		}
110 | 		for i := len(f.pIn) - 1; i > 0; i-- {
111 | 			f.pIn[i] = f.pIn[i-1]
112 | 			f.pOut[i] = f.pOut[i-1]
113 | 		}
114 | 		f.pIn[0] = s
115 | 		f.pOut[0] = sum
116 | 		output[i] = sum
117 | 	}
118 | }
119 | 
// DCFilter is a one-state recursive filter over float64 samples. For each
// input x it computes w = x + a*wPrev and outputs w - wPrev.
type DCFilter struct {
	a float64 // feedback coefficient applied to the previous state
	w float64 // state carried from the previous sample
}

// NewDCFilter returns a DCFilter with feedback coefficient a and zero state.
func NewDCFilter(a float64) *DCFilter {
	f := new(DCFilter)
	f.a = a
	return f
}

// Filter processes every sample of input, writing the results to output
// (which must be at least as long as input). The state is stored back only
// after the whole block has been processed.
func (f *DCFilter) Filter(input, output []float64) {
	state := f.w
	for i := range input {
		next := input[i] + f.a*state
		output[i] = next - state
		state = next
	}
	f.w = state
}

// FilterOne processes a single sample, updates the state and returns the
// filtered value.
func (f *DCFilter) FilterOne(x float64) float64 {
	prev := f.w
	f.w = x + f.a*prev
	return f.w - prev
}
145 | 
// DCFilter32 is a one-state recursive filter over float32 samples. For each
// input x it computes w = x + a*wPrev and outputs w - wPrev.
type DCFilter32 struct {
	a float32 // feedback coefficient applied to the previous state
	w float32 // state carried from the previous sample
}

// NewDCFilter32 returns a DCFilter32 with feedback coefficient a and zero
// state.
func NewDCFilter32(a float32) *DCFilter32 {
	f := new(DCFilter32)
	f.a = a
	return f
}

// Filter processes every sample of input, writing the results to output
// (which must be at least as long as input). The state is stored back only
// after the whole block has been processed.
func (f *DCFilter32) Filter(input, output []float32) {
	state := f.w
	for i := range input {
		next := input[i] + f.a*state
		output[i] = next - state
		state = next
	}
	f.w = state
}

// FilterOne processes a single sample, updates the state and returns the
// filtered value.
func (f *DCFilter32) FilterOne(x float32) float32 {
	prev := f.w
	f.w = x + f.a*prev
	return f.w - prev
}
171 | 
172 | // TODO: implement https://www.researchgate.net/publication/261775781_DC_Blocker_Algorithms -- https://www.dsprelated.com/showarticle/58.php
173 | // https://github.com/gnuradio/gnuradio/blob/master/gr-filter/include/gnuradio/filter/dc_blocker_ff.h
174 | // https://github.com/ghostop14/gr-correctiq
175 | 


--------------------------------------------------------------------------------
/dsp/fuzz.go:
--------------------------------------------------------------------------------
 1 | // +build gofuzz
 2 | 
 3 | package dsp
 4 | 
 5 | func Fuzz(data []byte) int {
 6 | 	data = data[:len(data)/2*2]
 7 | 	output := make([]float32, len(data)/2)
 8 | 	expected := make([]float32, len(data)/2)
 9 | 	I16bleToF32(data, output, 2.0)
10 | 	i16bleToF32(data, expected, 2.0)
11 | 	for i, v := range expected {
12 | 		if output[i] != v {
13 | 			return 0
14 | 		}
15 | 	}
16 | 	return 1
17 | }
18 | 


--------------------------------------------------------------------------------
/dsp/goertzel.go:
--------------------------------------------------------------------------------
  1 | package dsp
  2 | 
  3 | import "math"
  4 | 
// Goertzel measures a fixed set of target frequencies in float64 samples,
// keeping one recurrence state per frequency.
type Goertzel struct {
	// freq holds the coefficients and running state for each target.
	freq []*goertzel
	// mag is the buffer filled and returned by Magnitude.
	mag  []float64
	// cplx is the buffer filled and returned by Complex.
	cplx []complex128
}
 10 | 
// Goertzel32 measures a fixed set of target frequencies in float32 samples.
// The recurrence state itself is kept in float64; only the inputs and the
// reported results are float32/complex64.
type Goertzel32 struct {
	// freq holds the coefficients and running state for each target.
	freq []*goertzel
	// mag is the buffer filled and returned by Magnitude.
	mag  []float32
	// cplx is the buffer filled and returned by Complex.
	cplx []complex64
}
 16 | 
// ComplexGoertzel measures a fixed set of target frequencies in complex128
// samples, running separate recurrences for the real and imaginary parts.
type ComplexGoertzel struct {
	// freq holds the coefficients and running state for each target.
	freq []*goertzel
	// mag is the buffer filled and returned by Magnitude.
	mag  []float64
	// cplx is the buffer filled and returned by Complex.
	cplx []complex128
}
 22 | 
// goertzel is the per-frequency coefficient set and recurrence state shared
// by Goertzel, Goertzel32 and ComplexGoertzel.
type goertzel struct {
	// coeff is 2*cos(w) for the target's angular frequency w.
	coeff    float64
	// cos and sin are cos(w) and sin(w) for the same w.
	cos, sin float64
	// q1 and q2 are the two most recent recurrence values (q1 is newer).
	q1, q2   float64
	// q1i and q2i are the same for the imaginary-part recurrence; only
	// ComplexGoertzel uses them.
	q1i, q2i float64
}
 29 | 
 30 | func NewGoertzel32(targetFreqs []uint64, sampleRate, blockSize int) *Goertzel32 {
 31 | 	freq := make([]*goertzel, len(targetFreqs))
 32 | 	for i, f := range targetFreqs {
 33 | 		// k is the closest bucket for the frequency
 34 | 		k := uint64(0.5 + float64(uint64(blockSize)*f)/float64(sampleRate))
 35 | 		w := 2.0 * math.Pi * float64(k) / float64(blockSize)
 36 | 		sin := math.Sin(w)
 37 | 		cos := math.Cos(w)
 38 | 		freq[i] = &goertzel{
 39 | 			coeff: 2.0 * cos,
 40 | 			cos:   cos,
 41 | 			sin:   sin,
 42 | 		}
 43 | 	}
 44 | 	return &Goertzel32{
 45 | 		freq: freq,
 46 | 		mag:  make([]float32, len(targetFreqs)),
 47 | 		cplx: make([]complex64, len(targetFreqs)),
 48 | 	}
 49 | }
 50 | 
 51 | func (g *Goertzel32) Reset() {
 52 | 	for _, freq := range g.freq {
 53 | 		freq.q1 = 0.0
 54 | 		freq.q2 = 0.0
 55 | 	}
 56 | }
 57 | 
 58 | func (g *Goertzel32) Feed(samples []float32) {
 59 | 	for _, samp := range samples {
 60 | 		for _, freq := range g.freq {
 61 | 			q0 := freq.coeff*freq.q1 - freq.q2 + float64(samp)
 62 | 			freq.q2 = freq.q1
 63 | 			freq.q1 = q0
 64 | 		}
 65 | 	}
 66 | }
 67 | 
 68 | func (g *Goertzel32) Magnitude() []float32 {
 69 | 	for i, freq := range g.freq {
 70 | 		g.mag[i] = float32(freq.q1*freq.q1 + freq.q2*freq.q2 - freq.q1*freq.q2*freq.coeff)
 71 | 	}
 72 | 	return g.mag
 73 | }
 74 | 
 75 | func (g *Goertzel32) Complex() []complex64 {
 76 | 	for i, freq := range g.freq {
 77 | 		g.cplx[i] = complex(float32(freq.q1*freq.cos-freq.q2), float32(freq.q1*freq.sin))
 78 | 	}
 79 | 	return g.cplx
 80 | }
 81 | 
 82 | func NewGoertzel(targetFreqs []uint64, sampleRate, blockSize int) *Goertzel {
 83 | 	freq := make([]*goertzel, len(targetFreqs))
 84 | 	for i, f := range targetFreqs {
 85 | 		// k is the closest bucket for the frequency
 86 | 		k := uint64(0.5 + float64(uint64(blockSize)*f)/float64(sampleRate))
 87 | 		w := 2.0 * math.Pi * float64(k) / float64(blockSize)
 88 | 		sin := math.Sin(w)
 89 | 		cos := math.Cos(w)
 90 | 		freq[i] = &goertzel{
 91 | 			coeff: 2.0 * cos,
 92 | 			cos:   cos,
 93 | 			sin:   sin,
 94 | 		}
 95 | 	}
 96 | 	return &Goertzel{
 97 | 		freq: freq,
 98 | 		mag:  make([]float64, len(targetFreqs)),
 99 | 		cplx: make([]complex128, len(targetFreqs)),
100 | 	}
101 | }
102 | 
103 | func (g *Goertzel) Reset() {
104 | 	for _, freq := range g.freq {
105 | 		freq.q1 = 0.0
106 | 		freq.q2 = 0.0
107 | 	}
108 | }
109 | 
110 | func (g *Goertzel) Feed(samples []float64) {
111 | 	for _, samp := range samples {
112 | 		for _, freq := range g.freq {
113 | 			q0 := freq.coeff*freq.q1 - freq.q2 + samp
114 | 			freq.q2 = freq.q1
115 | 			freq.q1 = q0
116 | 		}
117 | 	}
118 | }
119 | 
120 | func (g *Goertzel) Magnitude() []float64 {
121 | 	for i, freq := range g.freq {
122 | 		g.mag[i] = freq.q1*freq.q1 + freq.q2*freq.q2 - freq.q1*freq.q2*freq.coeff
123 | 	}
124 | 	return g.mag
125 | }
126 | 
127 | func (g *Goertzel) Complex() []complex128 {
128 | 	for i, freq := range g.freq {
129 | 		g.cplx[i] = complex(freq.q1*freq.cos-freq.q2, freq.q1*freq.sin)
130 | 	}
131 | 	return g.cplx
132 | }
133 | 
134 | func NewComplexGoertzel(targetFreqs []uint64, sampleRate, blockSize int) *ComplexGoertzel {
135 | 	freq := make([]*goertzel, len(targetFreqs))
136 | 	for i, f := range targetFreqs {
137 | 		k := uint64(0.5 + float64(uint64(blockSize)*f)/float64(sampleRate))
138 | 		w := 2.0 * math.Pi * float64(k) / float64(blockSize)
139 | 		sin := math.Sin(w)
140 | 		cos := math.Cos(w)
141 | 		freq[i] = &goertzel{
142 | 			coeff: 2.0 * cos,
143 | 			cos:   cos,
144 | 			sin:   sin,
145 | 		}
146 | 	}
147 | 	return &ComplexGoertzel{
148 | 		freq: freq,
149 | 		mag:  make([]float64, len(targetFreqs)),
150 | 		cplx: make([]complex128, len(targetFreqs)),
151 | 	}
152 | }
153 | 
154 | func (g *ComplexGoertzel) Reset() {
155 | 	for _, freq := range g.freq {
156 | 		freq.q1 = 0.0
157 | 		freq.q2 = 0.0
158 | 		freq.q1i = 0.0
159 | 		freq.q2i = 0.0
160 | 	}
161 | }
162 | 
163 | func (g *ComplexGoertzel) Feed(samples []complex128) {
164 | 	for _, samp := range samples {
165 | 		for _, freq := range g.freq {
166 | 			q0 := freq.coeff*freq.q1 - freq.q2 + real(samp)
167 | 			freq.q2 = freq.q1
168 | 			freq.q1 = q0
169 | 			q0 = freq.coeff*freq.q1i - freq.q2i + imag(samp)
170 | 			freq.q2i = freq.q1i
171 | 			freq.q1i = q0
172 | 		}
173 | 	}
174 | }
175 | 
176 | func (g *ComplexGoertzel) Magnitude() []float64 {
177 | 	for i, f := range g.freq {
178 | 		re := f.q1*f.cos - f.q2 - f.q1i*f.sin
179 | 		im := f.q1*f.sin + f.q1i*f.cos - f.q2i
180 | 		// q1*cos - q2 - q1i*sin
181 | 		g.mag[i] = re*re + im*im
182 | 	}
183 | 	return g.mag
184 | }
185 | 
186 | func (g *ComplexGoertzel) Complex() []complex128 {
187 | 	for i, f := range g.freq {
188 | 		g.cplx[i] = complex(
189 | 			f.q1*f.cos-f.q2-f.q1i*f.sin,
190 | 			f.q1*f.sin+f.q1i*f.cos-f.q2i,
191 | 		)
192 | 	}
193 | 	return g.cplx
194 | }
195 | 
196 | // Sliding Goertzel implements a sliding version of the Goertzel filter.
197 | //
198 | // 	x(n)                                               y(n)
199 | // 	──────┬──────(+)──(+)────────────────┬────────(+)─────▶
200 | // 	      ▼       ▲    ▲ ▼               ▼         ▲
201 | // 	    ┌───┐     │    │  ╲            ┌───┐       │
202 | // 	    │z⁻ⁿ│     │    │   ╲           │z⁻ⁿ│       │
203 | // 	    └─┬─┘     │    │    ╲          └─┬─┘       │
204 | // 	      └─▶(x)──┘    │     ╲           │         │
205 | // 	                   │      (x)◀───────●───────▶(x)
206 | // 	                   │       ▲         │         ▲
207 | // 	                   │       │       ┌─▼─┐       │
208 | // 	                   │  2cos(2πk/N)  │z⁻ⁿ│  -e^(-j2πk/N)
209 | // 	                   │               └─┬─┘
210 | // 	                   └──────(x)◀───────┘
211 | // 	                           ▲
212 | // 	                           │
213 | // 	                          -1
214 | // TODO
215 | // type SlidingGoertzel struct {
216 | // }
217 | // func NewSlidingGoertzel(k, n int) *SlidingGoertzel {
218 | // 	return &SlidingGoertzel{}
219 | // }
220 | 


--------------------------------------------------------------------------------
/dsp/goertzel_test.go:
--------------------------------------------------------------------------------
 1 | package dsp
 2 | 
 3 | import (
 4 | 	"math"
 5 | 	"math/cmplx"
 6 | 	"testing"
 7 | )
 8 | 
 9 | func TestGoertzel(t *testing.T) {
10 | 	samplerate := 1024
11 | 	blocksize := 1024
12 | 	freq := 128
13 | 	samples := make([]float64, blocksize)
14 | 	w := 2 * math.Pi / float64(samplerate)
15 | 	for i := 0; i < blocksize; i++ {
16 | 		samples[i] = math.Sin(float64(i) * float64(freq) * w)
17 | 	}
18 | 	g := NewGoertzel([]uint64{128, 129}, samplerate, blocksize)
19 | 	g.Feed(samples)
20 | 	m := g.Magnitude()
21 | 	if e := math.Pow(float64(blocksize)/2, 2); !approxEqual(m[0], e, 1e-8) {
22 | 		t.Errorf("Goertzel magnitude = %f. Want %f", m[0], e)
23 | 	}
24 | 	if !approxEqual(float64(m[1]), 0.0, 1e-10) {
25 | 		t.Errorf("Foertzel magnitude = %f. Want 0.0", m[1])
26 | 	}
27 | 	c := g.Complex()
28 | 	if e, m := math.Sqrt(math.Pow(float64(blocksize)/2, 2)), cmplx.Abs(complex128(c[0])); !approxEqual(m, e, 1e-8) {
29 | 		t.Errorf("Goertzel magnitude = %f. Want %f", m, e)
30 | 	}
31 | 	if e, p := -math.Pi/2, cmplx.Phase(complex128(c[0])); !approxEqual(p, e, 1e-12) {
32 | 		t.Errorf("Goertzel phase = %f. Want %f", p, e)
33 | 	}
34 | }
35 | 


--------------------------------------------------------------------------------
/dsp/iaca.h:
--------------------------------------------------------------------------------
// Byte-sequence markers for bracketing a region of assembly.
// NOTE(review): presumably these are the start/end markers recognised by
// Intel's IACA analysis tool — confirm against the tool's own header.

// IACA_SSC_MARK(MARK_ID) emits the bytes BB <MARK_ID> 00 00 00 followed by
// 64 67 90.
#define IACA_SSC_MARK(MARK_ID) \
	BYTE $0xBB; BYTE MARK_ID; BYTE $0x00; BYTE $0x00; BYTE $0x00 \
	BYTE $0x64; BYTE $0x67; BYTE $0x90
// IACA_UD_BYTES emits the two bytes 0F 0B.
#define IACA_UD_BYTES BYTE $0x0F; BYTE $0x0B
// IACA_START opens a marked region: IACA_UD_BYTES, then mark 111.
#define IACA_START IACA_UD_BYTES; IACA_SSC_MARK($111)
// IACA_END closes a marked region: mark 222, then IACA_UD_BYTES.
#define IACA_END IACA_SSC_MARK($222); IACA_UD_BYTES
7 | 


--------------------------------------------------------------------------------
/dsp/internal/cpu/cpu.go:
--------------------------------------------------------------------------------
  1 | // Copyright 2017 The Go Authors. All rights reserved.
  2 | // Use of this source code is governed by a BSD-style
  3 | // license that can be found in the LICENSE file.
  4 | 
  5 | // Package cpu implements processor feature detection
  6 | // used by the Go standard library.
  7 | package cpu
  8 | 
// DebugOptions is set to true by the runtime if the OS supports reading
// GODEBUG early in runtime startup.
// This should not be changed after it is initialized.
// NOTE(review): this file lives under dsp/internal/cpu rather than in the Go
// tree; confirm whether anything in this repository actually sets it.
var DebugOptions bool

// CacheLinePad is used to pad structs to avoid false sharing.
// It occupies CacheLinePadSize bytes.
type CacheLinePad struct{ _ [CacheLinePadSize]byte }

// CacheLineSize is the CPU's assumed cache line size.
// There is currently no runtime detection of the real cache line size
// so we use the constant per GOARCH CacheLinePadSize as an approximation.
var CacheLineSize uintptr = CacheLinePadSize
 21 | 
// X86 reports which x86 CPU features are available.
// The booleans in X86 contain the correspondingly named cpuid feature bit.
// HasAVX and HasAVX2 are only set if the OS does support XMM and YMM registers
// in addition to the cpuid feature bit being set.
// The struct is padded (leading and trailing CacheLinePad) to avoid false
// sharing.
var X86 struct {
	_            CacheLinePad
	HasAES       bool
	HasADX       bool
	HasAVX       bool
	HasAVX2      bool
	HasBMI1      bool
	HasBMI2      bool
	HasERMS      bool
	HasFMA       bool
	HasOSXSAVE   bool
	HasPCLMULQDQ bool
	HasPOPCNT    bool
	HasSSE2      bool
	HasSSE3      bool
	HasSSSE3     bool
	HasSSE41     bool
	HasSSE42     bool
	_            CacheLinePad
}
 46 | 
// ARM reports which 32-bit ARM CPU features are available.
// The booleans in ARM contain the correspondingly named cpu feature bit.
// The struct is padded (leading and trailing CacheLinePad) to avoid false
// sharing.
var ARM struct {
	_        CacheLinePad
	HasVFPv4 bool
	HasIDIVA bool
	_        CacheLinePad
}
 55 | 
// ARM64 reports which arm64 CPU features are available and, through the Is*
// fields, whether a specific core type was identified.
// The booleans in ARM64 contain the correspondingly named cpu feature bit.
// The struct is padded (leading and trailing CacheLinePad) to avoid false
// sharing.
var ARM64 struct {
	_            CacheLinePad
	HasAES       bool
	HasPMULL     bool
	HasSHA1      bool
	HasSHA2      bool
	HasCRC32     bool
	HasATOMICS   bool
	HasCPUID     bool
	IsNeoverseN1 bool
	IsZeus       bool
	_            CacheLinePad
}
 71 | 
// MIPS64X reports which mips64/mips64le CPU features are available.
// The struct is padded (leading and trailing CacheLinePad), like the other
// per-architecture feature structs in this file.
var MIPS64X struct {
	_      CacheLinePad
	HasMSA bool // MIPS SIMD architecture
	_      CacheLinePad
}
 77 | 
// PPC64 reports the ISA level and the kernel-dependent features of a
// ppc64/ppc64le CPU.
//
// For ppc64(le), it is safe to check only for ISA level starting on ISA v3.00,
// since there are no optional categories. There are some exceptions that also
// require kernel support to work (darn, scv), so there are feature bits for
// those as well. The minimum processor requirement is POWER8 (ISA 2.07).
// The struct is padded to avoid false sharing.
var PPC64 struct {
	_        CacheLinePad
	HasDARN  bool // Hardware random number generator (requires kernel enablement)
	HasSCV   bool // Syscall vectored (requires kernel enablement)
	IsPOWER8 bool // ISA v2.07 (POWER8)
	IsPOWER9 bool // ISA v3.00 (POWER9)
	_        CacheLinePad
}
 91 | 
// S390X reports which s390x facilities are available. Fields marked
// [mandatory] are facilities the port always requires.
// The struct is padded (leading and trailing CacheLinePad), like the other
// per-architecture feature structs in this file.
var S390X struct {
	_         CacheLinePad
	HasZARCH  bool // z architecture mode is active [mandatory]
	HasSTFLE  bool // store facility list extended [mandatory]
	HasLDISP  bool // long (20-bit) displacements [mandatory]
	HasEIMM   bool // 32-bit immediates [mandatory]
	HasDFP    bool // decimal floating point
	HasETF3EH bool // ETF-3 enhanced
	HasMSA    bool // message security assist (CPACF)
	HasAES    bool // KM-AES{128,192,256} functions
	HasAESCBC bool // KMC-AES{128,192,256} functions
	HasAESCTR bool // KMCTR-AES{128,192,256} functions
	HasAESGCM bool // KMA-GCM-AES{128,192,256} functions
	HasGHASH  bool // KIMD-GHASH function
	HasSHA1   bool // K{I,L}MD-SHA-1 functions
	HasSHA256 bool // K{I,L}MD-SHA-256 functions
	HasSHA512 bool // K{I,L}MD-SHA-512 functions
	HasSHA3   bool // K{I,L}MD-SHA3-{224,256,384,512} and K{I,L}MD-SHAKE-{128,256} functions
	HasVX     bool // vector facility. Note: the runtime sets this when it processes auxv records.
	HasVXE    bool // vector-enhancements facility 1
	HasKDSA   bool // elliptic curve functions
	HasECDSA  bool // NIST curves
	HasEDDSA  bool // Edwards curves
	_         CacheLinePad
}
117 | 
// Initialize examines the processor and sets the relevant variables above.
// This is called by the runtime package early in program initialization,
// before normal init functions are run. env is set by runtime if the OS supports
// cpu feature options in GODEBUG.
//
// It first runs the architecture-specific doinit, which detects features and
// registers the available options, and then applies the overrides found in
// env via processOptions.
func Initialize(env string) {
	doinit()
	processOptions(env)
}
126 | 
// options contains the cpu debug options that can be used in GODEBUG.
// Options are arch dependent and are added by the arch specific doinit functions.
// Features that are mandatory for the specific GOARCH should not be added to options
// (e.g. SSE2 on amd64).
var options []option

// option describes one overridable CPU feature and records what, if
// anything, was requested for it.
// Option names should be lower case. e.g. avx instead of AVX.
type option struct {
	Name      string
	// Feature points at the detected-feature flag that processOptions
	// overwrites when an override is accepted.
	Feature   *bool
	Specified bool // whether feature value was specified in GODEBUG
	Enable    bool // whether feature should be enabled
	Required  bool // whether feature is mandatory and can not be disabled
}
141 | 
142 | // processOptions enables or disables CPU feature values based on the parsed env string.
143 | // The env string is expected to be of the form cpu.feature1=value1,cpu.feature2=value2...
144 | // where feature names is one of the architecture specific list stored in the
145 | // cpu packages options variable and values are either 'on' or 'off'.
146 | // If env contains cpu.all=off then all cpu features referenced through the options
147 | // variable are disabled. Other feature names and values result in warning messages.
148 | func processOptions(env string) {
149 | field:
150 | 	for env != "" {
151 | 		field := ""
152 | 		i := indexByte(env, ',')
153 | 		if i < 0 {
154 | 			field, env = env, ""
155 | 		} else {
156 | 			field, env = env[:i], env[i+1:]
157 | 		}
158 | 		if len(field) < 4 || field[:4] != "cpu." {
159 | 			continue
160 | 		}
161 | 		i = indexByte(field, '=')
162 | 		if i < 0 {
163 | 			print("GODEBUG: no value specified for \"", field, "\"\n")
164 | 			continue
165 | 		}
166 | 		key, value := field[4:i], field[i+1:] // e.g. "SSE2", "on"
167 | 
168 | 		var enable bool
169 | 		switch value {
170 | 		case "on":
171 | 			enable = true
172 | 		case "off":
173 | 			enable = false
174 | 		default:
175 | 			print("GODEBUG: value \"", value, "\" not supported for cpu option \"", key, "\"\n")
176 | 			continue field
177 | 		}
178 | 
179 | 		if key == "all" {
180 | 			for i := range options {
181 | 				options[i].Specified = true
182 | 				options[i].Enable = enable || options[i].Required
183 | 			}
184 | 			continue field
185 | 		}
186 | 
187 | 		for i := range options {
188 | 			if options[i].Name == key {
189 | 				options[i].Specified = true
190 | 				options[i].Enable = enable
191 | 				continue field
192 | 			}
193 | 		}
194 | 
195 | 		print("GODEBUG: unknown cpu feature \"", key, "\"\n")
196 | 	}
197 | 
198 | 	for _, o := range options {
199 | 		if !o.Specified {
200 | 			continue
201 | 		}
202 | 
203 | 		if o.Enable && !*o.Feature {
204 | 			print("GODEBUG: can not enable \"", o.Name, "\", missing CPU support\n")
205 | 			continue
206 | 		}
207 | 
208 | 		if !o.Enable && o.Required {
209 | 			print("GODEBUG: can not disable \"", o.Name, "\", required CPU feature\n")
210 | 			continue
211 | 		}
212 | 
213 | 		*o.Feature = o.Enable
214 | 	}
215 | }
216 | 
// indexByte reports the position of the first byte equal to c in s.
// It returns -1 when s does not contain c.
func indexByte(s string, c byte) int {
	pos := 0
	for pos < len(s) && s[pos] != c {
		pos++
	}
	if pos == len(s) {
		return -1
	}
	return pos
}
227 | 


--------------------------------------------------------------------------------
/dsp/internal/cpu/cpu.s:
--------------------------------------------------------------------------------
1 | // Copyright 2020 The Go Authors. All rights reserved.
2 | // Use of this source code is governed by a BSD-style
3 | // license that can be found in the LICENSE file.
4 | 
5 | // This assembly file exists to allow internal/cpu to call
6 | // non-exported runtime functions that use "go:linkname".


--------------------------------------------------------------------------------
/dsp/internal/cpu/cpu_386.go:
--------------------------------------------------------------------------------
1 | // Copyright 2018 The Go Authors. All rights reserved.
2 | // Use of this source code is governed by a BSD-style
3 | // license that can be found in the LICENSE file.
4 | 
5 | package cpu
6 | 
// GOARCH is the architecture name associated with this file (386).
const GOARCH = "386"
8 | 


--------------------------------------------------------------------------------
/dsp/internal/cpu/cpu_amd64.go:
--------------------------------------------------------------------------------
1 | // Copyright 2018 The Go Authors. All rights reserved.
2 | // Use of this source code is governed by a BSD-style
3 | // license that can be found in the LICENSE file.
4 | 
5 | package cpu
6 | 
// GOARCH is the architecture name associated with this file (amd64).
const GOARCH = "amd64"
8 | 


--------------------------------------------------------------------------------
/dsp/internal/cpu/cpu_arm.go:
--------------------------------------------------------------------------------
 1 | // Copyright 2017 The Go Authors. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style
 3 | // license that can be found in the LICENSE file.
 4 | 
 5 | package cpu
 6 | 
// CacheLinePadSize is the size in bytes of CacheLinePad on arm, and the
// initial value of CacheLineSize.
const CacheLinePadSize = 32
 8 | 
// arm doesn't have a 'cpuid' equivalent, so we rely on HWCAP/HWCAP2.
// These are initialized by archauxv() and should not be changed after they are
// initialized.
// doinit reads HWCap to fill in the ARM feature flags; HWCap2 is not read in
// this file.
var HWCap uint
var HWCap2 uint

// HWCAP/HWCAP2 bits. These are exposed by Linux and FreeBSD.
// Each constant is the mask of a single bit in HWCap.
const (
	hwcap_VFPv4 = 1 << 16
	hwcap_IDIVA = 1 << 17
)
20 | 
21 | func doinit() {
22 | 	options = []option{
23 | 		{Name: "vfpv4", Feature: &ARM.HasVFPv4},
24 | 		{Name: "idiva", Feature: &ARM.HasIDIVA},
25 | 	}
26 | 
27 | 	// HWCAP feature bits
28 | 	ARM.HasVFPv4 = isSet(HWCap, hwcap_VFPv4)
29 | 	ARM.HasIDIVA = isSet(HWCap, hwcap_IDIVA)
30 | }
31 | 
// isSet reports whether hwc and value have at least one bit in common.
func isSet(hwc uint, value uint) bool {
	common := hwc & value
	return common != 0
}
35 | 


--------------------------------------------------------------------------------
/dsp/internal/cpu/cpu_arm64.go:
--------------------------------------------------------------------------------
 1 | // Copyright 2017 The Go Authors. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style
 3 | // license that can be found in the LICENSE file.
 4 | 
 5 | package cpu
 6 | 
// CacheLinePadSize is the size in bytes of CacheLinePad on arm64, and the
// initial value of CacheLineSize.
const CacheLinePadSize = 64
 8 | 
 9 | func doinit() {
10 | 	options = []option{
11 | 		{Name: "aes", Feature: &ARM64.HasAES},
12 | 		{Name: "pmull", Feature: &ARM64.HasPMULL},
13 | 		{Name: "sha1", Feature: &ARM64.HasSHA1},
14 | 		{Name: "sha2", Feature: &ARM64.HasSHA2},
15 | 		{Name: "crc32", Feature: &ARM64.HasCRC32},
16 | 		{Name: "atomics", Feature: &ARM64.HasATOMICS},
17 | 		{Name: "cpuid", Feature: &ARM64.HasCPUID},
18 | 		{Name: "isNeoverseN1", Feature: &ARM64.IsNeoverseN1},
19 | 		{Name: "isZeus", Feature: &ARM64.IsZeus},
20 | 	}
21 | 
22 | 	// arm64 uses different ways to detect CPU features at runtime depending on the operating system.
23 | 	osInit()
24 | }
25 | 
// getisar0 is declared without a body; it is implemented in assembly
// (cpu_arm64.s), where it returns the value of ID_AA64ISAR0_EL1.
func getisar0() uint64
27 | 
// getMIDR is declared without a body; it is implemented in assembly
// (cpu_arm64.s), where it returns the value of MIDR_EL1.
func getMIDR() uint64
29 | 


--------------------------------------------------------------------------------
/dsp/internal/cpu/cpu_arm64.s:
--------------------------------------------------------------------------------
 1 | // Copyright 2020 The Go Authors. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style
 3 | // license that can be found in the LICENSE file.
 4 | 
 5 | #include "textflag.h"
 6 | 
// func getisar0() uint64
// Returns the raw value of the ID_AA64ISAR0_EL1 system register.
TEXT ·getisar0(SB),NOSPLIT,$0
	// get Instruction Set Attributes 0 into R0
	MRS	ID_AA64ISAR0_EL1, R0
	// store R0 as the function result
	MOVD	R0, ret+0(FP)
	RET
13 | 
// func getMIDR() uint64
// Returns the raw value of the MIDR_EL1 system register.
TEXT ·getMIDR(SB), NOSPLIT, $0-8
	// read MIDR_EL1 into R0
	MRS	MIDR_EL1, R0
	// store R0 as the function result
	MOVD	R0, ret+0(FP)
	RET
19 | 


--------------------------------------------------------------------------------
/dsp/internal/cpu/cpu_arm64_android.go:
--------------------------------------------------------------------------------
 1 | // Copyright 2020 The Go Authors. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style
 3 | // license that can be found in the LICENSE file.
 4 | 
 5 | //go:build arm64
 6 | // +build arm64
 7 | 
 8 | package cpu
 9 | 
10 | func osInit() { // delegates to the HWCAP-based detection, naming the platform "android"
11 | 	hwcapInit("android")
12 | }
13 | 


--------------------------------------------------------------------------------
/dsp/internal/cpu/cpu_arm64_darwin.go:
--------------------------------------------------------------------------------
 1 | // Copyright 2020 The Go Authors. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style
 3 | // license that can be found in the LICENSE file.
 4 | 
 5 | //go:build arm64 && darwin && !ios
 6 | // +build arm64,darwin,!ios
 7 | 
 8 | package cpu
 9 | 
10 | func osInit() { // sets the ARM64 flags: two from sysctl lookups, four assumed to be present
11 | 	ARM64.HasATOMICS = sysctlEnabled([]byte("hw.optional.armv8_1_atomics\x00"))
12 | 	ARM64.HasCRC32 = sysctlEnabled([]byte("hw.optional.armv8_crc32\x00"))
13 | 
14 | 	// There are no hw.optional sysctl values for the features below on macOS 11.0
15 | 	// that would allow detecting their supported state dynamically. Assume the CPU
16 | 	// features that Apple Silicon M1 supports are available as a minimal set of
17 | 	// features to all Go programs running on darwin/arm64.
18 | 	ARM64.HasAES = true
19 | 	ARM64.HasPMULL = true
20 | 	ARM64.HasSHA1 = true
21 | 	ARM64.HasSHA2 = true
22 | }
23 | 
24 | //go:noescape
25 | func getsysctlbyname(name []byte) (int32, int32) // body is not in this file; sysctlEnabled reads the results as (status, value)
26 | 
27 | // sysctlEnabled reports whether the named sysctl yields a positive value.
28 | func sysctlEnabled(name []byte) bool {
29 | 	status, enabled := getsysctlbyname(name)
30 | 	// A negative status from getsysctlbyname always counts as "not enabled".
31 | 	succeeded := status >= 0
32 | 	return succeeded && enabled > 0
33 | }
34 | 


--------------------------------------------------------------------------------
/dsp/internal/cpu/cpu_arm64_freebsd.go:
--------------------------------------------------------------------------------
 1 | // Copyright 2020 The Go Authors. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style
 3 | // license that can be found in the LICENSE file.
 4 | 
 5 | //go:build arm64
 6 | // +build arm64
 7 | 
 8 | package cpu
 9 | 
10 | func osInit() {
11 | 	// Decode ID_AA64ISAR0_EL1; flags are only ever set here, never cleared.
12 | 	isar0 := getisar0()
13 | 
14 | 	// Bits 4-7: 1 selects AES only, 2 selects AES together with PMULL.
15 | 	aes := extractBits(isar0, 4, 7)
16 | 	if aes == 1 || aes == 2 {
17 | 		ARM64.HasAES = true
18 | 	}
19 | 	if aes == 2 {
20 | 		ARM64.HasPMULL = true
21 | 	}
22 | 
23 | 	// Bits 8-11: SHA1.
24 | 	if extractBits(isar0, 8, 11) == 1 {
25 | 		ARM64.HasSHA1 = true
26 | 	}
27 | 
28 | 	// Bits 12-15: SHA2 (values 1 and 2 both qualify).
29 | 	if sha2 := extractBits(isar0, 12, 15); sha2 == 1 || sha2 == 2 {
30 | 		ARM64.HasSHA2 = true
31 | 	}
32 | 
33 | 	// Bits 16-19: CRC32.
34 | 	if extractBits(isar0, 16, 19) == 1 {
35 | 		ARM64.HasCRC32 = true
36 | 	}
37 | 
38 | 	// Bits 20-23: atomics, reported by the value 2.
39 | 	if extractBits(isar0, 20, 23) == 2 {
40 | 		ARM64.HasATOMICS = true
41 | 	}
42 | }
43 | 
44 | func extractBits(data uint64, start, end uint) uint {
45 | 	return uint(data>>start) & (uint(1)<<(end-start+1) - 1) // bits start..end of data, inclusive
46 | }
47 | 


--------------------------------------------------------------------------------
/dsp/internal/cpu/cpu_arm64_hwcap.go:
--------------------------------------------------------------------------------
 1 | // Copyright 2020 The Go Authors. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style
 3 | // license that can be found in the LICENSE file.
 4 | 
 5 | //go:build arm64 && linux
 6 | // +build arm64,linux
 7 | 
 8 | package cpu
 9 | 
10 | // HWCap may be initialized by archauxv and
11 | // should not be changed after it was initialized.
12 | var HWCap uint // hwcapInit tests this bitmask against the hwcap_* constants below
13 | 
14 | // HWCAP bits. These are exposed by Linux.
15 | const (
16 | 	hwcap_AES     = 1 << 3
17 | 	hwcap_PMULL   = 1 << 4
18 | 	hwcap_SHA1    = 1 << 5
19 | 	hwcap_SHA2    = 1 << 6
20 | 	hwcap_CRC32   = 1 << 7
21 | 	hwcap_ATOMICS = 1 << 8 // hwcapInit ignores this bit when called with "android"
22 | 	hwcap_CPUID   = 1 << 11 // when set, hwcapInit goes on to call getMIDR
23 | )
24 | 
25 | // hwcapInit derives the ARM64 feature flags from the HWCap bitmask. The os
26 | // argument names the calling platform; for "android" HasATOMICS stays false.
27 | func hwcapInit(os string) {
28 | 	// HWCap was populated by the runtime from the auxiliary vector.
29 | 	// Use HWCap information since reading aarch64 system registers
30 | 	// is not supported in user space on older linux kernels.
31 | 	has := func(bit uint) bool { return HWCap&bit != 0 }
32 | 	ARM64.HasAES = has(hwcap_AES)
33 | 	ARM64.HasPMULL = has(hwcap_PMULL)
34 | 	ARM64.HasSHA1 = has(hwcap_SHA1)
35 | 	ARM64.HasSHA2 = has(hwcap_SHA2)
36 | 	ARM64.HasCRC32 = has(hwcap_CRC32)
37 | 	ARM64.HasCPUID = has(hwcap_CPUID)
38 | 
39 | 	// The Samsung S9+ kernel reports support for atomics, but not all cores
40 | 	// actually support them, resulting in SIGILL. See issue #28431.
41 | 	// TODO(elias.naur): Only disable the optimization on bad chipsets on android.
42 | 	ARM64.HasATOMICS = os != "android" && has(hwcap_ATOMICS)
43 | 
44 | 	// Reading MIDR_EL1 is only safe when the AUXV carries the CPUID bit: the
45 | 	// kernel then traps the otherwise illegal instruction and returns a
46 | 	// sanitized value. Without that bit the process is killed with SIGILL.
47 | 	if !ARM64.HasCPUID {
48 | 		return
49 | 	}
50 | 
51 | 	midr := getMIDR()
52 | 	implementer, partNum := byte(midr>>24), uint16((midr>>4)&0xfff)
53 | 	switch {
54 | 	case implementer == 'A' && partNum == 0xd0c:
55 | 		ARM64.IsNeoverseN1 = true
56 | 	case implementer == 'A' && partNum == 0xd40:
57 | 		ARM64.IsZeus = true
58 | 	}
59 | }
60 | 
61 | func isSet(hwc uint, value uint) bool {
62 | 	return value&hwc != 0 // true when hwc and value have at least one set bit in common
63 | }
64 | 


--------------------------------------------------------------------------------
/dsp/internal/cpu/cpu_arm64_linux.go:
--------------------------------------------------------------------------------
 1 | // Copyright 2020 The Go Authors. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style
 3 | // license that can be found in the LICENSE file.
 4 | 
 5 | //go:build arm64 && linux && !android
 6 | // +build arm64,linux,!android
 7 | 
 8 | package cpu
 9 | 
10 | func osInit() { // delegates to the HWCAP-based detection, naming the platform "linux"
11 | 	hwcapInit("linux")
12 | }
13 | 


--------------------------------------------------------------------------------
/dsp/internal/cpu/cpu_arm64_other.go:
--------------------------------------------------------------------------------
 1 | // Copyright 2020 The Go Authors. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style
 3 | // license that can be found in the LICENSE file.
 4 | 
 5 | //go:build arm64 && !linux && !freebsd && !android && (!darwin || ios)
 6 | // +build arm64
 7 | // +build !linux
 8 | // +build !freebsd
 9 | // +build !android
10 | // +build !darwin ios
11 | 
12 | package cpu
13 | 
14 | func osInit() { // no-op: this variant sets no ARM64 feature flags
15 | 	// Other operating systems do not support reading HWCap from the auxiliary vector,
16 | 	// reading privileged aarch64 system registers or using sysctl in user space to
17 | 	// detect CPU features at runtime.
18 | }
19 | 


--------------------------------------------------------------------------------
/dsp/internal/cpu/cpu_mips.go:
--------------------------------------------------------------------------------
 1 | // Copyright 2017 The Go Authors. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style
 3 | // license that can be found in the LICENSE file.
 4 | 
 5 | package cpu
 6 | 
 7 | const CacheLinePadSize = 32 // presumably a cache-line padding size in bytes — TODO confirm at use sites
 8 | 
 9 | func doinit() { // empty: registers no options and sets no feature flags
10 | }
11 | 


--------------------------------------------------------------------------------
/dsp/internal/cpu/cpu_mips64x.go:
--------------------------------------------------------------------------------
 1 | // Copyright 2019 The Go Authors. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style
 3 | // license that can be found in the LICENSE file.
 4 | 
 5 | //go:build mips64 || mips64le
 6 | // +build mips64 mips64le
 7 | 
 8 | package cpu
 9 | 
10 | const CacheLinePadSize = 32 // presumably a cache-line padding size in bytes — TODO confirm at use sites
11 | 
12 | // This is initialized by archauxv and should not be changed after it is
13 | // initialized.
14 | var HWCap uint // doinit tests this bitmask against hwcap_MIPS_MSA
15 | 
16 | // HWCAP bits. These are exposed by the Linux kernel 5.4.
17 | const (
18 | 	// CPU features
19 | 	hwcap_MIPS_MSA = 1 << 1 // the bit behind MIPS64X.HasMSA
20 | )
21 | 
22 | func doinit() {
23 | 	// Register the single option available on mips64x.
24 | 	options = []option{
25 | 		{Feature: &MIPS64X.HasMSA, Name: "msa"},
26 | 	}
27 | 	// MSA availability is read straight from the HWCap bitmask.
28 | 	MIPS64X.HasMSA = HWCap&hwcap_MIPS_MSA != 0
29 | }
30 | 
31 | func isSet(hwc uint, value uint) bool {
32 | 	return hwc&value > 0 // unsigned operands: positive means some bit of value is set in hwc
33 | }
34 | 


--------------------------------------------------------------------------------
/dsp/internal/cpu/cpu_mipsle.go:
--------------------------------------------------------------------------------
 1 | // Copyright 2017 The Go Authors. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style
 3 | // license that can be found in the LICENSE file.
 4 | 
 5 | package cpu
 6 | 
 7 | const CacheLinePadSize = 32 // presumably a cache-line padding size in bytes — TODO confirm at use sites
 8 | 
 9 | func doinit() { // empty: registers no options and sets no feature flags
10 | }
11 | 


--------------------------------------------------------------------------------
/dsp/internal/cpu/cpu_no_name.go:
--------------------------------------------------------------------------------
 1 | // Copyright 2020 The Go Authors. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style
 3 | // license that can be found in the LICENSE file.
 4 | 
 5 | //go:build !386 && !amd64
 6 | // +build !386,!amd64
 7 | 
 8 | package cpu
 9 | 
10 | // Name returns the CPU name given by the vendor
11 | // if it can be read directly from memory or by CPU instructions.
12 | // If the CPU name cannot be determined, an empty string is returned.
13 | //
14 | // Implementations that use the Operating System (e.g. sysctl or /sys/)
15 | // to gather CPU information for display should be placed in internal/sysinfo.
16 | func Name() string {
17 | 	// "A CPU has no name": this variant always returns the empty string.
18 | 	return ""
19 | }
20 | 


--------------------------------------------------------------------------------
/dsp/internal/cpu/cpu_ppc64x.go:
--------------------------------------------------------------------------------
 1 | // Copyright 2017 The Go Authors. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style
 3 | // license that can be found in the LICENSE file.
 4 | 
 5 | //go:build ppc64 || ppc64le
 6 | // +build ppc64 ppc64le
 7 | 
 8 | package cpu
 9 | 
10 | const CacheLinePadSize = 128 // presumably a cache-line padding size in bytes — TODO confirm at use sites
11 | 
12 | func doinit() { // registers the ppc64 options, then runs the OS-specific osinit
13 | 	options = []option{ // each entry pairs an option name with a pointer to its PPC64 flag
14 | 		{Name: "darn", Feature: &PPC64.HasDARN},
15 | 		{Name: "scv", Feature: &PPC64.HasSCV},
16 | 		{Name: "power9", Feature: &PPC64.IsPOWER9},
17 | 	}
18 | 
19 | 	osinit()
20 | }
21 | 
22 | func isSet(hwc uint, value uint) bool {
23 | 	return !(hwc&value == 0) // false only when hwc and value share no set bit
24 | }
25 | 


--------------------------------------------------------------------------------
/dsp/internal/cpu/cpu_ppc64x_aix.go:
--------------------------------------------------------------------------------
 1 | // Copyright 2020 The Go Authors. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style
 3 | // license that can be found in the LICENSE file.
 4 | 
 5 | //go:build ppc64 || ppc64le
 6 | // +build ppc64 ppc64le
 7 | 
 8 | package cpu
 9 | 
10 | const (
11 | 	// getsystemcfg constants
12 | 	_SC_IMPL     = 2 // label osinit passes to getsystemcfg
13 | 	_IMPL_POWER9 = 0x20000 // bit osinit tests in the value getsystemcfg returns
14 | )
15 | 
16 | func osinit() {
17 | 	// IsPOWER9 mirrors the _IMPL_POWER9 bit of getsystemcfg(_SC_IMPL).
18 | 	PPC64.IsPOWER9 = getsystemcfg(_SC_IMPL)&_IMPL_POWER9 != 0
19 | }
20 | 
21 | // getsystemcfg is defined in runtime/os2_aix.go
22 | func getsystemcfg(label uint) uint // bodyless here; osinit calls it with _SC_IMPL
23 | 


--------------------------------------------------------------------------------
/dsp/internal/cpu/cpu_ppc64x_linux.go:
--------------------------------------------------------------------------------
 1 | // Copyright 2020 The Go Authors. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style
 3 | // license that can be found in the LICENSE file.
 4 | 
 5 | //go:build ppc64 || ppc64le
 6 | // +build ppc64 ppc64le
 7 | 
 8 | package cpu
 9 | 
10 | // ppc64 doesn't have a 'cpuid' equivalent, so we rely on HWCAP/HWCAP2.
11 | // These are initialized by archauxv and should not be changed after they are
12 | // initialized.
13 | var HWCap uint
14 | var HWCap2 uint // the only one of the two that osinit reads
15 | 
16 | // HWCAP2 bits. These are exposed by Linux.
17 | const (
18 | 	// ISA Level
19 | 	hwcap2_ARCH_3_00 = 0x00800000 // osinit maps this bit to PPC64.IsPOWER9
20 | 
21 | 	// CPU features
22 | 	hwcap2_DARN = 0x00200000
23 | 	hwcap2_SCV  = 0x00100000
24 | )
25 | 
26 | func osinit() { // all three flags are read from HWCap2
27 | 	PPC64.HasSCV = HWCap2&hwcap2_SCV != 0
28 | 	PPC64.HasDARN = HWCap2&hwcap2_DARN != 0
29 | 	PPC64.IsPOWER9 = HWCap2&hwcap2_ARCH_3_00 != 0
30 | }
31 | 


--------------------------------------------------------------------------------
/dsp/internal/cpu/cpu_riscv64.go:
--------------------------------------------------------------------------------
 1 | // Copyright 2019 The Go Authors. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style
 3 | // license that can be found in the LICENSE file.
 4 | 
 5 | package cpu
 6 | 
 7 | const CacheLinePadSize = 32 // presumably a cache-line padding size in bytes — TODO confirm at use sites
 8 | 
 9 | func doinit() { // empty: registers no options and sets no feature flags
10 | }
11 | 


--------------------------------------------------------------------------------
/dsp/internal/cpu/cpu_s390x.go:
--------------------------------------------------------------------------------
  1 | // Copyright 2017 The Go Authors. All rights reserved.
  2 | // Use of this source code is governed by a BSD-style
  3 | // license that can be found in the LICENSE file.
  4 | 
  5 | package cpu
  6 | 
  7 | const CacheLinePadSize = 256 // presumably a cache-line padding size in bytes — TODO confirm at use sites
  8 | 
  9 | var HWCap uint // doinit tests this bitmask against hwcap_VX to decide S390X.HasVX
 10 | 
 11 | // bitIsSet reports whether bit number index of bits is 1. Bits are counted
 12 | // in big endian order: index 0 is the most significant bit of bits[0].
 13 | func bitIsSet(bits []uint64, index uint) bool {
 14 | 	return bits[index>>6]>>(63-index&63)&1 == 1
 15 | }
 16 | 
 17 | // function is the function code for the named function.
 18 | type function uint8
 19 | 
 20 | const (
 21 | 	// KM{,A,C,CTR} function codes
 22 | 	aes128 function = 18 // AES-128
 23 | 	aes192 function = 19 // AES-192
 24 | 	aes256 function = 20 // AES-256
 25 | 
 26 | 	// K{I,L}MD function codes
 27 | 	sha1     function = 1  // SHA-1
 28 | 	sha256   function = 2  // SHA-256
 29 | 	sha512   function = 3  // SHA-512
 30 | 	sha3_224 function = 32 // SHA3-224
 31 | 	sha3_256 function = 33 // SHA3-256
 32 | 	sha3_384 function = 34 // SHA3-384
 33 | 	sha3_512 function = 35 // SHA3-512
 34 | 	shake128 function = 36 // SHAKE-128
 35 | 	shake256 function = 37 // SHAKE-256
 36 | 
 37 | 	// KIMD function codes (doinit checks ghash via kimdQuery only: KLMD-GHASH does not exist)
 38 | 	ghash function = 65 // GHASH
 39 | )
 40 | 
 41 | const (
 42 | 	// KDSA function codes (doinit looks these up in the kdsaQuery result)
 43 | 	ecdsaVerifyP256    function = 1  // NIST P256
 44 | 	ecdsaVerifyP384    function = 2  // NIST P384
 45 | 	ecdsaVerifyP521    function = 3  // NIST P521
 46 | 	ecdsaSignP256      function = 9  // NIST P256
 47 | 	ecdsaSignP384      function = 10 // NIST P384
 48 | 	ecdsaSignP521      function = 11 // NIST P521
 49 | 	eddsaVerifyEd25519 function = 32 // Curve25519
 50 | 	eddsaVerifyEd448   function = 36 // Curve448
 51 | 	eddsaSignEd25519   function = 40 // Curve25519
 52 | 	eddsaSignEd448     function = 44 // Curve448
 53 | )
 54 | 
 55 | // queryResult contains the result of a Query function
 56 | // call. Bits are numbered in big endian order so the
 57 | // leftmost bit (the MSB) is at index 0.
 58 | type queryResult struct {
 59 | 	bits [2]uint64 // 128 bits in total; Has indexes them by function code
 60 | }
 61 | 
 62 | // Has reports whether every one of the given function codes is set in q.
 63 | // It panics when called without any function code.
 64 | func (q *queryResult) Has(fns ...function) bool {
 65 | 	if len(fns) == 0 {
 66 | 		panic("no function codes provided")
 67 | 	}
 68 | 	all := true
 69 | 	for i := 0; all && i < len(fns); i++ {
 70 | 		all = bitIsSet(q.bits[:], uint(fns[i]))
 71 | 	}
 72 | 	return all
 73 | }
 74 | 
 75 | // facility is a bit index for the named facility.
 76 | type facility uint8
 77 | 
 78 | const (
 79 | 	// mandatory facilities
 80 | 	zarch  facility = 1  // z architecture mode is active
 81 | 	stflef facility = 7  // store-facility-list-extended
 82 | 	ldisp  facility = 18 // long-displacement
 83 | 	eimm   facility = 21 // extended-immediate
 84 | 
 85 | 	// miscellaneous facilities
 86 | 	dfp    facility = 42 // decimal-floating-point
 87 | 	etf3eh facility = 30 // extended-translation 3 enhancement
 88 | 
 89 | 	// cryptography facilities
 90 | 	msa  facility = 17  // message-security-assist
 91 | 	msa3 facility = 76  // message-security-assist extension 3
 92 | 	msa4 facility = 77  // message-security-assist extension 4
 93 | 	msa5 facility = 57  // message-security-assist extension 5
 94 | 	msa8 facility = 146 // message-security-assist extension 8
 95 | 	msa9 facility = 155 // message-security-assist extension 9
 96 | 
 97 | 	// vector facilities
 98 | 	vxe facility = 135 // vector-enhancements 1
 99 | 
100 | 	// Note: vx requires kernel support
101 | 	// and so must be fetched from HWCAP.
102 | 
103 | 	hwcap_VX = 1 << 11 // vector facility; an untyped HWCap bit mask, not a facility index
104 | )
105 | 
106 | // facilityList contains the result of an STFLE call.
107 | // Bits are numbered in big endian order so the
108 | // leftmost bit (the MSB) is at index 0.
109 | type facilityList struct {
110 | 	bits [4]uint64 // 256 bits in total; Has indexes them by facility number
111 | }
112 | 
113 | // Has reports whether all of the given facility bits are set in s.
114 | // Calling it without any facility panics.
115 | func (s *facilityList) Has(fs ...facility) bool {
116 | 	if len(fs) == 0 {
117 | 		panic("no facility bits provided")
118 | 	}
119 | 	missing := false
120 | 	for i := 0; i < len(fs) && !missing; i++ {
121 | 		missing = !bitIsSet(s.bits[:], uint(fs[i]))
122 | 	}
123 | 	return !missing
124 | }
125 | 
126 | // The following feature detection functions are defined in cpu_s390x.s.
127 | // They are likely to be expensive to call so the results should be cached.
128 | func stfle() facilityList // store facility list extended (STFLE)
129 | func kmQuery() queryResult // query of cipher message (KM)
130 | func kmcQuery() queryResult // query of cipher message with chaining (KMC)
131 | func kmctrQuery() queryResult // query of cipher message with counter (KMCTR)
132 | func kmaQuery() queryResult // query of cipher message with authentication (KMA)
133 | func kimdQuery() queryResult // query of compute intermediate message digest (KIMD)
134 | func klmdQuery() queryResult // query of compute last message digest (KLMD)
135 | func kdsaQuery() queryResult // query of compute digital signature authentication (KDSA)
136 | 
137 | func doinit() { // registers the s390x options, then fills in S390X from STFLE, the query functions and HWCap
138 | 	options = []option{
139 | 		{Name: "zarch", Feature: &S390X.HasZARCH},
140 | 		{Name: "stfle", Feature: &S390X.HasSTFLE},
141 | 		{Name: "ldisp", Feature: &S390X.HasLDISP},
142 | 		{Name: "msa", Feature: &S390X.HasMSA},
143 | 		{Name: "eimm", Feature: &S390X.HasEIMM},
144 | 		{Name: "dfp", Feature: &S390X.HasDFP},
145 | 		{Name: "etf3eh", Feature: &S390X.HasETF3EH},
146 | 		{Name: "vx", Feature: &S390X.HasVX},
147 | 		{Name: "vxe", Feature: &S390X.HasVXE},
148 | 		{Name: "kdsa", Feature: &S390X.HasKDSA},
149 | 	}
150 | 
151 | 	aes := []function{aes128, aes192, aes256} // an AES flag below needs all three key sizes
152 | 	facilities := stfle()
153 | 
154 | 	S390X.HasZARCH = facilities.Has(zarch)
155 | 	S390X.HasSTFLE = facilities.Has(stflef)
156 | 	S390X.HasLDISP = facilities.Has(ldisp)
157 | 	S390X.HasEIMM = facilities.Has(eimm)
158 | 	S390X.HasDFP = facilities.Has(dfp)
159 | 	S390X.HasETF3EH = facilities.Has(etf3eh)
160 | 	S390X.HasMSA = facilities.Has(msa)
161 | 
162 | 	if S390X.HasMSA { // the query functions are only called when the msa facility bit is set
163 | 		// cipher message
164 | 		km, kmc := kmQuery(), kmcQuery()
165 | 		S390X.HasAES = km.Has(aes...)
166 | 		S390X.HasAESCBC = kmc.Has(aes...)
167 | 		if facilities.Has(msa4) {
168 | 			kmctr := kmctrQuery()
169 | 			S390X.HasAESCTR = kmctr.Has(aes...)
170 | 		}
171 | 		if facilities.Has(msa8) {
172 | 			kma := kmaQuery()
173 | 			S390X.HasAESGCM = kma.Has(aes...)
174 | 		}
175 | 
176 | 		// compute message digest
177 | 		kimd := kimdQuery() // intermediate (no padding)
178 | 		klmd := klmdQuery() // last (padding)
179 | 		S390X.HasSHA1 = kimd.Has(sha1) && klmd.Has(sha1)
180 | 		S390X.HasSHA256 = kimd.Has(sha256) && klmd.Has(sha256)
181 | 		S390X.HasSHA512 = kimd.Has(sha512) && klmd.Has(sha512)
182 | 		S390X.HasGHASH = kimd.Has(ghash) // KLMD-GHASH does not exist
183 | 		sha3 := []function{
184 | 			sha3_224, sha3_256, sha3_384, sha3_512,
185 | 			shake128, shake256,
186 | 		}
187 | 		S390X.HasSHA3 = kimd.Has(sha3...) && klmd.Has(sha3...)
188 | 		S390X.HasKDSA = facilities.Has(msa9) // elliptic curves
189 | 		if S390X.HasKDSA {
190 | 			kdsa := kdsaQuery()
191 | 			S390X.HasECDSA = kdsa.Has(ecdsaVerifyP256, ecdsaSignP256, ecdsaVerifyP384, ecdsaSignP384, ecdsaVerifyP521, ecdsaSignP521)
192 | 			S390X.HasEDDSA = kdsa.Has(eddsaVerifyEd25519, eddsaSignEd25519, eddsaVerifyEd448, eddsaSignEd448)
193 | 		}
194 | 	}
195 | 
196 | 	S390X.HasVX = isSet(HWCap, hwcap_VX) // vx is taken from HWCap, not from the facility list
197 | 
198 | 	if S390X.HasVX { // vxe is only looked at once vx is present
199 | 		S390X.HasVXE = facilities.Has(vxe)
200 | 	}
201 | }
202 | 
203 | func isSet(hwc uint, value uint) bool {
204 | 	return 0 != value&hwc // true when hwc and value overlap in at least one bit
205 | }
206 | 


--------------------------------------------------------------------------------
/dsp/internal/cpu/cpu_s390x.s:
--------------------------------------------------------------------------------
 1 | // Copyright 2018 The Go Authors. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style
 3 | // license that can be found in the LICENSE file.
 4 | 
 5 | #include "textflag.h"
 6 | 
 7 | // func stfle() facilityList
 8 | TEXT ·stfle(SB), NOSPLIT|NOFRAME, $0-32 // $0-32: no local frame, 32 bytes of results
 9 | 	MOVD $ret+0(FP), R1 // address of the 32-byte return value
10 | 	MOVD $3, R0          // last doubleword index to store
11 | 	XC   $32, (R1), (R1) // clear 4 doublewords (32 bytes)
12 | 	WORD $0xb2b01000     // store facility list extended (STFLE)
13 | 	RET
14 | 
15 | // func kmQuery() queryResult
16 | TEXT ·kmQuery(SB), NOSPLIT|NOFRAME, $0-16 // $0-16: no local frame, 16 bytes of results
17 | 	MOVD $0, R0         // set function code to 0 (KM-Query)
18 | 	MOVD $ret+0(FP), R1 // address of 16-byte return value
19 | 	WORD $0xB92E0024    // cipher message (KM)
20 | 	RET
21 | 
22 | // func kmcQuery() queryResult
23 | TEXT ·kmcQuery(SB), NOSPLIT|NOFRAME, $0-16 // $0-16: no local frame, 16 bytes of results
24 | 	MOVD $0, R0         // set function code to 0 (KMC-Query)
25 | 	MOVD $ret+0(FP), R1 // address of 16-byte return value
26 | 	WORD $0xB92F0024    // cipher message with chaining (KMC)
27 | 	RET
28 | 
29 | // func kmctrQuery() queryResult
30 | TEXT ·kmctrQuery(SB), NOSPLIT|NOFRAME, $0-16 // $0-16: no local frame, 16 bytes of results
31 | 	MOVD $0, R0         // set function code to 0 (KMCTR-Query)
32 | 	MOVD $ret+0(FP), R1 // address of 16-byte return value
33 | 	WORD $0xB92D4024    // cipher message with counter (KMCTR)
34 | 	RET
35 | 
36 | // func kmaQuery() queryResult
37 | TEXT ·kmaQuery(SB), NOSPLIT|NOFRAME, $0-16 // $0-16: no local frame, 16 bytes of results
38 | 	MOVD $0, R0         // set function code to 0 (KMA-Query)
39 | 	MOVD $ret+0(FP), R1 // address of 16-byte return value
40 | 	WORD $0xb9296024    // cipher message with authentication (KMA)
41 | 	RET
42 | 
43 | // func kimdQuery() queryResult
44 | TEXT ·kimdQuery(SB), NOSPLIT|NOFRAME, $0-16 // $0-16: no local frame, 16 bytes of results
45 | 	MOVD $0, R0         // set function code to 0 (KIMD-Query)
46 | 	MOVD $ret+0(FP), R1 // address of 16-byte return value
47 | 	WORD $0xB93E0024    // compute intermediate message digest (KIMD)
48 | 	RET
49 | 
50 | // func klmdQuery() queryResult
51 | TEXT ·klmdQuery(SB), NOSPLIT|NOFRAME, $0-16 // $0-16: no local frame, 16 bytes of results
52 | 	MOVD $0, R0         // set function code to 0 (KLMD-Query)
53 | 	MOVD $ret+0(FP), R1 // address of 16-byte return value
54 | 	WORD $0xB93F0024    // compute last message digest (KLMD)
55 | 	RET
56 | 
57 | // func kdsaQuery() queryResult
58 | TEXT ·kdsaQuery(SB), NOSPLIT|NOFRAME, $0-16 // $0-16: no local frame, 16 bytes of results
59 | 	MOVD $0, R0         // set function code to 0 (KDSA-Query)
60 | 	MOVD $ret+0(FP), R1 // address of 16-byte return value
61 | 	WORD $0xB93A0008    // compute digital signature authentication (KDSA)
62 | 	RET
63 | 
64 | 


--------------------------------------------------------------------------------
/dsp/internal/cpu/cpu_s390x_test.go:
--------------------------------------------------------------------------------
 1 | // Copyright 2018 The Go Authors. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style
 3 | // license that can be found in the LICENSE file.
 4 | 
 5 | package cpu_test
 6 | 
 7 | import (
 8 | 	"errors"
 9 | 	. "internal/cpu"
10 | 	"os"
11 | 	"regexp"
12 | 	"testing"
13 | )
14 | 
15 | // getFeatureList splits the first "features : ..." line of /proc/cpuinfo on whitespace.
16 | func getFeatureList() ([]string, error) {
17 | 	contents, err := os.ReadFile("/proc/cpuinfo")
18 | 	if err != nil {
19 | 		return nil, err
20 | 	}
21 | 	match := regexp.MustCompile("features\\s*:\\s*(.*)").FindSubmatch(contents)
22 | 	if len(match) >= 2 {
23 | 		return regexp.MustCompile("\\s+").Split(string(match[1]), -1), nil
24 | 	}
25 | 	return nil, errors.New("no feature list in /proc/cpuinfo")
26 | }
27 | 
28 | // TestS390XAgainstCPUInfo cross-checks the detected S390X flags against the
29 | // feature list in /proc/cpuinfo: every listed feature that has an option must
30 | // be detected, and the mandatory features must all be listed.
31 | func TestS390XAgainstCPUInfo(t *testing.T) {
32 | 	// mapping of linux feature strings to S390X fields
33 | 	mapping := make(map[string]*bool)
34 | 	for _, option := range Options {
35 | 		mapping[option.Name] = option.Feature
36 | 	}
37 | 
38 | 	// these must be true on the machines Go supports
39 | 	mandatory := map[string]bool{"zarch": false, "eimm": false, "ldisp": false, "stfle": false}
40 | 
41 | 	features, err := getFeatureList()
42 | 	if err != nil {
43 | 		// Without a feature list every check below would only add noise.
44 | 		t.Fatal(err)
45 | 	}
46 | 	for _, feature := range features {
47 | 		if _, ok := mandatory[feature]; ok {
48 | 			mandatory[feature] = true
49 | 		}
50 | 		if flag, ok := mapping[feature]; ok {
51 | 			if !*flag {
52 | 				t.Errorf("feature '%v' not detected", feature)
53 | 			}
54 | 		} else {
55 | 			t.Logf("no entry for '%v'", feature)
56 | 		}
57 | 	}
58 | 	for k, v := range mandatory {
59 | 		if !v {
60 | 			t.Errorf("mandatory feature '%v' not detected", k)
61 | 		}
62 | 	}
63 | }
64 | 


--------------------------------------------------------------------------------
/dsp/internal/cpu/cpu_test.go:
--------------------------------------------------------------------------------
 1 | // Copyright 2017 The Go Authors. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style
 3 | // license that can be found in the LICENSE file.
 4 | 
 5 | package cpu_test
 6 | 
 7 | import (
 8 | 	. "internal/cpu"
 9 | 	"internal/testenv"
10 | 	"os"
11 | 	"os/exec"
12 | 	"runtime"
13 | 	"strings"
14 | 	"testing"
15 | )
16 | 
17 | func TestMinimalFeatures(t *testing.T) {
18 | 	// TODO: maybe do MustSupportFeatureDectection(t) ?
19 | 	// Of the arm64 systems, only linux, android and darwin are checked.
20 | 	if goos := runtime.GOOS; runtime.GOARCH == "arm64" &&
21 | 		goos != "linux" && goos != "android" && goos != "darwin" {
22 | 		t.Skipf("%s/%s is not supported", goos, runtime.GOARCH)
23 | 	}
24 | 
25 | 	// Every option flagged as Required has to be reported as present.
26 | 	for _, opt := range Options {
27 | 		missing := opt.Required && !*opt.Feature
28 | 		if missing {
29 | 			t.Errorf("%v expected true, got false", opt.Name)
30 | 		}
31 | 	}
32 | }
33 | 
34 | func MustHaveDebugOptionsSupport(t *testing.T) { // skips t when DebugOptions is false
35 | 	if !DebugOptions {
36 | 		t.Skipf("skipping test: cpu feature options not supported by OS")
37 | 	}
38 | }
39 | 
40 | func MustSupportFeatureDectection(t *testing.T) { // currently a no-op: it never skips or fails t
41 | 	// TODO: add platforms that do not have CPU feature detection support.
42 | }
43 | 
44 | // runDebugOptionsTest re-executes the test binary with GODEBUG=options and
45 | // only the named test selected, and fails t unless that child run succeeds
46 | // and the last whitespace-separated token of its combined output is "PASS".
47 | func runDebugOptionsTest(t *testing.T, test string, options string) {
48 | 	MustHaveDebugOptionsSupport(t)
49 | 	testenv.MustHaveExec(t)
50 | 
51 | 	env := "GODEBUG=" + options
52 | 	cmd := exec.Command(os.Args[0], "-test.run="+test)
53 | 	cmd.Env = append(cmd.Env, env) // cmd.Env starts out nil, so the child's environment holds only GODEBUG
54 | 
55 | 	output, err := cmd.CombinedOutput()
56 | 	got := "" // stays empty when the child printed nothing, instead of panicking on an empty slice
57 | 	if fields := strings.Fields(string(output)); len(fields) > 0 {
58 | 		got = fields[len(fields)-1]
59 | 	}
60 | 	if want := "PASS"; err != nil || got != want {
61 | 		t.Fatalf("%s with %s: want %s, got %v", test, env, want, got)
62 | 	}
63 | }
64 | 
65 | func TestDisableAllCapabilities(t *testing.T) { // re-runs TestAllCapabilitiesDisabled in a child with GODEBUG=cpu.all=off
66 | 	MustSupportFeatureDectection(t)
67 | 	runDebugOptionsTest(t, "TestAllCapabilitiesDisabled", "cpu.all=off")
68 | }
69 | 
70 | func TestAllCapabilitiesDisabled(t *testing.T) {
71 | 	MustHaveDebugOptionsSupport(t)
72 | 	// Only meaningful when GODEBUG is exactly cpu.all=off; skip otherwise.
73 | 	if godebug := os.Getenv("GODEBUG"); godebug != "cpu.all=off" {
74 | 		t.Skipf("skipping test: GODEBUG=cpu.all=off not set")
75 | 	}
76 | 
77 | 	// With everything switched off, a flag may only be set if it is Required.
78 | 	for _, opt := range Options {
79 | 		if enabled, required := *opt.Feature, opt.Required; enabled != required {
80 | 			t.Errorf("%v: expected %v, got %v", opt.Name, required, enabled)
81 | 		}
82 | 	}
83 | }
84 | 


--------------------------------------------------------------------------------
/dsp/internal/cpu/cpu_wasm.go:
--------------------------------------------------------------------------------
 1 | // Copyright 2018 The Go Authors. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style
 3 | // license that can be found in the LICENSE file.
 4 | 
 5 | package cpu
 6 | 
 7 | const CacheLinePadSize = 64 // presumably a cache-line padding size in bytes — TODO confirm at use sites
 8 | 
 9 | func doinit() { // empty: registers no options and sets no feature flags
10 | }
11 | 


--------------------------------------------------------------------------------
/dsp/internal/cpu/cpu_x86.go:
--------------------------------------------------------------------------------
  1 | // Copyright 2017 The Go Authors. All rights reserved.
  2 | // Use of this source code is governed by a BSD-style
  3 | // license that can be found in the LICENSE file.
  4 | 
  5 | //go:build 386 || amd64
  6 | // +build 386 amd64
  7 | 
  8 | package cpu
  9 | 
 10 | const CacheLinePadSize = 64 // presumably a cache-line padding size in bytes — TODO confirm at use sites
 11 | 
 12 | // cpuid is implemented in cpu_x86.s.
 13 | func cpuid(eaxArg, ecxArg uint32) (eax, ebx, ecx, edx uint32)
 14 | 
 15 | // xgetbv with ecx = 0 is implemented in cpu_x86.s.
 16 | func xgetbv() (eax, edx uint32)
 17 | 
 18 | const (
 19 | 	// edx bits (doinit tests these against EDX of cpuid(1, 0))
 20 | 	cpuid_SSE2 = 1 << 26
 21 | 
 22 | 	// ecx bits (doinit tests these against ECX of cpuid(1, 0))
 23 | 	cpuid_SSE3      = 1 << 0
 24 | 	cpuid_PCLMULQDQ = 1 << 1
 25 | 	cpuid_SSSE3     = 1 << 9
 26 | 	cpuid_FMA       = 1 << 12
 27 | 	cpuid_SSE41     = 1 << 19
 28 | 	cpuid_SSE42     = 1 << 20
 29 | 	cpuid_POPCNT    = 1 << 23
 30 | 	cpuid_AES       = 1 << 25
 31 | 	cpuid_OSXSAVE   = 1 << 27
 32 | 	cpuid_AVX       = 1 << 28
 33 | 
 34 | 	// ebx bits (doinit tests these against EBX of cpuid(7, 0))
 35 | 	cpuid_BMI1 = 1 << 3
 36 | 	cpuid_AVX2 = 1 << 5
 37 | 	cpuid_BMI2 = 1 << 8
 38 | 	cpuid_ERMS = 1 << 9
 39 | 	cpuid_ADX  = 1 << 19
 40 | )
 41 | 
 42 | var maxExtendedFunctionInformation uint32 // EAX of cpuid(0x80000000, 0), stored by doinit and read by Name
 43 | 
 44 | func doinit() { // registers the x86 options and fills in X86 from CPUID and XGETBV
 45 | 	options = []option{
 46 | 		{Name: "adx", Feature: &X86.HasADX},
 47 | 		{Name: "aes", Feature: &X86.HasAES},
 48 | 		{Name: "avx", Feature: &X86.HasAVX},
 49 | 		{Name: "avx2", Feature: &X86.HasAVX2},
 50 | 		{Name: "bmi1", Feature: &X86.HasBMI1},
 51 | 		{Name: "bmi2", Feature: &X86.HasBMI2},
 52 | 		{Name: "erms", Feature: &X86.HasERMS},
 53 | 		{Name: "fma", Feature: &X86.HasFMA},
 54 | 		{Name: "pclmulqdq", Feature: &X86.HasPCLMULQDQ},
 55 | 		{Name: "popcnt", Feature: &X86.HasPOPCNT},
 56 | 		{Name: "sse3", Feature: &X86.HasSSE3},
 57 | 		{Name: "sse41", Feature: &X86.HasSSE41},
 58 | 		{Name: "sse42", Feature: &X86.HasSSE42},
 59 | 		{Name: "ssse3", Feature: &X86.HasSSSE3},
 60 | 
 61 | 		// These capabilities should always be enabled on amd64:
 62 | 		{Name: "sse2", Feature: &X86.HasSSE2, Required: GOARCH == "amd64"},
 63 | 	}
 64 | 
 65 | 	maxID, _, _, _ := cpuid(0, 0) // EAX of leaf 0, compared below with the leaf numbers that get queried
 66 | 
 67 | 	if maxID < 1 {
 68 | 		return // leaf 1 is not queried, so no X86 flag is assigned
 69 | 	}
 70 | 
 71 | 	maxExtendedFunctionInformation, _, _, _ = cpuid(0x80000000, 0)
 72 | 
 73 | 	_, _, ecx1, edx1 := cpuid(1, 0)
 74 | 	X86.HasSSE2 = isSet(edx1, cpuid_SSE2)
 75 | 
 76 | 	X86.HasSSE3 = isSet(ecx1, cpuid_SSE3)
 77 | 	X86.HasPCLMULQDQ = isSet(ecx1, cpuid_PCLMULQDQ)
 78 | 	X86.HasSSSE3 = isSet(ecx1, cpuid_SSSE3)
 79 | 	X86.HasSSE41 = isSet(ecx1, cpuid_SSE41)
 80 | 	X86.HasSSE42 = isSet(ecx1, cpuid_SSE42)
 81 | 	X86.HasPOPCNT = isSet(ecx1, cpuid_POPCNT)
 82 | 	X86.HasAES = isSet(ecx1, cpuid_AES)
 83 | 
 84 | 	// OSXSAVE can be false when using older Operating Systems
 85 | 	// or when explicitly disabled on newer Operating Systems by
 86 | 	// e.g. setting the xsavedisable boot option on Windows 10.
 87 | 	X86.HasOSXSAVE = isSet(ecx1, cpuid_OSXSAVE)
 88 | 
 89 | 	// The FMA instruction set extension only has VEX prefixed instructions.
 90 | 	// VEX prefixed instructions require OSXSAVE to be enabled.
 91 | 	// See Intel 64 and IA-32 Architecture Software Developer’s Manual Volume 2
 92 | 	// Section 2.4 "AVX and SSE Instruction Exception Specification"
 93 | 	X86.HasFMA = isSet(ecx1, cpuid_FMA) && X86.HasOSXSAVE
 94 | 
 95 | 	osSupportsAVX := false
 96 | 	// For XGETBV, OSXSAVE bit is required and sufficient.
 97 | 	if X86.HasOSXSAVE {
 98 | 		eax, _ := xgetbv()
 99 | 		// Check if XMM and YMM registers have OS support.
100 | 		osSupportsAVX = isSet(eax, 1<<1) && isSet(eax, 1<<2)
101 | 	}
102 | 
103 | 	X86.HasAVX = isSet(ecx1, cpuid_AVX) && osSupportsAVX
104 | 
105 | 	if maxID < 7 {
106 | 		return // leaf 7 is not queried, so the ebx7-based flags below are not assigned
107 | 	}
108 | 
109 | 	_, ebx7, _, _ := cpuid(7, 0)
110 | 	X86.HasBMI1 = isSet(ebx7, cpuid_BMI1)
111 | 	X86.HasAVX2 = isSet(ebx7, cpuid_AVX2) && osSupportsAVX
112 | 	X86.HasBMI2 = isSet(ebx7, cpuid_BMI2)
113 | 	X86.HasERMS = isSet(ebx7, cpuid_ERMS)
114 | 	X86.HasADX = isSet(ebx7, cpuid_ADX)
115 | }
116 | 
// isSet reports whether hwc has at least one of the bits selected by value set.
func isSet(hwc uint32, value uint32) bool {
	masked := hwc & value
	return masked != 0
}
120 | 
121 | // Name returns the CPU name given by the vendor.
122 | // If the CPU name can not be determined an
123 | // empty string is returned.
124 | func Name() string {
125 | 	if maxExtendedFunctionInformation < 0x80000004 {
126 | 		return ""
127 | 	}
128 | 
129 | 	data := make([]byte, 0, 3*4*4)
130 | 
131 | 	var eax, ebx, ecx, edx uint32
132 | 	eax, ebx, ecx, edx = cpuid(0x80000002, 0)
133 | 	data = appendBytes(data, eax, ebx, ecx, edx)
134 | 	eax, ebx, ecx, edx = cpuid(0x80000003, 0)
135 | 	data = appendBytes(data, eax, ebx, ecx, edx)
136 | 	eax, ebx, ecx, edx = cpuid(0x80000004, 0)
137 | 	data = appendBytes(data, eax, ebx, ecx, edx)
138 | 
139 | 	// Trim leading spaces.
140 | 	for len(data) > 0 && data[0] == ' ' {
141 | 		data = data[1:]
142 | 	}
143 | 
144 | 	// Trim tail after and including the first null byte.
145 | 	for i, c := range data {
146 | 		if c == '\x00' {
147 | 			data = data[:i]
148 | 			break
149 | 		}
150 | 	}
151 | 
152 | 	return string(data)
153 | }
154 | 
// appendBytes appends every value in args to b as four bytes, least
// significant byte first, and returns the extended slice.
func appendBytes(b []byte, args ...uint32) []byte {
	for i := range args {
		word := args[i]
		for shift := uint(0); shift < 32; shift += 8 {
			b = append(b, byte(word>>shift))
		}
	}
	return b
}
165 | 


--------------------------------------------------------------------------------
/dsp/internal/cpu/cpu_x86.s:
--------------------------------------------------------------------------------
 1 | // Copyright 2017 The Go Authors. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style
 3 | // license that can be found in the LICENSE file.
 4 | 
 5 | //go:build 386 || amd64
 6 | // +build 386 amd64
 7 | 
 8 | #include "textflag.h"
 9 | 
// func cpuid(eaxArg, ecxArg uint32) (eax, ebx, ecx, edx uint32)
//
// cpuid loads eaxArg into AX and ecxArg into CX, executes the CPUID
// instruction and returns the resulting AX, BX, CX and DX values.
// Frame: no locals, 8 bytes of arguments followed by 16 bytes of results.
TEXT ·cpuid(SB), NOSPLIT, $0-24
	MOVL eaxArg+0(FP), AX
	MOVL ecxArg+4(FP), CX
	CPUID
	MOVL AX, eax+8(FP)
	MOVL BX, ebx+12(FP)
	MOVL CX, ecx+16(FP)
	MOVL DX, edx+20(FP)
	RET
20 | 
// func xgetbv() (eax, edx uint32)
//
// xgetbv executes XGETBV with CX = 0 and returns the AX and DX values the
// instruction produced. The caller in this package only invokes it after
// checking that OSXSAVE is set.
TEXT ·xgetbv(SB),NOSPLIT,$0-8
	MOVL $0, CX
	XGETBV
	MOVL AX, eax+0(FP)
	MOVL DX, edx+4(FP)
	RET
28 | 


--------------------------------------------------------------------------------
/dsp/internal/cpu/cpu_x86_test.go:
--------------------------------------------------------------------------------
 1 | // Copyright 2018 The Go Authors. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style
 3 | // license that can be found in the LICENSE file.
 4 | 
 5 | //go:build 386 || amd64
 6 | // +build 386 amd64
 7 | 
 8 | package cpu_test
 9 | 
10 | import (
11 | 	. "internal/cpu"
12 | 	"os"
13 | 	"runtime"
14 | 	"testing"
15 | )
16 | 
17 | func TestX86ifAVX2hasAVX(t *testing.T) {
18 | 	if X86.HasAVX2 && !X86.HasAVX {
19 | 		t.Fatalf("HasAVX expected true when HasAVX2 is true, got false")
20 | 	}
21 | }
22 | 
23 | func TestDisableSSE2(t *testing.T) {
24 | 	runDebugOptionsTest(t, "TestSSE2DebugOption", "cpu.sse2=off")
25 | }
26 | 
27 | func TestSSE2DebugOption(t *testing.T) {
28 | 	MustHaveDebugOptionsSupport(t)
29 | 
30 | 	if os.Getenv("GODEBUG") != "cpu.sse2=off" {
31 | 		t.Skipf("skipping test: GODEBUG=cpu.sse2=off not set")
32 | 	}
33 | 
34 | 	want := runtime.GOARCH != "386" // SSE2 can only be disabled on 386.
35 | 	if got := X86.HasSSE2; got != want {
36 | 		t.Errorf("X86.HasSSE2 on %s expected %v, got %v", runtime.GOARCH, want, got)
37 | 	}
38 | }
39 | 
40 | func TestDisableSSE3(t *testing.T) {
41 | 	runDebugOptionsTest(t, "TestSSE3DebugOption", "cpu.sse3=off")
42 | }
43 | 
44 | func TestSSE3DebugOption(t *testing.T) {
45 | 	MustHaveDebugOptionsSupport(t)
46 | 
47 | 	if os.Getenv("GODEBUG") != "cpu.sse3=off" {
48 | 		t.Skipf("skipping test: GODEBUG=cpu.sse3=off not set")
49 | 	}
50 | 
51 | 	want := false
52 | 	if got := X86.HasSSE3; got != want {
53 | 		t.Errorf("X86.HasSSE3 expected %v, got %v", want, got)
54 | 	}
55 | }
56 | 


--------------------------------------------------------------------------------
/dsp/internal/cpu/export_test.go:
--------------------------------------------------------------------------------
 1 | // Copyright 2018 The Go Authors. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style
 3 | // license that can be found in the LICENSE file.
 4 | 
 5 | package cpu
 6 | 
 7 | var (
 8 | 	Options = options
 9 | )
10 | 


--------------------------------------------------------------------------------
/dsp/interpolate.go:
--------------------------------------------------------------------------------
  1 | package dsp
  2 | 
  3 | import "math"
  4 | 
  5 | // Linear interpolates using linear interpolation.
  6 | func Linear(samples []float64, x float64) float64 {
  7 | 	var samp float64
  8 | 	low := math.Floor(x)
  9 | 	lowInt := int(low)
 10 | 	if lowInt < len(samples) {
 11 | 		lowValue := samples[lowInt]
 12 | 		var highValue float64
 13 | 		if i := lowInt + 1; i >= len(samples) {
 14 | 			highValue = 0
 15 | 		} else {
 16 | 			highValue = samples[i]
 17 | 		}
 18 | 		samp = lowValue + (x-low)*(highValue-lowValue)
 19 | 	}
 20 | 	return samp
 21 | }
 22 | 
 23 | // LinearF32 interpolates using linear interpolation.
 24 | func LinearF32(samples []float32, x float32) float32 {
 25 | 	if x < 0 || x > math.MaxFloat32 {
 26 | 		return 0
 27 | 	}
 28 | 	var samp float32
 29 | 	low := float32(math.Floor(float64(x)))
 30 | 	lowInt := int(low)
 31 | 	if lowInt < len(samples) {
 32 | 		lowValue := samples[lowInt]
 33 | 		var highValue float32
 34 | 		if i := lowInt + 1; i >= len(samples) {
 35 | 			highValue = 0
 36 | 		} else {
 37 | 			highValue = samples[i]
 38 | 		}
 39 | 		samp = lowValue + (x-low)*(highValue-lowValue)
 40 | 	}
 41 | 	return samp
 42 | }
 43 | 
 44 | // Hermite4p3o interpolates using 4-point, 3rd-order Hermite (x-form)
 45 | func Hermite4p3o(samples []float64, x float64) float64 {
 46 | 	xi := int(x)
 47 | 
 48 | 	var s [4]float64
 49 | 	for i := -1; i <= 2; i++ {
 50 | 		if j := xi + i; j >= 0 && j < len(samples) {
 51 | 			s[i+1] = samples[j]
 52 | 		}
 53 | 	}
 54 | 
 55 | 	x -= math.Floor(x)
 56 | 	c0 := s[1]
 57 | 	c1 := 1.0 / 2.0 * (s[2] - s[0])
 58 | 	c2 := s[0] - 5.0/2.0*s[1] + 2.0*s[2] - 1.0/2.0*s[3]
 59 | 	c3 := 1.0/2.0*(s[3]-s[0]) + 3.0/2.0*(s[1]-s[2])
 60 | 	return ((c3*x+c2)*x+c1)*x + c0
 61 | }
 62 | 
 63 | // Hermite4p3oF32 interpolates using 4-point, 3rd-order Hermite (x-form)
 64 | func Hermite4p3oF32(samples []float32, x float32) float32 {
 65 | 	xi := int(x)
 66 | 
 67 | 	var s [4]float32
 68 | 	for i := -1; i <= 2; i++ {
 69 | 		if j := xi + i; j >= 0 && j < len(samples) {
 70 | 			s[i+1] = float32(samples[j])
 71 | 		}
 72 | 	}
 73 | 
 74 | 	x -= float32(math.Floor(float64(x)))
 75 | 	c0 := s[1]
 76 | 	c1 := 1.0 / 2.0 * (s[2] - s[0])
 77 | 	c2 := s[0] - 5.0/2.0*s[1] + 2.0*s[2] - 1.0/2.0*s[3]
 78 | 	c3 := 1.0/2.0*(s[3]-s[0]) + 3.0/2.0*(s[1]-s[2])
 79 | 	return ((c3*x+c2)*x+c1)*x + c0
 80 | }
 81 | 
 82 | // Optimal2x4p4o interpolates using optimal 2x (4-point, 4th-order) (z-form)
 83 | func Optimal2x4p4o(samples []float64, x float64) float64 {
 84 | 	const middle = 1
 85 | 
 86 | 	xi := int(x)
 87 | 
 88 | 	var s [6]float64
 89 | 	for i := -1; i <= 2; i++ {
 90 | 		if j := xi + i; j >= 0 && j < len(samples) {
 91 | 			s[middle+i] = samples[j]
 92 | 		}
 93 | 	}
 94 | 
 95 | 	even1 := s[middle+1] + s[middle]
 96 | 	odd1 := s[middle+1] - s[middle]
 97 | 	even2 := s[middle+2] + s[middle-1]
 98 | 	odd2 := s[middle+2] - s[middle-1]
 99 | 	c0 := even1*0.45645918406487612 + even2*0.04354173901996461
100 | 	c1 := odd1*0.47236675362442071 + odd2*0.17686613581136501
101 | 	c2 := even1*-0.253674794204558521 + even2*0.25371918651882464
102 | 	c3 := odd1*-0.37917091811631082 + odd2*0.11952965967158000
103 | 	c4 := even1*0.04252164479749607 + even2*-0.04289144034653719
104 | 
105 | 	z := x - math.Floor(x) - 1.0/2.0
106 | 	return (((c4*z+c3)*z+c2)*z+c1)*z + c0
107 | }
108 | 
// Optimal2x4p4oF32 interpolates using optimal 2x (4-point, 4th-order) (z-form).
//
// The four samples around position x (one before and two after floor(x),
// plus floor(x) itself) are used; samples that fall outside the slice are
// treated as zero.
func Optimal2x4p4oF32(samples []float32, x float32) float32 {
	const middle = 1

	// Derive the integer index and the fraction from the same floor. Using
	// int(x) truncates towards zero and would pick the wrong neighbours for
	// negative non-integer positions.
	base := float32(math.Floor(float64(x)))
	xi := int(base)

	var s [4]float32
	for i := -1; i <= 2; i++ {
		if j := xi + i; j >= 0 && j < len(samples) {
			s[middle+i] = samples[j]
		}
	}

	// Even/odd decomposition around the midpoint between s[middle] and s[middle+1].
	even1 := s[middle+1] + s[middle]
	odd1 := s[middle+1] - s[middle]
	even2 := s[middle+2] + s[middle-1]
	odd2 := s[middle+2] - s[middle-1]
	c0 := even1*0.45645918406487612 + even2*0.04354173901996461
	c1 := odd1*0.47236675362442071 + odd2*0.17686613581136501
	c2 := even1*-0.253674794204558521 + even2*0.25371918651882464
	c3 := odd1*-0.37917091811631082 + odd2*0.11952965967158000
	c4 := even1*0.04252164479749607 + even2*-0.04289144034653719

	// z is the fractional position re-centred to [-0.5, 0.5).
	z := x - base - 1.0/2.0
	return (((c4*z+c3)*z+c2)*z+c1)*z + c0
}
135 | 
// Optimal2x6p5o interpolates using optimal 2x (6-point, 5th-order) (z-form).
//
// The six samples around position x (two before and three after floor(x),
// plus floor(x) itself) are used; samples that fall outside the slice are
// treated as zero.
func Optimal2x6p5o(samples []float64, x float64) float64 {
	const middle = 2

	// Derive the integer index and the fraction from the same floor. Using
	// int(x) truncates towards zero and would pick the wrong neighbours for
	// negative non-integer positions.
	base := math.Floor(x)
	xi := int(base)

	var s [6]float64
	for i := -2; i <= 3; i++ {
		if j := xi + i; j >= 0 && j < len(samples) {
			s[middle+i] = samples[j]
		}
	}

	// Even/odd decomposition around the midpoint between s[middle] and s[middle+1].
	even1 := s[middle+1] + s[middle]
	odd1 := s[middle+1] - s[middle]
	even2 := s[middle+2] + s[middle-1]
	odd2 := s[middle+2] - s[middle-1]
	even3 := s[middle+3] + s[middle-2]
	odd3 := s[middle+3] - s[middle-2]
	c0 := even1*0.40513396007145713 + even2*0.09251794438424393 + even3*0.00234806603570670
	c1 := odd1*0.28342806338906690 + odd2*0.21703277024054901 + odd3*0.01309294748731515
	c2 := even1*-0.191337682540351941 + even2*0.16187844487943592 + even3*0.02946017143111912
	c3 := odd1*-0.16471626190554542 + odd2*-0.00154547203542499 + odd3*0.03399271444851909
	c4 := even1*0.03845798729588149 + even2*-0.05712936104242644 + even3*0.01866750929921070
	c5 := odd1*0.04317950185225609 + odd2*-0.01802814255926417 + odd3*0.00152170021558204

	// z is the fractional position re-centred to [-0.5, 0.5).
	z := x - base - 1.0/2.0
	return ((((c5*z+c4)*z+c3)*z+c2)*z+c1)*z + c0
}
164 | 
// Optimal2x6p5oF32 interpolates using optimal 2x (6-point, 5th-order) (z-form).
//
// The six samples around position x (two before and three after floor(x),
// plus floor(x) itself) are used; samples that fall outside the slice are
// treated as zero.
func Optimal2x6p5oF32(samples []float32, x float32) float32 {
	const middle = 2

	// Derive the integer index and the fraction from the same floor. Using
	// int(x) truncates towards zero and would pick the wrong neighbours for
	// negative non-integer positions.
	base := float32(math.Floor(float64(x)))
	xi := int(base)

	var s [6]float32
	for i := -2; i <= 3; i++ {
		if j := xi + i; j >= 0 && j < len(samples) {
			s[middle+i] = samples[j]
		}
	}

	// Even/odd decomposition around the midpoint between s[middle] and s[middle+1].
	even1 := s[middle+1] + s[middle]
	odd1 := s[middle+1] - s[middle]
	even2 := s[middle+2] + s[middle-1]
	odd2 := s[middle+2] - s[middle-1]
	even3 := s[middle+3] + s[middle-2]
	odd3 := s[middle+3] - s[middle-2]
	c0 := even1*0.40513396007145713 + even2*0.09251794438424393 + even3*0.00234806603570670
	c1 := odd1*0.28342806338906690 + odd2*0.21703277024054901 + odd3*0.01309294748731515
	c2 := even1*-0.191337682540351941 + even2*0.16187844487943592 + even3*0.02946017143111912
	c3 := odd1*-0.16471626190554542 + odd2*-0.00154547203542499 + odd3*0.03399271444851909
	c4 := even1*0.03845798729588149 + even2*-0.05712936104242644 + even3*0.01866750929921070
	c5 := odd1*0.04317950185225609 + odd2*-0.01802814255926417 + odd3*0.00152170021558204

	// z is the fractional position re-centred to [-0.5, 0.5).
	z := x - base - 1.0/2.0
	return ((((c5*z+c4)*z+c3)*z+c2)*z+c1)*z + c0
}
193 | 


--------------------------------------------------------------------------------
/dsp/math32.go:
--------------------------------------------------------------------------------
  1 | package dsp
  2 | 
  3 | import (
  4 | 	"math"
  5 | 	"unsafe"
  6 | )
  7 | 
// VMulC64xF32 multiplies a vector of complex values with a vector with real values.
// This is useful for applying a window to complex samples.
//
//	output[i] = complex(real(input[i])*mul[i], imag(input[i])*mul[i])
//
// The declaration has no Go body; the implementation is provided outside
// this file. vMulC64xF32 is the pure Go implementation.
func VMulC64xF32(input, output []complex64, mul []float32)
 13 | func vMulC64xF32(input, output []complex64, mul []float32) {
 14 | 	n := len(input)
 15 | 	if len(output) < n {
 16 | 		n = len(output)
 17 | 	}
 18 | 	if len(mul) < n {
 19 | 		n = len(mul)
 20 | 	}
 21 | 	for i, v := range input[:n] {
 22 | 		w := mul[i]
 23 | 		output[i] = complex(real(v)*w, imag(v)*w)
 24 | 	}
 25 | }
 26 | 
 27 | // VMulC64 multiplies eache value of the input by the matching value in the multiplier.
 28 | //
 29 | //	output[i] = input[i] * mul[i]
 30 | func VMulC64(input, output, mul []complex64) {
 31 | 	n := len(input)
 32 | 	if len(output) < n {
 33 | 		n = len(output)
 34 | 	}
 35 | 	if len(mul) < n {
 36 | 		n = len(mul)
 37 | 	}
 38 | 	for i, v := range input[:n] {
 39 | 		output[i] = v * mul[i]
 40 | 	}
 41 | }
 42 | 
 43 | func VAddF32(input, output []float32) {
 44 | 	n := len(input)
 45 | 	if len(output) < n {
 46 | 		n = len(output)
 47 | 	}
 48 | 	for i, v := range input[:n] {
 49 | 		output[i] += v
 50 | 	}
 51 | }
 52 | 
 53 | func VAddC64(input, output []complex64) {
 54 | 	n := len(input)
 55 | 	if len(output) < n {
 56 | 		n = len(output)
 57 | 	}
 58 | 	for i, v := range input[:n] {
 59 | 		output[i] += v
 60 | 	}
 61 | }
 62 | 
 63 | func VScaleC64(input, output []complex64, scale float32) {
 64 | 	in := (*[2 << 25]float32)(unsafe.Pointer(&input[0]))[:len(input)*2]
 65 | 	out := (*[2 << 25]float32)(unsafe.Pointer(&output[0]))[:len(output)*2]
 66 | 	VScaleF32(in, out, scale)
 67 | }
 68 | 
// VScaleF32 multiplies input values by scale and stores the products in
// output. The declaration has no Go body; the implementation is provided
// outside this file. vscaleF32 is the pure Go implementation.
func VScaleF32(input, output []float32, scale float32)
 70 | func vscaleF32(input, output []float32, scale float32) {
 71 | 	n := len(input)
 72 | 	if len(output) < n {
 73 | 		n = len(output)
 74 | 	}
 75 | 	for i, v := range input[:n] {
 76 | 		output[i] = v * scale
 77 | 	}
 78 | }
 79 | 
// VAbsC64 stores the magnitude of each complex input value in output. The
// declaration has no Go body; the implementation is provided outside this
// file. vAbsC64 is the pure Go implementation.
func VAbsC64(input []complex64, output []float32)
 81 | func vAbsC64(input []complex64, output []float32) {
 82 | 	n := len(input)
 83 | 	if len(output) < n {
 84 | 		n = len(output)
 85 | 	}
 86 | 	_ = output[n-1] // eliminate bounds check
 87 | 	for i, v := range input[:n] {
 88 | 		output[i] = float32(math.Sqrt(float64(real(v)*real(v) + imag(v)*imag(v))))
 89 | 	}
 90 | }
 91 | 
// VMaxF32 returns the maximum value from an array of 32-bit floating point values.
// The declaration has no Go body; the implementation is provided outside
// this file. vMaxF32 is the pure Go implementation.
func VMaxF32(input []float32) float32
 94 | func vMaxF32(input []float32) float32 {
 95 | 	max := float32(math.Inf(-1))
 96 | 	for _, v := range input {
 97 | 		if v > max {
 98 | 			max = v
 99 | 		}
100 | 	}
101 | 	return max
102 | }
103 | 
// VMinF32 returns the minimum value from an array of 32-bit floating point values.
// The declaration has no Go body; the implementation is provided outside
// this file. vMinF32 is the pure Go implementation.
func VMinF32(input []float32) float32
// vMinF32 is the pure Go implementation of VMinF32. It returns positive
// infinity when input is empty.
func vMinF32(input []float32) float32 {
	smallest := float32(math.Inf(1))
	for i := 0; i < len(input); i++ {
		if input[i] < smallest {
			smallest = input[i]
		}
	}
	return smallest
}
115 | 
// Conj32 returns the complex conjugate of x.
func Conj32(x complex64) complex64 {
	re, im := real(x), imag(x)
	return complex(re, -im)
}
117 | func FastPhase32(x complex64) float32 { return FastAtan2(imag(x), real(x)) }
// Phase32 returns the phase of x, computed with math.Atan2 in float64 and
// converted to float32.
func Phase32(x complex64) float32 {
	im, re := float64(imag(x)), float64(real(x))
	return float32(math.Atan2(im, re))
}
119 | 
// Fractions of pi used by the fast atan2 approximations below.
const (
	pi2  = math.Pi / 2     // pi/2
	pi4  = math.Pi / 4     // pi/4
	pi34 = math.Pi * 3 / 4 // 3*pi/4
)
125 | 
// FastAtan2 is a fast approximation of atan2(y, x).
// max |error| < 0.01
//
// The declaration has no Go body; the implementation is provided outside
// this file. fastAtan2 is the pure Go implementation.
func FastAtan2(y, x float32) float32
128 | func fastAtan2(y, x float32) float32 {
129 | 	absY := y
130 | 	if absY < 0 {
131 | 		absY = -absY
132 | 	}
133 | 	absY += 1e-20 // kludge to prevent 0/0 condition
134 | 	var angle float32
135 | 	if x < 0.0 {
136 | 		r := (x + absY) / (absY - x)
137 | 		angle = pi34 + (0.1963*r*r-0.9817)*r
138 | 	} else if x > 0.0 {
139 | 		r := (x - absY) / (x + absY)
140 | 		angle = pi4 + (0.1963*r*r-0.9817)*r
141 | 	} else if y < 0.0 {
142 | 		return -pi2
143 | 	} else if y > 0.0 {
144 | 		return pi2
145 | 	} else {
146 | 		return 0.0
147 | 	}
148 | 	if y < 0.0 {
149 | 		return -angle // negate if in quad III or IV
150 | 	}
151 | 	return angle
152 | }
153 | 
// FastAtan2_2 is a fast approximation of atan2(y, x).
// |error| < 0.005
//
// The declaration has no Go body; the implementation is provided outside
// this file. fastAtan2_2 is the pure Go implementation.
func FastAtan2_2(y, x float32) float32
156 | func fastAtan2_2(y, x float32) float32 {
157 | 	if x == 0.0 {
158 | 		switch {
159 | 		case y > 0.0:
160 | 			return pi2
161 | 		case y < 0.0:
162 | 			return -pi2
163 | 		}
164 | 		return 0.0
165 | 	}
166 | 	z := y / x
167 | 	zz := z * z
168 | 	if zz < 1.0 {
169 | 		atan := z / (1.0 + 0.28*zz)
170 | 		if x < 0.0 {
171 | 			if y < 0.0 {
172 | 				return atan - math.Pi
173 | 			}
174 | 			return atan + math.Pi
175 | 		}
176 | 		return atan
177 | 	}
178 | 	atan := pi2 - z/(zz+0.28)
179 | 	if y < 0.0 {
180 | 		return atan - math.Pi
181 | 	}
182 | 	return atan
183 | }
184 | 


--------------------------------------------------------------------------------
/dsp/math32_386.s:
--------------------------------------------------------------------------------
 1 | TEXT ·FastAtan2(SB), 7, $0
 2 | 	JMP ·fastAtan2(SB)
 3 | 
 4 | TEXT ·FastAtan2_2(SB), 7, $0
 5 | 	JMP ·fastAtan2_2(SB)
 6 | 
 7 | TEXT ·VScaleF32(SB), 7, $0
 8 | 	JMP ·vscaleF32(SB)
 9 | 
10 | TEXT ·VAbsC64(SB), 7, $0
11 | 	JMP ·vAbsC64(SB)
12 | 
13 | TEXT ·VMaxF32(SB), 7, $0
14 | 	JMP ·vMaxF32(SB)
15 | 
16 | TEXT ·VMulC64xF32(SB), 7, $0
17 | 	JMP ·vMulC64xF32(SB)
18 | 


--------------------------------------------------------------------------------
/dsp/math32_amd64.s:
--------------------------------------------------------------------------------
  1 | #include "go_asm.h"
  2 | #include "textflag.h"
  3 | 
// func FastAtan2(y, x float32) float32
// No amd64-specific code: tail-jump to the pure Go implementation.
TEXT ·FastAtan2(SB), NOSPLIT, $0
	JMP ·fastAtan2(SB)
  6 | 
// func FastAtan2_2(y, x float32) float32
// No amd64-specific code: tail-jump to the pure Go implementation.
TEXT ·FastAtan2_2(SB), NOSPLIT, $0
	JMP ·fastAtan2_2(SB)
  9 | 
// func VAbsC64(input []complex64, output []float32)
// No amd64-specific code: tail-jump to the pure Go implementation.
TEXT ·VAbsC64(SB), NOSPLIT, $0
	JMP ·vAbsC64(SB)
 12 | 
 13 | TEXT ·VMaxF32(SB), NOSPLIT, $0-28
 14 | 	MOVQ input+0(FP), SI
 15 | 	MOVQ input_len+8(FP), CX
 16 | 
 17 | 	MOVL $0xff800000, AX // -InF
 18 | 	MOVL AX, X0
 19 | 
 20 | 	MOVQ $0, DX
 21 | 
 22 | 	//CMPB	internal∕cpu·X86+const_offsetX86HasAVX2(SB), $1
 23 | 	CMPB	·useAVX2(SB), $1
 24 | 	JE 		vmaxf32_avx2
 25 | 	//CMPB 	internal∕cpu·X86+const_offsetX86HasSSE2(SB), $1
 26 | 	CMPB	·useSSE2(SB), $1
 27 | 	JE 		vmaxf32_sse2
 28 | 	JMP		vmaxf32_scalar
 29 | 
 30 | vmaxf32_avx2:
 31 | 	MOVQ CX, BX
 32 | 	ANDQ $-32, BX
 33 | 	CMPQ DX, BX
 34 | 	JGE  vmaxf32_scalar
 35 | 
 36 | 	VBROADCASTSS X0, Y0
 37 | 	VMOVUPS	Y0, Y1
 38 | 	VMOVUPS	Y0, Y2
 39 | 	VMOVUPS	Y0, Y3
 40 | 
 41 | vmaxf32_avx2_loop:
 42 | 	VMOVUPS (SI), Y4
 43 | 	VMOVUPS 32(SI), Y5
 44 | 	VMOVUPS 64(SI), Y6
 45 | 	VMOVUPS 96(SI), Y7
 46 | 	VMAXPS  Y4, Y0, Y0
 47 | 	VMAXPS  Y5, Y1, Y1
 48 | 	VMAXPS  Y6, Y2, Y2
 49 | 	VMAXPS  Y7, Y3, Y3
 50 | 	ADDQ    $128, SI
 51 | 	ADDQ    $32, DX
 52 | 	CMPQ    DX, BX
 53 | 	JLT     vmaxf32_avx2_loop
 54 | 
 55 | 	VMAXPS	Y1, Y0, Y0
 56 | 	VMAXPS	Y2, Y0, Y0
 57 | 	VMAXPS	Y3, Y0, Y0
 58 | 	VEXTRACTF128 $1, Y0, X1
 59 | 	MAXPS   X1, X0
 60 | 	MOVHLPS X0, X1
 61 | 	MAXPS	X1, X0
 62 | 	PSHUFD  $0x55, X0, X1
 63 | 	MAXPS   X1, X0
 64 | 	JMP		vmaxf32_scalar
 65 | 
 66 | vmaxf32_sse2:
 67 | 	MOVQ CX, BX
 68 | 	ANDQ $-16, BX
 69 | 	CMPQ DX, BX
 70 | 	JGE  vmaxf32_scalar
 71 | 
 72 | 	PSHUFD	$0, X0, X0
 73 | 	MOVUPS	X0, X1
 74 | 	MOVUPS	X0, X2
 75 | 	MOVUPS	X0, X3
 76 | 
 77 | vmaxf32_sse_loop:
 78 | 	MOVUPS (SI), X4
 79 | 	MOVUPS 16(SI), X5
 80 | 	MOVUPS 32(SI), X6
 81 | 	MOVUPS 48(SI), X7
 82 | 	MAXPS  X4, X0
 83 | 	MAXPS  X5, X1
 84 | 	MAXPS  X6, X2
 85 | 	MAXPS  X7, X3
 86 | 	ADDQ   $64, SI
 87 | 	ADDQ   $16, DX
 88 | 	CMPQ   DX, BX
 89 | 	JLT    vmaxf32_sse_loop
 90 | 
 91 | 	MAXPS	X1, X0
 92 | 	MAXPS	X2, X0
 93 | 	MAXPS	X3, X0
 94 | 	MOVHLPS X0, X1
 95 | 	MAXPS   X1, X0
 96 | 	PSHUFD  $0x55, X0, X1
 97 | 	MAXPS   X1, X0
 98 | 
 99 | vmaxf32_scalar:
100 | 	CMPQ DX, CX
101 | 	JGE  vmaxf32_done
102 | 
103 | vmaxf32_scalar_loop:
104 | 	MOVSS   (SI), X1
105 | 	UCOMISS X0, X1
106 | 	JLS     vmaxf32_not_max
107 | 	MOVO    X1, X0
108 | 
109 | vmaxf32_not_max:
110 | 	ADDQ $4, SI
111 | 	INCQ DX
112 | 	CMPQ DX, CX
113 | 	JLT  vmaxf32_scalar_loop
114 | 
115 | vmaxf32_done:
116 | 	MOVSS X0, ret+24(FP)
117 | 	RET
118 | 
// func VMinF32(input []float32) float32
//
// Returns the smallest element of input, or +Inf when input is empty.
//
// Registers: SI = read pointer, CX = len(input), DX = elements consumed,
// BX = number of elements handled by the vector loop, X0 = running minimum
// (lane 0 holds the scalar result once a vector loop has been reduced).
// The vector path is chosen from the package flags useAVX2 / useSSE2; any
// remaining elements are handled by the scalar loop.
TEXT ·VMinF32(SB), NOSPLIT, $0-28
	MOVQ input+0(FP), SI
	MOVQ input_len+8(FP), CX

	MOVL $0x7f800000, AX // InF
	MOVL AX, X0

	MOVQ $0, DX

	//CMPB	·x86+const_offsetX86HasAVX2(SB), $1
	CMPB	·useAVX2(SB), $1
	JE 		vminf32_avx2
	//CMPB	·x86+const_offsetX86HasSSE2(SB), $1
	CMPB	·useSSE2(SB), $1
	JE 		vminf32_sse2
	JMP		vminf32_scalar

vminf32_avx2:
	// BX = len rounded down to a multiple of 32 (four YMM registers per pass).
	MOVQ CX, BX
	ANDQ $-32, BX
	CMPQ DX, BX
	JGE  vminf32_scalar

	// Four independent accumulators, all starting at +Inf.
	VBROADCASTSS X0, Y0
	VMOVUPS	Y0, Y1
	VMOVUPS	Y0, Y2
	VMOVUPS	Y0, Y3

vminf32_avx2_loop:
	VMOVUPS (SI), Y4
	VMOVUPS 32(SI), Y5
	VMOVUPS 64(SI), Y6
	VMOVUPS 96(SI), Y7
	VMINPS  Y4, Y0, Y0
	VMINPS  Y5, Y1, Y1
	VMINPS  Y6, Y2, Y2
	VMINPS  Y7, Y3, Y3
	ADDQ    $128, SI
	ADDQ    $32, DX
	CMPQ    DX, BX
	JLT     vminf32_avx2_loop

	// Fold the four accumulators, then the two 128-bit halves.
	VMINPS	Y1, Y0, Y0
	VMINPS	Y2, Y0, Y0
	VMINPS	Y3, Y0, Y0
	VEXTRACTF128 $1, Y0, X1

	// Everything from here on (and the Go code we return to) uses legacy
	// SSE encodings. Clear the upper YMM halves first to avoid the AVX/SSE
	// transition penalty; the low 128 bits of X0/X1 are preserved.
	VZEROUPPER

	// Horizontal minimum of the remaining four lanes into lane 0.
	MINPS   X1, X0
	MOVHLPS X0, X1
	MINPS	X1, X0
	PSHUFD  $0x55, X0, X1
	MINPS   X1, X0
	JMP		vminf32_scalar

vminf32_sse2:
	// BX = len rounded down to a multiple of 16 (four XMM registers per pass).
	MOVQ CX, BX
	ANDQ $-16, BX
	CMPQ DX, BX
	JGE  vminf32_scalar

	// Four independent accumulators, all starting at +Inf.
	PSHUFD	$0, X0, X0
	MOVUPS	X0, X1
	MOVUPS	X0, X2
	MOVUPS	X0, X3

vminf32_sse_loop:
	MOVUPS (SI), X4
	MOVUPS 16(SI), X5
	MOVUPS 32(SI), X6
	MOVUPS 48(SI), X7
	MINPS  X4, X0
	MINPS  X5, X1
	MINPS  X6, X2
	MINPS  X7, X3
	ADDQ   $64, SI
	ADDQ   $16, DX
	CMPQ   DX, BX
	JLT    vminf32_sse_loop

	// Fold the accumulators and reduce the four lanes into lane 0.
	MINPS	X1, X0
	MINPS	X2, X0
	MINPS	X3, X0
	MOVHLPS X0, X1
	MINPS   X1, X0
	PSHUFD  $0x55, X0, X1
	MINPS   X1, X0

vminf32_scalar:
	CMPQ DX, CX
	JGE  vminf32_done

vminf32_scalar_loop:
	// Replace the running minimum only when the element is strictly smaller.
	MOVSS   (SI), X1
	UCOMISS X1, X0
	JLS     vminf32_not_min
	MOVO    X1, X0

vminf32_not_min:
	ADDQ $4, SI
	INCQ DX
	CMPQ DX, CX
	JLT  vminf32_scalar_loop

vminf32_done:
	MOVSS X0, ret+24(FP)
	RET
224 | 
// func VMulC64xF32(input, output []complex64, mul []float32)
// No amd64-specific code: tail-jump to the pure Go implementation.
TEXT ·VMulC64xF32(SB), NOSPLIT, $0
	JMP ·vMulC64xF32(SB)
227 | 
// func VScaleF32(input, output []float32, scale float32)
//
// output[i] = input[i] * scale for i < min(len(input), len(output)).
//
// Registers: SI = read pointer, DI = write pointer, AX = elements done,
// DX = n (total element count), CX = number of elements covered by the
// vector loop, X8/Y8 = scale broadcast to every lane.
//
// The scalar tail loop runs until AX reaches DX. It previously compared
// against CX, which the vector paths round down, so only a single tail
// element was ever written (and only output[0] for short inputs).
TEXT ·VScaleF32(SB), NOSPLIT, $0-52
	MOVQ   input+0(FP), SI
	MOVQ   input_len+8(FP), AX
	MOVQ   output+24(FP), DI
	MOVQ   output_len+32(FP), CX
	MOVSS  scale+48(FP), X8
	PSHUFD $0, X8, X8

	// DX = n = min(len(input), len(output))
	CMPQ AX, CX
	JGE  vscalef32_min_len
	MOVQ AX, CX
vscalef32_min_len:
	MOVQ CX, DX

	MOVQ $0, AX

	//CMPB	·x86+const_offsetX86HasAVX2(SB), $1
	CMPB	·useAVX2(SB), $1
	JE 		vscalef32_avx2
	//CMPB	·x86+const_offsetX86HasSSE2(SB), $1
	CMPB	·useSSE2(SB), $1
	JE 		vscalef32_sse2
	JMP		vscalef32_scalar

vscalef32_avx2:
	// CX = n rounded down to a multiple of 32, the number of floats one
	// pass of the loop below consumes.
	MOVQ DX, CX
	ANDQ $-32, CX
	CMPQ AX, CX
	JGE  vscalef32_scalar

	VBROADCASTSS X8, Y8

vscalef32_avx2_loop:
	VMOVUPS (SI), Y0
	VMOVUPS 32(SI), Y1
	VMOVUPS 64(SI), Y2
	VMOVUPS 96(SI), Y3
	VMULPS  Y8, Y0, Y0
	VMULPS  Y8, Y1, Y1
	VMULPS  Y8, Y2, Y2
	VMULPS  Y8, Y3, Y3
	VMOVUPS Y0, (DI)
	VMOVUPS Y1, 32(DI)
	VMOVUPS Y2, 64(DI)
	VMOVUPS Y3, 96(DI)
	ADDQ   $32, AX
	ADDQ   $128, SI
	ADDQ   $128, DI
	CMPQ   AX, CX
	JLT    vscalef32_avx2_loop

	// The tail loop and the Go caller use legacy SSE encodings; clear the
	// upper YMM halves to avoid the AVX/SSE transition penalty. The low
	// 128 bits of X8 (the scale) are preserved.
	VZEROUPPER
	JMP    vscalef32_scalar

vscalef32_sse2:
	// CX = n rounded down to a multiple of 16, the number of floats one
	// pass of the loop below consumes.
	MOVQ DX, CX
	ANDQ $-16, CX
	CMPQ AX, CX
	JGE  vscalef32_scalar

vscalef32_sse2_loop:
	MOVUPS (SI), X0
	MOVUPS 16(SI), X1
	MOVUPS 32(SI), X2
	MOVUPS 48(SI), X3
	MULPS  X8, X0
	MULPS  X8, X1
	MULPS  X8, X2
	MULPS  X8, X3
	MOVUPS X0, (DI)
	MOVUPS X1, 16(DI)
	MOVUPS X2, 32(DI)
	MOVUPS X3, 48(DI)
	ADDQ   $16, AX
	ADDQ   $64, SI
	ADDQ   $64, DI
	CMPQ   AX, CX
	JLT    vscalef32_sse2_loop

vscalef32_scalar:
	CMPQ AX, DX
	JGE  vscalef32_done

vscalef32_scalar_loop:
	MOVSS (SI), X0
	MULSS X8, X0
	MOVSS X0, (DI)
	INCQ  AX
	ADDQ  $4, SI
	ADDQ  $4, DI
	CMPQ  AX, DX // bound is the full count, not the vector-loop count
	JLT   vscalef32_scalar_loop

vscalef32_done:
	RET
322 | 


--------------------------------------------------------------------------------
/dsp/math32_arm.s:
--------------------------------------------------------------------------------
  1 | #include "textflag.h"
  2 | 
// Floating point immediates used by the atan2 approximations below.
#define pi $3.14159265358979323846264338327950288419716939937510582097494459
#define halfPi $1.570796326794896557998981734272092580795288085938
#define negativeHalfPi $-1.570796326794896557998981734272092580795288085938

// Hand-encoded "vmrs APSR_nzcv, fpscr". It is issued after every vcmpe
// below, before the conditional branch or conditional instruction that
// depends on the comparison.
#define vmrs_APSR_nzcv_fpscr WORD $0xeef1fa10
  8 | 
// func FastAtan2(y, x float32) float32
//
// ARM implementation of the fast atan2 approximation. y is held in F6 and
// x in F4. Instructions emitted as raw WORDs carry their mnemonic in the
// trailing comment; those mnemonics refer to F4 as s8, F6 as s12 and F1 as s2.
// Uses F0, F1, F2, F3, F4, F6
TEXT ·FastAtan2(SB), NOSPLIT, $-4
	MOVF y+0(FP), F6
	MOVF x+4(FP), F4

	// F2 = abs(y) + 1e-20
	ABSF F6, F2

	MOVF $1e-20, F0
	ADDF F0, F2

	// Branch on the sign of x; the fall-through case is x < 0.
	WORD $0xeeb54ac0   // vcmpe.f32 s8, #0x0
	vmrs_APSR_nzcv_fpscr
	BGT  fatan2_pos_x
	BEQ  fatan2_zero_x

	ADDF F2, F4, F1             // x + abs(y)
	SUBF F4, F2, F4             // abs(y) - x
	MOVF $2.356194496154785, F3 // pi * 3/4
	B    fatan2_2

fatan2_pos_x:
	SUBF F2, F4, F1              // x - abs(y)
	ADDF F2, F4, F4              // abs(y) + x
	MOVF $0.7853981852531433, F3 // pi * 1/4

fatan2_2:
	// F2 = r = F1 / F4
	DIVF F4, F1, F2

	// F1 = F3 + (0.1963*r*r - 0.9817)*r
	MOVF $0.1963, F1
	MULF F2, F1
	MULF F2, F1
	MOVF $0.9817, F0
	SUBF F0, F1
	MULF F2, F1
	ADDF F3, F1

	// Negate the result when y < 0.
	WORD $0xeeb56ac0   // vcmpe.f32 s12, #0x0
	vmrs_APSR_nzcv_fpscr
	WORD $0xbeb11a41   // vneglt.f32 s2, s2
	MOVF F1, ret+8(FP)
	RET

fatan2_zero_x:
	// x == 0: the result depends only on the sign of y. When y is zero as
	// well, y itself (F6) is returned.
	WORD $0xeeb56ac0   // vcmpe.f32 s12, #0x0
	vmrs_APSR_nzcv_fpscr
	BGT  fatan2_pos_y
	BLT  fatan2_neg_y
	MOVF F6, ret+8(FP)
	RET

fatan2_neg_y:
	MOVF negativeHalfPi, F6
	MOVF F6, ret+8(FP)
	RET

fatan2_pos_y:
	MOVF halfPi, F6
	MOVF F6, ret+8(FP)
	RET
 68 | 
// func FastAtan2_2(y, x float32) float32
//
// ARM implementation of the second fast atan2 approximation. x is held in
// F6 and y in F3. Instructions emitted as raw WORDs carry their mnemonic in
// the trailing comment; those mnemonics refer to F6 as s12, F3 as s6,
// F2 as s4 and F0 as s0.
// Uses F0, F1, F2, F3, F4, F6
TEXT ·FastAtan2_2(SB), NOSPLIT, $-4
	MOVF x+4(FP), F6
	MOVF y+0(FP), F3
	WORD $0xeeb56ac0    // vcmpe.f32 s12, #0x0
	vmrs_APSR_nzcv_fpscr
	BEQ  fatan22_zero_x

	// y / x
	// F1 = z = y/x, F2 = z*z, F0 = 1.0
	DIVF F6, F3, F1
	MULF F1, F1, F2
	MOVF $1.0, F0

	// CMPF F0, F2
	// Take the second formula when z*z > 1.0.
	WORD $0xeeb42ac0 // vcmpe.f32 s4, s0
	vmrs_APSR_nzcv_fpscr
	BGT  fatan22_5

	// z / (1.0 + 0.28*z*z)
	MOVF    $0.28, F4
	MULF    F4, F2
	ADDF    F0, F2
	DIVF    F2, F1, F2
	// x >= 0: the value in F2 is the result. Otherwise subtract pi when
	// y < 0 and add pi when y >= 0.
	WORD    $0xeeb56ac0 // vcmpe.f32 s12, #0x0
	vmrs_APSR_nzcv_fpscr
	BGE     fatan22_6
	MOVF    pi, F1
	WORD    $0xeeb53ac0 // vcmpe.f32 s6, #0x0
	vmrs_APSR_nzcv_fpscr
	SUBF.LT F1, F2
	ADDF.GE F1, F2

fatan22_6:
	MOVF F2, ret+8(FP)
	RET

fatan22_5:
	// pi2 - z/(z*z+0.28)
	MOVF    $0.28, F4
	ADDF    F4, F2
	DIVF    F2, F1, F2
	MOVF    halfPi, F1
	SUBF    F2, F1, F2
	// Subtract pi when y < 0.
	MOVF    pi, F1
	WORD    $0xeeb53ac0   // vcmpe.f32 s6, #0x0
	vmrs_APSR_nzcv_fpscr
	SUBF.LT F1, F2
	MOVF    F2, ret+8(FP)
	RET

fatan22_zero_x:
	// x == 0: the result depends only on the sign of y. When y is zero as
	// well, x itself (F6, which is zero here) is returned.
	WORD $0xeeb53ac0 // vcmpe.f32 s6, #0x0
	vmrs_APSR_nzcv_fpscr

	// MOVF.LT	negativeHalfPi, F6
	// MOVF.GT	halfPi, F6
	// MOVF	F6, ret+8(FP)
	// RET
	BGT  fatan22_pi2
	BLT  fatan22_neg_pi2
	MOVF F6, ret+8(FP)
	RET

fatan22_neg_pi2:
	MOVF negativeHalfPi, F6
	MOVF F6, ret+8(FP)
	RET

fatan22_pi2:
	MOVF halfPi, F6
	MOVF F6, ret+8(FP)
	RET
141 | 
// func VScaleF32(input, output []float32, scale float32)
//
// output[i] = input[i] * scale for i < min(len(input), len(output)).
//
// Registers: R0 = read pointer, R1 = write pointer, R2 = elements left,
// F0 = scale. When the package flag HaveNEON is non-zero and at least 16
// elements remain, 16 floats per pass are processed with hand-encoded NEON
// instructions (mnemonics in the trailing comments); the rest is handled
// one element at a time.
TEXT ·VScaleF32(SB), NOSPLIT, $0
	MOVW input+0(FP), R0
	MOVW input_len+4(FP), R2
	MOVW output+12(FP), R1
	MOVW output_len+16(FP), R3
	MOVF scale+24(FP), F0

	// Choose the shortest length
	CMP     R2, R3
	MOVW.LT R3, R2

	// Nothing to do for an empty operand.
	TEQ 	$0, R2
	BEQ 	vscalef32_done

	MOVBU 	·HaveNEON+0(SB), R3
	CMP   	$0, R3
	BEQ   	vscalef32_scalar_loop

	CMP 	$16, R2
	BLT 	vscalef32_scalar_loop

	PLD 	(R0)
vscalef32_neon_loop:
	PLD		(4*16)(R0)
	WORD	$0xecb02b10 // vldmia r0!, {q1, q2, q3, q4}
	WORD	$0xf3a22940 // vmul.f32 q1, q1, d0[0]
	WORD	$0xf3a44940 // vmul.f32 q2, q2, d0[0]
	WORD	$0xf3a66940 // vmul.f32 q3, q3, d0[0]
	WORD	$0xf3a88940 // vmul.f32 q4, q4, d0[0]
	WORD	$0xeca12b10 // vstmia r1!, {q1, q2, q3, q4}
	SUB		$16, R2
	CMP  	$16, R2
	BGE  	vscalef32_neon_loop

vscalef32_scalar:
	// Fewer than 16 elements remain after the NEON loop.
	TEQ $0, R2
	BEQ vscalef32_done

vscalef32_scalar_loop:
	MOVF 	(R0), F1
	ADD  	$4, R0
	MULF 	F0, F1, F1
	MOVF 	F1, (R1)
	ADD  	$4, R1
	SUB     $1, R2
	TEQ     $0, R2
	BNE     vscalef32_scalar_loop

vscalef32_done:
	RET
192 | 
193 | 
// func VMulC64xF32(input, output []complex64, mul []float32)
// No ARM-specific code: branch to the pure Go implementation.
TEXT ·VMulC64xF32(SB), NOSPLIT, $0
	B ·vMulC64xF32(SB)
196 | 
// func VAbsC64(input []complex64, output []float32)
//
// output[i] = sqrt(real(input[i])^2 + imag(input[i])^2) for
// i < min(len(input), len(output)).
//
// Registers: R0 = read pointer, R1 = write pointer, R2 = elements left.
// When the package flag UseVector is non-zero and at least 4 elements
// remain, 4 complex values per pass are processed with hand-encoded VFP
// instructions after reprogramming the FPSCR length/stride fields; the
// FPSCR fields are cleared again before the scalar tail runs.
TEXT ·VAbsC64(SB), NOSPLIT, $0
	MOVW input+0(FP), R0
	MOVW output+12(FP), R1
	MOVW input_len+4(FP), R2
	MOVW output_len+16(FP), R3

	// Choose the shortest length
	CMP     R2, R3
	MOVW.LT R3, R2

	// If no input then skip loop
	CMP $0, R2
	BEQ vabsc64_done

	MOVBU ·UseVector+0(SB), R3
	TEQ   $0, R3
	BEQ   vabsc64_scalar_loop

	CMP $4, R2
	BLT vabsc64_scalar_loop

	PLD (R0)
	PLD 64(R0)
	PLD (2*64)(R0)
	PLD (3*64)(R0)

	// Set vector length to 4 and stride to 2
	WORD $0xeef13a10            // vmrs r3, fpscr
	BIC  $((7<<16)|(3<<20)), R3
	ORR  $((3<<16)|(1<<20)), R3
	WORD $0xeee13a10            // fmxr fpscr, r3

vabsc64_vector_loop:
	PLD (4*64)(R0)

	// Load 4 complex values (8 floats) into s8-s15. NOTE(review): with the
	// length/stride set above, the three arithmetic instructions presumably
	// act on s8, s10, s12 and s14 (and s9, s11, s13, s15 as the second
	// operand) — consistent with the four stores below; confirm against the
	// VFP short-vector documentation.
	WORD $0xecb04a08 // vldmia r0!, {s8-s15}
	WORD $0xee244a04 // vmul.f32 s8, s8, s8
	WORD $0xee044aa4 // vmla.f32 s8, s9, s9
	WORD $0xeeb14ac4 // vsqrt.f32 s8, s8
	WORD $0xed814a00 // vstr s8, [r1]
	WORD $0xed815a01 // vstr s10, [r1, #0x4]
	WORD $0xed816a02 // vstr s12, [r1, #0x8]
	WORD $0xed817a03 // vstr s14, [r1, #0xc]
	ADD  $16, R1

	SUB $4, R2
	CMP $4, R2
	BGE vabsc64_vector_loop

	// Clear vector mode
	WORD $0xeef13a10            // vmrs r3, fpscr
	BIC  $((7<<16)|(3<<20)), R3
	WORD $0xeee13a10            // fmxr fpscr, r3

	TEQ $0, R2
	BEQ vabsc64_done

vabsc64_scalar_loop:
	MOVF  0(R0), F0           // real
	MOVF  4(R0), F1           // imag
	ADD   $8, R0
	MULF  F0, F0
	MULF  F1, F1
	ADDF  F1, F0
	SQRTF F0, F0
	MOVF  F0, 0(R1)
	ADD   $4, R1
	SUB   $1, R2
	TEQ   $0, R2
	BNE   vabsc64_scalar_loop

vabsc64_done:
	RET
270 | 
// VMaxF32 returns the largest value in input, or negative infinity
// (bit pattern 0xff800000) when input is empty.
//
// Register use:
//   R0 = input pointer (advanced as values are consumed)
//   R2 = remaining element count
//   F4 = running maximum (referred to as s8 in the encoded instructions)
//
// Three paths are used: a NEON loop (8 floats per iteration) when HaveNEON is
// set and at least 16 values are available, a VFP batch loop (4 floats per
// iteration), and a one-at-a-time tail loop.
TEXT ·VMaxF32(SB), 7, $0
	MOVW input+0(FP), R0
	MOVW input_len+4(FP), R2

	// Seed the running maximum with -Inf.
	MOVW $0xff800000, R1
	MOVW R1, F4

	CMP $0, R2
	BEQ vmaxf32_done

	MOVBU 	·HaveNEON+0(SB), R3
	CMP   	$0, R3
	BEQ   	vmaxf32_batch

	CMP 	$16, R2
	BLT 	vmaxf32_batch

	// Prime the two NEON accumulators with the first eight values.
	WORD	$0xecb08b08 // vldmia r0!, {q4,q5}
	SUB		$8, R2

	//PLD 	(R0)
vmaxf32_neon_loop:
	//PLD		(12*16)(R0)
	// Load eight more values and fold them into the accumulators.
	WORD	$0xecb02b08 // vldmia r0!, {q1, q2}
	WORD	$0xf2088f42 // vmax.f32 q4, q4, q1
	WORD	$0xf20aaf44 // vmax.f32 q5, q5, q2
	SUB		$8, R2
	CMP  	$8, R2
	BGE  	vmaxf32_neon_loop

	// Reduce the two accumulators to a single value in d4, which is the
	// register the scalar code below uses as the running maximum.
	WORD	$0xf2080f4a // vmax.f32 q0, q4, q5
	WORD	$0xf3004f01 // vpmax.f32 d4, d0, d1
	WORD	$0xf3044f04 // vpmax.f32 d4, d4, d4

	// Up to seven values may remain; the tail loop handles them.
	B       vmaxf32_scalar

vmaxf32_batch:
	CMP $4, R2
	BLT vmaxf32_scalar_loop

	PLD (R0)
	PLD 64(R0)
	PLD (2*64)(R0)

vmaxf32_batch_loop:
	// Four values per iteration: compare each against the running maximum and
	// conditionally replace it. vmrs_APSR_nzcv_fpscr is a macro defined outside
	// this excerpt; presumably it copies the VFP compare flags into the CPU
	// flags so the conditional vmovgt can test them.
	PLD  (3*64)(R0)
	WORD $0xecb00a04        // vldmia r0!, {s0-s3}
	WORD $0xeeb40ac4        // vcmpe.f32 s0, s8
	vmrs_APSR_nzcv_fpscr
	WORD $0xceb04a40        // vmovgt.f32 s8, s0
	WORD $0xeef40ac4        // vcmpe.f32 s1, s8
	vmrs_APSR_nzcv_fpscr
	WORD $0xceb04a60        // vmovgt.f32 s8, s1
	WORD $0xeeb41ac4        // vcmpe.f32 s2, s8
	vmrs_APSR_nzcv_fpscr
	WORD $0xceb04a41        // vmovgt.f32 s8, s2
	WORD $0xeef41ac4        // vcmpe.f32 s3, s8
	vmrs_APSR_nzcv_fpscr
	WORD $0xceb04a61        // vmovgt.f32 s8, s3
	SUB  $4, R2
	CMP  $4, R2
	BGE  vmaxf32_batch_loop

vmaxf32_scalar:
	TEQ $0, R2
	BEQ vmaxf32_done

vmaxf32_scalar_loop:
	// One value per iteration; F1 is the register the encoded compare calls s2.
	MOVF 0(R0), F1
	ADD  $4, R0

	// CMPF    F4, F1
	WORD    $0xeeb41ac4         // vcmpe.f32 s2, s8
	vmrs_APSR_nzcv_fpscr
	MOVF.GT F1, F4
	SUB     $1, R2
	TEQ     $0, R2
	BNE     vmaxf32_scalar_loop

vmaxf32_done:
	MOVF F4, ret+12(FP)
	RET
353 | 


--------------------------------------------------------------------------------
/dsp/math32_arm64.s:
--------------------------------------------------------------------------------
  1 | #include "textflag.h"
  2 | 
  3 | #define Inf32 0x7f800000
  4 | #define NegInf32 0xff800000
  5 | 
// FastAtan2 has no ARM64-specific body here; it branches directly to the
// package-level fastAtan2 symbol with the caller's arguments untouched.
TEXT ·FastAtan2(SB), NOSPLIT, $0
    B ·fastAtan2(SB)
  8 | 
// FastAtan2_2 has no ARM64-specific body here; it branches directly to the
// package-level fastAtan2_2 symbol with the caller's arguments untouched.
TEXT ·FastAtan2_2(SB), NOSPLIT, $0
    B ·fastAtan2_2(SB)
 11 | 
// VScaleF32 multiplies every element of input by scale and stores the product
// in output. Only min(len(input), len(output)) elements are processed.
//
// Register use:
//   R0 = input pointer, R2 = output pointer (both advanced as data is handled)
//   R1 = remaining element count
//   F0 = scale
TEXT ·VScaleF32(SB), NOSPLIT, $0
	MOVD	input(FP), R0
	MOVD	input_len+8(FP), R1
	MOVD	output+24(FP), R2
	MOVD	output_len+32(FP), R3
    FMOVS   scale+48(FP), F0

    // R1 = min(len(input), len(output))
    CMP     R3, R1
    BLT     vscalef32_min_len
    MOVD    R3, R1
vscalef32_min_len:

// Number of float32 values handled per SIMD iteration.
#define BLOCK_SIZE 16

    // Fewer than one full block: go straight to the per-element loop.
    CMP     $BLOCK_SIZE, R1
    BLT     vscalef32_scaler
vscalef32_simd_loop:
    // The commented-out instructions below are earlier variants kept for
    // reference.
    //VLD1    (R0), [V1.S4]
    //WORD    $0x3dc00001 // ldr q1, [x0]
    //ADD     $16, R0

    //WORD    $0xf9802003 // prfm pldl2strm, [x0, 64]
    //WORD    $0xf980a000 // prfm pldl1keep, [x0, 320]
    //WORD    $0xf980a001 // prfm pldl1strm, [x0, 320]
    WORD    $0xf9804001 // prfm pldl1strm, [x0, 128]
    //WORD    $0xf9804000 // prfm pldl1keep, [x0, 128]

    // Load 16 floats (four 128-bit registers) from the input.
    //WORD    $0x4c402801 // ld1 {v1.4s,v2.4s,v3.4s,v4.4s}, [x0]
    WORD    $0xad400801 // ldp q1, q2, [x0]
    WORD    $0xad411003 // ldp q3, q4, [x0,32]
    //WORD    $0xad421805 // ldp q5, q6, [x0,64]
    ADD     $(BLOCK_SIZE*4), R0

    // Multiply every lane by the scale held in lane 0 of v0.
    WORD    $0x4f809021    // fmul v1.4s, v1.4s, v0.s[0]
    WORD    $0x4f809042    // fmul v2.4s, v2.4s, v0.s[0]
    WORD    $0x4f809063    // fmul v3.4s, v3.4s, v0.s[0]
    WORD    $0x4f809084    // fmul v4.4s, v4.4s, v0.s[0]
    //WORD    $0x4f8090a5    // fmul v5.4s, v5.4s, v0.s[0]
    //WORD    $0x4f8090c6    // fmul v6.4s, v6.4s, v0.s[0]

    //VST1    [V1.S4], (R2)
    //WORD    $0x3d800041 // str q1, [x2]
    //ADD     $16, R2

    // Store the 16 scaled floats to the output.
    //WORD    $0x4c00a841 // st1 {v1.4s,v2.4s}, [x2]
    WORD    $0xad000841 // stp q1, q2, [x2]
    WORD    $0xad011043 // stp q3, q4, [x2,32]
    //WORD    $0xad021845 // stp q5, q6, [x2,64]
    ADD     $(BLOCK_SIZE*4), R2

    SUB     $BLOCK_SIZE, R1
    CMP     $BLOCK_SIZE, R1
    BGE     vscalef32_simd_loop

vscalef32_scaler:
    // Scalar tail: handle the remaining 0..15 elements one at a time.
    CMP     ZR, R1
    BEQ     vscalef32_done
vscalef32_scaler_loop:
	FMOVS.P	4(R0), F1
    FMULS   F0, F1, F1
    FMOVS.P F1, 4(R2)
    SUBS    $1, R1
	BNE     vscalef32_scaler_loop
vscalef32_done:
	RET
 77 | 
// VMulC64xF32 has no ARM64-specific body here; it branches directly to the
// package-level vMulC64xF32 symbol with the caller's arguments untouched.
TEXT ·VMulC64xF32(SB), NOSPLIT, $0
	B ·vMulC64xF32(SB)
 80 | 
// VAbsC64 has no ARM64-specific body here; it branches directly to the
// package-level vAbsC64 symbol with the caller's arguments untouched.
TEXT ·VAbsC64(SB), NOSPLIT, $0
    B ·vAbsC64(SB)
 83 | 
// VMaxF32 returns the largest value in input, or negative infinity when input
// is empty.
//
// Register use:
//   R0  = input pointer (advanced as values are consumed)
//   R1  = remaining element count
//   F31 = running maximum, seeded with -Inf
TEXT ·VMaxF32(SB), NOSPLIT, $0
	MOVD	input(FP), R0
	MOVD	input_len+8(FP), R1
	MOVW    $NegInf32, R2
	FMOVS   R2, F31

// Number of float32 values handled per SIMD iteration.
#undef BLOCK_SIZE
#define BLOCK_SIZE 16

    // The SIMD path needs 8 values to prime the accumulators plus at least one
    // full block; shorter inputs use the per-element loop only.
    CMP     $(8+BLOCK_SIZE), R1
    BLT     vmaxf32_scaler

    // Prime the two accumulators (v0 and v7) with the first eight values.
    //VLD1.P  16(R0), [V0.S4] // ld1 {v0.4s}, [x0], #16
    WORD    $0xad401c00 // ldp q0, q7, [x0]
    ADD     $(BLOCK_SIZE/2*4), R0
    SUB     $(BLOCK_SIZE/2), R1
vmaxf32_simd_loop:
    //VLD1.P    (R0), [V1.S4,V2.S4,V3.S4,V4.S4]
    // ldp is faster than vld1
    WORD    $0xad400801     // ldp q1, q2, [x0]
    WORD    $0xad411003     // ldp q3, q4, [x0,32]
    ADD     $(BLOCK_SIZE*4), R0
    // Fold the 16 new values into the two accumulators.
    WORD    $0x4e21f400     // fmax v0.4s, v0.4s, v1.4s
    WORD    $0x4e23f4e7     // fmax v7.4s, v7.4s, v3.4s
    WORD    $0x4e22f400     // fmax v0.4s, v0.4s, v2.4s
    WORD    $0x4e24f4e7     // fmax v7.4s, v7.4s, v4.4s
    SUB     $BLOCK_SIZE, R1
    CMP     $BLOCK_SIZE, R1
    BGE     vmaxf32_simd_loop
    // Reduce each accumulator across its lanes, then combine into F31.
    WORD    $0x6e30f81e     // fmaxv s30, v0.4s
    WORD    $0x6e30f8ff     // fmaxv s31, v7.4s
    FMAXS   F31, F30, F31

vmaxf32_scaler:
    // Scalar tail: fold any remaining values into F31 one at a time.
    CMP     ZR, R1
    BEQ     vmaxf32_done
vmaxf32_loop:
	//FMOVS.P	(R0), F1
    FMOVS   (R0), F1
    ADD     $4, R0
    FMAXS   F31, F1, F31
    SUBS    $1, R1
	BNE     vmaxf32_loop
vmaxf32_done:
	FMOVS	F31, ret+24(FP)
	RET
130 | 
// VMinF32 returns the smallest value in input, or positive infinity when
// input is empty.
//
// Register use:
//   R0  = input pointer (advanced as values are consumed)
//   R1  = remaining element count
//   F31 = running minimum, seeded with +Inf
//
// The labels carry a vminf32_ prefix so they are not mistaken for the labels
// of VMaxF32, which this routine mirrors instruction for instruction apart
// from using fmin/fminv/FMINS.
TEXT ·VMinF32(SB), NOSPLIT, $0
	MOVD	input(FP), R0
	MOVD	input_len+8(FP), R1
	MOVW    $Inf32, R2
	FMOVS   R2, F31

// Number of float32 values handled per SIMD iteration.
#undef BLOCK_SIZE
#define BLOCK_SIZE 16

    // The SIMD path needs 8 values to prime the accumulators plus at least one
    // full block; shorter inputs use the per-element loop only.
    CMP     $(8+BLOCK_SIZE), R1
    BLT     vminf32_scalar

    // Prime the two accumulators (v0 and v7) with the first eight values.
    //VLD1.P  16(R0), [V0.S4] // ld1 {v0.4s}, [x0], #16
    WORD    $0xad401c00 // ldp q0, q7, [x0]
    ADD     $(BLOCK_SIZE/2*4), R0
    SUB     $(BLOCK_SIZE/2), R1
vminf32_simd_loop:
    //VLD1.P    (R0), [V1.S4,V2.S4,V3.S4,V4.S4]
    // ldp is faster than vld1
    WORD    $0xad400801     // ldp q1, q2, [x0]
    WORD    $0xad411003     // ldp q3, q4, [x0,32]
    ADD     $(BLOCK_SIZE*4), R0
    // Fold the 16 new values into the two accumulators.
    WORD    $0x4ea1f400     // fmin v0.4s, v0.4s, v1.4s
    WORD    $0x4ea3f4e7     // fmin v7.4s, v7.4s, v3.4s
    WORD    $0x4ea2f400     // fmin v0.4s, v0.4s, v2.4s
    WORD    $0x4ea4f4e7     // fmin v7.4s, v7.4s, v4.4s
    SUB     $BLOCK_SIZE, R1
    CMP     $BLOCK_SIZE, R1
    BGE     vminf32_simd_loop
    // Reduce each accumulator across its lanes, then combine into F31.
    WORD    $0x6eb0f81e     // fminv s30, v0.4s
    WORD    $0x6eb0f8ff     // fminv s31, v7.4s
    FMINS   F31, F30, F31

vminf32_scalar:
    // Scalar tail: fold any remaining values into F31 one at a time.
    CMP     ZR, R1
    BEQ     vminf32_done
vminf32_loop:
	//FMOVS.P	(R0), F1
    FMOVS   (R0), F1
    ADD     $4, R0
    FMINS   F31, F1, F31
    SUBS    $1, R1
	BNE     vminf32_loop
vminf32_done:
	FMOVS	F31, ret+24(FP)
	RET
177 | 


--------------------------------------------------------------------------------
/dsp/math32_test.go:
--------------------------------------------------------------------------------
  1 | package dsp
  2 | 
  3 | import (
  4 | 	"math"
  5 | 	"math/rand"
  6 | 	"testing"
  7 | )
  8 | 
  9 | const (
 10 | 	approxErrorLimit = 0.011
 11 | )
 12 | 
 13 | var (
 14 | 	atanBenchTable      = [][2]float32{}
 15 | 	atanBenchTableFixed = [][2]int{}
 16 | )
 17 | 
 18 | func init() {
 19 | 	for y := -1.0; y <= 1.0; y += 0.5 {
 20 | 		for x := -1.0; x <= 1.0; x += 0.5 {
 21 | 			atanBenchTable = append(atanBenchTable, [2]float32{float32(x), float32(y)})
 22 | 			atanBenchTableFixed = append(atanBenchTableFixed, [2]int{int(x * (1 << 14)), int(y * (1 << 14))})
 23 | 		}
 24 | 	}
 25 | }
 26 | 
 27 | func TestAtan2(t *testing.T) {
 28 | 	for y := -1.0; y <= 1.0; y += 0.01 {
 29 | 		for x := -1.0; x <= 1.0; x += 0.01 {
 30 | 			expected := float32(math.Atan2(y, x))
 31 | 			if err := math.Abs(float64(expected - FastAtan2(float32(y), float32(x)))); err > approxErrorLimit {
 32 | 				t.Errorf("FastAtan2 gave an error of %f for x=%f y=%f", err, x, y)
 33 | 			}
 34 | 			if err := math.Abs(float64(expected - FastAtan2_2(float32(y), float32(x)))); err > approxErrorLimit {
 35 | 				t.Errorf("FastAtan2_2 gave an error of %f for x=%f y=%f", err, x, y)
 36 | 			}
 37 | 		}
 38 | 	}
 39 | 	x, y := 0.0, 0.0
 40 | 	expected := float32(math.Atan2(y, x))
 41 | 	if err := math.Abs(float64(expected - FastAtan2(float32(y), float32(x)))); err > approxErrorLimit {
 42 | 		t.Errorf("FastAtan2 gave an error of %f for x=%f y=%f", err, x, y)
 43 | 	}
 44 | 	if err := math.Abs(float64(expected - FastAtan2_2(float32(y), float32(x)))); err > approxErrorLimit {
 45 | 		t.Errorf("FastAtan2_2 gave an error of %f for x=%f y=%f", err, x, y)
 46 | 	}
 47 | }
 48 | 
 49 | func TestFastAtan2Error(t *testing.T) {
 50 | 	maxE := 0.0
 51 | 	sumE := 0.0
 52 | 	count := 0
 53 | 	for y := -1.0; y <= 1.0; y += 0.01 {
 54 | 		for x := -1.0; x <= 1.0; x += 0.01 {
 55 | 			ai := float64(FastAtan2(float32(y), float32(x)))
 56 | 			af := math.Atan2(y, x)
 57 | 			e := math.Abs(ai - af)
 58 | 			sumE += e
 59 | 			if e > maxE {
 60 | 				maxE = e
 61 | 			}
 62 | 			count++
 63 | 		}
 64 | 	}
 65 | 	if maxE > 0.0102 {
 66 | 		t.Errorf("Expected max error of 0.0102 got %f", maxE)
 67 | 	}
 68 | 	t.Logf("Max error %f\n", maxE)
 69 | 	t.Logf("Mean absolute error %f", sumE/float64(count))
 70 | }
 71 | 
 72 | func TestFastAtan2_2Error(t *testing.T) {
 73 | 	maxE := 0.0
 74 | 	sumE := 0.0
 75 | 	count := 0
 76 | 	for y := -1.0; y <= 1.0; y += 0.01 {
 77 | 		for x := -1.0; x <= 1.0; x += 0.01 {
 78 | 			ai := float64(FastAtan2_2(float32(y), float32(x)))
 79 | 			af := math.Atan2(y, x)
 80 | 			e := math.Abs(ai - af)
 81 | 			sumE += e
 82 | 			if e > maxE {
 83 | 				maxE = e
 84 | 			}
 85 | 			count++
 86 | 		}
 87 | 	}
 88 | 	if maxE > 0.005 {
 89 | 		t.Errorf("Expected max error of 0.005 got %f", maxE)
 90 | 	}
 91 | 	t.Logf("Max error %f\n", maxE)
 92 | 	t.Logf("Mean absolute error %f", sumE/float64(count))
 93 | }
 94 | 
 95 | func TestVScaleF32(t *testing.T) {
 96 | 	simdTest(t, func(t *testing.T) {
 97 | 		input := make([]float32, 257)
 98 | 		for i := 0; i < len(input); i++ {
 99 | 			input[i] = float32(i)
100 | 		}
101 | 		expected := make([]float32, len(input))
102 | 		output := make([]float32, len(input))
103 | 		vscaleF32(input, expected, 1.0/256.0)
104 | 		VScaleF32(input, output, 1.0/256.0)
105 | 		for i, v := range expected {
106 | 			if output[i] != v {
107 | 				t.Fatalf("Output doesn't match expected:\n%+v\n%+v", output, expected)
108 | 			}
109 | 		}
110 | 
111 | 		// Unaligned
112 | 		input = input[1:]
113 | 		expected = make([]float32, len(input)+1)[1:]
114 | 		output = make([]float32, len(input)+1)[1:]
115 | 		vscaleF32(input, expected, 1.0/256.0)
116 | 		VScaleF32(input, output, 1.0/256.0)
117 | 		for i, v := range expected {
118 | 			if output[i] != v {
119 | 				t.Fatalf("Output doesn't match expected:\n%+v\n%+v", output, expected)
120 | 			}
121 | 		}
122 | 	})
123 | }
124 | 
125 | func TestVAbsC64(t *testing.T) {
126 | 	input := []complex64{
127 | 		complex(0.0, 0.0),
128 | 		complex(1.0, 1.0),
129 | 		complex(1.3, -2.7),
130 | 		complex(0.0, -1.0),
131 | 		complex(1.0, 0.0),
132 | 		complex(-2.3, 1.9),
133 | 	}
134 | 	expected := make([]float32, len(input))
135 | 	for i, v := range input {
136 | 		expected[i] = float32(math.Sqrt(float64(real(v)*real(v) + imag(v)*imag(v))))
137 | 	}
138 | 	output := make([]float32, len(input))
139 | 	VAbsC64(input, output)
140 | 	for i, v := range output {
141 | 		if !approxEqual32(v, expected[i], 1e-20) {
142 | 			t.Errorf("Expected %+v got %+v for %+v", expected[i], v, input[i])
143 | 		}
144 | 	}
145 | }
146 | 
147 | func TestVMaxF32(t *testing.T) {
148 | 	simdTest(t, func(t *testing.T) {
149 | 		input := make([]float32, 123)
150 | 		for i := 0; i < len(input); i++ {
151 | 			input[i] = rand.Float32() - 0.5
152 | 		}
153 | 		expected := vMaxF32(input)
154 | 		max := VMaxF32(input)
155 | 		if max != expected {
156 | 			t.Fatalf("Expected %f got %f", expected, max)
157 | 		}
158 | 
159 | 		// Test SIMD by having max in each specific lane
160 | 		for i := 0; i < 1024; i++ {
161 | 			input := make([]float32, 1024)
162 | 			input[i] = 1.0
163 | 			if max := VMaxF32(input); max != 1.0 {
164 | 				t.Fatalf("Expected 1.0 got %f at position %d", max, i)
165 | 			}
166 | 		}
167 | 
168 | 		// Ascending
169 | 		input = []float32{-4.0, -3.0, -2.0, -1.0, 0.0, 1.0, 2.0, 3.0, 4.0}
170 | 		if max := VMaxF32(input); max != 4.0 {
171 | 			t.Fatalf("Expected 4.0 got %f", max)
172 | 		}
173 | 
174 | 		// Descending
175 | 		input = []float32{4.0, 3.0, 2.0, 1.0, 0.0, -1.0, -2.0, -3.0, -4.0}
176 | 		if max := VMaxF32(input); max != 4.0 {
177 | 			t.Fatalf("Expected 4.0 got %f", max)
178 | 		}
179 | 
180 | 		// Unordered
181 | 		input = []float32{1.5, -4.0, 8.0, 0.0, -1.0, 2.0, -3.0}
182 | 		if max := VMaxF32(input); max != 8.0 {
183 | 			t.Fatalf("Expected 8.0 got %f", max)
184 | 		}
185 | 	})
186 | }
187 | 
188 | func TestVMinF32(t *testing.T) {
189 | 	simdTest(t, func(t *testing.T) {
190 | 		input := make([]float32, 123)
191 | 		for i := 0; i < len(input); i++ {
192 | 			input[i] = rand.Float32() - 0.5
193 | 		}
194 | 		expected := vMinF32(input)
195 | 		min := VMinF32(input)
196 | 		if min != expected {
197 | 			t.Fatalf("Expected %f got %f", expected, min)
198 | 		}
199 | 
200 | 		// Test SIMD by having min in each specific lane
201 | 		for i := 0; i < 1024; i++ {
202 | 			input := make([]float32, 1024)
203 | 			input[i] = -1.0
204 | 			if min := VMinF32(input); min != -1.0 {
205 | 				t.Fatalf("Expected -1.0 got %f at position %d", min, i)
206 | 			}
207 | 		}
208 | 
209 | 		// Ascending
210 | 		input = []float32{-4.0, -3.0, -2.0, -1.0, 0.0, 1.0, 2.0, 3.0, 4.0}
211 | 		if min := VMinF32(input); min != -4.0 {
212 | 			t.Fatalf("Expected -4.0 got %f", min)
213 | 		}
214 | 
215 | 		// Descending
216 | 		input = []float32{4.0, 3.0, 2.0, 1.0, 0.0, -1.0, -2.0, -3.0, -4.0}
217 | 		if min := VMinF32(input); min != -4.0 {
218 | 			t.Fatalf("Expected -4.0 got %f", min)
219 | 		}
220 | 
221 | 		// Unordered
222 | 		input = []float32{1.5, -4.0, 8.0, 0.0, -1.0, 2.0, -3.0}
223 | 		if min := VMinF32(input); min != -4.0 {
224 | 			t.Fatalf("Expected -4.0 got %f", min)
225 | 		}
226 | 	})
227 | }
228 | 
229 | func BenchmarkConj32(b *testing.B) {
230 | 	in := complex64(complex(1.0, -0.2))
231 | 	for i := 0; i < b.N; i++ {
232 | 		_ = Conj32(in)
233 | 	}
234 | }
235 | 
236 | func BenchmarkFastAtan2(b *testing.B) {
237 | 	for i := 0; i < b.N; i++ {
238 | 		for _, xy := range atanBenchTable {
239 | 			FastAtan2(xy[1], xy[0])
240 | 		}
241 | 	}
242 | }
243 | 
244 | func BenchmarkFastAtan2_Go(b *testing.B) {
245 | 	for i := 0; i < b.N; i++ {
246 | 		for _, xy := range atanBenchTable {
247 | 			fastAtan2(xy[1], xy[0])
248 | 		}
249 | 	}
250 | }
251 | 
252 | func BenchmarkFastAtan2_2(b *testing.B) {
253 | 	for i := 0; i < b.N; i++ {
254 | 		for _, xy := range atanBenchTable {
255 | 			FastAtan2_2(xy[1], xy[0])
256 | 		}
257 | 	}
258 | }
259 | 
260 | func BenchmarkFastAtan2_2_Go(b *testing.B) {
261 | 	for i := 0; i < b.N; i++ {
262 | 		for _, xy := range atanBenchTable {
263 | 			fastAtan2_2(xy[1], xy[0])
264 | 		}
265 | 	}
266 | }
267 | 
268 | func BenchmarkAtan2(b *testing.B) {
269 | 	for i := 0; i < b.N; i++ {
270 | 		for _, xy := range atanBenchTable {
271 | 			math.Atan2(float64(xy[1]), float64(xy[0]))
272 | 		}
273 | 	}
274 | }
275 | 
276 | func BenchmarkVScaleF32(b *testing.B) {
277 | 	input := make([]float32, benchSize)
278 | 	output := make([]float32, len(input))
279 | 	b.SetBytes(benchSize)
280 | 	b.ResetTimer()
281 | 	for i := 0; i < b.N; i++ {
282 | 		VScaleF32(input, output, 1.0/benchSize)
283 | 	}
284 | }
285 | 
286 | func BenchmarkVScaleF32_Go(b *testing.B) {
287 | 	input := make([]float32, benchSize)
288 | 	output := make([]float32, len(input))
289 | 	b.SetBytes(benchSize)
290 | 	b.ResetTimer()
291 | 	for i := 0; i < b.N; i++ {
292 | 		vscaleF32(input, output, 1.0/benchSize)
293 | 	}
294 | }
295 | 
296 | func BenchmarkVAbsC64(b *testing.B) {
297 | 	input := make([]complex64, benchSize)
298 | 	output := make([]float32, len(input))
299 | 	b.SetBytes(benchSize)
300 | 	b.ResetTimer()
301 | 	for i := 0; i < b.N; i++ {
302 | 		VAbsC64(input, output)
303 | 	}
304 | }
305 | 
306 | func BenchmarkVAbsC64_Go(b *testing.B) {
307 | 	input := make([]complex64, benchSize)
308 | 	output := make([]float32, len(input))
309 | 	b.SetBytes(benchSize)
310 | 	b.ResetTimer()
311 | 	for i := 0; i < b.N; i++ {
312 | 		vAbsC64(input, output)
313 | 	}
314 | }
315 | 
316 | func BenchmarkVMaxF32_Random(b *testing.B) {
317 | 	input := make([]float32, benchSize)
318 | 	r := rand.New(rand.NewSource(0))
319 | 	for i := 0; i < len(input); i++ {
320 | 		input[i] = r.Float32()
321 | 	}
322 | 	b.SetBytes(benchSize)
323 | 	b.ResetTimer()
324 | 	for i := 0; i < b.N; i++ {
325 | 		_ = VMaxF32(input)
326 | 	}
327 | }
328 | 
329 | func BenchmarkVMaxF32_Ascending(b *testing.B) {
330 | 	input := make([]float32, benchSize)
331 | 	for i := 0; i < len(input); i++ {
332 | 		input[i] = float32(i)
333 | 	}
334 | 	b.SetBytes(benchSize)
335 | 	b.ResetTimer()
336 | 	for i := 0; i < b.N; i++ {
337 | 		_ = VMaxF32(input)
338 | 	}
339 | }
340 | 
341 | func BenchmarkVMaxF32_Descending(b *testing.B) {
342 | 	input := make([]float32, benchSize)
343 | 	for i := 0; i < len(input); i++ {
344 | 		input[i] = float32(-i)
345 | 	}
346 | 	b.SetBytes(benchSize)
347 | 	b.ResetTimer()
348 | 	for i := 0; i < b.N; i++ {
349 | 		_ = VMaxF32(input)
350 | 	}
351 | }
352 | 
353 | func BenchmarkVMaxF32_Alternating(b *testing.B) {
354 | 	input := make([]float32, benchSize)
355 | 	for i := 0; i < len(input); i++ {
356 | 		if i&1 == 0 {
357 | 			input[i] = float32(i)
358 | 		} else {
359 | 			input[i] = float32(-i)
360 | 		}
361 | 	}
362 | 	b.SetBytes(benchSize)
363 | 	b.ResetTimer()
364 | 	for i := 0; i < b.N; i++ {
365 | 		_ = VMaxF32(input)
366 | 	}
367 | }
368 | 
369 | func BenchmarkVMaxF32_Go_Random(b *testing.B) {
370 | 	input := make([]float32, benchSize)
371 | 	r := rand.New(rand.NewSource(0))
372 | 	for i := 0; i < len(input); i++ {
373 | 		input[i] = r.Float32()
374 | 	}
375 | 	b.SetBytes(benchSize)
376 | 	b.ResetTimer()
377 | 	for i := 0; i < b.N; i++ {
378 | 		_ = vMaxF32(input)
379 | 	}
380 | }
381 | 
382 | func BenchmarkVMaxF32_Go_Ascending(b *testing.B) {
383 | 	input := make([]float32, benchSize)
384 | 	for i := 0; i < len(input); i++ {
385 | 		input[i] = float32(i)
386 | 	}
387 | 	b.SetBytes(benchSize)
388 | 	b.ResetTimer()
389 | 	for i := 0; i < b.N; i++ {
390 | 		_ = vMaxF32(input)
391 | 	}
392 | }
393 | 
394 | func BenchmarkVMaxF32_Go_Decending(b *testing.B) {
395 | 	input := make([]float32, benchSize)
396 | 	for i := 0; i < len(input); i++ {
397 | 		input[i] = float32(-i)
398 | 	}
399 | 	b.SetBytes(benchSize)
400 | 	b.ResetTimer()
401 | 	for i := 0; i < b.N; i++ {
402 | 		_ = vMaxF32(input)
403 | 	}
404 | }
405 | 
406 | func BenchmarkVMaxF32_Go_Alternating(b *testing.B) {
407 | 	input := make([]float32, benchSize)
408 | 	for i := 0; i < len(input); i++ {
409 | 		if i&1 == 0 {
410 | 			input[i] = float32(i)
411 | 		} else {
412 | 			input[i] = float32(-i)
413 | 		}
414 | 	}
415 | 	b.SetBytes(benchSize)
416 | 	b.ResetTimer()
417 | 	for i := 0; i < b.N; i++ {
418 | 		_ = vMaxF32(input)
419 | 	}
420 | }
421 | 


--------------------------------------------------------------------------------
/dsp/mathfixed.go:
--------------------------------------------------------------------------------
 1 | package dsp
 2 | 
 3 | import "math"
 4 | 
// Fixed-point angle constants used by FastAtan2Fixed. Angles are scaled so
// that pi is represented by 1<<14.
const (
	fixedPi   = 1 << 14         // pi
	fixedPi4  = fixedPi / 4     // pi/4
	fixedPi34 = 3 * fixedPi / 4 // 3*pi/4
)
10 | 
const (
	// atanLUTSize is the number of entries in atanLUT (512 KiB when an int
	// occupies four bytes).
	atanLUTSize = 131072
	// atanLUTCoef is the number of fractional bits in a table index: entry i
	// holds the arctangent of i / (1 << atanLUTCoef).
	atanLUTCoef = 8
)

// atanLUT maps a fixed-point tangent to its angle, scaled so that pi is
// 1<<14. It is filled once by init.
var atanLUT []int

// init fills atanLUT with atan(i / 2^atanLUTCoef) for every index i, truncated
// to an integer after scaling.
func init() {
	const indexScale = float64(1 << atanLUTCoef)
	table := make([]int, atanLUTSize)
	for i := range table {
		table[i] = int(math.Atan(float64(i)/indexScale) / math.Pi * (1 << 14))
	}
	atanLUT = table
}
24 | 
25 | func Atan2LUT(y, x int) int {
26 | 	if x == 0 {
27 | 		switch {
28 | 		case y > 0:
29 | 			return 1 << 13
30 | 		case y < 0:
31 | 			return -(1 << 13)
32 | 		}
33 | 		return 0
34 | 	}
35 | 
36 | 	t := (y << atanLUTCoef) / x
37 | 	if t == 0 {
38 | 		switch {
39 | 		case x > 0:
40 | 			return 0
41 | 		case y < 0:
42 | 			return -(1 << 14)
43 | 		}
44 | 		return 1 << 14
45 | 	}
46 | 
47 | 	if t >= atanLUTSize || -t >= atanLUTSize {
48 | 		if y > 0 {
49 | 			return 1 << 13
50 | 		}
51 | 		return -(1 << 13)
52 | 	}
53 | 
54 | 	if t > 0 {
55 | 		if y > 0 {
56 | 			return atanLUT[t]
57 | 		}
58 | 		return atanLUT[t] - (1 << 14)
59 | 	}
60 | 	if y > 0 {
61 | 		return (1 << 14) - atanLUT[-t]
62 | 	}
63 | 	return -atanLUT[-t]
64 | }
65 | 
66 | func FastAtan2Fixed(y, x int) int {
67 | 	if x == 0 && y == 0 {
68 | 		return 0
69 | 	}
70 | 
71 | 	yAbs := y
72 | 	if yAbs < 0 {
73 | 		yAbs = -yAbs
74 | 	}
75 | 
76 | 	var angle int
77 | 	if x >= 0 {
78 | 		angle = fixedPi4 - fixedPi4*(x-yAbs)/(x+yAbs)
79 | 	} else {
80 | 		angle = fixedPi34 - fixedPi4*(x+yAbs)/(yAbs-x)
81 | 	}
82 | 	if y < 0 {
83 | 		return -angle
84 | 	}
85 | 	return angle
86 | }
87 | 


--------------------------------------------------------------------------------
/dsp/mathfixed_arm.s:
--------------------------------------------------------------------------------
  1 | #include "textflag.h"
  2 | 
  3 | // TEXT ·Atan2LUT(SB),NOSPLIT,$0
  4 | // 	MOVW	x+4(FP), R4
  5 | // 	MOVW	y+0(FP), R3
  6 | // 	MOVW	$0, R0
  7 | // 	TEQ	$0, R4
  8 | // 	BNE	L1
  9 | // 	CMP	$0, R3
 10 | // 	BLE	L2
 11 | // 	MOVW	$8192, R0
 12 | // 	MOVW	R0, res+8(FP)
 13 | // 	RET
 14 | // L2:
 15 | // 	CMP	$0, R3
 16 | // 	BGE	L3
 17 | // 	MOVW	$-8192, R0
 18 | // 	MOVW	R0, res+8(FP)
 19 | // 	RET
 20 | // L3:
 21 | // 	MOVW	$0, R0
 22 | // 	MOVW	R0, res+8(FP)
 23 | // 	RET
 24 | // L1:
 25 | // 	MOVW	R3<<8, R0
 26 | // 	MOVW	0(R13), R11
 27 | // 	MOVW.W	R11, -8(R13)
 28 | // 	MOVW	R4, 4(R13)
 29 | // 	MOVW	R0, R11
 30 | // 	BL	_div(SB)
 31 | // 	MOVW	R11, R2
 32 | // 	ADD	$8, R13
 33 | // 	TEQ	$0, R2
 34 | // 	BNE	L4
 35 | // 	CMP	$0, R4
 36 | // 	BLE	L5
 37 | // 	MOVW	$0,R0
 38 | // 	MOVW	R0, res+8(FP)
 39 | // 	RET
 40 | // L5:
 41 | // 	CMP	$0, R3,
 42 | // 	BGE	L6
 43 | // 	MOVW	$-16384, R0
 44 | // 	MOVW	R0, res+8(FP)
 45 | // 	RET
 46 | // L6:
 47 | // 	MOVW	$16384,R0
 48 | // 	MOVW	R0, res+8(FP)
 49 | // 	RET
 50 | // L4:
 51 | // 	MOVW	$131072, R4
 52 | // 	CMP	R4, R2,
 53 | // 	BGE	L7
 54 | // 	RSB	$0, R2, R4
 55 | // 	MOVW	$131072, R5
 56 | // 	CMP	R5, R4
 57 | // 	BGE	L7
 58 | // 	CMP	$0, R2
 59 | // 	BLE	L8
 60 | // 	CMP	$0, R3
 61 | // 	BLE	L9
 62 | // 	MOVW	$atanLUT+0(SB), R0
 63 | // // 	MOVW	R2, R1
 64 | // // 	MOVW	4(R0), R2
 65 | // // 	CMP	R2, R1
 66 | // // 	BLO	L10
 67 | // // 	PCDATA	$1,$0
 68 | // // 	BL	,runtime.panicindex(SB)
 69 | // // L10:
 70 | // 	MOVW	0(R0), R0
 71 | // 	MOVW	R1<<2(R0), R1
 72 | // 	MOVW	R1, res+8(FP)
 73 | // 	RET
 74 | // L9:
 75 | // 	MOVW	$atanLUT+0(SB),R0
 76 | // // 0x0120 00288 MOVW	R2,R1
 77 | // // 0x0124 00292 MOVW	4(R0),R2
 78 | // // 0x0128 00296 CMP	R2,R1,
 79 | // // 0x012c 00300 BLO	,312
 80 | // // 0x0130 00304 PCDATA	$1,$0
 81 | // // 0x0130 00304 BL	,runtime.panicindex(SB)
 82 | // // 0x0134 00308 UNDEF	,
 83 | // 	MOVW	0(R0),R0
 84 | // 	MOVW	R1<<2(R0),R0
 85 | // 	SUB	$16384,R0
 86 | // 	MOVW	R0, res+8(FP)
 87 | // 	RET
 88 | // L8:
 89 | // 	CMP	$0, R3,
 90 | // 	BLE	L10
 91 | // 	RSB	$0,R2,R1
 92 | // 	MOVW	$atanLUT+0(SB),R0
 93 | // // 0x015c 00348 MOVW	4(R0),R2
 94 | // // 0x0160 00352 CMP	R2,R1,
 95 | // // 0x0164 00356 BLO	,368
 96 | // // 0x0168 00360 PCDATA	$1,$0
 97 | // // 0x0168 00360 BL	,runtime.panicindex(SB)
 98 | // // 0x016c 00364 UNDEF	,
 99 | // 	MOVW	0(R0),R0
100 | // 	MOVW	R1<<2(R0),R0
101 | // 	MOVW	$16384,R1
102 | // 	SUB	R0,R1
103 | // 	MOVW	R1, res+8(FP)
104 | // 	RET
105 | // L10:
106 | // 	RSB	$0,R2,R1
107 | // 	MOVW	$atanLUT+0(SB),R0
108 | // // 0x0190 00400 MOVW	4(R0),R2
109 | // // 0x0194 00404 CMP	R2,R1,
110 | // // 0x0198 00408 BLO	,420
111 | // // 0x019c 00412 PCDATA	$1,$0
112 | // // 0x019c 00412 BL	,runtime.panicindex(SB)
113 | // // 0x01a0 00416 UNDEF	,
114 | // 	MOVW	0(R0),R0
115 | // 	MOVW	R1<<2(R0),R0
116 | // 	RSB	$0,R0
117 | // 	MOVW	R0, res+8(FP)
118 | // 	RET
119 | // L7:
120 | // 	CMP	$0, R3
121 | // 	BLE	L11
122 | // 	MOVW	$8192,R0
123 | // 	MOVW	R0, res+8(FP)
124 | // 	RET
125 | // L11:
126 | // 	MOVW	$-8192, R0
127 | // 	MOVW	R0, res+8(FP)
128 | // 	RET
129 | // // 0x01dc 00476 WORD	,$-8192
130 | // // 0x01e0 00480 WORD	,$-16384
131 | // // 0x01e4 00484 WORD	,$"".atanLUT+0(SB)
132 | 
133 | // TEXT ·FastAtan2Fixed(SB),NOSPLIT,$0-12
134 | // 	MOVW	y+0(FP), R5
135 | // 	MOVW	x+4(FP), R4
136 | // 	TEQ	$0, R4
137 | // 	BNE	fatan2fixed_1
138 | // 	TEQ	$0, R5
139 | // 	BNE	fatan2fixed_1
140 | // 	MOVW	R4, res+8(FP)
141 | // 	RET
142 | // fatan2fixed_1:
143 | // 	MOVW	R5, R3 // yAbs = y
144 | // 	CMP	$0, R5
145 | // 	RSB.LT	$0, R3, R3 // if yAbs < 0 : yAbs = -yAbs
146 | // 	CMP	$0, R4
147 | // 	BLT	fatan2fixed_2
148 | 
149 | // 	SUB	R3, R4, R11
150 | // 	MOVW	R11<<12, R11
151 | // 	ADD	R3, R4, R1
152 | 
153 | // 	MOVW	0(R13), R0
154 | // 	MOVW.W	R0, -8(R13)
155 | // 	MOVW	R1, 4(R13)
156 | // 	BL	_div(SB)
157 | // 	ADD	$8, R13
158 | 
159 | // 	MOVW	$4096, R1
160 | // 	SUB	R11, R1, R2
161 | // 	CMP	$0, R5
162 | // 	RSB.LT	$0, R2, R2
163 | // 	MOVW	R2, res+8(FP)
164 | // 	RET
165 | // fatan2fixed_2:
166 | // 	ADD	R3, R4, R11
167 | // 	MOVW	R11<<12, R11
168 | // 	SUB	R4, R3, R1
169 | 
170 | // 	MOVW	0(R13), R0
171 | // 	MOVW.W	R0, -8(R13)
172 | // 	MOVW	R1, 4(R13)
173 | // 	BL	_div(SB)
174 | // 	ADD	$8, R13
175 | 
176 | // 	MOVW	$12288, R1
177 | // 	SUB	R11, R1, R2
178 | // 	CMP	$0, R5
179 | // 	RSB.LT	$0, R2, R2
180 | // 	MOVW	R2, res+8(FP)
181 | // 	RET
182 | 
183 | 


--------------------------------------------------------------------------------
/dsp/mathfixed_test.go:
--------------------------------------------------------------------------------
 1 | package dsp
 2 | 
 3 | import (
 4 | 	"math"
 5 | 	"testing"
 6 | )
 7 | 
 8 | func TestFastAtan2FixedError(t *testing.T) {
 9 | 	maxE := 0.0
10 | 	sumE := 0.0
11 | 	count := 0
12 | 	for y := -32768; y < 32768; y += 64 {
13 | 		for x := -32768; x < 32768; x += 64 {
14 | 			ai := float64(FastAtan2Fixed(y, x)) * math.Pi / (1 << 14)
15 | 			af := math.Atan2(float64(y), float64(x))
16 | 			e := math.Abs(ai - af)
17 | 			sumE += e
18 | 			if e > maxE {
19 | 				maxE = e
20 | 			}
21 | 			count++
22 | 		}
23 | 	}
24 | 	if maxE > 0.08 {
25 | 		t.Errorf("Expected max error of 0.08 got %f", maxE)
26 | 	}
27 | 	t.Logf("Max error %f\n", maxE)
28 | 	t.Logf("Mean absolute error %f", sumE/float64(count))
29 | }
30 | 
31 | func TestAtan2LUTError(t *testing.T) {
32 | 	maxE := 0.0
33 | 	sumE := 0.0
34 | 	count := 0
35 | 	for y := -32768; y < 32768; y += 64 {
36 | 		for x := -32768; x < 32768; x += 64 {
37 | 			ai := float64(Atan2LUT(y, x)) * math.Pi / (1 << 14)
38 | 			af := math.Atan2(float64(y), float64(x))
39 | 			e := math.Abs(ai - af)
40 | 			sumE += e
41 | 			if e > maxE {
42 | 				maxE = e
43 | 			}
44 | 			count++
45 | 		}
46 | 	}
47 | 	if maxE > 0.005 {
48 | 		t.Errorf("Expected max error of 0.005 got %f", maxE)
49 | 	}
50 | 	t.Logf("Max error %f\n", maxE)
51 | 	t.Logf("Mean absolute error %f", sumE/float64(count))
52 | }
53 | 
54 | func BenchmarkFastAtan2Fixed(b *testing.B) {
55 | 	for i := 0; i < b.N; i++ {
56 | 		for _, xy := range atanBenchTableFixed {
57 | 			FastAtan2Fixed(xy[1], xy[0])
58 | 		}
59 | 	}
60 | }
61 | 
62 | func BenchmarkAtan2LUT(b *testing.B) {
63 | 	for i := 0; i < b.N; i++ {
64 | 		for _, xy := range atanBenchTableFixed {
65 | 			Atan2LUT(xy[1], xy[0])
66 | 		}
67 | 	}
68 | }
69 | 


--------------------------------------------------------------------------------
/dsp/sdft.go:
--------------------------------------------------------------------------------
  1 | package dsp
  2 | 
  3 | import (
  4 | 	"math"
  5 | 	"math/cmplx"
  6 | )
  7 | 
  8 | // TODO: damping
  9 | 
 10 | // SDFT is a sliding DFT.
 11 | type SDFT struct {
 12 | 	i int
 13 | 	w []complex128
 14 | 	s []complex128
 15 | 	x []complex128
 16 | 	e []complex128
 17 | }
 18 | 
 19 | func NewSDFT(k, n int, window []float64) *SDFT {
 20 | 	var win []complex128
 21 | 	if len(window) == 0 {
 22 | 		win = []complex128{complex(1, 0)}
 23 | 	} else {
 24 | 		win = make([]complex128, len(window))
 25 | 		for i, w := range window {
 26 | 			win[i] = complex(w, 0)
 27 | 		}
 28 | 	}
 29 | 	s := &SDFT{
 30 | 		w: win,
 31 | 		x: make([]complex128, n),
 32 | 		e: make([]complex128, len(win)),
 33 | 		s: make([]complex128, len(win)),
 34 | 	}
 35 | 	for i := 0; i < len(win); i++ {
 36 | 		j := k - len(win)/2 + i
 37 | 		if j < 0 {
 38 | 			j += n
 39 | 		} else if j >= n {
 40 | 			j -= n
 41 | 		}
 42 | 		s.e[i] = cmplx.Exp(complex(0, 2*math.Pi*float64(j)/float64(n)))
 43 | 	}
 44 | 	return s
 45 | }
 46 | 
 47 | func (sd *SDFT) Filter(x complex128) complex128 {
 48 | 	i := (sd.i + 1) % len(sd.x)
 49 | 	x0 := sd.x[i]
 50 | 	sd.x[i] = x
 51 | 	sd.i = i
 52 | 	xd := x - x0
 53 | 	var sum complex128
 54 | 	for i, w := range sd.w {
 55 | 		s := (xd + sd.s[i]) * sd.e[i]
 56 | 		sd.s[i] = s
 57 | 		sum += w * s
 58 | 	}
 59 | 	return sum
 60 | }
 61 | 
 62 | // SDFT32 is a 32-bit float version of a sliding DFT.
 63 | type SDFT32 struct {
 64 | 	i int
 65 | 	w []complex64
 66 | 	s []complex64
 67 | 	x []complex64
 68 | 	e []complex64
 69 | }
 70 | 
 71 | func NewSDFT32(k, n int, window []float32) *SDFT32 {
 72 | 	var win []complex64
 73 | 	if len(window) == 0 {
 74 | 		win = []complex64{complex(1, 0)}
 75 | 	} else {
 76 | 		win = make([]complex64, len(window))
 77 | 		for i, w := range window {
 78 | 			win[i] = complex(w, 0)
 79 | 		}
 80 | 	}
 81 | 	s := &SDFT32{
 82 | 		w: win,
 83 | 		x: make([]complex64, n),
 84 | 		e: make([]complex64, len(win)),
 85 | 		s: make([]complex64, len(win)),
 86 | 	}
 87 | 	for i := 0; i < len(win); i++ {
 88 | 		j := k - len(win)/2 + i
 89 | 		if j < 0 {
 90 | 			j += n
 91 | 		} else if j >= n {
 92 | 			j -= n
 93 | 		}
 94 | 		s.e[i] = complex64(cmplx.Exp(complex(0, 2*math.Pi*float64(j)/float64(n))))
 95 | 	}
 96 | 	return s
 97 | }
 98 | 
 99 | func (sd *SDFT32) Filter(x complex64) complex64 {
100 | 	i := (sd.i + 1) % len(sd.x)
101 | 	x0 := sd.x[i]
102 | 	sd.x[i] = x
103 | 	sd.i = i
104 | 	xd := x - x0
105 | 	var sum complex64
106 | 	for i, w := range sd.w {
107 | 		s := (xd + sd.s[i]) * sd.e[i]
108 | 		sd.s[i] = s
109 | 		sum += w * s
110 | 	}
111 | 	return sum
112 | }
113 | 


--------------------------------------------------------------------------------
/dsp/stub_windows.go:
--------------------------------------------------------------------------------
1 | // Code generated by command: go run conversion_avo_amd64.go -out conversion_avo_amd64.s -stubs stub_windows.go. DO NOT EDIT.
2 | 
3 | package dsp
4 | 
// Ui8tof32 converts unsigned 8-bit samples to 32-bit float.
//
// The declaration has no Go body; according to the generated-code header of
// this file the implementation is emitted as assembly by
// conversion_avo_amd64.go.
// NOTE(review): the scaling applied to the samples and the behaviour when
// len(input) and len(output) differ are not visible here — check the generator.
func Ui8tof32(input []byte, output []float32)
7 | 


--------------------------------------------------------------------------------
/dsp/util.go:
--------------------------------------------------------------------------------
 1 | package dsp
 2 | 
// ComplexSource is implemented by types that produce complex64 samples.
type ComplexSource interface {
	// Source returns a slice of samples, or an error.
	// NOTE(review): whether the returned slice may be reused by the
	// implementation, and how end of stream is reported, is not defined here.
	Source() ([]complex64, error)
}
 6 | 
// RealSink is implemented by types that consume float32 samples.
type RealSink interface {
	// Sink accepts a slice of samples and reports an error on failure.
	// NOTE(review): whether the sink may retain the slice is not defined here.
	Sink([]float32) error
}
10 | 
// ComplexFilter is implemented by types that transform a slice of complex64
// samples into another slice of complex64 samples.
//
// Note that Rotate90Filter.Filter in this file returns only a slice (no
// error), so Rotate90Filter does not satisfy this interface.
type ComplexFilter interface {
	// Filter processes the given samples and returns the result or an error.
	// NOTE(review): whether implementations work in place is not defined here.
	Filter([]complex64) ([]complex64, error)
}
14 | 
// Demodulator is implemented by types that turn complex64 input samples into
// float32 output samples.
type Demodulator interface {
	// Demodulate reads from input, writes to output and returns an int and an
	// error.
	// NOTE(review): the int is presumably the number of samples written to
	// output — confirm against the implementations.
	Demodulate(input []complex64, output []float32) (int, error)
}
18 | 
// Rotate90Filter rotates complex64 samples by successive multiples of 90
// degrees (see its Filter method). It carries no state.
type Rotate90Filter struct {
}
21 | 
// Filter rotates samples by delegating to rotate90FilterAsm and returns the
// slice that function returns. The Go reference implementation,
// rotate90Filter, modifies the samples in place and returns the same slice.
func (fi *Rotate90Filter) Filter(samples []complex64) []complex64 {
	return rotate90FilterAsm(fi, samples)
}
25 | 
// rotate90FilterAsm is declared without a body; it is implemented in the
// per-architecture util_*.s files. The 386, amd64 and arm64 versions simply
// jump to rotate90Filter, while the arm version is hand-written.
func rotate90FilterAsm(fi *Rotate90Filter, samples []complex64) []complex64
27 | 
28 | func rotate90Filter(fi *Rotate90Filter, samples []complex64) []complex64 {
29 | 	for i := 0; i < len(samples); i += 4 {
30 | 		samples[i+1] = complex(-imag(samples[i+1]), real(samples[i+1]))
31 | 		samples[i+2] = -samples[i+2]
32 | 		samples[i+3] = complex(imag(samples[i+3]), -real(samples[i+3]))
33 | 	}
34 | 	return samples
35 | }
36 | 
// I32Rotate90Filter is the int32 counterpart of Rotate90Filter. Its Go
// reference implementation treats the slice as consecutive pairs of values
// and rotates each pair by successive multiples of 90 degrees.
// NOTE(review): pairs are presumably interleaved I/Q samples — confirm.
// It carries no state.
type I32Rotate90Filter struct {
}
39 | 
// Filter rotates samples by delegating to i32Rotate90FilterAsm and returns
// the slice that function returns. The Go reference implementation,
// i32Rotate90Filter, modifies the samples in place and returns the same slice.
func (fi *I32Rotate90Filter) Filter(samples []int32) []int32 {
	return i32Rotate90FilterAsm(fi, samples)
}
43 | 
// i32Rotate90FilterAsm is declared without a body; it is implemented in the
// per-architecture util_*.s files. The 386, amd64 and arm64 versions simply
// jump to i32Rotate90Filter, while the arm version is hand-written.
func i32Rotate90FilterAsm(fi *I32Rotate90Filter, samples []int32) []int32
45 | 
46 | func i32Rotate90Filter(fi *I32Rotate90Filter, samples []int32) []int32 {
47 | 	for i := 0; i < len(samples); i += 8 {
48 | 		samples[i+2], samples[i+3] = -samples[i+3], samples[i+2]
49 | 		samples[i+4] = -samples[i+4]
50 | 		samples[i+5] = -samples[i+5]
51 | 		samples[i+6], samples[i+7] = samples[i+7], -samples[i+6]
52 | 	}
53 | 	return samples
54 | }
55 | 
// rtoc returns a new slice in which every real value of r becomes a complex
// number with a zero imaginary part.
func rtoc(r []float64) []complex128 {
	count := len(r)
	out := make([]complex128, count)
	for i := 0; i < count; i++ {
		out[i] = complex(r[i], 0)
	}
	return out
}
63 | 
// rtoc32 returns a new slice in which every real value of r becomes a
// complex64 with a zero imaginary part.
func rtoc32(r []float32) []complex64 {
	count := len(r)
	out := make([]complex64, count)
	for i := 0; i < count; i++ {
		out[i] = complex(r[i], 0)
	}
	return out
}
71 | 


--------------------------------------------------------------------------------
/dsp/util_386.s:
--------------------------------------------------------------------------------
// rotate90FilterAsm has no dedicated 386 implementation: it jumps straight to
// the Go function rotate90Filter, which sees the caller's arguments.
// NOTE(review): the flag is the literal 7 rather than a textflag.h name;
// presumably NOPROF|DUPOK|NOSPLIT — confirm.
TEXT ·rotate90FilterAsm(SB), 7, $0
	JMP ·rotate90Filter(SB)
3 | 
// i32Rotate90FilterAsm has no dedicated 386 implementation: it jumps straight
// to the Go function i32Rotate90Filter, which sees the caller's arguments.
// NOTE(review): the flag is the literal 7 rather than a textflag.h name;
// presumably NOPROF|DUPOK|NOSPLIT — confirm.
TEXT ·i32Rotate90FilterAsm(SB), 7, $0
	JMP ·i32Rotate90Filter(SB)
6 | 


--------------------------------------------------------------------------------
/dsp/util_amd64.s:
--------------------------------------------------------------------------------
1 | #include "textflag.h"
2 | 
// rotate90FilterAsm has no dedicated amd64 implementation: it jumps straight
// to the Go function rotate90Filter.
TEXT ·rotate90FilterAsm(SB), NOSPLIT, $0
	JMP ·rotate90Filter(SB)
5 | 
// i32Rotate90FilterAsm has no dedicated amd64 implementation: it jumps
// straight to the Go function i32Rotate90Filter.
TEXT ·i32Rotate90FilterAsm(SB), NOSPLIT, $0
	JMP ·i32Rotate90Filter(SB)
8 | 


--------------------------------------------------------------------------------
/dsp/util_arm.s:
--------------------------------------------------------------------------------
 1 | #include "textflag.h"
 2 | 
// rotate90FilterAsm is the ARM implementation of
//   func rotate90FilterAsm(fi *Rotate90Filter, samples []complex64) []complex64
//
// It rewrites the samples in place in groups of four, leaving the first
// sample of each group alone and rotating the next three exactly as the Go
// function rotate90Filter does. The sample count is rounded down to a
// multiple of four, so up to three trailing samples are not touched. The
// returned slice header is a copy of the samples argument.
//
// Register use: R8 = current address, R7 = end address,
// R0-R5 = real/imag words of samples i+1..i+3, R6 = scratch.
TEXT ·rotate90FilterAsm(SB), NOSPLIT, $0
	MOVW samples_len+8(FP), R7
	MOVW samples_ptr+4(FP), R8
	AND  $(~3), R7             // round down to nearest multiple of 4

	// Nothing to do when there is no complete group.
	TEQ $0, R7
	BEQ r90_end

	// R7 = R8 + 8*R7: address just past the last complete group
	// (each complex64 occupies 8 bytes).
	ADD R7<<3, R8, R7

r90_loop:
	// First sample of the group of 4 doesn't change
	ADD $8, R8

	// Load the six 32-bit words of samples i+1..i+3 without advancing R8.
	MOVM.IA (R8), [R0-R5]

	// samples[i+1] = complex(-imag(samples[i+1]), real(samples[i+1]))
	// XOR with 1<<31 flips the float32 sign bit, i.e. negates the value.
	MOVW R0, R6
	EOR  $(1<<31), R1, R0
	MOVW R6, R1

	// samples[i+2] = -samples[i+2]
	EOR $(1<<31), R2
	EOR $(1<<31), R3

	// samples[i+3] = complex(imag(samples[i+3]), -real(samples[i+3]))
	EOR       $(1<<31), R4, R6
	MOVW      R5, R4
	MOVW      R6, R5
	// Store the six words back; the .W suffix advances R8 past them, so R8
	// now points at the next group.
	MOVM.IA.W [R0-R5], (R8)

	// Loop while the end address R7 is still greater than R8.
	CMP R8, R7
	BGT r90_loop

r90_end:
	// Return the input slice header (pointer, length, capacity) unchanged.
	MOVW samples_ptr+4(FP), R0
	MOVW R0, ret_ptr+16(FP)
	MOVW samples_len+8(FP), R0
	MOVW R0, ret_len+20(FP)
	MOVW samples_cap+12(FP), R0
	MOVW R0, ret_cap+24(FP)
	RET
45 | 
// i32Rotate90FilterAsm is the ARM implementation of
//   func i32Rotate90FilterAsm(fi *I32Rotate90Filter, samples []int32) []int32
//
// It rewrites the samples in place in groups of eight int32 values (four
// pairs), leaving the first pair of each group alone and rotating the next
// three pairs exactly as the Go function i32Rotate90Filter does. The returned
// slice header is a copy of the samples argument.
//
// Two defects of the earlier version are fixed here:
//   - the length was rounded down to a multiple of 4 although every loop
//     iteration consumes 8 values, so a length with len%8 == 4 made the loop
//     read and write 16 bytes past the end of the slice;
//   - values were "negated" with MVN, which is a bitwise NOT (-x-1); RSB $0
//     computes the two's-complement negation 0-x used by the Go version.
//
// Register use: R8 = current address, R7 = end address,
// R0-R5 = the six values samples[i+2..i+7], R6 = scratch.
TEXT ·i32Rotate90FilterAsm(SB), NOSPLIT, $0
	MOVW samples_len+8(FP), R7
	MOVW samples_ptr+4(FP), R8
	AND  $(~7), R7             // round down to nearest multiple of 8 values

	// Nothing to do when there is no complete group.
	TEQ $0, R7
	BEQ i32r90_end

	// R7 = R8 + 4*R7: address just past the last complete group
	// (each int32 occupies 4 bytes).
	ADD R7<<2, R8, R7

i32r90_loop:
	// First pair of the group of 4 pairs doesn't change
	ADD $8, R8

	// Load samples[i+2..i+7] without advancing R8.
	MOVM.IA (R8), [R0-R5]

	// samples[i+2], samples[i+3] = -samples[i+3], samples[i+2]
	MOVW R0, R6
	RSB  $0, R1, R0
	MOVW R6, R1

	// samples[i+4], samples[i+5] = -samples[i+4], -samples[i+5]
	RSB $0, R2, R2
	RSB $0, R3, R3

	// samples[i+6], samples[i+7] = samples[i+7], -samples[i+6]
	RSB       $0, R4, R6
	MOVW      R5, R4
	MOVW      R6, R5
	// Store the six values back; the .W suffix advances R8 past them, so R8
	// now points at the next group.
	MOVM.IA.W [R0-R5], (R8)

	// Loop while the end address R7 is still greater than R8.
	CMP R8, R7
	BGT i32r90_loop

i32r90_end:
	// Return the input slice header (pointer, length, capacity) unchanged.
	MOVW samples_ptr+4(FP), R0
	MOVW R0, ret_ptr+16(FP)
	MOVW samples_len+8(FP), R0
	MOVW R0, ret_len+20(FP)
	MOVW samples_cap+12(FP), R0
	MOVW R0, ret_cap+24(FP)
	RET
88 | 


--------------------------------------------------------------------------------
/dsp/util_arm64.s:
--------------------------------------------------------------------------------
1 | #include "textflag.h"
2 | 
// rotate90FilterAsm has no dedicated arm64 implementation: it branches
// straight to the Go function rotate90Filter.
TEXT ·rotate90FilterAsm(SB), NOSPLIT, $0
    B ·rotate90Filter(SB)
5 | 
// i32Rotate90FilterAsm has no dedicated arm64 implementation: it branches
// straight to the Go function i32Rotate90Filter.
TEXT ·i32Rotate90FilterAsm(SB), NOSPLIT, $0
    B ·i32Rotate90Filter(SB)
8 | 


--------------------------------------------------------------------------------
/dsp/util_test.go:
--------------------------------------------------------------------------------
 1 | package dsp
 2 | 
 3 | import (
 4 | 	"math"
 5 | 	"testing"
 6 | )
 7 | 
 8 | func approxEqual(a, b, e float64) bool {
 9 | 	return math.Abs(a-b) <= e
10 | }
11 | 
// approxEqual32 reports whether a and b differ by no more than e. The
// comparison is carried out in float64.
func approxEqual32(a, b, e float32) bool {
	delta := float64(a) - float64(b)
	return math.Abs(delta) <= float64(e)
}
15 | 
16 | func TestRotate90Filter(t *testing.T) {
17 | 	filter := &Rotate90Filter{}
18 | 	input := make([]complex64, 256)
19 | 	for i := 0; i < 256; i++ {
20 | 		input[i] = complex(float32(i)-128.0, -(float32(i) - 128.0))
21 | 	}
22 | 	output := make([]complex64, 256)
23 | 	copy(output, input)
24 | 	output = rotate90FilterAsm(filter, output)
25 | 	expected := make([]complex64, 256)
26 | 	copy(expected, input)
27 | 	expected = rotate90Filter(filter, expected)
28 | 	if len(output) != len(expected) {
29 | 		t.Fatalf("Output doesn't match expected: %+v != %+v", output, expected)
30 | 	}
31 | 	for i := 0; i < len(output); i++ {
32 | 		if output[i] != expected[i] {
33 | 			t.Fatalf("Output doesn't match expected: %+v != %+v", output, expected)
34 | 		}
35 | 	}
36 | }
37 | 
38 | func BenchmarkRotate90Filter(b *testing.B) {
39 | 	filter := &Rotate90Filter{}
40 | 	input := make([]complex64, benchSize)
41 | 	b.SetBytes(benchSize)
42 | 	b.ResetTimer()
43 | 	for i := 0; i < b.N; i++ {
44 | 		_ = rotate90FilterAsm(filter, input)
45 | 	}
46 | }
47 | 
48 | func BenchmarkRotate90Filter_Go(b *testing.B) {
49 | 	filter := &Rotate90Filter{}
50 | 	input := make([]complex64, benchSize)
51 | 	b.SetBytes(benchSize)
52 | 	b.ResetTimer()
53 | 	for i := 0; i < b.N; i++ {
54 | 		_ = rotate90Filter(filter, input)
55 | 	}
56 | }
57 | 
58 | func BenchmarkI32Rotate90Filter(b *testing.B) {
59 | 	filter := &I32Rotate90Filter{}
60 | 	input := make([]int32, 2*benchSize)
61 | 	b.SetBytes(benchSize)
62 | 	b.ResetTimer()
63 | 	for i := 0; i < b.N; i++ {
64 | 		_ = i32Rotate90FilterAsm(filter, input)
65 | 	}
66 | }
67 | 
68 | func BenchmarkI32Rotate90Filter_Go(b *testing.B) {
69 | 	filter := &I32Rotate90Filter{}
70 | 	input := make([]int32, 2*benchSize)
71 | 	b.SetBytes(benchSize)
72 | 	b.ResetTimer()
73 | 	for i := 0; i < b.N; i++ {
74 | 		_ = i32Rotate90Filter(filter, input)
75 | 	}
76 | }
77 | 


--------------------------------------------------------------------------------
/dsp/window.go:
--------------------------------------------------------------------------------
  1 | package dsp
  2 | 
  3 | import (
  4 | 	"math"
  5 | 	"strconv"
  6 | )
  7 | 
// Window coefficient tables, in float64 and float32 flavours.
//
// Each table is symmetric around its middle entry and is derived from the
// cosine-sum coefficients a0, a1, a2 used by the time-domain window functions
// in this file: the middle entry is a0, its neighbours are -a1/2 and (for
// Blackman) the outer entries are a2/2.
// NOTE(review): presumably these are frequency-domain kernels meant to be
// passed as the window argument of the sliding DFT constructors — confirm.
var (
	// Blackman window with alpha = 0.16: {a2/2, -a1/2, a0, -a1/2, a2/2}.
	BlackmanFreqCoeff = []float64{0.16 / 4, -1.0 / 4, (1 - 0.16) / 2, -1.0 / 4, 0.16 / 4}
	// Hamming window with a0 = 0.53836: {-(1-a0)/2, a0, -(1-a0)/2}.
	HammingFreqCoeff  = []float64{(0.53836 - 1) / 2, 0.53836, (0.53836 - 1) / 2}
	// Hanning window: {-0.25, 0.5, -0.25}.
	HanningFreqCoeff  = []float64{-0.25, 0.5, -0.25}

	// float32 copies of the tables above.
	BlackmanFreqCoeff32 = []float32{0.16 / 4, -1.0 / 4, (1 - 0.16) / 2, -1.0 / 4, 0.16 / 4}
	HammingFreqCoeff32  = []float32{(0.53836 - 1) / 2, 0.53836, (0.53836 - 1) / 2}
	HanningFreqCoeff32  = []float32{-0.25, 0.5, -0.25}
)
 17 | 
 18 | func TriangleWindow(output []float64) {
 19 | 	for n := range output {
 20 | 		output[n] = 1 - math.Abs((float64(n)-float64(len(output)-1)/2.0)/(float64(len(output)+1)/2.0))
 21 | 	}
 22 | }
 23 | 
 24 | func TriangleWindowF32(output []float32) {
 25 | 	for n := range output {
 26 | 		output[n] = float32(1 - math.Abs((float64(n)-float64(len(output)-1)/2.0)/(float64(len(output)+1)/2.0)))
 27 | 	}
 28 | }
 29 | 
 30 | func HammingWindow(output []float64) {
 31 | 	window(output, []float64{0.53836, 1 - 0.53836})
 32 | }
 33 | 
 34 | func HammingWindowF32(output []float32) {
 35 | 	windowF32(output, []float64{0.53836, 1 - 0.53836})
 36 | }
 37 | 
 38 | func HanningWindow(output []float64) {
 39 | 	for n := range output {
 40 | 		output[n] = 0.5 * (1 - math.Cos(2*math.Pi*float64(n)/float64(len(output)-1)))
 41 | 	}
 42 | }
 43 | 
 44 | func HanningWindowF32(output []float32) {
 45 | 	for n := range output {
 46 | 		output[n] = float32(0.5 * (1 - math.Cos(2*math.Pi*float64(n)/float64(len(output)-1))))
 47 | 	}
 48 | }
 49 | 
 50 | func BlackmanWindow(output []float64) {
 51 | 	a := 0.16
 52 | 	window(output, []float64{(1.0 - a) / 2.0, 1.0 / 2.0, a / 2.0})
 53 | }
 54 | 
 55 | func BlackmanWindowF32(output []float32) {
 56 | 	a := 0.16
 57 | 	windowF32(output, []float64{(1.0 - a) / 2.0, 1.0 / 2.0, a / 2.0})
 58 | }
 59 | 
 60 | func NuttallWindow(output []float64) {
 61 | 	window(output, []float64{0.355768, 0.487396, 0.144232, 0.012604})
 62 | }
 63 | 
 64 | func NuttallWindowF32(output []float32) {
 65 | 	windowF32(output, []float64{0.355768, 0.487396, 0.144232, 0.012604})
 66 | }
 67 | 
 68 | func window(output []float64, a []float64) {
 69 | 	if len(a) < 1 || len(a) > 4 {
 70 | 		panic("invalid window length " + strconv.Itoa(len(a)))
 71 | 	}
 72 | 	nn := float64(len(output) - 1)
 73 | 	for n := range output {
 74 | 		fn := float64(n)
 75 | 		v := a[0]
 76 | 		if len(a) > 1 {
 77 | 			v -= a[1] * math.Cos(2*math.Pi*fn/nn)
 78 | 		}
 79 | 		if len(a) > 2 {
 80 | 			v += a[2] * math.Cos(4*math.Pi*fn/nn)
 81 | 		}
 82 | 		if len(a) > 3 {
 83 | 			v -= a[3] * math.Cos(6*math.Pi*fn/nn)
 84 | 		}
 85 | 		output[n] = v
 86 | 	}
 87 | }
 88 | 
 89 | func windowF32(output []float32, a []float64) {
 90 | 	if len(a) < 1 || len(a) > 4 {
 91 | 		panic("invalid window length " + strconv.Itoa(len(a)))
 92 | 	}
 93 | 	nn := float64(len(output) - 1)
 94 | 	for n := range output {
 95 | 		fn := float64(n)
 96 | 		v := a[0]
 97 | 		if len(a) > 1 {
 98 | 			v -= a[1] * math.Cos(2*math.Pi*fn/nn)
 99 | 		}
100 | 		if len(a) > 2 {
101 | 			v += a[2] * math.Cos(4*math.Pi*fn/nn)
102 | 		}
103 | 		if len(a) > 3 {
104 | 			v -= a[3] * math.Cos(6*math.Pi*fn/nn)
105 | 		}
106 | 		output[n] = float32(v)
107 | 	}
108 | }
109 | 


--------------------------------------------------------------------------------
/examples/ax25.go:
--------------------------------------------------------------------------------
  1 | package main
  2 | 
  3 | import (
  4 | 	"encoding/hex"
  5 | 	"flag"
  6 | 	"fmt"
  7 | 	"io"
  8 | 	"log"
  9 | 	"math"
 10 | 	"os"
 11 | 
 12 | 	"github.com/samuel/go-dsp/dsp"
 13 | 	"github.com/samuel/go-dsp/dsp/ax25"
 14 | )
 15 | 
// flagVerbose is the -v command-line flag; when set, main prints every
// decoded frame with %+v instead of the short "source to destination" line.
var flagVerbose = flag.Bool("v", false, "Verbose output")
 17 | 
 18 | func main() {
 19 | 	flag.Parse()
 20 | 
 21 | 	rd := os.Stdin
 22 | 	if len(flag.Args()) > 0 && flag.Arg(0) != "-" {
 23 | 		fi, err := os.Open(flag.Arg(0))
 24 | 		if err != nil {
 25 | 			log.Fatal(err)
 26 | 		}
 27 | 		defer fi.Close()
 28 | 		rd = fi
 29 | 	}
 30 | 
 31 | 	sampleRate := 44100
 32 | 	baud := 1200
 33 | 	window := 4
 34 | 	interp := 1
 35 | 	blockSize := sampleRate / baud
 36 | 
 37 | 	goer := dsp.NewGoertzel([]int{1200, 2200}, sampleRate*interp, blockSize*interp)
 38 | 
 39 | 	threshold := 50.0
 40 | 
 41 | 	buf := make([]byte, window*2)
 42 | 	samples := make([]float32, blockSize*interp)
 43 | 	lastSample := float32(0.0)
 44 | 
 45 | 	currentTime := float64(0.0)
 46 | 	bitClock := 1.0 / float64(baud)
 47 | 	windowTime := float64(window) / float64(sampleRate)
 48 | 	timeDelta := 0.0
 49 | 	prevBit := 0
 50 | 	transition := false
 51 | 
 52 | 	ax := ax25.NewDecoder()
 53 | 	for {
 54 | 		_, err := rd.Read(buf)
 55 | 		if err == io.EOF {
 56 | 			break
 57 | 		} else if err != nil {
 58 | 			log.Fatal(err)
 59 | 		}
 60 | 
 61 | 		copy(samples, samples[window*interp:])
 62 | 
 63 | 		si := len(samples) - window*interp
 64 | 		for i := 0; i < len(buf); i += 2 {
 65 | 			s := float32(int16(buf[i])|(int16(buf[i+1])<<8)) / 32768.0
 66 | 			if interp > 1 {
 67 | 				d := (s - lastSample) / float32(interp)
 68 | 				for j := 1; j < interp; j++ {
 69 | 					lastSample += d
 70 | 					samples[si] = lastSample
 71 | 					si++
 72 | 				}
 73 | 				lastSample = s
 74 | 			}
 75 | 			samples[si] = s
 76 | 			si++
 77 | 		}
 78 | 
 79 | 		goer.Reset()
 80 | 		goer.Feed(samples)
 81 | 		mags := goer.Magnitude()
 82 | 		diff := mags[0] - mags[1]
 83 | 
 84 | 		if math.Abs(float64(diff)) > threshold {
 85 | 			b := 1
 86 | 			if diff < 0 {
 87 | 				b = 0
 88 | 			}
 89 | 			if prevBit != b {
 90 | 				transition = true
 91 | 				prevBit = b
 92 | 				// Align transitions to middle of clock tick
 93 | 				timeDelta = bitClock/2.0 - currentTime
 94 | 			}
 95 | 		}
 96 | 
 97 | 		currentTime += windowTime
 98 | 		for currentTime >= bitClock {
 99 | 			currentTime -= bitClock
100 | 			b := 1
101 | 			if transition {
102 | 				b = 0
103 | 				currentTime += timeDelta
104 | 				timeDelta = 0.0
105 | 			}
106 | 			frame := ax.Feed(b)
107 | 			if frame != nil {
108 | 				if *flagVerbose {
109 | 					fmt.Printf("%+v\n", frame)
110 | 				} else {
111 | 					fmt.Printf("%s to %s", frame.Source, frame.Destination)
112 | 					if len(frame.Repeaters) != 0 {
113 | 						fmt.Print(" via ")
114 | 						for i, r := range frame.Repeaters {
115 | 							if i != 0 {
116 | 								fmt.Print(",")
117 | 							}
118 | 							fmt.Print(r.String())
119 | 						}
120 | 					}
121 | 					fmt.Println()
122 | 				}
123 | 				fmt.Print(hex.Dump(frame.Info))
124 | 			}
125 | 			transition = false
126 | 		}
127 | 	}
128 | }
129 | 


--------------------------------------------------------------------------------
/examples/dtmf_file.go:
--------------------------------------------------------------------------------
 1 | package main
 2 | 
 3 | import (
 4 | 	"fmt"
 5 | 	"io"
 6 | 	"log"
 7 | 	"os"
 8 | 
 9 | 	"github.com/samuel/go-dsp/dsp/dtmf"
10 | )
11 | 
12 | func main() {
13 | 	sampleRate := 8000
14 | 	blockSize := 205 * sampleRate / 8000
15 | 	window := blockSize / 4
16 | 	dt := dtmf.NewStandard(sampleRate, blockSize)
17 | 	lastKey := -1
18 | 	keyCount := 0
19 | 	samples := make([]float32, blockSize)
20 | 
21 | 	rd := os.Stdin
22 | 	if len(os.Args) > 1 && os.Args[1] != "-" {
23 | 		fi, err := os.Open(os.Args[1])
24 | 		if err != nil {
25 | 			log.Fatal(err)
26 | 		}
27 | 		defer fi.Close()
28 | 		rd = fi
29 | 	}
30 | 
31 | 	buf := make([]byte, window*2)
32 | 
33 | 	for {
34 | 		_, err := rd.Read(buf)
35 | 		if err == io.EOF {
36 | 			break
37 | 		} else if err != nil {
38 | 			log.Fatal(err)
39 | 		}
40 | 
41 | 		copy(samples, samples[window:])
42 | 
43 | 		si := len(samples) - window
44 | 		for i := 0; i < len(buf); i += 2 {
45 | 			s := float32(int16(buf[i])|(int16(buf[i+1])<<8)) / 32768.0
46 | 			samples[si] = s
47 | 			si++
48 | 		}
49 | 
50 | 		if k, t := dt.Feed(samples); k == lastKey && t > 0.0 {
51 | 			keyCount++
52 | 			if keyCount == 9 {
53 | 				fmt.Printf("%c", dtmf.Keypad[k])
54 | 			}
55 | 		} else {
56 | 			lastKey = k
57 | 			keyCount = 0
58 | 		}
59 | 	}
60 | 	fmt.Println()
61 | }
62 | 


--------------------------------------------------------------------------------
/examples/dtmf_live.go:
--------------------------------------------------------------------------------
 1 | package main
 2 | 
 3 | import (
 4 | 	"fmt"
 5 | 	"log"
 6 | 	"os"
 7 | 	"os/signal"
 8 | 
 9 | 	"code.google.com/p/portaudio-go/portaudio"
10 | 	"github.com/samuel/go-dsp/dsp/dtmf"
11 | )
12 | 
13 | func main() {
14 | 	sampleRate := 44100
15 | 	blockSize := 205 * sampleRate / 8000
16 | 	window := blockSize / 4
17 | 	dt := dtmf.NewStandard(sampleRate, blockSize)
18 | 	lastKey := -1
19 | 	keyCount := 0
20 | 	samples := make([]float32, blockSize)
21 | 
22 | 	if err := portaudio.Initialize(); err != nil {
23 | 		log.Fatalf("Initialize: %+v", err)
24 | 	}
25 | 	defer func() {
26 | 		if err := portaudio.Terminate(); err != nil {
27 | 			log.Fatalf("Terminate: %+v", err)
28 | 		}
29 | 	}()
30 | 	inputBuf := make([]float32, window)
31 | 	stream, err := portaudio.OpenDefaultStream(1, 0, float64(sampleRate), len(inputBuf), inputBuf)
32 | 	if err != nil {
33 | 		log.Fatalf("OpenDefaultStream: %+v", err)
34 | 	}
35 | 	defer stream.Close()
36 | 	if err := stream.Start(); err != nil {
37 | 		log.Fatalf("Start: %+v", err)
38 | 	}
39 | 	defer stream.Stop()
40 | 
41 | 	fmt.Printf("%+v\n", stream.Info())
42 | 
43 | 	sig := make(chan os.Signal, 1)
44 | 	signal.Notify(sig, os.Interrupt, os.Kill)
45 | 	for {
46 | 		if err := stream.Read(); err != nil {
47 | 			log.Fatalf("Read: %+v", err)
48 | 		}
49 | 
50 | 		copy(samples, samples[window:])
51 | 		copy(samples[len(samples)-len(inputBuf):], inputBuf)
52 | 
53 | 		if k, t := dt.Feed(samples); k == lastKey && t > 0.0 {
54 | 			keyCount++
55 | 			if keyCount == 10 {
56 | 				fmt.Printf("%c", dtmf.Keypad[k])
57 | 			}
58 | 		} else {
59 | 			lastKey = k
60 | 			keyCount = 0
61 | 		}
62 | 
63 | 		select {
64 | 		case <-sig:
65 | 			fmt.Println()
66 | 			return
67 | 		default:
68 | 		}
69 | 	}
70 | }
71 | 


--------------------------------------------------------------------------------