├── .gitignore ├── .editorconfig ├── go.mod ├── operate ├── proxy_test.go ├── proxy_ctl.go ├── fwd_test.go ├── proxy.go └── fwd.go ├── go.sum ├── option ├── option.go ├── parsecli_test.go └── parsecli.go ├── crypto ├── chacha20 │ ├── README.md │ ├── internal │ │ ├── hardware │ │ │ ├── impl.go │ │ │ ├── impl_amd64.go │ │ │ └── impl_amd64.s │ │ ├── api │ │ │ └── api.go │ │ └── ref │ │ │ └── impl.go │ ├── chacha20.go │ └── LICENSE ├── xchacha20.go └── xchacha20_test.go ├── logger └── logger.go ├── .goreleaser.yml ├── LICENSE ├── main.go ├── netio ├── handler_test.go ├── handler.go ├── forward_test.go └── forward.go ├── docs └── README_CN.md ├── CHANGELOG ├── README.md └── socks5 └── socks5.go /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | .vscode 3 | *.exe 4 | dist 5 | -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | root = true 2 | 3 | [*] 4 | indent_style = tab 5 | indent_size = 4 6 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module iox 2 | 3 | go 1.12 4 | 5 | require golang.org/x/sys v0.0.0-20200420163511-1957bb5e6d1f 6 | -------------------------------------------------------------------------------- /operate/proxy_test.go: -------------------------------------------------------------------------------- 1 | package operate 2 | 3 | import "testing" 4 | 5 | func TestProxyLocal(t *testing.T) { 6 | ProxyLocal(":9999", false) 7 | } 8 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | golang.org/x/sys v0.0.0-20200420163511-1957bb5e6d1f h1:gWF768j/LaZugp8dyS4UwsslYCYz9XgFxvlgsn0n9H8= 2 | golang.org/x/sys 
const (
	// TCP_BUFFER_SIZE is 0x8000 bytes (32 KiB).
	// NOTE(review): presumably the size of the buffer used when copying TCP
	// streams; the consumer is not in this file — confirm in netio.
	TCP_BUFFER_SIZE = 0x8000

	// CONN_BUFFER_SIZE is 0x20 (32).
	// NOTE(review): unit and consumer are not visible here — confirm whether
	// this is a channel capacity or a connection count.
	CONN_BUFFER_SIZE = 0x20

	// UDP_PACKET_MAX_SIZE is the largest UDP payload: 0xFFFF minus 28 bytes,
	// i.e. 65507. The 28 presumably accounts for a 20-byte IPv4 header plus
	// the 8-byte UDP header.
	UDP_PACKET_MAX_SIZE = 0xFFFF - 28

	// UDP_PACKET_CHANNEL_SIZE is 0x400 (1024).
	// NOTE(review): presumably the capacity of the UDP packet queue — confirm.
	UDP_PACKET_CHANNEL_SIZE = 0x400

	// MAX_UDP_FWD_WORKER is 0x10 (16).
	// NOTE(review): presumably the number of UDP forwarding workers — confirm.
	MAX_UDP_FWD_WORKER = 0x10

	// HEARTBEAT_FREQUENCY is 30.
	// NOTE(review): presumably the control-connection heartbeat period in
	// seconds — the unit is not visible here, confirm at the use site.
	HEARTBEAT_FREQUENCY = 30
)

var (
	// TIMEOUT is the connection timeout in milliseconds (default 5000); it is
	// multiplied by time.Millisecond where connections are dialled.
	TIMEOUT = 5000

	// PROTOCOL is the transport in use, "TCP" by default.
	// NOTE(review): presumably switched by the `-u` CLI flag — confirm in
	// ParseCli.
	PROTOCOL = "TCP"

	// VERBOSE enables informational log output; logger.Info prints only when
	// it is true.
	VERBOSE = false

	// FORWARD_WITHOUT_DEC, when true, makes NewTCPCtx and NewUDPCtx treat
	// every connection as unencrypted regardless of the per-socket flag
	// (logic optimization, changed in v0.1.1).
	FORWARD_WITHOUT_DEC = false
)
18 | } 19 | } 20 | 21 | func Warn(format string, args ...interface{}) { 22 | fmt.Fprintf(os.Stderr, WARN+" "+format+"\n", args...) 23 | } 24 | 25 | func Success(format string, args ...interface{}) { 26 | fmt.Fprintf(os.Stdout, SUCCESS+" "+format+"\n", args...) 27 | } 28 | -------------------------------------------------------------------------------- /.goreleaser.yml: -------------------------------------------------------------------------------- 1 | # This is an example goreleaser.yaml file with some sane defaults. 2 | # Make sure to check the documentation at http://goreleaser.com 3 | before: 4 | hooks: 5 | # you may remove this if you don't use vgo 6 | - go mod download 7 | # you may remove this if you don't need go generate 8 | - go generate ./... 9 | builds: 10 | - env: 11 | - CGO_ENABLED=0 12 | goos: 13 | - windows 14 | - linux 15 | - darwin 16 | goarch: 17 | - amd64 18 | - 386 19 | archives: 20 | - replacements: 21 | darwin: Darwin 22 | linux: Linux 23 | windows: Windows 24 | 386: i386 25 | amd64: x86_64 26 | checksum: 27 | name_template: 'checksums.txt' 28 | snapshot: 29 | name_template: "{{ .Tag }}-next" 30 | changelog: 31 | sort: asc 32 | filters: 33 | exclude: 34 | - '^docs:' 35 | - '^test:' 36 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 iv4n 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 
13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /crypto/chacha20/internal/hardware/impl.go: -------------------------------------------------------------------------------- 1 | // Copryright (C) 2019 Yawning Angel 2 | // 3 | // This program is free software: you can redistribute it and/or modify 4 | // it under the terms of the GNU Affero General Public License as 5 | // published by the Free Software Foundation, either version 3 of the 6 | // License, or (at your option) any later version. 7 | // 8 | // This program is distributed in the hope that it will be useful, 9 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | // GNU Affero General Public License for more details. 12 | // 13 | // You should have received a copy of the GNU Affero General Public License 14 | // along with this program. If not, see . 15 | 16 | // Package hardware provides the hardware accelerated ChaCha20 implementations. 17 | package hardware 18 | 19 | import "iox/crypto/chacha20/internal/api" 20 | 21 | var hardwareImpls []api.Implementation 22 | 23 | // Register appends the implementation(s) to the provided slice, and returns the 24 | // new slice. 25 | func Register(impls []api.Implementation) []api.Implementation { 26 | return append(impls, hardwareImpls...) 
// expand32 derives the 32-byte secret key and the 24-byte nonce from the
// user-supplied key material.
//
// Keys shorter than 32 bytes are padded up to 32 bytes, each padding byte
// holding the number of bytes that were added. The nonce is the first 12 bytes
// of the (padded) material followed by its last 12 bytes.
//
// The returned slices are freshly allocated: the caller's key is never written
// to, and the key and nonce do not share memory. The previous implementation
// built the nonce with append(key[:0xC], ...), which wrote into the key's own
// backing array.
//
// NOTE(review): as a side effect of that aliasing, bytes 12..23 of the derived
// key have always been overwritten with the last 12 bytes of the material.
// That layout is reproduced here on purpose so that peers built from older
// sources still derive the same key; changing it would be a wire-incompatible
// change and should be a deliberate, versioned decision.
func expand32(key []byte) ([]byte, []byte) {
	const (
		keyLen  = 0x20
		halfLen = 0xC
	)

	// Private working copy with room for the padding.
	material := make([]byte, len(key), len(key)+keyLen)
	copy(material, key)
	if len(material) < keyLen {
		pad := byte(keyLen - len(material))
		for len(material) < keyLen {
			material = append(material, pad)
		}
	}

	nonce := make([]byte, 0, 2*halfLen)
	nonce = append(nonce, material[:halfLen]...)
	nonce = append(nonce, material[len(material)-halfLen:]...)

	secret := make([]byte, keyLen)
	copy(secret, material)
	// Historical layout: the tail half of the nonce replaces bytes 12..23.
	copy(secret[halfLen:], nonce[halfLen:])

	return secret, nonce
}
TestParseCli(t *testing.T) { 6 | var mode string 7 | var submode int 8 | var local, remote []string 9 | var lenc, renc []bool 10 | var err error 11 | 12 | mode, submode, local, remote, lenc, renc, err = ParseCli([]string{"fwd", "-l", "9999", "-r", "1.1.1.1:8888", "-k", "0001", "-v"}) 13 | if mode != "fwd" || submode != SUBMODE_L2R || lenc[0] || renc[0] || local[0] != ":9999" || remote[0] != "1.1.1.1:8888" || err != nil { 14 | t.Error("Error case 1") 15 | } 16 | 17 | mode, submode, local, remote, lenc, renc, err = ParseCli([]string{"fwd", "-l", "9999", "-l", "*8888", "-k", "0001", "-v"}) 18 | if mode != "fwd" || submode != SUBMODE_L2L || lenc[0] || !lenc[1] || local[0] != ":9999" || local[1] != ":8888" || err != nil { 19 | t.Error("Error case 2") 20 | } 21 | 22 | mode, submode, local, remote, lenc, renc, err = ParseCli([]string{"fwd", "-r", "*1.1.1.1:9999", "-r", "*1.1.1.1:8888", "-k", "0001", "-v"}) 23 | if mode != "fwd" || submode != SUBMODE_R2R || !renc[0] || !renc[1] || remote[0] != "1.1.1.1:9999" || remote[1] != "1.1.1.1:8888" || err != nil { 24 | t.Error(mode, submode, local, remote, lenc, renc, err, "Error case 3") 25 | } 26 | 27 | mode, submode, local, remote, lenc, renc, err = ParseCli([]string{"proxy", "-r", "*1.1.1.1:9999", "-r", "*1.1.1.1:8888", "-k", "0001", "-v"}) 28 | if mode != "proxy" || err != errUnrecognizedSubMode { 29 | t.Error("Error case 4") 30 | } 31 | 32 | mode, submode, local, remote, lenc, renc, err = ParseCli([]string{"fwd", "-l", ":9999", "-r", "1.1.1.1:8888", "-k", "0001", "-h"}) 33 | if mode != "fwd" || err != PrintUsage { 34 | t.Error("Error case 5") 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "iox/operate" 6 | "iox/option" 7 | "os" 8 | ) 9 | 10 | const VERSION = "0.3" 11 | 12 | func Usage() { 13 | fmt.Printf( 14 | "iox v%v\n"+ 15 | " Roaming 
intranet easier (https://github.com/eddieivan01/iox)\n\n"+ 16 | "Usage: iox fwd/proxy [-l [*]PORT] [-r [*]HOST:PORT] [-k HEX] [-t TIMEOUT] [-u] [-h] [-v]\n\n"+ 17 | "Options:\n"+ 18 | " -l [*]PORT\n"+ 19 | " port to listen on. `*` means encrypted socket\n"+ 20 | " -r [*]HOST:PORT\n"+ 21 | " remote host to connect, HOST can be IP or Domain. `*` means encrypted socket\n"+ 22 | " -k HEX\n"+ 23 | " hexadecimal format key, be used to generate AES Key and IV\n"+ 24 | " -u\n"+ 25 | " udp forward mode\n"+ 26 | " -t TIMEOUT\n"+ 27 | " set connection timeout(millisecond), default is 5000\n"+ 28 | " -v\n"+ 29 | " enable log output\n"+ 30 | " -h\n"+ 31 | " print usage then exit\n", VERSION, 32 | ) 33 | } 34 | 35 | func main() { 36 | mode, submode, local, remote, lenc, renc, err := option.ParseCli(os.Args[1:]) 37 | if err != nil { 38 | if err == option.PrintUsage { 39 | Usage() 40 | } else { 41 | fmt.Println(err.Error()) 42 | } 43 | return 44 | } 45 | 46 | switch mode { 47 | case "fwd": 48 | switch submode { 49 | case option.SUBMODE_L2R: 50 | operate.Local2Remote(local[0], remote[0], lenc[0], renc[0]) 51 | case option.SUBMODE_L2L: 52 | operate.Local2Local(local[0], local[1], lenc[0], lenc[1]) 53 | case option.SUBMODE_R2R: 54 | operate.Remote2Remote(remote[0], remote[1], renc[0], renc[1]) 55 | } 56 | case "proxy": 57 | switch submode { 58 | case option.SUBMODE_LP: 59 | operate.ProxyLocal(local[0], lenc[0]) 60 | case option.SUBMODE_RP: 61 | operate.ProxyRemote(remote[0], renc[0]) 62 | case option.SUBMODE_RPL2L: 63 | operate.ProxyRemoteL2L(local[0], local[1], lenc[0], lenc[1]) 64 | } 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /netio/handler_test.go: -------------------------------------------------------------------------------- 1 | package netio 2 | 3 | import ( 4 | "net" 5 | "testing" 6 | ) 7 | 8 | func bytesEq(a, b []byte) bool { 9 | if len(a) != len(b) { 10 | return false 11 | } 12 | 13 | for i := 0; i < len(a); i++ { 14 
| if a[i] != b[i] { 15 | return false 16 | } 17 | } 18 | 19 | return true 20 | } 21 | 22 | func TestTCPCtx(t *testing.T) { 23 | listener, err := net.Listen("tcp", "127.0.0.1:9999") 24 | if err != nil { 25 | t.Error(err.Error()) 26 | } 27 | defer listener.Close() 28 | 29 | buf := make([]byte, 1024) 30 | signal := make(chan struct{}, 1) 31 | msg := "testing message." 32 | 33 | go func() { 34 | server, err := listener.Accept() 35 | if err != nil { 36 | t.Error(err.Error()) 37 | } 38 | defer server.Close() 39 | 40 | serverCtx, _ := NewTCPCtx(server, true) 41 | serverCtx.DecryptRead(buf) 42 | signal <- struct{}{} 43 | }() 44 | 45 | client, err := net.Dial("tcp", "127.0.0.1:9999") 46 | if err != nil { 47 | t.Error(err.Error()) 48 | } 49 | defer client.Close() 50 | 51 | clientCtx, err := NewTCPCtx(client, true) 52 | if err != nil { 53 | t.Error(err.Error()) 54 | } 55 | clientCtx.EncryptWrite([]byte(msg)) 56 | 57 | <-signal 58 | if !bytesEq([]byte(msg), buf[:len(msg)]) { 59 | t.Error("TCPCtx error") 60 | } 61 | } 62 | 63 | func TestUDPConn(t *testing.T) { 64 | addr, _ := net.ResolveUDPAddr("udp", ":9999") 65 | l, _ := net.ListenUDP("udp", addr) 66 | lCtx, _ := NewUDPCtx(l, true, false) 67 | 68 | signal := make(chan struct{}, 0) 69 | 70 | go func() { 71 | addr, _ := net.ResolveUDPAddr("udp", "127.0.0.1:9999") 72 | c, _ := net.DialUDP("udp", nil, addr) 73 | cCtx, _ := NewUDPCtx(c, true, true) 74 | 75 | cCtx.EncryptWrite([]byte("testing message.")) 76 | 77 | signal <- struct{}{} 78 | }() 79 | 80 | <-signal 81 | 82 | buf := make([]byte, 32) 83 | n, err := lCtx.DecryptRead(buf) 84 | if err != nil { 85 | t.Error(err.Error()) 86 | } 87 | 88 | if string(buf[:n]) != "testing message." 
{ 89 | t.Log(buf[:n]) 90 | t.Error("UDPCtx Error") 91 | } 92 | } 93 | -------------------------------------------------------------------------------- /crypto/xchacha20_test.go: -------------------------------------------------------------------------------- 1 | package crypto 2 | 3 | import ( 4 | "testing" 5 | ) 6 | 7 | func bytesEq(a, b []byte) bool { 8 | if len(a) != len(b) { 9 | return false 10 | } 11 | 12 | for i := 0; i < len(a); i++ { 13 | if a[i] != b[i] { 14 | return false 15 | } 16 | } 17 | 18 | return true 19 | } 20 | 21 | /* 22 | func TestExpand32(t *testing.T) { 23 | src36 := []byte{ 24 | 0, 1, 2, 3, 4, 5, 6, 7, 25 | 8, 9, 0xA, 0xB, 0xC, 0xD, 0xE, 0xF, 26 | 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 27 | 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F, 28 | 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 29 | } 30 | 31 | src16 := []byte{ 32 | 0, 1, 2, 3, 4, 5, 6, 7, 33 | 8, 9, 0xA, 0xB, 0xC, 0xD, 0xE, 0xF, 34 | } 35 | 36 | src10 := []byte{ 37 | 0, 1, 2, 3, 4, 5, 6, 7, 38 | 8, 9, 39 | } 40 | 41 | var key, iv []byte 42 | key, iv = expand32(src36) 43 | if !bytesEq(key, src16) || !bytesEq(iv, []byte{ 44 | 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 45 | 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F, 46 | }) { 47 | t.Error("src36 error") 48 | } 49 | 50 | key, iv = expand32(src16) 51 | if !bytesEq(key, src16) || !bytesEq(iv, []byte{ 52 | 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 53 | 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 54 | }) { 55 | t.Error("src16 error") 56 | } 57 | 58 | key, iv = expand32(src10) 59 | if !bytesEq(key, append(src10, []byte{ 60 | 0x16, 0x16, 0x16, 0x16, 0x16, 0x16, 61 | }...)) || !bytesEq(iv, []byte{ 62 | 0x16, 0x16, 0x16, 0x16, 0x16, 0x16, 0x16, 0x16, 63 | 0x16, 0x16, 0x16, 0x16, 0x16, 0x16, 0x16, 0x16, 64 | }) { 65 | t.Error("src10 error") 66 | } 67 | } 68 | */ 69 | 70 | func TestStreamXOR(t *testing.T) { 71 | ExpandKey([]byte("KEY")) 72 | cipherA, cipherB, _ := NewCipherPair() 73 | plain := []byte("testing plain text...") 
74 | output1 := make([]byte, len(plain)) 75 | cipherA.StreamXOR(output1, plain) 76 | 77 | output2 := make([]byte, len(plain)) 78 | cipherB.StreamXOR(output2, output1) 79 | 80 | if !bytesEq(output2, plain) || bytesEq(output1, plain) { 81 | t.Error("AES-CTR error") 82 | } 83 | } 84 | -------------------------------------------------------------------------------- /crypto/chacha20/internal/api/api.go: -------------------------------------------------------------------------------- 1 | // Copryright (C) 2019 Yawning Angel 2 | // 3 | // This program is free software: you can redistribute it and/or modify 4 | // it under the terms of the GNU Affero General Public License as 5 | // published by the Free Software Foundation, either version 3 of the 6 | // License, or (at your option) any later version. 7 | // 8 | // This program is distributed in the hope that it will be useful, 9 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | // GNU Affero General Public License for more details. 12 | // 13 | // You should have received a copy of the GNU Affero General Public License 14 | // along with this program. If not, see . 15 | 16 | // Package api provides the ChaCha20 implementation abstract interface. 17 | package api 18 | 19 | const ( 20 | // BlockSize is the size of a ChaCha20 block in bytes. 21 | BlockSize = 64 22 | 23 | // StateSize is the size of the ChaCha20 state as 32 bit unsigned words. 24 | StateSize = 16 25 | 26 | // HashSize is the size of the HChaCha output in bytes. 27 | HashSize = 32 28 | 29 | // HNonceSize is the HChaCha20 nonce size in bytes. 30 | HNonceSize = 16 31 | 32 | // Sigma0 is the first word of the ChaCha constant. 33 | Sigma0 = uint32(0x61707865) 34 | 35 | // Sigma1 is the second word of the ChaCha constant. 36 | Sigma1 = uint32(0x3320646e) 37 | 38 | // Sigma2 is the third word of the ChaCha constant. 
39 | Sigma2 = uint32(0x79622d32) 40 | 41 | // Sigma3 is the fourth word of the ChaCha constant. 42 | Sigma3 = uint32(0x6b206574) 43 | ) 44 | 45 | // Implementation is a ChaCha20 implementation 46 | type Implementation interface { 47 | // Name returns the name of the implementation. 48 | Name() string 49 | 50 | // Blocks calculates the ChaCha20 blocks. If src is not nil, dst will 51 | // be set to the XOR of src with the key stream, otherwise dst will be 52 | // set to the key stream. 53 | Blocks(x *[StateSize]uint32, dst, src []byte, nrBlocks int) 54 | 55 | // HChaCha calculates the HChaCha20 hash. 56 | // 57 | // Note: `dst` is guaranteed to be HashSize bytes. 58 | HChaCha(key, nonce []byte, dst []byte) 59 | } 60 | -------------------------------------------------------------------------------- /crypto/chacha20/internal/hardware/impl_amd64.go: -------------------------------------------------------------------------------- 1 | // Copryright (C) 2019 Yawning Angel 2 | // 3 | // This program is free software: you can redistribute it and/or modify 4 | // it under the terms of the GNU Affero General Public License as 5 | // published by the Free Software Foundation, either version 3 of the 6 | // License, or (at your option) any later version. 7 | // 8 | // This program is distributed in the hope that it will be useful, 9 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | // GNU Affero General Public License for more details. 12 | // 13 | // You should have received a copy of the GNU Affero General Public License 14 | // along with this program. If not, see . 
15 | 16 | // +build amd64,!noasm 17 | 18 | package hardware 19 | 20 | import ( 21 | "iox/crypto/chacha20/internal/api" 22 | 23 | "golang.org/x/sys/cpu" 24 | ) 25 | 26 | //go:noescape 27 | func blocksAVX2(s *[api.StateSize]uint32, in, out []byte) 28 | 29 | //go:noescape 30 | func hChaChaAVX2(key, nonce []byte, dst *byte) 31 | 32 | //go:noescape 33 | func blocksSSSE3(s *[api.StateSize]uint32, in, out []byte) 34 | 35 | //go:noescape 36 | func hChaChaSSSE3(key, nonce []byte, dst *byte) 37 | 38 | type implAmd64 struct { 39 | name string 40 | 41 | blocksFn func(*[api.StateSize]uint32, []byte, []byte, int) 42 | hChaChaFn func([]byte, []byte, *byte) 43 | } 44 | 45 | func (impl *implAmd64) Name() string { 46 | return impl.name 47 | } 48 | 49 | func (impl *implAmd64) Blocks(x *[api.StateSize]uint32, dst, src []byte, nrBlocks int) { 50 | impl.blocksFn(x, dst, src, nrBlocks) 51 | } 52 | 53 | func (impl *implAmd64) HChaCha(key, nonce []byte, dst []byte) { 54 | impl.hChaChaFn(key, nonce, &dst[0]) 55 | } 56 | 57 | func blockWrapper(fn func(*[api.StateSize]uint32, []byte, []byte)) func(*[api.StateSize]uint32, []byte, []byte, int) { 58 | return func(x *[api.StateSize]uint32, dst, src []byte, nrBlocks int) { 59 | sz := nrBlocks * api.BlockSize 60 | if src != nil { 61 | fn(x, src[:sz], dst[:sz]) 62 | } else { 63 | // Sub-optimal, but the compiler special cases this to an assembly 64 | // optimized runtime.memclrNoHeapPointers, so it's not terrible. 
65 | for i := range dst[:sz] { 66 | dst[i] = 0 67 | } 68 | fn(x, dst[:sz], dst[:sz]) 69 | } 70 | } 71 | } 72 | 73 | func init() { 74 | if cpu.X86.HasAVX2 { 75 | hardwareImpls = append(hardwareImpls, &implAmd64{ 76 | name: "amd64_avx2", 77 | blocksFn: blockWrapper(blocksAVX2), 78 | hChaChaFn: hChaChaAVX2, 79 | }) 80 | } 81 | if cpu.X86.HasSSE3 { 82 | hardwareImpls = append(hardwareImpls, &implAmd64{ 83 | name: "amd64_ssse3", 84 | blocksFn: blockWrapper(blocksSSSE3), 85 | hChaChaFn: hChaChaSSSE3, 86 | }) 87 | } 88 | } 89 | -------------------------------------------------------------------------------- /netio/handler.go: -------------------------------------------------------------------------------- 1 | package netio 2 | 3 | import ( 4 | "iox/crypto" 5 | "iox/option" 6 | "net" 7 | ) 8 | 9 | type Ctx interface { 10 | DecryptRead(b []byte) (int, error) 11 | EncryptWrite(b []byte) (int, error) 12 | 13 | net.Conn 14 | } 15 | 16 | type TCPCtx struct { 17 | net.Conn 18 | encrypted bool 19 | 20 | // Ensure stream cipher synchronous 21 | encCipher *crypto.Cipher 22 | decCipher *crypto.Cipher 23 | } 24 | 25 | func NewTCPCtx(conn net.Conn, encrypted bool) (*TCPCtx, error) { 26 | // if tc, ok := conn.(*net.TCPConn); ok { 27 | // tc.SetLinger(0) 28 | // } 29 | 30 | encrypted = encrypted && !option.FORWARD_WITHOUT_DEC 31 | 32 | ctx := &TCPCtx{ 33 | Conn: conn, 34 | encrypted: encrypted, 35 | } 36 | 37 | if encrypted { 38 | encCipher, decCipher, err := crypto.NewCipherPair() 39 | if err != nil { 40 | return nil, err 41 | } 42 | 43 | ctx.encCipher = encCipher 44 | ctx.decCipher = decCipher 45 | } 46 | 47 | return ctx, nil 48 | } 49 | 50 | func (c *TCPCtx) DecryptRead(b []byte) (int, error) { 51 | n, err := c.Read(b) 52 | if err != nil { 53 | return n, err 54 | } 55 | 56 | if c.encrypted { 57 | c.decCipher.StreamXOR(b[:n], b[:n]) 58 | } 59 | 60 | return n, err 61 | } 62 | 63 | func (c *TCPCtx) EncryptWrite(b []byte) (int, error) { 64 | if c.encrypted { 65 | c.encCipher.StreamXOR(b, 
b) 66 | } 67 | return c.Write(b) 68 | } 69 | 70 | type UDPCtx struct { 71 | *net.UDPConn 72 | encrypted bool 73 | connected bool 74 | remoteAddr *net.UDPAddr 75 | 76 | // sync.Mutex 77 | } 78 | 79 | func NewUDPCtx(conn *net.UDPConn, encrypted bool, connected bool) (*UDPCtx, error) { 80 | encrypted = encrypted && !option.FORWARD_WITHOUT_DEC 81 | 82 | ctx := &UDPCtx{ 83 | UDPConn: conn, 84 | encrypted: encrypted, 85 | connected: connected, 86 | } 87 | 88 | return ctx, nil 89 | } 90 | 91 | // Encryption for packet is different from stream 92 | func (c *UDPCtx) DecryptRead(b []byte) (int, error) { 93 | var n int 94 | var err error 95 | 96 | if !c.connected { 97 | var remoteAddr *net.UDPAddr 98 | n, remoteAddr, err = c.ReadFromUDP(b) 99 | if err != nil { 100 | return n, err 101 | } 102 | c.remoteAddr = remoteAddr 103 | 104 | } else { 105 | n, err = c.Read(b) 106 | if err != nil { 107 | return n, err 108 | } 109 | } 110 | 111 | if c.encrypted { 112 | // no nonce, skip this packet 113 | if len(b) < 0x18 { 114 | return 0, nil 115 | } 116 | nonce := b[n-0x18 : n] 117 | b = b[:n-0x18] 118 | 119 | cipher, err := crypto.NewCipher(nonce) 120 | if err != nil { 121 | return 0, err 122 | } 123 | 124 | n -= 0x18 125 | cipher.StreamXOR(b[:n], b[:n]) 126 | } 127 | 128 | return n, err 129 | } 130 | 131 | func (c *UDPCtx) EncryptWrite(b []byte) (int, error) { 132 | if c.encrypted { 133 | iv, err := crypto.RandomNonce() 134 | cipher, err := crypto.NewCipher(iv) 135 | if err != nil { 136 | return 0, err 137 | } 138 | 139 | cipher.StreamXOR(b, b) 140 | b = append(b, iv...) 
// bytesEq reports whether a and b have the same length and the same contents.
//
// The length check keeps the comparison from indexing past the end of a
// shorter b and stops a longer b from matching on its prefix alone; it also
// makes this helper agree with the bytesEq helpers in the test files.
func bytesEq(a, b []byte) bool {
	if len(a) != len(b) {
		return false
	}

	for i := range a {
		if a[i] != b[i] {
			return false
		}
	}

	return true
}
net.Listener) net.Conn { 92 | var masterConn net.Conn 93 | var err error 94 | for { 95 | masterConn, err = listener.Accept() 96 | if err != nil { 97 | continue 98 | } 99 | 100 | pb, err := readUntilEnd(masterConn) 101 | if err != nil { 102 | continue 103 | } 104 | 105 | p, err := unserialize(pb) 106 | if err != nil { 107 | continue 108 | } 109 | 110 | if p.CMD == CTL_HANDSHAKE && p.N == CLIENT_HANDSHAKE { 111 | logger.Success("Remote socks5 handshake ok") 112 | masterConn.Write(serialize(Protocol{ 113 | CMD: CTL_HANDSHAKE, 114 | N: SERVER_HANDSHAKE, 115 | })) 116 | break 117 | } 118 | } 119 | 120 | return masterConn 121 | } 122 | 123 | func clientHandshake(remote string) (net.Conn, error) { 124 | masterConn, err := net.DialTimeout( 125 | "tcp", 126 | remote, 127 | time.Millisecond*time.Duration(option.TIMEOUT), 128 | ) 129 | if err != nil { 130 | return nil, err 131 | } 132 | 133 | masterConn.Write(serialize(Protocol{ 134 | CMD: CTL_HANDSHAKE, 135 | N: CLIENT_HANDSHAKE, 136 | })) 137 | 138 | pb, err := readUntilEnd(masterConn) 139 | if err != nil { 140 | return nil, errors.New("Connect to remote forward server error") 141 | } 142 | 143 | p, err := unserialize(pb) 144 | if err != nil { 145 | return nil, errors.New("Connect to remote forward server error") 146 | } 147 | if p.CMD == CTL_HANDSHAKE && p.N == SERVER_HANDSHAKE { 148 | logger.Success("Connect to remote forward server ok") 149 | } else { 150 | return nil, errors.New("Connect to remote forward server error") 151 | } 152 | 153 | return masterConn, nil 154 | } 155 | -------------------------------------------------------------------------------- /docs/README_CN.md: -------------------------------------------------------------------------------- 1 | # iox 2 | 3 | [English](https://github.com/EddieIvan01/iox) | 中文 4 | 5 | 端口转发 & 内网代理工具,功能类似于`lcx`/`ew`,但是比它们更好 6 | 7 | ## 为什么写iox? 
8 | 9 | `lcx`和`ew`是很优秀的工具,但还可以提高 10 | 11 | 在最初使用它们的一段时间里,我都记不住那些复杂的命令行参数,诸如`tran, slave, rcsocks, sssocks`。工具的工作模式很清晰,明明可以用简单的参数表示,为什么他们要设计成这样(特别是`ew`的`-l -d -e -f -g -h`) 12 | 13 | 除此之外,我认为网络编程的逻辑可以优化 14 | 15 | 举个栗子,当运行`lcx -listen 8888 9999`命令时,客户端必须先连`:8888`,再连`:9999`,实际上这两个端口是平等的,在`iox`里则没有这个限制。当运行`lcx -slave 1.1.1.1 8888 1.1.1.1 9999`命令时,`lcx`会串行的连接两个主机,但是并发连接两个主机会更高效,毕竟是纯I/O操作,`iox`就是这样做的 16 | 17 | 更进一步,`iox`提供了流量加密功能。实际上,你可以直接将`iox`当做一个简易的ShadowSocks使用 18 | 19 | `iox`还提供了UDP流量转发的功能 20 | 21 | 当然,因为`iox`是用Go写的,所以静态连接的程序有一点大,原程序有2.2MB(UPX压缩后800KB) 22 | 23 | ## 特性 24 | 25 | + 流量加密(可选) 26 | + 友好的命令行参数 27 | + 逻辑优化 28 | + UDP流量转发 29 | 30 | ## 用法 31 | 32 | 所有的参数都是统一的。`-l/--local`意为监听本地端口;`-r/--remote`意为连接远端主机 33 | 34 | ### 两种模式 35 | 36 | **fwd**: 37 | 38 | 监听 `0.0.0.0:8888` 和`0.0.0.0:9999`,将两个连接间的流量转发 39 | 40 | ``` 41 | ./iox fwd -l 8888 -l 9999 42 | 43 | 44 | for lcx: 45 | ./lcx -listen 8888 9999 46 | ``` 47 | 48 | 监听`0.0.0.0:8888`,把流量转发到`1.1.1.1:9999` 49 | 50 | ``` 51 | ./iox fwd -l 8888 -r 1.1.1.1:9999 52 | 53 | 54 | for lcx: 55 | ./lcx -tran 8888 1.1.1.1 9999 56 | ``` 57 | 58 | 连接`1.1.1.1:8888`和`1.1.1.1:9999`, 在两个连接间转发 59 | 60 | ``` 61 | ./iox fwd -r 1.1.1.1:8888 -r 1.1.1.1:9999 62 | 63 | 64 | for lcx: 65 | ./lcx -slave 1.1.1.1 8888 1.1.1.1 9999 66 | ``` 67 | 68 | **proxy** 69 | 70 | 在本地 `0.0.0.0:1080`启动Socks5服务 71 | 72 | ``` 73 | ./iox proxy -l 1080 74 | 75 | 76 | for ew: 77 | ./ew -s ssocksd -l 1080 78 | ``` 79 | 80 | 在被控机开启Socks5服务,将服务转发到公网VPS 81 | 82 | 在VPS上转发`0.0.0.0:9999`到`0.0.0.0:1080` 83 | 84 | 你必须将两条命令成对使用,因为它内部包含了一个简单的协议来控制回连 85 | 86 | ``` 87 | ./iox proxy -r 1.1.1.1:9999 88 | ./iox proxy -l 9999 -l 1080 // 注意,这两个端口是有顺序的 89 | 90 | 91 | for ew: 92 | ./ew -s rcsocks -l 1080 -e 9999 93 | ./ew -s rssocks -d 1.1.1.1 -e 9999 94 | ``` 95 | 96 | 接着连接内网主机 97 | 98 | ``` 99 | # proxychains.conf 100 | # socks5://1.1.1.1:1080 101 | 102 | $ proxychains rdesktop 192.168.0.100:3389 103 | ``` 104 | 105 | *** 106 | 107 | ### 启用加密 108 | 109 | 举个栗子,我们把内网3389端口转发到VPS 110 | 
111 | ``` 112 | // 被控主机 113 | ./iox fwd -r 192.168.0.100:3389 -r *1.1.1.1:8888 -k 656565 114 | 115 | 116 | // 我们的VPS 117 | ./iox fwd -l *8888 -l 33890 -k 656565 118 | ``` 119 | 120 | 很好理解:被控主机和VPS:8888之间的流量会被加密,预共享的密钥是'eee'(`-k`参数是密钥的十六进制编码,即`656565`),`iox`会用这个密钥生成种子密钥和nonce(**正常来讲,不应该复用nonce。但是考虑到iox的加密功能仅仅为了绕过IDS等设备,为了不额外分配空间,TCP流加密会复用nonce**),并用Xchacha20流加密 (在v0.3版本中用Xchacha20替换掉了AES-CTR) 121 | 122 | 所以,`*`应该成对使用 123 | 124 | ``` 125 | ./iox fwd -l 1000 -r *127.0.0.1:1001 -k 000102 126 | ./iox fwd -l *1001 -r *127.0.0.1:1002 -k 000102 127 | ./iox fwd -l *1002 -r *127.0.0.1:1003 -k 000102 128 | ./iox proxy -l *1003 -k 000102 129 | 130 | 131 | $ curl google.com -x socks5://127.0.0.1:1000 132 | ``` 133 | 134 | 你也可以把`iox`当做一个简单的ShadowSocks来用: 135 | 136 | ``` 137 | // ssserver 138 | ./iox proxy -l *9999 -k 000102 139 | 140 | 141 | // sslocal 142 | ./iox fwd -l 1080 -r *VPS:9999 -k 000102 143 | ``` 144 | 145 | ### UDP转发 146 | 147 | 只需要添加命令行参数:`-u` 148 | 149 | ``` 150 | ./iox fwd -l 53 -r *127.0.0.1:8888 -k 000102 -u 151 | ./iox fwd -l *8888 -l *9999 -k 000102 -u 152 | ./iox fwd -r *127.0.0.1:9999 -r 8.8.8.8:53 -k 000102 -u 153 | ``` 154 | 155 | **注意:当你做多级连接的转发时,`Remote2Remote-UDP-mode`必须最后一个被启动,也就是上面示例中的第三条** 156 | 157 | UDP转发可能会有一些不合你预期的行为。实际上,目前在GitHub上只有将本地监听的UDP流量转发到远程主机的例子,所以我只能以我的理解来实现 158 | 159 | 你可以在源码里找到答案,如果你有什么想法,欢迎提PR / issue 160 | 161 | ## 许可 162 | 163 | The MIT license 164 | 165 | -------------------------------------------------------------------------------- /CHANGELOG: -------------------------------------------------------------------------------- 1 | v0.3: 2 | 1. Replace AES-CTR with XChaCha20. 3 | Because in Golang, only AES-GCM on AMD64/ARM64 has special assembly-speed-up 4 | optimization. In summary, in Golang, except for the AES-GCM on AMD64/ARM64 architecture, 5 | all AES-XXX else are slower than ChaCha20 (look at the benchmark below). 6 | Also, AES-GCM in Golang's implementation is not a stream operator, and needs 7 | 2X alloc overhead, so it doesn't fit. 
8 | Considering all, ChaCha20 is the better choice. 9 | The benchmark: 10 | goos: windows 11 | goarch: amd64 12 | BenchmarkAESGCMSeal1K-4 3880944 308 ns/op 3326.14 MB/s 13 | BenchmarkAESGCMOpen1K-4 4092999 290 ns/op 3525.53 MB/s 14 | BenchmarkAESGCMSeal8K-4 600762 1849 ns/op 4429.61 MB/s 15 | BenchmarkAESGCMOpen8K-4 632307 1828 ns/op 4481.21 MB/s 16 | BenchmarkAESGCMSeal32K-4 164750 7034 ns/op 4658.32 MB/s 17 | BenchmarkAESGCMOpen32K-4 169389 6871 ns/op 4768.88 MB/s 18 | BenchmarkAESCTR1K-4 752043 1408 ns/op 723.51 MB/s 19 | BenchmarkAESCTR8K-4 105513 11078 ns/op 739.05 MB/s 20 | BenchmarkAESCTR32K-4 26914 44505 ns/op 736.17 MB/s 21 | BenchmarkChacha201K-4 1000000 1083 ns/op 940.83 MB/s 22 | BenchmarkChacha208K-4 300565 3942 ns/op 2076.80 MB/s 23 | BenchmarkChacha2032K-4 85921 13720 ns/op 2387.96 MB/s 24 | 25 | goos: windows 26 | goarch: 386 27 | BenchmarkAESGCMSeal1K-4 56220 21181 ns/op 48.35 MB/s 28 | BenchmarkAESGCMOpen1K-4 56224 21535 ns/op 47.55 MB/s 29 | BenchmarkAESGCMSeal8K-4 6332 166170 ns/op 49.30 MB/s 30 | BenchmarkAESGCMOpen8K-4 7063 167895 ns/op 48.79 MB/s 31 | BenchmarkAESGCMSeal32K-4 1693 667421 ns/op 49.10 MB/s 32 | BenchmarkAESGCMOpen32K-4 1718 660650 ns/op 49.60 MB/s 33 | BenchmarkAESCTR1K-4 132240 9035 ns/op 112.78 MB/s 34 | BenchmarkAESCTR8K-4 16527 72232 ns/op 113.34 MB/s 35 | BenchmarkAESCTR32K-4 4009 288576 ns/op 113.53 MB/s 36 | BenchmarkChacha201K-4 343777 3426 ns/op 297.41 MB/s 37 | BenchmarkChacha208K-4 48712 24447 ns/op 334.89 MB/s 38 | BenchmarkChacha2032K-4 12442 95950 ns/op 341.46 MB/s 39 | 40 | 2. Increase the TCP_BUFFER_SIZE to 0x8000 41 | 42 | 3. Fix a bug in UDP forward 43 | 44 | 45 | v0.2.1: 46 | 1. Add heartbeat for remote-proxy's ctl-connection, to prevent 47 | the NAT device from dropping mapping rules 48 | 49 | 2. Reduce `Remote2Remote` function's retry frequency 50 | 51 | 52 | v0.2: 53 | 1. Add UDP forward mode, CLI option: `-u` 54 | 55 | 56 | v0.1.1: 57 | 1. 
Logic optimization, while both two connections are encrypted, 58 | traffic will be forwarded without additional encryption and decryption 59 | 60 | 2. Made some little improvements 61 | -------------------------------------------------------------------------------- /operate/fwd_test.go: -------------------------------------------------------------------------------- 1 | package operate 2 | 3 | import ( 4 | "iox/netio" 5 | "net" 6 | "testing" 7 | "time" 8 | ) 9 | 10 | // run forever 11 | func testLocal2Local(t *testing.T) { 12 | msgA := "FROM A" 13 | msgB := "FROM B" 14 | 15 | bufA := make([]byte, 1024) 16 | bufB := make([]byte, 1024) 17 | 18 | go func() { 19 | localA, err := net.DialTimeout("tcp", "127.0.0.1:9999", time.Second*3) 20 | if err != nil { 21 | t.Error(err.Error()) 22 | } 23 | defer localA.Close() 24 | 25 | localCtxA, err := netio.NewTCPCtx(localA, true) 26 | if err != nil { 27 | t.Error(err.Error()) 28 | } 29 | 30 | localCtxA.EncryptWrite([]byte(msgA)) 31 | localCtxA.DecryptRead(bufA) 32 | }() 33 | 34 | go func() { 35 | localB, err := net.DialTimeout("tcp", "127.0.0.1:8888", time.Second*3) 36 | if err != nil { 37 | t.Error(err.Error()) 38 | } 39 | defer localB.Close() 40 | 41 | localCtxB, err := netio.NewTCPCtx(localB, true) 42 | if err != nil { 43 | t.Error(err.Error()) 44 | } 45 | 46 | localCtxB.EncryptWrite([]byte(msgB)) 47 | localCtxB.DecryptRead(bufB) 48 | }() 49 | 50 | Local2Local(":9999", ":8888", true, true) 51 | 52 | if string(bufA[:len(msgB)]) != msgB || string(bufB[:len(msgA)]) != msgA { 53 | t.Error("Local2Local error") 54 | } 55 | } 56 | 57 | func TestRemote2Remote(t *testing.T) { 58 | msgA := "FROM A" 59 | msgB := "FROM B" 60 | 61 | bufA := make([]byte, 1024) 62 | bufB := make([]byte, 1024) 63 | 64 | go func() { 65 | listenerA, err := net.Listen("tcp", ":9999") 66 | if err != nil { 67 | t.Error(err.Error()) 68 | } 69 | defer listenerA.Close() 70 | 71 | connA, err := listenerA.Accept() 72 | if err != nil { 73 | t.Error(err.Error()) 74 | } 
75 | defer connA.Close() 76 | 77 | connCtxA, err := netio.NewTCPCtx(connA, true) 78 | if err != nil { 79 | t.Error(err.Error()) 80 | } 81 | 82 | connCtxA.EncryptWrite([]byte(msgA)) 83 | connCtxA.DecryptRead(bufA) 84 | }() 85 | 86 | go func() { 87 | listenerB, err := net.Listen("tcp", ":8888") 88 | if err != nil { 89 | t.Error(err.Error()) 90 | } 91 | defer listenerB.Close() 92 | 93 | connB, err := listenerB.Accept() 94 | if err != nil { 95 | t.Error(err.Error()) 96 | } 97 | defer connB.Close() 98 | 99 | connCtxB, err := netio.NewTCPCtx(connB, true) 100 | if err != nil { 101 | t.Error(err.Error()) 102 | } 103 | 104 | connCtxB.EncryptWrite([]byte(msgB)) 105 | connCtxB.DecryptRead(bufB) 106 | }() 107 | 108 | Remote2Remote("127.0.0.1:9999", "127.0.0.1:8888", true, true) 109 | if string(bufA[:len(msgB)]) != msgB || string(bufB[:len(msgA)]) != msgA { 110 | t.Error("Remote2Remote error") 111 | } 112 | } 113 | 114 | // run forever 115 | func testLocal2Remote(t *testing.T) { 116 | msgA := "FROM A" 117 | msgB := "FROM B" 118 | 119 | bufA := make([]byte, 1024) 120 | bufB := make([]byte, 1024) 121 | 122 | go func() { 123 | localA, err := net.DialTimeout("tcp", "127.0.0.1:9999", time.Second*3) 124 | if err != nil { 125 | t.Error(err.Error()) 126 | } 127 | defer localA.Close() 128 | 129 | localCtxA, err := netio.NewTCPCtx(localA, true) 130 | if err != nil { 131 | t.Error(err.Error()) 132 | } 133 | 134 | localCtxA.EncryptWrite([]byte(msgA)) 135 | localCtxA.DecryptRead(bufA) 136 | }() 137 | 138 | go func() { 139 | listenerB, err := net.Listen("tcp", ":8888") 140 | if err != nil { 141 | t.Error(err.Error()) 142 | } 143 | defer listenerB.Close() 144 | 145 | connB, err := listenerB.Accept() 146 | if err != nil { 147 | t.Error(err.Error()) 148 | } 149 | defer connB.Close() 150 | 151 | connCtxB, err := netio.NewTCPCtx(connB, true) 152 | if err != nil { 153 | t.Error(err.Error()) 154 | } 155 | 156 | connCtxB.EncryptWrite([]byte(msgB)) 157 | connCtxB.DecryptRead(bufB) 158 | }() 159 | 160 
| Local2Remote(":9999", "127.0.0.1:8888", true, true) 161 | if string(bufA[:len(msgB)]) != msgB || string(bufB[:len(msgA)]) != msgA { 162 | t.Error("Remote2Remote error") 163 | } 164 | } 165 | -------------------------------------------------------------------------------- /option/parsecli.go: -------------------------------------------------------------------------------- 1 | package option 2 | 3 | import ( 4 | "encoding/hex" 5 | "errors" 6 | "iox/crypto" 7 | "strconv" 8 | ) 9 | 10 | var ( 11 | errUnrecognizedMode = errors.New("Unrecognized mode") 12 | errHexDecodeError = errors.New("Not hexadecimal string") 13 | PrintUsage = errors.New("") 14 | errUnrecognizedSubMode = errors.New("Malform args") 15 | errNoSecretKey = errors.New("Must provide secret key") 16 | errNotANumber = errors.New("Timeout must be a number") 17 | errUDPMode = errors.New("UDP mode only support fwd mode") 18 | ) 19 | 20 | const ( 21 | SUBMODE_L2L = iota 22 | SUBMODE_R2R 23 | SUBMODE_L2R 24 | 25 | SUBMODE_LP 26 | SUBMODE_RP 27 | SUBMODE_RPL2L 28 | ) 29 | 30 | // Dont need flag-lib 31 | func ParseCli(args []string) ( 32 | mode string, 33 | submode int, 34 | local []string, 35 | remote []string, 36 | lenc []bool, 37 | renc []bool, 38 | err error) { 39 | 40 | if len(args) == 0 { 41 | err = PrintUsage 42 | return 43 | } 44 | 45 | mode = args[0] 46 | 47 | switch mode { 48 | case "fwd", "proxy": 49 | case "-h", "--help": 50 | err = PrintUsage 51 | return 52 | default: 53 | err = errUnrecognizedMode 54 | return 55 | } 56 | 57 | args = args[1:] 58 | ptr := 0 59 | 60 | for { 61 | if ptr == len(args) { 62 | break 63 | } 64 | 65 | switch args[ptr] { 66 | case "-l", "--local": 67 | l := args[ptr+1] 68 | if l[0] == '*' { 69 | lenc = append(lenc, true) 70 | l = l[1:] 71 | } else { 72 | lenc = append(lenc, false) 73 | } 74 | 75 | local = append(local, ":"+l) 76 | ptr++ 77 | 78 | case "-r", "--remote": 79 | r := args[ptr+1] 80 | if r[0] == '*' { 81 | renc = append(renc, true) 82 | r = r[1:] 83 | } else { 84 | 
renc = append(renc, false) 85 | } 86 | 87 | remote = append(remote, r) 88 | ptr++ 89 | 90 | case "-u", "--udp": 91 | PROTOCOL = "UDP" 92 | 93 | case "-k", "--key": 94 | var key []byte 95 | key, err = hex.DecodeString(args[ptr+1]) 96 | if err != nil { 97 | err = errHexDecodeError 98 | return 99 | } 100 | crypto.ExpandKey(key) 101 | ptr++ 102 | 103 | case "-t", "--timeout": 104 | TIMEOUT, err = strconv.Atoi(args[ptr+1]) 105 | if err != nil { 106 | err = errNotANumber 107 | return 108 | } 109 | ptr++ 110 | case "-v", "--verbose": 111 | VERBOSE = true 112 | case "-h", "--help": 113 | err = PrintUsage 114 | return 115 | } 116 | 117 | ptr++ 118 | } 119 | 120 | if mode == "fwd" { 121 | switch { 122 | case len(local) == 0 && len(remote) == 2: 123 | submode = SUBMODE_R2R 124 | case len(local) == 1 && len(remote) == 1: 125 | submode = SUBMODE_L2R 126 | case len(local) == 2 && len(remote) == 0: 127 | submode = SUBMODE_L2L 128 | default: 129 | err = errUnrecognizedSubMode 130 | return 131 | } 132 | } else { 133 | switch { 134 | case len(local) == 0 && len(remote) == 1: 135 | submode = SUBMODE_RP 136 | case len(local) == 1 && len(remote) == 0: 137 | submode = SUBMODE_LP 138 | case len(local) == 2 && len(remote) == 0: 139 | submode = SUBMODE_RPL2L 140 | default: 141 | err = errUnrecognizedSubMode 142 | return 143 | } 144 | } 145 | 146 | if len(lenc) != len(local) || len(renc) != len(remote) { 147 | err = errUnrecognizedSubMode 148 | return 149 | } 150 | 151 | if crypto.SECRET_KEY == nil { 152 | for i, _ := range lenc { 153 | if lenc[i] { 154 | err = errNoSecretKey 155 | return 156 | } 157 | } 158 | 159 | for i, _ := range renc { 160 | if renc[i] { 161 | err = errNoSecretKey 162 | return 163 | } 164 | } 165 | } 166 | 167 | if PROTOCOL == "UDP" && mode == "proxy" { 168 | err = errUDPMode 169 | return 170 | } 171 | 172 | shouldFwdWithoutDec(lenc, renc) 173 | 174 | return 175 | } 176 | 177 | func shouldFwdWithoutDec(lenc []bool, renc []bool) { 178 | if len(lenc)+len(renc) != 2 { 179 
| return 180 | } 181 | 182 | var result uint8 183 | for i, _ := range lenc { 184 | if lenc[i] { 185 | result++ 186 | } 187 | } 188 | 189 | for i, _ := range renc { 190 | if renc[i] { 191 | result++ 192 | } 193 | } 194 | 195 | if result == 2 { 196 | FORWARD_WITHOUT_DEC = true 197 | } 198 | } 199 | -------------------------------------------------------------------------------- /netio/forward_test.go: -------------------------------------------------------------------------------- 1 | package netio 2 | 3 | import ( 4 | "bytes" 5 | "iox/crypto" 6 | "iox/option" 7 | "net" 8 | "testing" 9 | "time" 10 | ) 11 | 12 | type _buffer struct { 13 | bytes.Buffer 14 | } 15 | 16 | func (b *_buffer) EncryptWrite(bs []byte) (int, error) { return b.Write(bs) } 17 | func (b *_buffer) DecryptRead(bs []byte) (int, error) { return b.Read(bs) } 18 | func (b _buffer) Close() error { return nil } 19 | func (b _buffer) LocalAddr() net.Addr { return nil } 20 | func (b _buffer) RemoteAddr() net.Addr { return nil } 21 | func (b _buffer) SetDeadline(t time.Time) error { return nil } 22 | func (b _buffer) SetReadDeadline(t time.Time) error { return nil } 23 | func (b _buffer) SetWriteDeadline(t time.Time) error { return nil } 24 | 25 | func TestCipherCopy(t *testing.T) { 26 | option.KEY = []byte("KEY") 27 | crypto.ExpandKey(option.KEY) 28 | 29 | listener, err := net.Listen("tcp", "127.0.0.1:9999") 30 | if err != nil { 31 | t.Error(err.Error()) 32 | } 33 | defer listener.Close() 34 | 35 | buf := &_buffer{} 36 | 37 | signal := make(chan struct{}, 1) 38 | go func() { 39 | localConn, err := listener.Accept() 40 | if err != nil { 41 | t.Error(err.Error()) 42 | } 43 | 44 | localConnCtx, err := NewTCPCtx(localConn, true) 45 | if err != nil { 46 | t.Error(err.Error()) 47 | } 48 | 49 | CipherCopy(buf, localConnCtx) 50 | signal <- struct{}{} 51 | }() 52 | 53 | conn, err := net.Dial("tcp", "127.0.0.1:9999") 54 | if err != nil { 55 | t.Error(err.Error()) 56 | } 57 | 58 | connCtx, err := NewTCPCtx(conn, true) 
59 | if err != nil { 60 | t.Error(err.Error()) 61 | } 62 | 63 | msg := "testing message." 64 | _, err = connCtx.EncryptWrite([]byte(msg)) 65 | if err != nil { 66 | t.Error(err.Error()) 67 | } 68 | conn.Close() 69 | 70 | <-signal 71 | if buf.String() != msg { 72 | t.Log(buf.Bytes()) 73 | t.Error("CipherCopy error") 74 | } 75 | } 76 | 77 | func TestPipeForward(t *testing.T) { 78 | option.KEY = []byte("KEY") 79 | crypto.ExpandKey(option.KEY) 80 | listenerA, err := net.Listen("tcp", "127.0.0.1:9999") 81 | if err != nil { 82 | t.Error(err.Error()) 83 | } 84 | defer listenerA.Close() 85 | 86 | listenerB, err := net.Listen("tcp", "127.0.0.1:8888") 87 | if err != nil { 88 | t.Error(err.Error()) 89 | } 90 | defer listenerB.Close() 91 | 92 | var connA, connB net.Conn 93 | signal := make(chan struct{}, 1) 94 | 95 | msgA := "FROM A" 96 | msgB := "FROM B" 97 | 98 | bufA := make([]byte, 1024) 99 | bufB := make([]byte, 1024) 100 | 101 | go func() { 102 | localA, err := net.DialTimeout("tcp", "127.0.0.1:9999", time.Second*3) 103 | if err != nil { 104 | t.Error(err.Error()) 105 | } 106 | defer localA.Close() 107 | 108 | localCtxA, err := NewTCPCtx(localA, true) 109 | if err != nil { 110 | t.Error(err.Error()) 111 | } 112 | 113 | localCtxA.EncryptWrite([]byte(msgA)) 114 | localCtxA.DecryptRead(bufA) 115 | 116 | signal <- struct{}{} 117 | }() 118 | 119 | go func() { 120 | localB, err := net.DialTimeout("tcp", "127.0.0.1:8888", time.Second*3) 121 | if err != nil { 122 | t.Error(err.Error()) 123 | } 124 | defer localB.Close() 125 | 126 | localCtxB, err := NewTCPCtx(localB, true) 127 | if err != nil { 128 | t.Error(err.Error()) 129 | } 130 | 131 | localCtxB.EncryptWrite([]byte(msgB)) 132 | localCtxB.DecryptRead(bufB) 133 | 134 | signal <- struct{}{} 135 | }() 136 | 137 | go func() { 138 | var err error 139 | connA, err = listenerA.Accept() 140 | if err != nil { 141 | t.Error(err.Error()) 142 | } 143 | signal <- struct{}{} 144 | }() 145 | 146 | go func() { 147 | var err error 148 | 
connB, err = listenerB.Accept() 149 | if err != nil { 150 | t.Error(err.Error()) 151 | } 152 | signal <- struct{}{} 153 | }() 154 | 155 | <-signal 156 | <-signal 157 | 158 | connCtxA, err := NewTCPCtx(connA, true) 159 | if err != nil { 160 | t.Error(err.Error()) 161 | } 162 | 163 | connCtxB, err := NewTCPCtx(connB, true) 164 | if err != nil { 165 | t.Error(err.Error()) 166 | } 167 | 168 | PipeForward(connCtxA, connCtxB) 169 | 170 | <-signal 171 | <-signal 172 | 173 | if string(bufA[:len(msgB)]) != msgB || string(bufB[:len(msgA)]) != msgA { 174 | t.Error("PipeForward error") 175 | } 176 | } 177 | -------------------------------------------------------------------------------- /netio/forward.go: -------------------------------------------------------------------------------- 1 | package netio 2 | 3 | import ( 4 | "io" 5 | "iox/logger" 6 | "iox/option" 7 | ) 8 | 9 | // Memory optimized 10 | func CipherCopy(dst Ctx, src Ctx) (int64, error) { 11 | buffer := make([]byte, option.TCP_BUFFER_SIZE) 12 | var written int64 13 | var err error 14 | 15 | for { 16 | var nr int 17 | var er error 18 | 19 | nr, er = src.DecryptRead(buffer) 20 | 21 | if nr > 0 { 22 | logger.Info(" <== [%d bytes] ==", nr) 23 | 24 | var nw int 25 | var ew error 26 | 27 | nw, ew = dst.EncryptWrite(buffer[:nr]) 28 | 29 | if nw > 0 { 30 | logger.Info(" == [%d bytes] ==> ", nw) 31 | written += int64(nw) 32 | } 33 | if ew != nil { 34 | err = ew 35 | break 36 | } 37 | if nr != nw { 38 | err = io.ErrShortWrite 39 | break 40 | } 41 | } 42 | if er != nil { 43 | if er != io.EOF { 44 | err = er 45 | } 46 | break 47 | } 48 | } 49 | 50 | return written, err 51 | } 52 | 53 | func PipeForward(ctxA Ctx, ctxB Ctx) { 54 | signal := make(chan struct{}, 1) 55 | 56 | go func() { 57 | CipherCopy(ctxA, ctxB) 58 | signal <- struct{}{} 59 | }() 60 | 61 | go func() { 62 | CipherCopy(ctxB, ctxA) 63 | signal <- struct{}{} 64 | }() 65 | 66 | <-signal 67 | } 68 | 69 | // This function will run forever 70 | // If need to do 
performance optimization in future, 71 | // I will consider a go-routine pool here, but 72 | // this can lead to mutex-lock overhead 73 | func ForwardUDP(ctxA Ctx, ctxB Ctx) { 74 | go func() { 75 | buffer := make([]byte, option.UDP_PACKET_MAX_SIZE) 76 | for { 77 | nr, _ := ctxA.DecryptRead(buffer) 78 | if nr > 0 { 79 | if nr == 4 && 80 | buffer[0] == 0xCC && buffer[1] == 0xDD && 81 | buffer[2] == 0xEE && buffer[3] == 0xFF { 82 | continue 83 | } 84 | logger.Info(" <== [%d bytes] ==", nr) 85 | nw, _ := ctxB.EncryptWrite(buffer[:nr]) 86 | if nw > 0 { 87 | logger.Info(" == [%d bytes] ==>", nw) 88 | } 89 | } 90 | } 91 | }() 92 | 93 | go func() { 94 | buffer := make([]byte, option.UDP_PACKET_MAX_SIZE) 95 | for { 96 | nr, _ := ctxB.DecryptRead(buffer) 97 | if nr > 0 { 98 | if nr == 4 && 99 | buffer[0] == 0xCC && buffer[1] == 0xDD && 100 | buffer[2] == 0xEE && buffer[3] == 0xFF { 101 | continue 102 | } 103 | logger.Info(" <== [%d bytes] ==", nr) 104 | nw, _ := ctxA.EncryptWrite(buffer[:nr]) 105 | if nw > 0 { 106 | logger.Info(" == [%d bytes] ==>", nw) 107 | } 108 | } 109 | } 110 | }() 111 | 112 | select {} 113 | } 114 | 115 | var UDP_INIT_PACKET = []byte{ 116 | 0xCC, 0xDD, 0xEE, 0xFF, 117 | } 118 | 119 | // Each socket only writes the packet to the address 120 | // which last sent packet to it recently, 121 | // instead of boardcasting to all the address. 
122 | // I think it is as expected 123 | func ForwardUnconnectedUDP(ctxA Ctx, ctxB Ctx) { 124 | addrRegistedA := false 125 | addrRegistedB := false 126 | addrRegistedSignalA := make(chan struct{}, 1) 127 | addrRegistedSignalB := make(chan struct{}, 1) 128 | 129 | packetChannelA := make(chan []byte, option.UDP_PACKET_CHANNEL_SIZE) 130 | packetChannelB := make(chan []byte, option.UDP_PACKET_CHANNEL_SIZE) 131 | 132 | // A read 133 | go func() { 134 | for { 135 | buffer := make([]byte, option.UDP_PACKET_MAX_SIZE) 136 | nr, _ := ctxA.DecryptRead(buffer) 137 | if nr > 0 { 138 | if !addrRegistedA { 139 | addrRegistedA = true 140 | addrRegistedSignalA <- struct{}{} 141 | } 142 | 143 | if !(nr == 4 && 144 | buffer[0] == 0xCC && buffer[1] == 0xDD && 145 | buffer[2] == 0xEE && buffer[3] == 0xFF) { 146 | logger.Info(" <== [%d bytes] ==", nr) 147 | packetChannelB <- buffer[:nr] 148 | } 149 | } 150 | } 151 | }() 152 | 153 | // B read 154 | go func() { 155 | for { 156 | buffer := make([]byte, option.UDP_PACKET_MAX_SIZE) 157 | nr, _ := ctxB.DecryptRead(buffer) 158 | if nr > 0 { 159 | if !addrRegistedB { 160 | addrRegistedB = true 161 | addrRegistedSignalB <- struct{}{} 162 | } 163 | 164 | if !(nr == 4 && 165 | buffer[0] == 0xCC && buffer[1] == 0xDD && 166 | buffer[2] == 0xEE && buffer[3] == 0xFF) { 167 | logger.Info(" <== [%d bytes] ==", nr) 168 | packetChannelA <- buffer[:nr] 169 | } 170 | } 171 | } 172 | }() 173 | 174 | // A write 175 | go func() { 176 | <-addrRegistedSignalA 177 | var n int 178 | for { 179 | packet := <-packetChannelA 180 | n, _ = ctxA.EncryptWrite(packet) 181 | if n > 0 { 182 | logger.Info(" == [%d bytes] ==>", n) 183 | } 184 | } 185 | }() 186 | 187 | // B write 188 | go func() { 189 | <-addrRegistedSignalB 190 | var n int 191 | for { 192 | packet := <-packetChannelB 193 | n, _ = ctxB.EncryptWrite(packet) 194 | if n > 0 { 195 | logger.Info(" == [%d bytes] ==>", n) 196 | } 197 | } 198 | }() 199 | 200 | select {} 201 | } 202 | 
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # iox 2 | 3 | English | [中文](https://github.com/EddieIvan01/iox/tree/master/docs/README_CN.md) 4 | 5 | Tool for port forward & intranet proxy, just like `lcx`/`ew`, but better 6 | 7 | ## Why write? 8 | 9 | `lcx` and `ew` are awesome, but can be improved. 10 | 11 | When I first used them, I couldn't remember their complicated parameters for a long time, such as `tran, slave, rcsocks, sssocks...`. The work modes are clear, so why were the parameters designed like this (especially `ew`'s `-l -d -e -f -g -h`)? 12 | 13 | Besides, I think the net programming logic could be optimized. 14 | 15 | For example, when running the `lcx -listen 8888 9999` command, the client must connect to `:8888` first and then `:9999`; in `iox`, there is no restriction on the order of the two ports. And when running the `lcx -slave 1.1.1.1 8888 1.1.1.1 9999` command, `lcx` connects to the two hosts serially, but it is more efficient to connect concurrently, as `iox` does. 16 | 17 | What's more, `iox` provides a traffic encryption feature. Actually, you can use `iox` as a simple ShadowSocks. 18 | 19 | And `iox` also provides UDP traffic forwarding. 20 | 21 | Of course, because `iox` is written in Go, the statically linked program is a little large: the raw binary is 2.2MB (800KB after UPX compression) 22 | 23 | ## Feature 24 | 25 | + traffic encryption (optional) 26 | + user-friendly CLI options 27 | + logic optimization 28 | + UDP traffic forward 29 | 30 | ## Usage 31 | 32 | You can see, all params are uniform. 
`-l/--local` means listen on a local port; `-r/--remote` means connect to remote host 33 | 34 | ### Two mode 35 | 36 | **fwd**: 37 | 38 | Listen on `0.0.0.0:8888` and `0.0.0.0:9999`, forward traffic between 2 connections 39 | 40 | ``` 41 | ./iox fwd -l 8888 -l 9999 42 | 43 | 44 | for lcx: 45 | ./lcx -listen 8888 9999 46 | ``` 47 | 48 | Listen on `0.0.0.0:8888`, forward traffic to `1.1.1.1:9999` 49 | 50 | ``` 51 | ./iox fwd -l 8888 -r 1.1.1.1:9999 52 | 53 | 54 | for lcx: 55 | ./lcx -tran 8888 1.1.1.1 9999 56 | ``` 57 | 58 | Connect `1.1.1.1:8888` and `1.1.1.1:9999`, forward between 2 connections 59 | 60 | ``` 61 | ./iox fwd -r 1.1.1.1:8888 -r 1.1.1.1:9999 62 | 63 | 64 | for lcx: 65 | ./lcx -slave 1.1.1.1 8888 1.1.1.1 9999 66 | ``` 67 | 68 | **proxy** 69 | 70 | Start Socks5 server on `0.0.0.0:1080` 71 | 72 | ``` 73 | ./iox proxy -l 1080 74 | 75 | 76 | for ew: 77 | ./ew -s ssocksd -l 1080 78 | ``` 79 | 80 | Start Socks5 server on be-controlled host, then forward to internet VPS 81 | 82 | VPS forward `0.0.0.0:9999` to `0.0.0.0:1080` 83 | 84 | You must use the two commands in pair, because they contain a simple protocol to control connecting back 85 | 86 | ``` 87 | ./iox proxy -r 1.1.1.1:9999 88 | ./iox proxy -l 9999 -l 1080 // notice, the two ports are in order 89 | 90 | 91 | for ew: 92 | ./ew -s rcsocks -l 1080 -e 9999 93 | ./ew -s rssocks -d 1.1.1.1 -e 9999 94 | ``` 95 | 96 | Then connect intranet host 97 | 98 | ``` 99 | # proxychains.conf 100 | # socks5://1.1.1.1:1080 101 | 102 | $ proxychains rdesktop 192.168.0.100:3389 103 | ``` 104 | 105 | *** 106 | 107 | ### Enable encryption 108 | 109 | For example, we forward 3389 port in intranet to our VPS 110 | 111 | ``` 112 | // be-controlled host 113 | ./iox fwd -r 192.168.0.100:3389 -r *1.1.1.1:8888 -k 656565 114 | 115 | 116 | // our VPS 117 | ./iox fwd -l *8888 -l 33890 -k 656565 118 | ``` 119 | 120 | It's easy to understand: traffic between be-controlled host and our VPS:8888 will be encrypted, the pre-shared secret key is 'eee' (`-k` takes the key hex-encoded, i.e. `656565`), `iox`
will use it to generate seed key and nonce **(Normally, nonce shouldn't be reused. But consider that iox's encryption is only for bypassing IDS, in order not to allocate extra space, the TCP stream encryption will reuse the nonce)**, then encrypt with Xchacha20 (replace AES-CTR with Xchacha20 in v0.3 version) 121 | 122 | So, the `*` should be used in pairs 123 | 124 | ``` 125 | ./iox fwd -l 1000 -r *127.0.0.1:1001 -k 000102 126 | ./iox fwd -l *1001 -r *127.0.0.1:1002 -k 000102 127 | ./iox fwd -l *1002 -r *127.0.0.1:1003 -k 000102 128 | ./iox proxy -l *1003 -k 000102 129 | 130 | 131 | $ curl google.com -x socks5://127.0.0.1:1000 132 | ``` 133 | 134 | Using `iox` as a simple ShadowSocks 135 | 136 | ``` 137 | // ssserver 138 | ./iox proxy -l *9999 -k 000102 139 | 140 | 141 | // sslocal 142 | ./iox fwd -l 1080 -r *VPS:9999 -k 000102 143 | ``` 144 | 145 | ### UDP forward 146 | 147 | Only need to add CLI option `-u` 148 | 149 | ``` 150 | ./iox fwd -l 53 -r *127.0.0.1:8888 -k 000102 -u 151 | ./iox fwd -l *8888 -l *9999 -k 000102 -u 152 | ./iox fwd -r *127.0.0.1:9999 -r 8.8.8.8:53 -k 000102 -u 153 | ``` 154 | 155 | **NOTICE: When you make a multistage connection, the `Remote2Remote-UDP-mode` must be started last, which is the No.3 command in above example** 156 | 157 | UDP forwarding may have behavior that is not as you expected. Actually, on GitHub now, there are only examples of forwarding a local listener to a remote host, so I can only implement them with my understanding 158 | 159 | You can find why in the source code. 
If you have any ideas, PR / issue are welcomed 160 | 161 | ## License 162 | 163 | The MIT license 164 | 165 | -------------------------------------------------------------------------------- /operate/proxy.go: -------------------------------------------------------------------------------- 1 | package operate 2 | 3 | import ( 4 | "iox/logger" 5 | "iox/netio" 6 | "iox/option" 7 | "iox/socks5" 8 | "net" 9 | "os" 10 | "os/signal" 11 | "time" 12 | ) 13 | 14 | // local is :port 15 | func ProxyLocal(local string, encrypted bool) { 16 | listener, err := net.Listen("tcp", local) 17 | if err != nil { 18 | logger.Warn( 19 | "Socks5 listen on %s error: %s", 20 | local, err.Error(), 21 | ) 22 | return 23 | } 24 | 25 | logger.Success("Start socks5 server on %s", local) 26 | 27 | for { 28 | conn, err := listener.Accept() 29 | if err != nil { 30 | logger.Warn( 31 | "Socks5 handle local connect error: %s", 32 | err.Error(), 33 | ) 34 | continue 35 | } 36 | 37 | go func() { 38 | defer conn.Close() 39 | connCtx, err := netio.NewTCPCtx(conn, encrypted) 40 | if err != nil { 41 | return 42 | } 43 | 44 | socks5.HandleConnection(connCtx) 45 | }() 46 | } 47 | } 48 | 49 | // remote is domain:port or ip:port 50 | func ProxyRemote(remote string, encrypted bool) { 51 | masterConn, err := clientHandshake(remote) 52 | if err != nil { 53 | logger.Warn(err.Error()) 54 | return 55 | } 56 | 57 | connectRequest := make(chan uint8, MAX_CONNECTION/2) 58 | defer close(connectRequest) 59 | 60 | endSignal := make(chan struct{}) 61 | 62 | // handle ctrl+C and send heartbeat packets periodically 63 | { 64 | sigs := make(chan os.Signal) 65 | signal.Notify(sigs, os.Interrupt) 66 | go func() { 67 | <-sigs 68 | masterConn.Write(serialize(Protocol{ 69 | CMD: CTL_CLEANUP, 70 | N: 0, 71 | })) 72 | logger.Success("Recv Ctrl+C, exit now") 73 | os.Exit(0) 74 | }() 75 | 76 | // no need for mutex-lock here 77 | ticker := time.NewTicker(time.Second * option.HEARTBEAT_FREQUENCY) 78 | go func() { 79 | for { 80 | 
<-ticker.C 81 | masterConn.Write(serialize(Protocol{ 82 | CMD: CTL_HEARTBEAT, 83 | N: 0, 84 | })) 85 | } 86 | }() 87 | } 88 | 89 | // handle master conn 90 | go func() { 91 | defer masterConn.Close() 92 | for { 93 | pb, err := readUntilEnd(masterConn) 94 | if err != nil { 95 | continue 96 | } 97 | 98 | p, err := unserialize(pb) 99 | if err != nil { 100 | continue 101 | } 102 | 103 | switch p.CMD { 104 | case CTL_CONNECT_ME: 105 | connectRequest <- p.N 106 | case CTL_CLEANUP: 107 | endSignal <- struct{}{} 108 | return 109 | } 110 | } 111 | }() 112 | 113 | // handle CONNECT_ME request 114 | for { 115 | select { 116 | case <-endSignal: 117 | logger.Success("Recv exit signal from remote, exit now") 118 | return 119 | case n := <-connectRequest: 120 | for n > 0 { 121 | go func() { 122 | conn, err := net.DialTimeout( 123 | "tcp", 124 | remote, 125 | time.Duration(option.TIMEOUT)*time.Millisecond, 126 | ) 127 | if err != nil { 128 | logger.Info(err.Error()) 129 | return 130 | } 131 | 132 | connCtx, err := netio.NewTCPCtx(conn, encrypted) 133 | if err != nil { 134 | return 135 | } 136 | 137 | socks5.HandleConnection(connCtx) 138 | }() 139 | n-- 140 | } 141 | } 142 | } 143 | } 144 | 145 | func ProxyRemoteL2L(master string, local string, menc bool, lenc bool) { 146 | masterListener, err := net.Listen("tcp", master) 147 | if err != nil { 148 | logger.Warn("Listen on %s error", master) 149 | return 150 | } 151 | defer masterListener.Close() 152 | 153 | logger.Info("Listent on %s for remote socks5 server", master) 154 | 155 | localListener, err := net.Listen("tcp", local) 156 | if err != nil { 157 | logger.Warn("Listen on %s error", local) 158 | return 159 | } 160 | defer localListener.Close() 161 | 162 | // HANDSHAKE: 163 | masterConn := serverHandshake(masterListener) 164 | defer func() { 165 | masterConn.Close() 166 | }() 167 | 168 | // handle ctrl+C 169 | { 170 | sigs := make(chan os.Signal) 171 | signal.Notify(sigs, os.Interrupt) 172 | go func() { 173 | <-sigs 174 | 
masterConn.Write(serialize(Protocol{ 175 | CMD: CTL_CLEANUP, 176 | N: 0, 177 | })) 178 | logger.Success("Recv Ctrl+C, exit now") 179 | os.Exit(0) 180 | }() 181 | } 182 | 183 | localConnBuffer := make(chan net.Conn, MAX_CONNECTION/2) 184 | defer close(localConnBuffer) 185 | 186 | logger.Success("Forward socks5 server to %s", local) 187 | 188 | // handle masterConn read 189 | go func() { 190 | for { 191 | pb, err := readUntilEnd(masterConn) 192 | if err != nil { 193 | continue 194 | } 195 | 196 | p, err := unserialize(pb) 197 | if err != nil { 198 | continue 199 | } 200 | 201 | switch p.CMD { 202 | case CTL_CLEANUP: 203 | logger.Success("Recv exit signal from remote, exit now") 204 | os.Exit(0) 205 | case CTL_HEARTBEAT: 206 | continue 207 | } 208 | } 209 | }() 210 | 211 | // handle local connection 212 | go func() { 213 | for { 214 | localConn, err := localListener.Accept() 215 | if err != nil { 216 | continue 217 | } 218 | 219 | localConnBuffer <- localConn 220 | 221 | // to speed up 222 | // don't need to calculate precisly 223 | var n uint8 224 | l := len(localConnBuffer) 225 | switch { 226 | case l > MAX_CONNECTION/0x40: 227 | n = 2 228 | case l > MAX_CONNECTION/0x20: 229 | n = 3 230 | default: 231 | n = 1 232 | } 233 | 234 | masterConn.Write(serialize(Protocol{ 235 | CMD: CTL_CONNECT_ME, 236 | N: n, 237 | })) 238 | } 239 | }() 240 | 241 | for { 242 | remoteConn, err := masterListener.Accept() 243 | if err != nil { 244 | continue 245 | } 246 | 247 | localConn := <-localConnBuffer 248 | 249 | go func() { 250 | defer remoteConn.Close() 251 | defer localConn.Close() 252 | 253 | remoteConnCtx, err := netio.NewTCPCtx(remoteConn, menc) 254 | if err != nil { 255 | return 256 | } 257 | 258 | localConnCtx, err := netio.NewTCPCtx(localConn, lenc) 259 | if err != nil { 260 | return 261 | } 262 | 263 | netio.PipeForward(remoteConnCtx, localConnCtx) 264 | }() 265 | } 266 | } 267 | -------------------------------------------------------------------------------- 
/socks5/socks5.go: -------------------------------------------------------------------------------- 1 | // code from https://github.com/ring04h/s5.go 2 | package socks5 3 | 4 | import ( 5 | "errors" 6 | "io" 7 | "iox/logger" 8 | "iox/netio" 9 | "iox/option" 10 | "net" 11 | "strconv" 12 | "time" 13 | ) 14 | 15 | var ( 16 | Commands = []string{"CONNECT", "BIND", "UDP ASSOCIATE"} 17 | AddrType = []string{"", "IPv4", "", "Domain", "IPv6"} 18 | Conns = make([]net.Conn, 0) 19 | Verbose = false 20 | 21 | errAddrType = errors.New("socks addr type not supported") 22 | errVer = errors.New("socks version not supported") 23 | errMethod = errors.New("socks only support noauth method") 24 | errAuthExtraData = errors.New("socks authentication get extra data") 25 | errReqExtraData = errors.New("socks request get extra data") 26 | errCmd = errors.New("socks only support connect command") 27 | ) 28 | 29 | const ( 30 | socksVer5 = 0x05 31 | socksCmdConnect = 0x01 32 | ) 33 | 34 | func readAtLeast(r netio.Ctx, buf []byte, min int) (n int, err error) { 35 | if len(buf) < min { 36 | return 0, io.ErrShortBuffer 37 | } 38 | 39 | for n < min && err == nil { 40 | var nn int 41 | nn, err = r.DecryptRead(buf[n:]) 42 | n += nn 43 | } 44 | if n >= min { 45 | err = nil 46 | } else if n > 0 && err == io.EOF { 47 | err = io.ErrUnexpectedEOF 48 | } 49 | return 50 | } 51 | 52 | func handShake(conn netio.Ctx) (err error) { 53 | const ( 54 | idVer = 0 55 | idNmethod = 1 56 | ) 57 | 58 | buf := make([]byte, 258) 59 | 60 | var n int 61 | 62 | // make sure we get the nmethod field 63 | if n, err = readAtLeast(conn, buf, idNmethod+1); err != nil { 64 | return 65 | } 66 | 67 | if buf[idVer] != socksVer5 { 68 | return errVer 69 | } 70 | 71 | nmethod := int(buf[idNmethod]) // client support auth mode 72 | msgLen := nmethod + 2 // auth msg length 73 | if n == msgLen { // handshake done, common case 74 | // do nothing, jump directly to send confirmation 75 | } else if n < msgLen { // has more methods to read, 
rare case 76 | if _, err = readAtLeast(conn, buf[n:msgLen], len(buf[n:msgLen])); err != nil { 77 | return 78 | } 79 | } else { // error, should not get extra data 80 | return errAuthExtraData 81 | } 82 | /* 83 | X'00' NO AUTHENTICATION REQUIRED 84 | X'01' GSSAPI 85 | X'02' USERNAME/PASSWORD 86 | X'03' to X'7F' IANA ASSIGNED 87 | X'80' to X'FE' RESERVED FOR PRIVATE METHODS 88 | X'FF' NO ACCEPTABLE METHODS 89 | */ 90 | // send confirmation: version 5, no authentication required 91 | _, err = conn.EncryptWrite([]byte{socksVer5, 0}) 92 | 93 | return 94 | } 95 | 96 | func parseTarget(conn netio.Ctx) (host string, err error) { 97 | const ( 98 | idVer = 0 99 | idCmd = 1 100 | idType = 3 // address type index 101 | idIP0 = 4 // ip addres start index 102 | idDmLen = 4 // domain address length index 103 | idDm0 = 5 // domain address start index 104 | 105 | typeIPv4 = 1 // type is ipv4 address 106 | typeDm = 3 // type is domain address 107 | typeIPv6 = 4 // type is ipv6 address 108 | 109 | lenIPv4 = 3 + 1 + net.IPv4len + 2 // 3(ver+cmd+rsv) + 1addrType + ipv4 + 2port 110 | lenIPv6 = 3 + 1 + net.IPv6len + 2 // 3(ver+cmd+rsv) + 1addrType + ipv6 + 2port 111 | lenDmBase = 3 + 1 + 1 + 2 // 3 + 1addrType + 1addrLen + 2port, plus addrLen 112 | ) 113 | // refer to getRequest in server.go for why set buffer size to 263 114 | buf := make([]byte, 263) 115 | var n int 116 | 117 | // read till we get possible domain length field 118 | if n, err = readAtLeast(conn, buf, idDmLen+1); err != nil { 119 | return 120 | } 121 | 122 | // check version and cmd 123 | if buf[idVer] != socksVer5 { 124 | err = errVer 125 | return 126 | } 127 | 128 | /* 129 | CONNECT X'01' 130 | BIND X'02' 131 | UDP ASSOCIATE X'03' 132 | */ 133 | 134 | if buf[idCmd] > 0x03 || buf[idCmd] == 0x00 { 135 | logger.Info( 136 | "Unknown Command", 137 | buf[idCmd], 138 | ) 139 | } 140 | 141 | if buf[idCmd] != socksCmdConnect { // only support CONNECT mode 142 | err = errCmd 143 | return 144 | } 145 | 146 | // read target 
address 147 | reqLen := -1 148 | switch buf[idType] { 149 | case typeIPv4: 150 | reqLen = lenIPv4 151 | case typeIPv6: 152 | reqLen = lenIPv6 153 | case typeDm: // domain name 154 | reqLen = int(buf[idDmLen]) + lenDmBase 155 | default: 156 | err = errAddrType 157 | return 158 | } 159 | 160 | if n == reqLen { 161 | // common case, do nothing 162 | } else if n < reqLen { // rare case 163 | if _, err = readAtLeast(conn, buf[n:reqLen], len(buf[n:reqLen])); err != nil { 164 | return 165 | } 166 | } else { 167 | err = errReqExtraData 168 | return 169 | } 170 | 171 | switch buf[idType] { 172 | case typeIPv4: 173 | host = net.IP(buf[idIP0 : idIP0+net.IPv4len]).String() 174 | case typeIPv6: 175 | host = net.IP(buf[idIP0 : idIP0+net.IPv6len]).String() 176 | case typeDm: 177 | host = string(buf[idDm0 : idDm0+buf[idDmLen]]) 178 | } 179 | port := bigEndianUint16(buf[reqLen-2 : reqLen]) 180 | host = net.JoinHostPort(host, strconv.Itoa(int(port))) 181 | 182 | return 183 | } 184 | 185 | func bigEndianUint16(b []byte) uint16 { 186 | _ = b[1] // bounds check hint to compiler; see golang.org/issue/14808 187 | return uint16(b[1]) | uint16(b[0])<<8 188 | } 189 | 190 | func pipeWhenClose(conn netio.Ctx, target string) { 191 | remoteConn, err := net.DialTimeout( 192 | "tcp", 193 | target, 194 | time.Millisecond*time.Duration(option.TIMEOUT), 195 | ) 196 | if err != nil { 197 | logger.Info("Connect remote :" + err.Error()) 198 | return 199 | } 200 | 201 | tcpAddr := remoteConn.LocalAddr().(*net.TCPAddr) 202 | if tcpAddr.Zone == "" { 203 | if tcpAddr.IP.Equal(tcpAddr.IP.To4()) { 204 | tcpAddr.Zone = "ip4" 205 | } else { 206 | tcpAddr.Zone = "ip6" 207 | } 208 | } 209 | 210 | rep := make([]byte, 256) 211 | rep[0] = 0x05 212 | rep[1] = 0x00 // success 213 | rep[2] = 0x00 //RSV 214 | 215 | //IP 216 | if tcpAddr.Zone == "ip6" { 217 | rep[3] = 0x04 //IPv6 218 | } else { 219 | rep[3] = 0x01 //IPv4 220 | } 221 | 222 | var ip net.IP 223 | if "ip6" == tcpAddr.Zone { 224 | ip = tcpAddr.IP.To16() 225 
| } else { 226 | ip = tcpAddr.IP.To4() 227 | } 228 | pindex := 4 229 | for _, b := range ip { 230 | rep[pindex] = b 231 | pindex += 1 232 | } 233 | rep[pindex] = byte((tcpAddr.Port >> 8) & 0xff) 234 | rep[pindex+1] = byte(tcpAddr.Port & 0xff) 235 | 236 | conn.EncryptWrite(rep[0 : pindex+2]) 237 | // Transfer data 238 | 239 | defer remoteConn.Close() 240 | 241 | remoteConnCtx, err := netio.NewTCPCtx(remoteConn, false) 242 | if err != nil { 243 | logger.Info( 244 | "Socks5 remote connect error: %s", 245 | err.Error(), 246 | ) 247 | return 248 | } 249 | 250 | netio.PipeForward(conn, remoteConnCtx) 251 | } 252 | 253 | func HandleConnection(conn netio.Ctx) { 254 | if err := handShake(conn); err != nil { 255 | logger.Info( 256 | "Socks5 handshake error: %s", 257 | err.Error(), 258 | ) 259 | return 260 | } 261 | addr, err := parseTarget(conn) 262 | if err != nil { 263 | logger.Info( 264 | "socks consult transfer mode or parse target: %s", 265 | err.Error(), 266 | ) 267 | return 268 | } 269 | pipeWhenClose(conn, addr) 270 | } 271 | -------------------------------------------------------------------------------- /crypto/chacha20/chacha20.go: -------------------------------------------------------------------------------- 1 | // Copryright (C) 2019 Yawning Angel 2 | // 3 | // This program is free software: you can redistribute it and/or modify 4 | // it under the terms of the GNU Affero General Public License as 5 | // published by the Free Software Foundation, either version 3 of the 6 | // License, or (at your option) any later version. 7 | // 8 | // This program is distributed in the hope that it will be useful, 9 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | // GNU Affero General Public License for more details. 12 | // 13 | // You should have received a copy of the GNU Affero General Public License 14 | // along with this program. If not, see . 
const (
	// KeySize is the ChaCha20 key size in bytes.
	KeySize = 32

	// NonceSize is the ChaCha20 nonce size in bytes.
	NonceSize = 8

	// INonceSize is the IETF ChaCha20 nonce size in bytes.
	INonceSize = 12

	// XNonceSize is the XChaCha20 nonce size in bytes.
	XNonceSize = 24

	// HNonceSize is the HChaCha20 nonce size in bytes.
	HNonceSize = 16
)

var (
	// ErrInvalidKey is the error returned when the key is invalid.
	ErrInvalidKey = errors.New("chacha20: key length must be KeySize bytes")

	// ErrInvalidNonce is the error returned when the nonce is invalid.
	ErrInvalidNonce = errors.New("chacha20: nonce length must be NonceSize/INonceSize/XNonceSize bytes")

	// ErrInvalidCounter is the error returned when the counter is invalid.
	ErrInvalidCounter = errors.New("chacha20: block counter is invalid (out of range)")

	// supportedImpls is filled by init; activeImpl is its first entry.
	supportedImpls []api.Implementation
	activeImpl     api.Implementation

	// Compile-time check that *Cipher satisfies cipher.Stream.
	_ cipher.Stream = (*Cipher)(nil)
)

// Cipher is an instance of ChaCha20/XChaCha20 using a particular key and nonce.
type Cipher struct {
	// state is the 16-word ChaCha state: constants, key, counter, nonce.
	state [api.StateSize]uint32
	// buf holds one block of keystream for partial-block requests.
	buf [api.BlockSize]byte

	// off is the number of bytes of buf already consumed; api.BlockSize
	// means buf is empty.
	off int
	// ietf is true for the 12-byte-nonce variant with a 32-bit counter.
	ietf bool
}

// Reset zeros the key data so that it will no longer appear in the process's
// memory. Only state and buf are cleared; off and ietf are left untouched.
func (c *Cipher) Reset() {
	for i := range c.state {
		c.state[i] = 0
	}
	for i := range c.buf {
		c.buf[i] = 0
	}
}

// Seek sets the block counter to a given offset. In IETF mode the counter is
// 32 bits wide and ErrInvalidCounter is returned for larger values; otherwise
// the 64-bit counter is split across state words 12 and 13.
func (c *Cipher) Seek(blockCounter uint64) error {
	if c.ietf {
		if blockCounter > math.MaxUint32 {
			return ErrInvalidCounter
		}
		c.state[12] = uint32(blockCounter)
	} else {
		c.state[12] = uint32(blockCounter)
		c.state[13] = uint32(blockCounter >> 32)
	}
	// Mark the buffered keystream as used up so the next request generates
	// a block at the new counter.
	c.off = api.BlockSize
	return nil
}

// ReKey reinitializes the ChaCha20/XChaCha20 instance with the provided key
// and nonce.
func (c *Cipher) ReKey(key, nonce []byte) error {
	c.Reset()
	return c.doReKey(key, nonce)
}

// doReKey loads key and nonce into the state. The nonce length selects the
// variant: NonceSize (64-bit counter), INonceSize (IETF) or XNonceSize
// (XChaCha20, which first derives a sub-key with HChaCha). It returns
// ErrInvalidKey or ErrInvalidNonce for any other length.
func (c *Cipher) doReKey(key, nonce []byte) error {
	if len(key) != KeySize {
		return ErrInvalidKey
	}

	var subKey []byte
	switch len(nonce) {
	case NonceSize, INonceSize:
	case XNonceSize:
		// Borrow the keystream buffer as scratch space for the derived key;
		// the last 8 nonce bytes become the ChaCha20 nonce.
		subKey = c.buf[:KeySize]
		activeImpl.HChaCha(key, nonce, subKey)
		key = subKey
		nonce = nonce[16:24]
	default:
		return ErrInvalidNonce
	}

	_ = key[31] // Force bounds check elimination.

	c.state[0] = api.Sigma0
	c.state[1] = api.Sigma1
	c.state[2] = api.Sigma2
	c.state[3] = api.Sigma3
	c.state[4] = binary.LittleEndian.Uint32(key[0:4])
	c.state[5] = binary.LittleEndian.Uint32(key[4:8])
	c.state[6] = binary.LittleEndian.Uint32(key[8:12])
	c.state[7] = binary.LittleEndian.Uint32(key[12:16])
	c.state[8] = binary.LittleEndian.Uint32(key[16:20])
	c.state[9] = binary.LittleEndian.Uint32(key[20:24])
	c.state[10] = binary.LittleEndian.Uint32(key[24:28])
	c.state[11] = binary.LittleEndian.Uint32(key[28:32])
	c.state[12] = 0
	if len(nonce) == INonceSize {
		_ = nonce[11] // Force bounds check elimination.
		c.state[13] = binary.LittleEndian.Uint32(nonce[0:4])
		c.state[14] = binary.LittleEndian.Uint32(nonce[4:8])
		c.state[15] = binary.LittleEndian.Uint32(nonce[8:12])
		c.ietf = true
	} else {
		_ = nonce[7] // Force bounds check elimination.
		c.state[13] = 0
		c.state[14] = binary.LittleEndian.Uint32(nonce[0:4])
		c.state[15] = binary.LittleEndian.Uint32(nonce[4:8])
		c.ietf = false
	}
	c.off = api.BlockSize

	// Wipe the derived sub-key from the scratch buffer.
	if subKey != nil {
		for i := range subKey {
			subKey[i] = 0
		}
	}

	return nil
}

// New returns a new ChaCha20/XChaCha20 instance.
func New(key, nonce []byte) (*Cipher, error) {
	var c Cipher
	if err := c.doReKey(key, nonce); err != nil {
		return nil, err
	}

	return &c, nil
}

// HChaCha is the HChaCha20 hash function used to make XChaCha.
func HChaCha(key, nonce []byte, dst *[32]byte) {
	activeImpl.HChaCha(key, nonce, dst[:])
}

// XORKeyStream sets dst to the result of XORing src with the key stream. Dst
// and src may be the same slice but otherwise should not overlap. When dst is
// shorter than src, only len(dst) bytes are processed.
func (c *Cipher) XORKeyStream(dst, src []byte) {
	if len(dst) < len(src) {
		src = src[:len(dst)]
	}

	for remaining := len(src); remaining > 0; {
		// Process multiple blocks at once.
		if c.off == api.BlockSize {
			nrBlocks := remaining / api.BlockSize
			directBytes := nrBlocks * api.BlockSize
			if nrBlocks > 0 {
				c.doBlocks(dst, src, nrBlocks)
				remaining -= directBytes
				if remaining == 0 {
					return
				}
				dst = dst[directBytes:]
				src = src[directBytes:]
			}

			// If there's a partial block, generate 1 block of keystream into
			// the internal buffer.
			c.doBlocks(c.buf[:], nil, 1)
			c.off = 0
		}

		// Process partial blocks from the buffered keystream.
		toXor := api.BlockSize - c.off
		if remaining < toXor {
			toXor = remaining
		}
		if toXor > 0 {
			// The inliner doesn't want to inline this function, but my
			// attempts to force BCE don't seem to work with manual
			// inlining.
			//
			// Taking the extra function call overhead here appears to be
			// worth it.
			c.xorBufBytes(dst, src, toXor)

			dst = dst[toXor:]
			src = src[toXor:]

			remaining -= toXor
		}
	}
}

// xorBufBytes XORs n bytes of src with the buffered keystream starting at
// c.off, writes the result to dst and advances c.off by n. n must be > 0.
func (c *Cipher) xorBufBytes(dst, src []byte, n int) {
	// Force bounds check elimination.
	buf := c.buf[c.off:]
	_ = buf[n-1]
	_ = dst[n-1]
	_ = src[n-1]

	for i := 0; i < n; i++ {
		dst[i] = buf[i] ^ src[i]
	}
	c.off += n
}

// KeyStream sets dst to the raw keystream.
func (c *Cipher) KeyStream(dst []byte) {
	for remaining := len(dst); remaining > 0; {
		// Process multiple blocks at once.
		if c.off == api.BlockSize {
			nrBlocks := remaining / api.BlockSize
			directBytes := nrBlocks * api.BlockSize
			if nrBlocks > 0 {
				c.doBlocks(dst, nil, nrBlocks)
				remaining -= directBytes
				if remaining == 0 {
					return
				}
				dst = dst[directBytes:]
			}

			// If there's a partial block, generate 1 block of keystream into
			// the internal buffer.
			c.doBlocks(c.buf[:], nil, 1)
			c.off = 0
		}

		// Process partial blocks from the buffered keystream.
		toCopy := api.BlockSize - c.off
		if remaining < toCopy {
			toCopy = remaining
		}
		if toCopy > 0 {
			copy(dst[:toCopy], c.buf[c.off:c.off+toCopy])
			dst = dst[toCopy:]
			remaining -= toCopy
			c.off += toCopy
		}
	}
}

// doBlocks hands nrBlocks blocks to the active implementation. In IETF mode
// it panics when the 32-bit block counter would exceed math.MaxUint32.
func (c *Cipher) doBlocks(dst, src []byte, nrBlocks int) {
	if c.ietf {
		ctr := uint64(c.state[12])
		if ctr+uint64(nrBlocks) > math.MaxUint32 {
			panic("chacha20: will exceed key stream per nonce limit")
		}
	}

	activeImpl.Blocks(&c.state, dst, src, nrBlocks)
}

// init collects the available implementations and activates the first one.
// The hardware package registers before ref, so it takes priority when it
// adds an entry (presumably only on supported CPUs; see hardware.Register).
func init() {
	supportedImpls = hardware.Register(supportedImpls)
	supportedImpls = ref.Register(supportedImpls)
	activeImpl = supportedImpls[0]
}
// rounds is the number of ChaCha rounds; the loops below run two per pass.
const rounds = 20

// Impl is the reference implementation (exposed for testing).
var Impl = &implRef{}

type implRef struct{}

// Name returns the identifier of this implementation.
func (impl *implRef) Name() string {
	return "ref"
}

// Blocks produces nrBlocks blocks of keystream from state x.
//
// When src is non-nil each block is XORed with src and written to dst; when
// src is nil the raw keystream is written to dst. dst (and src, if given)
// must hold at least nrBlocks*api.BlockSize bytes. After every block the
// 64-bit counter held in x[12] (low) and x[13] (high) is incremented in
// place. x[0..3] are not read; the Sigma constants are used directly.
func (impl *implRef) Blocks(x *[api.StateSize]uint32, dst, src []byte, nrBlocks int) {
	for n := 0; n < nrBlocks; n++ {
		x0, x1, x2, x3 := api.Sigma0, api.Sigma1, api.Sigma2, api.Sigma3
		x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 := x[4], x[5], x[6], x[7], x[8], x[9], x[10], x[11], x[12], x[13], x[14], x[15]

		// Each pass is one column round followed by one diagonal round.
		for i := rounds; i > 0; i -= 2 {
			// quarterround(x, 0, 4, 8, 12)
			x0 += x4
			x12 ^= x0
			x12 = bits.RotateLeft32(x12, 16)
			x8 += x12
			x4 ^= x8
			x4 = bits.RotateLeft32(x4, 12)
			x0 += x4
			x12 ^= x0
			x12 = bits.RotateLeft32(x12, 8)
			x8 += x12
			x4 ^= x8
			x4 = bits.RotateLeft32(x4, 7)

			// quarterround(x, 1, 5, 9, 13)
			x1 += x5
			x13 ^= x1
			x13 = bits.RotateLeft32(x13, 16)
			x9 += x13
			x5 ^= x9
			x5 = bits.RotateLeft32(x5, 12)
			x1 += x5
			x13 ^= x1
			x13 = bits.RotateLeft32(x13, 8)
			x9 += x13
			x5 ^= x9
			x5 = bits.RotateLeft32(x5, 7)

			// quarterround(x, 2, 6, 10, 14)
			x2 += x6
			x14 ^= x2
			x14 = bits.RotateLeft32(x14, 16)
			x10 += x14
			x6 ^= x10
			x6 = bits.RotateLeft32(x6, 12)
			x2 += x6
			x14 ^= x2
			x14 = bits.RotateLeft32(x14, 8)
			x10 += x14
			x6 ^= x10
			x6 = bits.RotateLeft32(x6, 7)

			// quarterround(x, 3, 7, 11, 15)
			x3 += x7
			x15 ^= x3
			x15 = bits.RotateLeft32(x15, 16)
			x11 += x15
			x7 ^= x11
			x7 = bits.RotateLeft32(x7, 12)
			x3 += x7
			x15 ^= x3
			x15 = bits.RotateLeft32(x15, 8)
			x11 += x15
			x7 ^= x11
			x7 = bits.RotateLeft32(x7, 7)

			// quarterround(x, 0, 5, 10, 15)
			x0 += x5
			x15 ^= x0
			x15 = bits.RotateLeft32(x15, 16)
			x10 += x15
			x5 ^= x10
			x5 = bits.RotateLeft32(x5, 12)
			x0 += x5
			x15 ^= x0
			x15 = bits.RotateLeft32(x15, 8)
			x10 += x15
			x5 ^= x10
			x5 = bits.RotateLeft32(x5, 7)

			// quarterround(x, 1, 6, 11, 12)
			x1 += x6
			x12 ^= x1
			x12 = bits.RotateLeft32(x12, 16)
			x11 += x12
			x6 ^= x11
			x6 = bits.RotateLeft32(x6, 12)
			x1 += x6
			x12 ^= x1
			x12 = bits.RotateLeft32(x12, 8)
			x11 += x12
			x6 ^= x11
			x6 = bits.RotateLeft32(x6, 7)

			// quarterround(x, 2, 7, 8, 13)
			x2 += x7
			x13 ^= x2
			x13 = bits.RotateLeft32(x13, 16)
			x8 += x13
			x7 ^= x8
			x7 = bits.RotateLeft32(x7, 12)
			x2 += x7
			x13 ^= x2
			x13 = bits.RotateLeft32(x13, 8)
			x8 += x13
			x7 ^= x8
			x7 = bits.RotateLeft32(x7, 7)

			// quarterround(x, 3, 4, 9, 14)
			x3 += x4
			x14 ^= x3
			x14 = bits.RotateLeft32(x14, 16)
			x9 += x14
			x4 ^= x9
			x4 = bits.RotateLeft32(x4, 12)
			x3 += x4
			x14 ^= x3
			x14 = bits.RotateLeft32(x14, 8)
			x9 += x14
			x4 ^= x9
			x4 = bits.RotateLeft32(x4, 7)
		}

		// Feed-forward: add the input state to the permuted state.
		x0 += api.Sigma0
		x1 += api.Sigma1
		x2 += api.Sigma2
		x3 += api.Sigma3
		x4 += x[4]
		x5 += x[5]
		x6 += x[6]
		x7 += x[7]
		x8 += x[8]
		x9 += x[9]
		x10 += x[10]
		x11 += x[11]
		x12 += x[12]
		x13 += x[13]
		x14 += x[14]
		x15 += x[15]

		_ = dst[api.BlockSize-1] // Force bounds check elimination.

		if src != nil {
			_ = src[api.BlockSize-1] // Force bounds check elimination.
			binary.LittleEndian.PutUint32(dst[0:4], binary.LittleEndian.Uint32(src[0:4])^x0)
			binary.LittleEndian.PutUint32(dst[4:8], binary.LittleEndian.Uint32(src[4:8])^x1)
			binary.LittleEndian.PutUint32(dst[8:12], binary.LittleEndian.Uint32(src[8:12])^x2)
			binary.LittleEndian.PutUint32(dst[12:16], binary.LittleEndian.Uint32(src[12:16])^x3)
			binary.LittleEndian.PutUint32(dst[16:20], binary.LittleEndian.Uint32(src[16:20])^x4)
			binary.LittleEndian.PutUint32(dst[20:24], binary.LittleEndian.Uint32(src[20:24])^x5)
			binary.LittleEndian.PutUint32(dst[24:28], binary.LittleEndian.Uint32(src[24:28])^x6)
			binary.LittleEndian.PutUint32(dst[28:32], binary.LittleEndian.Uint32(src[28:32])^x7)
			binary.LittleEndian.PutUint32(dst[32:36], binary.LittleEndian.Uint32(src[32:36])^x8)
			binary.LittleEndian.PutUint32(dst[36:40], binary.LittleEndian.Uint32(src[36:40])^x9)
			binary.LittleEndian.PutUint32(dst[40:44], binary.LittleEndian.Uint32(src[40:44])^x10)
			binary.LittleEndian.PutUint32(dst[44:48], binary.LittleEndian.Uint32(src[44:48])^x11)
			binary.LittleEndian.PutUint32(dst[48:52], binary.LittleEndian.Uint32(src[48:52])^x12)
			binary.LittleEndian.PutUint32(dst[52:56], binary.LittleEndian.Uint32(src[52:56])^x13)
			binary.LittleEndian.PutUint32(dst[56:60], binary.LittleEndian.Uint32(src[56:60])^x14)
			binary.LittleEndian.PutUint32(dst[60:64], binary.LittleEndian.Uint32(src[60:64])^x15)
			src = src[api.BlockSize:]
		} else {
			binary.LittleEndian.PutUint32(dst[0:4], x0)
			binary.LittleEndian.PutUint32(dst[4:8], x1)
			binary.LittleEndian.PutUint32(dst[8:12], x2)
			binary.LittleEndian.PutUint32(dst[12:16], x3)
			binary.LittleEndian.PutUint32(dst[16:20], x4)
			binary.LittleEndian.PutUint32(dst[20:24], x5)
			binary.LittleEndian.PutUint32(dst[24:28], x6)
			binary.LittleEndian.PutUint32(dst[28:32], x7)
			binary.LittleEndian.PutUint32(dst[32:36], x8)
			binary.LittleEndian.PutUint32(dst[36:40], x9)
			binary.LittleEndian.PutUint32(dst[40:44], x10)
			binary.LittleEndian.PutUint32(dst[44:48], x11)
			binary.LittleEndian.PutUint32(dst[48:52], x12)
			binary.LittleEndian.PutUint32(dst[52:56], x13)
			binary.LittleEndian.PutUint32(dst[56:60], x14)
			binary.LittleEndian.PutUint32(dst[60:64], x15)
		}
		dst = dst[api.BlockSize:]

		// Stopping at 2^70 bytes per nonce is the user's responsibility.
		ctr := uint64(x[13])<<32 | uint64(x[12])
		ctr++
		x[12] = uint32(ctr)
		x[13] = uint32(ctr >> 32)
	}
}

// HChaCha computes the HChaCha20 function of a 32-byte key and the first
// api.HNonceSize bytes of nonce, writing api.HashSize bytes to dst. Unlike
// Blocks there is no feed-forward addition: the permuted words are emitted
// directly.
func (impl *implRef) HChaCha(key, nonce []byte, dst []byte) {
	// Force bounds check elimination.
	_ = key[31]
	_ = nonce[api.HNonceSize-1]

	x0, x1, x2, x3 := api.Sigma0, api.Sigma1, api.Sigma2, api.Sigma3
	x4 := binary.LittleEndian.Uint32(key[0:4])
	x5 := binary.LittleEndian.Uint32(key[4:8])
	x6 := binary.LittleEndian.Uint32(key[8:12])
	x7 := binary.LittleEndian.Uint32(key[12:16])
	x8 := binary.LittleEndian.Uint32(key[16:20])
	x9 := binary.LittleEndian.Uint32(key[20:24])
	x10 := binary.LittleEndian.Uint32(key[24:28])
	x11 := binary.LittleEndian.Uint32(key[28:32])
	x12 := binary.LittleEndian.Uint32(nonce[0:4])
	x13 := binary.LittleEndian.Uint32(nonce[4:8])
	x14 := binary.LittleEndian.Uint32(nonce[8:12])
	x15 := binary.LittleEndian.Uint32(nonce[12:16])

	// Yes, this could be carved out into a function for code reuse (TM)
	// however the go inliner won't inline it.
	for i := rounds; i > 0; i -= 2 {
		// quarterround(x, 0, 4, 8, 12)
		x0 += x4
		x12 ^= x0
		x12 = bits.RotateLeft32(x12, 16)
		x8 += x12
		x4 ^= x8
		x4 = bits.RotateLeft32(x4, 12)
		x0 += x4
		x12 ^= x0
		x12 = bits.RotateLeft32(x12, 8)
		x8 += x12
		x4 ^= x8
		x4 = bits.RotateLeft32(x4, 7)

		// quarterround(x, 1, 5, 9, 13)
		x1 += x5
		x13 ^= x1
		x13 = bits.RotateLeft32(x13, 16)
		x9 += x13
		x5 ^= x9
		x5 = bits.RotateLeft32(x5, 12)
		x1 += x5
		x13 ^= x1
		x13 = bits.RotateLeft32(x13, 8)
		x9 += x13
		x5 ^= x9
		x5 = bits.RotateLeft32(x5, 7)

		// quarterround(x, 2, 6, 10, 14)
		x2 += x6
		x14 ^= x2
		x14 = bits.RotateLeft32(x14, 16)
		x10 += x14
		x6 ^= x10
		x6 = bits.RotateLeft32(x6, 12)
		x2 += x6
		x14 ^= x2
		x14 = bits.RotateLeft32(x14, 8)
		x10 += x14
		x6 ^= x10
		x6 = bits.RotateLeft32(x6, 7)

		// quarterround(x, 3, 7, 11, 15)
		x3 += x7
		x15 ^= x3
		x15 = bits.RotateLeft32(x15, 16)
		x11 += x15
		x7 ^= x11
		x7 = bits.RotateLeft32(x7, 12)
		x3 += x7
		x15 ^= x3
		x15 = bits.RotateLeft32(x15, 8)
		x11 += x15
		x7 ^= x11
		x7 = bits.RotateLeft32(x7, 7)

		// quarterround(x, 0, 5, 10, 15)
		x0 += x5
		x15 ^= x0
		x15 = bits.RotateLeft32(x15, 16)
		x10 += x15
		x5 ^= x10
		x5 = bits.RotateLeft32(x5, 12)
		x0 += x5
		x15 ^= x0
		x15 = bits.RotateLeft32(x15, 8)
		x10 += x15
		x5 ^= x10
		x5 = bits.RotateLeft32(x5, 7)

		// quarterround(x, 1, 6, 11, 12)
		x1 += x6
		x12 ^= x1
		x12 = bits.RotateLeft32(x12, 16)
		x11 += x12
		x6 ^= x11
		x6 = bits.RotateLeft32(x6, 12)
		x1 += x6
		x12 ^= x1
		x12 = bits.RotateLeft32(x12, 8)
		x11 += x12
		x6 ^= x11
		x6 = bits.RotateLeft32(x6, 7)

		// quarterround(x, 2, 7, 8, 13)
		x2 += x7
		x13 ^= x2
		x13 = bits.RotateLeft32(x13, 16)
		x8 += x13
		x7 ^= x8
		x7 = bits.RotateLeft32(x7, 12)
		x2 += x7
		x13 ^= x2
		x13 = bits.RotateLeft32(x13, 8)
		x8 += x13
		x7 ^= x8
		x7 = bits.RotateLeft32(x7, 7)

		// quarterround(x, 3, 4, 9, 14)
		x3 += x4
		x14 ^= x3
		x14 = bits.RotateLeft32(x14, 16)
		x9 += x14
		x4 ^= x9
		x4 = bits.RotateLeft32(x4, 12)
		x3 += x4
		x14 ^= x3
		x14 = bits.RotateLeft32(x14, 8)
		x9 += x14
		x4 ^= x9
		x4 = bits.RotateLeft32(x4, 7)
	}

	// HChaCha returns x0...x3 | x12...x15, which corresponds to the
	// indexes of the ChaCha constant and the indexes of the IV.
	_ = dst[api.HashSize-1] // Force bounds check elimination.
	binary.LittleEndian.PutUint32(dst[0:4], x0)
	binary.LittleEndian.PutUint32(dst[4:8], x1)
	binary.LittleEndian.PutUint32(dst[8:12], x2)
	binary.LittleEndian.PutUint32(dst[12:16], x3)
	binary.LittleEndian.PutUint32(dst[16:20], x12)
	binary.LittleEndian.PutUint32(dst[20:24], x13)
	binary.LittleEndian.PutUint32(dst[24:28], x14)
	binary.LittleEndian.PutUint32(dst[28:32], x15)
}
371 | func Register(impls []api.Implementation) []api.Implementation { 372 | return append(impls, Impl) 373 | } 374 | -------------------------------------------------------------------------------- /operate/fwd.go: -------------------------------------------------------------------------------- 1 | package operate 2 | 3 | import ( 4 | "iox/crypto" 5 | "iox/logger" 6 | "iox/netio" 7 | "iox/option" 8 | "net" 9 | "time" 10 | ) 11 | 12 | // local is :port 13 | // remote is ip:port 14 | // Local2Remote(":9999", "1.1.1.1:9999") 15 | func Local2Remote(local string, remote string, lenc bool, renc bool) { 16 | if option.PROTOCOL == "TCP" { 17 | listener, err := net.Listen("tcp", local) 18 | if err != nil { 19 | logger.Warn( 20 | "Listen on %s error: %s", 21 | local, err.Error(), 22 | ) 23 | return 24 | } 25 | defer listener.Close() 26 | 27 | logger.Success("Forward between %s and %s", local, remote) 28 | 29 | for { 30 | logger.Info("Wait for connection on %s", local) 31 | 32 | localConn, err := listener.Accept() 33 | if err != nil { 34 | logger.Warn( 35 | "Handle local connect error: %s", 36 | err.Error(), 37 | ) 38 | continue 39 | } 40 | 41 | logger.Info( 42 | "Connection from %s", 43 | localConn.RemoteAddr().String(), 44 | ) 45 | logger.Info("Connecting " + remote) 46 | 47 | go func() { 48 | defer localConn.Close() 49 | 50 | localConnCtx, err := netio.NewTCPCtx(localConn, lenc) 51 | if err != nil { 52 | logger.Warn( 53 | "Handle local connect error: %s", 54 | err.Error(), 55 | ) 56 | return 57 | } 58 | 59 | remoteConn, err := net.DialTimeout( 60 | "tcp", 61 | remote, 62 | time.Millisecond*time.Duration(option.TIMEOUT), 63 | ) 64 | if err != nil { 65 | logger.Warn("Connect remote %s error: %s", 66 | remote, err.Error(), 67 | ) 68 | return 69 | } 70 | defer remoteConn.Close() 71 | 72 | remoteConnCtx, err := netio.NewTCPCtx(remoteConn, renc) 73 | if err != nil { 74 | logger.Warn("Connect remote %s error: %s", 75 | remote, err.Error(), 76 | ) 77 | return 78 | } 79 | 80 | 
logger.Info( 81 | "Open pipe: %s <== FWD ==> %s", 82 | localConn.RemoteAddr().String(), 83 | remoteConn.RemoteAddr().String(), 84 | ) 85 | 86 | netio.PipeForward(localConnCtx, remoteConnCtx) 87 | 88 | logger.Info( 89 | "Close pipe: %s <== FWD ==> %s", 90 | localConn.RemoteAddr().String(), 91 | remoteConn.RemoteAddr().String(), 92 | ) 93 | }() 94 | } 95 | 96 | } else { 97 | localAddr, err := net.ResolveUDPAddr("udp", local) 98 | if err != nil { 99 | logger.Warn( 100 | "Parse udp address %s error: %s", 101 | local, err.Error(), 102 | ) 103 | return 104 | } 105 | listener, err := net.ListenUDP("udp", localAddr) 106 | if err != nil { 107 | logger.Warn( 108 | "Listen udp on %s error: %s", 109 | local, err.Error(), 110 | ) 111 | return 112 | } 113 | 114 | remoteAddr, err := net.ResolveUDPAddr("udp", remote) 115 | if err != nil { 116 | logger.Warn( 117 | "Parse udp address %s error: %s", 118 | local, err.Error(), 119 | ) 120 | return 121 | } 122 | remoteConn, err := net.DialUDP("udp", nil, remoteAddr) 123 | if err != nil { 124 | logger.Warn( 125 | "Dial remote udp %s error: %s", 126 | local, err.Error(), 127 | ) 128 | return 129 | } 130 | 131 | listenerCtx, err := netio.NewUDPCtx(listener, lenc, false) 132 | if err != nil { 133 | return 134 | } 135 | remoteCtx, err := netio.NewUDPCtx(remoteConn, renc, true) 136 | if err != nil { 137 | return 138 | } 139 | 140 | logger.Success("Forward udp between %s and %s", local, remote) 141 | netio.ForwardUDP(listenerCtx, remoteCtx) 142 | } 143 | } 144 | 145 | func Local2Local(localA string, localB string, laenc bool, lbenc bool) { 146 | if option.PROTOCOL == "TCP" { 147 | logger.Success("Forward between %s and %s", localA, localB) 148 | 149 | var listenerA net.Listener 150 | var listenerB net.Listener 151 | 152 | for { 153 | signal := make(chan byte) 154 | var localConnA, localConnB net.Conn 155 | 156 | go func() { 157 | // Call listener.Close when goroutine returns. 
158 | // Listener in Go will release the port immediately 159 | // after calling listener.Close without waiting for TIME_WAIT 160 | var err error 161 | listenerA, err = net.Listen("tcp", localA) 162 | if err != nil { 163 | logger.Warn( 164 | "Listen on %s error: %s", 165 | localA, err.Error(), 166 | ) 167 | return 168 | } 169 | defer listenerA.Close() 170 | 171 | for { 172 | logger.Info( 173 | "Wait for connection on %s", 174 | localA, 175 | ) 176 | 177 | var err error 178 | localConnA, err = listenerA.Accept() 179 | if err != nil { 180 | logger.Warn( 181 | "Handle connection error: %s", 182 | err.Error(), 183 | ) 184 | continue 185 | } 186 | break 187 | } 188 | signal <- 'A' 189 | }() 190 | 191 | go func() { 192 | var err error 193 | listenerB, err = net.Listen("tcp", localB) 194 | if err != nil { 195 | logger.Warn( 196 | "Listen on %s error: %s", 197 | localB, err.Error(), 198 | ) 199 | return 200 | } 201 | defer listenerB.Close() 202 | 203 | for { 204 | logger.Info( 205 | "Wait for connection on %s", 206 | localB, 207 | ) 208 | 209 | var err error 210 | localConnB, err = listenerB.Accept() 211 | if err != nil { 212 | logger.Warn( 213 | "Handle connection error: %s", 214 | err.Error(), 215 | ) 216 | continue 217 | } 218 | break 219 | } 220 | signal <- 'B' 221 | }() 222 | 223 | switch <-signal { 224 | case 'A': 225 | logger.Info( 226 | "%s connected, waiting for %s", 227 | localA, localB, 228 | ) 229 | case 'B': 230 | logger.Info( 231 | "%s connected, waiting for %s", 232 | localB, localA, 233 | ) 234 | } 235 | 236 | <-signal 237 | 238 | go func() { 239 | defer localConnA.Close() 240 | defer localConnB.Close() 241 | 242 | localConnCtxA, err := netio.NewTCPCtx(localConnA, laenc) 243 | if err != nil { 244 | logger.Warn( 245 | "handle local %s error: %s", 246 | localA, err.Error(), 247 | ) 248 | } 249 | 250 | localConnCtxB, err := netio.NewTCPCtx(localConnB, lbenc) 251 | if err != nil { 252 | logger.Warn( 253 | "handle local %s error: %s", 254 | localB, err.Error(), 
255 | ) 256 | } 257 | 258 | logger.Info( 259 | "Open pipe: %s <== FWD ==> %s", 260 | localConnA.RemoteAddr().String(), 261 | localConnB.RemoteAddr().String(), 262 | ) 263 | netio.PipeForward(localConnCtxA, localConnCtxB) 264 | logger.Info( 265 | "Close pipe: %s <== FWD ==> %s", 266 | localConnA.RemoteAddr().String(), 267 | localConnB.RemoteAddr().String(), 268 | ) 269 | }() 270 | } 271 | } else { 272 | localAddrA, err := net.ResolveUDPAddr("udp", localA) 273 | if err != nil { 274 | logger.Warn( 275 | "Parse udp address %s error: %s", 276 | localA, err.Error(), 277 | ) 278 | return 279 | } 280 | listenerA, err := net.ListenUDP("udp", localAddrA) 281 | if err != nil { 282 | logger.Warn( 283 | "Listen udp on %s error: %s", 284 | localA, err.Error(), 285 | ) 286 | return 287 | } 288 | localAddrB, err := net.ResolveUDPAddr("udp", localB) 289 | if err != nil { 290 | logger.Warn( 291 | "Parse udp address %s error: %s", 292 | localB, err.Error(), 293 | ) 294 | return 295 | } 296 | listenerB, err := net.ListenUDP("udp", localAddrB) 297 | if err != nil { 298 | logger.Warn( 299 | "Listen udp on %s error: %s", 300 | localB, err.Error(), 301 | ) 302 | return 303 | } 304 | 305 | listenerCtxA, err := netio.NewUDPCtx(listenerA, laenc, false) 306 | if err != nil { 307 | return 308 | } 309 | listenerCtxB, err := netio.NewUDPCtx(listenerB, lbenc, false) 310 | if err != nil { 311 | return 312 | } 313 | 314 | logger.Success("Forward udp between %s and %s", localA, localB) 315 | netio.ForwardUnconnectedUDP(listenerCtxA, listenerCtxB) 316 | } 317 | } 318 | 319 | // When you make a multistage UDP connection, this function must be called last 320 | func Remote2Remote(remoteA string, remoteB string, raenc bool, rbenc bool) { 321 | if option.PROTOCOL == "TCP" { 322 | logger.Success("Forward between %s and %s", remoteA, remoteB) 323 | 324 | for { 325 | var remoteConnA net.Conn 326 | var remoteConnB net.Conn 327 | 328 | signal := make(chan struct{}) 329 | 330 | go func() { 331 | for { 332 | 
var err error 333 | logger.Info( 334 | "Connecting remote %s", 335 | remoteA, 336 | ) 337 | 338 | remoteConnA, err = net.DialTimeout( 339 | "tcp", 340 | remoteA, 341 | time.Millisecond*time.Duration(option.TIMEOUT), 342 | ) 343 | if err != nil { 344 | logger.Info( 345 | "Connect remote %s error, retrying", 346 | remoteA, 347 | ) 348 | time.Sleep(1500 * time.Millisecond) 349 | continue 350 | } 351 | 352 | break 353 | } 354 | 355 | signal <- struct{}{} 356 | }() 357 | 358 | go func() { 359 | for { 360 | var err error 361 | logger.Info( 362 | "Connecting remote %s", 363 | remoteB, 364 | ) 365 | 366 | remoteConnB, err = net.DialTimeout( 367 | "tcp", 368 | remoteB, 369 | time.Millisecond*time.Duration(option.TIMEOUT), 370 | ) 371 | if err != nil { 372 | logger.Info( 373 | "Connect remote %s error, retrying", 374 | remoteB, 375 | ) 376 | time.Sleep(1500 * time.Millisecond) 377 | continue 378 | } 379 | break 380 | } 381 | 382 | signal <- struct{}{} 383 | }() 384 | 385 | <-signal 386 | <-signal 387 | 388 | go func() { 389 | defer func() { 390 | if remoteConnA != nil { 391 | remoteConnA.Close() 392 | } 393 | 394 | if remoteConnB != nil { 395 | remoteConnB.Close() 396 | } 397 | }() 398 | 399 | if remoteConnA != nil && remoteConnB != nil { 400 | remoteConnCtxA, err := netio.NewTCPCtx(remoteConnA, raenc) 401 | if err != nil { 402 | logger.Warn( 403 | "Handle remote %s error: %s", 404 | remoteA, err.Error(), 405 | ) 406 | } 407 | remoteConnCtxB, err := netio.NewTCPCtx(remoteConnB, rbenc) 408 | if err != nil { 409 | logger.Warn( 410 | "Handle remote %s error: %s", 411 | remoteB, err.Error(), 412 | ) 413 | } 414 | 415 | logger.Info( 416 | "Start pipe: %s <== FWD ==> %s", 417 | remoteConnA.RemoteAddr().String(), 418 | remoteConnB.RemoteAddr().String(), 419 | ) 420 | netio.PipeForward(remoteConnCtxA, remoteConnCtxB) 421 | logger.Info( 422 | "Close pipe: %s <== FWD ==> %s", 423 | remoteConnA.RemoteAddr().String(), 424 | remoteConnB.RemoteAddr().String(), 425 | ) 426 | } 427 | }() 
428 | } 429 | } else { 430 | remoteAddrA, err := net.ResolveUDPAddr("udp", remoteA) 431 | if err != nil { 432 | logger.Warn( 433 | "Parse udp address %s error: %s", 434 | remoteA, err.Error(), 435 | ) 436 | return 437 | } 438 | remoteConnA, err := net.DialUDP("udp", nil, remoteAddrA) 439 | if err != nil { 440 | logger.Warn( 441 | "Dial remote udp %s error: %s", 442 | remoteA, err.Error(), 443 | ) 444 | return 445 | } 446 | remoteAddrB, err := net.ResolveUDPAddr("udp", remoteB) 447 | if err != nil { 448 | logger.Warn( 449 | "Parse udp address %s error: %s", 450 | remoteB, err.Error(), 451 | ) 452 | return 453 | } 454 | remoteConnB, err := net.DialUDP("udp", nil, remoteAddrB) 455 | if err != nil { 456 | logger.Warn( 457 | "Dial remote udp %s error: %s", 458 | remoteB, err.Error(), 459 | ) 460 | return 461 | } 462 | 463 | remoteCtxA, err := netio.NewUDPCtx(remoteConnA, raenc, true) 464 | if err != nil { 465 | return 466 | } 467 | remoteCtxB, err := netio.NewUDPCtx(remoteConnB, rbenc, true) 468 | if err != nil { 469 | return 470 | } 471 | 472 | // I need to send init packet to register the remote address 473 | // Even tough target is not `iox`, it doesn't matter 474 | // 475 | // There is a design fault here, and I need to consider 476 | // the case where the FORWARD_WITHOUT_DEC flag 477 | // is set but actually needs to be encrypted, 478 | // otherwise there is no IV in the ciphertext, 479 | // the opposite cannot process it 480 | if raenc { 481 | iv, err := crypto.RandomNonce() 482 | cipher, err := crypto.NewCipher(iv) 483 | if err != nil { 484 | return 485 | } 486 | 487 | b := make([]byte, 4, 20) 488 | copy(b, netio.UDP_INIT_PACKET) 489 | 490 | cipher.StreamXOR(b, b) 491 | b = append(b, iv...) 
492 | remoteCtxA.Write(b) 493 | 494 | } else { 495 | remoteCtxA.Write(netio.UDP_INIT_PACKET) 496 | } 497 | if rbenc { 498 | iv, err := crypto.RandomNonce() 499 | cipher, err := crypto.NewCipher(iv) 500 | if err != nil { 501 | return 502 | } 503 | 504 | b := make([]byte, 4, 20) 505 | copy(b, netio.UDP_INIT_PACKET) 506 | 507 | cipher.StreamXOR(b, b) 508 | b = append(b, iv...) 509 | remoteCtxB.Write(b) 510 | 511 | } else { 512 | remoteCtxB.Write(netio.UDP_INIT_PACKET) 513 | } 514 | 515 | logger.Success("Forward udp between %s and %s", remoteA, remoteB) 516 | netio.ForwardUDP(remoteCtxA, remoteCtxB) 517 | } 518 | } 519 | -------------------------------------------------------------------------------- /crypto/chacha20/LICENSE: -------------------------------------------------------------------------------- 1 | GNU AFFERO GENERAL PUBLIC LICENSE 2 | Version 3, 19 November 2007 3 | 4 | Copyright (C) 2007 Free Software Foundation, Inc. 5 | Everyone is permitted to copy and distribute verbatim copies 6 | of this license document, but changing it is not allowed. 7 | 8 | Preamble 9 | 10 | The GNU Affero General Public License is a free, copyleft license for 11 | software and other kinds of works, specifically designed to ensure 12 | cooperation with the community in the case of network server software. 13 | 14 | The licenses for most software and other practical works are designed 15 | to take away your freedom to share and change the works. By contrast, 16 | our General Public Licenses are intended to guarantee your freedom to 17 | share and change all versions of a program--to make sure it remains free 18 | software for all its users. 19 | 20 | When we speak of free software, we are referring to freedom, not 21 | price. 
Our General Public Licenses are designed to make sure that you 22 | have the freedom to distribute copies of free software (and charge for 23 | them if you wish), that you receive source code or can get it if you 24 | want it, that you can change the software or use pieces of it in new 25 | free programs, and that you know you can do these things. 26 | 27 | Developers that use our General Public Licenses protect your rights 28 | with two steps: (1) assert copyright on the software, and (2) offer 29 | you this License which gives you legal permission to copy, distribute 30 | and/or modify the software. 31 | 32 | A secondary benefit of defending all users' freedom is that 33 | improvements made in alternate versions of the program, if they 34 | receive widespread use, become available for other developers to 35 | incorporate. Many developers of free software are heartened and 36 | encouraged by the resulting cooperation. However, in the case of 37 | software used on network servers, this result may fail to come about. 38 | The GNU General Public License permits making a modified version and 39 | letting the public access it on a server without ever releasing its 40 | source code to the public. 41 | 42 | The GNU Affero General Public License is designed specifically to 43 | ensure that, in such cases, the modified source code becomes available 44 | to the community. It requires the operator of a network server to 45 | provide the source code of the modified version running there to the 46 | users of that server. Therefore, public use of a modified version, on 47 | a publicly accessible server, gives the public access to the source 48 | code of the modified version. 49 | 50 | An older license, called the Affero General Public License and 51 | published by Affero, was designed to accomplish similar goals. 
This is 52 | a different license, not a version of the Affero GPL, but Affero has 53 | released a new version of the Affero GPL which permits relicensing under 54 | this license. 55 | 56 | The precise terms and conditions for copying, distribution and 57 | modification follow. 58 | 59 | TERMS AND CONDITIONS 60 | 61 | 0. Definitions. 62 | 63 | "This License" refers to version 3 of the GNU Affero General Public License. 64 | 65 | "Copyright" also means copyright-like laws that apply to other kinds of 66 | works, such as semiconductor masks. 67 | 68 | "The Program" refers to any copyrightable work licensed under this 69 | License. Each licensee is addressed as "you". "Licensees" and 70 | "recipients" may be individuals or organizations. 71 | 72 | To "modify" a work means to copy from or adapt all or part of the work 73 | in a fashion requiring copyright permission, other than the making of an 74 | exact copy. The resulting work is called a "modified version" of the 75 | earlier work or a work "based on" the earlier work. 76 | 77 | A "covered work" means either the unmodified Program or a work based 78 | on the Program. 79 | 80 | To "propagate" a work means to do anything with it that, without 81 | permission, would make you directly or secondarily liable for 82 | infringement under applicable copyright law, except executing it on a 83 | computer or modifying a private copy. Propagation includes copying, 84 | distribution (with or without modification), making available to the 85 | public, and in some countries other activities as well. 86 | 87 | To "convey" a work means any kind of propagation that enables other 88 | parties to make or receive copies. Mere interaction with a user through 89 | a computer network, with no transfer of a copy, is not conveying. 
90 | 91 | An interactive user interface displays "Appropriate Legal Notices" 92 | to the extent that it includes a convenient and prominently visible 93 | feature that (1) displays an appropriate copyright notice, and (2) 94 | tells the user that there is no warranty for the work (except to the 95 | extent that warranties are provided), that licensees may convey the 96 | work under this License, and how to view a copy of this License. If 97 | the interface presents a list of user commands or options, such as a 98 | menu, a prominent item in the list meets this criterion. 99 | 100 | 1. Source Code. 101 | 102 | The "source code" for a work means the preferred form of the work 103 | for making modifications to it. "Object code" means any non-source 104 | form of a work. 105 | 106 | A "Standard Interface" means an interface that either is an official 107 | standard defined by a recognized standards body, or, in the case of 108 | interfaces specified for a particular programming language, one that 109 | is widely used among developers working in that language. 110 | 111 | The "System Libraries" of an executable work include anything, other 112 | than the work as a whole, that (a) is included in the normal form of 113 | packaging a Major Component, but which is not part of that Major 114 | Component, and (b) serves only to enable use of the work with that 115 | Major Component, or to implement a Standard Interface for which an 116 | implementation is available to the public in source code form. A 117 | "Major Component", in this context, means a major essential component 118 | (kernel, window system, and so on) of the specific operating system 119 | (if any) on which the executable work runs, or a compiler used to 120 | produce the work, or an object code interpreter used to run it. 
121 | 122 | The "Corresponding Source" for a work in object code form means all 123 | the source code needed to generate, install, and (for an executable 124 | work) run the object code and to modify the work, including scripts to 125 | control those activities. However, it does not include the work's 126 | System Libraries, or general-purpose tools or generally available free 127 | programs which are used unmodified in performing those activities but 128 | which are not part of the work. For example, Corresponding Source 129 | includes interface definition files associated with source files for 130 | the work, and the source code for shared libraries and dynamically 131 | linked subprograms that the work is specifically designed to require, 132 | such as by intimate data communication or control flow between those 133 | subprograms and other parts of the work. 134 | 135 | The Corresponding Source need not include anything that users 136 | can regenerate automatically from other parts of the Corresponding 137 | Source. 138 | 139 | The Corresponding Source for a work in source code form is that 140 | same work. 141 | 142 | 2. Basic Permissions. 143 | 144 | All rights granted under this License are granted for the term of 145 | copyright on the Program, and are irrevocable provided the stated 146 | conditions are met. This License explicitly affirms your unlimited 147 | permission to run the unmodified Program. The output from running a 148 | covered work is covered by this License only if the output, given its 149 | content, constitutes a covered work. This License acknowledges your 150 | rights of fair use or other equivalent, as provided by copyright law. 151 | 152 | You may make, run and propagate covered works that you do not 153 | convey, without conditions so long as your license otherwise remains 154 | in force. 
You may convey covered works to others for the sole purpose 155 | of having them make modifications exclusively for you, or provide you 156 | with facilities for running those works, provided that you comply with 157 | the terms of this License in conveying all material for which you do 158 | not control copyright. Those thus making or running the covered works 159 | for you must do so exclusively on your behalf, under your direction 160 | and control, on terms that prohibit them from making any copies of 161 | your copyrighted material outside their relationship with you. 162 | 163 | Conveying under any other circumstances is permitted solely under 164 | the conditions stated below. Sublicensing is not allowed; section 10 165 | makes it unnecessary. 166 | 167 | 3. Protecting Users' Legal Rights From Anti-Circumvention Law. 168 | 169 | No covered work shall be deemed part of an effective technological 170 | measure under any applicable law fulfilling obligations under article 171 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or 172 | similar laws prohibiting or restricting circumvention of such 173 | measures. 174 | 175 | When you convey a covered work, you waive any legal power to forbid 176 | circumvention of technological measures to the extent such circumvention 177 | is effected by exercising rights under this License with respect to 178 | the covered work, and you disclaim any intention to limit operation or 179 | modification of the work as a means of enforcing, against the work's 180 | users, your or third parties' legal rights to forbid circumvention of 181 | technological measures. 182 | 183 | 4. Conveying Verbatim Copies. 
184 | 185 | You may convey verbatim copies of the Program's source code as you 186 | receive it, in any medium, provided that you conspicuously and 187 | appropriately publish on each copy an appropriate copyright notice; 188 | keep intact all notices stating that this License and any 189 | non-permissive terms added in accord with section 7 apply to the code; 190 | keep intact all notices of the absence of any warranty; and give all 191 | recipients a copy of this License along with the Program. 192 | 193 | You may charge any price or no price for each copy that you convey, 194 | and you may offer support or warranty protection for a fee. 195 | 196 | 5. Conveying Modified Source Versions. 197 | 198 | You may convey a work based on the Program, or the modifications to 199 | produce it from the Program, in the form of source code under the 200 | terms of section 4, provided that you also meet all of these conditions: 201 | 202 | a) The work must carry prominent notices stating that you modified 203 | it, and giving a relevant date. 204 | 205 | b) The work must carry prominent notices stating that it is 206 | released under this License and any conditions added under section 207 | 7. This requirement modifies the requirement in section 4 to 208 | "keep intact all notices". 209 | 210 | c) You must license the entire work, as a whole, under this 211 | License to anyone who comes into possession of a copy. This 212 | License will therefore apply, along with any applicable section 7 213 | additional terms, to the whole of the work, and all its parts, 214 | regardless of how they are packaged. This License gives no 215 | permission to license the work in any other way, but it does not 216 | invalidate such permission if you have separately received it. 
217 | 218 | d) If the work has interactive user interfaces, each must display 219 | Appropriate Legal Notices; however, if the Program has interactive 220 | interfaces that do not display Appropriate Legal Notices, your 221 | work need not make them do so. 222 | 223 | A compilation of a covered work with other separate and independent 224 | works, which are not by their nature extensions of the covered work, 225 | and which are not combined with it such as to form a larger program, 226 | in or on a volume of a storage or distribution medium, is called an 227 | "aggregate" if the compilation and its resulting copyright are not 228 | used to limit the access or legal rights of the compilation's users 229 | beyond what the individual works permit. Inclusion of a covered work 230 | in an aggregate does not cause this License to apply to the other 231 | parts of the aggregate. 232 | 233 | 6. Conveying Non-Source Forms. 234 | 235 | You may convey a covered work in object code form under the terms 236 | of sections 4 and 5, provided that you also convey the 237 | machine-readable Corresponding Source under the terms of this License, 238 | in one of these ways: 239 | 240 | a) Convey the object code in, or embodied in, a physical product 241 | (including a physical distribution medium), accompanied by the 242 | Corresponding Source fixed on a durable physical medium 243 | customarily used for software interchange. 
244 | 245 | b) Convey the object code in, or embodied in, a physical product 246 | (including a physical distribution medium), accompanied by a 247 | written offer, valid for at least three years and valid for as 248 | long as you offer spare parts or customer support for that product 249 | model, to give anyone who possesses the object code either (1) a 250 | copy of the Corresponding Source for all the software in the 251 | product that is covered by this License, on a durable physical 252 | medium customarily used for software interchange, for a price no 253 | more than your reasonable cost of physically performing this 254 | conveying of source, or (2) access to copy the 255 | Corresponding Source from a network server at no charge. 256 | 257 | c) Convey individual copies of the object code with a copy of the 258 | written offer to provide the Corresponding Source. This 259 | alternative is allowed only occasionally and noncommercially, and 260 | only if you received the object code with such an offer, in accord 261 | with subsection 6b. 262 | 263 | d) Convey the object code by offering access from a designated 264 | place (gratis or for a charge), and offer equivalent access to the 265 | Corresponding Source in the same way through the same place at no 266 | further charge. You need not require recipients to copy the 267 | Corresponding Source along with the object code. If the place to 268 | copy the object code is a network server, the Corresponding Source 269 | may be on a different server (operated by you or a third party) 270 | that supports equivalent copying facilities, provided you maintain 271 | clear directions next to the object code saying where to find the 272 | Corresponding Source. Regardless of what server hosts the 273 | Corresponding Source, you remain obligated to ensure that it is 274 | available for as long as needed to satisfy these requirements. 
275 | 276 | e) Convey the object code using peer-to-peer transmission, provided 277 | you inform other peers where the object code and Corresponding 278 | Source of the work are being offered to the general public at no 279 | charge under subsection 6d. 280 | 281 | A separable portion of the object code, whose source code is excluded 282 | from the Corresponding Source as a System Library, need not be 283 | included in conveying the object code work. 284 | 285 | A "User Product" is either (1) a "consumer product", which means any 286 | tangible personal property which is normally used for personal, family, 287 | or household purposes, or (2) anything designed or sold for incorporation 288 | into a dwelling. In determining whether a product is a consumer product, 289 | doubtful cases shall be resolved in favor of coverage. For a particular 290 | product received by a particular user, "normally used" refers to a 291 | typical or common use of that class of product, regardless of the status 292 | of the particular user or of the way in which the particular user 293 | actually uses, or expects or is expected to use, the product. A product 294 | is a consumer product regardless of whether the product has substantial 295 | commercial, industrial or non-consumer uses, unless such uses represent 296 | the only significant mode of use of the product. 297 | 298 | "Installation Information" for a User Product means any methods, 299 | procedures, authorization keys, or other information required to install 300 | and execute modified versions of a covered work in that User Product from 301 | a modified version of its Corresponding Source. The information must 302 | suffice to ensure that the continued functioning of the modified object 303 | code is in no case prevented or interfered with solely because 304 | modification has been made. 
305 | 306 | If you convey an object code work under this section in, or with, or 307 | specifically for use in, a User Product, and the conveying occurs as 308 | part of a transaction in which the right of possession and use of the 309 | User Product is transferred to the recipient in perpetuity or for a 310 | fixed term (regardless of how the transaction is characterized), the 311 | Corresponding Source conveyed under this section must be accompanied 312 | by the Installation Information. But this requirement does not apply 313 | if neither you nor any third party retains the ability to install 314 | modified object code on the User Product (for example, the work has 315 | been installed in ROM). 316 | 317 | The requirement to provide Installation Information does not include a 318 | requirement to continue to provide support service, warranty, or updates 319 | for a work that has been modified or installed by the recipient, or for 320 | the User Product in which it has been modified or installed. Access to a 321 | network may be denied when the modification itself materially and 322 | adversely affects the operation of the network or violates the rules and 323 | protocols for communication across the network. 324 | 325 | Corresponding Source conveyed, and Installation Information provided, 326 | in accord with this section must be in a format that is publicly 327 | documented (and with an implementation available to the public in 328 | source code form), and must require no special password or key for 329 | unpacking, reading or copying. 330 | 331 | 7. Additional Terms. 332 | 333 | "Additional permissions" are terms that supplement the terms of this 334 | License by making exceptions from one or more of its conditions. 335 | Additional permissions that are applicable to the entire Program shall 336 | be treated as though they were included in this License, to the extent 337 | that they are valid under applicable law. 
If additional permissions 338 | apply only to part of the Program, that part may be used separately 339 | under those permissions, but the entire Program remains governed by 340 | this License without regard to the additional permissions. 341 | 342 | When you convey a copy of a covered work, you may at your option 343 | remove any additional permissions from that copy, or from any part of 344 | it. (Additional permissions may be written to require their own 345 | removal in certain cases when you modify the work.) You may place 346 | additional permissions on material, added by you to a covered work, 347 | for which you have or can give appropriate copyright permission. 348 | 349 | Notwithstanding any other provision of this License, for material you 350 | add to a covered work, you may (if authorized by the copyright holders of 351 | that material) supplement the terms of this License with terms: 352 | 353 | a) Disclaiming warranty or limiting liability differently from the 354 | terms of sections 15 and 16 of this License; or 355 | 356 | b) Requiring preservation of specified reasonable legal notices or 357 | author attributions in that material or in the Appropriate Legal 358 | Notices displayed by works containing it; or 359 | 360 | c) Prohibiting misrepresentation of the origin of that material, or 361 | requiring that modified versions of such material be marked in 362 | reasonable ways as different from the original version; or 363 | 364 | d) Limiting the use for publicity purposes of names of licensors or 365 | authors of the material; or 366 | 367 | e) Declining to grant rights under trademark law for use of some 368 | trade names, trademarks, or service marks; or 369 | 370 | f) Requiring indemnification of licensors and authors of that 371 | material by anyone who conveys the material (or modified versions of 372 | it) with contractual assumptions of liability to the recipient, for 373 | any liability that these contractual assumptions directly impose on 
374 | those licensors and authors. 375 | 376 | All other non-permissive additional terms are considered "further 377 | restrictions" within the meaning of section 10. If the Program as you 378 | received it, or any part of it, contains a notice stating that it is 379 | governed by this License along with a term that is a further 380 | restriction, you may remove that term. If a license document contains 381 | a further restriction but permits relicensing or conveying under this 382 | License, you may add to a covered work material governed by the terms 383 | of that license document, provided that the further restriction does 384 | not survive such relicensing or conveying. 385 | 386 | If you add terms to a covered work in accord with this section, you 387 | must place, in the relevant source files, a statement of the 388 | additional terms that apply to those files, or a notice indicating 389 | where to find the applicable terms. 390 | 391 | Additional terms, permissive or non-permissive, may be stated in the 392 | form of a separately written license, or stated as exceptions; 393 | the above requirements apply either way. 394 | 395 | 8. Termination. 396 | 397 | You may not propagate or modify a covered work except as expressly 398 | provided under this License. Any attempt otherwise to propagate or 399 | modify it is void, and will automatically terminate your rights under 400 | this License (including any patent licenses granted under the third 401 | paragraph of section 11). 402 | 403 | However, if you cease all violation of this License, then your 404 | license from a particular copyright holder is reinstated (a) 405 | provisionally, unless and until the copyright holder explicitly and 406 | finally terminates your license, and (b) permanently, if the copyright 407 | holder fails to notify you of the violation by some reasonable means 408 | prior to 60 days after the cessation. 
409 | 410 | Moreover, your license from a particular copyright holder is 411 | reinstated permanently if the copyright holder notifies you of the 412 | violation by some reasonable means, this is the first time you have 413 | received notice of violation of this License (for any work) from that 414 | copyright holder, and you cure the violation prior to 30 days after 415 | your receipt of the notice. 416 | 417 | Termination of your rights under this section does not terminate the 418 | licenses of parties who have received copies or rights from you under 419 | this License. If your rights have been terminated and not permanently 420 | reinstated, you do not qualify to receive new licenses for the same 421 | material under section 10. 422 | 423 | 9. Acceptance Not Required for Having Copies. 424 | 425 | You are not required to accept this License in order to receive or 426 | run a copy of the Program. Ancillary propagation of a covered work 427 | occurring solely as a consequence of using peer-to-peer transmission 428 | to receive a copy likewise does not require acceptance. However, 429 | nothing other than this License grants you permission to propagate or 430 | modify any covered work. These actions infringe copyright if you do 431 | not accept this License. Therefore, by modifying or propagating a 432 | covered work, you indicate your acceptance of this License to do so. 433 | 434 | 10. Automatic Licensing of Downstream Recipients. 435 | 436 | Each time you convey a covered work, the recipient automatically 437 | receives a license from the original licensors, to run, modify and 438 | propagate that work, subject to this License. You are not responsible 439 | for enforcing compliance by third parties with this License. 440 | 441 | An "entity transaction" is a transaction transferring control of an 442 | organization, or substantially all assets of one, or subdividing an 443 | organization, or merging organizations. 
If propagation of a covered 444 | work results from an entity transaction, each party to that 445 | transaction who receives a copy of the work also receives whatever 446 | licenses to the work the party's predecessor in interest had or could 447 | give under the previous paragraph, plus a right to possession of the 448 | Corresponding Source of the work from the predecessor in interest, if 449 | the predecessor has it or can get it with reasonable efforts. 450 | 451 | You may not impose any further restrictions on the exercise of the 452 | rights granted or affirmed under this License. For example, you may 453 | not impose a license fee, royalty, or other charge for exercise of 454 | rights granted under this License, and you may not initiate litigation 455 | (including a cross-claim or counterclaim in a lawsuit) alleging that 456 | any patent claim is infringed by making, using, selling, offering for 457 | sale, or importing the Program or any portion of it. 458 | 459 | 11. Patents. 460 | 461 | A "contributor" is a copyright holder who authorizes use under this 462 | License of the Program or a work on which the Program is based. The 463 | work thus licensed is called the contributor's "contributor version". 464 | 465 | A contributor's "essential patent claims" are all patent claims 466 | owned or controlled by the contributor, whether already acquired or 467 | hereafter acquired, that would be infringed by some manner, permitted 468 | by this License, of making, using, or selling its contributor version, 469 | but do not include claims that would be infringed only as a 470 | consequence of further modification of the contributor version. For 471 | purposes of this definition, "control" includes the right to grant 472 | patent sublicenses in a manner consistent with the requirements of 473 | this License. 
474 | 475 | Each contributor grants you a non-exclusive, worldwide, royalty-free 476 | patent license under the contributor's essential patent claims, to 477 | make, use, sell, offer for sale, import and otherwise run, modify and 478 | propagate the contents of its contributor version. 479 | 480 | In the following three paragraphs, a "patent license" is any express 481 | agreement or commitment, however denominated, not to enforce a patent 482 | (such as an express permission to practice a patent or covenant not to 483 | sue for patent infringement). To "grant" such a patent license to a 484 | party means to make such an agreement or commitment not to enforce a 485 | patent against the party. 486 | 487 | If you convey a covered work, knowingly relying on a patent license, 488 | and the Corresponding Source of the work is not available for anyone 489 | to copy, free of charge and under the terms of this License, through a 490 | publicly available network server or other readily accessible means, 491 | then you must either (1) cause the Corresponding Source to be so 492 | available, or (2) arrange to deprive yourself of the benefit of the 493 | patent license for this particular work, or (3) arrange, in a manner 494 | consistent with the requirements of this License, to extend the patent 495 | license to downstream recipients. "Knowingly relying" means you have 496 | actual knowledge that, but for the patent license, your conveying the 497 | covered work in a country, or your recipient's use of the covered work 498 | in a country, would infringe one or more identifiable patents in that 499 | country that you have reason to believe are valid. 
500 | 501 | If, pursuant to or in connection with a single transaction or 502 | arrangement, you convey, or propagate by procuring conveyance of, a 503 | covered work, and grant a patent license to some of the parties 504 | receiving the covered work authorizing them to use, propagate, modify 505 | or convey a specific copy of the covered work, then the patent license 506 | you grant is automatically extended to all recipients of the covered 507 | work and works based on it. 508 | 509 | A patent license is "discriminatory" if it does not include within 510 | the scope of its coverage, prohibits the exercise of, or is 511 | conditioned on the non-exercise of one or more of the rights that are 512 | specifically granted under this License. You may not convey a covered 513 | work if you are a party to an arrangement with a third party that is 514 | in the business of distributing software, under which you make payment 515 | to the third party based on the extent of your activity of conveying 516 | the work, and under which the third party grants, to any of the 517 | parties who would receive the covered work from you, a discriminatory 518 | patent license (a) in connection with copies of the covered work 519 | conveyed by you (or copies made from those copies), or (b) primarily 520 | for and in connection with specific products or compilations that 521 | contain the covered work, unless you entered into that arrangement, 522 | or that patent license was granted, prior to 28 March 2007. 523 | 524 | Nothing in this License shall be construed as excluding or limiting 525 | any implied license or other defenses to infringement that may 526 | otherwise be available to you under applicable patent law. 527 | 528 | 12. No Surrender of Others' Freedom. 529 | 530 | If conditions are imposed on you (whether by court order, agreement or 531 | otherwise) that contradict the conditions of this License, they do not 532 | excuse you from the conditions of this License. 
If you cannot convey a 533 | covered work so as to satisfy simultaneously your obligations under this 534 | License and any other pertinent obligations, then as a consequence you may 535 | not convey it at all. For example, if you agree to terms that obligate you 536 | to collect a royalty for further conveying from those to whom you convey 537 | the Program, the only way you could satisfy both those terms and this 538 | License would be to refrain entirely from conveying the Program. 539 | 540 | 13. Remote Network Interaction; Use with the GNU General Public License. 541 | 542 | Notwithstanding any other provision of this License, if you modify the 543 | Program, your modified version must prominently offer all users 544 | interacting with it remotely through a computer network (if your version 545 | supports such interaction) an opportunity to receive the Corresponding 546 | Source of your version by providing access to the Corresponding Source 547 | from a network server at no charge, through some standard or customary 548 | means of facilitating copying of software. This Corresponding Source 549 | shall include the Corresponding Source for any work covered by version 3 550 | of the GNU General Public License that is incorporated pursuant to the 551 | following paragraph. 552 | 553 | Notwithstanding any other provision of this License, you have 554 | permission to link or combine any covered work with a work licensed 555 | under version 3 of the GNU General Public License into a single 556 | combined work, and to convey the resulting work. The terms of this 557 | License will continue to apply to the part which is the covered work, 558 | but the work with which it is combined will remain governed by version 559 | 3 of the GNU General Public License. 560 | 561 | 14. Revised Versions of this License. 562 | 563 | The Free Software Foundation may publish revised and/or new versions of 564 | the GNU Affero General Public License from time to time. 
Such new versions 565 | will be similar in spirit to the present version, but may differ in detail to 566 | address new problems or concerns. 567 | 568 | Each version is given a distinguishing version number. If the 569 | Program specifies that a certain numbered version of the GNU Affero General 570 | Public License "or any later version" applies to it, you have the 571 | option of following the terms and conditions either of that numbered 572 | version or of any later version published by the Free Software 573 | Foundation. If the Program does not specify a version number of the 574 | GNU Affero General Public License, you may choose any version ever published 575 | by the Free Software Foundation. 576 | 577 | If the Program specifies that a proxy can decide which future 578 | versions of the GNU Affero General Public License can be used, that proxy's 579 | public statement of acceptance of a version permanently authorizes you 580 | to choose that version for the Program. 581 | 582 | Later license versions may give you additional or different 583 | permissions. However, no additional obligations are imposed on any 584 | author or copyright holder as a result of your choosing to follow a 585 | later version. 586 | 587 | 15. Disclaimer of Warranty. 588 | 589 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY 590 | APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT 591 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY 592 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, 593 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 594 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM 595 | IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF 596 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 597 | 598 | 16. Limitation of Liability. 
599 | 600 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 601 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS 602 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY 603 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE 604 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF 605 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD 606 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), 607 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF 608 | SUCH DAMAGES. 609 | 610 | 17. Interpretation of Sections 15 and 16. 611 | 612 | If the disclaimer of warranty and limitation of liability provided 613 | above cannot be given local legal effect according to their terms, 614 | reviewing courts shall apply local law that most closely approximates 615 | an absolute waiver of all civil liability in connection with the 616 | Program, unless a warranty or assumption of liability accompanies a 617 | copy of the Program in return for a fee. 618 | 619 | END OF TERMS AND CONDITIONS 620 | 621 | How to Apply These Terms to Your New Programs 622 | 623 | If you develop a new program, and you want it to be of the greatest 624 | possible use to the public, the best way to achieve this is to make it 625 | free software which everyone can redistribute and change under these terms. 626 | 627 | To do so, attach the following notices to the program. It is safest 628 | to attach them to the start of each source file to most effectively 629 | state the exclusion of warranty; and each file should have at least 630 | the "copyright" line and a pointer to where the full notice is found. 
631 | 632 | <one line to give the program's name and a brief idea of what it does.> 633 | Copyright (C) <year> <name of author> 634 | 635 | This program is free software: you can redistribute it and/or modify 636 | it under the terms of the GNU Affero General Public License as published by 637 | the Free Software Foundation, either version 3 of the License, or 638 | (at your option) any later version. 639 | 640 | This program is distributed in the hope that it will be useful, 641 | but WITHOUT ANY WARRANTY; without even the implied warranty of 642 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 643 | GNU Affero General Public License for more details. 644 | 645 | You should have received a copy of the GNU Affero General Public License 646 | along with this program. If not, see <https://www.gnu.org/licenses/>. 647 | 648 | Also add information on how to contact you by electronic and paper mail. 649 | 650 | If your software can interact with users remotely through a computer 651 | network, you should also make sure that it provides a way for users to 652 | get its source. For example, if your program is a web application, its 653 | interface could display a "Source" link that leads users to an archive 654 | of the code. There are many ways you could offer source, and different 655 | solutions will be better for different programs; see section 13 for the 656 | specific requirements. 657 | 658 | You should also get your employer (if you work as a programmer) or school, 659 | if any, to sign a "copyright disclaimer" for the program, if necessary. 660 | For more information on this, and how to apply and follow the GNU AGPL, see 661 | <https://www.gnu.org/licenses/>.
662 | -------------------------------------------------------------------------------- /crypto/chacha20/internal/hardware/impl_amd64.s: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2019 Yawning Angel 2 | // 3 | // This program is free software: you can redistribute it and/or modify 4 | // it under the terms of the GNU Affero General Public License as 5 | // published by the Free Software Foundation, either version 3 of the 6 | // License, or (at your option) any later version. 7 | // 8 | // This program is distributed in the hope that it will be useful, 9 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | // GNU Affero General Public License for more details. 12 | // 13 | // You should have received a copy of the GNU Affero General Public License 14 | // along with this program. If not, see <https://www.gnu.org/licenses/>. 15 | 16 | // +build !noasm 17 | 18 | #include "textflag.h" 19 | 20 | DATA ·chacha_constants<>+0x00(SB)/4, $0x61707865 21 | DATA ·chacha_constants<>+0x04(SB)/4, $0x3320646E 22 | DATA ·chacha_constants<>+0x08(SB)/4, $0x79622D32 23 | DATA ·chacha_constants<>+0x0c(SB)/4, $0x6B206574 24 | DATA ·chacha_constants<>+0x10(SB)/8, $0x0504070601000302 25 | DATA ·chacha_constants<>+0x18(SB)/8, $0x0D0C0F0E09080B0A 26 | DATA ·chacha_constants<>+0x20(SB)/8, $0x0605040702010003 27 | DATA ·chacha_constants<>+0x28(SB)/8, $0x0E0D0C0F0A09080B 28 | GLOBL ·chacha_constants<>(SB), (NOPTR+RODATA), $48 29 | 30 | // func blocksAVX2(s *[api.StateSize]uint32, in, out []byte) 31 | TEXT ·blocksAVX2(SB), NOSPLIT, $576-56 32 | // This is Andrew Moon's AVX2 ChaCha implementation taken from 33 | // supercop-20171218, with some minor changes, primarily calling 34 | // convention and assembly dialect related. 35 | 36 | // Align the stack on a 64 byte boundary.
37 | MOVQ SP, BP 38 | ADDQ $64, BP 39 | ANDQ $-64, BP 40 | 41 | // Go calling convention -> SYSV AMD64 (and a fixup). 42 | MOVQ s+0(FP), DI // &s -> DI 43 | ADDQ $16, DI // Skip the ChaCha constants in the chachaState. 44 | MOVQ in+8(FP), SI // &in[0] -> SI 45 | MOVQ out+32(FP), DX // &out[0] -> DX 46 | MOVQ in_len+16(FP), CX // len(in) -> CX 47 | 48 | // Begin the main body of `chacha_blocks_avx2`. 49 | // 50 | // Mostly a direct translation except: 51 | // * The number of rounds is always 20. 52 | // * %rbp is used instead of %rsp. 53 | LEAQ ·chacha_constants<>(SB), AX 54 | VMOVDQU 0(AX), X8 55 | VMOVDQU 16(AX), X6 56 | VMOVDQU 32(AX), X7 57 | VMOVDQU 0(DI), X9 58 | VMOVDQU 16(DI), X10 59 | VMOVDQU 32(DI), X11 60 | 61 | // MOVQ 48(DI), AX 62 | MOVQ $1, R9 63 | VMOVDQA X8, 0(BP) 64 | VMOVDQA X9, 16(BP) 65 | VMOVDQA X10, 32(BP) 66 | VMOVDQA X11, 48(BP) 67 | 68 | // MOVQ AX, 64(BP) 69 | VMOVDQA X6, 448(BP) 70 | VMOVDQA X6, 464(BP) 71 | VMOVDQA X7, 480(BP) 72 | VMOVDQA X7, 496(BP) 73 | CMPQ CX, $512 74 | JAE chacha_blocks_avx2_atleast512 75 | CMPQ CX, $256 76 | JAE chacha_blocks_avx2_atleast256 77 | JMP chacha_blocks_avx2_below256 78 | 79 | chacha_blocks_avx2_atleast512: 80 | MOVQ 48(BP), AX 81 | LEAQ 1(AX), R8 82 | LEAQ 2(AX), R9 83 | LEAQ 3(AX), R10 84 | LEAQ 4(AX), BX 85 | LEAQ 5(AX), R11 86 | LEAQ 6(AX), R12 87 | LEAQ 7(AX), R13 88 | LEAQ 8(AX), R14 89 | MOVL AX, 128(BP) 90 | MOVL R8, 4+128(BP) 91 | MOVL R9, 8+128(BP) 92 | MOVL R10, 12+128(BP) 93 | MOVL BX, 16+128(BP) 94 | MOVL R11, 20+128(BP) 95 | MOVL R12, 24+128(BP) 96 | MOVL R13, 28+128(BP) 97 | SHRQ $32, AX 98 | SHRQ $32, R8 99 | SHRQ $32, R9 100 | SHRQ $32, R10 101 | SHRQ $32, BX 102 | SHRQ $32, R11 103 | SHRQ $32, R12 104 | SHRQ $32, R13 105 | MOVL AX, 160(BP) 106 | MOVL R8, 4+160(BP) 107 | MOVL R9, 8+160(BP) 108 | MOVL R10, 12+160(BP) 109 | MOVL BX, 16+160(BP) 110 | MOVL R11, 20+160(BP) 111 | MOVL R12, 24+160(BP) 112 | MOVL R13, 28+160(BP) 113 | MOVQ R14, 48(BP) 114 | 115 | // MOVQ 64(BP), AX 116 | MOVQ 
$20, AX 117 | VPBROADCASTD 0(BP), Y0 118 | VPBROADCASTD 4+0(BP), Y1 119 | VPBROADCASTD 8+0(BP), Y2 120 | VPBROADCASTD 12+0(BP), Y3 121 | VPBROADCASTD 16(BP), Y4 122 | VPBROADCASTD 4+16(BP), Y5 123 | VPBROADCASTD 8+16(BP), Y6 124 | VPBROADCASTD 12+16(BP), Y7 125 | VPBROADCASTD 32(BP), Y8 126 | VPBROADCASTD 4+32(BP), Y9 127 | VPBROADCASTD 8+32(BP), Y10 128 | VPBROADCASTD 12+32(BP), Y11 129 | VPBROADCASTD 8+48(BP), Y14 130 | VPBROADCASTD 12+48(BP), Y15 131 | VMOVDQA 128(BP), Y12 132 | VMOVDQA 160(BP), Y13 133 | 134 | chacha_blocks_avx2_mainloop1: 135 | VPADDD Y0, Y4, Y0 136 | VPADDD Y1, Y5, Y1 137 | VPXOR Y12, Y0, Y12 138 | VPXOR Y13, Y1, Y13 139 | VPADDD Y2, Y6, Y2 140 | VPADDD Y3, Y7, Y3 141 | VPXOR Y14, Y2, Y14 142 | VPXOR Y15, Y3, Y15 143 | VPSHUFB 448(BP), Y12, Y12 144 | VPSHUFB 448(BP), Y13, Y13 145 | VPADDD Y8, Y12, Y8 146 | VPADDD Y9, Y13, Y9 147 | VPSHUFB 448(BP), Y14, Y14 148 | VPSHUFB 448(BP), Y15, Y15 149 | VPADDD Y10, Y14, Y10 150 | VPADDD Y11, Y15, Y11 151 | VMOVDQA Y12, 96(BP) 152 | VPXOR Y4, Y8, Y4 153 | VPXOR Y5, Y9, Y5 154 | VPSLLD $ 12, Y4, Y12 155 | VPSRLD $20, Y4, Y4 156 | VPXOR Y4, Y12, Y4 157 | VPSLLD $ 12, Y5, Y12 158 | VPSRLD $20, Y5, Y5 159 | VPXOR Y5, Y12, Y5 160 | VPXOR Y6, Y10, Y6 161 | VPXOR Y7, Y11, Y7 162 | VPSLLD $ 12, Y6, Y12 163 | VPSRLD $20, Y6, Y6 164 | VPXOR Y6, Y12, Y6 165 | VPSLLD $ 12, Y7, Y12 166 | VPSRLD $20, Y7, Y7 167 | VPXOR Y7, Y12, Y7 168 | VPADDD Y0, Y4, Y0 169 | VPADDD Y1, Y5, Y1 170 | VPXOR 96(BP), Y0, Y12 171 | VPXOR Y13, Y1, Y13 172 | VPADDD Y2, Y6, Y2 173 | VPADDD Y3, Y7, Y3 174 | VPXOR Y14, Y2, Y14 175 | VPXOR Y15, Y3, Y15 176 | VPSHUFB 480(BP), Y12, Y12 177 | VPSHUFB 480(BP), Y13, Y13 178 | VPADDD Y8, Y12, Y8 179 | VPADDD Y9, Y13, Y9 180 | VPSHUFB 480(BP), Y14, Y14 181 | VPSHUFB 480(BP), Y15, Y15 182 | VPADDD Y10, Y14, Y10 183 | VPADDD Y11, Y15, Y11 184 | VMOVDQA Y12, 96(BP) 185 | VPXOR Y4, Y8, Y4 186 | VPXOR Y5, Y9, Y5 187 | VPSLLD $ 7, Y4, Y12 188 | VPSRLD $25, Y4, Y4 189 | VPXOR Y4, Y12, Y4 190 | VPSLLD $ 7, 
Y5, Y12 191 | VPSRLD $25, Y5, Y5 192 | VPXOR Y5, Y12, Y5 193 | VPXOR Y6, Y10, Y6 194 | VPXOR Y7, Y11, Y7 195 | VPSLLD $ 7, Y6, Y12 196 | VPSRLD $25, Y6, Y6 197 | VPXOR Y6, Y12, Y6 198 | VPSLLD $ 7, Y7, Y12 199 | VPSRLD $25, Y7, Y7 200 | VPXOR Y7, Y12, Y7 201 | VPADDD Y0, Y5, Y0 202 | VPADDD Y1, Y6, Y1 203 | VPXOR Y15, Y0, Y15 204 | VPXOR 96(BP), Y1, Y12 205 | VPADDD Y2, Y7, Y2 206 | VPADDD Y3, Y4, Y3 207 | VPXOR Y13, Y2, Y13 208 | VPXOR Y14, Y3, Y14 209 | VPSHUFB 448(BP), Y15, Y15 210 | VPSHUFB 448(BP), Y12, Y12 211 | VPADDD Y10, Y15, Y10 212 | VPADDD Y11, Y12, Y11 213 | VPSHUFB 448(BP), Y13, Y13 214 | VPSHUFB 448(BP), Y14, Y14 215 | VPADDD Y8, Y13, Y8 216 | VPADDD Y9, Y14, Y9 217 | VMOVDQA Y15, 96(BP) 218 | VPXOR Y5, Y10, Y5 219 | VPXOR Y6, Y11, Y6 220 | VPSLLD $ 12, Y5, Y15 221 | VPSRLD $20, Y5, Y5 222 | VPXOR Y5, Y15, Y5 223 | VPSLLD $ 12, Y6, Y15 224 | VPSRLD $20, Y6, Y6 225 | VPXOR Y6, Y15, Y6 226 | VPXOR Y7, Y8, Y7 227 | VPXOR Y4, Y9, Y4 228 | VPSLLD $ 12, Y7, Y15 229 | VPSRLD $20, Y7, Y7 230 | VPXOR Y7, Y15, Y7 231 | VPSLLD $ 12, Y4, Y15 232 | VPSRLD $20, Y4, Y4 233 | VPXOR Y4, Y15, Y4 234 | VPADDD Y0, Y5, Y0 235 | VPADDD Y1, Y6, Y1 236 | VPXOR 96(BP), Y0, Y15 237 | VPXOR Y12, Y1, Y12 238 | VPADDD Y2, Y7, Y2 239 | VPADDD Y3, Y4, Y3 240 | VPXOR Y13, Y2, Y13 241 | VPXOR Y14, Y3, Y14 242 | VPSHUFB 480(BP), Y15, Y15 243 | VPSHUFB 480(BP), Y12, Y12 244 | VPADDD Y10, Y15, Y10 245 | VPADDD Y11, Y12, Y11 246 | VPSHUFB 480(BP), Y13, Y13 247 | VPSHUFB 480(BP), Y14, Y14 248 | VPADDD Y8, Y13, Y8 249 | VPADDD Y9, Y14, Y9 250 | VMOVDQA Y15, 96(BP) 251 | VPXOR Y5, Y10, Y5 252 | VPXOR Y6, Y11, Y6 253 | VPSLLD $ 7, Y5, Y15 254 | VPSRLD $25, Y5, Y5 255 | VPXOR Y5, Y15, Y5 256 | VPSLLD $ 7, Y6, Y15 257 | VPSRLD $25, Y6, Y6 258 | VPXOR Y6, Y15, Y6 259 | VPXOR Y7, Y8, Y7 260 | VPXOR Y4, Y9, Y4 261 | VPSLLD $ 7, Y7, Y15 262 | VPSRLD $25, Y7, Y7 263 | VPXOR Y7, Y15, Y7 264 | VPSLLD $ 7, Y4, Y15 265 | VPSRLD $25, Y4, Y4 266 | VPXOR Y4, Y15, Y4 267 | VMOVDQA 96(BP), Y15 268 | SUBQ 
$2, AX 269 | JNZ chacha_blocks_avx2_mainloop1 270 | VMOVDQA Y8, 192(BP) 271 | VMOVDQA Y9, 224(BP) 272 | VMOVDQA Y10, 256(BP) 273 | VMOVDQA Y11, 288(BP) 274 | VMOVDQA Y12, 320(BP) 275 | VMOVDQA Y13, 352(BP) 276 | VMOVDQA Y14, 384(BP) 277 | VMOVDQA Y15, 416(BP) 278 | VPBROADCASTD 0(BP), Y8 279 | VPBROADCASTD 4+0(BP), Y9 280 | VPBROADCASTD 8+0(BP), Y10 281 | VPBROADCASTD 12+0(BP), Y11 282 | VPBROADCASTD 16(BP), Y12 283 | VPBROADCASTD 4+16(BP), Y13 284 | VPBROADCASTD 8+16(BP), Y14 285 | VPBROADCASTD 12+16(BP), Y15 286 | VPADDD Y8, Y0, Y0 287 | VPADDD Y9, Y1, Y1 288 | VPADDD Y10, Y2, Y2 289 | VPADDD Y11, Y3, Y3 290 | VPADDD Y12, Y4, Y4 291 | VPADDD Y13, Y5, Y5 292 | VPADDD Y14, Y6, Y6 293 | VPADDD Y15, Y7, Y7 294 | VPUNPCKLDQ Y1, Y0, Y8 295 | VPUNPCKLDQ Y3, Y2, Y9 296 | VPUNPCKHDQ Y1, Y0, Y12 297 | VPUNPCKHDQ Y3, Y2, Y13 298 | VPUNPCKLDQ Y5, Y4, Y10 299 | VPUNPCKLDQ Y7, Y6, Y11 300 | VPUNPCKHDQ Y5, Y4, Y14 301 | VPUNPCKHDQ Y7, Y6, Y15 302 | VPUNPCKLQDQ Y9, Y8, Y0 303 | VPUNPCKLQDQ Y11, Y10, Y1 304 | VPUNPCKHQDQ Y9, Y8, Y2 305 | VPUNPCKHQDQ Y11, Y10, Y3 306 | VPUNPCKLQDQ Y13, Y12, Y4 307 | VPUNPCKLQDQ Y15, Y14, Y5 308 | VPUNPCKHQDQ Y13, Y12, Y6 309 | VPUNPCKHQDQ Y15, Y14, Y7 310 | VPERM2I128 $0x20, Y1, Y0, Y8 311 | VPERM2I128 $0x20, Y3, Y2, Y9 312 | VPERM2I128 $0x31, Y1, Y0, Y12 313 | VPERM2I128 $0x31, Y3, Y2, Y13 314 | VPERM2I128 $0x20, Y5, Y4, Y10 315 | VPERM2I128 $0x20, Y7, Y6, Y11 316 | VPERM2I128 $0x31, Y5, Y4, Y14 317 | VPERM2I128 $0x31, Y7, Y6, Y15 318 | ANDQ SI, SI 319 | JZ chacha_blocks_avx2_noinput1 320 | VPXOR 0(SI), Y8, Y8 321 | VPXOR 64(SI), Y9, Y9 322 | VPXOR 128(SI), Y10, Y10 323 | VPXOR 192(SI), Y11, Y11 324 | VPXOR 256(SI), Y12, Y12 325 | VPXOR 320(SI), Y13, Y13 326 | VPXOR 384(SI), Y14, Y14 327 | VPXOR 448(SI), Y15, Y15 328 | VMOVDQU Y8, 0(DX) 329 | VMOVDQU Y9, 64(DX) 330 | VMOVDQU Y10, 128(DX) 331 | VMOVDQU Y11, 192(DX) 332 | VMOVDQU Y12, 256(DX) 333 | VMOVDQU Y13, 320(DX) 334 | VMOVDQU Y14, 384(DX) 335 | VMOVDQU Y15, 448(DX) 336 | VMOVDQA 192(BP), Y0 
337 | VMOVDQA 224(BP), Y1 338 | VMOVDQA 256(BP), Y2 339 | VMOVDQA 288(BP), Y3 340 | VMOVDQA 320(BP), Y4 341 | VMOVDQA 352(BP), Y5 342 | VMOVDQA 384(BP), Y6 343 | VMOVDQA 416(BP), Y7 344 | VPBROADCASTD 32(BP), Y8 345 | VPBROADCASTD 4+32(BP), Y9 346 | VPBROADCASTD 8+32(BP), Y10 347 | VPBROADCASTD 12+32(BP), Y11 348 | VMOVDQA 128(BP), Y12 349 | VMOVDQA 160(BP), Y13 350 | VPBROADCASTD 8+48(BP), Y14 351 | VPBROADCASTD 12+48(BP), Y15 352 | VPADDD Y8, Y0, Y0 353 | VPADDD Y9, Y1, Y1 354 | VPADDD Y10, Y2, Y2 355 | VPADDD Y11, Y3, Y3 356 | VPADDD Y12, Y4, Y4 357 | VPADDD Y13, Y5, Y5 358 | VPADDD Y14, Y6, Y6 359 | VPADDD Y15, Y7, Y7 360 | VPUNPCKLDQ Y1, Y0, Y8 361 | VPUNPCKLDQ Y3, Y2, Y9 362 | VPUNPCKHDQ Y1, Y0, Y12 363 | VPUNPCKHDQ Y3, Y2, Y13 364 | VPUNPCKLDQ Y5, Y4, Y10 365 | VPUNPCKLDQ Y7, Y6, Y11 366 | VPUNPCKHDQ Y5, Y4, Y14 367 | VPUNPCKHDQ Y7, Y6, Y15 368 | VPUNPCKLQDQ Y9, Y8, Y0 369 | VPUNPCKLQDQ Y11, Y10, Y1 370 | VPUNPCKHQDQ Y9, Y8, Y2 371 | VPUNPCKHQDQ Y11, Y10, Y3 372 | VPUNPCKLQDQ Y13, Y12, Y4 373 | VPUNPCKLQDQ Y15, Y14, Y5 374 | VPUNPCKHQDQ Y13, Y12, Y6 375 | VPUNPCKHQDQ Y15, Y14, Y7 376 | VPERM2I128 $0x20, Y1, Y0, Y8 377 | VPERM2I128 $0x20, Y3, Y2, Y9 378 | VPERM2I128 $0x31, Y1, Y0, Y12 379 | VPERM2I128 $0x31, Y3, Y2, Y13 380 | VPERM2I128 $0x20, Y5, Y4, Y10 381 | VPERM2I128 $0x20, Y7, Y6, Y11 382 | VPERM2I128 $0x31, Y5, Y4, Y14 383 | VPERM2I128 $0x31, Y7, Y6, Y15 384 | VPXOR 32(SI), Y8, Y8 385 | VPXOR 96(SI), Y9, Y9 386 | VPXOR 160(SI), Y10, Y10 387 | VPXOR 224(SI), Y11, Y11 388 | VPXOR 288(SI), Y12, Y12 389 | VPXOR 352(SI), Y13, Y13 390 | VPXOR 416(SI), Y14, Y14 391 | VPXOR 480(SI), Y15, Y15 392 | VMOVDQU Y8, 32(DX) 393 | VMOVDQU Y9, 96(DX) 394 | VMOVDQU Y10, 160(DX) 395 | VMOVDQU Y11, 224(DX) 396 | VMOVDQU Y12, 288(DX) 397 | VMOVDQU Y13, 352(DX) 398 | VMOVDQU Y14, 416(DX) 399 | VMOVDQU Y15, 480(DX) 400 | ADDQ $512, SI 401 | JMP chacha_blocks_avx2_mainloop1_cont 402 | 403 | chacha_blocks_avx2_noinput1: 404 | VMOVDQU Y8, 0(DX) 405 | VMOVDQU Y9, 64(DX) 406 | 
VMOVDQU Y10, 128(DX) 407 | VMOVDQU Y11, 192(DX) 408 | VMOVDQU Y12, 256(DX) 409 | VMOVDQU Y13, 320(DX) 410 | VMOVDQU Y14, 384(DX) 411 | VMOVDQU Y15, 448(DX) 412 | VMOVDQA 192(BP), Y0 413 | VMOVDQA 224(BP), Y1 414 | VMOVDQA 256(BP), Y2 415 | VMOVDQA 288(BP), Y3 416 | VMOVDQA 320(BP), Y4 417 | VMOVDQA 352(BP), Y5 418 | VMOVDQA 384(BP), Y6 419 | VMOVDQA 416(BP), Y7 420 | VPBROADCASTD 32(BP), Y8 421 | VPBROADCASTD 4+32(BP), Y9 422 | VPBROADCASTD 8+32(BP), Y10 423 | VPBROADCASTD 12+32(BP), Y11 424 | VMOVDQA 128(BP), Y12 425 | VMOVDQA 160(BP), Y13 426 | VPBROADCASTD 8+48(BP), Y14 427 | VPBROADCASTD 12+48(BP), Y15 428 | VPADDD Y8, Y0, Y0 429 | VPADDD Y9, Y1, Y1 430 | VPADDD Y10, Y2, Y2 431 | VPADDD Y11, Y3, Y3 432 | VPADDD Y12, Y4, Y4 433 | VPADDD Y13, Y5, Y5 434 | VPADDD Y14, Y6, Y6 435 | VPADDD Y15, Y7, Y7 436 | VPUNPCKLDQ Y1, Y0, Y8 437 | VPUNPCKLDQ Y3, Y2, Y9 438 | VPUNPCKHDQ Y1, Y0, Y12 439 | VPUNPCKHDQ Y3, Y2, Y13 440 | VPUNPCKLDQ Y5, Y4, Y10 441 | VPUNPCKLDQ Y7, Y6, Y11 442 | VPUNPCKHDQ Y5, Y4, Y14 443 | VPUNPCKHDQ Y7, Y6, Y15 444 | VPUNPCKLQDQ Y9, Y8, Y0 445 | VPUNPCKLQDQ Y11, Y10, Y1 446 | VPUNPCKHQDQ Y9, Y8, Y2 447 | VPUNPCKHQDQ Y11, Y10, Y3 448 | VPUNPCKLQDQ Y13, Y12, Y4 449 | VPUNPCKLQDQ Y15, Y14, Y5 450 | VPUNPCKHQDQ Y13, Y12, Y6 451 | VPUNPCKHQDQ Y15, Y14, Y7 452 | VPERM2I128 $0x20, Y1, Y0, Y8 453 | VPERM2I128 $0x20, Y3, Y2, Y9 454 | VPERM2I128 $0x31, Y1, Y0, Y12 455 | VPERM2I128 $0x31, Y3, Y2, Y13 456 | VPERM2I128 $0x20, Y5, Y4, Y10 457 | VPERM2I128 $0x20, Y7, Y6, Y11 458 | VPERM2I128 $0x31, Y5, Y4, Y14 459 | VPERM2I128 $0x31, Y7, Y6, Y15 460 | VMOVDQU Y8, 32(DX) 461 | VMOVDQU Y9, 96(DX) 462 | VMOVDQU Y10, 160(DX) 463 | VMOVDQU Y11, 224(DX) 464 | VMOVDQU Y12, 288(DX) 465 | VMOVDQU Y13, 352(DX) 466 | VMOVDQU Y14, 416(DX) 467 | VMOVDQU Y15, 480(DX) 468 | 469 | chacha_blocks_avx2_mainloop1_cont: 470 | ADDQ $512, DX 471 | SUBQ $512, CX 472 | CMPQ CX, $512 473 | JAE chacha_blocks_avx2_atleast512 474 | CMPQ CX, $256 475 | JB chacha_blocks_avx2_below256_fixup 476 | 
477 | chacha_blocks_avx2_atleast256: 478 | MOVQ 48(BP), AX 479 | LEAQ 1(AX), R8 480 | LEAQ 2(AX), R9 481 | LEAQ 3(AX), R10 482 | LEAQ 4(AX), BX 483 | MOVL AX, 128(BP) 484 | MOVL R8, 4+128(BP) 485 | MOVL R9, 8+128(BP) 486 | MOVL R10, 12+128(BP) 487 | SHRQ $32, AX 488 | SHRQ $32, R8 489 | SHRQ $32, R9 490 | SHRQ $32, R10 491 | MOVL AX, 160(BP) 492 | MOVL R8, 4+160(BP) 493 | MOVL R9, 8+160(BP) 494 | MOVL R10, 12+160(BP) 495 | MOVQ BX, 48(BP) 496 | 497 | // MOVQ 64(BP), AX 498 | MOVQ $20, AX 499 | VPBROADCASTD 0(BP), X0 500 | VPBROADCASTD 4+0(BP), X1 501 | VPBROADCASTD 8+0(BP), X2 502 | VPBROADCASTD 12+0(BP), X3 503 | VPBROADCASTD 16(BP), X4 504 | VPBROADCASTD 4+16(BP), X5 505 | VPBROADCASTD 8+16(BP), X6 506 | VPBROADCASTD 12+16(BP), X7 507 | VPBROADCASTD 32(BP), X8 508 | VPBROADCASTD 4+32(BP), X9 509 | VPBROADCASTD 8+32(BP), X10 510 | VPBROADCASTD 12+32(BP), X11 511 | VMOVDQA 128(BP), X12 512 | VMOVDQA 160(BP), X13 513 | VPBROADCASTD 8+48(BP), X14 514 | VPBROADCASTD 12+48(BP), X15 515 | 516 | chacha_blocks_avx2_mainloop2: 517 | VPADDD X0, X4, X0 518 | VPADDD X1, X5, X1 519 | VPXOR X12, X0, X12 520 | VPXOR X13, X1, X13 521 | VPADDD X2, X6, X2 522 | VPADDD X3, X7, X3 523 | VPXOR X14, X2, X14 524 | VPXOR X15, X3, X15 525 | VPSHUFB 448(BP), X12, X12 526 | VPSHUFB 448(BP), X13, X13 527 | VPADDD X8, X12, X8 528 | VPADDD X9, X13, X9 529 | VPSHUFB 448(BP), X14, X14 530 | VPSHUFB 448(BP), X15, X15 531 | VPADDD X10, X14, X10 532 | VPADDD X11, X15, X11 533 | VMOVDQA X12, 96(BP) 534 | VPXOR X4, X8, X4 535 | VPXOR X5, X9, X5 536 | VPSLLD $ 12, X4, X12 537 | VPSRLD $20, X4, X4 538 | VPXOR X4, X12, X4 539 | VPSLLD $ 12, X5, X12 540 | VPSRLD $20, X5, X5 541 | VPXOR X5, X12, X5 542 | VPXOR X6, X10, X6 543 | VPXOR X7, X11, X7 544 | VPSLLD $ 12, X6, X12 545 | VPSRLD $20, X6, X6 546 | VPXOR X6, X12, X6 547 | VPSLLD $ 12, X7, X12 548 | VPSRLD $20, X7, X7 549 | VPXOR X7, X12, X7 550 | VPADDD X0, X4, X0 551 | VPADDD X1, X5, X1 552 | VPXOR 96(BP), X0, X12 553 | VPXOR X13, X1, X13 554 | 
VPADDD X2, X6, X2 555 | VPADDD X3, X7, X3 556 | VPXOR X14, X2, X14 557 | VPXOR X15, X3, X15 558 | VPSHUFB 480(BP), X12, X12 559 | VPSHUFB 480(BP), X13, X13 560 | VPADDD X8, X12, X8 561 | VPADDD X9, X13, X9 562 | VPSHUFB 480(BP), X14, X14 563 | VPSHUFB 480(BP), X15, X15 564 | VPADDD X10, X14, X10 565 | VPADDD X11, X15, X11 566 | VMOVDQA X12, 96(BP) 567 | VPXOR X4, X8, X4 568 | VPXOR X5, X9, X5 569 | VPSLLD $ 7, X4, X12 570 | VPSRLD $25, X4, X4 571 | VPXOR X4, X12, X4 572 | VPSLLD $ 7, X5, X12 573 | VPSRLD $25, X5, X5 574 | VPXOR X5, X12, X5 575 | VPXOR X6, X10, X6 576 | VPXOR X7, X11, X7 577 | VPSLLD $ 7, X6, X12 578 | VPSRLD $25, X6, X6 579 | VPXOR X6, X12, X6 580 | VPSLLD $ 7, X7, X12 581 | VPSRLD $25, X7, X7 582 | VPXOR X7, X12, X7 583 | VPADDD X0, X5, X0 584 | VPADDD X1, X6, X1 585 | VPXOR X15, X0, X15 586 | VPXOR 96(BP), X1, X12 587 | VPADDD X2, X7, X2 588 | VPADDD X3, X4, X3 589 | VPXOR X13, X2, X13 590 | VPXOR X14, X3, X14 591 | VPSHUFB 448(BP), X15, X15 592 | VPSHUFB 448(BP), X12, X12 593 | VPADDD X10, X15, X10 594 | VPADDD X11, X12, X11 595 | VPSHUFB 448(BP), X13, X13 596 | VPSHUFB 448(BP), X14, X14 597 | VPADDD X8, X13, X8 598 | VPADDD X9, X14, X9 599 | VMOVDQA X15, 96(BP) 600 | VPXOR X5, X10, X5 601 | VPXOR X6, X11, X6 602 | VPSLLD $ 12, X5, X15 603 | VPSRLD $20, X5, X5 604 | VPXOR X5, X15, X5 605 | VPSLLD $ 12, X6, X15 606 | VPSRLD $20, X6, X6 607 | VPXOR X6, X15, X6 608 | VPXOR X7, X8, X7 609 | VPXOR X4, X9, X4 610 | VPSLLD $ 12, X7, X15 611 | VPSRLD $20, X7, X7 612 | VPXOR X7, X15, X7 613 | VPSLLD $ 12, X4, X15 614 | VPSRLD $20, X4, X4 615 | VPXOR X4, X15, X4 616 | VPADDD X0, X5, X0 617 | VPADDD X1, X6, X1 618 | VPXOR 96(BP), X0, X15 619 | VPXOR X12, X1, X12 620 | VPADDD X2, X7, X2 621 | VPADDD X3, X4, X3 622 | VPXOR X13, X2, X13 623 | VPXOR X14, X3, X14 624 | VPSHUFB 480(BP), X15, X15 625 | VPSHUFB 480(BP), X12, X12 626 | VPADDD X10, X15, X10 627 | VPADDD X11, X12, X11 628 | VPSHUFB 480(BP), X13, X13 629 | VPSHUFB 480(BP), X14, X14 630 | VPADDD X8, 
X13, X8 631 | VPADDD X9, X14, X9 632 | VMOVDQA X15, 96(BP) 633 | VPXOR X5, X10, X5 634 | VPXOR X6, X11, X6 635 | VPSLLD $ 7, X5, X15 636 | VPSRLD $25, X5, X5 637 | VPXOR X5, X15, X5 638 | VPSLLD $ 7, X6, X15 639 | VPSRLD $25, X6, X6 640 | VPXOR X6, X15, X6 641 | VPXOR X7, X8, X7 642 | VPXOR X4, X9, X4 643 | VPSLLD $ 7, X7, X15 644 | VPSRLD $25, X7, X7 645 | VPXOR X7, X15, X7 646 | VPSLLD $ 7, X4, X15 647 | VPSRLD $25, X4, X4 648 | VPXOR X4, X15, X4 649 | VMOVDQA 96(BP), X15 650 | SUBQ $2, AX 651 | JNZ chacha_blocks_avx2_mainloop2 652 | VMOVDQA X8, 192(BP) 653 | VMOVDQA X9, 208(BP) 654 | VMOVDQA X10, 224(BP) 655 | VMOVDQA X11, 240(BP) 656 | VMOVDQA X12, 256(BP) 657 | VMOVDQA X13, 272(BP) 658 | VMOVDQA X14, 288(BP) 659 | VMOVDQA X15, 304(BP) 660 | VPBROADCASTD 0(BP), X8 661 | VPBROADCASTD 4+0(BP), X9 662 | VPBROADCASTD 8+0(BP), X10 663 | VPBROADCASTD 12+0(BP), X11 664 | VPBROADCASTD 16(BP), X12 665 | VPBROADCASTD 4+16(BP), X13 666 | VPBROADCASTD 8+16(BP), X14 667 | VPBROADCASTD 12+16(BP), X15 668 | VPADDD X8, X0, X0 669 | VPADDD X9, X1, X1 670 | VPADDD X10, X2, X2 671 | VPADDD X11, X3, X3 672 | VPADDD X12, X4, X4 673 | VPADDD X13, X5, X5 674 | VPADDD X14, X6, X6 675 | VPADDD X15, X7, X7 676 | VPUNPCKLDQ X1, X0, X8 677 | VPUNPCKLDQ X3, X2, X9 678 | VPUNPCKHDQ X1, X0, X12 679 | VPUNPCKHDQ X3, X2, X13 680 | VPUNPCKLDQ X5, X4, X10 681 | VPUNPCKLDQ X7, X6, X11 682 | VPUNPCKHDQ X5, X4, X14 683 | VPUNPCKHDQ X7, X6, X15 684 | VPUNPCKLQDQ X9, X8, X0 685 | VPUNPCKLQDQ X11, X10, X1 686 | VPUNPCKHQDQ X9, X8, X2 687 | VPUNPCKHQDQ X11, X10, X3 688 | VPUNPCKLQDQ X13, X12, X4 689 | VPUNPCKLQDQ X15, X14, X5 690 | VPUNPCKHQDQ X13, X12, X6 691 | VPUNPCKHQDQ X15, X14, X7 692 | ANDQ SI, SI 693 | JZ chacha_blocks_avx2_noinput2 694 | VPXOR 0(SI), X0, X0 695 | VPXOR 16(SI), X1, X1 696 | VPXOR 64(SI), X2, X2 697 | VPXOR 80(SI), X3, X3 698 | VPXOR 128(SI), X4, X4 699 | VPXOR 144(SI), X5, X5 700 | VPXOR 192(SI), X6, X6 701 | VPXOR 208(SI), X7, X7 702 | VMOVDQU X0, 0(DX) 703 | VMOVDQU X1, 
16(DX) 704 | VMOVDQU X2, 64(DX) 705 | VMOVDQU X3, 80(DX) 706 | VMOVDQU X4, 128(DX) 707 | VMOVDQU X5, 144(DX) 708 | VMOVDQU X6, 192(DX) 709 | VMOVDQU X7, 208(DX) 710 | VMOVDQA 192(BP), X0 711 | VMOVDQA 208(BP), X1 712 | VMOVDQA 224(BP), X2 713 | VMOVDQA 240(BP), X3 714 | VMOVDQA 256(BP), X4 715 | VMOVDQA 272(BP), X5 716 | VMOVDQA 288(BP), X6 717 | VMOVDQA 304(BP), X7 718 | VPBROADCASTD 32(BP), X8 719 | VPBROADCASTD 4+32(BP), X9 720 | VPBROADCASTD 8+32(BP), X10 721 | VPBROADCASTD 12+32(BP), X11 722 | VMOVDQA 128(BP), X12 723 | VMOVDQA 160(BP), X13 724 | VPBROADCASTD 8+48(BP), X14 725 | VPBROADCASTD 12+48(BP), X15 726 | VPADDD X8, X0, X0 727 | VPADDD X9, X1, X1 728 | VPADDD X10, X2, X2 729 | VPADDD X11, X3, X3 730 | VPADDD X12, X4, X4 731 | VPADDD X13, X5, X5 732 | VPADDD X14, X6, X6 733 | VPADDD X15, X7, X7 734 | VPUNPCKLDQ X1, X0, X8 735 | VPUNPCKLDQ X3, X2, X9 736 | VPUNPCKHDQ X1, X0, X12 737 | VPUNPCKHDQ X3, X2, X13 738 | VPUNPCKLDQ X5, X4, X10 739 | VPUNPCKLDQ X7, X6, X11 740 | VPUNPCKHDQ X5, X4, X14 741 | VPUNPCKHDQ X7, X6, X15 742 | VPUNPCKLQDQ X9, X8, X0 743 | VPUNPCKLQDQ X11, X10, X1 744 | VPUNPCKHQDQ X9, X8, X2 745 | VPUNPCKHQDQ X11, X10, X3 746 | VPUNPCKLQDQ X13, X12, X4 747 | VPUNPCKLQDQ X15, X14, X5 748 | VPUNPCKHQDQ X13, X12, X6 749 | VPUNPCKHQDQ X15, X14, X7 750 | VPXOR 32(SI), X0, X0 751 | VPXOR 48(SI), X1, X1 752 | VPXOR 96(SI), X2, X2 753 | VPXOR 112(SI), X3, X3 754 | VPXOR 160(SI), X4, X4 755 | VPXOR 176(SI), X5, X5 756 | VPXOR 224(SI), X6, X6 757 | VPXOR 240(SI), X7, X7 758 | VMOVDQU X0, 32(DX) 759 | VMOVDQU X1, 48(DX) 760 | VMOVDQU X2, 96(DX) 761 | VMOVDQU X3, 112(DX) 762 | VMOVDQU X4, 160(DX) 763 | VMOVDQU X5, 176(DX) 764 | VMOVDQU X6, 224(DX) 765 | VMOVDQU X7, 240(DX) 766 | ADDQ $256, SI 767 | JMP chacha_blocks_avx2_mainloop2_cont 768 | 769 | chacha_blocks_avx2_noinput2: 770 | VMOVDQU X0, 0(DX) 771 | VMOVDQU X1, 16(DX) 772 | VMOVDQU X2, 64(DX) 773 | VMOVDQU X3, 80(DX) 774 | VMOVDQU X4, 128(DX) 775 | VMOVDQU X5, 144(DX) 776 | VMOVDQU X6, 192(DX) 
777 | VMOVDQU X7, 208(DX) 778 | VMOVDQA 192(BP), X0 779 | VMOVDQA 208(BP), X1 780 | VMOVDQA 224(BP), X2 781 | VMOVDQA 240(BP), X3 782 | VMOVDQA 256(BP), X4 783 | VMOVDQA 272(BP), X5 784 | VMOVDQA 288(BP), X6 785 | VMOVDQA 304(BP), X7 786 | VPBROADCASTD 32(BP), X8 787 | VPBROADCASTD 4+32(BP), X9 788 | VPBROADCASTD 8+32(BP), X10 789 | VPBROADCASTD 12+32(BP), X11 790 | VMOVDQA 128(BP), X12 791 | VMOVDQA 160(BP), X13 792 | VPBROADCASTD 8+48(BP), X14 793 | VPBROADCASTD 12+48(BP), X15 794 | VPADDD X8, X0, X0 795 | VPADDD X9, X1, X1 796 | VPADDD X10, X2, X2 797 | VPADDD X11, X3, X3 798 | VPADDD X12, X4, X4 799 | VPADDD X13, X5, X5 800 | VPADDD X14, X6, X6 801 | VPADDD X15, X7, X7 802 | VPUNPCKLDQ X1, X0, X8 803 | VPUNPCKLDQ X3, X2, X9 804 | VPUNPCKHDQ X1, X0, X12 805 | VPUNPCKHDQ X3, X2, X13 806 | VPUNPCKLDQ X5, X4, X10 807 | VPUNPCKLDQ X7, X6, X11 808 | VPUNPCKHDQ X5, X4, X14 809 | VPUNPCKHDQ X7, X6, X15 810 | VPUNPCKLQDQ X9, X8, X0 811 | VPUNPCKLQDQ X11, X10, X1 812 | VPUNPCKHQDQ X9, X8, X2 813 | VPUNPCKHQDQ X11, X10, X3 814 | VPUNPCKLQDQ X13, X12, X4 815 | VPUNPCKLQDQ X15, X14, X5 816 | VPUNPCKHQDQ X13, X12, X6 817 | VPUNPCKHQDQ X15, X14, X7 818 | VMOVDQU X0, 32(DX) 819 | VMOVDQU X1, 48(DX) 820 | VMOVDQU X2, 96(DX) 821 | VMOVDQU X3, 112(DX) 822 | VMOVDQU X4, 160(DX) 823 | VMOVDQU X5, 176(DX) 824 | VMOVDQU X6, 224(DX) 825 | VMOVDQU X7, 240(DX) 826 | 827 | chacha_blocks_avx2_mainloop2_cont: 828 | ADDQ $256, DX 829 | SUBQ $256, CX 830 | CMPQ CX, $256 831 | JAE chacha_blocks_avx2_atleast256 832 | 833 | chacha_blocks_avx2_below256_fixup: 834 | VMOVDQA 448(BP), X6 835 | VMOVDQA 480(BP), X7 836 | VMOVDQA 0(BP), X8 837 | VMOVDQA 16(BP), X9 838 | VMOVDQA 32(BP), X10 839 | VMOVDQA 48(BP), X11 840 | MOVQ $1, R9 841 | 842 | chacha_blocks_avx2_below256: 843 | VMOVQ R9, X5 844 | ANDQ CX, CX 845 | JZ chacha_blocks_avx2_done 846 | CMPQ CX, $64 847 | JAE chacha_blocks_avx2_above63 848 | MOVQ DX, R9 849 | ANDQ SI, SI 850 | JZ chacha_blocks_avx2_noinput3 851 | MOVQ CX, R10 852 | MOVQ BP, 
DX 853 | ADDQ R10, SI 854 | ADDQ R10, DX 855 | NEGQ R10 856 | 857 | chacha_blocks_avx2_copyinput: 858 | MOVB (SI)(R10*1), AX 859 | MOVB AX, (DX)(R10*1) 860 | INCQ R10 861 | JNZ chacha_blocks_avx2_copyinput 862 | MOVQ BP, SI 863 | 864 | chacha_blocks_avx2_noinput3: 865 | MOVQ BP, DX 866 | 867 | chacha_blocks_avx2_above63: 868 | VMOVDQA X8, X0 869 | VMOVDQA X9, X1 870 | VMOVDQA X10, X2 871 | VMOVDQA X11, X3 872 | 873 | // MOVQ 64(BP), AX 874 | MOVQ $20, AX 875 | 876 | chacha_blocks_avx2_mainloop3: 877 | VPADDD X0, X1, X0 878 | VPXOR X3, X0, X3 879 | VPSHUFB X6, X3, X3 880 | VPADDD X2, X3, X2 881 | VPXOR X1, X2, X1 882 | VPSLLD $12, X1, X4 883 | VPSRLD $20, X1, X1 884 | VPXOR X1, X4, X1 885 | VPADDD X0, X1, X0 886 | VPXOR X3, X0, X3 887 | VPSHUFB X7, X3, X3 888 | VPSHUFD $0x93, X0, X0 889 | VPADDD X2, X3, X2 890 | VPSHUFD $0x4e, X3, X3 891 | VPXOR X1, X2, X1 892 | VPSHUFD $0x39, X2, X2 893 | VPSLLD $7, X1, X4 894 | VPSRLD $25, X1, X1 895 | VPXOR X1, X4, X1 896 | VPADDD X0, X1, X0 897 | VPXOR X3, X0, X3 898 | VPSHUFB X6, X3, X3 899 | VPADDD X2, X3, X2 900 | VPXOR X1, X2, X1 901 | VPSLLD $12, X1, X4 902 | VPSRLD $20, X1, X1 903 | VPXOR X1, X4, X1 904 | VPADDD X0, X1, X0 905 | VPXOR X3, X0, X3 906 | VPSHUFB X7, X3, X3 907 | VPSHUFD $0x39, X0, X0 908 | VPADDD X2, X3, X2 909 | VPSHUFD $0x4e, X3, X3 910 | VPXOR X1, X2, X1 911 | VPSHUFD $0x93, X2, X2 912 | VPSLLD $7, X1, X4 913 | VPSRLD $25, X1, X1 914 | VPXOR X1, X4, X1 915 | SUBQ $2, AX 916 | JNZ chacha_blocks_avx2_mainloop3 917 | VPADDD X0, X8, X0 918 | VPADDD X1, X9, X1 919 | VPADDD X2, X10, X2 920 | VPADDD X3, X11, X3 921 | ANDQ SI, SI 922 | JZ chacha_blocks_avx2_noinput4 923 | VPXOR 0(SI), X0, X0 924 | VPXOR 16(SI), X1, X1 925 | VPXOR 32(SI), X2, X2 926 | VPXOR 48(SI), X3, X3 927 | ADDQ $64, SI 928 | 929 | chacha_blocks_avx2_noinput4: 930 | VMOVDQU X0, 0(DX) 931 | VMOVDQU X1, 16(DX) 932 | VMOVDQU X2, 32(DX) 933 | VMOVDQU X3, 48(DX) 934 | VPADDQ X11, X5, X11 935 | CMPQ CX, $64 936 | JBE 
chacha_blocks_avx2_mainloop3_finishup 937 | ADDQ $64, DX 938 | SUBQ $64, CX 939 | JMP chacha_blocks_avx2_below256 940 | 941 | chacha_blocks_avx2_mainloop3_finishup: 942 | CMPQ CX, $64 943 | JE chacha_blocks_avx2_done 944 | ADDQ CX, R9 945 | ADDQ CX, DX 946 | NEGQ CX 947 | 948 | chacha_blocks_avx2_copyoutput: 949 | MOVB (DX)(CX*1), AX 950 | MOVB AX, (R9)(CX*1) 951 | INCQ CX 952 | JNZ chacha_blocks_avx2_copyoutput 953 | 954 | chacha_blocks_avx2_done: 955 | VMOVDQU X11, 32(DI) 956 | 957 | VZEROUPPER 958 | RET 959 | 960 | // func hChaChaAVX2(key, nonce []byte, dst *byte) 961 | TEXT ·hChaChaAVX2(SB), NOSPLIT|NOFRAME, $0-56 // Runs the 20-round ChaCha core on {constants row, 32 key bytes, 16 nonce bytes} and stores rows 0 and 3 (X0, X3) to dst as 32 bytes; the initial state is not added back at the end. 962 | MOVQ key+0(FP), DI // &key[0] -> DI 963 | MOVQ nonce+24(FP), SI // &nonce[0] -> SI 964 | MOVQ dst+48(FP), DX // dst -> DX 965 | 966 | MOVL $20, CX // round counter; every loop pass subtracts 2 967 | 968 | LEAQ ·chacha_constants<>(SB), AX 969 | VMOVDQA 0(AX), X0 // row 0 of the state comes from the constants table 970 | VMOVDQA 16(AX), X6 // mask for the first VPSHUFB of each quarter round (presumably rotate-left-16; the table is defined outside this chunk) 971 | VMOVDQA 32(AX), X5 // mask for the second VPSHUFB of each quarter round (presumably rotate-left-8) 972 | 973 | VMOVDQU 0(DI), X1 // key[0:16] -> row 1 974 | VMOVDQU 16(DI), X2 // key[16:32] -> row 2 975 | VMOVDQU 0(SI), X3 // nonce[0:16] -> row 3 976 | 977 | hhacha_mainloop_avx2: // one pass = two rounds; the VPSHUFD lane rotations re-pair the rows for the second round and undo it afterwards 978 | VPADDD X0, X1, X0 979 | VPXOR X3, X0, X3 980 | VPSHUFB X6, X3, X3 981 | VPADDD X2, X3, X2 982 | VPXOR X1, X2, X1 983 | VPSLLD $12, X1, X4 984 | VPSRLD $20, X1, X1 985 | VPXOR X1, X4, X1 986 | VPADDD X0, X1, X0 987 | VPXOR X3, X0, X3 988 | VPSHUFB X5, X3, X3 989 | VPADDD X2, X3, X2 990 | VPXOR X1, X2, X1 991 | VPSLLD $7, X1, X4 992 | VPSRLD $25, X1, X1 993 | VPSHUFD $0x93, X0, X0 994 | VPXOR X1, X4, X1 995 | VPSHUFD $0x4e, X3, X3 996 | VPADDD X0, X1, X0 997 | VPXOR X3, X0, X3 998 | VPSHUFB X6, X3, X3 999 | VPSHUFD $0x39, X2, X2 1000 | VPADDD X2, X3, X2 1001 | VPXOR X1, X2, X1 1002 | VPSLLD $12, X1, X4 1003 | VPSRLD $20, X1, X1 1004 | VPXOR X1, X4, X1 1005 | VPADDD X0, X1, X0 1006 | VPXOR X3, X0, X3 1007 | VPSHUFB X5, X3, X3 1008 | VPADDD X2, X3, X2 1009 | VPXOR X1, X2, X1 1010 | VPSHUFD $0x39, X0, X0 1011 | VPSLLD $7, X1, X4 1012 | VPSHUFD $0x4e, X3, X3 1013 | VPSRLD $25, X1, X1 1014 | VPSHUFD $0x93, X2, X2 1015 | VPXOR X1, X4, X1 1016 | SUBL $2, CX 1017 | JNE hhacha_mainloop_avx2 // loop until the round counter reaches 0 1018 | 1019 | VMOVDQU X0, (DX) 1020
| VMOVDQU X3, 16(DX) 1021 | 1022 | VZEROUPPER 1023 | RET 1024 | 1025 | // func blocksSSSE3(s *[api.StateSize]uint32, in, out []byte) 1026 | TEXT ·blocksSSSE3(SB), NOSPLIT, $576-56 // frame: 512 bytes of BP-relative scratch (0..511(BP)) plus 64 bytes of slack for the alignment below 1027 | // This is Andrew Moon's SSSE3 ChaCha implementation taken from 1028 | // supercop-20190110, with some minor changes, primarily calling 1029 | // convention and assembly dialect related. 1030 | // Flow: while at least 256 bytes remain, chacha_blocks_ssse3_atleast256 emits four 64-byte blocks per pass; the remainder goes one block at a time through chacha_blocks_ssse3_below256, and a final partial block is staged through the scratch area at BP. A zero in pointer (SI == 0) skips the XOR and stores raw keystream. On exit X11 (the last 16 state bytes, with the advanced block counter) is written back to 32(DI). 1031 | // Align the stack on a 64 byte boundary. 1032 | MOVQ SP, BP 1033 | ADDQ $64, BP 1034 | ANDQ $-64, BP 1035 | 1036 | // Go calling convention -> SYSV AMD64 (and a fixup). 1037 | MOVQ s+0(FP), DI // &s -> DI 1038 | ADDQ $16, DI // Skip the ChaCha constants in the chachaState. 1039 | MOVQ in+8(FP), SI // &in[0] -> SI 1040 | MOVQ out+32(FP), DX // &out[0] -> DX 1041 | MOVQ in_len+16(FP), CX // len(in) -> CX 1042 | 1043 | // Begin the main body of `chacha_blocks_ssse3`. 1044 | // 1045 | // Mostly a direct translation except: 1046 | // * The number of rounds is always 20. 1047 | // * %rbp (BP) is used instead of %rsp. 1048 | LEAQ ·chacha_constants<>(SB), AX 1049 | MOVO 0(AX), X8 // state row 0 is taken from the constants table 1050 | MOVO 16(AX), X6 // mask for the first PSHUFB of each quarter round (also kept at 80(BP)) 1051 | MOVO 32(AX), X7 // mask for the second PSHUFB of each quarter round (also kept at 96(BP)) 1052 | MOVOU 0(DI), X9 // state bytes 16..31 1053 | MOVOU 16(DI), X10 // state bytes 32..47 1054 | MOVOU 32(DI), X11 // state bytes 48..63; the low 8 bytes are used as the 64-bit block counter 1055 | 1056 | // MOVQ 48(DI), AX 1057 | MOVQ $1, R9 // counter step; moved into X5 at chacha_blocks_ssse3_below256 1058 | MOVO X8, 0(BP) // 0..63(BP): one-block copy of the state 1059 | MOVO X9, 16(BP) 1060 | MOVO X10, 32(BP) 1061 | MOVO X11, 48(BP) 1062 | 1063 | MOVO X6, 80(BP) 1064 | MOVO X7, 96(BP) 1065 | // MOVQ AX, 64(BP) 1066 | CMPQ CX, $256 1067 | JB chacha_blocks_ssse3_below256 // under 256 bytes: skip the four-block setup 1068 | PSHUFD $0x00, X8, X0 // from here: splat every 32-bit state word except the counter across its own vector for the four-block path 1069 | PSHUFD $0x55, X8, X1 1070 | PSHUFD $0xaa, X8, X2 1071 | PSHUFD $0xff, X8, X3 1072 | MOVO X0, 128(BP) 1073 | MOVO X1, 144(BP) 1074 | MOVO X2, 160(BP) 1075 | MOVO X3, 176(BP) 1076 | PSHUFD $0x00, X9, X0 1077 | PSHUFD $0x55, X9, X1 1078 | PSHUFD $0xaa, X9, X2 1079 | PSHUFD $0xff, X9, X3 1080 | MOVO X0, 192(BP) 1081 | MOVO X1, 208(BP) 1082 | MOVO X2, 224(BP) 1083 | MOVO X3, 240(BP) 1084 | PSHUFD $0x00, X10, X0 1085 | PSHUFD $0x55, X10, X1 1086 | PSHUFD $0xaa, X10, X2 1087 | PSHUFD $0xff, X10, X3
1088 | MOVO X0, 256(BP) 1089 | MOVO X1, 272(BP) 1090 | MOVO X2, 288(BP) 1091 | MOVO X3, 304(BP) 1092 | PSHUFD $0xaa, X11, X0 1093 | PSHUFD $0xff, X11, X1 1094 | MOVO X0, 352(BP) 1095 | MOVO X1, 368(BP) 1096 | JMP chacha_blocks_ssse3_atleast256 1097 | 1098 | // .p2align 6,,63 1099 | // # align to 4 mod 64 1100 | // nop;nop;nop;nop; 1101 | chacha_blocks_ssse3_atleast256: // four-block pass: build counters n..n+3 and advance the stored counter by 4 1102 | MOVQ 48(BP), AX // 64-bit block counter n 1103 | LEAQ 1(AX), R8 1104 | LEAQ 2(AX), R9 1105 | LEAQ 3(AX), R10 1106 | LEAQ 4(AX), BX 1107 | MOVL AX, 320(BP) // low 32 bits of n..n+3 -> 320..335(BP) 1108 | MOVL R8, 4+320(BP) 1109 | MOVL R9, 8+320(BP) 1110 | MOVL R10, 12+320(BP) 1111 | SHRQ $32, AX 1112 | SHRQ $32, R8 1113 | SHRQ $32, R9 1114 | SHRQ $32, R10 1115 | MOVL AX, 336(BP) // high 32 bits of n..n+3 -> 336..351(BP) 1116 | MOVL R8, 4+336(BP) 1117 | MOVL R9, 8+336(BP) 1118 | MOVL R10, 12+336(BP) 1119 | MOVQ BX, 48(BP) // stored counter = n+4 1120 | 1121 | // MOVQ 64(BP), AX 1122 | MOVQ $20, AX // round count; each pass of the loop below subtracts 2 1123 | MOVO 128(BP), X0 1124 | MOVO 144(BP), X1 1125 | MOVO 160(BP), X2 1126 | MOVO 176(BP), X3 1127 | MOVO 192(BP), X4 1128 | MOVO 208(BP), X5 1129 | MOVO 224(BP), X6 1130 | MOVO 240(BP), X7 1131 | MOVO 256(BP), X8 1132 | MOVO 272(BP), X9 1133 | MOVO 288(BP), X10 1134 | MOVO 304(BP), X11 1135 | MOVO 320(BP), X12 1136 | MOVO 336(BP), X13 1137 | MOVO 352(BP), X14 1138 | MOVO 368(BP), X15 1139 | // Each pass is a column round followed by a diagonal round over four blocks at once (X0-X15 each hold one state word of all four blocks). All 16 XMM registers are live, so X12 (later X15) is parked at 112(BP) while it doubles as the temporary for the shift-based rotates. 1140 | chacha_blocks_ssse3_mainloop1: 1141 | PADDD X4, X0 1142 | PADDD X5, X1 1143 | PXOR X0, X12 1144 | PXOR X1, X13 1145 | PADDD X6, X2 1146 | PADDD X7, X3 1147 | PXOR X2, X14 1148 | PXOR X3, X15 1149 | PSHUFB 80(BP), X12 1150 | PSHUFB 80(BP), X13 1151 | PADDD X12, X8 1152 | PADDD X13, X9 1153 | PSHUFB 80(BP), X14 1154 | PSHUFB 80(BP), X15 1155 | PADDD X14, X10 1156 | PADDD X15, X11 1157 | MOVO X12, 112(BP) 1158 | PXOR X8, X4 1159 | PXOR X9, X5 1160 | MOVO X4, X12 1161 | PSLLL $ 12, X4 1162 | PSRLL $20, X12 1163 | PXOR X12, X4 1164 | MOVO X5, X12 1165 | PSLLL $ 12, X5 1166 | PSRLL $20, X12 1167 | PXOR X12, X5 1168 | PXOR X10, X6 1169 | PXOR X11, X7 1170 | MOVO X6, X12 1171 | PSLLL $ 12, X6 1172 | PSRLL $20, X12 1173 | PXOR X12, X6 1174 |
MOVO X7, X12 1175 | PSLLL $ 12, X7 1176 | PSRLL $20, X12 1177 | PXOR X12, X7 1178 | MOVO 112(BP), X12 1179 | PADDD X4, X0 1180 | PADDD X5, X1 1181 | PXOR X0, X12 1182 | PXOR X1, X13 1183 | PADDD X6, X2 1184 | PADDD X7, X3 1185 | PXOR X2, X14 1186 | PXOR X3, X15 1187 | PSHUFB 96(BP), X12 1188 | PSHUFB 96(BP), X13 1189 | PADDD X12, X8 1190 | PADDD X13, X9 1191 | PSHUFB 96(BP), X14 1192 | PSHUFB 96(BP), X15 1193 | PADDD X14, X10 1194 | PADDD X15, X11 1195 | MOVO X12, 112(BP) 1196 | PXOR X8, X4 1197 | PXOR X9, X5 1198 | MOVO X4, X12 1199 | PSLLL $ 7, X4 1200 | PSRLL $25, X12 1201 | PXOR X12, X4 1202 | MOVO X5, X12 1203 | PSLLL $ 7, X5 1204 | PSRLL $25, X12 1205 | PXOR X12, X5 1206 | PXOR X10, X6 1207 | PXOR X11, X7 1208 | MOVO X6, X12 1209 | PSLLL $ 7, X6 1210 | PSRLL $25, X12 1211 | PXOR X12, X6 1212 | MOVO X7, X12 1213 | PSLLL $ 7, X7 1214 | PSRLL $25, X12 1215 | PXOR X12, X7 1216 | MOVO 112(BP), X12 1217 | PADDD X5, X0 1218 | PADDD X6, X1 1219 | PXOR X0, X15 1220 | PXOR X1, X12 1221 | PADDD X7, X2 1222 | PADDD X4, X3 1223 | PXOR X2, X13 1224 | PXOR X3, X14 1225 | PSHUFB 80(BP), X15 1226 | PSHUFB 80(BP), X12 1227 | PADDD X15, X10 1228 | PADDD X12, X11 1229 | PSHUFB 80(BP), X13 1230 | PSHUFB 80(BP), X14 1231 | PADDD X13, X8 1232 | PADDD X14, X9 1233 | MOVO X15, 112(BP) 1234 | PXOR X10, X5 1235 | PXOR X11, X6 1236 | MOVO X5, X15 1237 | PSLLL $ 12, X5 1238 | PSRLL $20, X15 1239 | PXOR X15, X5 1240 | MOVO X6, X15 1241 | PSLLL $ 12, X6 1242 | PSRLL $20, X15 1243 | PXOR X15, X6 1244 | PXOR X8, X7 1245 | PXOR X9, X4 1246 | MOVO X7, X15 1247 | PSLLL $ 12, X7 1248 | PSRLL $20, X15 1249 | PXOR X15, X7 1250 | MOVO X4, X15 1251 | PSLLL $ 12, X4 1252 | PSRLL $20, X15 1253 | PXOR X15, X4 1254 | MOVO 112(BP), X15 1255 | PADDD X5, X0 1256 | PADDD X6, X1 1257 | PXOR X0, X15 1258 | PXOR X1, X12 1259 | PADDD X7, X2 1260 | PADDD X4, X3 1261 | PXOR X2, X13 1262 | PXOR X3, X14 1263 | PSHUFB 96(BP), X15 1264 | PSHUFB 96(BP), X12 1265 | PADDD X15, X10 1266 | PADDD X12, X11 1267 | PSHUFB
96(BP), X13 1268 | PSHUFB 96(BP), X14 1269 | PADDD X13, X8 1270 | PADDD X14, X9 1271 | MOVO X15, 112(BP) 1272 | PXOR X10, X5 1273 | PXOR X11, X6 1274 | MOVO X5, X15 1275 | PSLLL $ 7, X5 1276 | PSRLL $25, X15 1277 | PXOR X15, X5 1278 | MOVO X6, X15 1279 | PSLLL $ 7, X6 1280 | PSRLL $25, X15 1281 | PXOR X15, X6 1282 | PXOR X8, X7 1283 | PXOR X9, X4 1284 | MOVO X7, X15 1285 | PSLLL $ 7, X7 1286 | PSRLL $25, X15 1287 | PXOR X15, X7 1288 | MOVO X4, X15 1289 | PSLLL $ 7, X4 1290 | PSRLL $25, X15 1291 | PXOR X15, X4 1292 | SUBQ $2, AX 1293 | MOVO 112(BP), X15 1294 | JNZ chacha_blocks_ssse3_mainloop1 1295 | PADDD 128(BP), X0 1296 | PADDD 144(BP), X1 1297 | PADDD 160(BP), X2 1298 | PADDD 176(BP), X3 1299 | PADDD 192(BP), X4 1300 | PADDD 208(BP), X5 1301 | PADDD 224(BP), X6 1302 | PADDD 240(BP), X7 1303 | PADDD 256(BP), X8 1304 | PADDD 272(BP), X9 1305 | PADDD 288(BP), X10 1306 | PADDD 304(BP), X11 1307 | PADDD 320(BP), X12 1308 | PADDD 336(BP), X13 1309 | PADDD 352(BP), X14 1310 | PADDD 368(BP), X15 1311 | MOVO X8, 384(BP) 1312 | MOVO X9, 400(BP) 1313 | MOVO X10, 416(BP) 1314 | MOVO X11, 432(BP) 1315 | MOVO X12, 448(BP) 1316 | MOVO X13, 464(BP) 1317 | MOVO X14, 480(BP) 1318 | MOVO X15, 496(BP) 1319 | MOVO X0, X8 1320 | MOVO X2, X9 1321 | MOVO X4, X10 1322 | MOVO X6, X11 1323 | PUNPCKHLQ X1, X0 1324 | PUNPCKHLQ X3, X2 1325 | PUNPCKHLQ X5, X4 1326 | PUNPCKHLQ X7, X6 1327 | PUNPCKLLQ X1, X8 1328 | PUNPCKLLQ X3, X9 1329 | PUNPCKLLQ X5, X10 1330 | PUNPCKLLQ X7, X11 1331 | MOVO X0, X1 1332 | MOVO X4, X3 1333 | MOVO X8, X5 1334 | MOVO X10, X7 1335 | PUNPCKHQDQ X2, X0 1336 | PUNPCKHQDQ X6, X4 1337 | PUNPCKHQDQ X9, X8 1338 | PUNPCKHQDQ X11, X10 1339 | PUNPCKLQDQ X2, X1 1340 | PUNPCKLQDQ X6, X3 1341 | PUNPCKLQDQ X9, X5 1342 | PUNPCKLQDQ X11, X7 1343 | ANDQ SI, SI 1344 | JZ chacha_blocks_ssse3_noinput1 1345 | MOVOU 0(SI), X2 1346 | MOVOU 16(SI), X6 1347 | MOVOU 64(SI), X9 1348 | MOVOU 80(SI), X11 1349 | MOVOU 128(SI), X12 1350 | MOVOU 144(SI), X13 1351 | MOVOU 192(SI), X14 1352 |
MOVOU 208(SI), X15 1353 | PXOR X2, X5 1354 | PXOR X6, X7 1355 | PXOR X9, X8 1356 | PXOR X11, X10 1357 | PXOR X12, X1 1358 | PXOR X13, X3 1359 | PXOR X14, X0 1360 | PXOR X15, X4 1361 | MOVOU X5, 0(DX) 1362 | MOVOU X7, 16(DX) 1363 | MOVOU X8, 64(DX) 1364 | MOVOU X10, 80(DX) 1365 | MOVOU X1, 128(DX) 1366 | MOVOU X3, 144(DX) 1367 | MOVOU X0, 192(DX) 1368 | MOVOU X4, 208(DX) 1369 | MOVO 384(BP), X0 1370 | MOVO 400(BP), X1 1371 | MOVO 416(BP), X2 1372 | MOVO 432(BP), X3 1373 | MOVO 448(BP), X4 1374 | MOVO 464(BP), X5 1375 | MOVO 480(BP), X6 1376 | MOVO 496(BP), X7 1377 | MOVO X0, X8 1378 | MOVO X2, X9 1379 | MOVO X4, X10 1380 | MOVO X6, X11 1381 | PUNPCKLLQ X1, X8 1382 | PUNPCKLLQ X3, X9 1383 | PUNPCKHLQ X1, X0 1384 | PUNPCKHLQ X3, X2 1385 | PUNPCKLLQ X5, X10 1386 | PUNPCKLLQ X7, X11 1387 | PUNPCKHLQ X5, X4 1388 | PUNPCKHLQ X7, X6 1389 | MOVO X8, X1 1390 | MOVO X0, X3 1391 | MOVO X10, X5 1392 | MOVO X4, X7 1393 | PUNPCKLQDQ X9, X1 1394 | PUNPCKLQDQ X11, X5 1395 | PUNPCKHQDQ X9, X8 1396 | PUNPCKHQDQ X11, X10 1397 | PUNPCKLQDQ X2, X3 1398 | PUNPCKLQDQ X6, X7 1399 | PUNPCKHQDQ X2, X0 1400 | PUNPCKHQDQ X6, X4 1401 | MOVOU 32(SI), X2 1402 | MOVOU 48(SI), X6 1403 | MOVOU 96(SI), X9 1404 | MOVOU 112(SI), X11 1405 | MOVOU 160(SI), X12 1406 | MOVOU 176(SI), X13 1407 | MOVOU 224(SI), X14 1408 | MOVOU 240(SI), X15 1409 | PXOR X2, X1 1410 | PXOR X6, X5 1411 | PXOR X9, X8 1412 | PXOR X11, X10 1413 | PXOR X12, X3 1414 | PXOR X13, X7 1415 | PXOR X14, X0 1416 | PXOR X15, X4 1417 | MOVOU X1, 32(DX) 1418 | MOVOU X5, 48(DX) 1419 | MOVOU X8, 96(DX) 1420 | MOVOU X10, 112(DX) 1421 | MOVOU X3, 160(DX) 1422 | MOVOU X7, 176(DX) 1423 | MOVOU X0, 224(DX) 1424 | MOVOU X4, 240(DX) 1425 | ADDQ $256, SI 1426 | JMP chacha_blocks_ssse3_mainloop_cont 1427 | 1428 | chacha_blocks_ssse3_noinput1: 1429 | MOVOU X5, 0(DX) 1430 | MOVOU X7, 16(DX) 1431 | MOVOU X8, 64(DX) 1432 | MOVOU X10, 80(DX) 1433 | MOVOU X1, 128(DX) 1434 | MOVOU X3, 144(DX) 1435 | MOVOU X0, 192(DX) 1436 | MOVOU X4, 208(DX) 1437 | MOVO
384(BP), X0 1438 | MOVO 400(BP), X1 1439 | MOVO 416(BP), X2 1440 | MOVO 432(BP), X3 1441 | MOVO 448(BP), X4 1442 | MOVO 464(BP), X5 1443 | MOVO 480(BP), X6 1444 | MOVO 496(BP), X7 1445 | MOVO X0, X8 1446 | MOVO X2, X9 1447 | MOVO X4, X10 1448 | MOVO X6, X11 1449 | PUNPCKLLQ X1, X8 1450 | PUNPCKLLQ X3, X9 1451 | PUNPCKHLQ X1, X0 1452 | PUNPCKHLQ X3, X2 1453 | PUNPCKLLQ X5, X10 1454 | PUNPCKLLQ X7, X11 1455 | PUNPCKHLQ X5, X4 1456 | PUNPCKHLQ X7, X6 1457 | MOVO X8, X1 1458 | MOVO X0, X3 1459 | MOVO X10, X5 1460 | MOVO X4, X7 1461 | PUNPCKLQDQ X9, X1 1462 | PUNPCKLQDQ X11, X5 1463 | PUNPCKHQDQ X9, X8 1464 | PUNPCKHQDQ X11, X10 1465 | PUNPCKLQDQ X2, X3 1466 | PUNPCKLQDQ X6, X7 1467 | PUNPCKHQDQ X2, X0 1468 | PUNPCKHQDQ X6, X4 1469 | MOVOU X1, 32(DX) 1470 | MOVOU X5, 48(DX) 1471 | MOVOU X8, 96(DX) 1472 | MOVOU X10, 112(DX) 1473 | MOVOU X3, 160(DX) 1474 | MOVOU X7, 176(DX) 1475 | MOVOU X0, 224(DX) 1476 | MOVOU X4, 240(DX) 1477 | 1478 | chacha_blocks_ssse3_mainloop_cont: 1479 | ADDQ $256, DX 1480 | SUBQ $256, CX 1481 | CMPQ CX, $256 1482 | JAE chacha_blocks_ssse3_atleast256 1483 | MOVO 80(BP), X6 1484 | MOVO 96(BP), X7 1485 | MOVO 0(BP), X8 1486 | MOVO 16(BP), X9 1487 | MOVO 32(BP), X10 1488 | MOVO 48(BP), X11 1489 | MOVQ $1, R9 1490 | 1491 | chacha_blocks_ssse3_below256: 1492 | MOVQ R9, X5 1493 | ANDQ CX, CX 1494 | JZ chacha_blocks_ssse3_done 1495 | CMPQ CX, $64 1496 | JAE chacha_blocks_ssse3_above63 1497 | MOVQ DX, R9 1498 | ANDQ SI, SI 1499 | JZ chacha_blocks_ssse3_noinput2 1500 | MOVQ CX, R10 1501 | MOVQ BP, DX 1502 | ADDQ R10, SI 1503 | ADDQ R10, DX 1504 | NEGQ R10 1505 | 1506 | chacha_blocks_ssse3_copyinput: 1507 | MOVB (SI)(R10*1), AX 1508 | MOVB AX, (DX)(R10*1) 1509 | INCQ R10 1510 | JNZ chacha_blocks_ssse3_copyinput 1511 | MOVQ BP, SI 1512 | 1513 | chacha_blocks_ssse3_noinput2: 1514 | MOVQ BP, DX 1515 | 1516 | chacha_blocks_ssse3_above63: 1517 | MOVO X8, X0 1518 | MOVO X9, X1 1519 | MOVO X10, X2 1520 | MOVO X11, X3 1521 | 1522 | // MOVQ 64(BP), AX 1523 | MOVQ
$20, AX 1524 | 1525 | chacha_blocks_ssse3_mainloop2: 1526 | PADDD X1, X0 1527 | PXOR X0, X3 1528 | PSHUFB X6, X3 1529 | PADDD X3, X2 1530 | PXOR X2, X1 1531 | MOVO X1, X4 1532 | PSLLL $12, X4 1533 | PSRLL $20, X1 1534 | PXOR X4, X1 1535 | PADDD X1, X0 1536 | PXOR X0, X3 1537 | PSHUFB X7, X3 1538 | PSHUFD $0x93, X0, X0 1539 | PADDD X3, X2 1540 | PSHUFD $0x4e, X3, X3 1541 | PXOR X2, X1 1542 | PSHUFD $0x39, X2, X2 1543 | MOVO X1, X4 1544 | PSLLL $7, X4 1545 | PSRLL $25, X1 1546 | PXOR X4, X1 1547 | PADDD X1, X0 1548 | PXOR X0, X3 1549 | PSHUFB X6, X3 1550 | PADDD X3, X2 1551 | PXOR X2, X1 1552 | MOVO X1, X4 1553 | PSLLL $12, X4 1554 | PSRLL $20, X1 1555 | PXOR X4, X1 1556 | PADDD X1, X0 1557 | PXOR X0, X3 1558 | PSHUFB X7, X3 1559 | PSHUFD $0x39, X0, X0 1560 | PADDD X3, X2 1561 | PSHUFD $0x4e, X3, X3 1562 | PXOR X2, X1 1563 | PSHUFD $0x93, X2, X2 1564 | MOVO X1, X4 1565 | PSLLL $7, X4 1566 | PSRLL $25, X1 1567 | PXOR X4, X1 1568 | SUBQ $2, AX 1569 | JNZ chacha_blocks_ssse3_mainloop2 1570 | PADDD X8, X0 1571 | PADDD X9, X1 1572 | PADDD X10, X2 1573 | PADDD X11, X3 1574 | ANDQ SI, SI 1575 | JZ chacha_blocks_ssse3_noinput3 1576 | MOVOU 0(SI), X12 1577 | MOVOU 16(SI), X13 1578 | MOVOU 32(SI), X14 1579 | MOVOU 48(SI), X15 1580 | PXOR X12, X0 1581 | PXOR X13, X1 1582 | PXOR X14, X2 1583 | PXOR X15, X3 1584 | ADDQ $64, SI 1585 | 1586 | chacha_blocks_ssse3_noinput3: 1587 | MOVOU X0, 0(DX) 1588 | MOVOU X1, 16(DX) 1589 | MOVOU X2, 32(DX) 1590 | MOVOU X3, 48(DX) 1591 | PADDQ X5, X11 1592 | CMPQ CX, $64 1593 | JBE chacha_blocks_ssse3_mainloop2_finishup 1594 | ADDQ $64, DX 1595 | SUBQ $64, CX 1596 | JMP chacha_blocks_ssse3_below256 1597 | 1598 | chacha_blocks_ssse3_mainloop2_finishup: 1599 | CMPQ CX, $64 1600 | JE chacha_blocks_ssse3_done 1601 | ADDQ CX, R9 1602 | ADDQ CX, DX 1603 | NEGQ CX 1604 | 1605 | chacha_blocks_ssse3_copyoutput: 1606 | MOVB (DX)(CX*1), AX 1607 | MOVB AX, (R9)(CX*1) 1608 | INCQ CX 1609 | JNZ chacha_blocks_ssse3_copyoutput 1610 | 1611 |
chacha_blocks_ssse3_done: 1612 | MOVOU X11, 32(DI) 1613 | 1614 | RET 1615 | 1616 | // func hChaChaSSSE3(key, nonce []byte, dst *byte) 1617 | TEXT ·hChaChaSSSE3(SB), NOSPLIT|NOFRAME, $0-56 // Runs the 20-round ChaCha core on {constants row, 32 key bytes, 16 nonce bytes} and stores rows 0 and 3 (X0, X3) to dst as 32 bytes; the initial state is not added back at the end. 1618 | MOVQ key+0(FP), DI // &key[0] -> DI 1619 | MOVQ nonce+24(FP), SI // &nonce[0] -> SI 1620 | MOVQ dst+48(FP), DX // dst -> DX 1621 | 1622 | MOVL $20, CX // round counter; every loop pass subtracts 2 1623 | 1624 | LEAQ ·chacha_constants<>(SB), AX 1625 | MOVO 0(AX), X0 // row 0 of the state comes from the constants table 1626 | MOVO 16(AX), X5 // mask for the first PSHUFB of each quarter round (presumably rotate-left-16; the table is defined outside this chunk) 1627 | MOVO 32(AX), X6 // mask for the second PSHUFB of each quarter round (presumably rotate-left-8) 1628 | 1629 | MOVOU 0(DI), X1 // key[0:16] -> row 1 1630 | MOVOU 16(DI), X2 // key[16:32] -> row 2 1631 | MOVOU 0(SI), X3 // nonce[0:16] -> row 3 1632 | 1633 | hchacha_ssse3_mainloop: // one pass = two rounds; the PSHUFD lane rotations re-pair the rows for the second round and undo it afterwards 1634 | PADDD X1, X0 1635 | PXOR X0, X3 1636 | PSHUFB X5, X3 1637 | PADDD X3, X2 1638 | PXOR X2, X1 1639 | MOVO X1, X4 1640 | PSLLL $12, X1 1641 | PSRLL $20, X4 1642 | PXOR X4, X1 1643 | PADDD X1, X0 1644 | PXOR X0, X3 1645 | PSHUFB X6, X3 1646 | PSHUFD $0X93, X0, X0 1647 | PADDD X3, X2 1648 | PSHUFD $0X4E, X3, X3 1649 | PXOR X2, X1 1650 | PSHUFD $0X39, X2, X2 1651 | MOVO X1, X4 1652 | PSLLL $7, X1 1653 | PSRLL $25, X4 1654 | PXOR X4, X1 1655 | SUBQ $2, CX // sets the flags consumed by the JA at the bottom of the loop; only SSE instructions, which leave EFLAGS alone, sit in between 1656 | PADDD X1, X0 1657 | PXOR X0, X3 1658 | PSHUFB X5, X3 1659 | PADDD X3, X2 1660 | PXOR X2, X1 1661 | MOVO X1, X4 1662 | PSLLL $12, X1 1663 | PSRLL $20, X4 1664 | PXOR X4, X1 1665 | PADDD X1, X0 1666 | PXOR X0, X3 1667 | PSHUFB X6, X3 1668 | PSHUFD $0X39, X0, X0 1669 | PADDD X3, X2 1670 | PSHUFD $0X4E, X3, X3 1671 | PXOR X2, X1 1672 | PSHUFD $0X93, X2, X2 1673 | MOVO X1, X4 1674 | PSLLL $7, X1 1675 | PSRLL $25, X4 1676 | PXOR X4, X1 1677 | JA hchacha_ssse3_mainloop // repeat while the counter left by SUBQ above is still greater than zero 1678 | 1679 | MOVOU X0, 0(DX) // dst[0:16] = row 0 1680 | MOVOU X3, 16(DX) // dst[16:32] = row 3 1681 | 1682 | RET 1683 | --------------------------------------------------------------------------------