├── .gitignore
├── .editorconfig
├── go.mod
├── operate
├── proxy_test.go
├── proxy_ctl.go
├── fwd_test.go
├── proxy.go
└── fwd.go
├── go.sum
├── option
├── option.go
├── parsecli_test.go
└── parsecli.go
├── crypto
├── chacha20
│ ├── README.md
│ ├── internal
│ │ ├── hardware
│ │ │ ├── impl.go
│ │ │ ├── impl_amd64.go
│ │ │ └── impl_amd64.s
│ │ ├── api
│ │ │ └── api.go
│ │ └── ref
│ │ │ └── impl.go
│ ├── chacha20.go
│ └── LICENSE
├── xchacha20.go
└── xchacha20_test.go
├── logger
└── logger.go
├── .goreleaser.yml
├── LICENSE
├── main.go
├── netio
├── handler_test.go
├── handler.go
├── forward_test.go
└── forward.go
├── docs
└── README_CN.md
├── CHANGELOG
├── README.md
└── socks5
└── socks5.go
/.gitignore:
--------------------------------------------------------------------------------
1 | .idea
2 | .vscode
3 | *.exe
4 | dist
5 |
--------------------------------------------------------------------------------
/.editorconfig:
--------------------------------------------------------------------------------
1 | root = true
2 |
3 | [*]
4 | indent_style = tab
5 | indent_size = 4
6 |
--------------------------------------------------------------------------------
/go.mod:
--------------------------------------------------------------------------------
1 | module iox
2 |
3 | go 1.12
4 |
5 | require golang.org/x/sys v0.0.0-20200420163511-1957bb5e6d1f
6 |
--------------------------------------------------------------------------------
/operate/proxy_test.go:
--------------------------------------------------------------------------------
1 | package operate
2 |
3 | import "testing"
4 |
// TestProxyLocal starts ProxyLocal on port 9999 with the encryption flag off.
// NOTE(review): nothing is asserted, and ProxyLocal is defined outside this
// file — if it serves connections in a loop this test may never return;
// confirm before relying on it in CI.
func TestProxyLocal(t *testing.T) {
	ProxyLocal(":9999", false)
}
8 |
--------------------------------------------------------------------------------
/go.sum:
--------------------------------------------------------------------------------
1 | golang.org/x/sys v0.0.0-20200420163511-1957bb5e6d1f h1:gWF768j/LaZugp8dyS4UwsslYCYz9XgFxvlgsn0n9H8=
2 | golang.org/x/sys v0.0.0-20200420163511-1957bb5e6d1f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
3 |
--------------------------------------------------------------------------------
/option/option.go:
--------------------------------------------------------------------------------
1 | package option
2 |
// Compile-time tuning constants shared by the iox packages.
const (
	// NOTE(review): presumably the size in bytes (32 KiB) of the buffer used
	// when copying TCP streams — confirm at the use site.
	TCP_BUFFER_SIZE = 0x8000

	// NOTE(review): presumably the capacity of a connection queue/channel —
	// confirm at the use site.
	CONN_BUFFER_SIZE = 0x20

	// UDP protocol's max capacity: 0xFFFF minus 28 bytes.
	// NOTE(review): 28 looks like the 20-byte IPv4 header plus the 8-byte UDP
	// header — confirm.
	UDP_PACKET_MAX_SIZE = 0xFFFF - 28

	// NOTE(review): presumably the capacity of the channel that queues UDP
	// packets — confirm at the use site.
	UDP_PACKET_CHANNEL_SIZE = 0x400

	// NOTE(review): presumably the upper bound on concurrent UDP forwarding
	// workers — confirm at the use site.
	MAX_UDP_FWD_WORKER = 0x10

	// NOTE(review): unit is not visible here (presumably seconds between
	// heartbeats) — confirm at the use site.
	HEARTBEAT_FREQUENCY = 30
)
17 |
// Run-time options; these are package-level variables so they can be
// overridden from their defaults.
var (
	// TIMEOUT is the connection timeout in milliseconds.
	TIMEOUT = 5000

	// PROTOCOL names the transport in use.
	// NOTE(review): presumably switched to "UDP" by the -u flag — confirm in
	// the CLI parser.
	PROTOCOL = "TCP"

	// enable log output
	VERBOSE = false

	// logic optimization, changed in v0.1.1
	// When true, contexts are created with encryption turned off even if
	// encryption was requested for them.
	FORWARD_WITHOUT_DEC = false
)
29 |
--------------------------------------------------------------------------------
/crypto/chacha20/README.md:
--------------------------------------------------------------------------------
1 | ### chacha20 - ChaCha20
2 | #### Yawning Angel (yawning at schwanenlied dot me)
3 |
4 | Yet another Go ChaCha20 implementation. Everything else I found was slow,
5 | didn't support all the variants I need to use, or relied on cgo to go fast.
6 |
7 | Features:
8 |
9 | * 20 round, 256 bit key only. Everything else is pointless and stupid.
10 | * IETF 96 bit nonce variant.
11 | * XChaCha 24 byte nonce variant.
12 | * SSSE3 and AVX2 support on amd64 targets.
13 | * Incremental encrypt/decrypt support, unlike golang.org/x/crypto/salsa20.
14 |
--------------------------------------------------------------------------------
/logger/logger.go:
--------------------------------------------------------------------------------
1 | package logger
2 |
3 | import (
4 | "fmt"
5 | "iox/option"
6 | "os"
7 | )
8 |
// Prefixes prepended to every log line, one per severity.
const (
	WARN    = "[!]" // used by Warn
	INFO    = "[+]" // used by Info
	SUCCESS = "[*]" // used by Success
)
14 |
15 | func Info(format string, args ...interface{}) {
16 | if option.VERBOSE {
17 | fmt.Fprintf(os.Stdout, INFO+" "+format+"\n", args...)
18 | }
19 | }
20 |
21 | func Warn(format string, args ...interface{}) {
22 | fmt.Fprintf(os.Stderr, WARN+" "+format+"\n", args...)
23 | }
24 |
25 | func Success(format string, args ...interface{}) {
26 | fmt.Fprintf(os.Stdout, SUCCESS+" "+format+"\n", args...)
27 | }
28 |
--------------------------------------------------------------------------------
/.goreleaser.yml:
--------------------------------------------------------------------------------
1 | # This is an example goreleaser.yaml file with some sane defaults.
2 | # Make sure to check the documentation at http://goreleaser.com
3 | before:
4 | hooks:
5 | # you may remove this if you don't use vgo
6 | - go mod download
7 | # you may remove this if you don't need go generate
8 | - go generate ./...
9 | builds:
10 | - env:
11 | - CGO_ENABLED=0
12 | goos:
13 | - windows
14 | - linux
15 | - darwin
16 | goarch:
17 | - amd64
18 | - 386
19 | archives:
20 | - replacements:
21 | darwin: Darwin
22 | linux: Linux
23 | windows: Windows
24 | 386: i386
25 | amd64: x86_64
26 | checksum:
27 | name_template: 'checksums.txt'
28 | snapshot:
29 | name_template: "{{ .Tag }}-next"
30 | changelog:
31 | sort: asc
32 | filters:
33 | exclude:
34 | - '^docs:'
35 | - '^test:'
36 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2020 iv4n
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/crypto/chacha20/internal/hardware/impl.go:
--------------------------------------------------------------------------------
1 | // Copyright (C) 2019 Yawning Angel
2 | //
3 | // This program is free software: you can redistribute it and/or modify
4 | // it under the terms of the GNU Affero General Public License as
5 | // published by the Free Software Foundation, either version 3 of the
6 | // License, or (at your option) any later version.
7 | //
8 | // This program is distributed in the hope that it will be useful,
9 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
10 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 | // GNU Affero General Public License for more details.
12 | //
13 | // You should have received a copy of the GNU Affero General Public License
14 | // along with this program. If not, see <https://www.gnu.org/licenses/>.
15 |
16 | // Package hardware provides the hardware accelerated ChaCha20 implementations.
17 | package hardware
18 |
19 | import "iox/crypto/chacha20/internal/api"
20 |
21 | var hardwareImpls []api.Implementation
22 |
23 | // Register appends the implementation(s) to the provided slice, and returns the
24 | // new slice.
25 | func Register(impls []api.Implementation) []api.Implementation {
26 | return append(impls, hardwareImpls...)
27 | }
28 |
--------------------------------------------------------------------------------
/crypto/xchacha20.go:
--------------------------------------------------------------------------------
1 | /*
2 | Third-party chacha20 lib from https://github.com/Yawning/chacha20
3 | */
4 | package crypto
5 |
6 | import (
7 | "crypto/rand"
8 | "iox/crypto/chacha20"
9 | )
10 |
// Process-wide key material, populated by ExpandKey.
var (
	// SECRET_KEY is the cipher key handed to every chacha20.New call.
	SECRET_KEY []byte
	// NONCE is the fixed nonce used by NewCipherPair.
	NONCE []byte
)
15 |
// expand32 derives the 32-byte cipher key and the 24-byte nonce from key.
//
// Keys shorter than 32 bytes are first padded to 32 bytes with a byte whose
// value equals the number of padding bytes. Bytes 12..23 of the working
// buffer are then overwritten with its last 12 bytes; the key is the first
// 32 bytes of the result and the nonce is the first 24.
//
// This derivation is kept bit-for-bit identical to the historical in-place
// implementation so that peers running older builds still derive the same
// key and nonce. Unlike that implementation, the caller's slice is never
// modified and the two returned slices do not share memory.
func expand32(key []byte) ([]byte, []byte) {
	// Private working copy: the steps below write into the buffer.
	work := make([]byte, len(key), len(key)+0x20)
	copy(work, key)

	if len(work) < 0x20 {
		pad := byte(0x20 - len(work))
		for len(work) < 0x20 {
			work = append(work, pad)
		}
	}

	// Equivalent to the old append(key[:0xC], key[len(key)-0xC:]...), which
	// always wrote in place; copy handles the overlapping ranges.
	copy(work[0xC:0x18], work[len(work)-0xC:])

	secret := make([]byte, 0x20)
	copy(secret, work[:0x20])

	nonce := make([]byte, 0x18)
	copy(nonce, work[:0x18])

	return secret, nonce
}
28 |
// ExpandKey derives the package-level SECRET_KEY and NONCE from key using
// expand32. It must be called before any cipher is created, since the
// constructors in this file read those variables.
func ExpandKey(key []byte) {
	SECRET_KEY, NONCE = expand32(key)
}
32 |
// Cipher wraps a chacha20 stream cipher instance.
type Cipher struct {
	c *chacha20.Cipher // underlying keystream state
}
36 |
37 | func NewCipherPair() (*Cipher, *Cipher, error) {
38 | ccA, err := chacha20.New(SECRET_KEY, NONCE)
39 | if err != nil {
40 | return nil, nil, err
41 | }
42 | ccB, err := chacha20.New(SECRET_KEY, NONCE)
43 | if err != nil {
44 | return nil, nil, err
45 | }
46 |
47 | return &Cipher{c: ccA}, &Cipher{c: ccB}, nil
48 | }
49 |
// RandomNonce returns 24 (0x18) bytes read from crypto/rand, or the error
// reported by the random source.
func RandomNonce() ([]byte, error) {
	nonce := make([]byte, 0x18)
	if _, err := rand.Read(nonce); err != nil {
		return nil, err
	}
	return nonce, nil
}
58 |
59 | func NewCipher(nonce []byte) (*Cipher, error) {
60 | cc, err := chacha20.New(SECRET_KEY, nonce)
61 | if err != nil {
62 | return nil, err
63 | }
64 |
65 | return &Cipher{
66 | c: cc,
67 | }, nil
68 | }
69 |
// StreamXOR passes src through the underlying cipher's XORKeyStream and
// stores the result in dst. Callers in this repository pass the same slice
// for both arguments to transform data in place.
func (c Cipher) StreamXOR(dst []byte, src []byte) {
	c.c.XORKeyStream(dst, src)
}
73 |
--------------------------------------------------------------------------------
/option/parsecli_test.go:
--------------------------------------------------------------------------------
1 | package option
2 |
3 | import "testing"
4 |
5 | func TestParseCli(t *testing.T) {
6 | var mode string
7 | var submode int
8 | var local, remote []string
9 | var lenc, renc []bool
10 | var err error
11 |
12 | mode, submode, local, remote, lenc, renc, err = ParseCli([]string{"fwd", "-l", "9999", "-r", "1.1.1.1:8888", "-k", "0001", "-v"})
13 | if mode != "fwd" || submode != SUBMODE_L2R || lenc[0] || renc[0] || local[0] != ":9999" || remote[0] != "1.1.1.1:8888" || err != nil {
14 | t.Error("Error case 1")
15 | }
16 |
17 | mode, submode, local, remote, lenc, renc, err = ParseCli([]string{"fwd", "-l", "9999", "-l", "*8888", "-k", "0001", "-v"})
18 | if mode != "fwd" || submode != SUBMODE_L2L || lenc[0] || !lenc[1] || local[0] != ":9999" || local[1] != ":8888" || err != nil {
19 | t.Error("Error case 2")
20 | }
21 |
22 | mode, submode, local, remote, lenc, renc, err = ParseCli([]string{"fwd", "-r", "*1.1.1.1:9999", "-r", "*1.1.1.1:8888", "-k", "0001", "-v"})
23 | if mode != "fwd" || submode != SUBMODE_R2R || !renc[0] || !renc[1] || remote[0] != "1.1.1.1:9999" || remote[1] != "1.1.1.1:8888" || err != nil {
24 | t.Error(mode, submode, local, remote, lenc, renc, err, "Error case 3")
25 | }
26 |
27 | mode, submode, local, remote, lenc, renc, err = ParseCli([]string{"proxy", "-r", "*1.1.1.1:9999", "-r", "*1.1.1.1:8888", "-k", "0001", "-v"})
28 | if mode != "proxy" || err != errUnrecognizedSubMode {
29 | t.Error("Error case 4")
30 | }
31 |
32 | mode, submode, local, remote, lenc, renc, err = ParseCli([]string{"fwd", "-l", ":9999", "-r", "1.1.1.1:8888", "-k", "0001", "-h"})
33 | if mode != "fwd" || err != PrintUsage {
34 | t.Error("Error case 5")
35 | }
36 | }
37 |
--------------------------------------------------------------------------------
/main.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "fmt"
5 | "iox/operate"
6 | "iox/option"
7 | "os"
8 | )
9 |
// VERSION is the release string printed in the usage banner.
const VERSION = "0.3"

// Usage prints the command-line help to standard output.
//
// The -k description names XChaCha20, the cipher this version uses; the
// previous text still referred to an AES key and IV.
func Usage() {
	fmt.Printf(
		"iox v%v\n"+
			" Roaming intranet easier (https://github.com/eddieivan01/iox)\n\n"+
			"Usage: iox fwd/proxy [-l [*]PORT] [-r [*]HOST:PORT] [-k HEX] [-t TIMEOUT] [-u] [-h] [-v]\n\n"+
			"Options:\n"+
			" -l [*]PORT\n"+
			" port to listen on. `*` means encrypted socket\n"+
			" -r [*]HOST:PORT\n"+
			" remote host to connect, HOST can be IP or Domain. `*` means encrypted socket\n"+
			" -k HEX\n"+
			" hexadecimal format key, used to derive the XChaCha20 key and nonce\n"+
			" -u\n"+
			" udp forward mode\n"+
			" -t TIMEOUT\n"+
			" set connection timeout(millisecond), default is 5000\n"+
			" -v\n"+
			" enable log output\n"+
			" -h\n"+
			" print usage then exit\n", VERSION,
	)
}
34 |
35 | func main() {
36 | mode, submode, local, remote, lenc, renc, err := option.ParseCli(os.Args[1:])
37 | if err != nil {
38 | if err == option.PrintUsage {
39 | Usage()
40 | } else {
41 | fmt.Println(err.Error())
42 | }
43 | return
44 | }
45 |
46 | switch mode {
47 | case "fwd":
48 | switch submode {
49 | case option.SUBMODE_L2R:
50 | operate.Local2Remote(local[0], remote[0], lenc[0], renc[0])
51 | case option.SUBMODE_L2L:
52 | operate.Local2Local(local[0], local[1], lenc[0], lenc[1])
53 | case option.SUBMODE_R2R:
54 | operate.Remote2Remote(remote[0], remote[1], renc[0], renc[1])
55 | }
56 | case "proxy":
57 | switch submode {
58 | case option.SUBMODE_LP:
59 | operate.ProxyLocal(local[0], lenc[0])
60 | case option.SUBMODE_RP:
61 | operate.ProxyRemote(remote[0], renc[0])
62 | case option.SUBMODE_RPL2L:
63 | operate.ProxyRemoteL2L(local[0], local[1], lenc[0], lenc[1])
64 | }
65 | }
66 | }
67 |
--------------------------------------------------------------------------------
/netio/handler_test.go:
--------------------------------------------------------------------------------
1 | package netio
2 |
3 | import (
4 | "net"
5 | "testing"
6 | )
7 |
// bytesEq reports whether a and b have the same length and the same bytes.
// A nil slice and an empty slice compare equal.
func bytesEq(a, b []byte) bool {
	return string(a) == string(b)
}
21 |
22 | func TestTCPCtx(t *testing.T) {
23 | listener, err := net.Listen("tcp", "127.0.0.1:9999")
24 | if err != nil {
25 | t.Error(err.Error())
26 | }
27 | defer listener.Close()
28 |
29 | buf := make([]byte, 1024)
30 | signal := make(chan struct{}, 1)
31 | msg := "testing message."
32 |
33 | go func() {
34 | server, err := listener.Accept()
35 | if err != nil {
36 | t.Error(err.Error())
37 | }
38 | defer server.Close()
39 |
40 | serverCtx, _ := NewTCPCtx(server, true)
41 | serverCtx.DecryptRead(buf)
42 | signal <- struct{}{}
43 | }()
44 |
45 | client, err := net.Dial("tcp", "127.0.0.1:9999")
46 | if err != nil {
47 | t.Error(err.Error())
48 | }
49 | defer client.Close()
50 |
51 | clientCtx, err := NewTCPCtx(client, true)
52 | if err != nil {
53 | t.Error(err.Error())
54 | }
55 | clientCtx.EncryptWrite([]byte(msg))
56 |
57 | <-signal
58 | if !bytesEq([]byte(msg), buf[:len(msg)]) {
59 | t.Error("TCPCtx error")
60 | }
61 | }
62 |
63 | func TestUDPConn(t *testing.T) {
64 | addr, _ := net.ResolveUDPAddr("udp", ":9999")
65 | l, _ := net.ListenUDP("udp", addr)
66 | lCtx, _ := NewUDPCtx(l, true, false)
67 |
68 | signal := make(chan struct{}, 0)
69 |
70 | go func() {
71 | addr, _ := net.ResolveUDPAddr("udp", "127.0.0.1:9999")
72 | c, _ := net.DialUDP("udp", nil, addr)
73 | cCtx, _ := NewUDPCtx(c, true, true)
74 |
75 | cCtx.EncryptWrite([]byte("testing message."))
76 |
77 | signal <- struct{}{}
78 | }()
79 |
80 | <-signal
81 |
82 | buf := make([]byte, 32)
83 | n, err := lCtx.DecryptRead(buf)
84 | if err != nil {
85 | t.Error(err.Error())
86 | }
87 |
88 | if string(buf[:n]) != "testing message." {
89 | t.Log(buf[:n])
90 | t.Error("UDPCtx Error")
91 | }
92 | }
93 |
--------------------------------------------------------------------------------
/crypto/xchacha20_test.go:
--------------------------------------------------------------------------------
1 | package crypto
2 |
3 | import (
4 | "testing"
5 | )
6 |
// bytesEq reports whether a and b have the same length and hold the same
// bytes in the same order.
func bytesEq(a, b []byte) bool {
	if len(a) != len(b) {
		return false
	}

	for i, v := range a {
		if b[i] != v {
			return false
		}
	}

	return true
}
20 |
21 | /*
22 | func TestExpand32(t *testing.T) {
23 | src36 := []byte{
24 | 0, 1, 2, 3, 4, 5, 6, 7,
25 | 8, 9, 0xA, 0xB, 0xC, 0xD, 0xE, 0xF,
26 | 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
27 | 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
28 | 0x20, 0x21, 0x22, 0x23, 0x24, 0x25,
29 | }
30 |
31 | src16 := []byte{
32 | 0, 1, 2, 3, 4, 5, 6, 7,
33 | 8, 9, 0xA, 0xB, 0xC, 0xD, 0xE, 0xF,
34 | }
35 |
36 | src10 := []byte{
37 | 0, 1, 2, 3, 4, 5, 6, 7,
38 | 8, 9,
39 | }
40 |
41 | var key, iv []byte
42 | key, iv = expand32(src36)
43 | if !bytesEq(key, src16) || !bytesEq(iv, []byte{
44 | 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
45 | 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
46 | }) {
47 | t.Error("src36 error")
48 | }
49 |
50 | key, iv = expand32(src16)
51 | if !bytesEq(key, src16) || !bytesEq(iv, []byte{
52 | 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
53 | 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
54 | }) {
55 | t.Error("src16 error")
56 | }
57 |
58 | key, iv = expand32(src10)
59 | if !bytesEq(key, append(src10, []byte{
60 | 0x16, 0x16, 0x16, 0x16, 0x16, 0x16,
61 | }...)) || !bytesEq(iv, []byte{
62 | 0x16, 0x16, 0x16, 0x16, 0x16, 0x16, 0x16, 0x16,
63 | 0x16, 0x16, 0x16, 0x16, 0x16, 0x16, 0x16, 0x16,
64 | }) {
65 | t.Error("src10 error")
66 | }
67 | }
68 | */
69 |
70 | func TestStreamXOR(t *testing.T) {
71 | ExpandKey([]byte("KEY"))
72 | cipherA, cipherB, _ := NewCipherPair()
73 | plain := []byte("testing plain text...")
74 | output1 := make([]byte, len(plain))
75 | cipherA.StreamXOR(output1, plain)
76 |
77 | output2 := make([]byte, len(plain))
78 | cipherB.StreamXOR(output2, output1)
79 |
80 | if !bytesEq(output2, plain) || bytesEq(output1, plain) {
81 | t.Error("AES-CTR error")
82 | }
83 | }
84 |
--------------------------------------------------------------------------------
/crypto/chacha20/internal/api/api.go:
--------------------------------------------------------------------------------
1 | // Copyright (C) 2019 Yawning Angel
2 | //
3 | // This program is free software: you can redistribute it and/or modify
4 | // it under the terms of the GNU Affero General Public License as
5 | // published by the Free Software Foundation, either version 3 of the
6 | // License, or (at your option) any later version.
7 | //
8 | // This program is distributed in the hope that it will be useful,
9 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
10 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 | // GNU Affero General Public License for more details.
12 | //
13 | // You should have received a copy of the GNU Affero General Public License
14 | // along with this program. If not, see <https://www.gnu.org/licenses/>.
15 |
16 | // Package api provides the ChaCha20 implementation abstract interface.
17 | package api
18 |
// Sizes and constants shared by all ChaCha20 implementations.
const (
	// BlockSize is the size of a ChaCha20 block in bytes.
	BlockSize = 64

	// StateSize is the size of the ChaCha20 state as 32 bit unsigned words.
	StateSize = 16

	// HashSize is the size of the HChaCha output in bytes.
	HashSize = 32

	// HNonceSize is the HChaCha20 nonce size in bytes.
	HNonceSize = 16

	// Sigma0 is the first word of the ChaCha constant.
	// (Read as little-endian bytes, 0x61707865 spells "expa".)
	Sigma0 = uint32(0x61707865)

	// Sigma1 is the second word of the ChaCha constant.
	// (Little-endian bytes: "nd 3".)
	Sigma1 = uint32(0x3320646e)

	// Sigma2 is the third word of the ChaCha constant.
	// (Little-endian bytes: "2-by".)
	Sigma2 = uint32(0x79622d32)

	// Sigma3 is the fourth word of the ChaCha constant.
	// (Little-endian bytes: "te k".)
	Sigma3 = uint32(0x6b206574)
)
44 |
// Implementation is a ChaCha20 implementation. The hardware package
// contributes values of this type through its Register function.
type Implementation interface {
	// Name returns the name of the implementation.
	Name() string

	// Blocks calculates the ChaCha20 blocks. If src is not nil, dst will
	// be set to the XOR of src with the key stream, otherwise dst will be
	// set to the key stream.
	//
	// nrBlocks is a count of BlockSize-byte blocks, not a byte count.
	Blocks(x *[StateSize]uint32, dst, src []byte, nrBlocks int)

	// HChaCha calculates the HChaCha20 hash.
	//
	// Note: `dst` is guaranteed to be HashSize bytes.
	HChaCha(key, nonce []byte, dst []byte)
}
60 |
--------------------------------------------------------------------------------
/crypto/chacha20/internal/hardware/impl_amd64.go:
--------------------------------------------------------------------------------
1 | // Copyright (C) 2019 Yawning Angel
2 | //
3 | // This program is free software: you can redistribute it and/or modify
4 | // it under the terms of the GNU Affero General Public License as
5 | // published by the Free Software Foundation, either version 3 of the
6 | // License, or (at your option) any later version.
7 | //
8 | // This program is distributed in the hope that it will be useful,
9 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
10 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 | // GNU Affero General Public License for more details.
12 | //
13 | // You should have received a copy of the GNU Affero General Public License
14 | // along with this program. If not, see <https://www.gnu.org/licenses/>.
15 |
16 | // +build amd64,!noasm
17 |
18 | package hardware
19 |
20 | import (
21 | "iox/crypto/chacha20/internal/api"
22 |
23 | "golang.org/x/sys/cpu"
24 | )
25 |
// The four functions below have no Go body.
// NOTE(review): they are presumably implemented in impl_amd64.s — confirm,
// and keep the parameter names in step with that file if so.

// blocksAVX2 is the AVX2 block routine; blockWrapper passes the source as
// `in` and the destination as `out`, both cut to a whole number of blocks.
//go:noescape
func blocksAVX2(s *[api.StateSize]uint32, in, out []byte)

// hChaChaAVX2 is the AVX2 HChaCha routine; dst points at the first byte of
// the output buffer.
//go:noescape
func hChaChaAVX2(key, nonce []byte, dst *byte)

// blocksSSSE3 is the SSSE3 counterpart of blocksAVX2.
//go:noescape
func blocksSSSE3(s *[api.StateSize]uint32, in, out []byte)

// hChaChaSSSE3 is the SSSE3 counterpart of hChaChaAVX2.
//go:noescape
func hChaChaSSSE3(key, nonce []byte, dst *byte)
37 |
// implAmd64 adapts one pair of amd64 routines to api.Implementation.
type implAmd64 struct {
	name string // value returned by Name

	blocksFn  func(*[api.StateSize]uint32, []byte, []byte, int) // called by Blocks
	hChaChaFn func([]byte, []byte, *byte)                       // called by HChaCha
}

// Name returns the implementation's name.
func (impl *implAmd64) Name() string {
	return impl.name
}

// Blocks forwards its arguments unchanged to the configured block function.
func (impl *implAmd64) Blocks(x *[api.StateSize]uint32, dst, src []byte, nrBlocks int) {
	impl.blocksFn(x, dst, src, nrBlocks)
}

// HChaCha forwards to the configured HChaCha function, passing a pointer to
// the first byte of dst. dst must not be empty: &dst[0] panics otherwise.
func (impl *implAmd64) HChaCha(key, nonce []byte, dst []byte) {
	impl.hChaChaFn(key, nonce, &dst[0])
}
56 |
// blockWrapper adapts a routine with the (state, in, out) signature to the
// (state, dst, src, nrBlocks) signature stored in implAmd64.blocksFn.
//
// The returned function cuts both buffers to nrBlocks*api.BlockSize bytes.
// With a non-nil src it calls fn with src as input and dst as output. With a
// nil src it zeroes dst first and then runs fn over dst in place.
func blockWrapper(fn func(*[api.StateSize]uint32, []byte, []byte)) func(*[api.StateSize]uint32, []byte, []byte, int) {
	return func(x *[api.StateSize]uint32, dst, src []byte, nrBlocks int) {
		sz := nrBlocks * api.BlockSize
		if src != nil {
			fn(x, src[:sz], dst[:sz])
		} else {
			// Sub-optimal, but the compiler special cases this to an assembly
			// optimized runtime.memclrNoHeapPointers, so it's not terrible.
			for i := range dst[:sz] {
				dst[i] = 0
			}
			fn(x, dst[:sz], dst[:sz])
		}
	}
}
72 |
73 | func init() {
74 | if cpu.X86.HasAVX2 {
75 | hardwareImpls = append(hardwareImpls, &implAmd64{
76 | name: "amd64_avx2",
77 | blocksFn: blockWrapper(blocksAVX2),
78 | hChaChaFn: hChaChaAVX2,
79 | })
80 | }
81 | if cpu.X86.HasSSE3 {
82 | hardwareImpls = append(hardwareImpls, &implAmd64{
83 | name: "amd64_ssse3",
84 | blocksFn: blockWrapper(blocksSSSE3),
85 | hChaChaFn: hChaChaSSSE3,
86 | })
87 | }
88 | }
89 |
--------------------------------------------------------------------------------
/netio/handler.go:
--------------------------------------------------------------------------------
1 | package netio
2 |
3 | import (
4 | "iox/crypto"
5 | "iox/option"
6 | "net"
7 | )
8 |
// Ctx is a net.Conn that can additionally read and write through an optional
// encryption layer. TCPCtx and UDPCtx both provide these two methods.
type Ctx interface {
	// DecryptRead reads into b and, when encryption is on, decrypts the
	// received bytes in place.
	DecryptRead(b []byte) (int, error)
	// EncryptWrite writes b, encrypting it in place first when encryption
	// is on.
	EncryptWrite(b []byte) (int, error)

	net.Conn
}
15 |
// TCPCtx is a stream connection with optional encryption.
type TCPCtx struct {
	net.Conn
	encrypted bool // when false, reads and writes pass through untouched

	// Ensure stream cipher synchronous
	// One cipher per direction, so reads and writes each advance their own
	// keystream.
	encCipher *crypto.Cipher
	decCipher *crypto.Cipher
}
24 |
25 | func NewTCPCtx(conn net.Conn, encrypted bool) (*TCPCtx, error) {
26 | // if tc, ok := conn.(*net.TCPConn); ok {
27 | // tc.SetLinger(0)
28 | // }
29 |
30 | encrypted = encrypted && !option.FORWARD_WITHOUT_DEC
31 |
32 | ctx := &TCPCtx{
33 | Conn: conn,
34 | encrypted: encrypted,
35 | }
36 |
37 | if encrypted {
38 | encCipher, decCipher, err := crypto.NewCipherPair()
39 | if err != nil {
40 | return nil, err
41 | }
42 |
43 | ctx.encCipher = encCipher
44 | ctx.decCipher = decCipher
45 | }
46 |
47 | return ctx, nil
48 | }
49 |
50 | func (c *TCPCtx) DecryptRead(b []byte) (int, error) {
51 | n, err := c.Read(b)
52 | if err != nil {
53 | return n, err
54 | }
55 |
56 | if c.encrypted {
57 | c.decCipher.StreamXOR(b[:n], b[:n])
58 | }
59 |
60 | return n, err
61 | }
62 |
63 | func (c *TCPCtx) EncryptWrite(b []byte) (int, error) {
64 | if c.encrypted {
65 | c.encCipher.StreamXOR(b, b)
66 | }
67 | return c.Write(b)
68 | }
69 |
// UDPCtx is a UDP socket with optional per-packet encryption.
type UDPCtx struct {
	*net.UDPConn
	encrypted bool // when false, packets pass through untouched
	connected bool // true: use Read/Write; false: use ReadFromUDP/WriteTo
	// remoteAddr is the sender of the last packet received while not
	// connected; EncryptWrite uses it as the destination in that mode.
	remoteAddr *net.UDPAddr

	// sync.Mutex
}
78 |
79 | func NewUDPCtx(conn *net.UDPConn, encrypted bool, connected bool) (*UDPCtx, error) {
80 | encrypted = encrypted && !option.FORWARD_WITHOUT_DEC
81 |
82 | ctx := &UDPCtx{
83 | UDPConn: conn,
84 | encrypted: encrypted,
85 | connected: connected,
86 | }
87 |
88 | return ctx, nil
89 | }
90 |
91 | // Encryption for packet is different from stream
92 | func (c *UDPCtx) DecryptRead(b []byte) (int, error) {
93 | var n int
94 | var err error
95 |
96 | if !c.connected {
97 | var remoteAddr *net.UDPAddr
98 | n, remoteAddr, err = c.ReadFromUDP(b)
99 | if err != nil {
100 | return n, err
101 | }
102 | c.remoteAddr = remoteAddr
103 |
104 | } else {
105 | n, err = c.Read(b)
106 | if err != nil {
107 | return n, err
108 | }
109 | }
110 |
111 | if c.encrypted {
112 | // no nonce, skip this packet
113 | if len(b) < 0x18 {
114 | return 0, nil
115 | }
116 | nonce := b[n-0x18 : n]
117 | b = b[:n-0x18]
118 |
119 | cipher, err := crypto.NewCipher(nonce)
120 | if err != nil {
121 | return 0, err
122 | }
123 |
124 | n -= 0x18
125 | cipher.StreamXOR(b[:n], b[:n])
126 | }
127 |
128 | return n, err
129 | }
130 |
131 | func (c *UDPCtx) EncryptWrite(b []byte) (int, error) {
132 | if c.encrypted {
133 | iv, err := crypto.RandomNonce()
134 | cipher, err := crypto.NewCipher(iv)
135 | if err != nil {
136 | return 0, err
137 | }
138 |
139 | cipher.StreamXOR(b, b)
140 | b = append(b, iv...)
141 | }
142 |
143 | if !c.connected {
144 | return c.WriteTo(b, c.remoteAddr)
145 | }
146 | return c.Write(b)
147 | }
148 |
// IsRemoteAddrRegisted reports whether a remote address has been recorded,
// which happens when DecryptRead receives a packet on an unconnected socket.
func (c UDPCtx) IsRemoteAddrRegisted() bool {
	return c.remoteAddr != nil
}
152 |
--------------------------------------------------------------------------------
/operate/proxy_ctl.go:
--------------------------------------------------------------------------------
1 | package operate
2 |
3 | import (
4 | "errors"
5 | "io"
6 | "iox/logger"
7 | "iox/option"
8 | "net"
9 | "time"
10 | )
11 |
// Control-channel command codes (carried in Protocol.CMD) followed by
// protocol parameters.
const (
	// CTL_HANDSHAKE (0) opens the control channel; Protocol.N carries
	// CLIENT_HANDSHAKE or SERVER_HANDSHAKE.
	CTL_HANDSHAKE = iota
	// NOTE(review): CTL_CONNECT_ME (1), CTL_CLEANUP (2) and CTL_HEARTBEAT (3)
	// are not used in this file; their meaning is presumably what the names
	// suggest — confirm at the use sites.
	CTL_CONNECT_ME
	CTL_CLEANUP
	CTL_HEARTBEAT

	// NOTE(review): presumably an upper bound on simultaneous connections —
	// confirm at the use site.
	MAX_CONNECTION = 0x400
	// Values of Protocol.N sent by the client and by the server during the
	// handshake.
	CLIENT_HANDSHAKE = 0xC0
	SERVER_HANDSHAKE = 0xE0
)
22 |
// Protocol is one control-channel message: a command byte and a one-byte
// argument.
type Protocol struct {
	CMD byte // one of the CTL_* codes
	N   byte // argument; the handshake uses it for the handshake constants

	// ACK uint16
}
29 |
30 | var END = []byte{0xEE, 0xFF}
31 |
32 | func serialize(p Protocol) []byte {
33 | buf := make([]byte, 4)
34 | buf[0] = p.CMD
35 | buf[1] = p.N
36 |
37 | buf[2], buf[3] = END[0], END[1]
38 | return buf
39 | }
40 |
41 | func unserialize(b []byte) (*Protocol, error) {
42 | if len(b) < 2 {
43 | return nil, errors.New("Protocol data too short")
44 | }
45 |
46 | p := &Protocol{
47 | CMD: b[0],
48 | N: b[1],
49 | }
50 |
51 | return p, nil
52 | }
53 |
// bytesEq reports whether a and b have the same length and the same bytes.
//
// The length check matters: without it a shorter b caused an index-out-of-
// range panic, and a longer b was reported equal whenever it merely started
// with a.
func bytesEq(a, b []byte) bool {
	if len(a) != len(b) {
		return false
	}

	for i := range a {
		if a[i] != b[i] {
			return false
		}
	}

	return true
}
63 |
64 | func readUntilEnd(conn net.Conn) ([]byte, error) {
65 | buf := make([]byte, 1)
66 | output := make([]byte, 0, 4)
67 |
68 | for {
69 | n, err := conn.Read(buf)
70 | if err != nil {
71 | if err == io.EOF {
72 | break
73 | }
74 | return nil, err
75 | }
76 |
77 | if n != 1 {
78 | return nil, errors.New("Transmission error")
79 | }
80 |
81 | output = append(output, buf[0])
82 |
83 | if len(output) >= 2 && bytesEq(END, output[len(output)-2:len(output)]) {
84 | break
85 | }
86 | }
87 |
88 | return output[:2], nil
89 | }
90 |
91 | func serverHandshake(listener net.Listener) net.Conn {
92 | var masterConn net.Conn
93 | var err error
94 | for {
95 | masterConn, err = listener.Accept()
96 | if err != nil {
97 | continue
98 | }
99 |
100 | pb, err := readUntilEnd(masterConn)
101 | if err != nil {
102 | continue
103 | }
104 |
105 | p, err := unserialize(pb)
106 | if err != nil {
107 | continue
108 | }
109 |
110 | if p.CMD == CTL_HANDSHAKE && p.N == CLIENT_HANDSHAKE {
111 | logger.Success("Remote socks5 handshake ok")
112 | masterConn.Write(serialize(Protocol{
113 | CMD: CTL_HANDSHAKE,
114 | N: SERVER_HANDSHAKE,
115 | }))
116 | break
117 | }
118 | }
119 |
120 | return masterConn
121 | }
122 |
123 | func clientHandshake(remote string) (net.Conn, error) {
124 | masterConn, err := net.DialTimeout(
125 | "tcp",
126 | remote,
127 | time.Millisecond*time.Duration(option.TIMEOUT),
128 | )
129 | if err != nil {
130 | return nil, err
131 | }
132 |
133 | masterConn.Write(serialize(Protocol{
134 | CMD: CTL_HANDSHAKE,
135 | N: CLIENT_HANDSHAKE,
136 | }))
137 |
138 | pb, err := readUntilEnd(masterConn)
139 | if err != nil {
140 | return nil, errors.New("Connect to remote forward server error")
141 | }
142 |
143 | p, err := unserialize(pb)
144 | if err != nil {
145 | return nil, errors.New("Connect to remote forward server error")
146 | }
147 | if p.CMD == CTL_HANDSHAKE && p.N == SERVER_HANDSHAKE {
148 | logger.Success("Connect to remote forward server ok")
149 | } else {
150 | return nil, errors.New("Connect to remote forward server error")
151 | }
152 |
153 | return masterConn, nil
154 | }
155 |
--------------------------------------------------------------------------------
/docs/README_CN.md:
--------------------------------------------------------------------------------
1 | # iox
2 |
3 | [English](https://github.com/EddieIvan01/iox) | 中文
4 |
5 | 端口转发 & 内网代理工具,功能类似于`lcx`/`ew`,但是比它们更好
6 |
7 | ## 为什么写iox?
8 |
9 | `lcx`和`ew`是很优秀的工具,但还可以提高
10 |
11 | 在最初使用它们的一段时间里,我都记不住那些复杂的命令行参数,诸如`tran, slave, rcsocks, sssocks`。工具的工作模式很清晰,明明可以用简单的参数表示,为什么他们要设计成这样(特别是`ew`的`-l -d -e -f -g -h`)
12 |
13 | 除此之外,我认为网络编程的逻辑可以优化
14 |
15 | 举个栗子,当运行`lcx -listen 8888 9999`命令时,客户端必须先连`:8888`,再连`:9999`,实际上这两个端口是平等的,在`iox`里则没有这个限制。当运行`lcx -slave 1.1.1.1 8888 1.1.1.1 9999`命令时,`lcx`会串行的连接两个主机,但是并发连接两个主机会更高效,毕竟是纯I/O操作,`iox`就是这样做的
16 |
17 | 更进一步,`iox`提供了流量加密功能。实际上,你可以直接将`iox`当做一个简易的ShadowSocks使用
18 |
19 | `iox`还提供了UDP流量转发的功能
20 |
21 | 当然,因为`iox`是用Go写的,所以静态链接的程序有一点大,原程序有2.2MB(UPX压缩后800KB)
22 |
23 | ## 特性
24 |
25 | + 流量加密(可选)
26 | + 友好的命令行参数
27 | + 逻辑优化
28 | + UDP流量转发
29 |
30 | ## 用法
31 |
32 | 所有的参数都是统一的。`-l/--local`意为监听本地端口;`-r/--remote`意为连接远端主机
33 |
34 | ### 两种模式
35 |
36 | **fwd**:
37 |
38 | 监听 `0.0.0.0:8888` 和`0.0.0.0:9999`,将两个连接间的流量转发
39 |
40 | ```
41 | ./iox fwd -l 8888 -l 9999
42 |
43 |
44 | for lcx:
45 | ./lcx -listen 8888 9999
46 | ```
47 |
48 | 监听`0.0.0.0:8888`,把流量转发到`1.1.1.1:9999`
49 |
50 | ```
51 | ./iox fwd -l 8888 -r 1.1.1.1:9999
52 |
53 |
54 | for lcx:
55 | ./lcx -tran 8888 1.1.1.1 9999
56 | ```
57 |
58 | 连接`1.1.1.1:8888`和`1.1.1.1:9999`, 在两个连接间转发
59 |
60 | ```
61 | ./iox fwd -r 1.1.1.1:8888 -r 1.1.1.1:9999
62 |
63 |
64 | for lcx:
65 | ./lcx -slave 1.1.1.1 8888 1.1.1.1 9999
66 | ```
67 |
68 | **proxy**
69 |
70 | 在本地 `0.0.0.0:1080`启动Socks5服务
71 |
72 | ```
73 | ./iox proxy -l 1080
74 |
75 |
76 | for ew:
77 | ./ew -s ssocksd -l 1080
78 | ```
79 |
80 | 在被控机开启Socks5服务,将服务转发到公网VPS
81 |
82 | 在VPS上转发`0.0.0.0:9999`到`0.0.0.0:1080`
83 |
84 | 你必须将两条命令成对使用,因为它内部包含了一个简单的协议来控制回连
85 |
86 | ```
87 | ./iox proxy -r 1.1.1.1:9999
88 | ./iox proxy -l 9999 -l 1080 // 注意,这两个端口是有顺序的
89 |
90 |
91 | for ew:
92 | ./ew -s rcsocks -l 1080 -e 9999
93 | ./ew -s rssocks -d 1.1.1.1 -e 9999
94 | ```
95 |
96 | 接着连接内网主机
97 |
98 | ```
99 | # proxychains.conf
100 | # socks5://1.1.1.1:1080
101 |
102 | $ proxychains rdesktop 192.168.0.100:3389
103 | ```
104 |
105 | ***
106 |
107 | ### 启用加密
108 |
109 | 举个栗子,我们把内网3389端口转发到VPS
110 |
111 | ```
112 | // 被控主机
113 | ./iox fwd -r 192.168.0.100:3389 -r *1.1.1.1:8888 -k 656565
114 |
115 |
116 | // 我们的VPS
117 | ./iox fwd -l *8888 -l 33890 -k 656565
118 | ```
119 |
120 | 很好理解:被控主机和VPS:8888之间的流量会被加密,预共享的密钥是'eee'(`-k`参数接收十六进制字符串,`656565`解码后即'eee'),`iox`会用这个密钥生成种子密钥和nonce(**正常来讲,不应该复用nonce。但是考虑到iox的加密功能仅仅为了绕过IDS等设备,为了不额外分配空间,TCP流加密会复用nonce**),并用Xchacha20流加密 (在v0.3版本中用Xchacha20替换掉了AES-CTR)
121 |
122 | 所以,`*`应该成对使用
123 |
124 | ```
125 | ./iox fwd -l 1000 -r *127.0.0.1:1001 -k 000102
126 | ./iox fwd -l *1001 -r *127.0.0.1:1002 -k 000102
127 | ./iox fwd -l *1002 -r *127.0.0.1:1003 -k 000102
128 | ./iox proxy -l *1003 -k 000102
129 |
130 |
131 | $ curl google.com -x socks5://127.0.0.1:1000
132 | ```
133 |
134 | 你也可以把`iox`当做一个简单的ShadowSocks来用:
135 |
136 | ```
137 | // ssserver
138 | ./iox proxy -l *9999 -k 000102
139 |
140 |
141 | // sslocal
142 | ./iox fwd -l 1080 -r *VPS:9999 -k 000102
143 | ```
144 |
145 | ### UDP转发
146 |
147 | 只需要添加命令行参数:`-u`
148 |
149 | ```
150 | ./iox fwd -l 53 -r *127.0.0.1:8888 -k 000102 -u
151 | ./iox fwd -l *8888 -l *9999 -k 000102 -u
152 | ./iox fwd -r *127.0.0.1:9999 -r 8.8.8.8:53 -k 000102 -u
153 | ```
154 |
155 | **注意:当你做多级连接的转发时,`Remote2Remote-UDP-mode`必须最后一个被启动,也就是上面示例中的第三条**
156 |
157 | UDP转发可能会有一些不合你预期的行为。实际上,目前在GitHub上只有将本地监听的UDP流量转发到远程主机的例子,所以我只能以我的理解来实现
158 |
159 | 你可以在源码里找到答案,如果你有什么想法,欢迎提PR / issue
160 |
161 | ## 许可
162 |
163 | The MIT license
164 |
165 |
--------------------------------------------------------------------------------
/CHANGELOG:
--------------------------------------------------------------------------------
1 | v0.3:
2 | 1. Replace AES-CTR with XChaCha20.
3 | Because in Golang, only AES-GCM on AMD64/ARM64 has special assembly-speed-up
4 | optimization. In summary, in Golang, except for AES-GCM on the AMD64/ARM64 architectures,
5 | all other AES modes are slower than ChaCha20 (see the benchmark below).
6 | Also, AES-GCM in Golang's implementation is not a stream operator, and needs
7 | 2X alloc overhead, so it doesn't fit.
8 | Considering all, ChaCha20 is the better choice.
9 | The benchmark:
10 | goos: windows
11 | goarch: amd64
12 | BenchmarkAESGCMSeal1K-4 3880944 308 ns/op 3326.14 MB/s
13 | BenchmarkAESGCMOpen1K-4 4092999 290 ns/op 3525.53 MB/s
14 | BenchmarkAESGCMSeal8K-4 600762 1849 ns/op 4429.61 MB/s
15 | BenchmarkAESGCMOpen8K-4 632307 1828 ns/op 4481.21 MB/s
16 | BenchmarkAESGCMSeal32K-4 164750 7034 ns/op 4658.32 MB/s
17 | BenchmarkAESGCMOpen32K-4 169389 6871 ns/op 4768.88 MB/s
18 | BenchmarkAESCTR1K-4 752043 1408 ns/op 723.51 MB/s
19 | BenchmarkAESCTR8K-4 105513 11078 ns/op 739.05 MB/s
20 | BenchmarkAESCTR32K-4 26914 44505 ns/op 736.17 MB/s
21 | BenchmarkChacha201K-4 1000000 1083 ns/op 940.83 MB/s
22 | BenchmarkChacha208K-4 300565 3942 ns/op 2076.80 MB/s
23 | BenchmarkChacha2032K-4 85921 13720 ns/op 2387.96 MB/s
24 |
25 | goos: windows
26 | goarch: 386
27 | BenchmarkAESGCMSeal1K-4 56220 21181 ns/op 48.35 MB/s
28 | BenchmarkAESGCMOpen1K-4 56224 21535 ns/op 47.55 MB/s
29 | BenchmarkAESGCMSeal8K-4 6332 166170 ns/op 49.30 MB/s
30 | BenchmarkAESGCMOpen8K-4 7063 167895 ns/op 48.79 MB/s
31 | BenchmarkAESGCMSeal32K-4 1693 667421 ns/op 49.10 MB/s
32 | BenchmarkAESGCMOpen32K-4 1718 660650 ns/op 49.60 MB/s
33 | BenchmarkAESCTR1K-4 132240 9035 ns/op 112.78 MB/s
34 | BenchmarkAESCTR8K-4 16527 72232 ns/op 113.34 MB/s
35 | BenchmarkAESCTR32K-4 4009 288576 ns/op 113.53 MB/s
36 | BenchmarkChacha201K-4 343777 3426 ns/op 297.41 MB/s
37 | BenchmarkChacha208K-4 48712 24447 ns/op 334.89 MB/s
38 | BenchmarkChacha2032K-4 12442 95950 ns/op 341.46 MB/s
39 |
40 | 2. Increase the TCP_BUFFER_SIZE to 0x8000
41 |
42 | 3. Fix a bug in UDP forward
43 |
44 |
45 | v0.2.1:
46 | 1. Add heartbeat for remote-proxy's ctl-connection, to prevent
47 | NAT devices from dropping mapping rules
48 |
49 | 2. Reduce `Remote2Remote` function's retry frequency
50 |
51 |
52 | v0.2:
53 | 1. Add UDP forward mode, CLI option: `-u`
54 |
55 |
56 | v0.1.1:
57 | 1. Logic optimization, while both two connections are encrypted,
58 | traffic will be forwarded without additional encryption and decryption
59 |
60 | 2. Made some little improvements
61 |
--------------------------------------------------------------------------------
/operate/fwd_test.go:
--------------------------------------------------------------------------------
1 | package operate
2 |
3 | import (
4 | "iox/netio"
5 | "net"
6 | "testing"
7 | "time"
8 | )
9 |
10 | // run forever
11 | func testLocal2Local(t *testing.T) {
12 | msgA := "FROM A"
13 | msgB := "FROM B"
14 |
15 | bufA := make([]byte, 1024)
16 | bufB := make([]byte, 1024)
17 |
18 | go func() {
19 | localA, err := net.DialTimeout("tcp", "127.0.0.1:9999", time.Second*3)
20 | if err != nil {
21 | t.Error(err.Error())
22 | }
23 | defer localA.Close()
24 |
25 | localCtxA, err := netio.NewTCPCtx(localA, true)
26 | if err != nil {
27 | t.Error(err.Error())
28 | }
29 |
30 | localCtxA.EncryptWrite([]byte(msgA))
31 | localCtxA.DecryptRead(bufA)
32 | }()
33 |
34 | go func() {
35 | localB, err := net.DialTimeout("tcp", "127.0.0.1:8888", time.Second*3)
36 | if err != nil {
37 | t.Error(err.Error())
38 | }
39 | defer localB.Close()
40 |
41 | localCtxB, err := netio.NewTCPCtx(localB, true)
42 | if err != nil {
43 | t.Error(err.Error())
44 | }
45 |
46 | localCtxB.EncryptWrite([]byte(msgB))
47 | localCtxB.DecryptRead(bufB)
48 | }()
49 |
50 | Local2Local(":9999", ":8888", true, true)
51 |
52 | if string(bufA[:len(msgB)]) != msgB || string(bufB[:len(msgA)]) != msgA {
53 | t.Error("Local2Local error")
54 | }
55 | }
56 |
57 | func TestRemote2Remote(t *testing.T) {
58 | msgA := "FROM A"
59 | msgB := "FROM B"
60 |
61 | bufA := make([]byte, 1024)
62 | bufB := make([]byte, 1024)
63 |
64 | go func() {
65 | listenerA, err := net.Listen("tcp", ":9999")
66 | if err != nil {
67 | t.Error(err.Error())
68 | }
69 | defer listenerA.Close()
70 |
71 | connA, err := listenerA.Accept()
72 | if err != nil {
73 | t.Error(err.Error())
74 | }
75 | defer connA.Close()
76 |
77 | connCtxA, err := netio.NewTCPCtx(connA, true)
78 | if err != nil {
79 | t.Error(err.Error())
80 | }
81 |
82 | connCtxA.EncryptWrite([]byte(msgA))
83 | connCtxA.DecryptRead(bufA)
84 | }()
85 |
86 | go func() {
87 | listenerB, err := net.Listen("tcp", ":8888")
88 | if err != nil {
89 | t.Error(err.Error())
90 | }
91 | defer listenerB.Close()
92 |
93 | connB, err := listenerB.Accept()
94 | if err != nil {
95 | t.Error(err.Error())
96 | }
97 | defer connB.Close()
98 |
99 | connCtxB, err := netio.NewTCPCtx(connB, true)
100 | if err != nil {
101 | t.Error(err.Error())
102 | }
103 |
104 | connCtxB.EncryptWrite([]byte(msgB))
105 | connCtxB.DecryptRead(bufB)
106 | }()
107 |
108 | Remote2Remote("127.0.0.1:9999", "127.0.0.1:8888", true, true)
109 | if string(bufA[:len(msgB)]) != msgB || string(bufB[:len(msgA)]) != msgA {
110 | t.Error("Remote2Remote error")
111 | }
112 | }
113 |
114 | // run forever
115 | func testLocal2Remote(t *testing.T) {
116 | msgA := "FROM A"
117 | msgB := "FROM B"
118 |
119 | bufA := make([]byte, 1024)
120 | bufB := make([]byte, 1024)
121 |
122 | go func() {
123 | localA, err := net.DialTimeout("tcp", "127.0.0.1:9999", time.Second*3)
124 | if err != nil {
125 | t.Error(err.Error())
126 | }
127 | defer localA.Close()
128 |
129 | localCtxA, err := netio.NewTCPCtx(localA, true)
130 | if err != nil {
131 | t.Error(err.Error())
132 | }
133 |
134 | localCtxA.EncryptWrite([]byte(msgA))
135 | localCtxA.DecryptRead(bufA)
136 | }()
137 |
138 | go func() {
139 | listenerB, err := net.Listen("tcp", ":8888")
140 | if err != nil {
141 | t.Error(err.Error())
142 | }
143 | defer listenerB.Close()
144 |
145 | connB, err := listenerB.Accept()
146 | if err != nil {
147 | t.Error(err.Error())
148 | }
149 | defer connB.Close()
150 |
151 | connCtxB, err := netio.NewTCPCtx(connB, true)
152 | if err != nil {
153 | t.Error(err.Error())
154 | }
155 |
156 | connCtxB.EncryptWrite([]byte(msgB))
157 | connCtxB.DecryptRead(bufB)
158 | }()
159 |
160 | Local2Remote(":9999", "127.0.0.1:8888", true, true)
161 | if string(bufA[:len(msgB)]) != msgB || string(bufB[:len(msgA)]) != msgA {
162 | t.Error("Remote2Remote error")
163 | }
164 | }
165 |
--------------------------------------------------------------------------------
/option/parsecli.go:
--------------------------------------------------------------------------------
1 | package option
2 |
3 | import (
4 | "encoding/hex"
5 | "errors"
6 | "iox/crypto"
7 | "strconv"
8 | )
9 |
// Errors returned by ParseCli.
var (
	// errUnrecognizedMode: the first argument is neither "fwd", "proxy" nor a help flag.
	errUnrecognizedMode = errors.New("Unrecognized mode")
	// errHexDecodeError: the -k/--key value is not a valid hex string.
	errHexDecodeError = errors.New("Not hexadecimal string")
	// PrintUsage is an empty-message sentinel returned when no arguments
	// are given or -h/--help is present.
	PrintUsage = errors.New("")
	// errUnrecognizedSubMode: the number of -l/-r arguments matches no sub-mode.
	errUnrecognizedSubMode = errors.New("Malform args")
	// errNoSecretKey: an endpoint is marked encrypted ('*') but no key was set.
	errNoSecretKey = errors.New("Must provide secret key")
	// errNotANumber: the -t/--timeout value is not an integer.
	errNotANumber = errors.New("Timeout must be a number")
	// errUDPMode: -u/--udp was combined with proxy mode.
	errUDPMode = errors.New("UDP mode only support fwd mode")
)
19 |
// Sub-modes chosen by ParseCli from the number of -l and -r arguments.
const (
	// fwd mode
	SUBMODE_L2L = iota // two -l, no -r
	SUBMODE_R2R // no -l, two -r
	SUBMODE_L2R // one -l, one -r

	// proxy mode
	SUBMODE_LP // one -l, no -r
	SUBMODE_RP // no -l, one -r
	SUBMODE_RPL2L // two -l, no -r
)
29 |
30 | // Dont need flag-lib
31 | func ParseCli(args []string) (
32 | mode string,
33 | submode int,
34 | local []string,
35 | remote []string,
36 | lenc []bool,
37 | renc []bool,
38 | err error) {
39 |
40 | if len(args) == 0 {
41 | err = PrintUsage
42 | return
43 | }
44 |
45 | mode = args[0]
46 |
47 | switch mode {
48 | case "fwd", "proxy":
49 | case "-h", "--help":
50 | err = PrintUsage
51 | return
52 | default:
53 | err = errUnrecognizedMode
54 | return
55 | }
56 |
57 | args = args[1:]
58 | ptr := 0
59 |
60 | for {
61 | if ptr == len(args) {
62 | break
63 | }
64 |
65 | switch args[ptr] {
66 | case "-l", "--local":
67 | l := args[ptr+1]
68 | if l[0] == '*' {
69 | lenc = append(lenc, true)
70 | l = l[1:]
71 | } else {
72 | lenc = append(lenc, false)
73 | }
74 |
75 | local = append(local, ":"+l)
76 | ptr++
77 |
78 | case "-r", "--remote":
79 | r := args[ptr+1]
80 | if r[0] == '*' {
81 | renc = append(renc, true)
82 | r = r[1:]
83 | } else {
84 | renc = append(renc, false)
85 | }
86 |
87 | remote = append(remote, r)
88 | ptr++
89 |
90 | case "-u", "--udp":
91 | PROTOCOL = "UDP"
92 |
93 | case "-k", "--key":
94 | var key []byte
95 | key, err = hex.DecodeString(args[ptr+1])
96 | if err != nil {
97 | err = errHexDecodeError
98 | return
99 | }
100 | crypto.ExpandKey(key)
101 | ptr++
102 |
103 | case "-t", "--timeout":
104 | TIMEOUT, err = strconv.Atoi(args[ptr+1])
105 | if err != nil {
106 | err = errNotANumber
107 | return
108 | }
109 | ptr++
110 | case "-v", "--verbose":
111 | VERBOSE = true
112 | case "-h", "--help":
113 | err = PrintUsage
114 | return
115 | }
116 |
117 | ptr++
118 | }
119 |
120 | if mode == "fwd" {
121 | switch {
122 | case len(local) == 0 && len(remote) == 2:
123 | submode = SUBMODE_R2R
124 | case len(local) == 1 && len(remote) == 1:
125 | submode = SUBMODE_L2R
126 | case len(local) == 2 && len(remote) == 0:
127 | submode = SUBMODE_L2L
128 | default:
129 | err = errUnrecognizedSubMode
130 | return
131 | }
132 | } else {
133 | switch {
134 | case len(local) == 0 && len(remote) == 1:
135 | submode = SUBMODE_RP
136 | case len(local) == 1 && len(remote) == 0:
137 | submode = SUBMODE_LP
138 | case len(local) == 2 && len(remote) == 0:
139 | submode = SUBMODE_RPL2L
140 | default:
141 | err = errUnrecognizedSubMode
142 | return
143 | }
144 | }
145 |
146 | if len(lenc) != len(local) || len(renc) != len(remote) {
147 | err = errUnrecognizedSubMode
148 | return
149 | }
150 |
151 | if crypto.SECRET_KEY == nil {
152 | for i, _ := range lenc {
153 | if lenc[i] {
154 | err = errNoSecretKey
155 | return
156 | }
157 | }
158 |
159 | for i, _ := range renc {
160 | if renc[i] {
161 | err = errNoSecretKey
162 | return
163 | }
164 | }
165 | }
166 |
167 | if PROTOCOL == "UDP" && mode == "proxy" {
168 | err = errUDPMode
169 | return
170 | }
171 |
172 | shouldFwdWithoutDec(lenc, renc)
173 |
174 | return
175 | }
176 |
177 | func shouldFwdWithoutDec(lenc []bool, renc []bool) {
178 | if len(lenc)+len(renc) != 2 {
179 | return
180 | }
181 |
182 | var result uint8
183 | for i, _ := range lenc {
184 | if lenc[i] {
185 | result++
186 | }
187 | }
188 |
189 | for i, _ := range renc {
190 | if renc[i] {
191 | result++
192 | }
193 | }
194 |
195 | if result == 2 {
196 | FORWARD_WITHOUT_DEC = true
197 | }
198 | }
199 |
--------------------------------------------------------------------------------
/netio/forward_test.go:
--------------------------------------------------------------------------------
1 | package netio
2 |
3 | import (
4 | "bytes"
5 | "iox/crypto"
6 | "iox/option"
7 | "net"
8 | "testing"
9 | "time"
10 | )
11 |
// _buffer is an in-memory test double built on bytes.Buffer. EncryptWrite
// and DecryptRead pass straight through to Write and Read with no
// encryption; the connection-style methods are no-ops.
//
// All methods use a pointer receiver so the embedded bytes.Buffer is never
// copied on a call.
type _buffer struct {
	bytes.Buffer
}

// EncryptWrite appends bs to the buffer unchanged.
func (b *_buffer) EncryptWrite(bs []byte) (int, error) { return b.Write(bs) }

// DecryptRead reads buffered bytes into bs unchanged.
func (b *_buffer) DecryptRead(bs []byte) (int, error) { return b.Read(bs) }

// Close does nothing and always returns nil.
func (b *_buffer) Close() error { return nil }

// LocalAddr always returns nil.
func (b *_buffer) LocalAddr() net.Addr { return nil }

// RemoteAddr always returns nil.
func (b *_buffer) RemoteAddr() net.Addr { return nil }

// SetDeadline does nothing and always returns nil.
func (b *_buffer) SetDeadline(t time.Time) error { return nil }

// SetReadDeadline does nothing and always returns nil.
func (b *_buffer) SetReadDeadline(t time.Time) error { return nil }

// SetWriteDeadline does nothing and always returns nil.
func (b *_buffer) SetWriteDeadline(t time.Time) error { return nil }
24 |
25 | func TestCipherCopy(t *testing.T) {
26 | option.KEY = []byte("KEY")
27 | crypto.ExpandKey(option.KEY)
28 |
29 | listener, err := net.Listen("tcp", "127.0.0.1:9999")
30 | if err != nil {
31 | t.Error(err.Error())
32 | }
33 | defer listener.Close()
34 |
35 | buf := &_buffer{}
36 |
37 | signal := make(chan struct{}, 1)
38 | go func() {
39 | localConn, err := listener.Accept()
40 | if err != nil {
41 | t.Error(err.Error())
42 | }
43 |
44 | localConnCtx, err := NewTCPCtx(localConn, true)
45 | if err != nil {
46 | t.Error(err.Error())
47 | }
48 |
49 | CipherCopy(buf, localConnCtx)
50 | signal <- struct{}{}
51 | }()
52 |
53 | conn, err := net.Dial("tcp", "127.0.0.1:9999")
54 | if err != nil {
55 | t.Error(err.Error())
56 | }
57 |
58 | connCtx, err := NewTCPCtx(conn, true)
59 | if err != nil {
60 | t.Error(err.Error())
61 | }
62 |
63 | msg := "testing message."
64 | _, err = connCtx.EncryptWrite([]byte(msg))
65 | if err != nil {
66 | t.Error(err.Error())
67 | }
68 | conn.Close()
69 |
70 | <-signal
71 | if buf.String() != msg {
72 | t.Log(buf.Bytes())
73 | t.Error("CipherCopy error")
74 | }
75 | }
76 |
77 | func TestPipeForward(t *testing.T) {
78 | option.KEY = []byte("KEY")
79 | crypto.ExpandKey(option.KEY)
80 | listenerA, err := net.Listen("tcp", "127.0.0.1:9999")
81 | if err != nil {
82 | t.Error(err.Error())
83 | }
84 | defer listenerA.Close()
85 |
86 | listenerB, err := net.Listen("tcp", "127.0.0.1:8888")
87 | if err != nil {
88 | t.Error(err.Error())
89 | }
90 | defer listenerB.Close()
91 |
92 | var connA, connB net.Conn
93 | signal := make(chan struct{}, 1)
94 |
95 | msgA := "FROM A"
96 | msgB := "FROM B"
97 |
98 | bufA := make([]byte, 1024)
99 | bufB := make([]byte, 1024)
100 |
101 | go func() {
102 | localA, err := net.DialTimeout("tcp", "127.0.0.1:9999", time.Second*3)
103 | if err != nil {
104 | t.Error(err.Error())
105 | }
106 | defer localA.Close()
107 |
108 | localCtxA, err := NewTCPCtx(localA, true)
109 | if err != nil {
110 | t.Error(err.Error())
111 | }
112 |
113 | localCtxA.EncryptWrite([]byte(msgA))
114 | localCtxA.DecryptRead(bufA)
115 |
116 | signal <- struct{}{}
117 | }()
118 |
119 | go func() {
120 | localB, err := net.DialTimeout("tcp", "127.0.0.1:8888", time.Second*3)
121 | if err != nil {
122 | t.Error(err.Error())
123 | }
124 | defer localB.Close()
125 |
126 | localCtxB, err := NewTCPCtx(localB, true)
127 | if err != nil {
128 | t.Error(err.Error())
129 | }
130 |
131 | localCtxB.EncryptWrite([]byte(msgB))
132 | localCtxB.DecryptRead(bufB)
133 |
134 | signal <- struct{}{}
135 | }()
136 |
137 | go func() {
138 | var err error
139 | connA, err = listenerA.Accept()
140 | if err != nil {
141 | t.Error(err.Error())
142 | }
143 | signal <- struct{}{}
144 | }()
145 |
146 | go func() {
147 | var err error
148 | connB, err = listenerB.Accept()
149 | if err != nil {
150 | t.Error(err.Error())
151 | }
152 | signal <- struct{}{}
153 | }()
154 |
155 | <-signal
156 | <-signal
157 |
158 | connCtxA, err := NewTCPCtx(connA, true)
159 | if err != nil {
160 | t.Error(err.Error())
161 | }
162 |
163 | connCtxB, err := NewTCPCtx(connB, true)
164 | if err != nil {
165 | t.Error(err.Error())
166 | }
167 |
168 | PipeForward(connCtxA, connCtxB)
169 |
170 | <-signal
171 | <-signal
172 |
173 | if string(bufA[:len(msgB)]) != msgB || string(bufB[:len(msgA)]) != msgA {
174 | t.Error("PipeForward error")
175 | }
176 | }
177 |
--------------------------------------------------------------------------------
/netio/forward.go:
--------------------------------------------------------------------------------
1 | package netio
2 |
3 | import (
4 | "io"
5 | "iox/logger"
6 | "iox/option"
7 | )
8 |
9 | // Memory optimized
10 | func CipherCopy(dst Ctx, src Ctx) (int64, error) {
11 | buffer := make([]byte, option.TCP_BUFFER_SIZE)
12 | var written int64
13 | var err error
14 |
15 | for {
16 | var nr int
17 | var er error
18 |
19 | nr, er = src.DecryptRead(buffer)
20 |
21 | if nr > 0 {
22 | logger.Info(" <== [%d bytes] ==", nr)
23 |
24 | var nw int
25 | var ew error
26 |
27 | nw, ew = dst.EncryptWrite(buffer[:nr])
28 |
29 | if nw > 0 {
30 | logger.Info(" == [%d bytes] ==> ", nw)
31 | written += int64(nw)
32 | }
33 | if ew != nil {
34 | err = ew
35 | break
36 | }
37 | if nr != nw {
38 | err = io.ErrShortWrite
39 | break
40 | }
41 | }
42 | if er != nil {
43 | if er != io.EOF {
44 | err = er
45 | }
46 | break
47 | }
48 | }
49 |
50 | return written, err
51 | }
52 |
53 | func PipeForward(ctxA Ctx, ctxB Ctx) {
54 | signal := make(chan struct{}, 1)
55 |
56 | go func() {
57 | CipherCopy(ctxA, ctxB)
58 | signal <- struct{}{}
59 | }()
60 |
61 | go func() {
62 | CipherCopy(ctxB, ctxA)
63 | signal <- struct{}{}
64 | }()
65 |
66 | <-signal
67 | }
68 |
69 | // This function will run forever
70 | // If need to do performance optimization in future,
71 | // I will consider a go-routine pool here, but
72 | // this can lead to mutex-lock overhead
73 | func ForwardUDP(ctxA Ctx, ctxB Ctx) {
74 | go func() {
75 | buffer := make([]byte, option.UDP_PACKET_MAX_SIZE)
76 | for {
77 | nr, _ := ctxA.DecryptRead(buffer)
78 | if nr > 0 {
79 | if nr == 4 &&
80 | buffer[0] == 0xCC && buffer[1] == 0xDD &&
81 | buffer[2] == 0xEE && buffer[3] == 0xFF {
82 | continue
83 | }
84 | logger.Info(" <== [%d bytes] ==", nr)
85 | nw, _ := ctxB.EncryptWrite(buffer[:nr])
86 | if nw > 0 {
87 | logger.Info(" == [%d bytes] ==>", nw)
88 | }
89 | }
90 | }
91 | }()
92 |
93 | go func() {
94 | buffer := make([]byte, option.UDP_PACKET_MAX_SIZE)
95 | for {
96 | nr, _ := ctxB.DecryptRead(buffer)
97 | if nr > 0 {
98 | if nr == 4 &&
99 | buffer[0] == 0xCC && buffer[1] == 0xDD &&
100 | buffer[2] == 0xEE && buffer[3] == 0xFF {
101 | continue
102 | }
103 | logger.Info(" <== [%d bytes] ==", nr)
104 | nw, _ := ctxA.EncryptWrite(buffer[:nr])
105 | if nw > 0 {
106 | logger.Info(" == [%d bytes] ==>", nw)
107 | }
108 | }
109 | }
110 | }()
111 |
112 | select {}
113 | }
114 |
// UDP_INIT_PACKET is the 4-byte marker CC DD EE FF. ForwardUDP and
// ForwardUnconnectedUDP recognise a datagram made of exactly these bytes
// and do not forward it.
// NOTE(review): the sender is not visible in this file — presumably a peer
// announcing its address to an unconnected socket; confirm at the call sites.
var UDP_INIT_PACKET = []byte{
	0xCC, 0xDD, 0xEE, 0xFF,
}
118 |
119 | // Each socket only writes the packet to the address
120 | // which last sent packet to it recently,
121 | // instead of boardcasting to all the address.
122 | // I think it is as expected
123 | func ForwardUnconnectedUDP(ctxA Ctx, ctxB Ctx) {
124 | addrRegistedA := false
125 | addrRegistedB := false
126 | addrRegistedSignalA := make(chan struct{}, 1)
127 | addrRegistedSignalB := make(chan struct{}, 1)
128 |
129 | packetChannelA := make(chan []byte, option.UDP_PACKET_CHANNEL_SIZE)
130 | packetChannelB := make(chan []byte, option.UDP_PACKET_CHANNEL_SIZE)
131 |
132 | // A read
133 | go func() {
134 | for {
135 | buffer := make([]byte, option.UDP_PACKET_MAX_SIZE)
136 | nr, _ := ctxA.DecryptRead(buffer)
137 | if nr > 0 {
138 | if !addrRegistedA {
139 | addrRegistedA = true
140 | addrRegistedSignalA <- struct{}{}
141 | }
142 |
143 | if !(nr == 4 &&
144 | buffer[0] == 0xCC && buffer[1] == 0xDD &&
145 | buffer[2] == 0xEE && buffer[3] == 0xFF) {
146 | logger.Info(" <== [%d bytes] ==", nr)
147 | packetChannelB <- buffer[:nr]
148 | }
149 | }
150 | }
151 | }()
152 |
153 | // B read
154 | go func() {
155 | for {
156 | buffer := make([]byte, option.UDP_PACKET_MAX_SIZE)
157 | nr, _ := ctxB.DecryptRead(buffer)
158 | if nr > 0 {
159 | if !addrRegistedB {
160 | addrRegistedB = true
161 | addrRegistedSignalB <- struct{}{}
162 | }
163 |
164 | if !(nr == 4 &&
165 | buffer[0] == 0xCC && buffer[1] == 0xDD &&
166 | buffer[2] == 0xEE && buffer[3] == 0xFF) {
167 | logger.Info(" <== [%d bytes] ==", nr)
168 | packetChannelA <- buffer[:nr]
169 | }
170 | }
171 | }
172 | }()
173 |
174 | // A write
175 | go func() {
176 | <-addrRegistedSignalA
177 | var n int
178 | for {
179 | packet := <-packetChannelA
180 | n, _ = ctxA.EncryptWrite(packet)
181 | if n > 0 {
182 | logger.Info(" == [%d bytes] ==>", n)
183 | }
184 | }
185 | }()
186 |
187 | // B write
188 | go func() {
189 | <-addrRegistedSignalB
190 | var n int
191 | for {
192 | packet := <-packetChannelB
193 | n, _ = ctxB.EncryptWrite(packet)
194 | if n > 0 {
195 | logger.Info(" == [%d bytes] ==>", n)
196 | }
197 | }
198 | }()
199 |
200 | select {}
201 | }
202 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # iox
2 |
3 | English | [中文](https://github.com/EddieIvan01/iox/tree/master/docs/README_CN.md)
4 |
5 | Tool for port forward & intranet proxy, just like `lcx`/`ew`, but better
6 |
7 | ## Why write?
8 |
9 | `lcx` and `ew` are awesome, but can be improved.
10 |
11 | When I first used them, I couldn't remember their complicated parameters for a long time, such as `tran, slave, rcsocks, sssocks...`. The work modes are clear, so why are the parameters designed like this (especially `ew`'s `-l -d -e -f -g -h`)?
12 |
13 | Besides, I think the net programming logic could be optimized.
14 |
15 | For example, when running `lcx -listen 8888 9999`, the client must connect to `:8888` first and then `:9999`; in `iox`, the two ports can be connected in any order. And when running `lcx -slave 1.1.1.1 8888 1.1.1.1 9999`, `lcx` connects to the two hosts serially, but it's more efficient to connect concurrently, as `iox` does.
16 |
17 | What's more, `iox` provides traffic encryption feature. Actually, you can use `iox` as a simple ShadowSocks.
18 |
19 | And `iox` also provides UDP traffic forward.
20 |
21 | Of course, because `iox` is written in Go, the static-link-program is a little large, raw program is 2.2MB (800KB after UPX compression)
22 |
23 | ## Feature
24 |
25 | + traffic encryption (optional)
26 | + humanized CLI option
27 | + logic optimization
28 | + UDP traffic forward
29 |
30 | ## Usage
31 |
32 | You can see, all params are uniform. `-l/--local` means listen on a local port; `-r/--remote` means connect to remote host
33 |
34 | ### Two modes
35 |
36 | **fwd**:
37 |
38 | Listen on `0.0.0.0:8888` and `0.0.0.0:9999`, forward traffic between 2 connections
39 |
40 | ```
41 | ./iox fwd -l 8888 -l 9999
42 |
43 |
44 | for lcx:
45 | ./lcx -listen 8888 9999
46 | ```
47 |
48 | Listen on `0.0.0.0:8888`, forward traffic to `1.1.1.1:9999`
49 |
50 | ```
51 | ./iox fwd -l 8888 -r 1.1.1.1:9999
52 |
53 |
54 | for lcx:
55 | ./lcx -tran 8888 1.1.1.1 9999
56 | ```
57 |
58 | Connect `1.1.1.1:8888` and `1.1.1.1:9999`, forward between 2 connections
59 |
60 | ```
61 | ./iox fwd -r 1.1.1.1:8888 -r 1.1.1.1:9999
62 |
63 |
64 | for lcx:
65 | ./lcx -slave 1.1.1.1 8888 1.1.1.1 9999
66 | ```
67 |
68 | **proxy**
69 |
70 | Start Socks5 server on `0.0.0.0:1080`
71 |
72 | ```
73 | ./iox proxy -l 1080
74 |
75 |
76 | for ew:
77 | ./ew -s ssocksd -l 1080
78 | ```
79 |
80 | Start Socks5 server on the controlled host, then forward it to a public VPS
81 |
82 | VPS forward `0.0.0.0:9999` to `0.0.0.0:1080`
83 |
84 | You must use the two commands as a pair, because they contain a simple protocol to control connecting back
85 |
86 | ```
87 | ./iox proxy -r 1.1.1.1:9999
88 | ./iox proxy -l 9999 -l 1080 // notice, the two ports are in order
89 |
90 |
91 | for ew:
92 | ./ew -s rcsocks -l 1080 -e 9999
93 | ./ew -s rssocks -d 1.1.1.1 -e 9999
94 | ```
95 |
96 | Then connect intranet host
97 |
98 | ```
99 | # proxychains.conf
100 | # socks5://1.1.1.1:1080
101 |
102 | $ proxychains rdesktop 192.168.0.100:3389
103 | ```
104 |
105 | ***
106 |
107 | ### Enable encryption
108 |
109 | For example, we forward 3389 port in intranet to our VPS
110 |
111 | ```
112 | // be-controlled host
113 | ./iox fwd -r 192.168.0.100:3389 -r *1.1.1.1:8888 -k 656565
114 |
115 |
116 | // our VPS
117 | ./iox fwd -l *8888 -l 33890 -k 656565
118 | ```
119 |
120 | It's easy to understand: traffic between the controlled host and our VPS:8888 will be encrypted. The pre-shared secret key is 'eee' (`-k` takes a hex string, and `656565` decodes to 'eee'); `iox` will use it to generate the seed key and nonce **(Normally, a nonce shouldn't be reused. But considering that iox's encryption is only for bypassing IDS, in order not to allocate extra space, the TCP stream encryption will reuse the nonce)**, then encrypt with Xchacha20 (AES-CTR was replaced with Xchacha20 in v0.3)
121 |
122 | So, the `*` should be used in pairs
123 |
124 | ```
125 | ./iox fwd -l 1000 -r *127.0.0.1:1001 -k 000102
126 | ./iox fwd -l *1001 -r *127.0.0.1:1002 -k 000102
127 | ./iox fwd -l *1002 -r *127.0.0.1:1003 -k 000102
128 | ./iox proxy -l *1003 -k 000102
129 |
130 |
131 | $ curl google.com -x socks5://127.0.0.1:1000
132 | ```
133 |
134 | Using `iox` as a simple ShadowSocks
135 |
136 | ```
137 | // ssserver
138 | ./iox proxy -l *9999 -k 000102
139 |
140 |
141 | // sslocal
142 | ./iox fwd -l 1080 -r *VPS:9999 -k 000102
143 | ```
144 |
145 | ### UDP forward
146 |
147 | Only need to add CLI option `-u`
148 |
149 | ```
150 | ./iox fwd -l 53 -r *127.0.0.1:8888 -k 000102 -u
151 | ./iox fwd -l *8888 -l *9999 -k 000102 -u
152 | ./iox fwd -r *127.0.0.1:9999 -r 8.8.8.8:53 -k 000102 -u
153 | ```
154 |
155 | **NOTICE: When you make a multistage connection, the `Remote2Remote-UDP-mode` must be started last, which is the No.3 command in above example**
156 |
157 | UDP forwarding may have behavior that is not as you expected. Actually, on GitHub now, there are only examples of forwarding a local listener to a remote host, so I can only implement them with my understanding
158 |
159 | You can find why in the source code. If you have any ideas, PRs / issues are welcome
160 |
161 | ## License
162 |
163 | The MIT license
164 |
165 |
--------------------------------------------------------------------------------
/operate/proxy.go:
--------------------------------------------------------------------------------
1 | package operate
2 |
3 | import (
4 | "iox/logger"
5 | "iox/netio"
6 | "iox/option"
7 | "iox/socks5"
8 | "net"
9 | "os"
10 | "os/signal"
11 | "time"
12 | )
13 |
14 | // local is :port
15 | func ProxyLocal(local string, encrypted bool) {
16 | listener, err := net.Listen("tcp", local)
17 | if err != nil {
18 | logger.Warn(
19 | "Socks5 listen on %s error: %s",
20 | local, err.Error(),
21 | )
22 | return
23 | }
24 |
25 | logger.Success("Start socks5 server on %s", local)
26 |
27 | for {
28 | conn, err := listener.Accept()
29 | if err != nil {
30 | logger.Warn(
31 | "Socks5 handle local connect error: %s",
32 | err.Error(),
33 | )
34 | continue
35 | }
36 |
37 | go func() {
38 | defer conn.Close()
39 | connCtx, err := netio.NewTCPCtx(conn, encrypted)
40 | if err != nil {
41 | return
42 | }
43 |
44 | socks5.HandleConnection(connCtx)
45 | }()
46 | }
47 | }
48 |
49 | // remote is domain:port or ip:port
50 | func ProxyRemote(remote string, encrypted bool) {
51 | masterConn, err := clientHandshake(remote)
52 | if err != nil {
53 | logger.Warn(err.Error())
54 | return
55 | }
56 |
57 | connectRequest := make(chan uint8, MAX_CONNECTION/2)
58 | defer close(connectRequest)
59 |
60 | endSignal := make(chan struct{})
61 |
62 | // handle ctrl+C and send heartbeat packets periodically
63 | {
64 | sigs := make(chan os.Signal)
65 | signal.Notify(sigs, os.Interrupt)
66 | go func() {
67 | <-sigs
68 | masterConn.Write(serialize(Protocol{
69 | CMD: CTL_CLEANUP,
70 | N: 0,
71 | }))
72 | logger.Success("Recv Ctrl+C, exit now")
73 | os.Exit(0)
74 | }()
75 |
76 | // no need for mutex-lock here
77 | ticker := time.NewTicker(time.Second * option.HEARTBEAT_FREQUENCY)
78 | go func() {
79 | for {
80 | <-ticker.C
81 | masterConn.Write(serialize(Protocol{
82 | CMD: CTL_HEARTBEAT,
83 | N: 0,
84 | }))
85 | }
86 | }()
87 | }
88 |
89 | // handle master conn
90 | go func() {
91 | defer masterConn.Close()
92 | for {
93 | pb, err := readUntilEnd(masterConn)
94 | if err != nil {
95 | continue
96 | }
97 |
98 | p, err := unserialize(pb)
99 | if err != nil {
100 | continue
101 | }
102 |
103 | switch p.CMD {
104 | case CTL_CONNECT_ME:
105 | connectRequest <- p.N
106 | case CTL_CLEANUP:
107 | endSignal <- struct{}{}
108 | return
109 | }
110 | }
111 | }()
112 |
113 | // handle CONNECT_ME request
114 | for {
115 | select {
116 | case <-endSignal:
117 | logger.Success("Recv exit signal from remote, exit now")
118 | return
119 | case n := <-connectRequest:
120 | for n > 0 {
121 | go func() {
122 | conn, err := net.DialTimeout(
123 | "tcp",
124 | remote,
125 | time.Duration(option.TIMEOUT)*time.Millisecond,
126 | )
127 | if err != nil {
128 | logger.Info(err.Error())
129 | return
130 | }
131 |
132 | connCtx, err := netio.NewTCPCtx(conn, encrypted)
133 | if err != nil {
134 | return
135 | }
136 |
137 | socks5.HandleConnection(connCtx)
138 | }()
139 | n--
140 | }
141 | }
142 | }
143 | }
144 |
145 | func ProxyRemoteL2L(master string, local string, menc bool, lenc bool) {
146 | masterListener, err := net.Listen("tcp", master)
147 | if err != nil {
148 | logger.Warn("Listen on %s error", master)
149 | return
150 | }
151 | defer masterListener.Close()
152 |
153 | logger.Info("Listent on %s for remote socks5 server", master)
154 |
155 | localListener, err := net.Listen("tcp", local)
156 | if err != nil {
157 | logger.Warn("Listen on %s error", local)
158 | return
159 | }
160 | defer localListener.Close()
161 |
162 | // HANDSHAKE:
163 | masterConn := serverHandshake(masterListener)
164 | defer func() {
165 | masterConn.Close()
166 | }()
167 |
168 | // handle ctrl+C
169 | {
170 | sigs := make(chan os.Signal)
171 | signal.Notify(sigs, os.Interrupt)
172 | go func() {
173 | <-sigs
174 | masterConn.Write(serialize(Protocol{
175 | CMD: CTL_CLEANUP,
176 | N: 0,
177 | }))
178 | logger.Success("Recv Ctrl+C, exit now")
179 | os.Exit(0)
180 | }()
181 | }
182 |
183 | localConnBuffer := make(chan net.Conn, MAX_CONNECTION/2)
184 | defer close(localConnBuffer)
185 |
186 | logger.Success("Forward socks5 server to %s", local)
187 |
188 | // handle masterConn read
189 | go func() {
190 | for {
191 | pb, err := readUntilEnd(masterConn)
192 | if err != nil {
193 | continue
194 | }
195 |
196 | p, err := unserialize(pb)
197 | if err != nil {
198 | continue
199 | }
200 |
201 | switch p.CMD {
202 | case CTL_CLEANUP:
203 | logger.Success("Recv exit signal from remote, exit now")
204 | os.Exit(0)
205 | case CTL_HEARTBEAT:
206 | continue
207 | }
208 | }
209 | }()
210 |
211 | // handle local connection
212 | go func() {
213 | for {
214 | localConn, err := localListener.Accept()
215 | if err != nil {
216 | continue
217 | }
218 |
219 | localConnBuffer <- localConn
220 |
221 | // to speed up
222 | // don't need to calculate precisly
223 | var n uint8
224 | l := len(localConnBuffer)
225 | switch {
226 | case l > MAX_CONNECTION/0x40:
227 | n = 2
228 | case l > MAX_CONNECTION/0x20:
229 | n = 3
230 | default:
231 | n = 1
232 | }
233 |
234 | masterConn.Write(serialize(Protocol{
235 | CMD: CTL_CONNECT_ME,
236 | N: n,
237 | }))
238 | }
239 | }()
240 |
241 | for {
242 | remoteConn, err := masterListener.Accept()
243 | if err != nil {
244 | continue
245 | }
246 |
247 | localConn := <-localConnBuffer
248 |
249 | go func() {
250 | defer remoteConn.Close()
251 | defer localConn.Close()
252 |
253 | remoteConnCtx, err := netio.NewTCPCtx(remoteConn, menc)
254 | if err != nil {
255 | return
256 | }
257 |
258 | localConnCtx, err := netio.NewTCPCtx(localConn, lenc)
259 | if err != nil {
260 | return
261 | }
262 |
263 | netio.PipeForward(remoteConnCtx, localConnCtx)
264 | }()
265 | }
266 | }
267 |
--------------------------------------------------------------------------------
/socks5/socks5.go:
--------------------------------------------------------------------------------
1 | // code from https://github.com/ring04h/s5.go
2 | package socks5
3 |
4 | import (
5 | "errors"
6 | "io"
7 | "iox/logger"
8 | "iox/netio"
9 | "iox/option"
10 | "net"
11 | "strconv"
12 | "time"
13 | )
14 |
var (
	// Commands holds the names of the three SOCKS5 request commands.
	Commands = []string{"CONNECT", "BIND", "UDP ASSOCIATE"}
	// AddrType holds address type names; the non-empty entries sit at
	// indexes 1, 3 and 4, the same values parseTarget accepts as ATYP.
	AddrType = []string{"", "IPv4", "", "Domain", "IPv6"}
	// Conns starts out as an empty slice of connections.
	// NOTE(review): nothing in this file reads or appends to it — confirm
	// whether other packages still depend on it.
	Conns = make([]net.Conn, 0)
	// Verbose defaults to false.
	// NOTE(review): not read anywhere in this file — confirm it is still used.
	Verbose = false

	// Sentinel errors returned by handShake and parseTarget.
	errAddrType      = errors.New("socks addr type not supported")
	errVer           = errors.New("socks version not supported")
	errMethod        = errors.New("socks only support noauth method")
	errAuthExtraData = errors.New("socks authentication get extra data")
	errReqExtraData  = errors.New("socks request get extra data")
	errCmd           = errors.New("socks only support connect command")
)

const (
	// socksVer5 is the protocol version byte expected in the greeting and
	// in the request, and echoed back in replies.
	socksVer5 = 0x05
	// socksCmdConnect is the only request command parseTarget accepts.
	socksCmdConnect = 0x01
)
33 |
34 | func readAtLeast(r netio.Ctx, buf []byte, min int) (n int, err error) {
35 | if len(buf) < min {
36 | return 0, io.ErrShortBuffer
37 | }
38 |
39 | for n < min && err == nil {
40 | var nn int
41 | nn, err = r.DecryptRead(buf[n:])
42 | n += nn
43 | }
44 | if n >= min {
45 | err = nil
46 | } else if n > 0 && err == io.EOF {
47 | err = io.ErrUnexpectedEOF
48 | }
49 | return
50 | }
51 |
52 | func handShake(conn netio.Ctx) (err error) {
53 | const (
54 | idVer = 0
55 | idNmethod = 1
56 | )
57 |
58 | buf := make([]byte, 258)
59 |
60 | var n int
61 |
62 | // make sure we get the nmethod field
63 | if n, err = readAtLeast(conn, buf, idNmethod+1); err != nil {
64 | return
65 | }
66 |
67 | if buf[idVer] != socksVer5 {
68 | return errVer
69 | }
70 |
71 | nmethod := int(buf[idNmethod]) // client support auth mode
72 | msgLen := nmethod + 2 // auth msg length
73 | if n == msgLen { // handshake done, common case
74 | // do nothing, jump directly to send confirmation
75 | } else if n < msgLen { // has more methods to read, rare case
76 | if _, err = readAtLeast(conn, buf[n:msgLen], len(buf[n:msgLen])); err != nil {
77 | return
78 | }
79 | } else { // error, should not get extra data
80 | return errAuthExtraData
81 | }
82 | /*
83 | X'00' NO AUTHENTICATION REQUIRED
84 | X'01' GSSAPI
85 | X'02' USERNAME/PASSWORD
86 | X'03' to X'7F' IANA ASSIGNED
87 | X'80' to X'FE' RESERVED FOR PRIVATE METHODS
88 | X'FF' NO ACCEPTABLE METHODS
89 | */
90 | // send confirmation: version 5, no authentication required
91 | _, err = conn.EncryptWrite([]byte{socksVer5, 0})
92 |
93 | return
94 | }
95 |
96 | func parseTarget(conn netio.Ctx) (host string, err error) {
97 | const (
98 | idVer = 0
99 | idCmd = 1
100 | idType = 3 // address type index
101 | idIP0 = 4 // ip addres start index
102 | idDmLen = 4 // domain address length index
103 | idDm0 = 5 // domain address start index
104 |
105 | typeIPv4 = 1 // type is ipv4 address
106 | typeDm = 3 // type is domain address
107 | typeIPv6 = 4 // type is ipv6 address
108 |
109 | lenIPv4 = 3 + 1 + net.IPv4len + 2 // 3(ver+cmd+rsv) + 1addrType + ipv4 + 2port
110 | lenIPv6 = 3 + 1 + net.IPv6len + 2 // 3(ver+cmd+rsv) + 1addrType + ipv6 + 2port
111 | lenDmBase = 3 + 1 + 1 + 2 // 3 + 1addrType + 1addrLen + 2port, plus addrLen
112 | )
113 | // refer to getRequest in server.go for why set buffer size to 263
114 | buf := make([]byte, 263)
115 | var n int
116 |
117 | // read till we get possible domain length field
118 | if n, err = readAtLeast(conn, buf, idDmLen+1); err != nil {
119 | return
120 | }
121 |
122 | // check version and cmd
123 | if buf[idVer] != socksVer5 {
124 | err = errVer
125 | return
126 | }
127 |
128 | /*
129 | CONNECT X'01'
130 | BIND X'02'
131 | UDP ASSOCIATE X'03'
132 | */
133 |
134 | if buf[idCmd] > 0x03 || buf[idCmd] == 0x00 {
135 | logger.Info(
136 | "Unknown Command",
137 | buf[idCmd],
138 | )
139 | }
140 |
141 | if buf[idCmd] != socksCmdConnect { // only support CONNECT mode
142 | err = errCmd
143 | return
144 | }
145 |
146 | // read target address
147 | reqLen := -1
148 | switch buf[idType] {
149 | case typeIPv4:
150 | reqLen = lenIPv4
151 | case typeIPv6:
152 | reqLen = lenIPv6
153 | case typeDm: // domain name
154 | reqLen = int(buf[idDmLen]) + lenDmBase
155 | default:
156 | err = errAddrType
157 | return
158 | }
159 |
160 | if n == reqLen {
161 | // common case, do nothing
162 | } else if n < reqLen { // rare case
163 | if _, err = readAtLeast(conn, buf[n:reqLen], len(buf[n:reqLen])); err != nil {
164 | return
165 | }
166 | } else {
167 | err = errReqExtraData
168 | return
169 | }
170 |
171 | switch buf[idType] {
172 | case typeIPv4:
173 | host = net.IP(buf[idIP0 : idIP0+net.IPv4len]).String()
174 | case typeIPv6:
175 | host = net.IP(buf[idIP0 : idIP0+net.IPv6len]).String()
176 | case typeDm:
177 | host = string(buf[idDm0 : idDm0+buf[idDmLen]])
178 | }
179 | port := bigEndianUint16(buf[reqLen-2 : reqLen])
180 | host = net.JoinHostPort(host, strconv.Itoa(int(port)))
181 |
182 | return
183 | }
184 |
// bigEndianUint16 decodes the first two bytes of b as a big-endian uint16.
// It panics if b holds fewer than two bytes.
func bigEndianUint16(b []byte) uint16 {
	// Read the second byte first so one bounds check covers both accesses
	// (see golang.org/issue/14808).
	low := uint16(b[1])
	high := uint16(b[0])
	return high<<8 | low
}
189 |
190 | func pipeWhenClose(conn netio.Ctx, target string) {
191 | remoteConn, err := net.DialTimeout(
192 | "tcp",
193 | target,
194 | time.Millisecond*time.Duration(option.TIMEOUT),
195 | )
196 | if err != nil {
197 | logger.Info("Connect remote :" + err.Error())
198 | return
199 | }
200 |
201 | tcpAddr := remoteConn.LocalAddr().(*net.TCPAddr)
202 | if tcpAddr.Zone == "" {
203 | if tcpAddr.IP.Equal(tcpAddr.IP.To4()) {
204 | tcpAddr.Zone = "ip4"
205 | } else {
206 | tcpAddr.Zone = "ip6"
207 | }
208 | }
209 |
210 | rep := make([]byte, 256)
211 | rep[0] = 0x05
212 | rep[1] = 0x00 // success
213 | rep[2] = 0x00 //RSV
214 |
215 | //IP
216 | if tcpAddr.Zone == "ip6" {
217 | rep[3] = 0x04 //IPv6
218 | } else {
219 | rep[3] = 0x01 //IPv4
220 | }
221 |
222 | var ip net.IP
223 | if "ip6" == tcpAddr.Zone {
224 | ip = tcpAddr.IP.To16()
225 | } else {
226 | ip = tcpAddr.IP.To4()
227 | }
228 | pindex := 4
229 | for _, b := range ip {
230 | rep[pindex] = b
231 | pindex += 1
232 | }
233 | rep[pindex] = byte((tcpAddr.Port >> 8) & 0xff)
234 | rep[pindex+1] = byte(tcpAddr.Port & 0xff)
235 |
236 | conn.EncryptWrite(rep[0 : pindex+2])
237 | // Transfer data
238 |
239 | defer remoteConn.Close()
240 |
241 | remoteConnCtx, err := netio.NewTCPCtx(remoteConn, false)
242 | if err != nil {
243 | logger.Info(
244 | "Socks5 remote connect error: %s",
245 | err.Error(),
246 | )
247 | return
248 | }
249 |
250 | netio.PipeForward(conn, remoteConnCtx)
251 | }
252 |
253 | func HandleConnection(conn netio.Ctx) {
254 | if err := handShake(conn); err != nil {
255 | logger.Info(
256 | "Socks5 handshake error: %s",
257 | err.Error(),
258 | )
259 | return
260 | }
261 | addr, err := parseTarget(conn)
262 | if err != nil {
263 | logger.Info(
264 | "socks consult transfer mode or parse target: %s",
265 | err.Error(),
266 | )
267 | return
268 | }
269 | pipeWhenClose(conn, addr)
270 | }
271 |
--------------------------------------------------------------------------------
/crypto/chacha20/chacha20.go:
--------------------------------------------------------------------------------
1 | // Copryright (C) 2019 Yawning Angel
2 | //
3 | // This program is free software: you can redistribute it and/or modify
4 | // it under the terms of the GNU Affero General Public License as
5 | // published by the Free Software Foundation, either version 3 of the
6 | // License, or (at your option) any later version.
7 | //
8 | // This program is distributed in the hope that it will be useful,
9 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
10 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 | // GNU Affero General Public License for more details.
12 | //
13 | // You should have received a copy of the GNU Affero General Public License
14 | // along with this program. If not, see <https://www.gnu.org/licenses/>.
15 |
16 | // Package chacha20 implements the ChaCha20 stream cipher.
17 | package chacha20
18 |
19 | import (
20 | "crypto/cipher"
21 | "encoding/binary"
22 | "errors"
23 | "iox/crypto/chacha20/internal/api"
24 | "iox/crypto/chacha20/internal/hardware"
25 | "iox/crypto/chacha20/internal/ref"
26 | "math"
27 | )
28 |
const (
	// KeySize is the ChaCha20 key size in bytes.
	KeySize = 32

	// NonceSize is the ChaCha20 nonce size in bytes. With a nonce of this
	// size doReKey stores the nonce in state words 14-15 and words 12-13
	// serve as a 64-bit block counter.
	NonceSize = 8

	// INonceSize is the IETF ChaCha20 nonce size in bytes. With a nonce of
	// this size doReKey stores the nonce in state words 13-15, leaving only
	// word 12 as the block counter.
	INonceSize = 12

	// XNonceSize is the XChaCha20 nonce size in bytes. With a nonce of this
	// size doReKey derives a sub-key with HChaCha and continues with nonce
	// bytes 16..24 as an 8-byte nonce.
	XNonceSize = 24

	// HNonceSize is the HChaCha20 nonce size in bytes.
	HNonceSize = 16
)
45 |
var (
	// ErrInvalidKey is the error returned when the key is invalid.
	ErrInvalidKey = errors.New("chacha20: key length must be KeySize bytes")

	// ErrInvalidNonce is the error returned when the nonce is invalid.
	ErrInvalidNonce = errors.New("chacha20: nonce length must be NonceSize/INonceSize/XNonceSize bytes")

	// ErrInvalidCounter is the error returned when the counter is invalid.
	ErrInvalidCounter = errors.New("chacha20: block counter is invalid (out of range)")

	// supportedImpls is filled by init: the hardware package registers
	// first, then the ref package.
	supportedImpls []api.Implementation
	// activeImpl is the implementation every Cipher uses; init sets it to
	// the first entry of supportedImpls.
	activeImpl api.Implementation

	// Compile-time check that *Cipher satisfies cipher.Stream.
	_ cipher.Stream = (*Cipher)(nil)
)
61 |
// Cipher is an instance of ChaCha20/XChaCha20 using a particular key and nonce.
type Cipher struct {
	// state is the cipher state: constants in words 0-3, key in words 4-11,
	// block counter and nonce in words 12-15 (layout set up by doReKey).
	state [api.StateSize]uint32
	// buf holds one block of buffered keystream for partial-block requests.
	// doReKey also borrows it briefly as scratch space for the XChaCha
	// sub-key.
	buf [api.BlockSize]byte

	// off is the index of the next unused keystream byte in buf;
	// api.BlockSize means the buffer is exhausted.
	off int
	// ietf is true when the cipher was keyed with an INonceSize nonce, in
	// which case only state word 12 is available as the block counter.
	ietf bool
}
70 |
71 | // Reset zeros the key data so that it will no longer appear in the process's
72 | // memory.
73 | func (c *Cipher) Reset() {
74 | for i := range c.state {
75 | c.state[i] = 0
76 | }
77 | for i := range c.buf {
78 | c.buf[i] = 0
79 | }
80 | }
81 |
82 | // Seek sets the block counter to a given offset.
83 | func (c *Cipher) Seek(blockCounter uint64) error {
84 | if c.ietf {
85 | if blockCounter > math.MaxUint32 {
86 | return ErrInvalidCounter
87 | }
88 | c.state[12] = uint32(blockCounter)
89 | } else {
90 | c.state[12] = uint32(blockCounter)
91 | c.state[13] = uint32(blockCounter >> 32)
92 | }
93 | c.off = api.BlockSize
94 | return nil
95 | }
96 |
97 | // ReKey reinitializes the ChaCha20/XChaCha20 instance with the provided key
98 | // and nonce.
99 | func (c *Cipher) ReKey(key, nonce []byte) error {
100 | c.Reset()
101 | return c.doReKey(key, nonce)
102 | }
103 |
104 | func (c *Cipher) doReKey(key, nonce []byte) error {
105 | if len(key) != KeySize {
106 | return ErrInvalidKey
107 | }
108 |
109 | var subKey []byte
110 | switch len(nonce) {
111 | case NonceSize, INonceSize:
112 | case XNonceSize:
113 | subKey = c.buf[:KeySize]
114 | activeImpl.HChaCha(key, nonce, subKey)
115 | key = subKey
116 | nonce = nonce[16:24]
117 | default:
118 | return ErrInvalidNonce
119 | }
120 |
121 | _ = key[31] // Force bounds check elimination.
122 |
123 | c.state[0] = api.Sigma0
124 | c.state[1] = api.Sigma1
125 | c.state[2] = api.Sigma2
126 | c.state[3] = api.Sigma3
127 | c.state[4] = binary.LittleEndian.Uint32(key[0:4])
128 | c.state[5] = binary.LittleEndian.Uint32(key[4:8])
129 | c.state[6] = binary.LittleEndian.Uint32(key[8:12])
130 | c.state[7] = binary.LittleEndian.Uint32(key[12:16])
131 | c.state[8] = binary.LittleEndian.Uint32(key[16:20])
132 | c.state[9] = binary.LittleEndian.Uint32(key[20:24])
133 | c.state[10] = binary.LittleEndian.Uint32(key[24:28])
134 | c.state[11] = binary.LittleEndian.Uint32(key[28:32])
135 | c.state[12] = 0
136 | if len(nonce) == INonceSize {
137 | _ = nonce[11] // Force bounds check elimination.
138 | c.state[13] = binary.LittleEndian.Uint32(nonce[0:4])
139 | c.state[14] = binary.LittleEndian.Uint32(nonce[4:8])
140 | c.state[15] = binary.LittleEndian.Uint32(nonce[8:12])
141 | c.ietf = true
142 | } else {
143 | _ = nonce[7] // Force bounds check elimination.
144 | c.state[13] = 0
145 | c.state[14] = binary.LittleEndian.Uint32(nonce[0:4])
146 | c.state[15] = binary.LittleEndian.Uint32(nonce[4:8])
147 | c.ietf = false
148 | }
149 | c.off = api.BlockSize
150 |
151 | if subKey != nil {
152 | for i := range subKey {
153 | subKey[i] = 0
154 | }
155 | }
156 |
157 | return nil
158 | }
159 |
160 | // New returns a new ChaCha20/XChaCha20 instance.
161 | func New(key, nonce []byte) (*Cipher, error) {
162 | var c Cipher
163 | if err := c.doReKey(key, nonce); err != nil {
164 | return nil, err
165 | }
166 |
167 | return &c, nil
168 | }
169 |
170 | // HChaCha is the HChaCha20 hash function used to make XChaCha.
171 | func HChaCha(key, nonce []byte, dst *[32]byte) {
172 | activeImpl.HChaCha(key, nonce, dst[:])
173 | }
174 |
// XORKeyStream sets dst to the result of XORing src with the key stream. Dst
// and src may be the same slice but otherwise should not overlap.
//
// If dst is shorter than src, only len(dst) bytes of src are processed.
// Keystream left over from a previous call is consumed first; whole blocks
// are then processed directly into dst, and a trailing partial block is
// served from the internal one-block buffer.
func (c *Cipher) XORKeyStream(dst, src []byte) {
	// Never write past the end of dst.
	if len(dst) < len(src) {
		src = src[:len(dst)]
	}

	for remaining := len(src); remaining > 0; {
		// Process multiple blocks at once.
		// (Only possible when no buffered keystream is pending.)
		if c.off == api.BlockSize {
			nrBlocks := remaining / api.BlockSize
			directBytes := nrBlocks * api.BlockSize
			if nrBlocks > 0 {
				c.doBlocks(dst, src, nrBlocks)
				remaining -= directBytes
				if remaining == 0 {
					// Input ended on a block boundary; the buffer stays
					// marked as exhausted.
					return
				}
				dst = dst[directBytes:]
				src = src[directBytes:]
			}

			// If there's a partial block, generate 1 block of keystream into
			// the internal buffer.
			c.doBlocks(c.buf[:], nil, 1)
			c.off = 0
		}

		// Process partial blocks from the buffered keystream.
		toXor := api.BlockSize - c.off
		if remaining < toXor {
			toXor = remaining
		}
		if toXor > 0 {
			// The inliner doesn't want to inline this function, but my
			// attempts to force BCE don't seem to work with manual
			// inlining.
			//
			// Taking the extra function call overhead here appears to be
			// worth it.
			c.xorBufBytes(dst, src, toXor)

			dst = dst[toXor:]
			src = src[toXor:]

			remaining -= toXor
		}
	}
}
224 |
225 | func (c *Cipher) xorBufBytes(dst, src []byte, n int) {
226 | // Force bounds check elimination.
227 | buf := c.buf[c.off:]
228 | _ = buf[n-1]
229 | _ = dst[n-1]
230 | _ = src[n-1]
231 |
232 | for i := 0; i < n; i++ {
233 | dst[i] = buf[i] ^ src[i]
234 | }
235 | c.off += n
236 | }
237 |
// KeyStream sets dst to the raw keystream.
//
// It follows the same scheme as XORKeyStream: whole blocks are generated
// directly into dst when no buffered keystream is pending, and partial
// blocks are copied out of the internal one-block buffer.
func (c *Cipher) KeyStream(dst []byte) {
	for remaining := len(dst); remaining > 0; {
		// Process multiple blocks at once.
		// (Only possible when no buffered keystream is pending.)
		if c.off == api.BlockSize {
			nrBlocks := remaining / api.BlockSize
			directBytes := nrBlocks * api.BlockSize
			if nrBlocks > 0 {
				c.doBlocks(dst, nil, nrBlocks)
				remaining -= directBytes
				if remaining == 0 {
					// Request ended on a block boundary; the buffer stays
					// marked as exhausted.
					return
				}
				dst = dst[directBytes:]
			}

			// If there's a partial block, generate 1 block of keystream into
			// the internal buffer.
			c.doBlocks(c.buf[:], nil, 1)
			c.off = 0
		}

		// Process partial blocks from the buffered keystream.
		toCopy := api.BlockSize - c.off
		if remaining < toCopy {
			toCopy = remaining
		}
		if toCopy > 0 {
			copy(dst[:toCopy], c.buf[c.off:c.off+toCopy])
			dst = dst[toCopy:]
			remaining -= toCopy
			c.off += toCopy
		}
	}
}
273 |
274 | func (c *Cipher) doBlocks(dst, src []byte, nrBlocks int) {
275 | if c.ietf {
276 | ctr := uint64(c.state[12])
277 | if ctr+uint64(nrBlocks) > math.MaxUint32 {
278 | panic("chacha20: will exceed key stream per nonce limit")
279 | }
280 | }
281 |
282 | activeImpl.Blocks(&c.state, dst, src, nrBlocks)
283 | }
284 |
285 | func init() {
286 | supportedImpls = hardware.Register(supportedImpls)
287 | supportedImpls = ref.Register(supportedImpls)
288 | activeImpl = supportedImpls[0]
289 | }
290 |
--------------------------------------------------------------------------------
/crypto/chacha20/internal/ref/impl.go:
--------------------------------------------------------------------------------
1 | // Copryright (C) 2019 Yawning Angel
2 | //
3 | // This program is free software: you can redistribute it and/or modify
4 | // it under the terms of the GNU Affero General Public License as
5 | // published by the Free Software Foundation, either version 3 of the
6 | // License, or (at your option) any later version.
7 | //
8 | // This program is distributed in the hope that it will be useful,
9 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
10 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 | // GNU Affero General Public License for more details.
12 | //
13 | // You should have received a copy of the GNU Affero General Public License
14 | // along with this program. If not, see <https://www.gnu.org/licenses/>.
15 |
16 | // Package ref provides the portable ChaCha20 implementation.
17 | package ref
18 |
19 | import (
20 | "encoding/binary"
21 | "iox/crypto/chacha20/internal/api"
22 | "math/bits"
23 | )
24 |
// rounds is the number of ChaCha rounds; the round loops below count it down
// two at a time, one column round plus one diagonal round per iteration.
const rounds = 20

// Impl is the reference implementation (exposed for testing).
var Impl = new(implRef)

// implRef is the portable implementation; it carries no state.
type implRef struct{}

// Name returns the identifier of this implementation, "ref".
func (impl *implRef) Name() string {
	const name = "ref"
	return name
}
35 |
// Blocks generates nrBlocks consecutive keystream blocks from the state x.
//
// For every block, 64 bytes are written to dst: the keystream XORed with the
// matching 64 bytes of src when src is non-nil, or the raw keystream when src
// is nil. dst (and src, if given) must therefore hold at least nrBlocks whole
// blocks; shorter slices cause a bounds-check panic.
//
// Only words 4-15 of x are read (words 0-3 are taken from the Sigma
// constants). After each block, words 12 and 13 are advanced together as one
// 64-bit counter, low word in x[12].
func (impl *implRef) Blocks(x *[api.StateSize]uint32, dst, src []byte, nrBlocks int) {
	for n := 0; n < nrBlocks; n++ {
		// Load the working state for this block.
		x0, x1, x2, x3 := api.Sigma0, api.Sigma1, api.Sigma2, api.Sigma3
		x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 := x[4], x[5], x[6], x[7], x[8], x[9], x[10], x[11], x[12], x[13], x[14], x[15]

		// Each iteration is one double round: four column quarter rounds
		// followed by four diagonal quarter rounds.
		for i := rounds; i > 0; i -= 2 {
			// quarterround(x, 0, 4, 8, 12)
			x0 += x4
			x12 ^= x0
			x12 = bits.RotateLeft32(x12, 16)
			x8 += x12
			x4 ^= x8
			x4 = bits.RotateLeft32(x4, 12)
			x0 += x4
			x12 ^= x0
			x12 = bits.RotateLeft32(x12, 8)
			x8 += x12
			x4 ^= x8
			x4 = bits.RotateLeft32(x4, 7)

			// quarterround(x, 1, 5, 9, 13)
			x1 += x5
			x13 ^= x1
			x13 = bits.RotateLeft32(x13, 16)
			x9 += x13
			x5 ^= x9
			x5 = bits.RotateLeft32(x5, 12)
			x1 += x5
			x13 ^= x1
			x13 = bits.RotateLeft32(x13, 8)
			x9 += x13
			x5 ^= x9
			x5 = bits.RotateLeft32(x5, 7)

			// quarterround(x, 2, 6, 10, 14)
			x2 += x6
			x14 ^= x2
			x14 = bits.RotateLeft32(x14, 16)
			x10 += x14
			x6 ^= x10
			x6 = bits.RotateLeft32(x6, 12)
			x2 += x6
			x14 ^= x2
			x14 = bits.RotateLeft32(x14, 8)
			x10 += x14
			x6 ^= x10
			x6 = bits.RotateLeft32(x6, 7)

			// quarterround(x, 3, 7, 11, 15)
			x3 += x7
			x15 ^= x3
			x15 = bits.RotateLeft32(x15, 16)
			x11 += x15
			x7 ^= x11
			x7 = bits.RotateLeft32(x7, 12)
			x3 += x7
			x15 ^= x3
			x15 = bits.RotateLeft32(x15, 8)
			x11 += x15
			x7 ^= x11
			x7 = bits.RotateLeft32(x7, 7)

			// quarterround(x, 0, 5, 10, 15)
			x0 += x5
			x15 ^= x0
			x15 = bits.RotateLeft32(x15, 16)
			x10 += x15
			x5 ^= x10
			x5 = bits.RotateLeft32(x5, 12)
			x0 += x5
			x15 ^= x0
			x15 = bits.RotateLeft32(x15, 8)
			x10 += x15
			x5 ^= x10
			x5 = bits.RotateLeft32(x5, 7)

			// quarterround(x, 1, 6, 11, 12)
			x1 += x6
			x12 ^= x1
			x12 = bits.RotateLeft32(x12, 16)
			x11 += x12
			x6 ^= x11
			x6 = bits.RotateLeft32(x6, 12)
			x1 += x6
			x12 ^= x1
			x12 = bits.RotateLeft32(x12, 8)
			x11 += x12
			x6 ^= x11
			x6 = bits.RotateLeft32(x6, 7)

			// quarterround(x, 2, 7, 8, 13)
			x2 += x7
			x13 ^= x2
			x13 = bits.RotateLeft32(x13, 16)
			x8 += x13
			x7 ^= x8
			x7 = bits.RotateLeft32(x7, 12)
			x2 += x7
			x13 ^= x2
			x13 = bits.RotateLeft32(x13, 8)
			x8 += x13
			x7 ^= x8
			x7 = bits.RotateLeft32(x7, 7)

			// quarterround(x, 3, 4, 9, 14)
			x3 += x4
			x14 ^= x3
			x14 = bits.RotateLeft32(x14, 16)
			x9 += x14
			x4 ^= x9
			x4 = bits.RotateLeft32(x4, 12)
			x3 += x4
			x14 ^= x3
			x14 = bits.RotateLeft32(x14, 8)
			x9 += x14
			x4 ^= x9
			x4 = bits.RotateLeft32(x4, 7)
		}

		// Feed-forward: add the input state back onto the permuted words.
		x0 += api.Sigma0
		x1 += api.Sigma1
		x2 += api.Sigma2
		x3 += api.Sigma3
		x4 += x[4]
		x5 += x[5]
		x6 += x[6]
		x7 += x[7]
		x8 += x[8]
		x9 += x[9]
		x10 += x[10]
		x11 += x[11]
		x12 += x[12]
		x13 += x[13]
		x14 += x[14]
		x15 += x[15]

		_ = dst[api.BlockSize-1] // Force bounds check elimination.

		if src != nil {
			// Encrypt/decrypt: XOR each little-endian keystream word into
			// the matching input word.
			_ = src[api.BlockSize-1] // Force bounds check elimination.
			binary.LittleEndian.PutUint32(dst[0:4], binary.LittleEndian.Uint32(src[0:4])^x0)
			binary.LittleEndian.PutUint32(dst[4:8], binary.LittleEndian.Uint32(src[4:8])^x1)
			binary.LittleEndian.PutUint32(dst[8:12], binary.LittleEndian.Uint32(src[8:12])^x2)
			binary.LittleEndian.PutUint32(dst[12:16], binary.LittleEndian.Uint32(src[12:16])^x3)
			binary.LittleEndian.PutUint32(dst[16:20], binary.LittleEndian.Uint32(src[16:20])^x4)
			binary.LittleEndian.PutUint32(dst[20:24], binary.LittleEndian.Uint32(src[20:24])^x5)
			binary.LittleEndian.PutUint32(dst[24:28], binary.LittleEndian.Uint32(src[24:28])^x6)
			binary.LittleEndian.PutUint32(dst[28:32], binary.LittleEndian.Uint32(src[28:32])^x7)
			binary.LittleEndian.PutUint32(dst[32:36], binary.LittleEndian.Uint32(src[32:36])^x8)
			binary.LittleEndian.PutUint32(dst[36:40], binary.LittleEndian.Uint32(src[36:40])^x9)
			binary.LittleEndian.PutUint32(dst[40:44], binary.LittleEndian.Uint32(src[40:44])^x10)
			binary.LittleEndian.PutUint32(dst[44:48], binary.LittleEndian.Uint32(src[44:48])^x11)
			binary.LittleEndian.PutUint32(dst[48:52], binary.LittleEndian.Uint32(src[48:52])^x12)
			binary.LittleEndian.PutUint32(dst[52:56], binary.LittleEndian.Uint32(src[52:56])^x13)
			binary.LittleEndian.PutUint32(dst[56:60], binary.LittleEndian.Uint32(src[56:60])^x14)
			binary.LittleEndian.PutUint32(dst[60:64], binary.LittleEndian.Uint32(src[60:64])^x15)
			src = src[api.BlockSize:]
		} else {
			// No input: emit the raw keystream block.
			binary.LittleEndian.PutUint32(dst[0:4], x0)
			binary.LittleEndian.PutUint32(dst[4:8], x1)
			binary.LittleEndian.PutUint32(dst[8:12], x2)
			binary.LittleEndian.PutUint32(dst[12:16], x3)
			binary.LittleEndian.PutUint32(dst[16:20], x4)
			binary.LittleEndian.PutUint32(dst[20:24], x5)
			binary.LittleEndian.PutUint32(dst[24:28], x6)
			binary.LittleEndian.PutUint32(dst[28:32], x7)
			binary.LittleEndian.PutUint32(dst[32:36], x8)
			binary.LittleEndian.PutUint32(dst[36:40], x9)
			binary.LittleEndian.PutUint32(dst[40:44], x10)
			binary.LittleEndian.PutUint32(dst[44:48], x11)
			binary.LittleEndian.PutUint32(dst[48:52], x12)
			binary.LittleEndian.PutUint32(dst[52:56], x13)
			binary.LittleEndian.PutUint32(dst[56:60], x14)
			binary.LittleEndian.PutUint32(dst[60:64], x15)
		}
		dst = dst[api.BlockSize:]

		// Stopping at 2^70 bytes per nonce is the user's responsibility.
		// Words 12 (low) and 13 (high) are advanced as one 64-bit counter.
		ctr := uint64(x[13])<<32 | uint64(x[12])
		ctr++
		x[12] = uint32(ctr)
		x[13] = uint32(ctr >> 32)
	}
}
220 |
// HChaCha computes the HChaCha20 function of key and nonce and writes the
// 32-byte result to dst.
//
// The first 32 bytes of key and the first 16 bytes of nonce are read; nonce
// must hold at least api.HNonceSize bytes and dst at least api.HashSize
// bytes, otherwise a bounds-check panic occurs. Unlike Blocks there is no
// feed-forward addition after the rounds: the output is words 0-3 and 12-15
// of the permuted state.
func (impl *implRef) HChaCha(key, nonce []byte, dst []byte) {
	// Force bounds check elimination.
	_ = key[31]
	_ = nonce[api.HNonceSize-1]

	// Initial state: constants, key words, then the 16 nonce bytes in the
	// counter/nonce positions.
	x0, x1, x2, x3 := api.Sigma0, api.Sigma1, api.Sigma2, api.Sigma3
	x4 := binary.LittleEndian.Uint32(key[0:4])
	x5 := binary.LittleEndian.Uint32(key[4:8])
	x6 := binary.LittleEndian.Uint32(key[8:12])
	x7 := binary.LittleEndian.Uint32(key[12:16])
	x8 := binary.LittleEndian.Uint32(key[16:20])
	x9 := binary.LittleEndian.Uint32(key[20:24])
	x10 := binary.LittleEndian.Uint32(key[24:28])
	x11 := binary.LittleEndian.Uint32(key[28:32])
	x12 := binary.LittleEndian.Uint32(nonce[0:4])
	x13 := binary.LittleEndian.Uint32(nonce[4:8])
	x14 := binary.LittleEndian.Uint32(nonce[8:12])
	x15 := binary.LittleEndian.Uint32(nonce[12:16])

	// Yes, this could be carved out into a function for code reuse (TM)
	// however the go inliner won't inline it.
	for i := rounds; i > 0; i -= 2 {
		// quarterround(x, 0, 4, 8, 12)
		x0 += x4
		x12 ^= x0
		x12 = bits.RotateLeft32(x12, 16)
		x8 += x12
		x4 ^= x8
		x4 = bits.RotateLeft32(x4, 12)
		x0 += x4
		x12 ^= x0
		x12 = bits.RotateLeft32(x12, 8)
		x8 += x12
		x4 ^= x8
		x4 = bits.RotateLeft32(x4, 7)

		// quarterround(x, 1, 5, 9, 13)
		x1 += x5
		x13 ^= x1
		x13 = bits.RotateLeft32(x13, 16)
		x9 += x13
		x5 ^= x9
		x5 = bits.RotateLeft32(x5, 12)
		x1 += x5
		x13 ^= x1
		x13 = bits.RotateLeft32(x13, 8)
		x9 += x13
		x5 ^= x9
		x5 = bits.RotateLeft32(x5, 7)

		// quarterround(x, 2, 6, 10, 14)
		x2 += x6
		x14 ^= x2
		x14 = bits.RotateLeft32(x14, 16)
		x10 += x14
		x6 ^= x10
		x6 = bits.RotateLeft32(x6, 12)
		x2 += x6
		x14 ^= x2
		x14 = bits.RotateLeft32(x14, 8)
		x10 += x14
		x6 ^= x10
		x6 = bits.RotateLeft32(x6, 7)

		// quarterround(x, 3, 7, 11, 15)
		x3 += x7
		x15 ^= x3
		x15 = bits.RotateLeft32(x15, 16)
		x11 += x15
		x7 ^= x11
		x7 = bits.RotateLeft32(x7, 12)
		x3 += x7
		x15 ^= x3
		x15 = bits.RotateLeft32(x15, 8)
		x11 += x15
		x7 ^= x11
		x7 = bits.RotateLeft32(x7, 7)

		// quarterround(x, 0, 5, 10, 15)
		x0 += x5
		x15 ^= x0
		x15 = bits.RotateLeft32(x15, 16)
		x10 += x15
		x5 ^= x10
		x5 = bits.RotateLeft32(x5, 12)
		x0 += x5
		x15 ^= x0
		x15 = bits.RotateLeft32(x15, 8)
		x10 += x15
		x5 ^= x10
		x5 = bits.RotateLeft32(x5, 7)

		// quarterround(x, 1, 6, 11, 12)
		x1 += x6
		x12 ^= x1
		x12 = bits.RotateLeft32(x12, 16)
		x11 += x12
		x6 ^= x11
		x6 = bits.RotateLeft32(x6, 12)
		x1 += x6
		x12 ^= x1
		x12 = bits.RotateLeft32(x12, 8)
		x11 += x12
		x6 ^= x11
		x6 = bits.RotateLeft32(x6, 7)

		// quarterround(x, 2, 7, 8, 13)
		x2 += x7
		x13 ^= x2
		x13 = bits.RotateLeft32(x13, 16)
		x8 += x13
		x7 ^= x8
		x7 = bits.RotateLeft32(x7, 12)
		x2 += x7
		x13 ^= x2
		x13 = bits.RotateLeft32(x13, 8)
		x8 += x13
		x7 ^= x8
		x7 = bits.RotateLeft32(x7, 7)

		// quarterround(x, 3, 4, 9, 14)
		x3 += x4
		x14 ^= x3
		x14 = bits.RotateLeft32(x14, 16)
		x9 += x14
		x4 ^= x9
		x4 = bits.RotateLeft32(x4, 12)
		x3 += x4
		x14 ^= x3
		x14 = bits.RotateLeft32(x14, 8)
		x9 += x14
		x4 ^= x9
		x4 = bits.RotateLeft32(x4, 7)
	}

	// HChaCha returns x0...x3 | x12...x15, which corresponds to the
	// indexes of the ChaCha constant and the indexes of the IV.
	_ = dst[api.HashSize-1] // Force bounds check elimination.
	binary.LittleEndian.PutUint32(dst[0:4], x0)
	binary.LittleEndian.PutUint32(dst[4:8], x1)
	binary.LittleEndian.PutUint32(dst[8:12], x2)
	binary.LittleEndian.PutUint32(dst[12:16], x3)
	binary.LittleEndian.PutUint32(dst[16:20], x12)
	binary.LittleEndian.PutUint32(dst[20:24], x13)
	binary.LittleEndian.PutUint32(dst[24:28], x14)
	binary.LittleEndian.PutUint32(dst[28:32], x15)
}
368 |
369 | // Register appends the implementation to the provided slice, and returns the
370 | // new slice.
371 | func Register(impls []api.Implementation) []api.Implementation {
372 | return append(impls, Impl)
373 | }
374 |
--------------------------------------------------------------------------------
/operate/fwd.go:
--------------------------------------------------------------------------------
1 | package operate
2 |
3 | import (
4 | "iox/crypto"
5 | "iox/logger"
6 | "iox/netio"
7 | "iox/option"
8 | "net"
9 | "time"
10 | )
11 |
12 | // local is :port
13 | // remote is ip:port
14 | // Local2Remote(":9999", "1.1.1.1:9999")
15 | func Local2Remote(local string, remote string, lenc bool, renc bool) {
16 | if option.PROTOCOL == "TCP" {
17 | listener, err := net.Listen("tcp", local)
18 | if err != nil {
19 | logger.Warn(
20 | "Listen on %s error: %s",
21 | local, err.Error(),
22 | )
23 | return
24 | }
25 | defer listener.Close()
26 |
27 | logger.Success("Forward between %s and %s", local, remote)
28 |
29 | for {
30 | logger.Info("Wait for connection on %s", local)
31 |
32 | localConn, err := listener.Accept()
33 | if err != nil {
34 | logger.Warn(
35 | "Handle local connect error: %s",
36 | err.Error(),
37 | )
38 | continue
39 | }
40 |
41 | logger.Info(
42 | "Connection from %s",
43 | localConn.RemoteAddr().String(),
44 | )
45 | logger.Info("Connecting " + remote)
46 |
47 | go func() {
48 | defer localConn.Close()
49 |
50 | localConnCtx, err := netio.NewTCPCtx(localConn, lenc)
51 | if err != nil {
52 | logger.Warn(
53 | "Handle local connect error: %s",
54 | err.Error(),
55 | )
56 | return
57 | }
58 |
59 | remoteConn, err := net.DialTimeout(
60 | "tcp",
61 | remote,
62 | time.Millisecond*time.Duration(option.TIMEOUT),
63 | )
64 | if err != nil {
65 | logger.Warn("Connect remote %s error: %s",
66 | remote, err.Error(),
67 | )
68 | return
69 | }
70 | defer remoteConn.Close()
71 |
72 | remoteConnCtx, err := netio.NewTCPCtx(remoteConn, renc)
73 | if err != nil {
74 | logger.Warn("Connect remote %s error: %s",
75 | remote, err.Error(),
76 | )
77 | return
78 | }
79 |
80 | logger.Info(
81 | "Open pipe: %s <== FWD ==> %s",
82 | localConn.RemoteAddr().String(),
83 | remoteConn.RemoteAddr().String(),
84 | )
85 |
86 | netio.PipeForward(localConnCtx, remoteConnCtx)
87 |
88 | logger.Info(
89 | "Close pipe: %s <== FWD ==> %s",
90 | localConn.RemoteAddr().String(),
91 | remoteConn.RemoteAddr().String(),
92 | )
93 | }()
94 | }
95 |
96 | } else {
97 | localAddr, err := net.ResolveUDPAddr("udp", local)
98 | if err != nil {
99 | logger.Warn(
100 | "Parse udp address %s error: %s",
101 | local, err.Error(),
102 | )
103 | return
104 | }
105 | listener, err := net.ListenUDP("udp", localAddr)
106 | if err != nil {
107 | logger.Warn(
108 | "Listen udp on %s error: %s",
109 | local, err.Error(),
110 | )
111 | return
112 | }
113 |
114 | remoteAddr, err := net.ResolveUDPAddr("udp", remote)
115 | if err != nil {
116 | logger.Warn(
117 | "Parse udp address %s error: %s",
118 | local, err.Error(),
119 | )
120 | return
121 | }
122 | remoteConn, err := net.DialUDP("udp", nil, remoteAddr)
123 | if err != nil {
124 | logger.Warn(
125 | "Dial remote udp %s error: %s",
126 | local, err.Error(),
127 | )
128 | return
129 | }
130 |
131 | listenerCtx, err := netio.NewUDPCtx(listener, lenc, false)
132 | if err != nil {
133 | return
134 | }
135 | remoteCtx, err := netio.NewUDPCtx(remoteConn, renc, true)
136 | if err != nil {
137 | return
138 | }
139 |
140 | logger.Success("Forward udp between %s and %s", local, remote)
141 | netio.ForwardUDP(listenerCtx, remoteCtx)
142 | }
143 | }
144 |
145 | func Local2Local(localA string, localB string, laenc bool, lbenc bool) {
146 | if option.PROTOCOL == "TCP" {
147 | logger.Success("Forward between %s and %s", localA, localB)
148 |
149 | var listenerA net.Listener
150 | var listenerB net.Listener
151 |
152 | for {
153 | signal := make(chan byte)
154 | var localConnA, localConnB net.Conn
155 |
156 | go func() {
157 | // Call listener.Close when goroutine returns.
158 | // Listener in Go will release the port immediately
159 | // after calling listener.Close without waiting for TIME_WAIT
160 | var err error
161 | listenerA, err = net.Listen("tcp", localA)
162 | if err != nil {
163 | logger.Warn(
164 | "Listen on %s error: %s",
165 | localA, err.Error(),
166 | )
167 | return
168 | }
169 | defer listenerA.Close()
170 |
171 | for {
172 | logger.Info(
173 | "Wait for connection on %s",
174 | localA,
175 | )
176 |
177 | var err error
178 | localConnA, err = listenerA.Accept()
179 | if err != nil {
180 | logger.Warn(
181 | "Handle connection error: %s",
182 | err.Error(),
183 | )
184 | continue
185 | }
186 | break
187 | }
188 | signal <- 'A'
189 | }()
190 |
191 | go func() {
192 | var err error
193 | listenerB, err = net.Listen("tcp", localB)
194 | if err != nil {
195 | logger.Warn(
196 | "Listen on %s error: %s",
197 | localB, err.Error(),
198 | )
199 | return
200 | }
201 | defer listenerB.Close()
202 |
203 | for {
204 | logger.Info(
205 | "Wait for connection on %s",
206 | localB,
207 | )
208 |
209 | var err error
210 | localConnB, err = listenerB.Accept()
211 | if err != nil {
212 | logger.Warn(
213 | "Handle connection error: %s",
214 | err.Error(),
215 | )
216 | continue
217 | }
218 | break
219 | }
220 | signal <- 'B'
221 | }()
222 |
223 | switch <-signal {
224 | case 'A':
225 | logger.Info(
226 | "%s connected, waiting for %s",
227 | localA, localB,
228 | )
229 | case 'B':
230 | logger.Info(
231 | "%s connected, waiting for %s",
232 | localB, localA,
233 | )
234 | }
235 |
236 | <-signal
237 |
238 | go func() {
239 | defer localConnA.Close()
240 | defer localConnB.Close()
241 |
242 | localConnCtxA, err := netio.NewTCPCtx(localConnA, laenc)
243 | if err != nil {
244 | logger.Warn(
245 | "handle local %s error: %s",
246 | localA, err.Error(),
247 | )
248 | }
249 |
250 | localConnCtxB, err := netio.NewTCPCtx(localConnB, lbenc)
251 | if err != nil {
252 | logger.Warn(
253 | "handle local %s error: %s",
254 | localB, err.Error(),
255 | )
256 | }
257 |
258 | logger.Info(
259 | "Open pipe: %s <== FWD ==> %s",
260 | localConnA.RemoteAddr().String(),
261 | localConnB.RemoteAddr().String(),
262 | )
263 | netio.PipeForward(localConnCtxA, localConnCtxB)
264 | logger.Info(
265 | "Close pipe: %s <== FWD ==> %s",
266 | localConnA.RemoteAddr().String(),
267 | localConnB.RemoteAddr().String(),
268 | )
269 | }()
270 | }
271 | } else {
272 | localAddrA, err := net.ResolveUDPAddr("udp", localA)
273 | if err != nil {
274 | logger.Warn(
275 | "Parse udp address %s error: %s",
276 | localA, err.Error(),
277 | )
278 | return
279 | }
280 | listenerA, err := net.ListenUDP("udp", localAddrA)
281 | if err != nil {
282 | logger.Warn(
283 | "Listen udp on %s error: %s",
284 | localA, err.Error(),
285 | )
286 | return
287 | }
288 | localAddrB, err := net.ResolveUDPAddr("udp", localB)
289 | if err != nil {
290 | logger.Warn(
291 | "Parse udp address %s error: %s",
292 | localB, err.Error(),
293 | )
294 | return
295 | }
296 | listenerB, err := net.ListenUDP("udp", localAddrB)
297 | if err != nil {
298 | logger.Warn(
299 | "Listen udp on %s error: %s",
300 | localB, err.Error(),
301 | )
302 | return
303 | }
304 |
305 | listenerCtxA, err := netio.NewUDPCtx(listenerA, laenc, false)
306 | if err != nil {
307 | return
308 | }
309 | listenerCtxB, err := netio.NewUDPCtx(listenerB, lbenc, false)
310 | if err != nil {
311 | return
312 | }
313 |
314 | logger.Success("Forward udp between %s and %s", localA, localB)
315 | netio.ForwardUnconnectedUDP(listenerCtxA, listenerCtxB)
316 | }
317 | }
318 |
319 | // When you make a multistage UDP connection, this function must be called last
320 | func Remote2Remote(remoteA string, remoteB string, raenc bool, rbenc bool) {
321 | if option.PROTOCOL == "TCP" {
322 | logger.Success("Forward between %s and %s", remoteA, remoteB)
323 |
324 | for {
325 | var remoteConnA net.Conn
326 | var remoteConnB net.Conn
327 |
328 | signal := make(chan struct{})
329 |
330 | go func() {
331 | for {
332 | var err error
333 | logger.Info(
334 | "Connecting remote %s",
335 | remoteA,
336 | )
337 |
338 | remoteConnA, err = net.DialTimeout(
339 | "tcp",
340 | remoteA,
341 | time.Millisecond*time.Duration(option.TIMEOUT),
342 | )
343 | if err != nil {
344 | logger.Info(
345 | "Connect remote %s error, retrying",
346 | remoteA,
347 | )
348 | time.Sleep(1500 * time.Millisecond)
349 | continue
350 | }
351 |
352 | break
353 | }
354 |
355 | signal <- struct{}{}
356 | }()
357 |
358 | go func() {
359 | for {
360 | var err error
361 | logger.Info(
362 | "Connecting remote %s",
363 | remoteB,
364 | )
365 |
366 | remoteConnB, err = net.DialTimeout(
367 | "tcp",
368 | remoteB,
369 | time.Millisecond*time.Duration(option.TIMEOUT),
370 | )
371 | if err != nil {
372 | logger.Info(
373 | "Connect remote %s error, retrying",
374 | remoteB,
375 | )
376 | time.Sleep(1500 * time.Millisecond)
377 | continue
378 | }
379 | break
380 | }
381 |
382 | signal <- struct{}{}
383 | }()
384 |
385 | <-signal
386 | <-signal
387 |
388 | go func() {
389 | defer func() {
390 | if remoteConnA != nil {
391 | remoteConnA.Close()
392 | }
393 |
394 | if remoteConnB != nil {
395 | remoteConnB.Close()
396 | }
397 | }()
398 |
399 | if remoteConnA != nil && remoteConnB != nil {
400 | remoteConnCtxA, err := netio.NewTCPCtx(remoteConnA, raenc)
401 | if err != nil {
402 | logger.Warn(
403 | "Handle remote %s error: %s",
404 | remoteA, err.Error(),
405 | )
406 | }
407 | remoteConnCtxB, err := netio.NewTCPCtx(remoteConnB, rbenc)
408 | if err != nil {
409 | logger.Warn(
410 | "Handle remote %s error: %s",
411 | remoteB, err.Error(),
412 | )
413 | }
414 |
415 | logger.Info(
416 | "Start pipe: %s <== FWD ==> %s",
417 | remoteConnA.RemoteAddr().String(),
418 | remoteConnB.RemoteAddr().String(),
419 | )
420 | netio.PipeForward(remoteConnCtxA, remoteConnCtxB)
421 | logger.Info(
422 | "Close pipe: %s <== FWD ==> %s",
423 | remoteConnA.RemoteAddr().String(),
424 | remoteConnB.RemoteAddr().String(),
425 | )
426 | }
427 | }()
428 | }
429 | } else {
430 | remoteAddrA, err := net.ResolveUDPAddr("udp", remoteA)
431 | if err != nil {
432 | logger.Warn(
433 | "Parse udp address %s error: %s",
434 | remoteA, err.Error(),
435 | )
436 | return
437 | }
438 | remoteConnA, err := net.DialUDP("udp", nil, remoteAddrA)
439 | if err != nil {
440 | logger.Warn(
441 | "Dial remote udp %s error: %s",
442 | remoteA, err.Error(),
443 | )
444 | return
445 | }
446 | remoteAddrB, err := net.ResolveUDPAddr("udp", remoteB)
447 | if err != nil {
448 | logger.Warn(
449 | "Parse udp address %s error: %s",
450 | remoteB, err.Error(),
451 | )
452 | return
453 | }
454 | remoteConnB, err := net.DialUDP("udp", nil, remoteAddrB)
455 | if err != nil {
456 | logger.Warn(
457 | "Dial remote udp %s error: %s",
458 | remoteB, err.Error(),
459 | )
460 | return
461 | }
462 |
463 | remoteCtxA, err := netio.NewUDPCtx(remoteConnA, raenc, true)
464 | if err != nil {
465 | return
466 | }
467 | remoteCtxB, err := netio.NewUDPCtx(remoteConnB, rbenc, true)
468 | if err != nil {
469 | return
470 | }
471 |
472 | // I need to send init packet to register the remote address
473 | // Even tough target is not `iox`, it doesn't matter
474 | //
475 | // There is a design fault here, and I need to consider
476 | // the case where the FORWARD_WITHOUT_DEC flag
477 | // is set but actually needs to be encrypted,
478 | // otherwise there is no IV in the ciphertext,
479 | // the opposite cannot process it
480 | if raenc {
481 | iv, err := crypto.RandomNonce()
482 | cipher, err := crypto.NewCipher(iv)
483 | if err != nil {
484 | return
485 | }
486 |
487 | b := make([]byte, 4, 20)
488 | copy(b, netio.UDP_INIT_PACKET)
489 |
490 | cipher.StreamXOR(b, b)
491 | b = append(b, iv...)
492 | remoteCtxA.Write(b)
493 |
494 | } else {
495 | remoteCtxA.Write(netio.UDP_INIT_PACKET)
496 | }
497 | if rbenc {
498 | iv, err := crypto.RandomNonce()
499 | cipher, err := crypto.NewCipher(iv)
500 | if err != nil {
501 | return
502 | }
503 |
504 | b := make([]byte, 4, 20)
505 | copy(b, netio.UDP_INIT_PACKET)
506 |
507 | cipher.StreamXOR(b, b)
508 | b = append(b, iv...)
509 | remoteCtxB.Write(b)
510 |
511 | } else {
512 | remoteCtxB.Write(netio.UDP_INIT_PACKET)
513 | }
514 |
515 | logger.Success("Forward udp between %s and %s", remoteA, remoteB)
516 | netio.ForwardUDP(remoteCtxA, remoteCtxB)
517 | }
518 | }
519 |
--------------------------------------------------------------------------------
/crypto/chacha20/LICENSE:
--------------------------------------------------------------------------------
1 | GNU AFFERO GENERAL PUBLIC LICENSE
2 | Version 3, 19 November 2007
3 |
4 | Copyright (C) 2007 Free Software Foundation, Inc.
5 | Everyone is permitted to copy and distribute verbatim copies
6 | of this license document, but changing it is not allowed.
7 |
8 | Preamble
9 |
10 | The GNU Affero General Public License is a free, copyleft license for
11 | software and other kinds of works, specifically designed to ensure
12 | cooperation with the community in the case of network server software.
13 |
14 | The licenses for most software and other practical works are designed
15 | to take away your freedom to share and change the works. By contrast,
16 | our General Public Licenses are intended to guarantee your freedom to
17 | share and change all versions of a program--to make sure it remains free
18 | software for all its users.
19 |
20 | When we speak of free software, we are referring to freedom, not
21 | price. Our General Public Licenses are designed to make sure that you
22 | have the freedom to distribute copies of free software (and charge for
23 | them if you wish), that you receive source code or can get it if you
24 | want it, that you can change the software or use pieces of it in new
25 | free programs, and that you know you can do these things.
26 |
27 | Developers that use our General Public Licenses protect your rights
28 | with two steps: (1) assert copyright on the software, and (2) offer
29 | you this License which gives you legal permission to copy, distribute
30 | and/or modify the software.
31 |
32 | A secondary benefit of defending all users' freedom is that
33 | improvements made in alternate versions of the program, if they
34 | receive widespread use, become available for other developers to
35 | incorporate. Many developers of free software are heartened and
36 | encouraged by the resulting cooperation. However, in the case of
37 | software used on network servers, this result may fail to come about.
38 | The GNU General Public License permits making a modified version and
39 | letting the public access it on a server without ever releasing its
40 | source code to the public.
41 |
42 | The GNU Affero General Public License is designed specifically to
43 | ensure that, in such cases, the modified source code becomes available
44 | to the community. It requires the operator of a network server to
45 | provide the source code of the modified version running there to the
46 | users of that server. Therefore, public use of a modified version, on
47 | a publicly accessible server, gives the public access to the source
48 | code of the modified version.
49 |
50 | An older license, called the Affero General Public License and
51 | published by Affero, was designed to accomplish similar goals. This is
52 | a different license, not a version of the Affero GPL, but Affero has
53 | released a new version of the Affero GPL which permits relicensing under
54 | this license.
55 |
56 | The precise terms and conditions for copying, distribution and
57 | modification follow.
58 |
59 | TERMS AND CONDITIONS
60 |
61 | 0. Definitions.
62 |
63 | "This License" refers to version 3 of the GNU Affero General Public License.
64 |
65 | "Copyright" also means copyright-like laws that apply to other kinds of
66 | works, such as semiconductor masks.
67 |
68 | "The Program" refers to any copyrightable work licensed under this
69 | License. Each licensee is addressed as "you". "Licensees" and
70 | "recipients" may be individuals or organizations.
71 |
72 | To "modify" a work means to copy from or adapt all or part of the work
73 | in a fashion requiring copyright permission, other than the making of an
74 | exact copy. The resulting work is called a "modified version" of the
75 | earlier work or a work "based on" the earlier work.
76 |
77 | A "covered work" means either the unmodified Program or a work based
78 | on the Program.
79 |
80 | To "propagate" a work means to do anything with it that, without
81 | permission, would make you directly or secondarily liable for
82 | infringement under applicable copyright law, except executing it on a
83 | computer or modifying a private copy. Propagation includes copying,
84 | distribution (with or without modification), making available to the
85 | public, and in some countries other activities as well.
86 |
87 | To "convey" a work means any kind of propagation that enables other
88 | parties to make or receive copies. Mere interaction with a user through
89 | a computer network, with no transfer of a copy, is not conveying.
90 |
91 | An interactive user interface displays "Appropriate Legal Notices"
92 | to the extent that it includes a convenient and prominently visible
93 | feature that (1) displays an appropriate copyright notice, and (2)
94 | tells the user that there is no warranty for the work (except to the
95 | extent that warranties are provided), that licensees may convey the
96 | work under this License, and how to view a copy of this License. If
97 | the interface presents a list of user commands or options, such as a
98 | menu, a prominent item in the list meets this criterion.
99 |
100 | 1. Source Code.
101 |
102 | The "source code" for a work means the preferred form of the work
103 | for making modifications to it. "Object code" means any non-source
104 | form of a work.
105 |
106 | A "Standard Interface" means an interface that either is an official
107 | standard defined by a recognized standards body, or, in the case of
108 | interfaces specified for a particular programming language, one that
109 | is widely used among developers working in that language.
110 |
111 | The "System Libraries" of an executable work include anything, other
112 | than the work as a whole, that (a) is included in the normal form of
113 | packaging a Major Component, but which is not part of that Major
114 | Component, and (b) serves only to enable use of the work with that
115 | Major Component, or to implement a Standard Interface for which an
116 | implementation is available to the public in source code form. A
117 | "Major Component", in this context, means a major essential component
118 | (kernel, window system, and so on) of the specific operating system
119 | (if any) on which the executable work runs, or a compiler used to
120 | produce the work, or an object code interpreter used to run it.
121 |
122 | The "Corresponding Source" for a work in object code form means all
123 | the source code needed to generate, install, and (for an executable
124 | work) run the object code and to modify the work, including scripts to
125 | control those activities. However, it does not include the work's
126 | System Libraries, or general-purpose tools or generally available free
127 | programs which are used unmodified in performing those activities but
128 | which are not part of the work. For example, Corresponding Source
129 | includes interface definition files associated with source files for
130 | the work, and the source code for shared libraries and dynamically
131 | linked subprograms that the work is specifically designed to require,
132 | such as by intimate data communication or control flow between those
133 | subprograms and other parts of the work.
134 |
135 | The Corresponding Source need not include anything that users
136 | can regenerate automatically from other parts of the Corresponding
137 | Source.
138 |
139 | The Corresponding Source for a work in source code form is that
140 | same work.
141 |
142 | 2. Basic Permissions.
143 |
144 | All rights granted under this License are granted for the term of
145 | copyright on the Program, and are irrevocable provided the stated
146 | conditions are met. This License explicitly affirms your unlimited
147 | permission to run the unmodified Program. The output from running a
148 | covered work is covered by this License only if the output, given its
149 | content, constitutes a covered work. This License acknowledges your
150 | rights of fair use or other equivalent, as provided by copyright law.
151 |
152 | You may make, run and propagate covered works that you do not
153 | convey, without conditions so long as your license otherwise remains
154 | in force. You may convey covered works to others for the sole purpose
155 | of having them make modifications exclusively for you, or provide you
156 | with facilities for running those works, provided that you comply with
157 | the terms of this License in conveying all material for which you do
158 | not control copyright. Those thus making or running the covered works
159 | for you must do so exclusively on your behalf, under your direction
160 | and control, on terms that prohibit them from making any copies of
161 | your copyrighted material outside their relationship with you.
162 |
163 | Conveying under any other circumstances is permitted solely under
164 | the conditions stated below. Sublicensing is not allowed; section 10
165 | makes it unnecessary.
166 |
167 | 3. Protecting Users' Legal Rights From Anti-Circumvention Law.
168 |
169 | No covered work shall be deemed part of an effective technological
170 | measure under any applicable law fulfilling obligations under article
171 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or
172 | similar laws prohibiting or restricting circumvention of such
173 | measures.
174 |
175 | When you convey a covered work, you waive any legal power to forbid
176 | circumvention of technological measures to the extent such circumvention
177 | is effected by exercising rights under this License with respect to
178 | the covered work, and you disclaim any intention to limit operation or
179 | modification of the work as a means of enforcing, against the work's
180 | users, your or third parties' legal rights to forbid circumvention of
181 | technological measures.
182 |
183 | 4. Conveying Verbatim Copies.
184 |
185 | You may convey verbatim copies of the Program's source code as you
186 | receive it, in any medium, provided that you conspicuously and
187 | appropriately publish on each copy an appropriate copyright notice;
188 | keep intact all notices stating that this License and any
189 | non-permissive terms added in accord with section 7 apply to the code;
190 | keep intact all notices of the absence of any warranty; and give all
191 | recipients a copy of this License along with the Program.
192 |
193 | You may charge any price or no price for each copy that you convey,
194 | and you may offer support or warranty protection for a fee.
195 |
196 | 5. Conveying Modified Source Versions.
197 |
198 | You may convey a work based on the Program, or the modifications to
199 | produce it from the Program, in the form of source code under the
200 | terms of section 4, provided that you also meet all of these conditions:
201 |
202 | a) The work must carry prominent notices stating that you modified
203 | it, and giving a relevant date.
204 |
205 | b) The work must carry prominent notices stating that it is
206 | released under this License and any conditions added under section
207 | 7. This requirement modifies the requirement in section 4 to
208 | "keep intact all notices".
209 |
210 | c) You must license the entire work, as a whole, under this
211 | License to anyone who comes into possession of a copy. This
212 | License will therefore apply, along with any applicable section 7
213 | additional terms, to the whole of the work, and all its parts,
214 | regardless of how they are packaged. This License gives no
215 | permission to license the work in any other way, but it does not
216 | invalidate such permission if you have separately received it.
217 |
218 | d) If the work has interactive user interfaces, each must display
219 | Appropriate Legal Notices; however, if the Program has interactive
220 | interfaces that do not display Appropriate Legal Notices, your
221 | work need not make them do so.
222 |
223 | A compilation of a covered work with other separate and independent
224 | works, which are not by their nature extensions of the covered work,
225 | and which are not combined with it such as to form a larger program,
226 | in or on a volume of a storage or distribution medium, is called an
227 | "aggregate" if the compilation and its resulting copyright are not
228 | used to limit the access or legal rights of the compilation's users
229 | beyond what the individual works permit. Inclusion of a covered work
230 | in an aggregate does not cause this License to apply to the other
231 | parts of the aggregate.
232 |
233 | 6. Conveying Non-Source Forms.
234 |
235 | You may convey a covered work in object code form under the terms
236 | of sections 4 and 5, provided that you also convey the
237 | machine-readable Corresponding Source under the terms of this License,
238 | in one of these ways:
239 |
240 | a) Convey the object code in, or embodied in, a physical product
241 | (including a physical distribution medium), accompanied by the
242 | Corresponding Source fixed on a durable physical medium
243 | customarily used for software interchange.
244 |
245 | b) Convey the object code in, or embodied in, a physical product
246 | (including a physical distribution medium), accompanied by a
247 | written offer, valid for at least three years and valid for as
248 | long as you offer spare parts or customer support for that product
249 | model, to give anyone who possesses the object code either (1) a
250 | copy of the Corresponding Source for all the software in the
251 | product that is covered by this License, on a durable physical
252 | medium customarily used for software interchange, for a price no
253 | more than your reasonable cost of physically performing this
254 | conveying of source, or (2) access to copy the
255 | Corresponding Source from a network server at no charge.
256 |
257 | c) Convey individual copies of the object code with a copy of the
258 | written offer to provide the Corresponding Source. This
259 | alternative is allowed only occasionally and noncommercially, and
260 | only if you received the object code with such an offer, in accord
261 | with subsection 6b.
262 |
263 | d) Convey the object code by offering access from a designated
264 | place (gratis or for a charge), and offer equivalent access to the
265 | Corresponding Source in the same way through the same place at no
266 | further charge. You need not require recipients to copy the
267 | Corresponding Source along with the object code. If the place to
268 | copy the object code is a network server, the Corresponding Source
269 | may be on a different server (operated by you or a third party)
270 | that supports equivalent copying facilities, provided you maintain
271 | clear directions next to the object code saying where to find the
272 | Corresponding Source. Regardless of what server hosts the
273 | Corresponding Source, you remain obligated to ensure that it is
274 | available for as long as needed to satisfy these requirements.
275 |
276 | e) Convey the object code using peer-to-peer transmission, provided
277 | you inform other peers where the object code and Corresponding
278 | Source of the work are being offered to the general public at no
279 | charge under subsection 6d.
280 |
281 | A separable portion of the object code, whose source code is excluded
282 | from the Corresponding Source as a System Library, need not be
283 | included in conveying the object code work.
284 |
285 | A "User Product" is either (1) a "consumer product", which means any
286 | tangible personal property which is normally used for personal, family,
287 | or household purposes, or (2) anything designed or sold for incorporation
288 | into a dwelling. In determining whether a product is a consumer product,
289 | doubtful cases shall be resolved in favor of coverage. For a particular
290 | product received by a particular user, "normally used" refers to a
291 | typical or common use of that class of product, regardless of the status
292 | of the particular user or of the way in which the particular user
293 | actually uses, or expects or is expected to use, the product. A product
294 | is a consumer product regardless of whether the product has substantial
295 | commercial, industrial or non-consumer uses, unless such uses represent
296 | the only significant mode of use of the product.
297 |
298 | "Installation Information" for a User Product means any methods,
299 | procedures, authorization keys, or other information required to install
300 | and execute modified versions of a covered work in that User Product from
301 | a modified version of its Corresponding Source. The information must
302 | suffice to ensure that the continued functioning of the modified object
303 | code is in no case prevented or interfered with solely because
304 | modification has been made.
305 |
306 | If you convey an object code work under this section in, or with, or
307 | specifically for use in, a User Product, and the conveying occurs as
308 | part of a transaction in which the right of possession and use of the
309 | User Product is transferred to the recipient in perpetuity or for a
310 | fixed term (regardless of how the transaction is characterized), the
311 | Corresponding Source conveyed under this section must be accompanied
312 | by the Installation Information. But this requirement does not apply
313 | if neither you nor any third party retains the ability to install
314 | modified object code on the User Product (for example, the work has
315 | been installed in ROM).
316 |
317 | The requirement to provide Installation Information does not include a
318 | requirement to continue to provide support service, warranty, or updates
319 | for a work that has been modified or installed by the recipient, or for
320 | the User Product in which it has been modified or installed. Access to a
321 | network may be denied when the modification itself materially and
322 | adversely affects the operation of the network or violates the rules and
323 | protocols for communication across the network.
324 |
325 | Corresponding Source conveyed, and Installation Information provided,
326 | in accord with this section must be in a format that is publicly
327 | documented (and with an implementation available to the public in
328 | source code form), and must require no special password or key for
329 | unpacking, reading or copying.
330 |
331 | 7. Additional Terms.
332 |
333 | "Additional permissions" are terms that supplement the terms of this
334 | License by making exceptions from one or more of its conditions.
335 | Additional permissions that are applicable to the entire Program shall
336 | be treated as though they were included in this License, to the extent
337 | that they are valid under applicable law. If additional permissions
338 | apply only to part of the Program, that part may be used separately
339 | under those permissions, but the entire Program remains governed by
340 | this License without regard to the additional permissions.
341 |
342 | When you convey a copy of a covered work, you may at your option
343 | remove any additional permissions from that copy, or from any part of
344 | it. (Additional permissions may be written to require their own
345 | removal in certain cases when you modify the work.) You may place
346 | additional permissions on material, added by you to a covered work,
347 | for which you have or can give appropriate copyright permission.
348 |
349 | Notwithstanding any other provision of this License, for material you
350 | add to a covered work, you may (if authorized by the copyright holders of
351 | that material) supplement the terms of this License with terms:
352 |
353 | a) Disclaiming warranty or limiting liability differently from the
354 | terms of sections 15 and 16 of this License; or
355 |
356 | b) Requiring preservation of specified reasonable legal notices or
357 | author attributions in that material or in the Appropriate Legal
358 | Notices displayed by works containing it; or
359 |
360 | c) Prohibiting misrepresentation of the origin of that material, or
361 | requiring that modified versions of such material be marked in
362 | reasonable ways as different from the original version; or
363 |
364 | d) Limiting the use for publicity purposes of names of licensors or
365 | authors of the material; or
366 |
367 | e) Declining to grant rights under trademark law for use of some
368 | trade names, trademarks, or service marks; or
369 |
370 | f) Requiring indemnification of licensors and authors of that
371 | material by anyone who conveys the material (or modified versions of
372 | it) with contractual assumptions of liability to the recipient, for
373 | any liability that these contractual assumptions directly impose on
374 | those licensors and authors.
375 |
376 | All other non-permissive additional terms are considered "further
377 | restrictions" within the meaning of section 10. If the Program as you
378 | received it, or any part of it, contains a notice stating that it is
379 | governed by this License along with a term that is a further
380 | restriction, you may remove that term. If a license document contains
381 | a further restriction but permits relicensing or conveying under this
382 | License, you may add to a covered work material governed by the terms
383 | of that license document, provided that the further restriction does
384 | not survive such relicensing or conveying.
385 |
386 | If you add terms to a covered work in accord with this section, you
387 | must place, in the relevant source files, a statement of the
388 | additional terms that apply to those files, or a notice indicating
389 | where to find the applicable terms.
390 |
391 | Additional terms, permissive or non-permissive, may be stated in the
392 | form of a separately written license, or stated as exceptions;
393 | the above requirements apply either way.
394 |
395 | 8. Termination.
396 |
397 | You may not propagate or modify a covered work except as expressly
398 | provided under this License. Any attempt otherwise to propagate or
399 | modify it is void, and will automatically terminate your rights under
400 | this License (including any patent licenses granted under the third
401 | paragraph of section 11).
402 |
403 | However, if you cease all violation of this License, then your
404 | license from a particular copyright holder is reinstated (a)
405 | provisionally, unless and until the copyright holder explicitly and
406 | finally terminates your license, and (b) permanently, if the copyright
407 | holder fails to notify you of the violation by some reasonable means
408 | prior to 60 days after the cessation.
409 |
410 | Moreover, your license from a particular copyright holder is
411 | reinstated permanently if the copyright holder notifies you of the
412 | violation by some reasonable means, this is the first time you have
413 | received notice of violation of this License (for any work) from that
414 | copyright holder, and you cure the violation prior to 30 days after
415 | your receipt of the notice.
416 |
417 | Termination of your rights under this section does not terminate the
418 | licenses of parties who have received copies or rights from you under
419 | this License. If your rights have been terminated and not permanently
420 | reinstated, you do not qualify to receive new licenses for the same
421 | material under section 10.
422 |
423 | 9. Acceptance Not Required for Having Copies.
424 |
425 | You are not required to accept this License in order to receive or
426 | run a copy of the Program. Ancillary propagation of a covered work
427 | occurring solely as a consequence of using peer-to-peer transmission
428 | to receive a copy likewise does not require acceptance. However,
429 | nothing other than this License grants you permission to propagate or
430 | modify any covered work. These actions infringe copyright if you do
431 | not accept this License. Therefore, by modifying or propagating a
432 | covered work, you indicate your acceptance of this License to do so.
433 |
434 | 10. Automatic Licensing of Downstream Recipients.
435 |
436 | Each time you convey a covered work, the recipient automatically
437 | receives a license from the original licensors, to run, modify and
438 | propagate that work, subject to this License. You are not responsible
439 | for enforcing compliance by third parties with this License.
440 |
441 | An "entity transaction" is a transaction transferring control of an
442 | organization, or substantially all assets of one, or subdividing an
443 | organization, or merging organizations. If propagation of a covered
444 | work results from an entity transaction, each party to that
445 | transaction who receives a copy of the work also receives whatever
446 | licenses to the work the party's predecessor in interest had or could
447 | give under the previous paragraph, plus a right to possession of the
448 | Corresponding Source of the work from the predecessor in interest, if
449 | the predecessor has it or can get it with reasonable efforts.
450 |
451 | You may not impose any further restrictions on the exercise of the
452 | rights granted or affirmed under this License. For example, you may
453 | not impose a license fee, royalty, or other charge for exercise of
454 | rights granted under this License, and you may not initiate litigation
455 | (including a cross-claim or counterclaim in a lawsuit) alleging that
456 | any patent claim is infringed by making, using, selling, offering for
457 | sale, or importing the Program or any portion of it.
458 |
459 | 11. Patents.
460 |
461 | A "contributor" is a copyright holder who authorizes use under this
462 | License of the Program or a work on which the Program is based. The
463 | work thus licensed is called the contributor's "contributor version".
464 |
465 | A contributor's "essential patent claims" are all patent claims
466 | owned or controlled by the contributor, whether already acquired or
467 | hereafter acquired, that would be infringed by some manner, permitted
468 | by this License, of making, using, or selling its contributor version,
469 | but do not include claims that would be infringed only as a
470 | consequence of further modification of the contributor version. For
471 | purposes of this definition, "control" includes the right to grant
472 | patent sublicenses in a manner consistent with the requirements of
473 | this License.
474 |
475 | Each contributor grants you a non-exclusive, worldwide, royalty-free
476 | patent license under the contributor's essential patent claims, to
477 | make, use, sell, offer for sale, import and otherwise run, modify and
478 | propagate the contents of its contributor version.
479 |
480 | In the following three paragraphs, a "patent license" is any express
481 | agreement or commitment, however denominated, not to enforce a patent
482 | (such as an express permission to practice a patent or covenant not to
483 | sue for patent infringement). To "grant" such a patent license to a
484 | party means to make such an agreement or commitment not to enforce a
485 | patent against the party.
486 |
487 | If you convey a covered work, knowingly relying on a patent license,
488 | and the Corresponding Source of the work is not available for anyone
489 | to copy, free of charge and under the terms of this License, through a
490 | publicly available network server or other readily accessible means,
491 | then you must either (1) cause the Corresponding Source to be so
492 | available, or (2) arrange to deprive yourself of the benefit of the
493 | patent license for this particular work, or (3) arrange, in a manner
494 | consistent with the requirements of this License, to extend the patent
495 | license to downstream recipients. "Knowingly relying" means you have
496 | actual knowledge that, but for the patent license, your conveying the
497 | covered work in a country, or your recipient's use of the covered work
498 | in a country, would infringe one or more identifiable patents in that
499 | country that you have reason to believe are valid.
500 |
501 | If, pursuant to or in connection with a single transaction or
502 | arrangement, you convey, or propagate by procuring conveyance of, a
503 | covered work, and grant a patent license to some of the parties
504 | receiving the covered work authorizing them to use, propagate, modify
505 | or convey a specific copy of the covered work, then the patent license
506 | you grant is automatically extended to all recipients of the covered
507 | work and works based on it.
508 |
509 | A patent license is "discriminatory" if it does not include within
510 | the scope of its coverage, prohibits the exercise of, or is
511 | conditioned on the non-exercise of one or more of the rights that are
512 | specifically granted under this License. You may not convey a covered
513 | work if you are a party to an arrangement with a third party that is
514 | in the business of distributing software, under which you make payment
515 | to the third party based on the extent of your activity of conveying
516 | the work, and under which the third party grants, to any of the
517 | parties who would receive the covered work from you, a discriminatory
518 | patent license (a) in connection with copies of the covered work
519 | conveyed by you (or copies made from those copies), or (b) primarily
520 | for and in connection with specific products or compilations that
521 | contain the covered work, unless you entered into that arrangement,
522 | or that patent license was granted, prior to 28 March 2007.
523 |
524 | Nothing in this License shall be construed as excluding or limiting
525 | any implied license or other defenses to infringement that may
526 | otherwise be available to you under applicable patent law.
527 |
528 | 12. No Surrender of Others' Freedom.
529 |
530 | If conditions are imposed on you (whether by court order, agreement or
531 | otherwise) that contradict the conditions of this License, they do not
532 | excuse you from the conditions of this License. If you cannot convey a
533 | covered work so as to satisfy simultaneously your obligations under this
534 | License and any other pertinent obligations, then as a consequence you may
535 | not convey it at all. For example, if you agree to terms that obligate you
536 | to collect a royalty for further conveying from those to whom you convey
537 | the Program, the only way you could satisfy both those terms and this
538 | License would be to refrain entirely from conveying the Program.
539 |
540 | 13. Remote Network Interaction; Use with the GNU General Public License.
541 |
542 | Notwithstanding any other provision of this License, if you modify the
543 | Program, your modified version must prominently offer all users
544 | interacting with it remotely through a computer network (if your version
545 | supports such interaction) an opportunity to receive the Corresponding
546 | Source of your version by providing access to the Corresponding Source
547 | from a network server at no charge, through some standard or customary
548 | means of facilitating copying of software. This Corresponding Source
549 | shall include the Corresponding Source for any work covered by version 3
550 | of the GNU General Public License that is incorporated pursuant to the
551 | following paragraph.
552 |
553 | Notwithstanding any other provision of this License, you have
554 | permission to link or combine any covered work with a work licensed
555 | under version 3 of the GNU General Public License into a single
556 | combined work, and to convey the resulting work. The terms of this
557 | License will continue to apply to the part which is the covered work,
558 | but the work with which it is combined will remain governed by version
559 | 3 of the GNU General Public License.
560 |
561 | 14. Revised Versions of this License.
562 |
563 | The Free Software Foundation may publish revised and/or new versions of
564 | the GNU Affero General Public License from time to time. Such new versions
565 | will be similar in spirit to the present version, but may differ in detail to
566 | address new problems or concerns.
567 |
568 | Each version is given a distinguishing version number. If the
569 | Program specifies that a certain numbered version of the GNU Affero General
570 | Public License "or any later version" applies to it, you have the
571 | option of following the terms and conditions either of that numbered
572 | version or of any later version published by the Free Software
573 | Foundation. If the Program does not specify a version number of the
574 | GNU Affero General Public License, you may choose any version ever published
575 | by the Free Software Foundation.
576 |
577 | If the Program specifies that a proxy can decide which future
578 | versions of the GNU Affero General Public License can be used, that proxy's
579 | public statement of acceptance of a version permanently authorizes you
580 | to choose that version for the Program.
581 |
582 | Later license versions may give you additional or different
583 | permissions. However, no additional obligations are imposed on any
584 | author or copyright holder as a result of your choosing to follow a
585 | later version.
586 |
587 | 15. Disclaimer of Warranty.
588 |
589 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
590 | APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
591 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
592 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
593 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
594 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
595 | IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
596 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
597 |
598 | 16. Limitation of Liability.
599 |
600 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
601 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
602 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
603 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
604 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
605 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
606 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
607 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
608 | SUCH DAMAGES.
609 |
610 | 17. Interpretation of Sections 15 and 16.
611 |
612 | If the disclaimer of warranty and limitation of liability provided
613 | above cannot be given local legal effect according to their terms,
614 | reviewing courts shall apply local law that most closely approximates
615 | an absolute waiver of all civil liability in connection with the
616 | Program, unless a warranty or assumption of liability accompanies a
617 | copy of the Program in return for a fee.
618 |
619 | END OF TERMS AND CONDITIONS
620 |
621 | How to Apply These Terms to Your New Programs
622 |
623 | If you develop a new program, and you want it to be of the greatest
624 | possible use to the public, the best way to achieve this is to make it
625 | free software which everyone can redistribute and change under these terms.
626 |
627 | To do so, attach the following notices to the program. It is safest
628 | to attach them to the start of each source file to most effectively
629 | state the exclusion of warranty; and each file should have at least
630 | the "copyright" line and a pointer to where the full notice is found.
631 |
632 | <one line to give the program's name and a brief idea of what it does.>
633 | Copyright (C) <year> <name of author>
634 |
635 | This program is free software: you can redistribute it and/or modify
636 | it under the terms of the GNU Affero General Public License as published by
637 | the Free Software Foundation, either version 3 of the License, or
638 | (at your option) any later version.
639 |
640 | This program is distributed in the hope that it will be useful,
641 | but WITHOUT ANY WARRANTY; without even the implied warranty of
642 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
643 | GNU Affero General Public License for more details.
644 |
645 | You should have received a copy of the GNU Affero General Public License
646 | along with this program. If not, see <https://www.gnu.org/licenses/>.
647 |
648 | Also add information on how to contact you by electronic and paper mail.
649 |
650 | If your software can interact with users remotely through a computer
651 | network, you should also make sure that it provides a way for users to
652 | get its source. For example, if your program is a web application, its
653 | interface could display a "Source" link that leads users to an archive
654 | of the code. There are many ways you could offer source, and different
655 | solutions will be better for different programs; see section 13 for the
656 | specific requirements.
657 |
658 | You should also get your employer (if you work as a programmer) or school,
659 | if any, to sign a "copyright disclaimer" for the program, if necessary.
660 | For more information on this, and how to apply and follow the GNU AGPL, see
661 | <https://www.gnu.org/licenses/>.
662 |
--------------------------------------------------------------------------------
/crypto/chacha20/internal/hardware/impl_amd64.s:
--------------------------------------------------------------------------------
1 | // Copyright (C) 2019 Yawning Angel
2 | //
3 | // This program is free software: you can redistribute it and/or modify
4 | // it under the terms of the GNU Affero General Public License as
5 | // published by the Free Software Foundation, either version 3 of the
6 | // License, or (at your option) any later version.
7 | //
8 | // This program is distributed in the hope that it will be useful,
9 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
10 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 | // GNU Affero General Public License for more details.
12 | //
13 | // You should have received a copy of the GNU Affero General Public License
14 | // along with this program. If not, see <https://www.gnu.org/licenses/>.
15 |
16 | // +build !noasm
17 |
18 | #include "textflag.h"
19 |
20 | DATA ·chacha_constants<>+0x00(SB)/4, $0x61707865 // "expa" - bytes 0-15 spell the ASCII string "expand 32-byte k" as four little-endian 32-bit words
21 | DATA ·chacha_constants<>+0x04(SB)/4, $0x3320646E // "nd 3"
22 | DATA ·chacha_constants<>+0x08(SB)/4, $0x79622D32 // "2-by"
23 | DATA ·chacha_constants<>+0x0c(SB)/4, $0x6B206574 // "te k"
24 | DATA ·chacha_constants<>+0x10(SB)/8, $0x0504070601000302 // bytes 16-31: VPSHUFB byte-shuffle mask that rotates every 32-bit lane left by 16 bits (low half)
25 | DATA ·chacha_constants<>+0x18(SB)/8, $0x0D0C0F0E09080B0A // high half of the rotate-left-16 mask
26 | DATA ·chacha_constants<>+0x20(SB)/8, $0x0605040702010003 // bytes 32-47: VPSHUFB byte-shuffle mask that rotates every 32-bit lane left by 8 bits (low half)
27 | DATA ·chacha_constants<>+0x28(SB)/8, $0x0E0D0C0F0A09080B // high half of the rotate-left-8 mask
28 | GLOBL ·chacha_constants<>(SB), (NOPTR+RODATA), $48 // 48-byte table: pointer-free, read-only, file-local symbol (the <> suffix)
29 |
30 | // func blocksAVX2(s *[api.StateSize]uint32, in, out []byte)
31 | TEXT ·blocksAVX2(SB), NOSPLIT, $576-56
32 | // This is Andrew Moon's AVX2 ChaCha implementation taken from
33 | // supercop-20171218, with some minor changes, primarily calling
34 | // convention and assembly dialect related.
35 |
36 | // Align the stack on a 64 byte boundary.
37 | MOVQ SP, BP
38 | ADDQ $64, BP
39 | ANDQ $-64, BP
40 |
41 | // Go calling convention -> SYSV AMD64 (and a fixup).
42 | MOVQ s+0(FP), DI // &s -> DI
43 | ADDQ $16, DI // Skip the ChaCha constants in the chachaState.
44 | MOVQ in+8(FP), SI // &in[0] -> SI
45 | MOVQ out+32(FP), DX // &out[0] -> DX
46 | MOVQ in_len+16(FP), CX // len(in) -> CX
47 |
48 | // Begin the main body of `chacha_blocks_avx2`.
49 | //
50 | // Mostly a direct translation except:
51 | // * The number of rounds is always 20.
52 | // * %rbp is used instead of %rsp.
53 | LEAQ ·chacha_constants<>(SB), AX
54 | VMOVDQU 0(AX), X8
55 | VMOVDQU 16(AX), X6
56 | VMOVDQU 32(AX), X7
57 | VMOVDQU 0(DI), X9
58 | VMOVDQU 16(DI), X10
59 | VMOVDQU 32(DI), X11
60 |
61 | // MOVQ 48(DI), AX
62 | MOVQ $1, R9
63 | VMOVDQA X8, 0(BP)
64 | VMOVDQA X9, 16(BP)
65 | VMOVDQA X10, 32(BP)
66 | VMOVDQA X11, 48(BP)
67 |
68 | // MOVQ AX, 64(BP)
69 | VMOVDQA X6, 448(BP)
70 | VMOVDQA X6, 464(BP)
71 | VMOVDQA X7, 480(BP)
72 | VMOVDQA X7, 496(BP)
73 | CMPQ CX, $512
74 | JAE chacha_blocks_avx2_atleast512
75 | CMPQ CX, $256
76 | JAE chacha_blocks_avx2_atleast256
77 | JMP chacha_blocks_avx2_below256
78 |
79 | chacha_blocks_avx2_atleast512:
80 | MOVQ 48(BP), AX
81 | LEAQ 1(AX), R8
82 | LEAQ 2(AX), R9
83 | LEAQ 3(AX), R10
84 | LEAQ 4(AX), BX
85 | LEAQ 5(AX), R11
86 | LEAQ 6(AX), R12
87 | LEAQ 7(AX), R13
88 | LEAQ 8(AX), R14
89 | MOVL AX, 128(BP)
90 | MOVL R8, 4+128(BP)
91 | MOVL R9, 8+128(BP)
92 | MOVL R10, 12+128(BP)
93 | MOVL BX, 16+128(BP)
94 | MOVL R11, 20+128(BP)
95 | MOVL R12, 24+128(BP)
96 | MOVL R13, 28+128(BP)
97 | SHRQ $32, AX
98 | SHRQ $32, R8
99 | SHRQ $32, R9
100 | SHRQ $32, R10
101 | SHRQ $32, BX
102 | SHRQ $32, R11
103 | SHRQ $32, R12
104 | SHRQ $32, R13
105 | MOVL AX, 160(BP)
106 | MOVL R8, 4+160(BP)
107 | MOVL R9, 8+160(BP)
108 | MOVL R10, 12+160(BP)
109 | MOVL BX, 16+160(BP)
110 | MOVL R11, 20+160(BP)
111 | MOVL R12, 24+160(BP)
112 | MOVL R13, 28+160(BP)
113 | MOVQ R14, 48(BP)
114 |
115 | // MOVQ 64(BP), AX
116 | MOVQ $20, AX
117 | VPBROADCASTD 0(BP), Y0
118 | VPBROADCASTD 4+0(BP), Y1
119 | VPBROADCASTD 8+0(BP), Y2
120 | VPBROADCASTD 12+0(BP), Y3
121 | VPBROADCASTD 16(BP), Y4
122 | VPBROADCASTD 4+16(BP), Y5
123 | VPBROADCASTD 8+16(BP), Y6
124 | VPBROADCASTD 12+16(BP), Y7
125 | VPBROADCASTD 32(BP), Y8
126 | VPBROADCASTD 4+32(BP), Y9
127 | VPBROADCASTD 8+32(BP), Y10
128 | VPBROADCASTD 12+32(BP), Y11
129 | VPBROADCASTD 8+48(BP), Y14
130 | VPBROADCASTD 12+48(BP), Y15
131 | VMOVDQA 128(BP), Y12
132 | VMOVDQA 160(BP), Y13
133 |
134 | chacha_blocks_avx2_mainloop1:
135 | VPADDD Y0, Y4, Y0
136 | VPADDD Y1, Y5, Y1
137 | VPXOR Y12, Y0, Y12
138 | VPXOR Y13, Y1, Y13
139 | VPADDD Y2, Y6, Y2
140 | VPADDD Y3, Y7, Y3
141 | VPXOR Y14, Y2, Y14
142 | VPXOR Y15, Y3, Y15
143 | VPSHUFB 448(BP), Y12, Y12
144 | VPSHUFB 448(BP), Y13, Y13
145 | VPADDD Y8, Y12, Y8
146 | VPADDD Y9, Y13, Y9
147 | VPSHUFB 448(BP), Y14, Y14
148 | VPSHUFB 448(BP), Y15, Y15
149 | VPADDD Y10, Y14, Y10
150 | VPADDD Y11, Y15, Y11
151 | VMOVDQA Y12, 96(BP)
152 | VPXOR Y4, Y8, Y4
153 | VPXOR Y5, Y9, Y5
154 | VPSLLD $ 12, Y4, Y12
155 | VPSRLD $20, Y4, Y4
156 | VPXOR Y4, Y12, Y4
157 | VPSLLD $ 12, Y5, Y12
158 | VPSRLD $20, Y5, Y5
159 | VPXOR Y5, Y12, Y5
160 | VPXOR Y6, Y10, Y6
161 | VPXOR Y7, Y11, Y7
162 | VPSLLD $ 12, Y6, Y12
163 | VPSRLD $20, Y6, Y6
164 | VPXOR Y6, Y12, Y6
165 | VPSLLD $ 12, Y7, Y12
166 | VPSRLD $20, Y7, Y7
167 | VPXOR Y7, Y12, Y7
168 | VPADDD Y0, Y4, Y0
169 | VPADDD Y1, Y5, Y1
170 | VPXOR 96(BP), Y0, Y12
171 | VPXOR Y13, Y1, Y13
172 | VPADDD Y2, Y6, Y2
173 | VPADDD Y3, Y7, Y3
174 | VPXOR Y14, Y2, Y14
175 | VPXOR Y15, Y3, Y15
176 | VPSHUFB 480(BP), Y12, Y12
177 | VPSHUFB 480(BP), Y13, Y13
178 | VPADDD Y8, Y12, Y8
179 | VPADDD Y9, Y13, Y9
180 | VPSHUFB 480(BP), Y14, Y14
181 | VPSHUFB 480(BP), Y15, Y15
182 | VPADDD Y10, Y14, Y10
183 | VPADDD Y11, Y15, Y11
184 | VMOVDQA Y12, 96(BP)
185 | VPXOR Y4, Y8, Y4
186 | VPXOR Y5, Y9, Y5
187 | VPSLLD $ 7, Y4, Y12
188 | VPSRLD $25, Y4, Y4
189 | VPXOR Y4, Y12, Y4
190 | VPSLLD $ 7, Y5, Y12
191 | VPSRLD $25, Y5, Y5
192 | VPXOR Y5, Y12, Y5
193 | VPXOR Y6, Y10, Y6
194 | VPXOR Y7, Y11, Y7
195 | VPSLLD $ 7, Y6, Y12
196 | VPSRLD $25, Y6, Y6
197 | VPXOR Y6, Y12, Y6
198 | VPSLLD $ 7, Y7, Y12
199 | VPSRLD $25, Y7, Y7
200 | VPXOR Y7, Y12, Y7
201 | VPADDD Y0, Y5, Y0
202 | VPADDD Y1, Y6, Y1
203 | VPXOR Y15, Y0, Y15
204 | VPXOR 96(BP), Y1, Y12
205 | VPADDD Y2, Y7, Y2
206 | VPADDD Y3, Y4, Y3
207 | VPXOR Y13, Y2, Y13
208 | VPXOR Y14, Y3, Y14
209 | VPSHUFB 448(BP), Y15, Y15
210 | VPSHUFB 448(BP), Y12, Y12
211 | VPADDD Y10, Y15, Y10
212 | VPADDD Y11, Y12, Y11
213 | VPSHUFB 448(BP), Y13, Y13
214 | VPSHUFB 448(BP), Y14, Y14
215 | VPADDD Y8, Y13, Y8
216 | VPADDD Y9, Y14, Y9
217 | VMOVDQA Y15, 96(BP)
218 | VPXOR Y5, Y10, Y5
219 | VPXOR Y6, Y11, Y6
220 | VPSLLD $ 12, Y5, Y15
221 | VPSRLD $20, Y5, Y5
222 | VPXOR Y5, Y15, Y5
223 | VPSLLD $ 12, Y6, Y15
224 | VPSRLD $20, Y6, Y6
225 | VPXOR Y6, Y15, Y6
226 | VPXOR Y7, Y8, Y7
227 | VPXOR Y4, Y9, Y4
228 | VPSLLD $ 12, Y7, Y15
229 | VPSRLD $20, Y7, Y7
230 | VPXOR Y7, Y15, Y7
231 | VPSLLD $ 12, Y4, Y15
232 | VPSRLD $20, Y4, Y4
233 | VPXOR Y4, Y15, Y4
234 | VPADDD Y0, Y5, Y0
235 | VPADDD Y1, Y6, Y1
236 | VPXOR 96(BP), Y0, Y15
237 | VPXOR Y12, Y1, Y12
238 | VPADDD Y2, Y7, Y2
239 | VPADDD Y3, Y4, Y3
240 | VPXOR Y13, Y2, Y13
241 | VPXOR Y14, Y3, Y14
242 | VPSHUFB 480(BP), Y15, Y15
243 | VPSHUFB 480(BP), Y12, Y12
244 | VPADDD Y10, Y15, Y10
245 | VPADDD Y11, Y12, Y11
246 | VPSHUFB 480(BP), Y13, Y13
247 | VPSHUFB 480(BP), Y14, Y14
248 | VPADDD Y8, Y13, Y8
249 | VPADDD Y9, Y14, Y9
250 | VMOVDQA Y15, 96(BP)
251 | VPXOR Y5, Y10, Y5
252 | VPXOR Y6, Y11, Y6
253 | VPSLLD $ 7, Y5, Y15
254 | VPSRLD $25, Y5, Y5
255 | VPXOR Y5, Y15, Y5
256 | VPSLLD $ 7, Y6, Y15
257 | VPSRLD $25, Y6, Y6
258 | VPXOR Y6, Y15, Y6
259 | VPXOR Y7, Y8, Y7
260 | VPXOR Y4, Y9, Y4
261 | VPSLLD $ 7, Y7, Y15
262 | VPSRLD $25, Y7, Y7
263 | VPXOR Y7, Y15, Y7
264 | VPSLLD $ 7, Y4, Y15
265 | VPSRLD $25, Y4, Y4
266 | VPXOR Y4, Y15, Y4
267 | VMOVDQA 96(BP), Y15
268 | SUBQ $2, AX
269 | JNZ chacha_blocks_avx2_mainloop1
270 | VMOVDQA Y8, 192(BP)
271 | VMOVDQA Y9, 224(BP)
272 | VMOVDQA Y10, 256(BP)
273 | VMOVDQA Y11, 288(BP)
274 | VMOVDQA Y12, 320(BP)
275 | VMOVDQA Y13, 352(BP)
276 | VMOVDQA Y14, 384(BP)
277 | VMOVDQA Y15, 416(BP)
278 | VPBROADCASTD 0(BP), Y8
279 | VPBROADCASTD 4+0(BP), Y9
280 | VPBROADCASTD 8+0(BP), Y10
281 | VPBROADCASTD 12+0(BP), Y11
282 | VPBROADCASTD 16(BP), Y12
283 | VPBROADCASTD 4+16(BP), Y13
284 | VPBROADCASTD 8+16(BP), Y14
285 | VPBROADCASTD 12+16(BP), Y15
286 | VPADDD Y8, Y0, Y0
287 | VPADDD Y9, Y1, Y1
288 | VPADDD Y10, Y2, Y2
289 | VPADDD Y11, Y3, Y3
290 | VPADDD Y12, Y4, Y4
291 | VPADDD Y13, Y5, Y5
292 | VPADDD Y14, Y6, Y6
293 | VPADDD Y15, Y7, Y7
294 | VPUNPCKLDQ Y1, Y0, Y8
295 | VPUNPCKLDQ Y3, Y2, Y9
296 | VPUNPCKHDQ Y1, Y0, Y12
297 | VPUNPCKHDQ Y3, Y2, Y13
298 | VPUNPCKLDQ Y5, Y4, Y10
299 | VPUNPCKLDQ Y7, Y6, Y11
300 | VPUNPCKHDQ Y5, Y4, Y14
301 | VPUNPCKHDQ Y7, Y6, Y15
302 | VPUNPCKLQDQ Y9, Y8, Y0
303 | VPUNPCKLQDQ Y11, Y10, Y1
304 | VPUNPCKHQDQ Y9, Y8, Y2
305 | VPUNPCKHQDQ Y11, Y10, Y3
306 | VPUNPCKLQDQ Y13, Y12, Y4
307 | VPUNPCKLQDQ Y15, Y14, Y5
308 | VPUNPCKHQDQ Y13, Y12, Y6
309 | VPUNPCKHQDQ Y15, Y14, Y7
310 | VPERM2I128 $0x20, Y1, Y0, Y8
311 | VPERM2I128 $0x20, Y3, Y2, Y9
312 | VPERM2I128 $0x31, Y1, Y0, Y12
313 | VPERM2I128 $0x31, Y3, Y2, Y13
314 | VPERM2I128 $0x20, Y5, Y4, Y10
315 | VPERM2I128 $0x20, Y7, Y6, Y11
316 | VPERM2I128 $0x31, Y5, Y4, Y14
317 | VPERM2I128 $0x31, Y7, Y6, Y15
318 | ANDQ SI, SI
319 | JZ chacha_blocks_avx2_noinput1
320 | VPXOR 0(SI), Y8, Y8
321 | VPXOR 64(SI), Y9, Y9
322 | VPXOR 128(SI), Y10, Y10
323 | VPXOR 192(SI), Y11, Y11
324 | VPXOR 256(SI), Y12, Y12
325 | VPXOR 320(SI), Y13, Y13
326 | VPXOR 384(SI), Y14, Y14
327 | VPXOR 448(SI), Y15, Y15
328 | VMOVDQU Y8, 0(DX)
329 | VMOVDQU Y9, 64(DX)
330 | VMOVDQU Y10, 128(DX)
331 | VMOVDQU Y11, 192(DX)
332 | VMOVDQU Y12, 256(DX)
333 | VMOVDQU Y13, 320(DX)
334 | VMOVDQU Y14, 384(DX)
335 | VMOVDQU Y15, 448(DX)
336 | VMOVDQA 192(BP), Y0
337 | VMOVDQA 224(BP), Y1
338 | VMOVDQA 256(BP), Y2
339 | VMOVDQA 288(BP), Y3
340 | VMOVDQA 320(BP), Y4
341 | VMOVDQA 352(BP), Y5
342 | VMOVDQA 384(BP), Y6
343 | VMOVDQA 416(BP), Y7
344 | VPBROADCASTD 32(BP), Y8
345 | VPBROADCASTD 4+32(BP), Y9
346 | VPBROADCASTD 8+32(BP), Y10
347 | VPBROADCASTD 12+32(BP), Y11
348 | VMOVDQA 128(BP), Y12
349 | VMOVDQA 160(BP), Y13
350 | VPBROADCASTD 8+48(BP), Y14
351 | VPBROADCASTD 12+48(BP), Y15
352 | VPADDD Y8, Y0, Y0
353 | VPADDD Y9, Y1, Y1
354 | VPADDD Y10, Y2, Y2
355 | VPADDD Y11, Y3, Y3
356 | VPADDD Y12, Y4, Y4
357 | VPADDD Y13, Y5, Y5
358 | VPADDD Y14, Y6, Y6
359 | VPADDD Y15, Y7, Y7
360 | VPUNPCKLDQ Y1, Y0, Y8
361 | VPUNPCKLDQ Y3, Y2, Y9
362 | VPUNPCKHDQ Y1, Y0, Y12
363 | VPUNPCKHDQ Y3, Y2, Y13
364 | VPUNPCKLDQ Y5, Y4, Y10
365 | VPUNPCKLDQ Y7, Y6, Y11
366 | VPUNPCKHDQ Y5, Y4, Y14
367 | VPUNPCKHDQ Y7, Y6, Y15
368 | VPUNPCKLQDQ Y9, Y8, Y0
369 | VPUNPCKLQDQ Y11, Y10, Y1
370 | VPUNPCKHQDQ Y9, Y8, Y2
371 | VPUNPCKHQDQ Y11, Y10, Y3
372 | VPUNPCKLQDQ Y13, Y12, Y4
373 | VPUNPCKLQDQ Y15, Y14, Y5
374 | VPUNPCKHQDQ Y13, Y12, Y6
375 | VPUNPCKHQDQ Y15, Y14, Y7
376 | VPERM2I128 $0x20, Y1, Y0, Y8
377 | VPERM2I128 $0x20, Y3, Y2, Y9
378 | VPERM2I128 $0x31, Y1, Y0, Y12
379 | VPERM2I128 $0x31, Y3, Y2, Y13
380 | VPERM2I128 $0x20, Y5, Y4, Y10
381 | VPERM2I128 $0x20, Y7, Y6, Y11
382 | VPERM2I128 $0x31, Y5, Y4, Y14
383 | VPERM2I128 $0x31, Y7, Y6, Y15
384 | VPXOR 32(SI), Y8, Y8
385 | VPXOR 96(SI), Y9, Y9
386 | VPXOR 160(SI), Y10, Y10
387 | VPXOR 224(SI), Y11, Y11
388 | VPXOR 288(SI), Y12, Y12
389 | VPXOR 352(SI), Y13, Y13
390 | VPXOR 416(SI), Y14, Y14
391 | VPXOR 480(SI), Y15, Y15
392 | VMOVDQU Y8, 32(DX)
393 | VMOVDQU Y9, 96(DX)
394 | VMOVDQU Y10, 160(DX)
395 | VMOVDQU Y11, 224(DX)
396 | VMOVDQU Y12, 288(DX)
397 | VMOVDQU Y13, 352(DX)
398 | VMOVDQU Y14, 416(DX)
399 | VMOVDQU Y15, 480(DX)
400 | ADDQ $512, SI
401 | JMP chacha_blocks_avx2_mainloop1_cont
402 |
403 | chacha_blocks_avx2_noinput1:
404 | VMOVDQU Y8, 0(DX)
405 | VMOVDQU Y9, 64(DX)
406 | VMOVDQU Y10, 128(DX)
407 | VMOVDQU Y11, 192(DX)
408 | VMOVDQU Y12, 256(DX)
409 | VMOVDQU Y13, 320(DX)
410 | VMOVDQU Y14, 384(DX)
411 | VMOVDQU Y15, 448(DX)
412 | VMOVDQA 192(BP), Y0
413 | VMOVDQA 224(BP), Y1
414 | VMOVDQA 256(BP), Y2
415 | VMOVDQA 288(BP), Y3
416 | VMOVDQA 320(BP), Y4
417 | VMOVDQA 352(BP), Y5
418 | VMOVDQA 384(BP), Y6
419 | VMOVDQA 416(BP), Y7
420 | VPBROADCASTD 32(BP), Y8
421 | VPBROADCASTD 4+32(BP), Y9
422 | VPBROADCASTD 8+32(BP), Y10
423 | VPBROADCASTD 12+32(BP), Y11
424 | VMOVDQA 128(BP), Y12
425 | VMOVDQA 160(BP), Y13
426 | VPBROADCASTD 8+48(BP), Y14
427 | VPBROADCASTD 12+48(BP), Y15
428 | VPADDD Y8, Y0, Y0
429 | VPADDD Y9, Y1, Y1
430 | VPADDD Y10, Y2, Y2
431 | VPADDD Y11, Y3, Y3
432 | VPADDD Y12, Y4, Y4
433 | VPADDD Y13, Y5, Y5
434 | VPADDD Y14, Y6, Y6
435 | VPADDD Y15, Y7, Y7
436 | VPUNPCKLDQ Y1, Y0, Y8
437 | VPUNPCKLDQ Y3, Y2, Y9
438 | VPUNPCKHDQ Y1, Y0, Y12
439 | VPUNPCKHDQ Y3, Y2, Y13
440 | VPUNPCKLDQ Y5, Y4, Y10
441 | VPUNPCKLDQ Y7, Y6, Y11
442 | VPUNPCKHDQ Y5, Y4, Y14
443 | VPUNPCKHDQ Y7, Y6, Y15
444 | VPUNPCKLQDQ Y9, Y8, Y0
445 | VPUNPCKLQDQ Y11, Y10, Y1
446 | VPUNPCKHQDQ Y9, Y8, Y2
447 | VPUNPCKHQDQ Y11, Y10, Y3
448 | VPUNPCKLQDQ Y13, Y12, Y4
449 | VPUNPCKLQDQ Y15, Y14, Y5
450 | VPUNPCKHQDQ Y13, Y12, Y6
451 | VPUNPCKHQDQ Y15, Y14, Y7
452 | VPERM2I128 $0x20, Y1, Y0, Y8
453 | VPERM2I128 $0x20, Y3, Y2, Y9
454 | VPERM2I128 $0x31, Y1, Y0, Y12
455 | VPERM2I128 $0x31, Y3, Y2, Y13
456 | VPERM2I128 $0x20, Y5, Y4, Y10
457 | VPERM2I128 $0x20, Y7, Y6, Y11
458 | VPERM2I128 $0x31, Y5, Y4, Y14
459 | VPERM2I128 $0x31, Y7, Y6, Y15
460 | VMOVDQU Y8, 32(DX)
461 | VMOVDQU Y9, 96(DX)
462 | VMOVDQU Y10, 160(DX)
463 | VMOVDQU Y11, 224(DX)
464 | VMOVDQU Y12, 288(DX)
465 | VMOVDQU Y13, 352(DX)
466 | VMOVDQU Y14, 416(DX)
467 | VMOVDQU Y15, 480(DX)
468 |
469 | chacha_blocks_avx2_mainloop1_cont:
470 | ADDQ $512, DX
471 | SUBQ $512, CX
472 | CMPQ CX, $512
473 | JAE chacha_blocks_avx2_atleast512
474 | CMPQ CX, $256
475 | JB chacha_blocks_avx2_below256_fixup
476 |
477 | chacha_blocks_avx2_atleast256:
478 | MOVQ 48(BP), AX
479 | LEAQ 1(AX), R8
480 | LEAQ 2(AX), R9
481 | LEAQ 3(AX), R10
482 | LEAQ 4(AX), BX
483 | MOVL AX, 128(BP)
484 | MOVL R8, 4+128(BP)
485 | MOVL R9, 8+128(BP)
486 | MOVL R10, 12+128(BP)
487 | SHRQ $32, AX
488 | SHRQ $32, R8
489 | SHRQ $32, R9
490 | SHRQ $32, R10
491 | MOVL AX, 160(BP)
492 | MOVL R8, 4+160(BP)
493 | MOVL R9, 8+160(BP)
494 | MOVL R10, 12+160(BP)
495 | MOVQ BX, 48(BP)
496 |
497 | // MOVQ 64(BP), AX
498 | MOVQ $20, AX
499 | VPBROADCASTD 0(BP), X0
500 | VPBROADCASTD 4+0(BP), X1
501 | VPBROADCASTD 8+0(BP), X2
502 | VPBROADCASTD 12+0(BP), X3
503 | VPBROADCASTD 16(BP), X4
504 | VPBROADCASTD 4+16(BP), X5
505 | VPBROADCASTD 8+16(BP), X6
506 | VPBROADCASTD 12+16(BP), X7
507 | VPBROADCASTD 32(BP), X8
508 | VPBROADCASTD 4+32(BP), X9
509 | VPBROADCASTD 8+32(BP), X10
510 | VPBROADCASTD 12+32(BP), X11
511 | VMOVDQA 128(BP), X12
512 | VMOVDQA 160(BP), X13
513 | VPBROADCASTD 8+48(BP), X14
514 | VPBROADCASTD 12+48(BP), X15
515 |
516 | chacha_blocks_avx2_mainloop2:
517 | VPADDD X0, X4, X0
518 | VPADDD X1, X5, X1
519 | VPXOR X12, X0, X12
520 | VPXOR X13, X1, X13
521 | VPADDD X2, X6, X2
522 | VPADDD X3, X7, X3
523 | VPXOR X14, X2, X14
524 | VPXOR X15, X3, X15
525 | VPSHUFB 448(BP), X12, X12
526 | VPSHUFB 448(BP), X13, X13
527 | VPADDD X8, X12, X8
528 | VPADDD X9, X13, X9
529 | VPSHUFB 448(BP), X14, X14
530 | VPSHUFB 448(BP), X15, X15
531 | VPADDD X10, X14, X10
532 | VPADDD X11, X15, X11
533 | VMOVDQA X12, 96(BP)
534 | VPXOR X4, X8, X4
535 | VPXOR X5, X9, X5
536 | VPSLLD $ 12, X4, X12
537 | VPSRLD $20, X4, X4
538 | VPXOR X4, X12, X4
539 | VPSLLD $ 12, X5, X12
540 | VPSRLD $20, X5, X5
541 | VPXOR X5, X12, X5
542 | VPXOR X6, X10, X6
543 | VPXOR X7, X11, X7
544 | VPSLLD $ 12, X6, X12
545 | VPSRLD $20, X6, X6
546 | VPXOR X6, X12, X6
547 | VPSLLD $ 12, X7, X12
548 | VPSRLD $20, X7, X7
549 | VPXOR X7, X12, X7
550 | VPADDD X0, X4, X0
551 | VPADDD X1, X5, X1
552 | VPXOR 96(BP), X0, X12
553 | VPXOR X13, X1, X13
554 | VPADDD X2, X6, X2
555 | VPADDD X3, X7, X3
556 | VPXOR X14, X2, X14
557 | VPXOR X15, X3, X15
558 | VPSHUFB 480(BP), X12, X12
559 | VPSHUFB 480(BP), X13, X13
560 | VPADDD X8, X12, X8
561 | VPADDD X9, X13, X9
562 | VPSHUFB 480(BP), X14, X14
563 | VPSHUFB 480(BP), X15, X15
564 | VPADDD X10, X14, X10
565 | VPADDD X11, X15, X11
566 | VMOVDQA X12, 96(BP)
567 | VPXOR X4, X8, X4
568 | VPXOR X5, X9, X5
569 | VPSLLD $ 7, X4, X12
570 | VPSRLD $25, X4, X4
571 | VPXOR X4, X12, X4
572 | VPSLLD $ 7, X5, X12
573 | VPSRLD $25, X5, X5
574 | VPXOR X5, X12, X5
575 | VPXOR X6, X10, X6
576 | VPXOR X7, X11, X7
577 | VPSLLD $ 7, X6, X12
578 | VPSRLD $25, X6, X6
579 | VPXOR X6, X12, X6
580 | VPSLLD $ 7, X7, X12
581 | VPSRLD $25, X7, X7
582 | VPXOR X7, X12, X7
583 | VPADDD X0, X5, X0
584 | VPADDD X1, X6, X1
585 | VPXOR X15, X0, X15
586 | VPXOR 96(BP), X1, X12
587 | VPADDD X2, X7, X2
588 | VPADDD X3, X4, X3
589 | VPXOR X13, X2, X13
590 | VPXOR X14, X3, X14
591 | VPSHUFB 448(BP), X15, X15
592 | VPSHUFB 448(BP), X12, X12
593 | VPADDD X10, X15, X10
594 | VPADDD X11, X12, X11
595 | VPSHUFB 448(BP), X13, X13
596 | VPSHUFB 448(BP), X14, X14
597 | VPADDD X8, X13, X8
598 | VPADDD X9, X14, X9
599 | VMOVDQA X15, 96(BP)
600 | VPXOR X5, X10, X5
601 | VPXOR X6, X11, X6
602 | VPSLLD $ 12, X5, X15
603 | VPSRLD $20, X5, X5
604 | VPXOR X5, X15, X5
605 | VPSLLD $ 12, X6, X15
606 | VPSRLD $20, X6, X6
607 | VPXOR X6, X15, X6
608 | VPXOR X7, X8, X7
609 | VPXOR X4, X9, X4
610 | VPSLLD $ 12, X7, X15
611 | VPSRLD $20, X7, X7
612 | VPXOR X7, X15, X7
613 | VPSLLD $ 12, X4, X15
614 | VPSRLD $20, X4, X4
615 | VPXOR X4, X15, X4
616 | VPADDD X0, X5, X0
617 | VPADDD X1, X6, X1
618 | VPXOR 96(BP), X0, X15
619 | VPXOR X12, X1, X12
620 | VPADDD X2, X7, X2
621 | VPADDD X3, X4, X3
622 | VPXOR X13, X2, X13
623 | VPXOR X14, X3, X14
624 | VPSHUFB 480(BP), X15, X15
625 | VPSHUFB 480(BP), X12, X12
626 | VPADDD X10, X15, X10
627 | VPADDD X11, X12, X11
628 | VPSHUFB 480(BP), X13, X13
629 | VPSHUFB 480(BP), X14, X14
630 | VPADDD X8, X13, X8
631 | VPADDD X9, X14, X9
632 | VMOVDQA X15, 96(BP)
633 | VPXOR X5, X10, X5
634 | VPXOR X6, X11, X6
635 | VPSLLD $ 7, X5, X15
636 | VPSRLD $25, X5, X5
637 | VPXOR X5, X15, X5
638 | VPSLLD $ 7, X6, X15
639 | VPSRLD $25, X6, X6
640 | VPXOR X6, X15, X6
641 | VPXOR X7, X8, X7
642 | VPXOR X4, X9, X4
643 | VPSLLD $ 7, X7, X15
644 | VPSRLD $25, X7, X7
645 | VPXOR X7, X15, X7
646 | VPSLLD $ 7, X4, X15
647 | VPSRLD $25, X4, X4
648 | VPXOR X4, X15, X4
649 | VMOVDQA 96(BP), X15
650 | SUBQ $2, AX
651 | JNZ chacha_blocks_avx2_mainloop2
652 | VMOVDQA X8, 192(BP)
653 | VMOVDQA X9, 208(BP)
654 | VMOVDQA X10, 224(BP)
655 | VMOVDQA X11, 240(BP)
656 | VMOVDQA X12, 256(BP)
657 | VMOVDQA X13, 272(BP)
658 | VMOVDQA X14, 288(BP)
659 | VMOVDQA X15, 304(BP)
660 | VPBROADCASTD 0(BP), X8
661 | VPBROADCASTD 4+0(BP), X9
662 | VPBROADCASTD 8+0(BP), X10
663 | VPBROADCASTD 12+0(BP), X11
664 | VPBROADCASTD 16(BP), X12
665 | VPBROADCASTD 4+16(BP), X13
666 | VPBROADCASTD 8+16(BP), X14
667 | VPBROADCASTD 12+16(BP), X15
668 | VPADDD X8, X0, X0
669 | VPADDD X9, X1, X1
670 | VPADDD X10, X2, X2
671 | VPADDD X11, X3, X3
672 | VPADDD X12, X4, X4
673 | VPADDD X13, X5, X5
674 | VPADDD X14, X6, X6
675 | VPADDD X15, X7, X7
676 | VPUNPCKLDQ X1, X0, X8
677 | VPUNPCKLDQ X3, X2, X9
678 | VPUNPCKHDQ X1, X0, X12
679 | VPUNPCKHDQ X3, X2, X13
680 | VPUNPCKLDQ X5, X4, X10
681 | VPUNPCKLDQ X7, X6, X11
682 | VPUNPCKHDQ X5, X4, X14
683 | VPUNPCKHDQ X7, X6, X15
684 | VPUNPCKLQDQ X9, X8, X0
685 | VPUNPCKLQDQ X11, X10, X1
686 | VPUNPCKHQDQ X9, X8, X2
687 | VPUNPCKHQDQ X11, X10, X3
688 | VPUNPCKLQDQ X13, X12, X4
689 | VPUNPCKLQDQ X15, X14, X5
690 | VPUNPCKHQDQ X13, X12, X6
691 | VPUNPCKHQDQ X15, X14, X7
692 | ANDQ SI, SI
693 | JZ chacha_blocks_avx2_noinput2
694 | VPXOR 0(SI), X0, X0
695 | VPXOR 16(SI), X1, X1
696 | VPXOR 64(SI), X2, X2
697 | VPXOR 80(SI), X3, X3
698 | VPXOR 128(SI), X4, X4
699 | VPXOR 144(SI), X5, X5
700 | VPXOR 192(SI), X6, X6
701 | VPXOR 208(SI), X7, X7
702 | VMOVDQU X0, 0(DX)
703 | VMOVDQU X1, 16(DX)
704 | VMOVDQU X2, 64(DX)
705 | VMOVDQU X3, 80(DX)
706 | VMOVDQU X4, 128(DX)
707 | VMOVDQU X5, 144(DX)
708 | VMOVDQU X6, 192(DX)
709 | VMOVDQU X7, 208(DX)
710 | VMOVDQA 192(BP), X0
711 | VMOVDQA 208(BP), X1
712 | VMOVDQA 224(BP), X2
713 | VMOVDQA 240(BP), X3
714 | VMOVDQA 256(BP), X4
715 | VMOVDQA 272(BP), X5
716 | VMOVDQA 288(BP), X6
717 | VMOVDQA 304(BP), X7
718 | VPBROADCASTD 32(BP), X8
719 | VPBROADCASTD 4+32(BP), X9
720 | VPBROADCASTD 8+32(BP), X10
721 | VPBROADCASTD 12+32(BP), X11
722 | VMOVDQA 128(BP), X12
723 | VMOVDQA 160(BP), X13
724 | VPBROADCASTD 8+48(BP), X14
725 | VPBROADCASTD 12+48(BP), X15
726 | VPADDD X8, X0, X0
727 | VPADDD X9, X1, X1
728 | VPADDD X10, X2, X2
729 | VPADDD X11, X3, X3
730 | VPADDD X12, X4, X4
731 | VPADDD X13, X5, X5
732 | VPADDD X14, X6, X6
733 | VPADDD X15, X7, X7
734 | VPUNPCKLDQ X1, X0, X8
735 | VPUNPCKLDQ X3, X2, X9
736 | VPUNPCKHDQ X1, X0, X12
737 | VPUNPCKHDQ X3, X2, X13
738 | VPUNPCKLDQ X5, X4, X10
739 | VPUNPCKLDQ X7, X6, X11
740 | VPUNPCKHDQ X5, X4, X14
741 | VPUNPCKHDQ X7, X6, X15
742 | VPUNPCKLQDQ X9, X8, X0
743 | VPUNPCKLQDQ X11, X10, X1
744 | VPUNPCKHQDQ X9, X8, X2
745 | VPUNPCKHQDQ X11, X10, X3
746 | VPUNPCKLQDQ X13, X12, X4
747 | VPUNPCKLQDQ X15, X14, X5
748 | VPUNPCKHQDQ X13, X12, X6
749 | VPUNPCKHQDQ X15, X14, X7
750 | VPXOR 32(SI), X0, X0
751 | VPXOR 48(SI), X1, X1
752 | VPXOR 96(SI), X2, X2
753 | VPXOR 112(SI), X3, X3
754 | VPXOR 160(SI), X4, X4
755 | VPXOR 176(SI), X5, X5
756 | VPXOR 224(SI), X6, X6
757 | VPXOR 240(SI), X7, X7
758 | VMOVDQU X0, 32(DX)
759 | VMOVDQU X1, 48(DX)
760 | VMOVDQU X2, 96(DX)
761 | VMOVDQU X3, 112(DX)
762 | VMOVDQU X4, 160(DX)
763 | VMOVDQU X5, 176(DX)
764 | VMOVDQU X6, 224(DX)
765 | VMOVDQU X7, 240(DX)
766 | ADDQ $256, SI
767 | JMP chacha_blocks_avx2_mainloop2_cont
768 |
769 | chacha_blocks_avx2_noinput2:
770 | VMOVDQU X0, 0(DX)
771 | VMOVDQU X1, 16(DX)
772 | VMOVDQU X2, 64(DX)
773 | VMOVDQU X3, 80(DX)
774 | VMOVDQU X4, 128(DX)
775 | VMOVDQU X5, 144(DX)
776 | VMOVDQU X6, 192(DX)
777 | VMOVDQU X7, 208(DX)
778 | VMOVDQA 192(BP), X0
779 | VMOVDQA 208(BP), X1
780 | VMOVDQA 224(BP), X2
781 | VMOVDQA 240(BP), X3
782 | VMOVDQA 256(BP), X4
783 | VMOVDQA 272(BP), X5
784 | VMOVDQA 288(BP), X6
785 | VMOVDQA 304(BP), X7
786 | VPBROADCASTD 32(BP), X8
787 | VPBROADCASTD 4+32(BP), X9
788 | VPBROADCASTD 8+32(BP), X10
789 | VPBROADCASTD 12+32(BP), X11
790 | VMOVDQA 128(BP), X12
791 | VMOVDQA 160(BP), X13
792 | VPBROADCASTD 8+48(BP), X14
793 | VPBROADCASTD 12+48(BP), X15
794 | VPADDD X8, X0, X0
795 | VPADDD X9, X1, X1
796 | VPADDD X10, X2, X2
797 | VPADDD X11, X3, X3
798 | VPADDD X12, X4, X4
799 | VPADDD X13, X5, X5
800 | VPADDD X14, X6, X6
801 | VPADDD X15, X7, X7
802 | VPUNPCKLDQ X1, X0, X8
803 | VPUNPCKLDQ X3, X2, X9
804 | VPUNPCKHDQ X1, X0, X12
805 | VPUNPCKHDQ X3, X2, X13
806 | VPUNPCKLDQ X5, X4, X10
807 | VPUNPCKLDQ X7, X6, X11
808 | VPUNPCKHDQ X5, X4, X14
809 | VPUNPCKHDQ X7, X6, X15
810 | VPUNPCKLQDQ X9, X8, X0
811 | VPUNPCKLQDQ X11, X10, X1
812 | VPUNPCKHQDQ X9, X8, X2
813 | VPUNPCKHQDQ X11, X10, X3
814 | VPUNPCKLQDQ X13, X12, X4
815 | VPUNPCKLQDQ X15, X14, X5
816 | VPUNPCKHQDQ X13, X12, X6
817 | VPUNPCKHQDQ X15, X14, X7
818 | VMOVDQU X0, 32(DX)
819 | VMOVDQU X1, 48(DX)
820 | VMOVDQU X2, 96(DX)
821 | VMOVDQU X3, 112(DX)
822 | VMOVDQU X4, 160(DX)
823 | VMOVDQU X5, 176(DX)
824 | VMOVDQU X6, 224(DX)
825 | VMOVDQU X7, 240(DX)
826 |
827 | chacha_blocks_avx2_mainloop2_cont:
828 | ADDQ $256, DX
829 | SUBQ $256, CX
830 | CMPQ CX, $256
831 | JAE chacha_blocks_avx2_atleast256
832 |
833 | chacha_blocks_avx2_below256_fixup:
834 | VMOVDQA 448(BP), X6
835 | VMOVDQA 480(BP), X7
836 | VMOVDQA 0(BP), X8
837 | VMOVDQA 16(BP), X9
838 | VMOVDQA 32(BP), X10
839 | VMOVDQA 48(BP), X11
840 | MOVQ $1, R9
841 |
842 | chacha_blocks_avx2_below256:
843 | VMOVQ R9, X5
844 | ANDQ CX, CX
845 | JZ chacha_blocks_avx2_done
846 | CMPQ CX, $64
847 | JAE chacha_blocks_avx2_above63
848 | MOVQ DX, R9
849 | ANDQ SI, SI
850 | JZ chacha_blocks_avx2_noinput3
851 | MOVQ CX, R10
852 | MOVQ BP, DX
853 | ADDQ R10, SI
854 | ADDQ R10, DX
855 | NEGQ R10
856 |
857 | chacha_blocks_avx2_copyinput:
858 | MOVB (SI)(R10*1), AX
859 | MOVB AX, (DX)(R10*1)
860 | INCQ R10
861 | JNZ chacha_blocks_avx2_copyinput
862 | MOVQ BP, SI
863 |
864 | chacha_blocks_avx2_noinput3:
865 | MOVQ BP, DX
866 |
867 | chacha_blocks_avx2_above63:
868 | VMOVDQA X8, X0
869 | VMOVDQA X9, X1
870 | VMOVDQA X10, X2
871 | VMOVDQA X11, X3
872 |
873 | // MOVQ 64(BP), AX
874 | MOVQ $20, AX
875 |
876 | chacha_blocks_avx2_mainloop3:
877 | VPADDD X0, X1, X0
878 | VPXOR X3, X0, X3
879 | VPSHUFB X6, X3, X3
880 | VPADDD X2, X3, X2
881 | VPXOR X1, X2, X1
882 | VPSLLD $12, X1, X4
883 | VPSRLD $20, X1, X1
884 | VPXOR X1, X4, X1
885 | VPADDD X0, X1, X0
886 | VPXOR X3, X0, X3
887 | VPSHUFB X7, X3, X3
888 | VPSHUFD $0x93, X0, X0
889 | VPADDD X2, X3, X2
890 | VPSHUFD $0x4e, X3, X3
891 | VPXOR X1, X2, X1
892 | VPSHUFD $0x39, X2, X2
893 | VPSLLD $7, X1, X4
894 | VPSRLD $25, X1, X1
895 | VPXOR X1, X4, X1
896 | VPADDD X0, X1, X0
897 | VPXOR X3, X0, X3
898 | VPSHUFB X6, X3, X3
899 | VPADDD X2, X3, X2
900 | VPXOR X1, X2, X1
901 | VPSLLD $12, X1, X4
902 | VPSRLD $20, X1, X1
903 | VPXOR X1, X4, X1
904 | VPADDD X0, X1, X0
905 | VPXOR X3, X0, X3
906 | VPSHUFB X7, X3, X3
907 | VPSHUFD $0x39, X0, X0
908 | VPADDD X2, X3, X2
909 | VPSHUFD $0x4e, X3, X3
910 | VPXOR X1, X2, X1
911 | VPSHUFD $0x93, X2, X2
912 | VPSLLD $7, X1, X4
913 | VPSRLD $25, X1, X1
914 | VPXOR X1, X4, X1
915 | SUBQ $2, AX
916 | JNZ chacha_blocks_avx2_mainloop3
917 | VPADDD X0, X8, X0
918 | VPADDD X1, X9, X1
919 | VPADDD X2, X10, X2
920 | VPADDD X3, X11, X3
921 | ANDQ SI, SI
922 | JZ chacha_blocks_avx2_noinput4
923 | VPXOR 0(SI), X0, X0
924 | VPXOR 16(SI), X1, X1
925 | VPXOR 32(SI), X2, X2
926 | VPXOR 48(SI), X3, X3
927 | ADDQ $64, SI
928 |
929 | chacha_blocks_avx2_noinput4:
930 | VMOVDQU X0, 0(DX)
931 | VMOVDQU X1, 16(DX)
932 | VMOVDQU X2, 32(DX)
933 | VMOVDQU X3, 48(DX)
934 | VPADDQ X11, X5, X11
935 | CMPQ CX, $64
936 | JBE chacha_blocks_avx2_mainloop3_finishup
937 | ADDQ $64, DX
938 | SUBQ $64, CX
939 | JMP chacha_blocks_avx2_below256
940 |
941 | chacha_blocks_avx2_mainloop3_finishup:
942 | CMPQ CX, $64
943 | JE chacha_blocks_avx2_done
944 | ADDQ CX, R9
945 | ADDQ CX, DX
946 | NEGQ CX
947 |
948 | chacha_blocks_avx2_copyoutput:
949 | MOVB (DX)(CX*1), AX
950 | MOVB AX, (R9)(CX*1)
951 | INCQ CX
952 | JNZ chacha_blocks_avx2_copyoutput
953 |
954 | chacha_blocks_avx2_done:
955 | VMOVDQU X11, 32(DI)
956 |
957 | VZEROUPPER
958 | RET
959 |
960 | // func hChaChaAVX2(key, nonce []byte, dst *byte)
961 | TEXT ·hChaChaAVX2(SB), NOSPLIT|NOFRAME, $0-56 // No stack frame; 56 bytes of arguments (two slice headers and one pointer).
962 | MOVQ key+0(FP), DI // &key[0] -> DI
963 | MOVQ nonce+24(FP), SI // &nonce[0] -> SI
964 | MOVQ dst+48(FP), DX // dst -> DX
965 |
966 | MOVL $20, CX // Round counter; every loop iteration below subtracts 2.
967 |
968 | LEAQ ·chacha_constants<>(SB), AX
969 | VMOVDQA 0(AX), X0 // Row 0 of the working state (table is defined outside this view; presumably the ChaCha constants).
970 | VMOVDQA 16(AX), X6 // Byte-shuffle mask used for the first rotate of each quarter round (presumably rotl 16; confirm against the table).
971 | VMOVDQA 32(AX), X5 // Byte-shuffle mask used for the third rotate of each quarter round (presumably rotl 8; confirm against the table).
972 |
973 | VMOVDQU 0(DI), X1 // Row 1 = key[0:16]
974 | VMOVDQU 16(DI), X2 // Row 2 = key[16:32]
975 | VMOVDQU 0(SI), X3 // Row 3 = nonce[0:16]; the slice lengths are not checked here.
976 |
977 | hhacha_mainloop_avx2: // One iteration = two rounds on rows a=X0, b=X1, c=X2, d=X3; X4 is scratch.
978 | VPADDD X0, X1, X0 // a += b
979 | VPXOR X3, X0, X3 // d ^= a
980 | VPSHUFB X6, X3, X3 // d = shuffle(d, X6)
981 | VPADDD X2, X3, X2 // c += d
982 | VPXOR X1, X2, X1 // b ^= c
983 | VPSLLD $12, X1, X4
984 | VPSRLD $20, X1, X1
985 | VPXOR X1, X4, X1 // b = (b << 12) | (b >> 20)
986 | VPADDD X0, X1, X0 // a += b
987 | VPXOR X3, X0, X3 // d ^= a
988 | VPSHUFB X5, X3, X3 // d = shuffle(d, X5)
989 | VPADDD X2, X3, X2 // c += d
990 | VPXOR X1, X2, X1 // b ^= c
991 | VPSLLD $7, X1, X4
992 | VPSRLD $25, X1, X1
993 | VPSHUFD $0x93, X0, X0 // Rotate the four 32-bit lanes of a by one position (interleaved with the rotate of b).
994 | VPXOR X1, X4, X1 // b = (b << 7) | (b >> 25)
995 | VPSHUFD $0x4e, X3, X3 // Rotate the lanes of d by two positions.
996 | VPADDD X0, X1, X0 // Second round of the pair starts here: a += b
997 | VPXOR X3, X0, X3 // d ^= a
998 | VPSHUFB X6, X3, X3 // d = shuffle(d, X6)
999 | VPSHUFD $0x39, X2, X2 // Rotate the lanes of c by one position, opposite direction to a; must precede the next use of c.
1000 | VPADDD X2, X3, X2 // c += d
1001 | VPXOR X1, X2, X1 // b ^= c
1002 | VPSLLD $12, X1, X4
1003 | VPSRLD $20, X1, X1
1004 | VPXOR X1, X4, X1 // b = (b << 12) | (b >> 20)
1005 | VPADDD X0, X1, X0 // a += b
1006 | VPXOR X3, X0, X3 // d ^= a
1007 | VPSHUFB X5, X3, X3 // d = shuffle(d, X5)
1008 | VPADDD X2, X3, X2 // c += d
1009 | VPXOR X1, X2, X1 // b ^= c
1010 | VPSHUFD $0x39, X0, X0 // Undo the lane rotation of a.
1011 | VPSLLD $7, X1, X4
1012 | VPSHUFD $0x4e, X3, X3 // Undo the lane rotation of d.
1013 | VPSRLD $25, X1, X1
1014 | VPSHUFD $0x93, X2, X2 // Undo the lane rotation of c.
1015 | VPXOR X1, X4, X1 // b = (b << 7) | (b >> 25)
1016 | SUBL $2, CX
1017 | JNE hhacha_mainloop_avx2 // 20 / 2 = 10 iterations.
1018 |
1019 | VMOVDQU X0, (DX) // dst[0:16] = row 0
1020 | VMOVDQU X3, 16(DX) // dst[16:32] = row 3; the initial state is not added back and rows 1 and 2 are not output.
1021 |
1022 | VZEROUPPER
1023 | RET
1024 |
1025 | // func blocksSSSE3(s *[api.StateSize]uint32, in, out []byte)
1026 | TEXT ·blocksSSSE3(SB), NOSPLIT, $576-56 // 576-byte frame: 512 bytes of scratch addressed off BP plus up to 64 bytes of alignment slack.
1027 | // This is Andrew Moon's SSSE3 ChaCha implementation taken from
1028 | // supercop-20190110, with some minor changes, primarily calling
1029 | // convention and assembly dialect related.
1030 |
1031 | // Align the stack on a 64 byte boundary.
1032 | MOVQ SP, BP
1033 | ADDQ $64, BP
1034 | ANDQ $-64, BP // BP = (SP+64) rounded down to a multiple of 64, so SP < BP <= SP+64.
1035 |
1036 | // Go calling convention -> SYSV AMD64 (and a fixup).
1037 | MOVQ s+0(FP), DI // &s -> DI
1038 | ADDQ $16, DI // Skip the ChaCha constants in the chachaState.
1039 | MOVQ in+8(FP), SI // &in[0] -> SI
1040 | MOVQ out+32(FP), DX // &out[0] -> DX
1041 | MOVQ in_len+16(FP), CX // len(in) -> CX
1042 |
1043 | // Begin the main body of `chacha_blocks_ssse3`.
1044 | //
1045 | // Mostly a direct translation except:
1046 | // * The number of rounds is always 20.
1047 | // * %rbp (BP) is the base of the aligned scratch area (presumably replacing %rsp in the original; confirm against supercop).
1048 | LEAQ ·chacha_constants<>(SB), AX
1049 | MOVO 0(AX), X8 // State row 0, taken from the constants table rather than from s.
1050 | MOVO 16(AX), X6 // Byte-shuffle mask for the first rotate of each quarter round.
1051 | MOVO 32(AX), X7 // Byte-shuffle mask for the third rotate of each quarter round.
1052 | MOVOU 0(DI), X9 // State words 4..7
1053 | MOVOU 16(DI), X10 // State words 8..11
1054 | MOVOU 32(DI), X11 // State words 12..15; the low 64 bits are treated as the block counter below.
1055 |
1056 | // MOVQ 48(DI), AX
1057 | MOVQ $1, R9 // Counter increment for the one-block-at-a-time path.
1058 | MOVO X8, 0(BP) // 0..63(BP): copy of the four state rows.
1059 | MOVO X9, 16(BP)
1060 | MOVO X10, 32(BP)
1061 | MOVO X11, 48(BP)
1062 |
1063 | MOVO X6, 80(BP) // 80(BP) and 96(BP): the two shuffle masks, used as memory operands in the 4-block loop.
1064 | MOVO X7, 96(BP)
1065 | // MOVQ AX, 64(BP)
1066 | CMPQ CX, $256
1067 | JB chacha_blocks_ssse3_below256 // Fewer than 256 bytes: skip the 4-block setup.
1068 | PSHUFD $0x00, X8, X0 // Broadcast each of state words 0..11, 14 and 15 across a full register
1069 | PSHUFD $0x55, X8, X1 // (one lane per block) and save them at 128..304(BP) and 352..368(BP).
1070 | PSHUFD $0xaa, X8, X2
1071 | PSHUFD $0xff, X8, X3
1072 | MOVO X0, 128(BP)
1073 | MOVO X1, 144(BP)
1074 | MOVO X2, 160(BP)
1075 | MOVO X3, 176(BP)
1076 | PSHUFD $0x00, X9, X0
1077 | PSHUFD $0x55, X9, X1
1078 | PSHUFD $0xaa, X9, X2
1079 | PSHUFD $0xff, X9, X3
1080 | MOVO X0, 192(BP)
1081 | MOVO X1, 208(BP)
1082 | MOVO X2, 224(BP)
1083 | MOVO X3, 240(BP)
1084 | PSHUFD $0x00, X10, X0
1085 | PSHUFD $0x55, X10, X1
1086 | PSHUFD $0xaa, X10, X2
1087 | PSHUFD $0xff, X10, X3
1088 | MOVO X0, 256(BP)
1089 | MOVO X1, 272(BP)
1090 | MOVO X2, 288(BP)
1091 | MOVO X3, 304(BP)
1092 | PSHUFD $0xaa, X11, X0 // Only words 14 and 15 are broadcast; words 12 and 13 (the counter) are rebuilt per iteration.
1093 | PSHUFD $0xff, X11, X1
1094 | MOVO X0, 352(BP)
1095 | MOVO X1, 368(BP)
1096 | JMP chacha_blocks_ssse3_atleast256
1097 |
1098 | // .p2align 6,,63
1099 | // # align to 4 mod 64
1100 | // nop;nop;nop;nop;
1101 | chacha_blocks_ssse3_atleast256: // Produce four 64-byte blocks (256 bytes) per pass.
1102 | MOVQ 48(BP), AX // AX = 64-bit counter n
1103 | LEAQ 1(AX), R8 // n+1
1104 | LEAQ 2(AX), R9 // n+2 (clobbers R9; it is reset to 1 after this loop)
1105 | LEAQ 3(AX), R10 // n+3
1106 | LEAQ 4(AX), BX // n+4, the counter for the next pass
1107 | MOVL AX, 320(BP) // 320(BP): low 32 bits of the four counters, one per lane.
1108 | MOVL R8, 4+320(BP)
1109 | MOVL R9, 8+320(BP)
1110 | MOVL R10, 12+320(BP)
1111 | SHRQ $32, AX
1112 | SHRQ $32, R8
1113 | SHRQ $32, R9
1114 | SHRQ $32, R10
1115 | MOVL AX, 336(BP) // 336(BP): high 32 bits of the four counters.
1116 | MOVL R8, 4+336(BP)
1117 | MOVL R9, 8+336(BP)
1118 | MOVL R10, 12+336(BP)
1119 | MOVQ BX, 48(BP) // Store n+4 back into the saved state row.
1120 |
1121 | // MOVQ 64(BP), AX
1122 | MOVQ $20, AX // Round counter; every loop iteration subtracts 2.
1123 | MOVO 128(BP), X0 // Xi holds state word i for all four blocks.
1124 | MOVO 144(BP), X1
1125 | MOVO 160(BP), X2
1126 | MOVO 176(BP), X3
1127 | MOVO 192(BP), X4
1128 | MOVO 208(BP), X5
1129 | MOVO 224(BP), X6
1130 | MOVO 240(BP), X7
1131 | MOVO 256(BP), X8
1132 | MOVO 272(BP), X9
1133 | MOVO 288(BP), X10
1134 | MOVO 304(BP), X11
1135 | MOVO 320(BP), X12
1136 | MOVO 336(BP), X13
1137 | MOVO 352(BP), X14
1138 | MOVO 368(BP), X15
1139 |
1140 | chacha_blocks_ssse3_mainloop1: // Two rounds per iteration: words grouped as (i, i+4, i+8, i+12) first, then as diagonals.
1141 | PADDD X4, X0 // First round, groups (0,4,8,12) (1,5,9,13) (2,6,10,14) (3,7,11,15): a += b
1142 | PADDD X5, X1
1143 | PXOR X0, X12 // d ^= a
1144 | PXOR X1, X13
1145 | PADDD X6, X2
1146 | PADDD X7, X3
1147 | PXOR X2, X14
1148 | PXOR X3, X15
1149 | PSHUFB 80(BP), X12 // d = shuffle(d, first mask)
1150 | PSHUFB 80(BP), X13
1151 | PADDD X12, X8 // c += d
1152 | PADDD X13, X9
1153 | PSHUFB 80(BP), X14
1154 | PSHUFB 80(BP), X15
1155 | PADDD X14, X10
1156 | PADDD X15, X11
1157 | MOVO X12, 112(BP) // Spill X12 so it can serve as the temporary for the shift-based rotates.
1158 | PXOR X8, X4 // b ^= c
1159 | PXOR X9, X5
1160 | MOVO X4, X12
1161 | PSLLL $ 12, X4
1162 | PSRLL $20, X12
1163 | PXOR X12, X4 // b = (b << 12) | (b >> 20)
1164 | MOVO X5, X12
1165 | PSLLL $ 12, X5
1166 | PSRLL $20, X12
1167 | PXOR X12, X5
1168 | PXOR X10, X6
1169 | PXOR X11, X7
1170 | MOVO X6, X12
1171 | PSLLL $ 12, X6
1172 | PSRLL $20, X12
1173 | PXOR X12, X6
1174 | MOVO X7, X12
1175 | PSLLL $ 12, X7
1176 | PSRLL $20, X12
1177 | PXOR X12, X7
1178 | MOVO 112(BP), X12 // Restore word 12.
1179 | PADDD X4, X0 // a += b
1180 | PADDD X5, X1
1181 | PXOR X0, X12 // d ^= a
1182 | PXOR X1, X13
1183 | PADDD X6, X2
1184 | PADDD X7, X3
1185 | PXOR X2, X14
1186 | PXOR X3, X15
1187 | PSHUFB 96(BP), X12 // d = shuffle(d, second mask)
1188 | PSHUFB 96(BP), X13
1189 | PADDD X12, X8 // c += d
1190 | PADDD X13, X9
1191 | PSHUFB 96(BP), X14
1192 | PSHUFB 96(BP), X15
1193 | PADDD X14, X10
1194 | PADDD X15, X11
1195 | MOVO X12, 112(BP) // Spill X12 again for the next set of rotates.
1196 | PXOR X8, X4 // b ^= c
1197 | PXOR X9, X5
1198 | MOVO X4, X12
1199 | PSLLL $ 7, X4
1200 | PSRLL $25, X12
1201 | PXOR X12, X4 // b = (b << 7) | (b >> 25)
1202 | MOVO X5, X12
1203 | PSLLL $ 7, X5
1204 | PSRLL $25, X12
1205 | PXOR X12, X5
1206 | PXOR X10, X6
1207 | PXOR X11, X7
1208 | MOVO X6, X12
1209 | PSLLL $ 7, X6
1210 | PSRLL $25, X12
1211 | PXOR X12, X6
1212 | MOVO X7, X12
1213 | PSLLL $ 7, X7
1214 | PSRLL $25, X12
1215 | PXOR X12, X7
1216 | MOVO 112(BP), X12 // Restore word 12.
1217 | PADDD X5, X0 // Second round, groups (0,5,10,15) (1,6,11,12) (2,7,8,13) (3,4,9,14): a += b
1218 | PADDD X6, X1
1219 | PXOR X0, X15 // d ^= a
1220 | PXOR X1, X12
1221 | PADDD X7, X2
1222 | PADDD X4, X3
1223 | PXOR X2, X13
1224 | PXOR X3, X14
1225 | PSHUFB 80(BP), X15 // d = shuffle(d, first mask)
1226 | PSHUFB 80(BP), X12
1227 | PADDD X15, X10 // c += d
1228 | PADDD X12, X11
1229 | PSHUFB 80(BP), X13
1230 | PSHUFB 80(BP), X14
1231 | PADDD X13, X8
1232 | PADDD X14, X9
1233 | MOVO X15, 112(BP) // In this round X15 is the spilled register / rotate temporary.
1234 | PXOR X10, X5 // b ^= c
1235 | PXOR X11, X6
1236 | MOVO X5, X15
1237 | PSLLL $ 12, X5
1238 | PSRLL $20, X15
1239 | PXOR X15, X5 // b = (b << 12) | (b >> 20)
1240 | MOVO X6, X15
1241 | PSLLL $ 12, X6
1242 | PSRLL $20, X15
1243 | PXOR X15, X6
1244 | PXOR X8, X7
1245 | PXOR X9, X4
1246 | MOVO X7, X15
1247 | PSLLL $ 12, X7
1248 | PSRLL $20, X15
1249 | PXOR X15, X7
1250 | MOVO X4, X15
1251 | PSLLL $ 12, X4
1252 | PSRLL $20, X15
1253 | PXOR X15, X4
1254 | MOVO 112(BP), X15 // Restore word 15.
1255 | PADDD X5, X0 // a += b
1256 | PADDD X6, X1
1257 | PXOR X0, X15 // d ^= a
1258 | PXOR X1, X12
1259 | PADDD X7, X2
1260 | PADDD X4, X3
1261 | PXOR X2, X13
1262 | PXOR X3, X14
1263 | PSHUFB 96(BP), X15 // d = shuffle(d, second mask)
1264 | PSHUFB 96(BP), X12
1265 | PADDD X15, X10 // c += d
1266 | PADDD X12, X11
1267 | PSHUFB 96(BP), X13
1268 | PSHUFB 96(BP), X14
1269 | PADDD X13, X8
1270 | PADDD X14, X9
1271 | MOVO X15, 112(BP) // Spill X15 for the final set of rotates.
1272 | PXOR X10, X5 // b ^= c
1273 | PXOR X11, X6
1274 | MOVO X5, X15
1275 | PSLLL $ 7, X5
1276 | PSRLL $25, X15
1277 | PXOR X15, X5 // b = (b << 7) | (b >> 25)
1278 | MOVO X6, X15
1279 | PSLLL $ 7, X6
1280 | PSRLL $25, X15
1281 | PXOR X15, X6
1282 | PXOR X8, X7
1283 | PXOR X9, X4
1284 | MOVO X7, X15
1285 | PSLLL $ 7, X7
1286 | PSRLL $25, X15
1287 | PXOR X15, X7
1288 | MOVO X4, X15
1289 | PSLLL $ 7, X4
1290 | PSRLL $25, X15
1291 | PXOR X15, X4
1292 | SUBQ $2, AX
1293 | MOVO 112(BP), X15 // Restore word 15; MOVO leaves the flags from SUBQ intact for the JNZ below.
1294 | JNZ chacha_blocks_ssse3_mainloop1
1295 | PADDD 128(BP), X0 // Add the initial state words back into the permuted state.
1296 | PADDD 144(BP), X1
1297 | PADDD 160(BP), X2
1298 | PADDD 176(BP), X3
1299 | PADDD 192(BP), X4
1300 | PADDD 208(BP), X5
1301 | PADDD 224(BP), X6
1302 | PADDD 240(BP), X7
1303 | PADDD 256(BP), X8
1304 | PADDD 272(BP), X9
1305 | PADDD 288(BP), X10
1306 | PADDD 304(BP), X11
1307 | PADDD 320(BP), X12
1308 | PADDD 336(BP), X13
1309 | PADDD 352(BP), X14
1310 | PADDD 368(BP), X15
1311 | MOVO X8, 384(BP) // Park words 8..15 at 384..496(BP) to free registers while words 0..7 are transposed.
1312 | MOVO X9, 400(BP)
1313 | MOVO X10, 416(BP)
1314 | MOVO X11, 432(BP)
1315 | MOVO X12, 448(BP)
1316 | MOVO X13, 464(BP)
1317 | MOVO X14, 480(BP)
1318 | MOVO X15, 496(BP)
1319 | MOVO X0, X8 // Transpose words 0..7 from one-word-per-register to 16-byte runs of each block.
1320 | MOVO X2, X9
1321 | MOVO X4, X10
1322 | MOVO X6, X11
1323 | PUNPCKHLQ X1, X0 // Interleave 32-bit lanes (high halves).
1324 | PUNPCKHLQ X3, X2
1325 | PUNPCKHLQ X5, X4
1326 | PUNPCKHLQ X7, X6
1327 | PUNPCKLLQ X1, X8 // Interleave 32-bit lanes (low halves).
1328 | PUNPCKLLQ X3, X9
1329 | PUNPCKLLQ X5, X10
1330 | PUNPCKLLQ X7, X11
1331 | MOVO X0, X1
1332 | MOVO X4, X3
1333 | MOVO X8, X5
1334 | MOVO X10, X7
1335 | PUNPCKHQDQ X2, X0 // Interleave 64-bit halves to finish the transpose.
1336 | PUNPCKHQDQ X6, X4
1337 | PUNPCKHQDQ X9, X8
1338 | PUNPCKHQDQ X11, X10
1339 | PUNPCKLQDQ X2, X1
1340 | PUNPCKLQDQ X6, X3
1341 | PUNPCKLQDQ X9, X5
1342 | PUNPCKLQDQ X11, X7
1343 | ANDQ SI, SI
1344 | JZ chacha_blocks_ssse3_noinput1 // Nil input pointer: write the raw keystream instead of XORing.
1345 | MOVOU 0(SI), X2 // Load bytes 0..31 of each of the four 64-byte input blocks.
1346 | MOVOU 16(SI), X6
1347 | MOVOU 64(SI), X9
1348 | MOVOU 80(SI), X11
1349 | MOVOU 128(SI), X12
1350 | MOVOU 144(SI), X13
1351 | MOVOU 192(SI), X14
1352 | MOVOU 208(SI), X15
1353 | PXOR X2, X5
1354 | PXOR X6, X7
1355 | PXOR X9, X8
1356 | PXOR X11, X10
1357 | PXOR X12, X1
1358 | PXOR X13, X3
1359 | PXOR X14, X0
1360 | PXOR X15, X4
1361 | MOVOU X5, 0(DX) // Store bytes 0..31 of each of the four output blocks.
1362 | MOVOU X7, 16(DX)
1363 | MOVOU X8, 64(DX)
1364 | MOVOU X10, 80(DX)
1365 | MOVOU X1, 128(DX)
1366 | MOVOU X3, 144(DX)
1367 | MOVOU X0, 192(DX)
1368 | MOVOU X4, 208(DX)
1369 | MOVO 384(BP), X0 // Reload the parked words 8..15 and transpose them the same way.
1370 | MOVO 400(BP), X1
1371 | MOVO 416(BP), X2
1372 | MOVO 432(BP), X3
1373 | MOVO 448(BP), X4
1374 | MOVO 464(BP), X5
1375 | MOVO 480(BP), X6
1376 | MOVO 496(BP), X7
1377 | MOVO X0, X8
1378 | MOVO X2, X9
1379 | MOVO X4, X10
1380 | MOVO X6, X11
1381 | PUNPCKLLQ X1, X8
1382 | PUNPCKLLQ X3, X9
1383 | PUNPCKHLQ X1, X0
1384 | PUNPCKHLQ X3, X2
1385 | PUNPCKLLQ X5, X10
1386 | PUNPCKLLQ X7, X11
1387 | PUNPCKHLQ X5, X4
1388 | PUNPCKHLQ X7, X6
1389 | MOVO X8, X1
1390 | MOVO X0, X3
1391 | MOVO X10, X5
1392 | MOVO X4, X7
1393 | PUNPCKLQDQ X9, X1
1394 | PUNPCKLQDQ X11, X5
1395 | PUNPCKHQDQ X9, X8
1396 | PUNPCKHQDQ X11, X10
1397 | PUNPCKLQDQ X2, X3
1398 | PUNPCKLQDQ X6, X7
1399 | PUNPCKHQDQ X2, X0
1400 | PUNPCKHQDQ X6, X4
1401 | MOVOU 32(SI), X2 // Load bytes 32..63 of each of the four input blocks.
1402 | MOVOU 48(SI), X6
1403 | MOVOU 96(SI), X9
1404 | MOVOU 112(SI), X11
1405 | MOVOU 160(SI), X12
1406 | MOVOU 176(SI), X13
1407 | MOVOU 224(SI), X14
1408 | MOVOU 240(SI), X15
1409 | PXOR X2, X1
1410 | PXOR X6, X5
1411 | PXOR X9, X8
1412 | PXOR X11, X10
1413 | PXOR X12, X3
1414 | PXOR X13, X7
1415 | PXOR X14, X0
1416 | PXOR X15, X4
1417 | MOVOU X1, 32(DX) // Store bytes 32..63 of each of the four output blocks.
1418 | MOVOU X5, 48(DX)
1419 | MOVOU X8, 96(DX)
1420 | MOVOU X10, 112(DX)
1421 | MOVOU X3, 160(DX)
1422 | MOVOU X7, 176(DX)
1423 | MOVOU X0, 224(DX)
1424 | MOVOU X4, 240(DX)
1425 | ADDQ $256, SI // Advance the input pointer past the four blocks just consumed.
1426 | JMP chacha_blocks_ssse3_mainloop_cont
1427 |
1428 | chacha_blocks_ssse3_noinput1: // Same stores and transpose as above, minus the input loads and XORs.
1429 | MOVOU X5, 0(DX)
1430 | MOVOU X7, 16(DX)
1431 | MOVOU X8, 64(DX)
1432 | MOVOU X10, 80(DX)
1433 | MOVOU X1, 128(DX)
1434 | MOVOU X3, 144(DX)
1435 | MOVOU X0, 192(DX)
1436 | MOVOU X4, 208(DX)
1437 | MOVO 384(BP), X0
1438 | MOVO 400(BP), X1
1439 | MOVO 416(BP), X2
1440 | MOVO 432(BP), X3
1441 | MOVO 448(BP), X4
1442 | MOVO 464(BP), X5
1443 | MOVO 480(BP), X6
1444 | MOVO 496(BP), X7
1445 | MOVO X0, X8
1446 | MOVO X2, X9
1447 | MOVO X4, X10
1448 | MOVO X6, X11
1449 | PUNPCKLLQ X1, X8
1450 | PUNPCKLLQ X3, X9
1451 | PUNPCKHLQ X1, X0
1452 | PUNPCKHLQ X3, X2
1453 | PUNPCKLLQ X5, X10
1454 | PUNPCKLLQ X7, X11
1455 | PUNPCKHLQ X5, X4
1456 | PUNPCKHLQ X7, X6
1457 | MOVO X8, X1
1458 | MOVO X0, X3
1459 | MOVO X10, X5
1460 | MOVO X4, X7
1461 | PUNPCKLQDQ X9, X1
1462 | PUNPCKLQDQ X11, X5
1463 | PUNPCKHQDQ X9, X8
1464 | PUNPCKHQDQ X11, X10
1465 | PUNPCKLQDQ X2, X3
1466 | PUNPCKLQDQ X6, X7
1467 | PUNPCKHQDQ X2, X0
1468 | PUNPCKHQDQ X6, X4
1469 | MOVOU X1, 32(DX)
1470 | MOVOU X5, 48(DX)
1471 | MOVOU X8, 96(DX)
1472 | MOVOU X10, 112(DX)
1473 | MOVOU X3, 160(DX)
1474 | MOVOU X7, 176(DX)
1475 | MOVOU X0, 224(DX)
1476 | MOVOU X4, 240(DX)
1477 |
1478 | chacha_blocks_ssse3_mainloop_cont:
1479 | ADDQ $256, DX // Advance the output pointer and shrink the remaining length by four blocks.
1480 | SUBQ $256, CX
1481 | CMPQ CX, $256
1482 | JAE chacha_blocks_ssse3_atleast256
1483 | MOVO 80(BP), X6 // Fewer than 256 bytes left: reload the masks and state rows into the
1484 | MOVO 96(BP), X7 // registers the one-block path expects.
1485 | MOVO 0(BP), X8
1486 | MOVO 16(BP), X9
1487 | MOVO 32(BP), X10
1488 | MOVO 48(BP), X11 // Includes the counter already advanced by the 4-block loop.
1489 | MOVQ $1, R9 // R9 was clobbered by the 4-block loop; restore the increment of 1.
1490 |
1491 | chacha_blocks_ssse3_below256: // One 64-byte block per pass. Expects X6/X7 = masks, X8..X11 = state rows, R9 = 1.
1492 | MOVQ R9, X5 // X5 = counter increment, added to the low qword of X11 after each block.
1493 | ANDQ CX, CX
1494 | JZ chacha_blocks_ssse3_done
1495 | CMPQ CX, $64
1496 | JAE chacha_blocks_ssse3_above63
1497 | MOVQ DX, R9 // Final partial block: remember the real output pointer in R9.
1498 | ANDQ SI, SI
1499 | JZ chacha_blocks_ssse3_noinput2
1500 | MOVQ CX, R10
1501 | MOVQ BP, DX
1502 | ADDQ R10, SI // Point SI and DX one past the end of the CX-byte ranges and
1503 | ADDQ R10, DX // index them with a negative offset that counts up to zero.
1504 | NEGQ R10
1505 |
1506 | chacha_blocks_ssse3_copyinput: // Copy the remaining input bytes into the scratch area at BP, one byte at a time.
1507 | MOVB (SI)(R10*1), AX
1508 | MOVB AX, (DX)(R10*1)
1509 | INCQ R10
1510 | JNZ chacha_blocks_ssse3_copyinput
1511 | MOVQ BP, SI // Read the partial block from scratch; this overwrote the state copy at 0(BP), which is not read again on this path.
1512 |
1513 | chacha_blocks_ssse3_noinput2:
1514 | MOVQ BP, DX // Write the full 64-byte result to scratch; only CX bytes are copied out afterwards.
1515 |
1516 | chacha_blocks_ssse3_above63:
1517 | MOVO X8, X0 // Working copy of the state rows: a=X0, b=X1, c=X2, d=X3.
1518 | MOVO X9, X1
1519 | MOVO X10, X2
1520 | MOVO X11, X3
1521 |
1522 | // MOVQ 64(BP), AX
1523 | MOVQ $20, AX // Round counter; every loop iteration subtracts 2.
1524 |
1525 | chacha_blocks_ssse3_mainloop2: // Two rounds per iteration; PSHUFD lane rotations switch between the two groupings.
1526 | PADDD X1, X0 // a += b
1527 | PXOR X0, X3 // d ^= a
1528 | PSHUFB X6, X3 // d = shuffle(d, first mask)
1529 | PADDD X3, X2 // c += d
1530 | PXOR X2, X1 // b ^= c
1531 | MOVO X1, X4
1532 | PSLLL $12, X4
1533 | PSRLL $20, X1
1534 | PXOR X4, X1 // b = (b << 12) | (b >> 20)
1535 | PADDD X1, X0 // a += b
1536 | PXOR X0, X3 // d ^= a
1537 | PSHUFB X7, X3 // d = shuffle(d, second mask)
1538 | PSHUFD $0x93, X0, X0 // Rotate the lanes of a by one position.
1539 | PADDD X3, X2 // c += d
1540 | PSHUFD $0x4e, X3, X3 // Rotate the lanes of d by two positions.
1541 | PXOR X2, X1 // b ^= c
1542 | PSHUFD $0x39, X2, X2 // Rotate the lanes of c by one position, opposite direction to a.
1543 | MOVO X1, X4
1544 | PSLLL $7, X4
1545 | PSRLL $25, X1
1546 | PXOR X4, X1 // b = (b << 7) | (b >> 25)
1547 | PADDD X1, X0 // Second round: a += b
1548 | PXOR X0, X3 // d ^= a
1549 | PSHUFB X6, X3 // d = shuffle(d, first mask)
1550 | PADDD X3, X2 // c += d
1551 | PXOR X2, X1 // b ^= c
1552 | MOVO X1, X4
1553 | PSLLL $12, X4
1554 | PSRLL $20, X1
1555 | PXOR X4, X1 // b = (b << 12) | (b >> 20)
1556 | PADDD X1, X0 // a += b
1557 | PXOR X0, X3 // d ^= a
1558 | PSHUFB X7, X3 // d = shuffle(d, second mask)
1559 | PSHUFD $0x39, X0, X0 // Undo the lane rotation of a.
1560 | PADDD X3, X2 // c += d
1561 | PSHUFD $0x4e, X3, X3 // Undo the lane rotation of d.
1562 | PXOR X2, X1 // b ^= c
1563 | PSHUFD $0x93, X2, X2 // Undo the lane rotation of c.
1564 | MOVO X1, X4
1565 | PSLLL $7, X4
1566 | PSRLL $25, X1
1567 | PXOR X4, X1 // b = (b << 7) | (b >> 25)
1568 | SUBQ $2, AX
1569 | JNZ chacha_blocks_ssse3_mainloop2
1570 | PADDD X8, X0 // Add the initial state rows back into the permuted state.
1571 | PADDD X9, X1
1572 | PADDD X10, X2
1573 | PADDD X11, X3
1574 | ANDQ SI, SI
1575 | JZ chacha_blocks_ssse3_noinput3 // Nil input pointer: emit the keystream block as is.
1576 | MOVOU 0(SI), X12
1577 | MOVOU 16(SI), X13
1578 | MOVOU 32(SI), X14
1579 | MOVOU 48(SI), X15
1580 | PXOR X12, X0
1581 | PXOR X13, X1
1582 | PXOR X14, X2
1583 | PXOR X15, X3
1584 | ADDQ $64, SI
1585 |
1586 | chacha_blocks_ssse3_noinput3:
1587 | MOVOU X0, 0(DX)
1588 | MOVOU X1, 16(DX)
1589 | MOVOU X2, 32(DX)
1590 | MOVOU X3, 48(DX)
1591 | PADDQ X5, X11 // Advance the 64-bit counter in the low qword of row 3 (also done for a partial block).
1592 | CMPQ CX, $64
1593 | JBE chacha_blocks_ssse3_mainloop2_finishup // This was the last block (full or partial).
1594 | ADDQ $64, DX
1595 | SUBQ $64, CX
1596 | JMP chacha_blocks_ssse3_below256
1597 |
1598 | chacha_blocks_ssse3_mainloop2_finishup:
1599 | CMPQ CX, $64
1600 | JE chacha_blocks_ssse3_done // Exactly one full block: it was written straight to out.
1601 | ADDQ CX, R9 // Partial block: R9 = real output pointer, DX = scratch; both are
1602 | ADDQ CX, DX // moved to the end of the range and indexed by a negative CX.
1603 | NEGQ CX
1604 |
1605 | chacha_blocks_ssse3_copyoutput: // Copy the produced bytes from scratch to the real output, one byte at a time.
1606 | MOVB (DX)(CX*1), AX
1607 | MOVB AX, (R9)(CX*1)
1608 | INCQ CX
1609 | JNZ chacha_blocks_ssse3_copyoutput
1610 |
1611 | chacha_blocks_ssse3_done:
1612 | MOVOU X11, 32(DI) // Write row 3 (with the advanced counter) back to state words 12..15; DI is s+16.
1613 |
1614 | RET
1615 |
1616 | // func hChaChaSSSE3(key, nonce []byte, dst *byte)
1617 | TEXT ·hChaChaSSSE3(SB), NOSPLIT|NOFRAME, $0-56 // No stack frame; 56 bytes of arguments (two slice headers and one pointer).
1618 | MOVQ key+0(FP), DI // &key[0] -> DI
1619 | MOVQ nonce+24(FP), SI // &nonce[0] -> SI
1620 | MOVQ dst+48(FP), DX // dst -> DX
1621 |
1622 | MOVL $20, CX // Round counter; MOVL zero-extends into the full register, so the SUBQ below is consistent.
1623 |
1624 | LEAQ ·chacha_constants<>(SB), AX
1625 | MOVO 0(AX), X0 // Row 0 of the working state (table is defined outside this view; presumably the ChaCha constants).
1626 | MOVO 16(AX), X5 // Byte-shuffle mask used for the first rotate of each quarter round (presumably rotl 16; confirm against the table).
1627 | MOVO 32(AX), X6 // Byte-shuffle mask used for the third rotate of each quarter round (presumably rotl 8; confirm against the table).
1628 |
1629 | MOVOU 0(DI), X1 // Row 1 = key[0:16]
1630 | MOVOU 16(DI), X2 // Row 2 = key[16:32]
1631 | MOVOU 0(SI), X3 // Row 3 = nonce[0:16]; the slice lengths are not checked here.
1632 |
1633 | hchacha_ssse3_mainloop: // One iteration = two rounds on rows a=X0, b=X1, c=X2, d=X3; X4 is scratch.
1634 | PADDD X1, X0 // a += b
1635 | PXOR X0, X3 // d ^= a
1636 | PSHUFB X5, X3 // d = shuffle(d, X5)
1637 | PADDD X3, X2 // c += d
1638 | PXOR X2, X1 // b ^= c
1639 | MOVO X1, X4
1640 | PSLLL $12, X1
1641 | PSRLL $20, X4
1642 | PXOR X4, X1 // b = (b << 12) | (b >> 20)
1643 | PADDD X1, X0 // a += b
1644 | PXOR X0, X3 // d ^= a
1645 | PSHUFB X6, X3 // d = shuffle(d, X6)
1646 | PSHUFD $0X93, X0, X0 // Rotate the four 32-bit lanes of a by one position.
1647 | PADDD X3, X2 // c += d
1648 | PSHUFD $0X4E, X3, X3 // Rotate the lanes of d by two positions.
1649 | PXOR X2, X1 // b ^= c
1650 | PSHUFD $0X39, X2, X2 // Rotate the lanes of c by one position, opposite direction to a.
1651 | MOVO X1, X4
1652 | PSLLL $7, X1
1653 | PSRLL $25, X4
1654 | PXOR X4, X1 // b = (b << 7) | (b >> 25)
1655 | SUBQ $2, CX // Sets the flags consumed by the JA at the bottom; only SSE instructions, which do not write flags, lie in between.
1656 | PADDD X1, X0 // Second round of the pair: a += b
1657 | PXOR X0, X3 // d ^= a
1658 | PSHUFB X5, X3 // d = shuffle(d, X5)
1659 | PADDD X3, X2 // c += d
1660 | PXOR X2, X1 // b ^= c
1661 | MOVO X1, X4
1662 | PSLLL $12, X1
1663 | PSRLL $20, X4
1664 | PXOR X4, X1 // b = (b << 12) | (b >> 20)
1665 | PADDD X1, X0 // a += b
1666 | PXOR X0, X3 // d ^= a
1667 | PSHUFB X6, X3 // d = shuffle(d, X6)
1668 | PSHUFD $0X39, X0, X0 // Undo the lane rotation of a.
1669 | PADDD X3, X2 // c += d
1670 | PSHUFD $0X4E, X3, X3 // Undo the lane rotation of d.
1671 | PXOR X2, X1 // b ^= c
1672 | PSHUFD $0X93, X2, X2 // Undo the lane rotation of c.
1673 | MOVO X1, X4
1674 | PSLLL $7, X1
1675 | PSRLL $25, X4
1676 | PXOR X4, X1 // b = (b << 7) | (b >> 25)
1677 | JA hchacha_ssse3_mainloop // Loop while the SUBQ result was non-zero (no borrow): 20 / 2 = 10 iterations.
1678 |
1679 | MOVOU X0, 0(DX) // dst[0:16] = row 0
1680 | MOVOU X3, 16(DX) // dst[16:32] = row 3; the initial state is not added back and rows 1 and 2 are not output.
1681 |
1682 | RET
1683 |
--------------------------------------------------------------------------------