├── .goreleaser.yaml
├── vendor
    ├── github.com
    │   ├── dlclark
    │   │   └── regexp2
    │   │   │   ├── .travis.yml
    │   │   │   ├── syntax
    │   │   │       ├── fuzz.go
    │   │   │       ├── replacerdata.go
    │   │   │       ├── escape.go
    │   │   │       └── code.go
    │   │   │   ├── .gitignore
    │   │   │   ├── LICENSE
    │   │   │   ├── replace.go
    │   │   │   ├── README.md
    │   │   │   ├── ATTRIB
    │   │   │   ├── match.go
    │   │   │   └── regexp.go
    │   ├── pkg
    │   │   └── errors
    │   │   │   ├── .travis.yml
    │   │   │   ├── .gitignore
    │   │   │   ├── appveyor.yml
    │   │   │   ├── Makefile
    │   │   │   ├── LICENSE
    │   │   │   ├── go113.go
    │   │   │   ├── README.md
    │   │   │   ├── stack.go
    │   │   │   └── errors.go
    │   ├── stretchr
    │   │   └── testify
    │   │   │   ├── require
    │   │   │       ├── require_forward.go.tmpl
    │   │   │       ├── require.go.tmpl
    │   │   │       ├── forward_requirements.go
    │   │   │       ├── doc.go
    │   │   │       └── requirements.go
    │   │   │   ├── assert
    │   │   │       ├── assertion_format.go.tmpl
    │   │   │       ├── assertion_forward.go.tmpl
    │   │   │       ├── errors.go
    │   │   │       ├── forward_assertions.go
    │   │   │       ├── doc.go
    │   │   │       ├── assertion_order.go
    │   │   │       └── http_assertions.go
    │   │   │   └── LICENSE
    │   ├── davecgh
    │   │   └── go-spew
    │   │   │   ├── LICENSE
    │   │   │   └── spew
    │   │   │       ├── bypasssafe.go
    │   │   │       ├── bypass.go
    │   │   │       ├── spew.go
    │   │   │       ├── doc.go
    │   │   │       └── common.go
    │   └── pmezard
    │   │   └── go-difflib
    │   │       └── LICENSE
    ├── gopkg.in
    │   └── yaml.v3
    │   │   ├── .travis.yml
    │   │   ├── NOTICE
    │   │   ├── writerc.go
    │   │   ├── LICENSE
    │   │   ├── sorter.go
    │   │   ├── README.md
    │   │   ├── yamlprivateh.go
    │   │   └── resolve.go
    └── modules.txt
├── README.md
├── go.mod
├── .gitignore
├── .github
    ├── workflows
    │   ├── go.yml
    │   ├── golangci-lint.yaml
    │   └── goreleaser.yaml
    ├── build-test.yaml
    └── .golangci.yaml
├── golangci-lint.yaml
├── go.sum
├── frequency.go
├── encoder_test.go
├── frequency_test.go
├── bpe.go
├── encoder.go
└── LICENSE


/.goreleaser.yaml:
--------------------------------------------------------------------------------
1 | builds:
2 | - skip: true
3 | 


--------------------------------------------------------------------------------
/vendor/github.com/dlclark/regexp2/.travis.yml:
--------------------------------------------------------------------------------
1 | language: go
2 | 
3 | go:
4 |   - 1.9
5 |   - tip


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | ## Outdated
2 | 
3 | Please refer to the [tokenize](https://docs.cohere.com/reference/tokenize) and [detokenize](https://docs.cohere.com/reference/detokenize) APIs for up-to-date Cohere tokenizers.
4 | 


--------------------------------------------------------------------------------
/vendor/github.com/pkg/errors/.travis.yml:
--------------------------------------------------------------------------------
 1 | language: go
 2 | go_import_path: github.com/pkg/errors
 3 | go:
 4 |   - 1.11.x
 5 |   - 1.12.x
 6 |   - 1.13.x
 7 |   - tip
 8 | 
 9 | script:
10 |   - make check
11 | 


--------------------------------------------------------------------------------
/vendor/github.com/stretchr/testify/require/require_forward.go.tmpl:
--------------------------------------------------------------------------------
1 | {{.CommentWithoutT "a"}}
2 | func (a *Assertions) {{.DocInfo.Name}}({{.Params}}) {
3 | 	if h, ok := a.t.(tHelper); ok { h.Helper() }
4 | 	{{.DocInfo.Name}}(a.t, {{.ForwardedParams}})
5 | }
6 | 


--------------------------------------------------------------------------------
/vendor/github.com/stretchr/testify/assert/assertion_format.go.tmpl:
--------------------------------------------------------------------------------
1 | {{.CommentFormat}}
2 | func {{.DocInfo.Name}}f(t TestingT, {{.ParamsFormat}}) bool {
3 | 	if h, ok := t.(tHelper); ok { h.Helper() }
4 | 	return {{.DocInfo.Name}}(t, {{.ForwardedParamsFormat}})
5 | }
6 | 


--------------------------------------------------------------------------------
/vendor/github.com/stretchr/testify/assert/assertion_forward.go.tmpl:
--------------------------------------------------------------------------------
1 | {{.CommentWithoutT "a"}}
2 | func (a *Assertions) {{.DocInfo.Name}}({{.Params}}) bool {
3 | 	if h, ok := a.t.(tHelper); ok { h.Helper() }
4 | 	return {{.DocInfo.Name}}(a.t, {{.ForwardedParams}})
5 | }
6 | 


--------------------------------------------------------------------------------
/vendor/github.com/stretchr/testify/require/require.go.tmpl:
--------------------------------------------------------------------------------
1 | {{.Comment}}
2 | func {{.DocInfo.Name}}(t TestingT, {{.Params}}) {
3 | 	if h, ok := t.(tHelper); ok { h.Helper() }
4 | 	if assert.{{.DocInfo.Name}}(t, {{.ForwardedParams}}) { return }
5 | 	t.FailNow()
6 | }
7 | 


--------------------------------------------------------------------------------
/vendor/gopkg.in/yaml.v3/.travis.yml:
--------------------------------------------------------------------------------
 1 | language: go
 2 | 
 3 | go:
 4 |     - "1.4.x"
 5 |     - "1.5.x"
 6 |     - "1.6.x"
 7 |     - "1.7.x"
 8 |     - "1.8.x"
 9 |     - "1.9.x"
10 |     - "1.10.x"
11 |     - "1.11.x"
12 |     - "1.12.x"
13 |     - "1.13.x"
14 |     - "tip"
15 | 
16 | go_import_path: gopkg.in/yaml.v3
17 | 


--------------------------------------------------------------------------------
/go.mod:
--------------------------------------------------------------------------------
 1 | module github.com/cohere-ai/tokenizer
 2 | 
 3 | go 1.17
 4 | 
 5 | require (
 6 | 	github.com/dlclark/regexp2 v1.4.0
 7 | 	github.com/pkg/errors v0.9.1
 8 | 	github.com/stretchr/testify v1.7.0
 9 | )
10 | 
11 | require (
12 | 	github.com/davecgh/go-spew v1.1.0 // indirect
13 | 	github.com/pmezard/go-difflib v1.0.0 // indirect
14 | 	gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c // indirect
15 | )
16 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Binaries for programs and plugins
 2 | *.exe
 3 | *.exe~
 4 | *.dll
 5 | *.so
 6 | *.dylib
 7 | 
 8 | # Test binary, built with `go test -c`
 9 | *.test
10 | 
11 | # Output of the go coverage tool, specifically when used with LiteIDE
12 | *.out
13 | 
14 | # Dependency directories (remove the comment below to include it)
15 | # vendor/
16 | 
17 | # goreleaser dist autogenerated directory
18 | /dist
19 | 


--------------------------------------------------------------------------------
/vendor/github.com/pkg/errors/.gitignore:
--------------------------------------------------------------------------------
 1 | # Compiled Object files, Static and Dynamic libs (Shared Objects)
 2 | *.o
 3 | *.a
 4 | *.so
 5 | 
 6 | # Folders
 7 | _obj
 8 | _test
 9 | 
10 | # Architecture specific extensions/prefixes
11 | *.[568vq]
12 | [568vq].out
13 | 
14 | *.cgo1.go
15 | *.cgo2.c
16 | _cgo_defun.c
17 | _cgo_gotypes.go
18 | _cgo_export.*
19 | 
20 | _testmain.go
21 | 
22 | *.exe
23 | *.test
24 | *.prof
25 | 


--------------------------------------------------------------------------------
/vendor/github.com/stretchr/testify/assert/errors.go:
--------------------------------------------------------------------------------
 1 | package assert
 2 | 
 3 | import (
 4 | 	"errors"
 5 | )
 6 | 
 7 | // AnError is an error instance useful for testing.  If the code does not care
 8 | // about error specifics, and only needs to return the error for example, this
 9 | // error should be used to make the test code more readable.
10 | var AnError = errors.New("assert.AnError general error for testing")
11 | 


--------------------------------------------------------------------------------
/vendor/github.com/dlclark/regexp2/syntax/fuzz.go:
--------------------------------------------------------------------------------
 1 | // +build gofuzz
 2 | 
 3 | package syntax
 4 | 
 5 | // Fuzz is the input point for go-fuzz
 6 | func Fuzz(data []byte) int {
 7 | 	sdata := string(data)
 8 | 	tree, err := Parse(sdata, RegexOptions(0))
 9 | 	if err != nil {
10 | 		return 0
11 | 	}
12 | 
13 | 	// translate it to code
14 | 	_, err = Write(tree)
15 | 	if err != nil {
16 | 		panic(err)
17 | 	}
18 | 
19 | 	return 1
20 | }
21 | 


--------------------------------------------------------------------------------
/vendor/github.com/dlclark/regexp2/.gitignore:
--------------------------------------------------------------------------------
 1 | # Compiled Object files, Static and Dynamic libs (Shared Objects)
 2 | *.o
 3 | *.a
 4 | *.so
 5 | 
 6 | # Folders
 7 | _obj
 8 | _test
 9 | 
10 | # Architecture specific extensions/prefixes
11 | *.[568vq]
12 | [568vq].out
13 | 
14 | *.cgo1.go
15 | *.cgo2.c
16 | _cgo_defun.c
17 | _cgo_gotypes.go
18 | _cgo_export.*
19 | 
20 | _testmain.go
21 | 
22 | *.exe
23 | *.test
24 | *.prof
25 | *.out
26 | 
27 | .DS_Store
28 | 


--------------------------------------------------------------------------------
/.github/workflows/go.yml:
--------------------------------------------------------------------------------
 1 | name: go  # Builds and tests every package on pushes and pull requests targeting main.
 2 | 
 3 | on:
 4 |   push:
 5 |     branches: [ main ]
 6 |   pull_request:
 7 |     branches: [ main ]
 8 | 
 9 | jobs:
10 | 
11 |   build:
12 |     runs-on: ubuntu-latest
13 |     steps:
14 |     - uses: actions/checkout@v2
15 | 
16 |     - name: Set up Go
17 |       uses: actions/setup-go@v2
18 |       with:
19 |         go-version: "1.17"  # quoted so YAML keeps it a string; an unquoted value such as 1.20 would be read as the number 1.2
20 | 
21 |     - name: Build
22 |       run: go build -v ./...
23 | 
24 |     - name: Test
25 |       run: go test -v ./...
26 | 


--------------------------------------------------------------------------------
/.github/build-test.yaml:
--------------------------------------------------------------------------------
 1 | name: build-test  # NOTE(review): this file is listed at .github/build-test.yaml, outside .github/workflows; GitHub Actions only loads workflows from .github/workflows, so confirm whether it is meant to run
 2 | 
 3 | on:
 4 |   push:
 5 |     branches: [ main ]
 6 |   pull_request:
 7 |     branches: [ main ]
 8 |   workflow_dispatch:
 9 | jobs:
10 |   build:
11 |     runs-on: ubuntu-latest
12 |     steps:
13 |     - uses: actions/checkout@v2
14 | 
15 |     - name: Set up Go
16 |       uses: actions/setup-go@v2
17 |       with:
18 |         go-version: "1.17"  # quoted so YAML keeps it a string; an unquoted value such as 1.20 would be read as the number 1.2
19 | 
20 |     - name: build
21 |       run: go build -v
22 | 
23 |     - name: test
24 |       run: go test -v
25 | 


--------------------------------------------------------------------------------
/golangci-lint.yaml:
--------------------------------------------------------------------------------
 1 | name: golangci-lint
 2 | on:
 3 |   push:
 4 | jobs:
 5 |   golangci:
 6 |     name: go-lint
 7 |     runs-on: ubuntu-latest
 8 |     steps:
 9 |       - uses: actions/checkout@v2
10 |       - name: golangci-lint
11 |         uses: golangci/golangci-lint-action@v2.3.0
12 |         with:
13 |           # Required: the version of golangci-lint is required and must be specified without patch version: we always use the latest patch version.
14 |           version: v1.29
15 |           args: --timeout=3m0s
16 | 


--------------------------------------------------------------------------------
/.github/workflows/golangci-lint.yaml:
--------------------------------------------------------------------------------
 1 | name: golangci-lint
 2 | on:
 3 |   push:
 4 | jobs:
 5 |   golangci:
 6 |     name: go-lint
 7 |     runs-on: ubuntu-latest
 8 |     steps:
 9 |       - uses: actions/checkout@v2
10 |       - name: golangci-lint
11 |         uses: golangci/golangci-lint-action@v2.3.0
12 |         with:
13 |           # Required: the version of golangci-lint is required and must be specified without patch version: we always use the latest patch version.
14 |           version: v1.48
15 |           args: --timeout=3m0s
16 | 


--------------------------------------------------------------------------------
/vendor/github.com/stretchr/testify/assert/forward_assertions.go:
--------------------------------------------------------------------------------
 1 | package assert
 2 | 
 3 | // Assertions provides assertion methods around the
 4 | // TestingT interface.
 5 | type Assertions struct {
 6 | 	t TestingT
 7 | }
 8 | 
 9 | // New makes a new Assertions object for the specified TestingT.
10 | func New(t TestingT) *Assertions {
11 | 	return &Assertions{
12 | 		t: t,
13 | 	}
14 | }
15 | 
16 | //go:generate sh -c "cd ../_codegen && go build && cd - && ../_codegen/_codegen -output-package=assert -template=assertion_forward.go.tmpl -include-format-funcs"
17 | 


--------------------------------------------------------------------------------
/vendor/github.com/stretchr/testify/require/forward_requirements.go:
--------------------------------------------------------------------------------
 1 | package require
 2 | 
 3 | // Assertions provides assertion methods around the
 4 | // TestingT interface.
 5 | type Assertions struct {
 6 | 	t TestingT
 7 | }
 8 | 
 9 | // New makes a new Assertions object for the specified TestingT.
10 | func New(t TestingT) *Assertions {
11 | 	return &Assertions{
12 | 		t: t,
13 | 	}
14 | }
15 | 
16 | //go:generate sh -c "cd ../_codegen && go build && cd - && ../_codegen/_codegen -output-package=require -template=require_forward.go.tmpl -include-format-funcs"
17 | 


--------------------------------------------------------------------------------
/vendor/gopkg.in/yaml.v3/NOTICE:
--------------------------------------------------------------------------------
 1 | Copyright 2011-2016 Canonical Ltd.
 2 | 
 3 | Licensed under the Apache License, Version 2.0 (the "License");
 4 | you may not use this file except in compliance with the License.
 5 | You may obtain a copy of the License at
 6 | 
 7 |     http://www.apache.org/licenses/LICENSE-2.0
 8 | 
 9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 | 


--------------------------------------------------------------------------------
/vendor/modules.txt:
--------------------------------------------------------------------------------
 1 | # github.com/davecgh/go-spew v1.1.0
 2 | ## explicit
 3 | github.com/davecgh/go-spew/spew
 4 | # github.com/dlclark/regexp2 v1.4.0
 5 | ## explicit
 6 | github.com/dlclark/regexp2
 7 | github.com/dlclark/regexp2/syntax
 8 | # github.com/pkg/errors v0.9.1
 9 | ## explicit
10 | github.com/pkg/errors
11 | # github.com/pmezard/go-difflib v1.0.0
12 | ## explicit
13 | github.com/pmezard/go-difflib/difflib
14 | # github.com/stretchr/testify v1.7.0
15 | ## explicit; go 1.13
16 | github.com/stretchr/testify/assert
17 | github.com/stretchr/testify/require
18 | # gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c
19 | ## explicit
20 | gopkg.in/yaml.v3
21 | 


--------------------------------------------------------------------------------
/.github/workflows/goreleaser.yaml:
--------------------------------------------------------------------------------
 1 | name: goreleaser  # Runs `goreleaser release` on every pull request and on every pushed tag.
 2 | 
 3 | on:
 4 |   pull_request:  # NOTE(review): `release` without --snapshot normally expects a tagged commit; confirm PR runs are intended
 5 |   push:
 6 |     tags:
 7 |       - "*"
 8 | 
 9 | permissions:
10 |   contents: write  # presumably needed so the release step can publish to the repository; verify
11 | 
12 | env:
13 |   GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
14 | 
15 | jobs:
16 |   goreleaser:
17 |     runs-on: ubuntu-latest
18 |     steps:
19 |       - name: Checkout
20 |         uses: actions/checkout@v2
21 |         with:
22 |           fetch-depth: 0  # fetch full history and tags instead of a shallow clone
23 |       - name: Set up Go
24 |         uses: actions/setup-go@v2
25 |         with:
26 |           go-version: "1.17"  # quoted so YAML keeps it a string rather than a number
27 |       - name: Run GoReleaser
28 |         uses: goreleaser/goreleaser-action@v2
29 |         with:
30 |           distribution: goreleaser
31 |           version: latest  # NOTE(review): an unpinned version tracks new GoReleaser majors; v2 may also expect `version: 2` in .goreleaser.yaml, so confirm or pin
32 |           args: release --clean  # --clean replaces --rm-dist, which was deprecated and then removed in GoReleaser v2
33 | 


--------------------------------------------------------------------------------
/vendor/github.com/pkg/errors/appveyor.yml:
--------------------------------------------------------------------------------
 1 | version: build-{build}.{branch}
 2 | 
 3 | clone_folder: C:\gopath\src\github.com\pkg\errors
 4 | shallow_clone: true # for startup speed
 5 | 
 6 | environment:
 7 |   GOPATH: C:\gopath
 8 | 
 9 | platform:
10 |   - x64
11 | 
12 | # http://www.appveyor.com/docs/installed-software
13 | install:
14 |   # some helpful output for debugging builds
15 |   - go version
16 |   - go env
17 |   # pre-installed MinGW at C:\MinGW is 32bit only
18 |   # but MSYS2 at C:\msys64 has mingw64
19 |   - set PATH=C:\msys64\mingw64\bin;%PATH%
20 |   - gcc --version
21 |   - g++ --version
22 | 
23 | build_script:
24 |   - go install -v ./...
25 | 
26 | test_script:
27 |   - set PATH=C:\gopath\bin;%PATH%
28 |   - go test -v ./...
29 | 
30 | #artifacts:
31 | #  - path: '%GOPATH%\bin\*.exe'
32 | deploy: off
33 | 


--------------------------------------------------------------------------------
/vendor/github.com/davecgh/go-spew/LICENSE:
--------------------------------------------------------------------------------
 1 | ISC License
 2 | 
 3 | Copyright (c) 2012-2016 Dave Collins <dave@davec.name>
 4 | 
 5 | Permission to use, copy, modify, and distribute this software for any
 6 | purpose with or without fee is hereby granted, provided that the above
 7 | copyright notice and this permission notice appear in all copies.
 8 | 
 9 | THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 | WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 | MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 | ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 | WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 | ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 | OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 | 


--------------------------------------------------------------------------------
/vendor/github.com/stretchr/testify/require/doc.go:
--------------------------------------------------------------------------------
 1 | // Package require implements the same assertions as the `assert` package but
 2 | // stops test execution when a test fails.
 3 | //
 4 | // Example Usage
 5 | //
 6 | // The following is a complete example using require in a standard test function:
 7 | //    import (
 8 | //      "testing"
 9 | //      "github.com/stretchr/testify/require"
10 | //    )
11 | //
12 | //    func TestSomething(t *testing.T) {
13 | //
14 | //      var a string = "Hello"
15 | //      var b string = "Hello"
16 | //
17 | //      require.Equal(t, a, b, "The two words should be the same.")
18 | //
19 | //    }
20 | //
21 | // Assertions
22 | //
23 | // The `require` package have same global functions as in the `assert` package,
24 | // but instead of returning a boolean result they call `t.FailNow()`.
25 | //
26 | // Every assertion function also takes an optional string message as the final argument,
27 | // allowing custom error messages to be appended to the message the assertion method outputs.
28 | package require
29 | 


--------------------------------------------------------------------------------
/vendor/github.com/pkg/errors/Makefile:
--------------------------------------------------------------------------------
 1 | PKGS := github.com/pkg/errors
 2 | SRCDIRS := $(shell go list -f '{{.Dir}}' $(PKGS))
 3 | GO := go
 4 | 
 5 | check: test vet gofmt misspell unconvert staticcheck ineffassign unparam
 6 | 
 7 | test: 
 8 | 	$(GO) test $(PKGS)
 9 | 
10 | vet: | test
11 | 	$(GO) vet $(PKGS)
12 | 
13 | staticcheck:
14 | 	$(GO) get honnef.co/go/tools/cmd/staticcheck
15 | 	staticcheck -checks all $(PKGS)
16 | 
17 | misspell:
18 | 	$(GO) get github.com/client9/misspell/cmd/misspell
19 | 	misspell \
20 | 		-locale GB \
21 | 		-error \
22 | 		*.md *.go
23 | 
24 | unconvert:
25 | 	$(GO) get github.com/mdempsky/unconvert
26 | 	unconvert -v $(PKGS)
27 | 
28 | ineffassign:
29 | 	$(GO) get github.com/gordonklaus/ineffassign
30 | 	find $(SRCDIRS) -name '*.go' | xargs ineffassign
31 | 
32 | pedantic: check errcheck
33 | 
34 | unparam:
35 | 	$(GO) get mvdan.cc/unparam
36 | 	unparam ./...
37 | 
38 | errcheck:
39 | 	$(GO) get github.com/kisielk/errcheck
40 | 	errcheck $(PKGS)
41 | 
42 | gofmt:  
43 | 	@echo Checking code is gofmted
44 | 	@test -z "$(shell gofmt -s -l -d -e $(SRCDIRS) | tee /dev/stderr)"
45 | 


--------------------------------------------------------------------------------
/vendor/github.com/dlclark/regexp2/LICENSE:
--------------------------------------------------------------------------------
 1 | The MIT License (MIT)
 2 | 
 3 | Copyright (c) Doug Clark
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/vendor/github.com/stretchr/testify/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2012-2020 Mat Ryer, Tyler Bunnell and contributors.
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/vendor/github.com/stretchr/testify/require/requirements.go:
--------------------------------------------------------------------------------
 1 | package require
 2 | 
 3 | // TestingT is an interface wrapper around *testing.T
 4 | type TestingT interface {
 5 | 	Errorf(format string, args ...interface{})
 6 | 	FailNow()
 7 | }
 8 | 
 9 | type tHelper interface {
10 | 	Helper()
11 | }
12 | 
13 | // ComparisonAssertionFunc is a common function prototype when comparing two values.  Can be useful
14 | // for table driven tests.
15 | type ComparisonAssertionFunc func(TestingT, interface{}, interface{}, ...interface{})
16 | 
17 | // ValueAssertionFunc is a common function prototype when validating a single value.  Can be useful
18 | // for table driven tests.
19 | type ValueAssertionFunc func(TestingT, interface{}, ...interface{})
20 | 
21 | // BoolAssertionFunc is a common function prototype when validating a bool value.  Can be useful
22 | // for table driven tests.
23 | type BoolAssertionFunc func(TestingT, bool, ...interface{})
24 | 
25 | // ErrorAssertionFunc is a common function prototype when validating an error value.  Can be useful
26 | // for table driven tests.
27 | type ErrorAssertionFunc func(TestingT, error, ...interface{})
28 | 
29 | //go:generate sh -c "cd ../_codegen && go build && cd - && ../_codegen/_codegen -output-package=require -template=require.go.tmpl -include-format-funcs"
30 | 


--------------------------------------------------------------------------------
/go.sum:
--------------------------------------------------------------------------------
 1 | github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8=
 2 | github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
 3 | github.com/dlclark/regexp2 v1.4.0 h1:F1rxgk7p4uKjwIQxBs9oAXe5CqrXlCduYEJvrF4u93E=
 4 | github.com/dlclark/regexp2 v1.4.0/go.mod h1:2pZnwuY/m+8K6iRw6wQdMtk+rH5tNGR1i55kozfMjCc=
 5 | github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
 6 | github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
 7 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
 8 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
 9 | github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
10 | github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY=
11 | github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
12 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
13 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
14 | gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c h1:dUUwHk2QECo/6vqA44rthZ8ie2QXMNeKRTHCNY2nXvo=
15 | gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
16 | 


--------------------------------------------------------------------------------
/vendor/github.com/pkg/errors/LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2015, Dave Cheney <dave@cheney.net>
 2 | All rights reserved.
 3 | 
 4 | Redistribution and use in source and binary forms, with or without
 5 | modification, are permitted provided that the following conditions are met:
 6 | 
 7 | * Redistributions of source code must retain the above copyright notice, this
 8 |   list of conditions and the following disclaimer.
 9 | 
10 | * Redistributions in binary form must reproduce the above copyright notice,
11 |   this list of conditions and the following disclaimer in the documentation
12 |   and/or other materials provided with the distribution.
13 | 
14 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
15 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
17 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
18 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
20 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
21 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
22 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 | 


--------------------------------------------------------------------------------
/vendor/github.com/pmezard/go-difflib/LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2013, Patrick Mezard
 2 | All rights reserved.
 3 | 
 4 | Redistribution and use in source and binary forms, with or without
 5 | modification, are permitted provided that the following conditions are
 6 | met:
 7 | 
 8 |     Redistributions of source code must retain the above copyright
 9 | notice, this list of conditions and the following disclaimer.
10 |     Redistributions in binary form must reproduce the above copyright
11 | notice, this list of conditions and the following disclaimer in the
12 | documentation and/or other materials provided with the distribution.
13 |     The names of its contributors may not be used to endorse or promote
14 | products derived from this software without specific prior written
15 | permission.
16 | 
17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
18 | IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
19 | TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
20 | PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 | HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
23 | TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
24 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
25 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
26 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 | 


--------------------------------------------------------------------------------
/vendor/github.com/stretchr/testify/assert/doc.go:
--------------------------------------------------------------------------------
 1 | // Package assert provides a set of comprehensive testing tools for use with the normal Go testing system.
 2 | //
 3 | // Example Usage
 4 | //
 5 | // The following is a complete example using assert in a standard test function:
 6 | //    import (
 7 | //      "testing"
 8 | //      "github.com/stretchr/testify/assert"
 9 | //    )
10 | //
11 | //    func TestSomething(t *testing.T) {
12 | //
13 | //      var a string = "Hello"
14 | //      var b string = "Hello"
15 | //
16 | //      assert.Equal(t, a, b, "The two words should be the same.")
17 | //
18 | //    }
19 | //
20 | // if you assert many times, use the format below:
21 | //
22 | //    import (
23 | //      "testing"
24 | //      "github.com/stretchr/testify/assert"
25 | //    )
26 | //
27 | //    func TestSomething(t *testing.T) {
28 | //      assert := assert.New(t)
29 | //
30 | //      var a string = "Hello"
31 | //      var b string = "Hello"
32 | //
33 | //      assert.Equal(a, b, "The two words should be the same.")
34 | //    }
35 | //
36 | // Assertions
37 | //
38 | // Assertions allow you to easily write test code, and are global funcs in the `assert` package.
39 | // All assertion functions take, as the first argument, the `*testing.T` object provided by the
40 | // testing framework. This allows the assertion funcs to write the failings and other details to
41 | // the correct place.
42 | //
43 | // Every assertion function also takes an optional string message as the final argument,
44 | // allowing custom error messages to be appended to the message the assertion method outputs.
45 | package assert
46 | 


--------------------------------------------------------------------------------
/vendor/github.com/pkg/errors/go113.go:
--------------------------------------------------------------------------------
 1 | // +build go1.13
 2 | 
 3 | package errors
 4 | 
 5 | import (
 6 | 	stderrors "errors"
 7 | )
 8 | 
 9 | // Is reports whether any error in err's chain matches target. It is a thin
10 | // wrapper that forwards to the standard library's errors.Is.
11 | //
12 | // The chain consists of err itself followed by the sequence of errors obtained
13 | // by repeatedly calling Unwrap. An error is considered to match a target if it
14 | // is equal to that target or if it implements a method Is(error) bool such
15 | // that Is(target) returns true.
16 | func Is(err, target error) bool { return stderrors.Is(err, target) }
17 | 
18 | // As finds the first error in err's chain that matches target, and if so, sets
19 | // target to that error value and returns true. It forwards to stdlib errors.As.
20 | //
21 | // The chain consists of err itself followed by the sequence of errors obtained by
22 | // repeatedly calling Unwrap.
23 | //
24 | // An error matches target if the error's concrete value is assignable to the value
25 | // pointed to by target, or if the error has a method As(interface{}) bool such that
26 | // As(target) returns true. In the latter case, the As method is responsible for
27 | // setting target.
28 | //
29 | // As panics if target is not a non-nil pointer to either a type that implements
30 | // error, or to any interface type. As returns false if err is nil.
31 | func As(err error, target interface{}) bool { return stderrors.As(err, target) }
32 | 
33 | // Unwrap returns the result of calling the Unwrap method on err, if err's
34 | // type contains an Unwrap method returning error; otherwise it returns nil.
35 | // It forwards to the standard library's errors.Unwrap.
36 | func Unwrap(err error) error {
37 | 	return stderrors.Unwrap(err)
38 | }
39 | 


--------------------------------------------------------------------------------
/vendor/github.com/davecgh/go-spew/spew/bypasssafe.go:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2015-2016 Dave Collins <dave@davec.name>
 2 | //
 3 | // Permission to use, copy, modify, and distribute this software for any
 4 | // purpose with or without fee is hereby granted, provided that the above
 5 | // copyright notice and this permission notice appear in all copies.
 6 | //
 7 | // THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 8 | // WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 9 | // MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
10 | // ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
11 | // WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
12 | // ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
13 | // OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
14 | 
15 | // NOTE: Due to the following build constraints, this file will only be compiled
16 | // when the code is running on Google App Engine, compiled by GopherJS, or
17 | // "-tags safe" is added to the go build command line.  The "disableunsafe"
18 | // tag is deprecated and thus should not be used.
19 | // +build js appengine safe disableunsafe
20 | 
21 | package spew
22 | 
23 | import "reflect"
24 | 
25 | const (
26 | 	// UnsafeDisabled is a build-time constant which specifies whether or
27 | 	// not access to the unsafe package is available.
28 | 	UnsafeDisabled = true
29 | )
30 | 
31 | // unsafeReflectValue typically converts the passed reflect.Value into one
32 | // that bypasses the typical safety restrictions preventing access to
33 | // unaddressable and unexported data.  However, doing this relies on access to
34 | // the unsafe package.  This is a stub version which simply returns the passed
35 | // reflect.Value unchanged when the unsafe package is not available.
36 | func unsafeReflectValue(v reflect.Value) reflect.Value {
37 | 	return v
38 | }
39 | 


--------------------------------------------------------------------------------
/vendor/gopkg.in/yaml.v3/writerc.go:
--------------------------------------------------------------------------------
 1 | // 
 2 | // Copyright (c) 2011-2019 Canonical Ltd
 3 | // Copyright (c) 2006-2010 Kirill Simonov
 4 | // 
 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy of
 6 | // this software and associated documentation files (the "Software"), to deal in
 7 | // the Software without restriction, including without limitation the rights to
 8 | // use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
 9 | // of the Software, and to permit persons to whom the Software is furnished to do
10 | // so, subject to the following conditions:
11 | // 
12 | // The above copyright notice and this permission notice shall be included in all
13 | // copies or substantial portions of the Software.
14 | // 
15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | // SOFTWARE.
22 | 
23 | package yaml
24 | 
25 | // yaml_emitter_set_writer_error marks the emitter as failed with a writer error and stores problem as its message; it always returns false.
26 | func yaml_emitter_set_writer_error(emitter *yaml_emitter_t, problem string) bool {
27 | 	emitter.error = yaml_WRITER_ERROR
28 | 	emitter.problem = problem
29 | 	return false
30 | }
31 | 
32 | // yaml_emitter_flush hands the buffered output to the emitter's write handler.
33 | func yaml_emitter_flush(emitter *yaml_emitter_t) bool {
34 | 	if emitter.write_handler == nil {
35 | 		panic("write handler not set")
36 | 	}
37 | 	if emitter.buffer_pos == 0 {
38 | 		return true // nothing buffered, so there is nothing to hand over
39 | 	}
40 | 
41 | 	pending := emitter.buffer[:emitter.buffer_pos]
42 | 	err := emitter.write_handler(emitter, pending)
43 | 	if err != nil {
44 | 		return yaml_emitter_set_writer_error(emitter, "write error: "+err.Error())
45 | 	}
46 | 	emitter.buffer_pos = 0 // the next write starts filling the buffer from the top
47 | 	return true
48 | }
49 | 


--------------------------------------------------------------------------------
/vendor/github.com/dlclark/regexp2/syntax/replacerdata.go:
--------------------------------------------------------------------------------
 1 | package syntax
 2 | 
 3 | import (
 4 | 	"bytes"
 5 | 	"errors"
 6 | )
 7 | 
 8 | type ReplacerData struct {
 9 | 	Rep     string   // replacement pattern text exactly as passed to NewReplacerData
10 | 	Strings []string // literal text runs collected from the pattern, in order of appearance
11 | 	Rules   []int    // one entry per segment: >= 0 is an index into Strings, < 0 encodes a reference
12 | }
13 | 
14 | const (
15 | 	replaceSpecials     = 4  // count of the special codes below; group references are encoded past them
16 | 	replaceLeftPortion  = -1 // the special rule codes occupy -1 through -replaceSpecials
17 | 	replaceRightPortion = -2
18 | 	replaceLastGroup    = -3
19 | 	replaceWholeString  = -4
20 | )
21 | 
22 | // ErrReplacementError is a general error during parsing the replacement text
23 | var ErrReplacementError = errors.New("Replacement pattern error.")
24 | 
25 | // NewReplacerData parses the replacement string rep and compiles it into a reusable
26 | // ReplacerData, resolving group references through the regexp's capture data.
27 | func NewReplacerData(rep string, caps map[int]int, capsize int, capnames map[string]int, op RegexOptions) (*ReplacerData, error) {
28 | 	scanner := parser{
29 | 		options:  op,
30 | 		caps:     caps,
31 | 		capsize:  capsize,
32 | 		capnames: capnames,
33 | 	}
34 | 	scanner.setPattern(rep)
35 | 	root, err := scanner.scanReplacement()
36 | 	if err != nil {
37 | 		return nil, err
38 | 	}
39 | 	if root.t != ntConcatenate {
40 | 		// anything but a concatenation root is treated as an internal error
41 | 		panic(ErrReplacementError)
42 | 	}
43 | 
44 | 	var (
45 | 		pending  bytes.Buffer // literal text seen since the last group reference
46 | 		literals []string
47 | 		rules    []int
48 | 	)
49 | 	// flushLiteral turns any pending literal text into a Strings entry plus a
50 | 	// non-negative rule holding that entry's index.
51 | 	flushLiteral := func() {
52 | 		if pending.Len() == 0 {
53 | 			return
54 | 		}
55 | 		rules = append(rules, len(literals))
56 | 		literals = append(literals, pending.String())
57 | 		pending.Reset()
58 | 	}
59 | 
60 | 	for _, node := range root.children {
61 | 		switch node.t {
62 | 		case ntMulti:
63 | 			node.writeStrToBuf(&pending)
64 | 		case ntOne:
65 | 			pending.WriteRune(node.ch)
66 | 		case ntRef:
67 | 			flushLiteral()
68 | 			// Translate the reference through the caps table when there is
69 | 			// one; negative values are left untouched.
70 | 			slot := node.m
71 | 			if slot >= 0 && len(caps) > 0 {
72 | 				slot = caps[slot]
73 | 			}
74 | 			// References are stored below the reserved special codes.
75 | 			rules = append(rules, -replaceSpecials-1-slot)
76 | 		default:
77 | 			panic(ErrReplacementError)
78 | 		}
79 | 	}
80 | 	flushLiteral()
81 | 
82 | 	return &ReplacerData{
83 | 		Rep:     rep,
84 | 		Strings: literals,
85 | 		Rules:   rules,
86 | 	}, nil
87 | }
88 | 


--------------------------------------------------------------------------------
/vendor/github.com/dlclark/regexp2/syntax/escape.go:
--------------------------------------------------------------------------------
 1 | package syntax
 2 | 
 3 | import (
 4 | 	"bytes"
 5 | 	"strconv"
 6 | 	"strings"
 7 | 	"unicode"
 8 | )
 9 | 
10 | func Escape(input string) string {
11 | 	b := &bytes.Buffer{}
12 | 	for _, r := range input {
13 | 		escape(b, r, false)
14 | 	}
15 | 	return b.String()
16 | }
17 | 
18 | const meta = `\.+*?()|[]{}^$# `
19 | 
20 | // escape appends r to b: printable runes as-is (backslash-prefixed if in meta or force is set), others as a symbolic or zero-padded hex escape.
21 | func escape(b *bytes.Buffer, r rune, force bool) {
22 | 	if unicode.IsPrint(r) {
23 | 		if strings.IndexRune(meta, r) >= 0 || force {
24 | 			b.WriteRune('\\')
25 | 		}
26 | 		b.WriteRune(r)
27 | 		return
28 | 	}
29 | 	switch r {
30 | 	case '\a':
31 | 		b.WriteString(`\a`)
32 | 	case '\f':
33 | 		b.WriteString(`\f`)
34 | 	case '\n':
35 | 		b.WriteString(`\n`)
36 | 	case '\r':
37 | 		b.WriteString(`\r`)
38 | 	case '\t':
39 | 		b.WriteString(`\t`)
40 | 	case '\v':
41 | 		b.WriteString(`\v`)
42 | 	default:
43 | 		// Zero-pad to the two digits of \xNN or the four of \uNNNN; an unpadded "\u378" is not a well-formed escape.
44 | 		prefix, width := `\u`, 4
45 | 		if r < 0x100 {
46 | 			prefix, width = `\x`, 2
47 | 		}
48 | 		hex := strconv.FormatInt(int64(r), 16)
49 | 		b.WriteString(prefix)
50 | 		for pad := width - len(hex); pad > 0; pad-- {
51 | 			b.WriteByte('0')
52 | 		}
53 | 		b.WriteString(hex)
54 | 	}
55 | }
56 | // Unescape returns input with each backslash escape replaced by the rune scanCharEscape decodes for it, or the scan error.
57 | func Unescape(input string) (string, error) {
58 | 	idx := strings.IndexRune(input, '\\')
59 | 	// no slashes means no unescape needed
60 | 	if idx == -1 {
61 | 		return input, nil
62 | 	}
63 | 	// everything before the first backslash is copied through verbatim
64 | 	buf := bytes.NewBufferString(input[:idx])
65 | 	// get the runes for the rest of the string -- we're going full parser scan on this
66 | 	// (the parser is handed the text just past that first backslash)
67 | 	p := parser{}
68 | 	p.setPattern(input[idx+1:])
69 | 	for {
70 | 		if p.rightMost() { // presumably: the input ends right after a backslash -- confirm rightMost semantics
71 | 			return "", p.getErr(ErrIllegalEndEscape)
72 | 		}
73 | 		r, err := p.scanCharEscape()
74 | 		if err != nil {
75 | 			return "", err
76 | 		}
77 | 		buf.WriteRune(r)
78 | 		// are we done?
79 | 		if p.rightMost() {
80 | 			return buf.String(), nil
81 | 		}
82 | 
83 | 		r = p.moveRightGetChar()
84 | 		for r != '\\' { // copy plain runes through until the next backslash
85 | 			buf.WriteRune(r)
86 | 			if p.rightMost() {
87 | 				// we're done, no more slashes
88 | 				return buf.String(), nil
89 | 			}
90 | 			// keep scanning until we get another slash
91 | 			r = p.moveRightGetChar()
92 | 		}
93 | 	}
94 | }
95 | 


--------------------------------------------------------------------------------
/vendor/gopkg.in/yaml.v3/LICENSE:
--------------------------------------------------------------------------------
 1 | 
 2 | This project is covered by two different licenses: MIT and Apache.
 3 | 
 4 | #### MIT License ####
 5 | 
 6 | The following files were ported to Go from C files of libyaml, and thus
 7 | are still covered by their original MIT license, with the additional
 8 | copyright staring in 2011 when the project was ported over:
 9 | 
10 |     apic.go emitterc.go parserc.go readerc.go scannerc.go
11 |     writerc.go yamlh.go yamlprivateh.go
12 | 
13 | Copyright (c) 2006-2010 Kirill Simonov
14 | Copyright (c) 2006-2011 Kirill Simonov
15 | 
16 | Permission is hereby granted, free of charge, to any person obtaining a copy of
17 | this software and associated documentation files (the "Software"), to deal in
18 | the Software without restriction, including without limitation the rights to
19 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
20 | of the Software, and to permit persons to whom the Software is furnished to do
21 | so, subject to the following conditions:
22 | 
23 | The above copyright notice and this permission notice shall be included in all
24 | copies or substantial portions of the Software.
25 | 
26 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
27 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
28 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
29 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
30 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
31 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
32 | SOFTWARE.
33 | 
34 | ### Apache License ###
35 | 
36 | All the remaining project files are covered by the Apache license:
37 | 
38 | Copyright (c) 2011-2019 Canonical Ltd
39 | 
40 | Licensed under the Apache License, Version 2.0 (the "License");
41 | you may not use this file except in compliance with the License.
42 | You may obtain a copy of the License at
43 | 
44 |     http://www.apache.org/licenses/LICENSE-2.0
45 | 
46 | Unless required by applicable law or agreed to in writing, software
47 | distributed under the License is distributed on an "AS IS" BASIS,
48 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
49 | See the License for the specific language governing permissions and
50 | limitations under the License.
51 | 


--------------------------------------------------------------------------------
/vendor/github.com/pkg/errors/README.md:
--------------------------------------------------------------------------------
 1 | # errors [![Travis-CI](https://travis-ci.org/pkg/errors.svg)](https://travis-ci.org/pkg/errors) [![AppVeyor](https://ci.appveyor.com/api/projects/status/b98mptawhudj53ep/branch/master?svg=true)](https://ci.appveyor.com/project/davecheney/errors/branch/master) [![GoDoc](https://godoc.org/github.com/pkg/errors?status.svg)](http://godoc.org/github.com/pkg/errors) [![Report card](https://goreportcard.com/badge/github.com/pkg/errors)](https://goreportcard.com/report/github.com/pkg/errors) [![Sourcegraph](https://sourcegraph.com/github.com/pkg/errors/-/badge.svg)](https://sourcegraph.com/github.com/pkg/errors?badge)
 2 | 
 3 | Package errors provides simple error handling primitives.
 4 | 
 5 | `go get github.com/pkg/errors`
 6 | 
 7 | The traditional error handling idiom in Go is roughly akin to
 8 | ```go
 9 | if err != nil {
10 |         return err
11 | }
12 | ```
13 | which applied recursively up the call stack results in error reports without context or debugging information. The errors package allows programmers to add context to the failure path in their code in a way that does not destroy the original value of the error.
14 | 
15 | ## Adding context to an error
16 | 
17 | The errors.Wrap function returns a new error that adds context to the original error. For example
18 | ```go
19 | _, err := ioutil.ReadAll(r)
20 | if err != nil {
21 |         return errors.Wrap(err, "read failed")
22 | }
23 | ```
24 | ## Retrieving the cause of an error
25 | 
26 | Using `errors.Wrap` constructs a stack of errors, adding context to the preceding error. Depending on the nature of the error it may be necessary to reverse the operation of errors.Wrap to retrieve the original error for inspection. Any error value which implements this interface can be inspected by `errors.Cause`.
27 | ```go
28 | type causer interface {
29 |         Cause() error
30 | }
31 | ```
32 | `errors.Cause` will recursively retrieve the topmost error which does not implement `causer`, which is assumed to be the original cause. For example:
33 | ```go
34 | switch err := errors.Cause(err).(type) {
35 | case *MyError:
36 |         // handle specifically
37 | default:
38 |         // unknown error
39 | }
40 | ```
41 | 
42 | [Read the package documentation for more information](https://godoc.org/github.com/pkg/errors).
43 | 
44 | ## Roadmap
45 | 
46 | With the upcoming [Go2 error proposals](https://go.googlesource.com/proposal/+/master/design/go2draft.md) this package is moving into maintenance mode. The roadmap for a 1.0 release is as follows:
47 | 
48 | - 0.9. Remove pre Go 1.9 and Go 1.10 support, address outstanding pull requests (if possible)
49 | - 1.0. Final release.
50 | 
51 | ## Contributing
52 | 
53 | Because of the Go2 errors changes, this package is not accepting proposals for new functionality. With that said, we welcome pull requests, bug fixes and issue reports. 
54 | 
55 | Before sending a PR, please discuss your change by raising an issue.
56 | 
57 | ## License
58 | 
59 | BSD-2-Clause
60 | 


--------------------------------------------------------------------------------
/frequency.go:
--------------------------------------------------------------------------------
  1 | package tokenizer
  2 | 
  3 | import (
  4 | 	"bufio"
  5 | 	"io"
  6 | 	"regexp"
  7 | 	"runtime"
  8 | 	"sort"
  9 | 	"strings"
 10 | 	"sync"
 11 | 
 12 | 	"github.com/dlclark/regexp2"
 13 | )
 14 | 
 15 | var (
 16 | 	tabRegex     = regexp.MustCompile(`\t`) // Multiple tabs in a row are relevant, but not multiple spaces or newlines
 17 | 	newlineRegex = regexp.MustCompile(`[\n\r]+`)
 18 | 	spaceRegex   = regexp.MustCompile(`\p{Z}+`)
 19 | 	repeatRegex  = regexp2.MustCompile(`.*(.)\1{5,}.*`, 0)
 20 | )
 21 | 
 22 | type WordCount struct {
 23 | 	Pieces []string `json:"pieces"` // the word split into its individual UTF-8 characters
 24 | 	Count  int64    `json:"count"`  // how many times the word was counted
 25 | }
 26 | 
 27 | // CountString normalizes whitespace in s, splits it into words and returns how
 28 | // often each encoded word occurs. Tokens containing a character repeated six or
 29 | // more times in a row are left out.
 30 | func CountString(s string) map[string]int64 {
 31 | 	// Tabs stay one-for-one (runs of tabs are meaningful); only newline and space runs are collapsed.
 32 | 	s = newlineRegex.ReplaceAllString(s, "\n")
 33 | 	s = spaceRegex.ReplaceAllString(s, " ")
 34 | 
 35 | 	frequencies := map[string]int64{}
 36 | 	for _, word := range WordSplit(s) {
 37 | 		token := unicodeEncode(word)
 38 | 		// A token already in the map passed the repeat check earlier; a match error also drops the token.
 39 | 		if _, seen := frequencies[token]; !seen {
 40 | 			if match, err := repeatRegex.MatchString(token); match || err != nil {
 41 | 				continue
 42 | 			}
 43 | 		}
 44 | 		frequencies[token]++
 45 | 	}
 46 | 	return frequencies
 47 | }
 48 | 
 49 | // mapToSortedWordCount turns freq into WordCount entries ordered by byWordCount.
 50 | func mapToSortedWordCount(freq map[string]int64) []WordCount {
 51 | 	entries := make([]WordCount, 0, len(freq))
 52 | 	for token, n := range freq {
 53 | 		chars := strings.Split(token, "") // one piece per UTF-8 character
 54 | 		entries = append(entries, WordCount{Pieces: chars, Count: n})
 55 | 	}
 56 | 	sort.Sort(byWordCount(entries))
 57 | 	return entries
 58 | }
 59 | 
 60 | func MergeCounts(a map[string]int64, b map[string]int64) {
 61 | 	for token := range b { // a is updated in place; b is only read
 62 | 		a[token] = a[token] + b[token]
 63 | 	}
 64 | }
 65 | 
 66 | func CountReader(reader io.Reader) (map[string]int64, error) {
 67 | 	bufReader := bufio.NewReader(reader)
 68 | 	wg := sync.WaitGroup{}
 69 | 	mergeWG := sync.WaitGroup{}
 70 | 	countJobs := make(chan map[string]int64)
 71 | 	totalCount := map[string]int64{}
 72 | 	mergeWorker := func(counts <-chan map[string]int64) {
 73 | 		for count := range counts {
 74 | 			MergeCounts(totalCount, count)
 75 | 		}
 76 | 		mergeWG.Done()
 77 | 	}
 78 | 
 79 | 	mergeWG.Add(1)
 80 | 	go mergeWorker(countJobs)
 81 | 
 82 | 	worker := func(jobs <-chan string) {
 83 | 		for text := range jobs {
 84 | 			countJobs <- CountString(text)
 85 | 		}
 86 | 		wg.Done()
 87 | 	}
 88 | 
 89 | 	// assuming each line is 1kb~, this gives us around a 100mb mem cap on this queue
 90 | 	jobs := make(chan string, 100000)
 91 | 	for i := 0; i < runtime.NumCPU(); i++ {
 92 | 		wg.Add(1)
 93 | 		go worker(jobs)
 94 | 	}
 95 | 
 96 | 	EOF := false
 97 | 	for {
 98 | 		if EOF {
 99 | 			break
100 | 		}
101 | 
102 | 		line, err := bufReader.ReadString('\n')
103 | 		if err != nil {
104 | 			if err != io.EOF && err != io.ErrUnexpectedEOF {
105 | 				return nil, err
106 | 			}
107 | 			EOF = true
108 | 		}
109 | 
110 | 		jobs <- line
111 | 	}
112 | 	close(jobs)
113 | 	wg.Wait()
114 | 
115 | 	close(countJobs)
116 | 	mergeWG.Wait()
117 | 
118 | 	return totalCount, nil
119 | }
120 | 


--------------------------------------------------------------------------------
/vendor/github.com/stretchr/testify/assert/assertion_order.go:
--------------------------------------------------------------------------------
 1 | package assert
 2 | 
 3 | import (
 4 | 	"fmt"
 5 | 	"reflect"
 6 | )
 7 | 
 8 | // isOrdered reports whether every adjacent pair of elements in the slice or
 9 | // array object compares with one of allowedComparesResults; the first pair
10 | // that does not is reported through Fail using failMessage.
11 | func isOrdered(t TestingT, object interface{}, allowedComparesResults []CompareType, failMessage string, msgAndArgs ...interface{}) bool {
12 | 	objKind := reflect.TypeOf(object).Kind()
13 | 	if objKind != reflect.Slice && objKind != reflect.Array {
14 | 		return false // NOTE(review): non-collections return false without calling Fail
15 | 	}
16 | 
17 | 	objValue := reflect.ValueOf(object)
18 | 	objLen := objValue.Len()
19 | 	if objLen <= 1 {
20 | 		return true // zero or one element is trivially ordered
21 | 	}
22 | 
23 | 	value := objValue.Index(0)
24 | 	valueInterface := value.Interface()
25 | 	firstValueKind := value.Kind()
26 | 
27 | 	for i := 1; i < objLen; i++ {
28 | 		prevValue := value
29 | 		prevValueInterface := valueInterface
30 | 
31 | 		value = objValue.Index(i)
32 | 		valueInterface = value.Interface()
33 | 
34 | 		compareResult, isComparable := compare(prevValueInterface, valueInterface, firstValueKind)
35 | 		if !isComparable {
36 | 			// %T prints the element types; reflect.TypeOf on a reflect.Value would only ever print "reflect.Value".
37 | 			return Fail(t, fmt.Sprintf("Can not compare type \"%T\" and \"%T\"", valueInterface, prevValueInterface), msgAndArgs...)
38 | 		}
39 | 		if !containsValue(allowedComparesResults, compareResult) {
40 | 			return Fail(t, fmt.Sprintf(failMessage, prevValue, value), msgAndArgs...)
41 | 		}
42 | 	}
43 | 
44 | 	return true
45 | }
46 | 
47 | // IsIncreasing asserts that the collection is strictly increasing; msgAndArgs are forwarded as the optional failure message.
48 | //
49 | //    assert.IsIncreasing(t, []int{1, 2, 3})
50 | //    assert.IsIncreasing(t, []float64{1, 2})
51 | //    assert.IsIncreasing(t, []string{"a", "b"})
52 | func IsIncreasing(t TestingT, object interface{}, msgAndArgs ...interface{}) bool {
53 | 	return isOrdered(t, object, []CompareType{compareLess}, "\"%v\" is not less than \"%v\"", msgAndArgs...)
54 | }
55 | 
56 | // IsNonIncreasing asserts that no element is greater than the one before it; msgAndArgs are forwarded as the optional failure message.
57 | //
58 | //    assert.IsNonIncreasing(t, []int{2, 1, 1})
59 | //    assert.IsNonIncreasing(t, []float64{2, 1})
60 | //    assert.IsNonIncreasing(t, []string{"b", "a"})
61 | func IsNonIncreasing(t TestingT, object interface{}, msgAndArgs ...interface{}) bool {
62 | 	return isOrdered(t, object, []CompareType{compareEqual, compareGreater}, "\"%v\" is not greater than or equal to \"%v\"", msgAndArgs...)
63 | }
64 | 
65 | // IsDecreasing asserts that the collection is strictly decreasing; msgAndArgs are forwarded as the optional failure message.
66 | //
67 | //    assert.IsDecreasing(t, []int{2, 1, 0})
68 | //    assert.IsDecreasing(t, []float64{2, 1})
69 | //    assert.IsDecreasing(t, []string{"b", "a"})
70 | func IsDecreasing(t TestingT, object interface{}, msgAndArgs ...interface{}) bool {
71 | 	return isOrdered(t, object, []CompareType{compareGreater}, "\"%v\" is not greater than \"%v\"", msgAndArgs...)
72 | }
73 | 
74 | // IsNonDecreasing asserts that no element is less than the one before it; msgAndArgs are forwarded as the optional failure message.
75 | //
76 | //    assert.IsNonDecreasing(t, []int{1, 1, 2})
77 | //    assert.IsNonDecreasing(t, []float64{1, 2})
78 | //    assert.IsNonDecreasing(t, []string{"a", "b"})
79 | func IsNonDecreasing(t TestingT, object interface{}, msgAndArgs ...interface{}) bool {
80 | 	return isOrdered(t, object, []CompareType{compareLess, compareEqual}, "\"%v\" is not less than or equal to \"%v\"", msgAndArgs...)
81 | }
82 | 


--------------------------------------------------------------------------------
/.github/.golangci.yaml:
--------------------------------------------------------------------------------
  1 | linters-settings:
  2 |   depguard:
  3 |     list-type: blacklist
  4 |     packages:
  5 |       # logging is allowed only via logutils.Log; logrus itself
  6 |       # may be used only inside the logutils package
  7 |       - github.com/sirupsen/logrus
  8 |     packages-with-error-message:
  9 |       - github.com/sirupsen/logrus: "logging is allowed only by logutils.Log"
 10 |   dupl:
 11 |     threshold: 200
 12 |   goconst:
 13 |     min-len: 2
 14 |     min-occurrences: 2
 15 |   gocritic:
 16 |     settings:
 17 |       rangeValCopy:
 18 |         sizeThreshold: 512
 19 |       hugeParam:
 20 |         sizeThreshold: 512
 21 |     enabled-tags:
 22 |       - diagnostic
 23 |       - experimental
 24 |       - opinionated
 25 |       - performance
 26 |       - style
 27 |     disabled-checks:
 28 |       - dupImport # https://github.com/go-critic/go-critic/issues/845
 29 |       - ifElseChain
 30 |       - octalLiteral
 31 |       - whyNoLint
 32 |       - wrapperFunc
 33 |       - docStub
 34 |       - importShadow
 35 |       - unnamedResult
 36 |       - commentedOutCode
 37 |       - exitAfterDefer
 38 |       - emptyStringTest
 39 |       - paramTypeCombine
 40 |   gosimple:
 41 |     disabled-checks:
 42 |       - S1023
 43 |       - S1000
 44 |   gocyclo:
 45 |     min-complexity: 100
 46 |   goimports:
 47 |     local-prefixes: github.com/golangci/golangci-lint
 48 |   golint:
 49 |     min-confidence: 0.8
 50 |   govet:
 51 |     check-shadowing: false
 52 |     settings:
 53 |       printf:
 54 |         funcs:
 55 |           - (github.com/golangci/golangci-lint/pkg/logutils.Log).Infof
 56 |           - (github.com/golangci/golangci-lint/pkg/logutils.Log).Warnf
 57 |           - (github.com/golangci/golangci-lint/pkg/logutils.Log).Errorf
 58 |           - (github.com/golangci/golangci-lint/pkg/logutils.Log).Fatalf
 59 |   maligned:
 60 |     suggest-new: true
 61 |   nolintlint:
 62 |     allow-leading-space: true # don't require machine-readable nolint directives (i.e. with no leading space)
 63 |     allow-unused: false # report any unused nolint directives
 64 |     require-explanation: false # don't require an explanation for nolint directives
 65 |     require-specific: false # don't require nolint directives to be specific about which linter is being skipped
 66 | 
 67 | linters:
 68 |   # please, do not use `enable-all`: it's deprecated and will be removed soon.
 69 |   # inverted configuration with `enable-all` and `disable` is not scalable during updates of golangci-lint
 70 |   disable-all: true
 71 |   enable:
 72 |     - bodyclose
 73 |     - deadcode
 74 |     - depguard
 75 |     - dogsled
 76 |     - dupl
 77 |     - gocritic
 78 |     - gocyclo
 79 |     - gofmt
 80 |     - goimports
 81 |     - golint
 82 |     - goprintffuncname
 83 |     - gosimple
 84 |     - govet
 85 |     - interfacer
 86 |     - nakedret
 87 |     - nolintlint
 88 |     - rowserrcheck
 89 |     - staticcheck
 90 |     - structcheck
 91 |     - stylecheck
 92 |     - typecheck
 93 |     - unparam
 94 |     - unused
 95 |     - varcheck
 96 |     - whitespace
 97 | 
 98 |   # don't enable:
 99 |   # - asciicheck
100 |   # - gochecknoglobals
101 |   # - gocognit
102 |   # - godot
103 |   # - godox
104 |   # - goerr113
105 |   # - maligned
106 |   # - nestif
107 |   # - prealloc
108 |   # - testpackage
109 |   # - wsl
110 | 
111 | issues:
112 |   # Excluding configuration per-path, per-linter, per-text and per-source
113 |   exclude-rules:
114 |     - path: _test\.go
115 |       linters:
116 |         - gomnd
117 | 
118 |     # https://github.com/go-critic/go-critic/issues/926
119 |     - linters:
120 |         - gocritic
121 |       text: "unnecessaryDefer:"
122 | 
123 | run:
124 |   modules-download-mode: vendor
125 |   skip-dirs:
126 |     - src/mocks/
127 | 
128 | # golangci.com configuration
129 | # https://github.com/golangci/golangci/wiki/Configuration
130 | service:
131 |   golangci-lint-version: 1.29.x # use the fixed version to not introduce new linters unexpectedly
132 |   prepare:
133 |     - echo "here I can run custom commands, but no preparation needed for this repo"
134 | 


--------------------------------------------------------------------------------
/vendor/gopkg.in/yaml.v3/sorter.go:
--------------------------------------------------------------------------------
  1 | //
  2 | // Copyright (c) 2011-2019 Canonical Ltd
  3 | //
  4 | // Licensed under the Apache License, Version 2.0 (the "License");
  5 | // you may not use this file except in compliance with the License.
  6 | // You may obtain a copy of the License at
  7 | //
  8 | //     http://www.apache.org/licenses/LICENSE-2.0
  9 | //
 10 | // Unless required by applicable law or agreed to in writing, software
 11 | // distributed under the License is distributed on an "AS IS" BASIS,
 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 | // See the License for the specific language governing permissions and
 14 | // limitations under the License.
 15 | 
 16 | package yaml
 17 | 
 18 | import (
 19 | 	"reflect"
 20 | 	"unicode"
 21 | )
 22 | 
 23 | type keyList []reflect.Value // a list of reflected keys; Len, Swap and Less below give it a sort order
 24 | 
 25 | func (l keyList) Len() int      { return len(l) }
 26 | func (l keyList) Swap(i, j int) { l[i], l[j] = l[j], l[i] }
 27 | func (l keyList) Less(i, j int) bool { // numbers/bools by value, strings with digit runs compared numerically, anything else by kind
 28 | 	a := l[i]
 29 | 	b := l[j]
 30 | 	ak := a.Kind()
 31 | 	bk := b.Kind()
 32 | 	for (ak == reflect.Interface || ak == reflect.Ptr) && !a.IsNil() { // unwrap a until it is neither interface nor pointer, or is nil
 33 | 		a = a.Elem()
 34 | 		ak = a.Kind()
 35 | 	}
 36 | 	for (bk == reflect.Interface || bk == reflect.Ptr) && !b.IsNil() { // same unwrapping for b
 37 | 		b = b.Elem()
 38 | 		bk = b.Kind()
 39 | 	}
 40 | 	af, aok := keyFloat(a)
 41 | 	bf, bok := keyFloat(b)
 42 | 	if aok && bok { // both numeric/bool: order by float value, then by kind, then exactly via numLess
 43 | 		if af != bf {
 44 | 			return af < bf
 45 | 		}
 46 | 		if ak != bk {
 47 | 			return ak < bk
 48 | 		}
 49 | 		return numLess(a, b)
 50 | 	}
 51 | 	if ak != reflect.String || bk != reflect.String { // not two strings: fall back to ordering by kind
 52 | 		return ak < bk
 53 | 	}
 54 | 	ar, br := []rune(a.String()), []rune(b.String()) // two strings: compare rune by rune
 55 | 	digits := false // whether the most recent rune the two strings shared was a digit
 56 | 	for i := 0; i < len(ar) && i < len(br); i++ {
 57 | 		if ar[i] == br[i] {
 58 | 			digits = unicode.IsDigit(ar[i])
 59 | 			continue
 60 | 		}
 61 | 		al := unicode.IsLetter(ar[i])
 62 | 		bl := unicode.IsLetter(br[i])
 63 | 		if al && bl { // two different letters: plain rune order
 64 | 			return ar[i] < br[i]
 65 | 		}
 66 | 		if al || bl { // a letter against a non-letter
 67 | 			if digits { // right after a shared digit the letter sorts first
 68 | 				return al
 69 | 			} else { // otherwise the non-letter sorts first
 70 | 				return bl
 71 | 			}
 72 | 		}
 73 | 		var ai, bi int
 74 | 		var an, bn int64
 75 | 		if ar[i] == '0' || br[i] == '0' { // a '0' here may be mid-number rather than a leading zero
 76 | 			for j := i - 1; j >= 0 && unicode.IsDigit(ar[j]); j-- {
 77 | 				if ar[j] != '0' { // shared non-zero digit before it: seed both values so the zero keeps its place value
 78 | 					an = 1
 79 | 					bn = 1
 80 | 					break
 81 | 				}
 82 | 			}
 83 | 		}
 84 | 		for ai = i; ai < len(ar) && unicode.IsDigit(ar[ai]); ai++ { // accumulate a's digit run starting at i
 85 | 			an = an*10 + int64(ar[ai]-'0')
 86 | 		}
 87 | 		for bi = i; bi < len(br) && unicode.IsDigit(br[bi]); bi++ { // accumulate b's digit run starting at i
 88 | 			bn = bn*10 + int64(br[bi]-'0')
 89 | 		}
 90 | 		if an != bn { // different numeric values decide first
 91 | 			return an < bn
 92 | 		}
 93 | 		if ai != bi { // equal values: the shorter digit run sorts first
 94 | 			return ai < bi
 95 | 		}
 96 | 		return ar[i] < br[i] // last resort: raw rune order at the first difference
 97 | 	}
 98 | 	return len(ar) < len(br) // no difference within the common length: the shorter string sorts first
 99 | }
100 | 
101 | // keyFloat returns v as a float64 when v holds a number or bool (false is 0, true is 1); ok is false for every other kind.
102 | func keyFloat(v reflect.Value) (f float64, ok bool) {
103 | 	switch v.Kind() {
104 | 	case reflect.Bool:
105 | 		if v.Bool() {
106 | 			f = 1
107 | 		}
108 | 	case reflect.Float32, reflect.Float64:
109 | 		f = v.Float()
110 | 	case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
111 | 		f = float64(v.Int())
112 | 	case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr:
113 | 		f = float64(v.Uint())
114 | 	default:
115 | 		return 0, false
116 | 	}
117 | 	return f, true
118 | }
119 | 
120 | // numLess reports whether a < b for two numeric or bool values of the same
121 | // kind (false orders before true); it panics when a is any other kind.
122 | func numLess(a, b reflect.Value) bool {
123 | 	switch kind := a.Kind(); kind {
124 | 	case reflect.Bool:
125 | 		return !a.Bool() && b.Bool()
126 | 	case reflect.Float32, reflect.Float64:
127 | 		return a.Float() < b.Float()
128 | 	case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr:
129 | 		return a.Uint() < b.Uint()
130 | 	case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
131 | 		return a.Int() < b.Int()
132 | 	}
133 | 	panic("not a number")
134 | }
135 | 


--------------------------------------------------------------------------------
/vendor/gopkg.in/yaml.v3/README.md:
--------------------------------------------------------------------------------
  1 | # YAML support for the Go language
  2 | 
  3 | Introduction
  4 | ------------
  5 | 
  6 | The yaml package enables Go programs to comfortably encode and decode YAML
  7 | values. It was developed within [Canonical](https://www.canonical.com) as
  8 | part of the [juju](https://juju.ubuntu.com) project, and is based on a
  9 | pure Go port of the well-known [libyaml](http://pyyaml.org/wiki/LibYAML)
 10 | C library to parse and generate YAML data quickly and reliably.
 11 | 
 12 | Compatibility
 13 | -------------
 14 | 
 15 | The yaml package supports most of YAML 1.2, but preserves some behavior
 16 | from 1.1 for backwards compatibility.
 17 | 
 18 | Specifically, as of v3 of the yaml package:
 19 | 
 20 |  - YAML 1.1 bools (_yes/no, on/off_) are supported as long as they are being
 21 |    decoded into a typed bool value. Otherwise they behave as a string. Booleans
 22 |    in YAML 1.2 are _true/false_ only.
 23 |  - Octals encode and decode as _0777_ per YAML 1.1, rather than _0o777_
 24 |    as specified in YAML 1.2, because most parsers still use the old format.
 25 |    Octals in the  _0o777_ format are supported though, so new files work.
 26 |  - Does not support base-60 floats. These are gone from YAML 1.2, and were
 27 |    actually never supported by this package as it's clearly a poor choice.
 28 | 
 29 | Beyond the points above, the package offers backwards
 30 | compatibility with YAML 1.1 in some cases and covers most of YAML
 31 | 1.2, including support for
 32 | anchors, tags, map merging, etc. Multi-document unmarshalling is not yet
 33 | implemented, and base-60 floats from YAML 1.1 are purposefully not
 34 | supported since they're a poor design and are gone in YAML 1.2.
 35 | 
 36 | Installation and usage
 37 | ----------------------
 38 | 
 39 | The import path for the package is *gopkg.in/yaml.v3*.
 40 | 
 41 | To install it, run:
 42 | 
 43 |     go get gopkg.in/yaml.v3
 44 | 
 45 | API documentation
 46 | -----------------
 47 | 
 48 | If opened in a browser, the import path itself leads to the API documentation:
 49 | 
 50 |   - [https://gopkg.in/yaml.v3](https://gopkg.in/yaml.v3)
 51 | 
 52 | API stability
 53 | -------------
 54 | 
 55 | The package API for yaml v3 will remain stable as described in [gopkg.in](https://gopkg.in).
 56 | 
 57 | 
 58 | License
 59 | -------
 60 | 
 61 | The yaml package is licensed under the MIT and Apache License 2.0 licenses.
 62 | Please see the LICENSE file for details.
 63 | 
 64 | 
 65 | Example
 66 | -------
 67 | 
 68 | ```Go
 69 | package main
 70 | 
 71 | import (
 72 |         "fmt"
 73 |         "log"
 74 | 
 75 |         "gopkg.in/yaml.v3"
 76 | )
 77 | 
 78 | var data = `
 79 | a: Easy!
 80 | b:
 81 |   c: 2
 82 |   d: [3, 4]
 83 | `
 84 | 
 85 | // Note: struct fields must be public in order for unmarshal to
 86 | // correctly populate the data.
 87 | type T struct {
 88 |         A string
 89 |         B struct {
 90 |                 RenamedC int   `yaml:"c"`
 91 |                 D        []int `yaml:",flow"`
 92 |         }
 93 | }
 94 | 
 95 | func main() {
 96 |         t := T{}
 97 |     
 98 |         err := yaml.Unmarshal([]byte(data), &t)
 99 |         if err != nil {
100 |                 log.Fatalf("error: %v", err)
101 |         }
102 |         fmt.Printf("--- t:\n%v\n\n", t)
103 |     
104 |         d, err := yaml.Marshal(&t)
105 |         if err != nil {
106 |                 log.Fatalf("error: %v", err)
107 |         }
108 |         fmt.Printf("--- t dump:\n%s\n\n", string(d))
109 |     
110 |         m := make(map[interface{}]interface{})
111 |     
112 |         err = yaml.Unmarshal([]byte(data), &m)
113 |         if err != nil {
114 |                 log.Fatalf("error: %v", err)
115 |         }
116 |         fmt.Printf("--- m:\n%v\n\n", m)
117 |     
118 |         d, err = yaml.Marshal(&m)
119 |         if err != nil {
120 |                 log.Fatalf("error: %v", err)
121 |         }
122 |         fmt.Printf("--- m dump:\n%s\n\n", string(d))
123 | }
124 | ```
125 | 
126 | This example will generate the following output:
127 | 
128 | ```
129 | --- t:
130 | {Easy! {2 [3 4]}}
131 | 
132 | --- t dump:
133 | a: Easy!
134 | b:
135 |   c: 2
136 |   d: [3, 4]
137 | 
138 | 
139 | --- m:
140 | map[a:Easy! b:map[c:2 d:[3 4]]]
141 | 
142 | --- m dump:
143 | a: Easy!
144 | b:
145 |   c: 2
146 |   d:
147 |   - 3
148 |   - 4
149 | ```
150 | 
151 | 


--------------------------------------------------------------------------------
/vendor/github.com/dlclark/regexp2/replace.go:
--------------------------------------------------------------------------------
  1 | package regexp2
  2 | 
  3 | import (
  4 | 	"bytes"
  5 | 	"errors"
  6 | 
  7 | 	"github.com/dlclark/regexp2/syntax"
  8 | )
  9 | 
 10 | const (
 11 | 	replaceSpecials     = 4  // count of special codes below; rules < -replaceSpecials are group lookups
 12 | 	replaceLeftPortion  = -1 // insert the text before the match (m.text up to m.Index)
 13 | 	replaceRightPortion = -2 // insert the text after the match (m.text from m.Index+m.Length)
 14 | 	replaceLastGroup    = -3 // insert the value of group m.GroupCount()-1
 15 | 	replaceWholeString  = -4 // insert the whole of m.text
 16 | )
 17 | 
 18 | // MatchEvaluator is a function that takes a match and returns a replacement string to be used
 19 | type MatchEvaluator func(Match) string
 20 | 
 21 | // Three very similar algorithms appear below: replace (pattern),
 22 | // replace (evaluator), and split.
 23 | 
 24 | // replace substitutes matches of regex found in input, searching from startAt
 25 | // and performing at most count replacements (count == -1 never reaches the
 26 | // limit; count < -1 is rejected). Each match is replaced with evaluator(match)
 27 | // when evaluator is non-nil, otherwise with the substitution pattern in data.
 28 | //
 29 | // If nothing matches, input is returned unchanged. Right-to-left regexes gather
 30 | // their pieces in a slice and write them out back to front once matching ends.
 31 | func replace(regex *Regexp, data *syntax.ReplacerData, evaluator MatchEvaluator, input string, startAt, count int) (string, error) {
 32 | 	if count < -1 {
 33 | 		return "", errors.New("Count too small")
 34 | 	}
 35 | 	if count == 0 {
 36 | 		return "", nil // NOTE(review): yields "" rather than input when count is 0 — confirm intended
 37 | 	}
 38 | 
 39 | 	m, err := regex.FindStringMatchStartingAt(input, startAt)
 40 | 
 41 | 	if err != nil {
 42 | 		return "", err
 43 | 	}
 44 | 	if m == nil {
 45 | 		return input, nil
 46 | 	}
 47 | 
 48 | 	buf := &bytes.Buffer{}
 49 | 	text := m.text
 50 | 
 51 | 	if !regex.RightToLeft() {
 52 | 		prevat := 0 // end of the previous match; everything before it is already in buf
 53 | 		for m != nil {
 54 | 			if m.Index != prevat {
 55 | 				buf.WriteString(string(text[prevat:m.Index]))
 56 | 			}
 57 | 			prevat = m.Index + m.Length
 58 | 			if evaluator == nil {
 59 | 				replacementImpl(data, buf, m)
 60 | 			} else {
 61 | 				buf.WriteString(evaluator(*m))
 62 | 			}
 63 | 
 64 | 			count--
 65 | 			if count == 0 {
 66 | 				break
 67 | 			}
 68 | 			m, err = regex.FindNextMatch(m)
 69 | 			if err != nil {
 70 | 				return "", err // propagate the failure instead of reporting success
 71 | 			}
 72 | 		}
 73 | 
 74 | 		if prevat < len(text) {
 75 | 			buf.WriteString(string(text[prevat:]))
 76 | 		}
 77 | 	} else {
 78 | 		prevat := len(text) // start of the previous match; matches arrive right to left
 79 | 		var al []string
 80 | 
 81 | 		for m != nil {
 82 | 			if m.Index+m.Length != prevat {
 83 | 				al = append(al, string(text[m.Index+m.Length:prevat]))
 84 | 			}
 85 | 			prevat = m.Index
 86 | 			if evaluator == nil {
 87 | 				replacementImplRTL(data, &al, m)
 88 | 			} else {
 89 | 				al = append(al, evaluator(*m))
 90 | 			}
 91 | 
 92 | 			count--
 93 | 			if count == 0 {
 94 | 				break
 95 | 			}
 96 | 			m, err = regex.FindNextMatch(m)
 97 | 			if err != nil {
 98 | 				return "", err // propagate the failure instead of reporting success
 99 | 			}
100 | 		}
101 | 
102 | 		if prevat > 0 {
103 | 			buf.WriteString(string(text[:prevat]))
104 | 		}
105 | 
106 | 		for i := len(al) - 1; i >= 0; i-- {
107 | 			buf.WriteString(al[i])
108 | 		}
109 | 	}
110 | 
111 | 	return buf.String(), nil
112 | }
113 | 
114 | // replacementImpl writes the substitution pattern described by data,
115 | // evaluated against the match m, onto the end of buf.
116 | func replacementImpl(data *syntax.ReplacerData, buf *bytes.Buffer, m *Match) {
117 | 	for _, rule := range data.Rules {
118 | 		switch {
119 | 		case rule >= 0: // literal text: index into data.Strings
120 | 			buf.WriteString(data.Strings[rule])
121 | 		case rule < -replaceSpecials: // capture group reference
122 | 			m.groupValueAppendToBuf(-replaceSpecials-1-rule, buf)
123 | 		default: // one of the special insertion codes
124 | 			switch special := -replaceSpecials - 1 - rule; special {
125 | 			case replaceLeftPortion:
126 | 				for pos := 0; pos < m.Index; pos++ {
127 | 					buf.WriteRune(m.text[pos])
128 | 				}
129 | 			case replaceRightPortion:
130 | 				for pos := m.Index + m.Length; pos < len(m.text); pos++ {
131 | 					buf.WriteRune(m.text[pos])
132 | 				}
133 | 			case replaceLastGroup:
134 | 				m.groupValueAppendToBuf(m.GroupCount()-1, buf)
135 | 			case replaceWholeString:
136 | 				for pos := 0; pos < len(m.text); pos++ {
137 | 					buf.WriteRune(m.text[pos])
138 | 				}
139 | 			}
140 | 		}
141 | 	}
142 | }
143 | 
144 | // replacementImplRTL appends the substitution pattern in data, evaluated for m,
145 | // to *al last rule first: replace() writes al back to front, restoring the order.
146 | func replacementImplRTL(data *syntax.ReplacerData, al *[]string, m *Match) {
147 | 	buf := &bytes.Buffer{}
148 | 
149 | 	for i := len(data.Rules) - 1; i >= 0; i-- {
150 | 		r := data.Rules[i]
151 | 		buf.Reset()
152 | 		if r >= 0 { // string lookup
153 | 			*al = append(*al, data.Strings[r])
154 | 		} else if r < -replaceSpecials { // group lookup
155 | 			m.groupValueAppendToBuf(-replaceSpecials-1-r, buf)
156 | 			*al = append(*al, buf.String())
157 | 		} else {
158 | 			switch -replaceSpecials - 1 - r { // special insertion patterns
159 | 			case replaceLeftPortion:
160 | 				for i := 0; i < m.Index; i++ {
161 | 					buf.WriteRune(m.text[i])
162 | 				}
163 | 			case replaceRightPortion:
164 | 				for i := m.Index + m.Length; i < len(m.text); i++ {
165 | 					buf.WriteRune(m.text[i])
166 | 				}
167 | 			case replaceLastGroup:
168 | 				m.groupValueAppendToBuf(m.GroupCount()-1, buf)
169 | 			case replaceWholeString:
170 | 				for i := 0; i < len(m.text); i++ {
171 | 					buf.WriteRune(m.text[i])
172 | 				}
173 | 			}
174 | 			*al = append(*al, buf.String())
175 | 		}
176 | 	}
177 | }
178 | 


--------------------------------------------------------------------------------
/vendor/github.com/pkg/errors/stack.go:
--------------------------------------------------------------------------------
  1 | package errors
  2 | 
  3 | import (
  4 | 	"fmt"
  5 | 	"io"
  6 | 	"path"
  7 | 	"runtime"
  8 | 	"strconv"
  9 | 	"strings"
 10 | )
 11 | 
 12 | // Frame represents a program counter inside a stack frame.
 13 | // For historical reasons, if a Frame is interpreted as a uintptr
 14 | // its value is the program counter + 1.
 15 | type Frame uintptr
 16 | 
 17 | // pc returns the program counter for this frame (the stored value minus 1);
 18 | // multiple frames may have the same PC value.
 19 | func (f Frame) pc() uintptr { return uintptr(f) - 1 }
 20 | 
 21 | // file reports the path of the source file containing the function at
 22 | // this Frame's pc, or "unknown" when no function is found for that pc.
 23 | func (f Frame) file() string {
 24 | 	pc := f.pc()
 25 | 	if fn := runtime.FuncForPC(pc); fn != nil {
 26 | 		src, _ := fn.FileLine(pc)
 27 | 		return src
 28 | 	}
 29 | 	return "unknown"
 30 | }
 31 | 
 32 | // line reports the source line number for this Frame's pc,
 33 | // or 0 when no function is found for that pc.
 34 | func (f Frame) line() int {
 35 | 	pc := f.pc()
 36 | 	if fn := runtime.FuncForPC(pc); fn != nil {
 37 | 		_, lineNo := fn.FileLine(pc)
 38 | 		return lineNo
 39 | 	}
 40 | 	return 0
 41 | }
 42 | 
 43 | // name reports the name of the function at this Frame's pc, or "unknown".
 44 | func (f Frame) name() string {
 45 | 	// FuncForPC yields nil when the pc does not map to a known function.
 46 | 	if fn := runtime.FuncForPC(f.pc()); fn != nil {
 47 | 		return fn.Name()
 48 | 	}
 49 | 	return "unknown"
 50 | }
 51 | 
 52 | // Format implements fmt.Formatter for a Frame. Supported verbs:
 53 | //
 54 | //    %s    source file
 55 | //    %d    source line
 56 | //    %n    function name
 57 | //    %v    equivalent to %s:%d
 58 | //
 59 | // The '+' flag alters the output of some verbs, as follows:
 60 | //
 61 | //    %+s   function name and path of source file relative to the compile time
 62 | //          GOPATH separated by \n\t (<funcname>\n\t<path>)
 63 | //    %+v   equivalent to %+s:%d
 64 | func (f Frame) Format(s fmt.State, verb rune) {
 65 | 	switch verb {
 66 | 	case 'v':
 67 | 		f.Format(s, 's')
 68 | 		io.WriteString(s, ":")
 69 | 		f.Format(s, 'd')
 70 | 	case 'n':
 71 | 		io.WriteString(s, funcname(f.name()))
 72 | 	case 'd':
 73 | 		io.WriteString(s, strconv.Itoa(f.line()))
 74 | 	case 's':
 75 | 		if !s.Flag('+') {
 76 | 			io.WriteString(s, path.Base(f.file()))
 77 | 			return
 78 | 		}
 79 | 		// Verbose form: function name, then newline+tab, then the file path.
 80 | 		io.WriteString(s, f.name())
 81 | 		io.WriteString(s, "\n\t")
 82 | 		io.WriteString(s, f.file())
 83 | 	}
 84 | }
 85 | 
 86 | // MarshalText renders the Frame as "<function> <file>:<line>" on one line,
 87 | // or as just "unknown" when the function name cannot be resolved.
 88 | func (f Frame) MarshalText() ([]byte, error) {
 89 | 	text := f.name()
 90 | 	if text != "unknown" {
 91 | 		text = fmt.Sprintf("%s %s:%d", text, f.file(), f.line())
 92 | 	}
 93 | 	return []byte(text), nil
 94 | }
 95 | 
 96 | // StackTrace is a stack of Frames from innermost (newest) to outermost (oldest).
 97 | type StackTrace []Frame
 98 | 
 99 | // Format implements fmt.Formatter for a StackTrace. Supported verbs:
100 | //
101 | //    %s	lists source files for each Frame in the stack
102 | //    %v	lists the source file and line number for each Frame in the stack
103 | //
104 | // The following flag alters the output of %v:
105 | //
106 | //    %+v   Prints filename, function, and line number for each Frame in the stack.
107 | func (st StackTrace) Format(s fmt.State, verb rune) {
108 | 	isV := verb == 'v'
109 | 	switch {
110 | 	case isV && s.Flag('+'):
111 | 		// One frame per line, each preceded by a newline.
112 | 		for _, frame := range st {
113 | 			io.WriteString(s, "\n")
114 | 			frame.Format(s, verb)
115 | 		}
116 | 	case isV && s.Flag('#'):
117 | 		// Go-syntax representation of the underlying []Frame.
118 | 		fmt.Fprintf(s, "%#v", []Frame(st))
119 | 	case isV || verb == 's':
120 | 		// Plain %v and %s print a bracketed, space-separated list.
121 | 		st.formatSlice(s, verb)
122 | 	}
123 | 	// Any other verb produces no output.
124 | }
125 | 
126 | // formatSlice writes the frames between "[" and "]", separated by single
127 | // spaces, formatting each Frame with verb; intended for '%s' and '%v' only.
128 | func (st StackTrace) formatSlice(s fmt.State, verb rune) {
129 | 	io.WriteString(s, "[")
130 | 	for idx := range st {
131 | 		if idx != 0 {
132 | 			io.WriteString(s, " ")
133 | 		}
134 | 		st[idx].Format(s, verb)
135 | 	}
136 | 	io.WriteString(s, "]")
137 | }
138 | 
139 | // stack represents a stack of program counters.
140 | type stack []uintptr
141 | 
142 | // Format prints every program counter as a Frame on its own line
143 | // (each preceded by a newline) when called with %+v.
144 | // Every other verb/flag combination prints nothing.
145 | func (s *stack) Format(st fmt.State, verb rune) {
146 | 	if verb != 'v' || !st.Flag('+') {
147 | 		return
148 | 	}
149 | 	for _, pc := range *s {
150 | 		// Rendered through Frame's own %+v formatting.
151 | 		fmt.Fprintf(st, "\n%+v", Frame(pc))
152 | 	}
153 | }
154 | 
155 | func (s *stack) StackTrace() StackTrace {
156 | 	frames := make([]Frame, len(*s)) // one Frame per recorded program counter
157 | 	for i, pc := range *s {
158 | 		frames[i] = Frame(pc)
159 | 	}
160 | 	return frames
161 | }
162 | 
163 | func callers() *stack {
164 | 	const depth = 32 // upper bound on the number of program counters recorded
165 | 	var pcs [depth]uintptr
166 | 	n := runtime.Callers(3, pcs[:]) // NOTE(review): skip of 3 presumably drops runtime.Callers, callers and its caller — confirm
167 | 	var st stack = pcs[0:n] // keep only the first n entries
168 | 	return &st
169 | }
170 | 
171 | // funcname strips the path prefix and the first dot-separated component from a function name as reported by func.Name().
172 | func funcname(name string) string {
173 | 	// Drop everything up to and including the last "/" (nothing if absent).
174 | 	base := name[strings.LastIndex(name, "/")+1:]
175 | 	// Then drop everything up to and including the first "." (nothing if absent).
176 | 	return base[strings.Index(base, ".")+1:]
177 | }
178 | 


--------------------------------------------------------------------------------
/vendor/github.com/dlclark/regexp2/README.md:
--------------------------------------------------------------------------------
 1 | # regexp2 - full featured regular expressions for Go
 2 | Regexp2 is a feature-rich RegExp engine for Go.  It doesn't have constant time guarantees like the built-in `regexp` package, but it allows backtracking and is compatible with Perl5 and .NET.  You'll likely be better off with the RE2 engine from the `regexp` package and should only use this if you need to write very complex patterns or require compatibility with .NET.
 3 | 
 4 | ## Basis of the engine
 5 | The engine is ported from the .NET framework's System.Text.RegularExpressions.Regex engine.  That engine was open sourced in 2015 under the MIT license.  There are some fundamental differences between .NET strings and Go strings that required a bit of borrowing from the Go framework regex engine as well.  I cleaned up a couple of the dirtier bits during the port (regexcharclass.cs was terrible), but the parse tree, code emitted, and therefore patterns matched should be identical.
 6 | 
 7 | ## Installing
 8 | This is a go-gettable library, so install is easy:
 9 | 
10 |     go get github.com/dlclark/regexp2/...
11 | 
12 | ## Usage
13 | Usage is similar to the Go `regexp` package.  Just like in `regexp`, you start by converting a regex into a state machine via the `Compile` or `MustCompile` methods.  They ultimately do the same thing, but `MustCompile` will panic if the regex is invalid.  You can then use the provided `Regexp` struct to find matches repeatedly.  A `Regexp` struct is safe to use across goroutines.
14 | 
15 | ```go
16 | re := regexp2.MustCompile(`Your pattern`, 0)
17 | if isMatch, _ := re.MatchString(`Something to match`); isMatch {
18 |     //do something
19 | }
20 | ```
21 | 
22 | The only error that the `*Match*` methods *should* return is a Timeout if you set the `re.MatchTimeout` field.  Any other error is a bug in the `regexp2` package.  If you need more details about capture groups in a match then use the `FindStringMatch` method, like so:
23 | 
24 | ```go
25 | if m, _ := re.FindStringMatch(`Something to match`); m != nil {
26 |     // the whole match is always group 0
27 |     fmt.Printf("Group 0: %v\n", m.String())
28 | 
29 |     // you can get all the groups too
30 |     gps := m.Groups()
31 | 
32 |     // a group can be captured multiple times, so each cap is separately addressable
33 |     fmt.Printf("Group 1, first capture: %v\n", gps[1].Captures[0].String())
34 |     fmt.Printf("Group 1, second capture: %v\n", gps[1].Captures[1].String())
35 | }
36 | ```
37 | 
38 | Group 0 is embedded in the Match.  Group 0 is an automatically-assigned group that encompasses the whole pattern.  This means that `m.String()` is the same as `m.Group.String()` and `m.Groups()[0].String()`
39 | 
40 | The __last__ capture is embedded in each group, so `g.String()` will return the same thing as `g.Capture.String()` and  `g.Captures[len(g.Captures)-1].String()`.
41 | 
42 | ## Compare `regexp` and `regexp2`
43 | | Category | regexp | regexp2 |
44 | | --- | --- | --- |
45 | | Catastrophic backtracking possible | no, constant execution time guarantees | yes, if your pattern is at risk you can use the `re.MatchTimeout` field |
46 | | Python-style capture groups `(?P<name>re)` | yes | no (yes in RE2 compat mode) |
47 | | .NET-style capture groups `(?<name>re)` or `(?'name're)` | no | yes |
48 | | comments `(?#comment)` | no | yes |
49 | | branch numbering reset `(?\|a\|b)` | no | no |
50 | | possessive match `(?>re)` | no | yes |
51 | | positive lookahead `(?=re)` | no | yes |
52 | | negative lookahead `(?!re)` | no | yes |
53 | | positive lookbehind `(?<=re)` | no | yes |
54 | | negative lookbehind `(?<!re)` | no | yes |
55 | | back reference `\1` | no | yes |
56 | | named back reference `\k'name'` | no | yes |
57 | | named ascii character class `[[:foo:]]`| yes | no (yes in RE2 compat mode) |
58 | | conditionals `(?(expr)yes\|no)` | no | yes |
59 | 
60 | ## RE2 compatibility mode
61 | The default behavior of `regexp2` is to match the .NET regexp engine, however the `RE2` option is provided to change the parsing to increase compatibility with RE2.  Using the `RE2` option when compiling a regexp will not take away any features, but will change the following behaviors:
62 | * add support for named ascii character classes (e.g. `[[:foo:]]`)
63 | * add support for python-style capture groups (e.g. `(?P<name>re)`)
64 | * change singleline behavior for `$` to only match end of string (like RE2) (see [#24](https://github.com/dlclark/regexp2/issues/24))
65 |  
66 | ```go
67 | re := regexp2.MustCompile(`Your RE2-compatible pattern`, regexp2.RE2)
68 | if isMatch, _ := re.MatchString(`Something to match`); isMatch {
69 |     //do something
70 | }
71 | ```
72 | 
73 | This feature is a work in progress and I'm open to ideas for more things to put here (maybe more relaxed character escaping rules?).
74 | 
75 | 
76 | ## Library features that I'm still working on
77 | - Regex split
78 | 
79 | ## Potential bugs
80 | I've run a battery of tests against regexp2 from various sources and found the debug output matches the .NET engine, but .NET and Go handle strings very differently.  I've attempted to handle these differences, but most of my testing deals with basic ASCII with a little bit of multi-byte Unicode.  There's a chance that there are bugs in the string handling related to character sets with supplementary Unicode chars.  Right-to-Left support is coded, but not well tested either.
81 | 
82 | ## Find a bug?
83 | I'm open to new issues and pull requests with tests if you find something odd!
84 | 


--------------------------------------------------------------------------------
/encoder_test.go:
--------------------------------------------------------------------------------
  1 | package tokenizer
  2 | 
  3 | import (
  4 | 	"math/rand"
  5 | 	"os"
  6 | 	"reflect"
  7 | 	"strings"
  8 | 	"testing"
  9 | 
 10 | 	"github.com/stretchr/testify/require"
 11 | )
 12 | 
 13 | type TestCase struct {
 14 | 	Name string // used as the subtest name passed to t.Run
 15 | 	Err  error  // not set by any of the test tables in this file
 16 | }
 17 | 
 18 | var characterSet = []rune("1234567890[]',.pyfgcrl/=\\aoeuidhtns-;qjkxbmwvz!@#$%^&*(){}\"<>PYFGCRL?+|AOEUIDHTNS_:QJKXBMWVZ") // "\\a" is a backslash then 'a'; a bare \a would be the BEL escape
 19 | 
 20 | func defaultEncoder(t *testing.T) *Encoder {
 21 | 	enc, loadErr := NewFromPrebuilt("coheretext-50k")
 22 | 	require.NoError(t, loadErr) // the test cannot proceed without the prebuilt encoder
 23 | 	return enc
 24 | }
 25 | 
 26 | func defaultBenchmarkEncoder(b *testing.B) *Encoder {
 27 | 	enc, loadErr := NewFromPrebuilt("coheretext-50k")
 28 | 	require.NoError(b, loadErr) // the benchmark cannot proceed without the prebuilt encoder
 29 | 	return enc
 30 | }
 31 | 
 32 | func randomString(n int) string {
 33 | 	out := make([]rune, n) // n runes, each drawn from characterSet below
 34 | 	for idx := 0; idx < n; idx++ {
 35 | 		out[idx] = characterSet[rand.Intn(len(characterSet))]
 36 | 	}
 37 | 	return string(out)
 38 | }
 39 | func benchmarkEncode(text string, b *testing.B) { // times Encode(text) over b.N iterations
 40 | 	b.ReportAllocs()
 41 | 	encoder := defaultBenchmarkEncoder(b)
 42 | 	b.ResetTimer() // exclude encoder construction from the measurement, as the sibling benchmarks do
 43 | 	for n := 0; n < b.N; n++ {
 44 | 		encoder.Encode(text)
 45 | 	}
 46 | }
 47 | func BenchmarkEncode1Sentence(b *testing.B)  { benchmarkEncode(randomString(100), b) }
 48 | func BenchmarkEncode1Paragraph(b *testing.B) { benchmarkEncode(randomString(600), b) }
 49 | func BenchmarkEncode1KB(b *testing.B)        { benchmarkEncode(randomString(1000), b) }
 50 | func BenchmarkEncode1MB(b *testing.B)        { benchmarkEncode(randomString(1000000), b) }
 51 | func TestUnicodeEncode(t *testing.T) {
 52 | 	testCases := []struct {
 53 | 		testCase   TestCase
 54 | 		inputWord  string
 55 | 		outputWord string
 56 | 	}{
 57 | 		{
 58 | 			testCase:   TestCase{Name: "normal word"},
 59 | 			inputWord:  "testing",
 60 | 			outputWord: "testing",
 61 | 		},
 62 | 	}
 63 | 	for _, tc := range testCases {
 64 | 		t.Run(tc.testCase.Name, func(tt *testing.T) {
 65 | 			require.Equal(tt, tc.outputWord, unicodeEncode(tc.inputWord))
 66 | 		})
 67 | 	}
 68 | }
 69 | // TestEncodeDecodeSuccess joins each case's tokens, encodes the result, and checks every token and the full round trip.
 70 | func TestEncodeDecodeSuccess(t *testing.T) {
 71 | 	encoder := defaultEncoder(t)
 72 | 	testCases := []struct {
 73 | 		testCase TestCase
 74 | 		tokens   []string
 75 | 	}{
 76 | 		{
 77 | 			testCase: TestCase{Name: "{ }"},
 78 | 			tokens: []string{
 79 | 				" ",
 80 | 			},
 81 | 		},
 82 | 		{
 83 | 			testCase: TestCase{Name: "a"},
 84 | 			tokens: []string{
 85 | 				"a",
 86 | 			},
 87 | 		},
 88 | 		{
 89 | 			testCase: TestCase{Name: "{ }apple"},
 90 | 			tokens: []string{
 91 | 				" apple",
 92 | 			},
 93 | 		},
 94 | 		{
 95 | 			testCase: TestCase{Name: "lorem ipsum"},
 96 | 			tokens: []string{
 97 | 				"L", "orem", " ipsum", " dolor", " sit", " amet", ",", " consectetur", " adip", "iscing", " elit", ".", " N", "ulla", " quis", ".",
 98 | 			},
 99 | 		},
100 | 		{
101 | 			testCase: TestCase{Name: "weird character"},
102 | 			tokens: []string{
103 | 				"È",
104 | 			},
105 | 		},
106 | 		{
107 | 			testCase: TestCase{Name: "upper-case quotes should not match pre-tokenizer"},
108 | 			tokens: []string{
109 | 				"O", "'", "SH", "E", "AN", " DON", "'", "T", " BE", " BA", "'", "D", " '", "MAN", " YOU", "'", "RE", " CO", "ULD", "'", "VE", " HE", "'", "L", "LP", "ED",
110 | 			},
111 | 		},
112 | 		{
113 | 			testCase: TestCase{Name: "lower-case quotes should match pre-tokenizer"},
114 | 			tokens: []string{
115 | 				"o", "'s", "he", "an", " don", "'t", " be", " ba", "'d", " '", "man", " you", "'re", " could", "'ve", " he", "'ll", "ped",
116 | 			},
117 | 		},
118 | 	}
119 | 	for _, tc := range testCases {
120 | 		t.Run(tc.testCase.Name, func(tt *testing.T) {
121 | 			joinedTokens := strings.Join(tc.tokens, "")
122 | 			tokenIDs, tokenStrings := encoder.Encode(joinedTokens)
123 | 			require.Len(tt, tokenIDs, len(tc.tokens)) // fail cleanly instead of indexing out of range below
124 | 			for i, token := range tc.tokens {
125 | 				require.Equal(tt, token, encoder.Decode([]int64{tokenIDs[i]})) // assert on tt, the subtest's own T
126 | 				require.Equal(tt, token, tokenStrings[i])
127 | 			}
128 | 
129 | 			require.Equal(tt, joinedTokens, encoder.Decode(tokenIDs))
130 | 		})
131 | 	}
132 | }
133 | 
134 | // Benchmarks over 1000 randomly generated tokens.
135 | func Benchmark1000TokensDecode(b *testing.B) { benchmarkDecode(1000, b) }
136 | func Benchmark1000TokensEncode(b *testing.B) { benchmarkTokenDecode(1000, b) } // times Encode on the decoded tokens
137 | 
138 | func generateTokens(numTokens int) []int64 {
139 | 	var ids []int64 // stays nil when numTokens <= 0
140 | 	for len(ids) < numTokens {
141 | 		ids = append(ids, rand.Int63n(50000-1)+1) // random ID in [1, 49999]
142 | 	}
143 | 	return ids
144 | }
145 | 
146 | func benchmarkTokenDecode(numTokens int, b *testing.B) {
147 | 	b.ReportAllocs()
148 | 	enc := defaultBenchmarkEncoder(b)
149 | 	// Decode random tokens once to obtain the text that the loop then encodes.
150 | 	decoded := enc.Decode(generateTokens(numTokens))
151 | 	b.ResetTimer()
152 | 
153 | 	for i := 0; i < b.N; i++ {
154 | 		enc.Encode(decoded)
155 | 	}
156 | }
157 | 
158 | func benchmarkDecode(numTokens int, b *testing.B) {
159 | 	b.ReportAllocs()
160 | 	enc := defaultBenchmarkEncoder(b)
161 | 	ids := generateTokens(numTokens)
162 | 	// Setup above is excluded from the timing.
163 | 	b.ResetTimer()
164 | 	for i := 0; i < b.N; i++ {
165 | 		enc.Decode(ids)
166 | 	}
167 | }
168 | 
169 | // TestFromPrebuiltAndFromReader checks that NewFromPrebuilt and NewFromReaders yield encoders with equal fields.
170 | func TestFromPrebuiltAndFromReader(t *testing.T) {
171 | 	ePrebuilt := defaultEncoder(t)
172 | 	encoderReader, err := os.Open("vocab/coheretext-50k/encoder.json")
173 | 	require.NoError(t, err)
174 | 	defer encoderReader.Close() // release the file handle when the test ends
175 | 	vocabReader, err := os.Open("vocab/coheretext-50k/vocab.bpe")
176 | 	require.NoError(t, err)
177 | 	defer vocabReader.Close()
178 | 
179 | 	eReader, err := NewFromReaders(encoderReader, vocabReader)
180 | 	require.NoError(t, err)
181 | 	if !(reflect.DeepEqual(ePrebuilt.Encoder, eReader.Encoder) &&
182 | 		reflect.DeepEqual(ePrebuilt.Decoder, eReader.Decoder) &&
183 | 		reflect.DeepEqual(ePrebuilt.BPERanks, eReader.BPERanks) &&
184 | 		reflect.DeepEqual(ePrebuilt.Cache, eReader.Cache) &&
185 | 		ePrebuilt.VocabSize == eReader.VocabSize) {
186 | 		t.Logf("The encoders are not the same.")
187 | 		t.Fail()
188 | 	}
189 | }
190 | 


--------------------------------------------------------------------------------
/vendor/github.com/stretchr/testify/assert/http_assertions.go:
--------------------------------------------------------------------------------
  1 | package assert
  2 | 
  3 | import (
  4 | 	"fmt"
  5 | 	"net/http"
  6 | 	"net/http/httptest"
  7 | 	"net/url"
  8 | 	"strings"
  9 | )
 10 | 
 11 | // httpCode runs handler against a request built from method, url and values
 12 | // (sent as the query string) and returns the recorded status code, or -1 and an error if the request cannot be built.
 13 | func httpCode(handler http.HandlerFunc, method, url string, values url.Values) (int, error) {
 14 | 	req, err := http.NewRequest(method, url, nil)
 15 | 	if err != nil {
 16 | 		return -1, err
 17 | 	}
 18 | 	req.URL.RawQuery = values.Encode()
 19 | 	rec := httptest.NewRecorder()
 20 | 	handler(rec, req)
 21 | 	return rec.Code, nil
 22 | }
 23 | 
 24 | // HTTPSuccess asserts that a specified handler returns a success status code
 25 | // (http.StatusOK through http.StatusPartialContent); msgAndArgs are passed on to Fail.
 26 | //
 27 | //  assert.HTTPSuccess(t, myHandler, "POST", "http://www.google.com", nil)
 28 | //
 29 | // Returns whether the assertion was successful (true) or not (false).
 30 | func HTTPSuccess(t TestingT, handler http.HandlerFunc, method, url string, values url.Values, msgAndArgs ...interface{}) bool {
 31 | 	if h, ok := t.(tHelper); ok {
 32 | 		h.Helper()
 33 | 	}
 34 | 	code, err := httpCode(handler, method, url, values)
 35 | 	if err != nil { // stop here: the -1 code would only add a second, misleading failure
 36 | 		return Fail(t, fmt.Sprintf("Failed to build test request, got error: %s", err), msgAndArgs...)
 37 | 	}
 38 | 	isSuccessCode := code >= http.StatusOK && code <= http.StatusPartialContent
 39 | 	if !isSuccessCode {
 40 | 		Fail(t, fmt.Sprintf("Expected HTTP success status code for %q but received %d", url+"?"+values.Encode(), code), msgAndArgs...)
 41 | 	}
 42 | 
 43 | 	return isSuccessCode
 44 | }
 45 | 
 46 | // HTTPRedirect asserts that a specified handler returns a redirect status code
 47 | // (http.StatusMultipleChoices through http.StatusTemporaryRedirect); msgAndArgs are passed on to Fail.
 48 | //
 49 | //  assert.HTTPRedirect(t, myHandler, "GET", "/a/b/c", url.Values{"a": []string{"b", "c"}})
 50 | //
 51 | // Returns whether the assertion was successful (true) or not (false).
 52 | func HTTPRedirect(t TestingT, handler http.HandlerFunc, method, url string, values url.Values, msgAndArgs ...interface{}) bool {
 53 | 	if h, ok := t.(tHelper); ok {
 54 | 		h.Helper()
 55 | 	}
 56 | 	code, err := httpCode(handler, method, url, values)
 57 | 	if err != nil { // stop here: the -1 code would only add a second, misleading failure
 58 | 		return Fail(t, fmt.Sprintf("Failed to build test request, got error: %s", err), msgAndArgs...)
 59 | 	}
 60 | 	isRedirectCode := code >= http.StatusMultipleChoices && code <= http.StatusTemporaryRedirect
 61 | 	if !isRedirectCode {
 62 | 		Fail(t, fmt.Sprintf("Expected HTTP redirect status code for %q but received %d", url+"?"+values.Encode(), code), msgAndArgs...)
 63 | 	}
 64 | 
 65 | 	return isRedirectCode
 66 | }
 67 | 
 68 | // HTTPError asserts that a specified handler returns an error status code
 69 | // (http.StatusBadRequest or above); msgAndArgs are passed on to Fail.
 70 | //
 71 | //  assert.HTTPError(t, myHandler, "POST", "/a/b/c", url.Values{"a": []string{"b", "c"}})
 72 | //
 73 | // Returns whether the assertion was successful (true) or not (false).
 74 | func HTTPError(t TestingT, handler http.HandlerFunc, method, url string, values url.Values, msgAndArgs ...interface{}) bool {
 75 | 	if h, ok := t.(tHelper); ok {
 76 | 		h.Helper()
 77 | 	}
 78 | 	code, err := httpCode(handler, method, url, values)
 79 | 	if err != nil { // stop here: the -1 code would only add a second, misleading failure
 80 | 		return Fail(t, fmt.Sprintf("Failed to build test request, got error: %s", err), msgAndArgs...)
 81 | 	}
 82 | 	isErrorCode := code >= http.StatusBadRequest
 83 | 	if !isErrorCode {
 84 | 		Fail(t, fmt.Sprintf("Expected HTTP error status code for %q but received %d", url+"?"+values.Encode(), code), msgAndArgs...)
 85 | 	}
 86 | 
 87 | 	return isErrorCode
 88 | }
 89 | 
 90 | // HTTPStatusCode asserts that a specified handler returns a specified status code;
 91 | // msgAndArgs are passed on to Fail.
 92 | //
 93 | //  assert.HTTPStatusCode(t, myHandler, "GET", "/notImplemented", nil, 501)
 94 | //
 95 | // Returns whether the assertion was successful (true) or not (false).
 96 | func HTTPStatusCode(t TestingT, handler http.HandlerFunc, method, url string, values url.Values, statuscode int, msgAndArgs ...interface{}) bool {
 97 | 	if h, ok := t.(tHelper); ok {
 98 | 		h.Helper()
 99 | 	}
100 | 	code, err := httpCode(handler, method, url, values)
101 | 	if err != nil { // stop here: the -1 code would only add a second, misleading failure
102 | 		return Fail(t, fmt.Sprintf("Failed to build test request, got error: %s", err), msgAndArgs...)
103 | 	}
104 | 	successful := code == statuscode
105 | 	if !successful {
106 | 		Fail(t, fmt.Sprintf("Expected HTTP status code %d for %q but received %d", statuscode, url+"?"+values.Encode(), code), msgAndArgs...)
107 | 	}
108 | 
109 | 	return successful
110 | }
111 | 
112 | // HTTPBody is a helper that returns HTTP body of the response. It returns
113 | // empty string if building a new request fails.
114 | func HTTPBody(handler http.HandlerFunc, method, url string, values url.Values) string {
115 | 	w := httptest.NewRecorder()
116 | 	req, err := http.NewRequest(method, url+"?"+values.Encode(), nil)
117 | 	if err != nil {
118 | 		return ""
119 | 	}
120 | 	handler(w, req)
121 | 	return w.Body.String()
122 | }
123 | 
124 | // HTTPBodyContains asserts that a specified handler returns a
125 | // body that contains a string.
126 | //
127 | //  assert.HTTPBodyContains(t, myHandler, "GET", "www.google.com", nil, "I'm Feeling Lucky")
128 | //
129 | // Returns whether the assertion was successful (true) or not (false).
130 | func HTTPBodyContains(t TestingT, handler http.HandlerFunc, method, url string, values url.Values, str interface{}, msgAndArgs ...interface{}) bool {
131 | 	if h, ok := t.(tHelper); ok {
132 | 		h.Helper()
133 | 	}
134 | 	body := HTTPBody(handler, method, url, values)
135 | 
136 | 	contains := strings.Contains(body, fmt.Sprint(str))
137 | 	if !contains {
138 | 		Fail(t, fmt.Sprintf("Expected response body for \"%s\" to contain \"%s\" but found \"%s\"", url+"?"+values.Encode(), str, body))
139 | 	}
140 | 
141 | 	return contains
142 | }
143 | 
144 | // HTTPBodyNotContains asserts that a specified handler returns a
145 | // body that does not contain a string.
146 | //
147 | //  assert.HTTPBodyNotContains(t, myHandler, "GET", "www.google.com", nil, "I'm Feeling Lucky")
148 | //
149 | // Returns whether the assertion was successful (true) or not (false).
150 | func HTTPBodyNotContains(t TestingT, handler http.HandlerFunc, method, url string, values url.Values, str interface{}, msgAndArgs ...interface{}) bool {
151 | 	if h, ok := t.(tHelper); ok {
152 | 		h.Helper()
153 | 	}
154 | 	body := HTTPBody(handler, method, url, values)
155 | 
156 | 	contains := strings.Contains(body, fmt.Sprint(str))
157 | 	if contains {
158 | 		Fail(t, fmt.Sprintf("Expected response body for \"%s\" to NOT contain \"%s\" but found \"%s\"", url+"?"+values.Encode(), str, body))
159 | 	}
160 | 
161 | 	return !contains
162 | }
163 | 


--------------------------------------------------------------------------------
/vendor/github.com/davecgh/go-spew/spew/bypass.go:
--------------------------------------------------------------------------------
  1 | // Copyright (c) 2015-2016 Dave Collins <dave@davec.name>
  2 | //
  3 | // Permission to use, copy, modify, and distribute this software for any
  4 | // purpose with or without fee is hereby granted, provided that the above
  5 | // copyright notice and this permission notice appear in all copies.
  6 | //
  7 | // THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  8 | // WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  9 | // MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 10 | // ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 11 | // WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 12 | // ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 13 | // OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 14 | 
 15 | // NOTE: Due to the following build constraints, this file will only be compiled
 16 | // when the code is not running on Google App Engine, compiled by GopherJS, and
 17 | // "-tags safe" is not added to the go build command line.  The "disableunsafe"
 18 | // tag is deprecated and thus should not be used.
 19 | // +build !js,!appengine,!safe,!disableunsafe
 20 | 
 21 | package spew
 22 | 
 23 | import (
 24 | 	"reflect"
 25 | 	"unsafe"
 26 | )
 27 | 
 28 | const (
 29 | 	// UnsafeDisabled is a build-time constant which specifies whether or
 30 | 	// not access to the unsafe package is available.
 31 | 	UnsafeDisabled = false
 32 | 
 33 | 	// ptrSize is the size of a pointer on the current arch.
 34 | 	ptrSize = unsafe.Sizeof((*byte)(nil))
 35 | )
 36 | 
 37 | var (
 38 | 	// offsetPtr, offsetScalar, and offsetFlag are the offsets for the
 39 | 	// internal reflect.Value fields.  These values are valid before golang
 40 | 	// commit ecccf07e7f9d which changed the format.  The are also valid
 41 | 	// after commit 82f48826c6c7 which changed the format again to mirror
 42 | 	// the original format.  Code in the init function updates these offsets
 43 | 	// as necessary.
 44 | 	offsetPtr    = uintptr(ptrSize)
 45 | 	offsetScalar = uintptr(0)
 46 | 	offsetFlag   = uintptr(ptrSize * 2)
 47 | 
 48 | 	// flagKindWidth and flagKindShift indicate various bits that the
 49 | 	// reflect package uses internally to track kind information.
 50 | 	//
 51 | 	// flagRO indicates whether or not the value field of a reflect.Value is
 52 | 	// read-only.
 53 | 	//
 54 | 	// flagIndir indicates whether the value field of a reflect.Value is
 55 | 	// the actual data or a pointer to the data.
 56 | 	//
 57 | 	// These values are valid before golang commit 90a7c3c86944 which
 58 | 	// changed their positions.  Code in the init function updates these
 59 | 	// flags as necessary.
 60 | 	flagKindWidth = uintptr(5)
 61 | 	flagKindShift = uintptr(flagKindWidth - 1)
 62 | 	flagRO        = uintptr(1 << 0)
 63 | 	flagIndir     = uintptr(1 << 1)
 64 | )
 65 | 
 66 | func init() {
 67 | 	// Older versions of reflect.Value stored small integers directly in the
 68 | 	// ptr field (which is named val in the older versions).  Versions
 69 | 	// between commits ecccf07e7f9d and 82f48826c6c7 added a new field named
 70 | 	// scalar for this purpose which unfortunately came before the flag
 71 | 	// field, so the offset of the flag field is different for those
 72 | 	// versions.
 73 | 	//
 74 | 	// This code constructs a new reflect.Value from a known small integer
 75 | 	// and checks if the size of the reflect.Value struct indicates it has
 76 | 	// the scalar field. When it does, the offsets are updated accordingly.
 77 | 	vv := reflect.ValueOf(0xf00)
 78 | 	if unsafe.Sizeof(vv) == (ptrSize * 4) {
 79 | 		offsetScalar = ptrSize * 2
 80 | 		offsetFlag = ptrSize * 3
 81 | 	}
 82 | 
 83 | 	// Commit 90a7c3c86944 changed the flag positions such that the low
 84 | 	// order bits are the kind.  This code extracts the kind from the flags
 85 | 	// field and ensures it's the correct type.  When it's not, the flag
 86 | 	// order has been changed to the newer format, so the flags are updated
 87 | 	// accordingly.
 88 | 	upf := unsafe.Pointer(uintptr(unsafe.Pointer(&vv)) + offsetFlag)
 89 | 	upfv := *(*uintptr)(upf)
 90 | 	flagKindMask := uintptr((1<<flagKindWidth - 1) << flagKindShift)
 91 | 	if (upfv&flagKindMask)>>flagKindShift != uintptr(reflect.Int) {
 92 | 		flagKindShift = 0
 93 | 		flagRO = 1 << 5
 94 | 		flagIndir = 1 << 6
 95 | 
 96 | 		// Commit adf9b30e5594 modified the flags to separate the
 97 | 		// flagRO flag into two bits which specifies whether or not the
 98 | 		// field is embedded.  This causes flagIndir to move over a bit
 99 | 		// and means that flagRO is the combination of either of the
100 | 		// original flagRO bit and the new bit.
101 | 		//
102 | 		// This code detects the change by extracting what used to be
103 | 		// the indirect bit to ensure it's set.  When it's not, the flag
104 | 		// order has been changed to the newer format, so the flags are
105 | 		// updated accordingly.
106 | 		if upfv&flagIndir == 0 {
107 | 			flagRO = 3 << 5
108 | 			flagIndir = 1 << 7
109 | 		}
110 | 	}
111 | }
112 | 
113 | // unsafeReflectValue converts the passed reflect.Value into a one that bypasses
114 | // the typical safety restrictions preventing access to unaddressable and
115 | // unexported data.  It works by digging the raw pointer to the underlying
116 | // value out of the protected value and generating a new unprotected (unsafe)
117 | // reflect.Value to it.
118 | //
119 | // This allows us to check for implementations of the Stringer and error
120 | // interfaces to be used for pretty printing ordinarily unaddressable and
121 | // inaccessible values such as unexported struct fields.
122 | func unsafeReflectValue(v reflect.Value) (rv reflect.Value) {
123 | 	indirects := 1
124 | 	vt := v.Type()
125 | 	upv := unsafe.Pointer(uintptr(unsafe.Pointer(&v)) + offsetPtr)
126 | 	rvf := *(*uintptr)(unsafe.Pointer(uintptr(unsafe.Pointer(&v)) + offsetFlag))
127 | 	if rvf&flagIndir != 0 {
128 | 		vt = reflect.PtrTo(v.Type())
129 | 		indirects++
130 | 	} else if offsetScalar != 0 {
131 | 		// The value is in the scalar field when it's not one of the
132 | 		// reference types.
133 | 		switch vt.Kind() {
134 | 		case reflect.Uintptr:
135 | 		case reflect.Chan:
136 | 		case reflect.Func:
137 | 		case reflect.Map:
138 | 		case reflect.Ptr:
139 | 		case reflect.UnsafePointer:
140 | 		default:
141 | 			upv = unsafe.Pointer(uintptr(unsafe.Pointer(&v)) +
142 | 				offsetScalar)
143 | 		}
144 | 	}
145 | 
146 | 	pv := reflect.NewAt(vt, upv)
147 | 	rv = pv
148 | 	for i := 0; i < indirects; i++ {
149 | 		rv = rv.Elem()
150 | 	}
151 | 	return rv
152 | }
153 | 


--------------------------------------------------------------------------------
/vendor/github.com/davecgh/go-spew/spew/spew.go:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (c) 2013-2016 Dave Collins <dave@davec.name>
  3 |  *
  4 |  * Permission to use, copy, modify, and distribute this software for any
  5 |  * purpose with or without fee is hereby granted, provided that the above
  6 |  * copyright notice and this permission notice appear in all copies.
  7 |  *
  8 |  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  9 |  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 10 |  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 11 |  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 12 |  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 13 |  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 14 |  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 15 |  */
 16 | 
 17 | package spew
 18 | 
 19 | import (
 20 | 	"fmt"
 21 | 	"io"
 22 | )
 23 | 
 24 | // Errorf is a wrapper for fmt.Errorf that treats each argument as if it were
 25 | // passed with a default Formatter interface returned by NewFormatter.  It
 26 | // returns the formatted string as a value that satisfies error.  See
 27 | // NewFormatter for formatting details.
 28 | //
 29 | // This function is shorthand for the following syntax:
 30 | //
 31 | //	fmt.Errorf(format, spew.NewFormatter(a), spew.NewFormatter(b))
 32 | func Errorf(format string, a ...interface{}) (err error) {
 33 | 	return fmt.Errorf(format, convertArgs(a)...)
 34 | }
 35 | 
 36 | // Fprint is a wrapper for fmt.Fprint that treats each argument as if it were
 37 | // passed with a default Formatter interface returned by NewFormatter.  It
 38 | // returns the number of bytes written and any write error encountered.  See
 39 | // NewFormatter for formatting details.
 40 | //
 41 | // This function is shorthand for the following syntax:
 42 | //
 43 | //	fmt.Fprint(w, spew.NewFormatter(a), spew.NewFormatter(b))
 44 | func Fprint(w io.Writer, a ...interface{}) (n int, err error) {
 45 | 	return fmt.Fprint(w, convertArgs(a)...)
 46 | }
 47 | 
 48 | // Fprintf is a wrapper for fmt.Fprintf that treats each argument as if it were
 49 | // passed with a default Formatter interface returned by NewFormatter.  It
 50 | // returns the number of bytes written and any write error encountered.  See
 51 | // NewFormatter for formatting details.
 52 | //
 53 | // This function is shorthand for the following syntax:
 54 | //
 55 | //	fmt.Fprintf(w, format, spew.NewFormatter(a), spew.NewFormatter(b))
 56 | func Fprintf(w io.Writer, format string, a ...interface{}) (n int, err error) {
 57 | 	return fmt.Fprintf(w, format, convertArgs(a)...)
 58 | }
 59 | 
 60 | // Fprintln is a wrapper for fmt.Fprintln that treats each argument as if it
 61 | // passed with a default Formatter interface returned by NewFormatter.  See
 62 | // NewFormatter for formatting details.
 63 | //
 64 | // This function is shorthand for the following syntax:
 65 | //
 66 | //	fmt.Fprintln(w, spew.NewFormatter(a), spew.NewFormatter(b))
 67 | func Fprintln(w io.Writer, a ...interface{}) (n int, err error) {
 68 | 	return fmt.Fprintln(w, convertArgs(a)...)
 69 | }
 70 | 
 71 | // Print is a wrapper for fmt.Print that treats each argument as if it were
 72 | // passed with a default Formatter interface returned by NewFormatter.  It
 73 | // returns the number of bytes written and any write error encountered.  See
 74 | // NewFormatter for formatting details.
 75 | //
 76 | // This function is shorthand for the following syntax:
 77 | //
 78 | //	fmt.Print(spew.NewFormatter(a), spew.NewFormatter(b))
 79 | func Print(a ...interface{}) (n int, err error) {
 80 | 	return fmt.Print(convertArgs(a)...)
 81 | }
 82 | 
 83 | // Printf is a wrapper for fmt.Printf that treats each argument as if it were
 84 | // passed with a default Formatter interface returned by NewFormatter.  It
 85 | // returns the number of bytes written and any write error encountered.  See
 86 | // NewFormatter for formatting details.
 87 | //
 88 | // This function is shorthand for the following syntax:
 89 | //
 90 | //	fmt.Printf(format, spew.NewFormatter(a), spew.NewFormatter(b))
 91 | func Printf(format string, a ...interface{}) (n int, err error) {
 92 | 	return fmt.Printf(format, convertArgs(a)...)
 93 | }
 94 | 
 95 | // Println is a wrapper for fmt.Println that treats each argument as if it were
 96 | // passed with a default Formatter interface returned by NewFormatter.  It
 97 | // returns the number of bytes written and any write error encountered.  See
 98 | // NewFormatter for formatting details.
 99 | //
100 | // This function is shorthand for the following syntax:
101 | //
102 | //	fmt.Println(spew.NewFormatter(a), spew.NewFormatter(b))
103 | func Println(a ...interface{}) (n int, err error) {
104 | 	return fmt.Println(convertArgs(a)...)
105 | }
106 | 
107 | // Sprint is a wrapper for fmt.Sprint that treats each argument as if it were
108 | // passed with a default Formatter interface returned by NewFormatter.  It
109 | // returns the resulting string.  See NewFormatter for formatting details.
110 | //
111 | // This function is shorthand for the following syntax:
112 | //
113 | //	fmt.Sprint(spew.NewFormatter(a), spew.NewFormatter(b))
114 | func Sprint(a ...interface{}) string {
115 | 	return fmt.Sprint(convertArgs(a)...)
116 | }
117 | 
118 | // Sprintf is a wrapper for fmt.Sprintf that treats each argument as if it were
119 | // passed with a default Formatter interface returned by NewFormatter.  It
120 | // returns the resulting string.  See NewFormatter for formatting details.
121 | //
122 | // This function is shorthand for the following syntax:
123 | //
124 | //	fmt.Sprintf(format, spew.NewFormatter(a), spew.NewFormatter(b))
125 | func Sprintf(format string, a ...interface{}) string {
126 | 	return fmt.Sprintf(format, convertArgs(a)...)
127 | }
128 | 
129 | // Sprintln is a wrapper for fmt.Sprintln that treats each argument as if it
130 | // were passed with a default Formatter interface returned by NewFormatter.  It
131 | // returns the resulting string.  See NewFormatter for formatting details.
132 | //
133 | // This function is shorthand for the following syntax:
134 | //
135 | //	fmt.Sprintln(spew.NewFormatter(a), spew.NewFormatter(b))
136 | func Sprintln(a ...interface{}) string {
137 | 	return fmt.Sprintln(convertArgs(a)...)
138 | }
139 | 
140 | // convertArgs accepts a slice of arguments and returns a slice of the same
141 | // length with each argument converted to a default spew Formatter interface.
142 | func convertArgs(args []interface{}) (formatters []interface{}) {
143 | 	formatters = make([]interface{}, len(args))
144 | 	for index, arg := range args {
145 | 		formatters[index] = NewFormatter(arg)
146 | 	}
147 | 	return formatters
148 | }
149 | 


--------------------------------------------------------------------------------
/frequency_test.go:
--------------------------------------------------------------------------------
  1 | package tokenizer
  2 | 
  3 | import (
  4 | 	"log"
  5 | 	"strings"
  6 | 	"testing"
  7 | 
  8 | 	"github.com/pkg/errors"
  9 | )
 10 | 
 11 | func TestFrequencySuite(t *testing.T) {
 12 | 	t.Run("CountString", testCountString)
 13 | 	t.Run("FrequencyCounter.CountReader", testCountReader)
 14 | 	t.Run("FrequencyCounter.AddCounts", testAddCounts)
 15 | }
 16 | 
 17 | func testCountString(t *testing.T) {
 18 | 	tests := []struct {
 19 | 		input               string
 20 | 		expectedFrequencies map[string]int64
 21 | 	}{
 22 | 		{
 23 | 			input: "today my friend and I went running. I liked it.",
 24 | 			expectedFrequencies: map[string]int64{
 25 | 				"today":    1,
 26 | 				"ĠI":       2,
 27 | 				"Ġmy":      1,
 28 | 				"Ġfriend":  1,
 29 | 				"Ġand":     1,
 30 | 				"Ġwent":    1,
 31 | 				"Ġrunning": 1,
 32 | 				".":        2,
 33 | 				"Ġliked":   1,
 34 | 				"Ġit":      1,
 35 | 			},
 36 | 		},
 37 | 		{
 38 | 			input: "起来Qǐlái!！ 不愿Búyuàn做zuò奴隶núlì的de人们rénmen!",
 39 | 			expectedFrequencies: map[string]int64{
 40 | 				"!":             1,
 41 | 				"!ï¼ģ":          1,
 42 | 				"èµ·æĿ¥QÇĲlÃ¡i": 1,
 43 | 				"Ġä¸įæĦ¿BÃºyuÃłnåģļzuÃ²å¥´éļ¶nÃºlÃ¬çļĦdeäººä»¬rÃ©nmen": 1,
 44 | 			},
 45 | 		},
 46 | 		{
 47 | 			input: "a b c d e f g h i j k l 		m n o p q r s t u v w x y z",
 48 | 			expectedFrequencies: map[string]int64{
 49 | 				"a":  1,
 50 | 				"m":  1,
 51 | 				"ĉ":  1,
 52 | 				"Ġb": 1,
 53 | 				"Ġc": 1,
 54 | 				"Ġd": 1,
 55 | 				"Ġe": 1,
 56 | 				"Ġf": 1,
 57 | 				"Ġg": 1,
 58 | 				"Ġh": 1,
 59 | 				"Ġi": 1,
 60 | 				"Ġj": 1,
 61 | 				"Ġk": 1,
 62 | 				"Ġl": 1,
 63 | 				"Ġn": 1,
 64 | 				"Ġo": 1,
 65 | 				"Ġp": 1,
 66 | 				"Ġq": 1,
 67 | 				"Ġr": 1,
 68 | 				"Ġs": 1,
 69 | 				"Ġt": 1,
 70 | 				"Ġu": 1,
 71 | 				"Ġv": 1,
 72 | 				"Ġw": 1,
 73 | 				"Ġx": 1,
 74 | 				"Ġy": 1,
 75 | 				"Ġz": 1,
 76 | 				"Ġĉ": 1,
 77 | 			},
 78 | 		},
 79 | 		{
 80 | 			input: "🐋🐳 🤯",
 81 | 			expectedFrequencies: map[string]int64{
 82 | 				"ðŁĲĭðŁĲ³": 1,
 83 | 				"ĠðŁ¤¯":    1,
 84 | 			},
 85 | 		},
 86 | 	}
 87 | 
 88 | 	for _, tt := range tests {
 89 | 		counts := CountString(tt.input)
 90 | 		if len(counts) != len(tt.expectedFrequencies) {
 91 | 			t.Fatalf("expected %d words but got %d", len(tt.expectedFrequencies), len(counts))
 92 | 		}
 93 | 		for expectedk, expectedv := range tt.expectedFrequencies {
 94 | 			v, ok := counts[expectedk]
 95 | 			if !ok {
 96 | 				t.Fatalf("expected frequencies to contain \"%s\"", expectedk)
 97 | 			}
 98 | 
 99 | 			if expectedv != v {
100 | 				t.Fatalf("expected %s to have count %d but got %d", expectedk, expectedv, v)
101 | 			}
102 | 		}
103 | 	}
104 | }
105 | 
106 | func testCountReader(t *testing.T) {
107 | 	tests := []struct {
108 | 		input               string
109 | 		expectedFrequencies map[string]int64
110 | 	}{
111 | 		{
112 | 			input: `today my friend and I went running. I liked it.
113 | 			起来Qǐlái!！ 不愿Búyuàn做zuò奴隶núlì的de人们rénmen!
114 | 			a b c d e f g h i j k l 		m n o p q r s t u v w x y z
115 | 			🐋🐳 🤯
116 | 			`,
117 | 			expectedFrequencies: map[string]int64{
118 | 				"today":         1,
119 | 				"ĠI":            2,
120 | 				"Ġmy":           1,
121 | 				"Ġfriend":       1,
122 | 				"Ġand":          1,
123 | 				"Ġwent":         1,
124 | 				"Ġrunning":      1,
125 | 				".":             2,
126 | 				"Ġliked":        1,
127 | 				"Ġit":           1,
128 | 				"!":             1,
129 | 				"!ï¼ģ":          1,
130 | 				"èµ·æĿ¥QÇĲlÃ¡i": 1,
131 | 				"Ġä¸įæĦ¿BÃºyuÃłnåģļzuÃ²å¥´éļ¶nÃºlÃ¬çļĦdeäººä»¬rÃ©nmen": 1,
132 | 				"a":        1,
133 | 				"m":        1,
134 | 				"Ġb":       1,
135 | 				"Ġc":       1,
136 | 				"Ġd":       1,
137 | 				"Ġe":       1,
138 | 				"Ġf":       1,
139 | 				"Ġg":       1,
140 | 				"Ġh":       1,
141 | 				"Ġi":       1,
142 | 				"Ġj":       1,
143 | 				"Ġk":       1,
144 | 				"Ġl":       1,
145 | 				"Ġn":       1,
146 | 				"Ġo":       1,
147 | 				"Ġp":       1,
148 | 				"Ġq":       1,
149 | 				"Ġr":       1,
150 | 				"Ġs":       1,
151 | 				"Ġt":       1,
152 | 				"Ġu":       1,
153 | 				"Ġv":       1,
154 | 				"Ġw":       1,
155 | 				"Ġx":       1,
156 | 				"Ġy":       1,
157 | 				"Ġz":       1,
158 | 				"Ġĉ":       1,
159 | 				"ðŁĲĭðŁĲ³": 1,
160 | 				"ĠðŁ¤¯":    1,
161 | 				"ĉ":        4,
162 | 				"ĉĉ":       3,
163 | 				"ĉĉĉ":      1,
164 | 				"Ċ":        4,
165 | 			},
166 | 		},
167 | 	}
168 | 
169 | 	for _, tt := range tests {
170 | 		freq, err := CountReader(strings.NewReader(tt.input))
171 | 		if err != nil {
172 | 			log.Fatal(errors.Wrap(err, "failed to count reader"))
173 | 		}
174 | 
175 | 		if len(freq) != len(tt.expectedFrequencies) {
176 | 			t.Fatalf("expected %d words but got %d", len(tt.expectedFrequencies), len(freq))
177 | 		}
178 | 
179 | 		for expectedk, expectedv := range tt.expectedFrequencies {
180 | 			v, ok := freq[expectedk]
181 | 			if !ok {
182 | 				t.Fatalf("expected frequencies to contain \"%s\"", expectedk)
183 | 			}
184 | 
185 | 			if expectedv != v {
186 | 				t.Fatalf("expected %s to have count %d but got %d", expectedk, expectedv, v)
187 | 			}
188 | 		}
189 | 	}
190 | }
191 | 
192 | func testAddCounts(t *testing.T) {
193 | 	tests := []struct {
194 | 		initial             map[string]int64
195 | 		input               map[string]int64
196 | 		expectedFrequencies map[string]int64
197 | 		expectedNumWords    int64
198 | 	}{
199 | 		{
200 | 			initial: map[string]int64{},
201 | 			input: map[string]int64{
202 | 				"a": 1,
203 | 				"b": 2,
204 | 				"c": 3,
205 | 			},
206 | 			expectedFrequencies: map[string]int64{
207 | 				"a": 1,
208 | 				"b": 2,
209 | 				"c": 3,
210 | 			},
211 | 		},
212 | 		{
213 | 			initial: map[string]int64{
214 | 				"a": 1,
215 | 				"b": 2,
216 | 				"c": 3,
217 | 			},
218 | 			input: nil,
219 | 			expectedFrequencies: map[string]int64{
220 | 				"a": 1,
221 | 				"b": 2,
222 | 				"c": 3,
223 | 			},
224 | 		},
225 | 		{
226 | 			initial: map[string]int64{
227 | 				"a": 1,
228 | 				"b": 2,
229 | 				"c": 3,
230 | 			},
231 | 			input: map[string]int64{
232 | 				"a": 1,
233 | 				"b": 2,
234 | 				"c": 3,
235 | 			},
236 | 			expectedFrequencies: map[string]int64{
237 | 				"a": 2,
238 | 				"b": 4,
239 | 				"c": 6,
240 | 			},
241 | 		},
242 | 		{
243 | 			initial: map[string]int64{
244 | 				"a": 1,
245 | 				"b": 2,
246 | 				"c": 3,
247 | 			},
248 | 			input: map[string]int64{
249 | 				"d": 1,
250 | 				"e": 2,
251 | 				"f": 3,
252 | 			},
253 | 			expectedFrequencies: map[string]int64{
254 | 				"a": 1,
255 | 				"b": 2,
256 | 				"c": 3,
257 | 				"d": 1,
258 | 				"e": 2,
259 | 				"f": 3,
260 | 			},
261 | 		},
262 | 	}
263 | 
264 | 	for _, tt := range tests {
265 | 		counter := tt.initial
266 | 		MergeCounts(counter, tt.input)
267 | 		if len(counter) != len(tt.expectedFrequencies) {
268 | 			t.Fatalf("expected %d words but got %d", len(tt.expectedFrequencies), len(counter))
269 | 		}
270 | 
271 | 		for expectedk, expectedv := range tt.expectedFrequencies {
272 | 			v, ok := counter[expectedk]
273 | 			if !ok {
274 | 				t.Fatalf("expected frequencies to contain \"%s\"", expectedk)
275 | 			}
276 | 
277 | 			if expectedv != v {
278 | 				t.Fatalf("expected %s to have count %d but got %d", expectedk, expectedv, v)
279 | 			}
280 | 		}
281 | 	}
282 | }
283 | 


--------------------------------------------------------------------------------
/vendor/gopkg.in/yaml.v3/yamlprivateh.go:
--------------------------------------------------------------------------------
  1 | // 
  2 | // Copyright (c) 2011-2019 Canonical Ltd
  3 | // Copyright (c) 2006-2010 Kirill Simonov
  4 | // 
  5 | // Permission is hereby granted, free of charge, to any person obtaining a copy of
  6 | // this software and associated documentation files (the "Software"), to deal in
  7 | // the Software without restriction, including without limitation the rights to
  8 | // use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
  9 | // of the Software, and to permit persons to whom the Software is furnished to do
 10 | // so, subject to the following conditions:
 11 | // 
 12 | // The above copyright notice and this permission notice shall be included in all
 13 | // copies or substantial portions of the Software.
 14 | // 
 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 21 | // SOFTWARE.
 22 | 
 23 | package yaml
 24 | 
 25 | const (
 26 | 	// The size of the input raw buffer.
 27 | 	input_raw_buffer_size = 512
 28 | 
 29 | 	// The size of the input buffer.
 30 | 	// It should be possible to decode the whole raw buffer.
 31 | 	input_buffer_size = input_raw_buffer_size * 3
 32 | 
 33 | 	// The size of the output buffer.
 34 | 	output_buffer_size = 128
 35 | 
 36 | 	// The size of the output raw buffer.
 37 | 	// It should be possible to encode the whole output buffer.
 38 | 	output_raw_buffer_size = (output_buffer_size*2 + 2)
 39 | 
 40 | 	// The size of other stacks and queues.
 41 | 	initial_stack_size  = 16
 42 | 	initial_queue_size  = 16
 43 | 	initial_string_size = 16
 44 | )
 45 | 
 46 | // Check if the character at the specified position is an alphabetical
 47 | // character, a digit, '_', or '-'.
 48 | func is_alpha(b []byte, i int) bool {
 49 | 	return b[i] >= '0' && b[i] <= '9' || b[i] >= 'A' && b[i] <= 'Z' || b[i] >= 'a' && b[i] <= 'z' || b[i] == '_' || b[i] == '-'
 50 | }
 51 | 
 52 | // Check if the character at the specified position is a digit.
 53 | func is_digit(b []byte, i int) bool {
 54 | 	return b[i] >= '0' && b[i] <= '9'
 55 | }
 56 | 
 57 | // Get the value of a digit.
 58 | func as_digit(b []byte, i int) int {
 59 | 	return int(b[i]) - '0'
 60 | }
 61 | 
 62 | // Check if the character at the specified position is a hex-digit.
 63 | func is_hex(b []byte, i int) bool {
 64 | 	return b[i] >= '0' && b[i] <= '9' || b[i] >= 'A' && b[i] <= 'F' || b[i] >= 'a' && b[i] <= 'f'
 65 | }
 66 | 
 67 | // Get the value of a hex-digit.
 68 | func as_hex(b []byte, i int) int {
 69 | 	bi := b[i]
 70 | 	if bi >= 'A' && bi <= 'F' {
 71 | 		return int(bi) - 'A' + 10
 72 | 	}
 73 | 	if bi >= 'a' && bi <= 'f' {
 74 | 		return int(bi) - 'a' + 10
 75 | 	}
 76 | 	return int(bi) - '0'
 77 | }
 78 | 
 79 | // Check if the character is ASCII.
 80 | func is_ascii(b []byte, i int) bool {
 81 | 	return b[i] <= 0x7F
 82 | }
 83 | 
 84 | // Check if the character at the start of the buffer can be printed unescaped.
 85 | func is_printable(b []byte, i int) bool {
 86 | 	return ((b[i] == 0x0A) || // . == #x0A
 87 | 		(b[i] >= 0x20 && b[i] <= 0x7E) || // #x20 <= . <= #x7E
 88 | 		(b[i] == 0xC2 && b[i+1] >= 0xA0) || // #0xA0 <= . <= #xD7FF
 89 | 		(b[i] > 0xC2 && b[i] < 0xED) ||
 90 | 		(b[i] == 0xED && b[i+1] < 0xA0) ||
 91 | 		(b[i] == 0xEE) ||
 92 | 		(b[i] == 0xEF && // #xE000 <= . <= #xFFFD
 93 | 			!(b[i+1] == 0xBB && b[i+2] == 0xBF) && // && . != #xFEFF
 94 | 			!(b[i+1] == 0xBF && (b[i+2] == 0xBE || b[i+2] == 0xBF))))
 95 | }
 96 | 
 97 | // Check if the character at the specified position is NUL.
 98 | func is_z(b []byte, i int) bool {
 99 | 	return b[i] == 0x00
100 | }
101 | 
// is_bom reports whether the buffer begins with the byte sequence EF BB BF
// (a BOM).
//
// NOTE(review): the index i is not used; the first three bytes of b are
// always the ones inspected, whatever position is passed in.
func is_bom(b []byte, i int) bool {
	if b[0] != 0xEF {
		return false
	}
	if b[1] != 0xBB {
		return false
	}
	return b[2] == 0xBF
}
106 | 
// is_space reports whether the byte at b[i] is a space (0x20).
func is_space(b []byte, i int) bool {
	c := b[i]
	return c == 0x20
}
111 | 
// is_tab reports whether the byte at b[i] is a horizontal tab (0x09).
func is_tab(b []byte, i int) bool {
	c := b[i]
	return c == 0x09
}
116 | 
// is_blank reports whether the byte at b[i] is blank, i.e. a space or a tab.
// It is the combination of is_space and is_tab, written out directly.
func is_blank(b []byte, i int) bool {
	switch b[i] {
	case ' ', '\t':
		return true
	}
	return false
}
122 | 
// is_break reports whether the character starting at b[i] is a line break:
// CR (#xD), LF (#xA), NEL (#x85, bytes C2 85), LS (#x2028, bytes E2 80 A8) or
// PS (#x2029, bytes E2 80 A9).  Following bytes are only read when the
// leading byte is 0xC2 or 0xE2.
func is_break(b []byte, i int) bool {
	switch b[i] {
	case '\r', '\n': // CR, LF
		return true
	case 0xC2: // NEL
		return b[i+1] == 0x85
	case 0xE2: // LS, PS
		return b[i+1] == 0x80 && (b[i+2] == 0xA8 || b[i+2] == 0xA9)
	}
	return false
}
131 | 
// is_crlf reports whether the two bytes starting at b[i] are CR followed by
// LF.  The second byte is only read when the first one is CR.
func is_crlf(b []byte, i int) bool {
	if b[i] != '\r' {
		return false
	}
	return b[i+1] == '\n'
}
135 | 
// is_breakz reports whether the character starting at b[i] is a line break or
// NUL.  It is the combination of is_break and is_z, written out directly:
// CR (#xD), LF (#xA), NEL (#x85, bytes C2 85), LS (#x2028, bytes E2 80 A8),
// PS (#x2029, bytes E2 80 A9) or a zero byte.
func is_breakz(b []byte, i int) bool {
	switch b[i] {
	case '\r', '\n', 0: // CR, LF, NUL
		return true
	case 0xC2: // NEL
		return b[i+1] == 0x85
	case 0xE2: // LS, PS
		return b[i+1] == 0x80 && (b[i+2] == 0xA8 || b[i+2] == 0xA9)
	}
	return false
}
149 | 
// is_spacez reports whether the character starting at b[i] is a space, a line
// break or NUL.  It is the combination of is_space and is_breakz, written out
// directly: ' ', CR (#xD), LF (#xA), NEL (#x85, bytes C2 85), LS (#x2028,
// bytes E2 80 A8), PS (#x2029, bytes E2 80 A9) or a zero byte.
func is_spacez(b []byte, i int) bool {
	switch b[i] {
	case ' ', '\r', '\n', 0: // space, CR, LF, NUL
		return true
	case 0xC2: // NEL
		return b[i+1] == 0x85
	case 0xE2: // LS, PS
		return b[i+1] == 0x80 && (b[i+2] == 0xA8 || b[i+2] == 0xA9)
	}
	return false
}
164 | 
// is_blankz reports whether the character starting at b[i] is a space, a tab,
// a line break or NUL.  It is the combination of is_blank and is_breakz,
// written out directly: ' ', '\t', CR (#xD), LF (#xA), NEL (#x85, bytes
// C2 85), LS (#x2028, bytes E2 80 A8), PS (#x2029, bytes E2 80 A9) or a zero
// byte.
func is_blankz(b []byte, i int) bool {
	switch b[i] {
	case ' ', '\t', '\r', '\n', 0: // space, tab, CR, LF, NUL
		return true
	case 0xC2: // NEL
		return b[i+1] == 0x85
	case 0xE2: // LS, PS
		return b[i+1] == 0x80 && (b[i+2] == 0xA8 || b[i+2] == 0xA9)
	}
	return false
}
179 | 
// width returns the number of bytes in the character whose leading byte is b,
// judged by the byte's high bits: 1 for 0xxxxxxx, 2 for 110xxxxx, 3 for
// 1110xxxx and 4 for 11110xxx.  Any other byte yields 0.
func width(b byte) int {
	// Deliberately a chain of ifs: do not turn this into a switch without
	// first confirming that the function is still being inlined.
	if b < 0x80 {
		return 1
	}
	if b>>5 == 0x06 {
		return 2
	}
	if b>>4 == 0x0E {
		return 3
	}
	if b>>3 == 0x1E {
		return 4
	}
	return 0
}
199 | 


--------------------------------------------------------------------------------
/vendor/github.com/dlclark/regexp2/ATTRIB:
--------------------------------------------------------------------------------
  1 | ============
  2 | These pieces of code were ported from dotnet/corefx:
  3 | 
  4 | syntax/charclass.go (from RegexCharClass.cs): ported to use the built-in Go unicode classes.  Canonicalize is 
  5 |     a direct port, but most of the other code required large changes because the C# implementation 
  6 |     used a string to represent the CharSet data structure and I cleaned that up in my implementation.
  7 | 
  8 | syntax/code.go (from RegexCode.cs): ported literally with various cleanups and layout to make it more Go-ish.
  9 | 
 10 | syntax/escape.go (from RegexParser.cs): ported Escape method and added some optimizations.  Unescape is inspired by 
 11 |     the C# implementation but couldn't be directly ported because of the lack of do-while syntax in Go.
 12 | 
 13 | syntax/parser.go (from RegexParser.cs and RegexOptions.cs): ported parser struct and associated methods as 
 14 |     literally as possible. Several language differences required changes.  E.g. lack of pre/post-fix increments as 
 15 |     expressions, lack of do-while loops, lack of overloads, etc.
 16 | 
 17 | syntax/prefix.go (from RegexFCD.cs and RegexBoyerMoore.cs): ported as literally as possible and added support
 18 |     for unicode chars that are longer than the 16-bit char in C# for the 32-bit rune in Go.
 19 | 
 20 | syntax/replacerdata.go (from RegexReplacement.cs): conceptually ported and re-organized to handle differences 
 21 |     in charclass implementation, and fix odd code layout between RegexParser.cs, Regex.cs, and RegexReplacement.cs.
 22 | 
 23 | syntax/tree.go (from RegexTree.cs and RegexNode.cs): ported as literally as possible.
 24 | 
 25 | syntax/writer.go (from RegexWriter.cs): ported literally with minor changes to make it more Go-ish.
 26 | 
 27 | match.go (from RegexMatch.cs): ported, simplified, and changed to handle Go's lack of inheritance.
 28 | 
 29 | regexp.go (from Regex.cs and RegexOptions.cs): conceptually serves the same "starting point", but is simplified 
 30 |     and changed to handle differences in C# strings and Go strings/runes.  
 31 | 
 32 | replace.go (from RegexReplacement.cs): ported closely and then cleaned up to combine the MatchEvaluator and 
 33 |     simple string replace implementations.
 34 | 
 35 | runner.go (from RegexRunner.cs): ported as literally as possible.
 36 | 
 37 | regexp_test.go (from CaptureTests.cs and GroupNamesAndNumbers.cs): conceptually ported, but the code was 
 38 |     manually structured like Go tests.
 39 | 
 40 | replace_test.go (from RegexReplaceStringTest0.cs): conceptually ported
 41 | 
 42 | rtl_test.go (from RightToLeft.cs): conceptually ported
 43 | ---
 44 | dotnet/corefx was released under this license:
 45 | 
 46 | The MIT License (MIT)
 47 | 
 48 | Copyright (c) Microsoft Corporation
 49 | 
 50 | Permission is hereby granted, free of charge, to any person obtaining a copy
 51 | of this software and associated documentation files (the "Software"), to deal
 52 | in the Software without restriction, including without limitation the rights
 53 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 54 | copies of the Software, and to permit persons to whom the Software is
 55 | furnished to do so, subject to the following conditions:
 56 | 
 57 | The above copyright notice and this permission notice shall be included in all
 58 | copies or substantial portions of the Software.
 59 | 
 60 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 61 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 62 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 63 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 64 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 65 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 66 | SOFTWARE.
 67 | 
 68 | ============
 69 | These pieces of code are copied from the Go framework:
 70 | 
 71 | - The overall directory structure of regexp2 was inspired by the Go runtime regexp package.
 72 | - The optimization in the escape method of syntax/escape.go is from the Go runtime QuoteMeta() func in regexp/regexp.go
 73 | - The method signatures in regexp.go are designed to match the Go framework regexp methods closely
 74 | - func regexp2.MustCompile and func quote are almost identical to the regexp package versions
 75 | - BenchmarkMatch* and TestProgramTooLong* funcs in regexp_performance_test.go were copied from the framework 
 76 |     regexp/exec_test.go
 77 | ---
 78 | The Go framework was released under this license:
 79 | 
 80 | Copyright (c) 2012 The Go Authors. All rights reserved.
 81 | 
 82 | Redistribution and use in source and binary forms, with or without
 83 | modification, are permitted provided that the following conditions are
 84 | met:
 85 | 
 86 |    * Redistributions of source code must retain the above copyright
 87 | notice, this list of conditions and the following disclaimer.
 88 |    * Redistributions in binary form must reproduce the above
 89 | copyright notice, this list of conditions and the following disclaimer
 90 | in the documentation and/or other materials provided with the
 91 | distribution.
 92 |    * Neither the name of Google Inc. nor the names of its
 93 | contributors may be used to endorse or promote products derived from
 94 | this software without specific prior written permission.
 95 | 
 96 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 97 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 98 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 99 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
100 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
101 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
102 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
103 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
104 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
105 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
106 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
107 | 
108 | ============
109 | Some test data were gathered from the Mono project.
110 | 
111 | regexp_mono_test.go: ported from https://github.com/mono/mono/blob/master/mcs/class/System/Test/System.Text.RegularExpressions/PerlTrials.cs
112 | ---
113 | Mono tests released under this license:
114 | 
115 | Permission is hereby granted, free of charge, to any person obtaining
116 | a copy of this software and associated documentation files (the
117 | "Software"), to deal in the Software without restriction, including
118 | without limitation the rights to use, copy, modify, merge, publish,
119 | distribute, sublicense, and/or sell copies of the Software, and to
120 | permit persons to whom the Software is furnished to do so, subject to
121 | the following conditions:
122 | 
123 | The above copyright notice and this permission notice shall be
124 | included in all copies or substantial portions of the Software.
125 | 
126 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
127 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
128 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
129 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
130 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
131 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
132 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
133 | 
134 | 


--------------------------------------------------------------------------------
/bpe.go:
--------------------------------------------------------------------------------
  1 | package tokenizer
  2 | 
  3 | import (
  4 | 	"errors"
  5 | 	"fmt"
  6 | 	"math"
  7 | )
  8 | 
  9 | var (
 10 | 	specialTokens = int64(1) // this is a wit meme, but for now it shifts vocab indices by 1 because 0 is reserved for padding
 11 | )
 12 | 
// Merge records a single BPE merge step: the pair of symbols that was joined
// and the pair statistic it had when it was selected.
type Merge struct {
	// Merge is the (left, right) symbol pair that was merged.
	Merge [2]string
	// Count is the value of the pair statistic at selection time.
	Count int64
}
 17 | 
 18 | type byWordCount []WordCount
 19 | 
 20 | func (s byWordCount) Len() int      { return len(s) }
 21 | func (s byWordCount) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
 22 | func (s byWordCount) Less(i, j int) bool {
 23 | 	first, second := s[i], s[j]
 24 | 	switch {
 25 | 	case first.Count > second.Count:
 26 | 		return true
 27 | 	case first.Count < second.Count:
 28 | 		return false
 29 | 	default:
 30 | 		min := int(math.Min(float64(len(first.Pieces)), float64(len(second.Pieces))))
 31 | 		for k := 0; k < min; k++ {
 32 | 			if first.Pieces[k] < second.Pieces[k] {
 33 | 				return true
 34 | 			} else if first.Pieces[k] > second.Pieces[k] {
 35 | 				return false
 36 | 			}
 37 | 		}
 38 | 	}
 39 | 
 40 | 	return false
 41 | }
 42 | 
 43 | func getPairStatistics(vocab []WordCount) (map[[2]string]int64, map[[2]string]map[int64]int64) {
 44 | 	stats := map[[2]string]int64{}
 45 | 	indices := map[[2]string]map[int64]int64{}
 46 | 
 47 | 	var prevChar string
 48 | 	for i, symbol := range vocab {
 49 | 		if len(symbol.Pieces) < 2 {
 50 | 			continue
 51 | 		}
 52 | 		prevChar = symbol.Pieces[0]
 53 | 		for _, c := range symbol.Pieces[1:] {
 54 | 			key := [2]string{prevChar, c}
 55 | 			stats[key] += symbol.Count
 56 | 			if _, ok := indices[key]; !ok {
 57 | 				indices[key] = make(map[int64]int64)
 58 | 			}
 59 | 			indices[key][int64(i)]++
 60 | 			prevChar = c
 61 | 		}
 62 | 	}
 63 | 
 64 | 	return stats, indices
 65 | }
 66 | 
 67 | func getMaxStat(stats map[[2]string]int64) [2]string {
 68 | 	var maxKeys [][2]string
 69 | 
 70 | 	maximum := int64(-1)
 71 | 	for key, count := range stats {
 72 | 		if count == maximum {
 73 | 			maxKeys = append(maxKeys, key)
 74 | 		}
 75 | 		if count > maximum {
 76 | 			maximum = count
 77 | 			maxKeys = [][2]string{key}
 78 | 		}
 79 | 	}
 80 | 	maxKey := maxKeys[0]
 81 | 	for _, key := range maxKeys[1:] {
 82 | 		for k := 0; k < 2; k++ {
 83 | 			if maxKey[k] > key[k] {
 84 | 				maxKey = key
 85 | 			} else if maxKey[k] < key[k] {
 86 | 				break
 87 | 			}
 88 | 		}
 89 | 	}
 90 | 	return maxKey
 91 | }
 92 | 
 93 | func pruneStats(stats, bigStats map[[2]string]int64, threshold float64) {
 94 | 	var pruneCount int64
 95 | 	for item, freq := range stats {
 96 | 		if float64(freq) >= threshold {
 97 | 			continue
 98 | 		}
 99 | 		delete(stats, item)
100 | 		pruneCount++
101 | 		if freq < 0 {
102 | 			bigStats[item] += freq
103 | 		} else {
104 | 			bigStats[item] = freq
105 | 		}
106 | 	}
107 | }
108 | 
// deepCopyStats returns a new map holding the same pair -> count entries as
// stats. The result is always non-nil and shares no storage with the input.
func deepCopyStats(stats map[[2]string]int64) map[[2]string]int64 {
	clone := make(map[[2]string]int64, len(stats))
	for pair := range stats {
		clone[pair] = stats[pair]
	}
	return clone
}
116 | 
// change describes one vocabulary word that was rewritten by a merge.
type change struct {
	// Index is the word's position in the sorted vocabulary slice.
	Index     int64
	// Word is the word's pieces after the merge was applied.
	Word      []string
	// OldWord is the word's pieces before the merge.
	OldWord   []string
	// Frequency is the word's count in the vocabulary.
	Frequency int64
}
123 | 
124 | func replacePair(bigram [2]string, sortedVocab []WordCount, indices map[[2]string]map[int64]int64) []change {
125 | 	changes := []change{}
126 | 	for i, freq := range indices[bigram] {
127 | 		if freq < 1 {
128 | 			continue
129 | 		}
130 | 
131 | 		symbol := sortedVocab[i]
132 | 		word, freq := symbol.Pieces, symbol.Count
133 | 		newWordPieces := replace(word, bigram)
134 | 		sortedVocab[i] = WordCount{Pieces: newWordPieces, Count: freq}
135 | 		changes = append(changes, change{int64(i), newWordPieces, word, freq})
136 | 	}
137 | 	return changes
138 | }
139 | 
// strSliceIndexOf returns the index of the first element equal to elem at or
// after position from, or -1 when there is none (including when from is at or
// past the end of s).
func strSliceIndexOf(s []string, elem string, from int) int {
	pos := from
	for pos < len(s) && s[pos] != elem {
		pos++
	}
	if pos < len(s) {
		return pos
	}
	return -1
}
148 | 
// updatePairStatistics incrementally adjusts stats and indices after pair has
// been merged into a single symbol (pair[0]+pair[1]) in the words listed in
// changed. Instead of recounting the whole vocabulary it only touches the
// pairs adjacent to each merge site:
//
//   - in the old form of each word, pairs that overlapped an occurrence of
//     pair lose the word's frequency;
//   - in the new form of each word, pairs that involve the merged symbol gain
//     the word's frequency.
//
// The entry for pair itself is reset to zero / an empty index map up front.
// Both stats and indices are modified in place.
func updatePairStatistics(pair [2]string, changed []change, stats map[[2]string]int64, indices map[[2]string]map[int64]int64) {
	stats[pair] = 0
	indices[pair] = make(map[int64]int64)
	first, second := pair[0], pair[1]
	newPair := first + second
	var prev [2]string
	// NOTE: the loop variable shadows the `change` type inside the loop body.
	for _, change := range changed {
		// Pass 1 (old word): find all instances of pair, and update
		// frequency/indices around it
		i := 0
		for {
			i = strSliceIndexOf(change.OldWord, first, i)
			if i < 0 {
				break
			}

			// if first symbol is followed by second symbol, we've found an occurrence of pair (old_word[i:i+2])
			if i < len(change.OldWord)-1 && change.OldWord[i+1] == second {
				// assuming a symbol sequence "A B C", if "B C" is merged, reduce the frequency of "A B"
				if i > 0 {
					prev = [2]string{change.OldWord[i-1], change.OldWord[i]}
					stats[prev] -= change.Frequency
					if _, ok := indices[prev]; !ok {
						indices[prev] = map[int64]int64{}
					}
					indices[prev][change.Index]--
				}

				if i < len(change.OldWord)-2 {
					// assuming a symbol sequence "A B C B", if "B C" is merged, reduce the frequency of "C B".
					// however, skip this if the sequence is A B C B C, because the frequency of "C B" will be reduced by the previous code block
					if change.OldWord[i+2] != first || i >= len(change.OldWord)-3 || change.OldWord[i+3] != second {
						nex := [2]string{change.OldWord[i+1], change.OldWord[i+2]}
						stats[nex] -= change.Frequency
						if _, ok := indices[nex]; !ok {
							indices[nex] = map[int64]int64{}
						}
						indices[nex][change.Index]--
					}
				}

				// skip past both symbols of the occurrence just handled
				i += 2
			} else {
				// `first` was not followed by `second`; keep scanning
				i++
			}
		}

		// Pass 2 (new word): find every merged symbol and credit the pairs it
		// now forms with its neighbours.
		i = 0
		for {
			i = strSliceIndexOf(change.Word, newPair, i)
			if i < 0 {
				break
			}

			// pair formed with the symbol to the left of the merged symbol
			if i > 0 {
				prev = [2]string{change.Word[i-1], change.Word[i]}
				stats[prev] += change.Frequency
				if _, ok := indices[prev]; !ok {
					indices[prev] = map[int64]int64{}
				}
				indices[prev][change.Index]++
			}

			// pair formed with the symbol to the right; when the right
			// neighbour is itself a merged symbol it is skipped here, because
			// that pair is counted as the left-hand pair of the next match
			if i < len(change.Word)-1 && change.Word[i+1] != newPair {
				nex := [2]string{change.Word[i], change.Word[i+1]}
				stats[nex] += change.Frequency
				if _, ok := indices[nex]; !ok {
					indices[nex] = map[int64]int64{}
				}
				indices[nex][change.Index]++
			}
			i++
		}
	}
}
223 | 
224 | func baseEncoder() map[string]int64 {
225 | 	encoder := make(map[string]int64)
226 | 	for k, v := range bytesEncoderInverse {
227 | 		encoder[string(k)] = int64(v) + specialTokens
228 | 	}
229 | 	return encoder
230 | }
231 | 
232 | func BPE(freq map[string]int64, numSymbols, minFrequency int64) (map[string]int64, []*Merge, error) {
233 | 	frequencies := mapToSortedWordCount(freq)
234 | 
235 | 	if minFrequency <= 0 {
236 | 		return nil, nil, errors.New("min frequency can't be 0")
237 | 	}
238 | 
239 | 	stats, indices := getPairStatistics(frequencies)
240 | 	bigStats := deepCopyStats(stats)
241 | 
242 | 	merges := []*Merge{}
243 | 	encoder := baseEncoder()
244 | 	encoderIdx := int64(len(encoder)) + specialTokens
245 | 	if len(freq) == 0 {
246 | 		return encoder, merges, nil
247 | 	}
248 | 
249 | 	threshold := float64(stats[getMaxStat(stats)]) / 10.0
250 | 
251 | 	for i := int64(0); i < numSymbols; i++ {
252 | 		var mostFrequent [2]string
253 | 		if len(stats) > 0 {
254 | 			mostFrequent = getMaxStat(stats)
255 | 		}
256 | 
257 | 		// we probably missed the best pair because of pruning; go back to full statistics
258 | 		if len(stats) == 0 || (i > 0 && float64(stats[mostFrequent]) < threshold) {
259 | 			pruneStats(stats, bigStats, threshold)
260 | 			stats = deepCopyStats(bigStats)
261 | 			mostFrequent = getMaxStat(stats)
262 | 			threshold = float64(stats[mostFrequent]) * float64(i) / (float64(i) + 10000.0)
263 | 			pruneStats(stats, bigStats, threshold)
264 | 		}
265 | 
266 | 		if stats[mostFrequent] < minFrequency {
267 | 			break
268 | 		}
269 | 
270 | 		merges = append(merges, &Merge{Merge: mostFrequent, Count: stats[mostFrequent]})
271 | 
272 | 		encoder[fmt.Sprintf("%s%s", mostFrequent[0], mostFrequent[1])] = encoderIdx
273 | 		encoderIdx++
274 | 
275 | 		changes := replacePair(mostFrequent, frequencies, indices)
276 | 
277 | 		updatePairStatistics(mostFrequent, changes, stats, indices)
278 | 		stats[mostFrequent] = 0
279 | 	}
280 | 
281 | 	return encoder, merges, nil
282 | }
283 | 


--------------------------------------------------------------------------------
/vendor/github.com/pkg/errors/errors.go:
--------------------------------------------------------------------------------
  1 | // Package errors provides simple error handling primitives.
  2 | //
  3 | // The traditional error handling idiom in Go is roughly akin to
  4 | //
  5 | //     if err != nil {
  6 | //             return err
  7 | //     }
  8 | //
  9 | // which when applied recursively up the call stack results in error reports
 10 | // without context or debugging information. The errors package allows
 11 | // programmers to add context to the failure path in their code in a way
 12 | // that does not destroy the original value of the error.
 13 | //
 14 | // Adding context to an error
 15 | //
 16 | // The errors.Wrap function returns a new error that adds context to the
 17 | // original error by recording a stack trace at the point Wrap is called,
 18 | // together with the supplied message. For example
 19 | //
 20 | //     _, err := ioutil.ReadAll(r)
 21 | //     if err != nil {
 22 | //             return errors.Wrap(err, "read failed")
 23 | //     }
 24 | //
 25 | // If additional control is required, the errors.WithStack and
 26 | // errors.WithMessage functions destructure errors.Wrap into its component
 27 | // operations: annotating an error with a stack trace and with a message,
 28 | // respectively.
 29 | //
 30 | // Retrieving the cause of an error
 31 | //
 32 | // Using errors.Wrap constructs a stack of errors, adding context to the
 33 | // preceding error. Depending on the nature of the error it may be necessary
 34 | // to reverse the operation of errors.Wrap to retrieve the original error
 35 | // for inspection. Any error value which implements this interface
 36 | //
 37 | //     type causer interface {
 38 | //             Cause() error
 39 | //     }
 40 | //
 41 | // can be inspected by errors.Cause. errors.Cause will recursively retrieve
 42 | // the topmost error that does not implement causer, which is assumed to be
 43 | // the original cause. For example:
 44 | //
 45 | //     switch err := errors.Cause(err).(type) {
 46 | //     case *MyError:
 47 | //             // handle specifically
 48 | //     default:
 49 | //             // unknown error
 50 | //     }
 51 | //
 52 | // Although the causer interface is not exported by this package, it is
 53 | // considered a part of its stable public interface.
 54 | //
 55 | // Formatted printing of errors
 56 | //
 57 | // All error values returned from this package implement fmt.Formatter and can
 58 | // be formatted by the fmt package. The following verbs are supported:
 59 | //
 60 | //     %s    print the error. If the error has a Cause it will be
 61 | //           printed recursively.
 62 | //     %v    see %s
 63 | //     %+v   extended format. Each Frame of the error's StackTrace will
 64 | //           be printed in detail.
 65 | //
 66 | // Retrieving the stack trace of an error or wrapper
 67 | //
 68 | // New, Errorf, Wrap, and Wrapf record a stack trace at the point they are
 69 | // invoked. This information can be retrieved with the following interface:
 70 | //
 71 | //     type stackTracer interface {
 72 | //             StackTrace() errors.StackTrace
 73 | //     }
 74 | //
 75 | // The returned errors.StackTrace type is defined as
 76 | //
 77 | //     type StackTrace []Frame
 78 | //
 79 | // The Frame type represents a call site in the stack trace. Frame supports
 80 | // the fmt.Formatter interface that can be used for printing information about
 81 | // the stack trace of this error. For example:
 82 | //
 83 | //     if err, ok := err.(stackTracer); ok {
 84 | //             for _, f := range err.StackTrace() {
 85 | //                     fmt.Printf("%+s:%d\n", f, f)
 86 | //             }
 87 | //     }
 88 | //
 89 | // Although the stackTracer interface is not exported by this package, it is
 90 | // considered a part of its stable public interface.
 91 | //
 92 | // See the documentation for Frame.Format for more details.
 93 | package errors
 94 | 
 95 | import (
 96 | 	"fmt"
 97 | 	"io"
 98 | )
 99 | 
100 | // New returns an error with the supplied message.
101 | // New also records the stack trace at the point it was called.
102 | func New(message string) error {
103 | 	return &fundamental{
104 | 		msg:   message,
105 | 		stack: callers(),
106 | 	}
107 | }
108 | 
109 | // Errorf formats according to a format specifier and returns the string
110 | // as a value that satisfies error.
111 | // Errorf also records the stack trace at the point it was called.
112 | func Errorf(format string, args ...interface{}) error {
113 | 	return &fundamental{
114 | 		msg:   fmt.Sprintf(format, args...),
115 | 		stack: callers(),
116 | 	}
117 | }
118 | 
119 | // fundamental is an error that has a message and a stack, but no caller.
120 | type fundamental struct {
121 | 	msg string
122 | 	*stack
123 | }
124 | 
125 | func (f *fundamental) Error() string { return f.msg }
126 | 
127 | func (f *fundamental) Format(s fmt.State, verb rune) {
128 | 	switch verb {
129 | 	case 'v':
130 | 		if s.Flag('+') {
131 | 			io.WriteString(s, f.msg)
132 | 			f.stack.Format(s, verb)
133 | 			return
134 | 		}
135 | 		fallthrough
136 | 	case 's':
137 | 		io.WriteString(s, f.msg)
138 | 	case 'q':
139 | 		fmt.Fprintf(s, "%q", f.msg)
140 | 	}
141 | }
142 | 
143 | // WithStack annotates err with a stack trace at the point WithStack was called.
144 | // If err is nil, WithStack returns nil.
145 | func WithStack(err error) error {
146 | 	if err == nil {
147 | 		return nil
148 | 	}
149 | 	return &withStack{
150 | 		err,
151 | 		callers(),
152 | 	}
153 | }
154 | 
155 | type withStack struct {
156 | 	error
157 | 	*stack
158 | }
159 | 
160 | func (w *withStack) Cause() error { return w.error }
161 | 
162 | // Unwrap provides compatibility for Go 1.13 error chains.
163 | func (w *withStack) Unwrap() error { return w.error }
164 | 
165 | func (w *withStack) Format(s fmt.State, verb rune) {
166 | 	switch verb {
167 | 	case 'v':
168 | 		if s.Flag('+') {
169 | 			fmt.Fprintf(s, "%+v", w.Cause())
170 | 			w.stack.Format(s, verb)
171 | 			return
172 | 		}
173 | 		fallthrough
174 | 	case 's':
175 | 		io.WriteString(s, w.Error())
176 | 	case 'q':
177 | 		fmt.Fprintf(s, "%q", w.Error())
178 | 	}
179 | }
180 | 
181 | // Wrap returns an error annotating err with a stack trace
182 | // at the point Wrap is called, and the supplied message.
183 | // If err is nil, Wrap returns nil.
184 | func Wrap(err error, message string) error {
185 | 	if err == nil {
186 | 		return nil
187 | 	}
188 | 	err = &withMessage{
189 | 		cause: err,
190 | 		msg:   message,
191 | 	}
192 | 	return &withStack{
193 | 		err,
194 | 		callers(),
195 | 	}
196 | }
197 | 
198 | // Wrapf returns an error annotating err with a stack trace
199 | // at the point Wrapf is called, and the format specifier.
200 | // If err is nil, Wrapf returns nil.
201 | func Wrapf(err error, format string, args ...interface{}) error {
202 | 	if err == nil {
203 | 		return nil
204 | 	}
205 | 	err = &withMessage{
206 | 		cause: err,
207 | 		msg:   fmt.Sprintf(format, args...),
208 | 	}
209 | 	return &withStack{
210 | 		err,
211 | 		callers(),
212 | 	}
213 | }
214 | 
215 | // WithMessage annotates err with a new message.
216 | // If err is nil, WithMessage returns nil.
217 | func WithMessage(err error, message string) error {
218 | 	if err == nil {
219 | 		return nil
220 | 	}
221 | 	return &withMessage{
222 | 		cause: err,
223 | 		msg:   message,
224 | 	}
225 | }
226 | 
227 | // WithMessagef annotates err with the format specifier.
228 | // If err is nil, WithMessagef returns nil.
229 | func WithMessagef(err error, format string, args ...interface{}) error {
230 | 	if err == nil {
231 | 		return nil
232 | 	}
233 | 	return &withMessage{
234 | 		cause: err,
235 | 		msg:   fmt.Sprintf(format, args...),
236 | 	}
237 | }
238 | 
239 | type withMessage struct {
240 | 	cause error
241 | 	msg   string
242 | }
243 | 
244 | func (w *withMessage) Error() string { return w.msg + ": " + w.cause.Error() }
245 | func (w *withMessage) Cause() error  { return w.cause }
246 | 
247 | // Unwrap provides compatibility for Go 1.13 error chains.
248 | func (w *withMessage) Unwrap() error { return w.cause }
249 | 
250 | func (w *withMessage) Format(s fmt.State, verb rune) {
251 | 	switch verb {
252 | 	case 'v':
253 | 		if s.Flag('+') {
254 | 			fmt.Fprintf(s, "%+v\n", w.Cause())
255 | 			io.WriteString(s, w.msg)
256 | 			return
257 | 		}
258 | 		fallthrough
259 | 	case 's', 'q':
260 | 		io.WriteString(s, w.Error())
261 | 	}
262 | }
263 | 
// Cause walks down the chain of wrapped errors and returns the innermost one.
// An error takes part in the chain if it implements the following
// interface:
//
//     type causer interface {
//            Cause() error
//     }
//
// The walk stops at the first error that does not implement Cause, which is
// returned unchanged. A nil error (either the argument or one returned by a
// Cause method along the way) yields nil.
func Cause(err error) error {
	type causer interface {
		Cause() error
	}

	for {
		// The assertion fails for a nil interface value, which ends the walk.
		wrapper, ok := err.(causer)
		if !ok {
			return err
		}
		err = wrapper.Cause()
	}
}
289 | 


--------------------------------------------------------------------------------
/vendor/github.com/davecgh/go-spew/spew/doc.go:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (c) 2013-2016 Dave Collins <dave@davec.name>
  3 |  *
  4 |  * Permission to use, copy, modify, and distribute this software for any
  5 |  * purpose with or without fee is hereby granted, provided that the above
  6 |  * copyright notice and this permission notice appear in all copies.
  7 |  *
  8 |  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  9 |  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 10 |  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 11 |  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 12 |  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 13 |  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 14 |  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 15 |  */
 16 | 
 17 | /*
 18 | Package spew implements a deep pretty printer for Go data structures to aid in
 19 | debugging.
 20 | 
 21 | A quick overview of the additional features spew provides over the built-in
 22 | printing facilities for Go data types are as follows:
 23 | 
 24 | 	* Pointers are dereferenced and followed
 25 | 	* Circular data structures are detected and handled properly
 26 | 	* Custom Stringer/error interfaces are optionally invoked, including
 27 | 	  on unexported types
 28 | 	* Custom types which only implement the Stringer/error interfaces via
 29 | 	  a pointer receiver are optionally invoked when passing non-pointer
 30 | 	  variables
 31 | 	* Byte arrays and slices are dumped like the hexdump -C command which
 32 | 	  includes offsets, byte values in hex, and ASCII output (only when using
 33 | 	  Dump style)
 34 | 
 35 | There are two different approaches spew allows for dumping Go data structures:
 36 | 
 37 | 	* Dump style which prints with newlines, customizable indentation,
 38 | 	  and additional debug information such as types and all pointer addresses
 39 | 	  used to indirect to the final value
 40 | 	* A custom Formatter interface that integrates cleanly with the standard fmt
 41 | 	  package and replaces %v, %+v, %#v, and %#+v to provide inline printing
 42 | 	  similar to the default %v while providing the additional functionality
 43 | 	  outlined above and passing unsupported format verbs such as %x and %q
 44 | 	  along to fmt
 45 | 
 46 | Quick Start
 47 | 
 48 | This section demonstrates how to quickly get started with spew.  See the
 49 | sections below for further details on formatting and configuration options.
 50 | 
 51 | To dump a variable with full newlines, indentation, type, and pointer
 52 | information use Dump, Fdump, or Sdump:
 53 | 	spew.Dump(myVar1, myVar2, ...)
 54 | 	spew.Fdump(someWriter, myVar1, myVar2, ...)
 55 | 	str := spew.Sdump(myVar1, myVar2, ...)
 56 | 
 57 | Alternatively, if you would prefer to use format strings with a compacted inline
 58 | printing style, use the convenience wrappers Printf, Fprintf, etc with
 59 | %v (most compact), %+v (adds pointer addresses), %#v (adds types), or
 60 | %#+v (adds types and pointer addresses):
 61 | 	spew.Printf("myVar1: %v -- myVar2: %+v", myVar1, myVar2)
 62 | 	spew.Printf("myVar3: %#v -- myVar4: %#+v", myVar3, myVar4)
 63 | 	spew.Fprintf(someWriter, "myVar1: %v -- myVar2: %+v", myVar1, myVar2)
 64 | 	spew.Fprintf(someWriter, "myVar3: %#v -- myVar4: %#+v", myVar3, myVar4)
 65 | 
 66 | Configuration Options
 67 | 
 68 | Configuration of spew is handled by fields in the ConfigState type.  For
 69 | convenience, all of the top-level functions use a global state available
 70 | via the spew.Config global.
 71 | 
 72 | It is also possible to create a ConfigState instance that provides methods
 73 | equivalent to the top-level functions.  This allows concurrent configuration
 74 | options.  See the ConfigState documentation for more details.
 75 | 
 76 | The following configuration options are available:
 77 | 	* Indent
 78 | 		String to use for each indentation level for Dump functions.
 79 | 		It is a single space by default.  A popular alternative is "\t".
 80 | 
 81 | 	* MaxDepth
 82 | 		Maximum number of levels to descend into nested data structures.
 83 | 		There is no limit by default.
 84 | 
 85 | 	* DisableMethods
 86 | 		Disables invocation of error and Stringer interface methods.
 87 | 		Method invocation is enabled by default.
 88 | 
 89 | 	* DisablePointerMethods
 90 | 		Disables invocation of error and Stringer interface methods on types
 91 | 		which only accept pointer receivers from non-pointer variables.
 92 | 		Pointer method invocation is enabled by default.
 93 | 
 94 | 	* DisablePointerAddresses
 95 | 		DisablePointerAddresses specifies whether to disable the printing of
 96 | 		pointer addresses. This is useful when diffing data structures in tests.
 97 | 
 98 | 	* DisableCapacities
 99 | 		DisableCapacities specifies whether to disable the printing of
100 | 		capacities for arrays, slices, maps and channels. This is useful when
101 | 		diffing data structures in tests.
102 | 
103 | 	* ContinueOnMethod
104 | 		Enables recursion into types after invoking error and Stringer interface
105 | 		methods. Recursion after method invocation is disabled by default.
106 | 
107 | 	* SortKeys
108 | 		Specifies map keys should be sorted before being printed. Use
109 | 		this to have a more deterministic, diffable output.  Note that
110 | 		only native types (bool, int, uint, floats, uintptr and string)
111 | 		and types which implement error or Stringer interfaces are
112 | 		supported with other types sorted according to the
113 | 		reflect.Value.String() output which guarantees display
114 | 		stability.  Natural map order is used by default.
115 | 
116 | 	* SpewKeys
117 | 		Specifies that, as a last resort attempt, map keys should be
118 | 		spewed to strings and sorted by those strings.  This is only
119 | 		considered if SortKeys is true.
120 | 
121 | Dump Usage
122 | 
123 | Simply call spew.Dump with a list of variables you want to dump:
124 | 
125 | 	spew.Dump(myVar1, myVar2, ...)
126 | 
127 | You may also call spew.Fdump if you would prefer to output to an arbitrary
128 | io.Writer.  For example, to dump to standard error:
129 | 
130 | 	spew.Fdump(os.Stderr, myVar1, myVar2, ...)
131 | 
132 | A third option is to call spew.Sdump to get the formatted output as a string:
133 | 
134 | 	str := spew.Sdump(myVar1, myVar2, ...)
135 | 
136 | Sample Dump Output
137 | 
138 | See the Dump example for details on the setup of the types and variables being
139 | shown here.
140 | 
141 | 	(main.Foo) {
142 | 	 unexportedField: (*main.Bar)(0xf84002e210)({
143 | 	  flag: (main.Flag) flagTwo,
144 | 	  data: (uintptr) <nil>
145 | 	 }),
146 | 	 ExportedField: (map[interface {}]interface {}) (len=1) {
147 | 	  (string) (len=3) "one": (bool) true
148 | 	 }
149 | 	}
150 | 
151 | Byte (and uint8) arrays and slices are displayed uniquely like the hexdump -C
152 | command as shown.
153 | 	([]uint8) (len=32 cap=32) {
154 | 	 00000000  11 12 13 14 15 16 17 18  19 1a 1b 1c 1d 1e 1f 20  |............... |
155 | 	 00000010  21 22 23 24 25 26 27 28  29 2a 2b 2c 2d 2e 2f 30  |!"#$%&'()*+,-./0|
156 | 	 00000020  31 32                                             |12|
157 | 	}
158 | 
159 | Custom Formatter
160 | 
161 | Spew provides a custom formatter that implements the fmt.Formatter interface
162 | so that it integrates cleanly with standard fmt package printing functions. The
163 | formatter is useful for inline printing of smaller data types similar to the
164 | standard %v format specifier.
165 | 
166 | The custom formatter only responds to the %v (most compact), %+v (adds pointer
167 | addresses), %#v (adds types), or %#+v (adds types and pointer addresses) verb
 168 | combinations.  Any other verbs such as %x and %q will be sent to the
169 | standard fmt package for formatting.  In addition, the custom formatter ignores
170 | the width and precision arguments (however they will still work on the format
171 | specifiers not handled by the custom formatter).
172 | 
173 | Custom Formatter Usage
174 | 
175 | The simplest way to make use of the spew custom formatter is to call one of the
 176 | convenience functions such as spew.Printf, spew.Println, or spew.Fprintf.  The
177 | functions have syntax you are most likely already familiar with:
178 | 
179 | 	spew.Printf("myVar1: %v -- myVar2: %+v", myVar1, myVar2)
180 | 	spew.Printf("myVar3: %#v -- myVar4: %#+v", myVar3, myVar4)
181 | 	spew.Println(myVar, myVar2)
182 | 	spew.Fprintf(os.Stderr, "myVar1: %v -- myVar2: %+v", myVar1, myVar2)
183 | 	spew.Fprintf(os.Stderr, "myVar3: %#v -- myVar4: %#+v", myVar3, myVar4)
184 | 
 185 | See the Index for the full list of convenience functions.
186 | 
187 | Sample Formatter Output
188 | 
189 | Double pointer to a uint8:
190 | 	  %v: <**>5
191 | 	 %+v: <**>(0xf8400420d0->0xf8400420c8)5
192 | 	 %#v: (**uint8)5
193 | 	%#+v: (**uint8)(0xf8400420d0->0xf8400420c8)5
194 | 
195 | Pointer to circular struct with a uint8 field and a pointer to itself:
196 | 	  %v: <*>{1 <*><shown>}
197 | 	 %+v: <*>(0xf84003e260){ui8:1 c:<*>(0xf84003e260)<shown>}
198 | 	 %#v: (*main.circular){ui8:(uint8)1 c:(*main.circular)<shown>}
199 | 	%#+v: (*main.circular)(0xf84003e260){ui8:(uint8)1 c:(*main.circular)(0xf84003e260)<shown>}
200 | 
201 | See the Printf example for details on the setup of variables being shown
202 | here.
203 | 
204 | Errors
205 | 
206 | Since it is possible for custom Stringer/error interfaces to panic, spew
207 | detects them and handles them internally by printing the panic information
208 | inline with the output.  Since spew is intended to provide deep pretty printing
209 | capabilities on structures, it intentionally does not return any errors.
210 | */
211 | package spew
212 | 


--------------------------------------------------------------------------------
/encoder.go:
--------------------------------------------------------------------------------
  1 | package tokenizer
  2 | 
  3 | import (
  4 | 	"bufio"
  5 | 	"bytes"
  6 | 	"embed"
  7 | 	"encoding/json"
  8 | 	"fmt"
  9 | 	"io"
 10 | 	"strings"
 11 | 
 12 | 	"github.com/dlclark/regexp2"
 13 | 	"github.com/pkg/errors"
 14 | )
 15 | 
const (
	// defaultNumMerges is the initial capacity reserved for the BPE merge
	// list while a vocab file is parsed. It is only a capacity hint; the list
	// grows beyond it when the file contains more merges.
	defaultNumMerges = 50_000
)

// f embeds everything under vocab/ into the binary. NewFromPrebuilt reads
// vocab/<name>/encoder.json and vocab/<name>/vocab.bpe from it.
//
//go:embed vocab/*
var f embed.FS

var (
	// splitRegex cuts input text into the chunks that are BPE-encoded
	// independently: common English contractions, runs of letters, runs of
	// digits, runs of other non-space characters (each optionally preceded by
	// one space), and runs of whitespace.
	splitRegex                        = regexp2.MustCompile(`(?:'s|'t|'re|'ve|'m|'ll|'d| ?\p{L}+| ?\p{N}+| ?[^\s\p{L}\p{N}]+|\s+(?!\S)|\s+)`, 0)
	// bytesEncoder maps every byte value to a rune and bytesEncoderInverse
	// maps those runes back; both are built once by bytesToUnicode.
	bytesEncoder, bytesEncoderInverse = bytesToUnicode()
)
 27 | 
// Encoder is a byte-pair-encoding tokenizer. It carries the vocabulary and
// the merge ranks assembled by New.
type Encoder struct {
	Encoder   map[string]int64    // byte-encoded token string -> token id
	Decoder   map[int64]string    // token id -> byte-encoded token string (inverse of Encoder)
	BPERanks  map[[2]string]int64 // merge pair -> rank; the lowest-ranked pair is merged first
	Cache     map[string]string   // created empty by New; not read or written by the code in this file section
	VocabSize int64               // number of entries in Encoder
}
 35 | 
 36 | func NewFromReaders(encoderReader, vocabReader io.Reader) (*Encoder, error) {
 37 | 	bpeMerges := make([][2]string, 0, defaultNumMerges)
 38 | 
 39 | 	vocabScanner := bufio.NewScanner(vocabReader)
 40 | 	for vocabScanner.Scan() {
 41 | 		// each line will look something like:
 42 | 		// fanta stic 4234234
 43 | 		// we ignore the last count column for encoding purposes
 44 | 		split := strings.Split(vocabScanner.Text(), " ")
 45 | 
 46 | 		bpeMerges = append(bpeMerges, [2]string{split[0], split[1]})
 47 | 	}
 48 | 
 49 | 	encoderContents, err := io.ReadAll(encoderReader)
 50 | 	if err != nil {
 51 | 		return nil, errors.Wrap(err, "failed to read encoder file")
 52 | 	}
 53 | 
 54 | 	encoderMap := map[string]int64{}
 55 | 	if err := json.Unmarshal(encoderContents, &encoderMap); err != nil {
 56 | 		return nil, errors.Wrap(err, "corrupted encoder file")
 57 | 	}
 58 | 
 59 | 	return New(encoderMap, bpeMerges)
 60 | }
 61 | 
 62 | func NewFromPrebuilt(name string) (*Encoder, error) {
 63 | 	encoderPath := fmt.Sprintf("vocab/%s/encoder.json", name)
 64 | 	vocabPath := fmt.Sprintf("vocab/%s/vocab.bpe", name)
 65 | 
 66 | 	_, encoderOpenErr := f.Open(encoderPath)
 67 | 	_, vocabOpenErr := f.Open(vocabPath)
 68 | 	if vocabOpenErr != nil || encoderOpenErr != nil {
 69 | 		return nil, errors.New("failed to load prebuilt tokenizer")
 70 | 	}
 71 | 	encoderContents, err := f.ReadFile(encoderPath)
 72 | 	if err != nil {
 73 | 		return nil, errors.Wrap(err, "failed to read encoder file")
 74 | 	}
 75 | 	encoderMap := map[string]int64{}
 76 | 	if err := json.Unmarshal(encoderContents, &encoderMap); err != nil {
 77 | 		return nil, errors.Wrap(err, "encoder file had invalid json")
 78 | 	}
 79 | 
 80 | 	vocabContents, err := f.ReadFile(vocabPath)
 81 | 	if err != nil {
 82 | 		return nil, errors.Wrap(err, "failed to read vocab file")
 83 | 	}
 84 | 	vocabScanner := bufio.NewScanner(bytes.NewReader(vocabContents))
 85 | 
 86 | 	bpeMerges := make([][2]string, 0, defaultNumMerges)
 87 | 	for vocabScanner.Scan() {
 88 | 		split := strings.Split(vocabScanner.Text(), " ")
 89 | 		bpeMerges = append(bpeMerges, [2]string{split[0], split[1]})
 90 | 	}
 91 | 
 92 | 	return New(encoderMap, bpeMerges)
 93 | }
 94 | 
 95 | func New(encoder map[string]int64, bpeMerges [][2]string) (*Encoder, error) {
 96 | 	var vocabSize int64
 97 | 	decoder := make(map[int64]string, len(encoder))
 98 | 	for k, v := range encoder {
 99 | 		decoder[v] = k
100 | 		vocabSize++
101 | 	}
102 | 
103 | 	bpeRanks := make(map[[2]string]int64, len(bpeMerges))
104 | 	for i := int64(0); i < int64(len(bpeMerges)); i++ {
105 | 		bpeRanks[bpeMerges[i]] = i
106 | 	}
107 | 
108 | 	return &Encoder{
109 | 		Encoder:   encoder,
110 | 		Decoder:   decoder,
111 | 		BPERanks:  bpeRanks,
112 | 		Cache:     map[string]string{},
113 | 		VocabSize: vocabSize,
114 | 	}, nil
115 | }
116 | 
// getPairs returns every adjacent pair of word pieces, in order. It returns
// nil for an empty input and an empty, non-nil slice for a single piece.
func getPairs(wordPieces []string) [][2]string {
	count := len(wordPieces)
	if count == 0 {
		return nil
	}

	adjacent := make([][2]string, 0, count-1)
	for right := 1; right < count; right++ {
		adjacent = append(adjacent, [2]string{wordPieces[right-1], wordPieces[right]})
	}
	return adjacent
}
131 | 
132 | func (e *Encoder) getMinPair(pairs [][2]string) [2]string {
133 | 	outOfVocab := int64(len(e.BPERanks)) + 1
134 | 	minimumPair := pairs[0]
135 | 	minimumValue, ok := e.BPERanks[minimumPair]
136 | 	if !ok {
137 | 		minimumValue = outOfVocab
138 | 	}
139 | 	for _, pair := range pairs[1:] {
140 | 		pairValue, ok := e.BPERanks[pair]
141 | 		if !ok {
142 | 			pairValue = outOfVocab
143 | 		}
144 | 
145 | 		if pairValue < minimumValue {
146 | 			minimumPair = pair
147 | 			minimumValue = pairValue
148 | 		}
149 | 	}
150 | 
151 | 	return minimumPair
152 | }
153 | 
154 | func (e *Encoder) tokenizerBPE(token string) []string {
155 | 	wordPieces := strings.Split(token, "")
156 | 	pairs := getPairs(wordPieces)
157 | 	if len(pairs) == 0 {
158 | 		return []string{token}
159 | 	}
160 | 
161 | 	for {
162 | 		bigram := e.getMinPair(pairs)
163 | 		if _, ok := e.BPERanks[bigram]; !ok {
164 | 			break
165 | 		}
166 | 
167 | 		newWord := replace(wordPieces, bigram)
168 | 		wordPieces = newWord
169 | 		if len(wordPieces) == 1 {
170 | 			break
171 | 		} else {
172 | 			pairs = getPairs(wordPieces)
173 | 		}
174 | 	}
175 | 
176 | 	return wordPieces
177 | }
178 | 
179 | func (e *Encoder) EncodeWords(words []string) ([]int64, []string) {
180 | 	bpeTokens := make([]int64, 0, len(words)*2)
181 | 	bpeTokenStrings := make([]string, 0, len(bpeTokens))
182 | 	for _, word := range words {
183 | 		token := unicodeEncode(word)
184 | 		bpeEncoded := e.tokenizerBPE(token)
185 | 		for _, bpeEnc := range bpeEncoded {
186 | 			if _, ok := e.Encoder[bpeEnc]; ok {
187 | 				bpeTokens = append(bpeTokens, e.Encoder[bpeEnc])
188 | 				bpeTokenStrings = append(bpeTokenStrings, unicodeDecode(bpeEnc))
189 | 			}
190 | 		}
191 | 	}
192 | 	return bpeTokens, bpeTokenStrings
193 | }
194 | 
195 | func unicodeEncode(word string) string {
196 | 	var tokenBuffer bytes.Buffer
197 | 
198 | 	for _, b := range []byte(word) {
199 | 		encodedRune := bytesEncoder[b]
200 | 		tokenBuffer.WriteRune(encodedRune)
201 | 	}
202 | 
203 | 	word = tokenBuffer.String()
204 | 	return word
205 | }
206 | 
207 | func unicodeDecode(word string) string {
208 | 	var decodeBuffer bytes.Buffer
209 | 	for _, dt := range word {
210 | 		decodeBuffer.WriteByte(bytesEncoderInverse[dt])
211 | 	}
212 | 
213 | 	return decodeBuffer.String()
214 | }
215 | 
216 | func WordSplit(s string) []string {
217 | 	results := make([]string, 0)
218 | 	wordsMatch, _ := splitRegex.FindStringMatch(s)
219 | 	if wordsMatch == nil {
220 | 		return nil
221 | 	}
222 | 
223 | 	for {
224 | 		word := wordsMatch.String()
225 | 		if word != "" {
226 | 			results = append(results, word)
227 | 		}
228 | 
229 | 		wordsMatch, _ = splitRegex.FindNextMatch(wordsMatch)
230 | 		if wordsMatch == nil {
231 | 			break
232 | 		}
233 | 	}
234 | 
235 | 	return results
236 | }
237 | 
// runeContains reports whether b occurs anywhere in bs.
func runeContains(bs []int, b int) bool {
	for idx := 0; idx < len(bs); idx++ {
		if bs[idx] == b {
			return true
		}
	}
	return false
}
246 | 
// bytesToUnicode builds a one-to-one table from all 256 byte values to runes,
// together with its inverse. Bytes in the ranges 33-126, 161-172 and 174-255
// map to the rune with the same value. The remaining bytes, taken in
// ascending order, map to 256, 257, 258 and so on.
func bytesToUnicode() (map[byte]rune, map[rune]byte) {
	var keepsOwnValue [256]bool
	for _, span := range [][2]int{{33, 127}, {161, 173}, {174, 256}} {
		for b := span[0]; b < span[1]; b++ {
			keepsOwnValue[b] = true
		}
	}

	forward := map[byte]rune{}
	inverse := map[rune]byte{}
	shifted := 0
	for b := 0; b < 256; b++ {
		target := rune(b)
		if !keepsOwnValue[b] {
			target = rune(256 + shifted)
			shifted++
		}
		forward[byte(b)] = target
		inverse[target] = byte(b)
	}

	return forward, inverse
}
285 | 
// indexOf returns the position of the first element equal to word at or after
// index i, or -1 when there is none.
func indexOf(wordPieces []string, word string, i int64) int64 {
	limit := int64(len(wordPieces))
	pos := i
	for pos < limit {
		if wordPieces[pos] == word {
			return pos
		}
		pos++
	}
	return -1
}
295 | 
// replace returns a copy of wordPieces in which every non-overlapping
// occurrence of the adjacent pair bigram, scanning left to right, is fused
// into a single piece. All other pieces are carried over unchanged.
func replace(wordPieces []string, bigram [2]string) []string {
	left, right := bigram[0], bigram[1]
	fused := left + right
	total := len(wordPieces)
	merged := make([]string, 0, total)

	for pos := 0; pos < total; {
		// A match needs the left piece here and the right piece directly after.
		if wordPieces[pos] == left && pos+1 < total && wordPieces[pos+1] == right {
			merged = append(merged, fused)
			pos += 2
			continue
		}
		merged = append(merged, wordPieces[pos])
		pos++
	}
	return merged
}
330 | 
331 | func (e *Encoder) Encode(text string) ([]int64, []string) {
332 | 	words := WordSplit(text)
333 | 	return e.EncodeWords(words)
334 | }
335 | 
336 | func (e *Encoder) Decode(tokens []int64) string {
337 | 	var decodeBuffer bytes.Buffer
338 | 	for _, token := range tokens {
339 | 		for _, dt := range e.Decoder[token] {
340 | 			decodeBuffer.WriteByte(bytesEncoderInverse[dt])
341 | 		}
342 | 	}
343 | 
344 | 	return decodeBuffer.String()
345 | }
346 | 


--------------------------------------------------------------------------------
/vendor/github.com/dlclark/regexp2/syntax/code.go:
--------------------------------------------------------------------------------
  1 | package syntax
  2 | 
  3 | import (
  4 | 	"bytes"
  5 | 	"fmt"
  6 | 	"math"
  7 | )
  8 | 
  9 | // similar to prog.go in the go regex package...also with comment 'may not belong in this package'
 10 | 
 11 | // File provides operator constants for use by the Builder and the Machine.
 12 | 
 13 | // Implementation notes:
 14 | //
 15 | // Regexps are built into RegexCodes, which contain an operation array,
 16 | // a string table, and some constants.
 17 | //
 18 | // Each operation is one of the codes below, followed by the integer
 19 | // operands specified for each op.
 20 | //
 21 | // Strings and sets are indices into a string table.
 22 | 
// InstOp is a regex machine opcode, optionally OR-ed with the modifier bits
// declared at the end of the constant block below.
type InstOp int

// Opcode values. Only Onerep is declared with the InstOp type; every other
// constant has its own explicit value and is therefore an untyped integer
// constant that converts to InstOp where one is required. The values are also
// used as indexes into codeStr.
const (
	// 					    lef/back operands        description

	Onerep    InstOp = 0 // lef,back char,min,max    a {n}
	Notonerep        = 1 // lef,back char,min,max    .{n}
	Setrep           = 2 // lef,back set,min,max     [\d]{n}

	Oneloop    = 3 // lef,back char,min,max    a {,n}
	Notoneloop = 4 // lef,back char,min,max    .{,n}
	Setloop    = 5 // lef,back set,min,max     [\d]{,n}

	Onelazy    = 6 // lef,back char,min,max    a {,n}?
	Notonelazy = 7 // lef,back char,min,max    .{,n}?
	Setlazy    = 8 // lef,back set,min,max     [\d]{,n}?

	One    = 9  // lef      char            a
	Notone = 10 // lef      char            [^a]
	Set    = 11 // lef      set             [a-z\s]  \w \s \d

	Multi = 12 // lef      string          abcd
	Ref   = 13 // lef      group           \#

	Bol         = 14 //                          ^
	Eol         = 15 //                          $
	Boundary    = 16 //                          \b
	Nonboundary = 17 //                          \B
	Beginning   = 18 //                          \A
	Start       = 19 //                          \G
	EndZ        = 20 //                          \Z
	End         = 21 //                          \z

	Nothing = 22 //                          Reject!

	// Primitive control structures

	Lazybranch      = 23 // back     jump            straight first
	Branchmark      = 24 // back     jump            branch first for loop
	Lazybranchmark  = 25 // back     jump            straight first for loop
	Nullcount       = 26 // back     val             set counter, null mark
	Setcount        = 27 // back     val             set counter, make mark
	Branchcount     = 28 // back     jump,limit      branch++ if zero<=c<limit
	Lazybranchcount = 29 // back     jump,limit      same, but straight first
	Nullmark        = 30 // back                     save position
	Setmark         = 31 // back                     save position
	Capturemark     = 32 // back     group           define group
	Getmark         = 33 // back                     recall position
	Setjump         = 34 // back                     save backtrack state
	Backjump        = 35 //                          zap back to saved state
	Forejump        = 36 //                          zap backtracking state
	Testref         = 37 //                          backtrack if ref undefined
	Goto            = 38 //          jump            just go

	Prune = 39 //                          prune it baby
	Stop  = 40 //                          done!

	ECMABoundary    = 41 //                          \b
	NonECMABoundary = 42 //                          \B

	// Modifiers for alternate modes

	Mask  = 63  // Mask to get unmodified ordinary operator
	Rtl   = 64  // bit to indicate that we're reverse scanning.
	Back  = 128 // bit to indicate that we're backtracking.
	Back2 = 256 // bit to indicate that we're backtracking on a second branch.
	Ci    = 512 // bit to indicate that we're case-insensitive.
)
 91 | 
// Code is a compiled regex program: the flat opcode/operand array plus the
// tables and metadata that the opcodes refer to.
type Code struct {
	Codes       []int       // the code: opcodes, each followed by its integer operands
	Strings     [][]rune    // string table
	Sets        []*CharSet  // character set table
	TrackCount  int         // how many instructions use backtracking
	Caps        map[int]int // mapping of user group numbers -> impl group slots
	Capsize     int         // number of impl group slots
	FcPrefix    *Prefix     // the set of candidate first characters (may be null)
	BmPrefix    *BmPrefix   // the fixed prefix string as a Boyer-Moore machine (may be null)
	Anchors     AnchorLoc   // the set of zero-length start anchors (RegexFCD.Bol, etc)
	RightToLeft bool        // true if right to left
}
104 | 
105 | func opcodeBacktracks(op InstOp) bool {
106 | 	op &= Mask
107 | 
108 | 	switch op {
109 | 	case Oneloop, Notoneloop, Setloop, Onelazy, Notonelazy, Setlazy, Lazybranch, Branchmark, Lazybranchmark,
110 | 		Nullcount, Setcount, Branchcount, Lazybranchcount, Setmark, Capturemark, Getmark, Setjump, Backjump,
111 | 		Forejump, Goto:
112 | 		return true
113 | 
114 | 	default:
115 | 		return false
116 | 	}
117 | }
118 | 
119 | func opcodeSize(op InstOp) int {
120 | 	op &= Mask
121 | 
122 | 	switch op {
123 | 	case Nothing, Bol, Eol, Boundary, Nonboundary, ECMABoundary, NonECMABoundary, Beginning, Start, EndZ,
124 | 		End, Nullmark, Setmark, Getmark, Setjump, Backjump, Forejump, Stop:
125 | 		return 1
126 | 
127 | 	case One, Notone, Multi, Ref, Testref, Goto, Nullcount, Setcount, Lazybranch, Branchmark, Lazybranchmark,
128 | 		Prune, Set:
129 | 		return 2
130 | 
131 | 	case Capturemark, Branchcount, Lazybranchcount, Onerep, Notonerep, Oneloop, Notoneloop, Onelazy, Notonelazy,
132 | 		Setlazy, Setrep, Setloop:
133 | 		return 3
134 | 
135 | 	default:
136 | 		panic(fmt.Errorf("Unexpected op code: %v", op))
137 | 	}
138 | }
139 | 
// codeStr holds the display name of each opcode. operatorDescription indexes
// it with the masked opcode value, so the order of the entries has to follow
// the numeric values of the opcode constants.
var codeStr = []string{
	"Onerep", "Notonerep", "Setrep",
	"Oneloop", "Notoneloop", "Setloop",
	"Onelazy", "Notonelazy", "Setlazy",
	"One", "Notone", "Set",
	"Multi", "Ref",
	"Bol", "Eol", "Boundary", "Nonboundary", "Beginning", "Start", "EndZ", "End",
	"Nothing",
	"Lazybranch", "Branchmark", "Lazybranchmark",
	"Nullcount", "Setcount", "Branchcount", "Lazybranchcount",
	"Nullmark", "Setmark", "Capturemark", "Getmark",
	"Setjump", "Backjump", "Forejump", "Testref", "Goto",
	"Prune", "Stop",
	"ECMABoundary", "NonECMABoundary",
}
155 | 
156 | func operatorDescription(op InstOp) string {
157 | 	desc := codeStr[op&Mask]
158 | 	if (op & Ci) != 0 {
159 | 		desc += "-Ci"
160 | 	}
161 | 	if (op & Rtl) != 0 {
162 | 		desc += "-Rtl"
163 | 	}
164 | 	if (op & Back) != 0 {
165 | 		desc += "-Back"
166 | 	}
167 | 	if (op & Back2) != 0 {
168 | 		desc += "-Back2"
169 | 	}
170 | 
171 | 	return desc
172 | }
173 | 
// OpcodeDescription returns a human readable description of the instruction
// that starts at offset in c.Codes. The line consists of the zero-padded
// offset, a "*" when the opcode backtracks (a space otherwise), the operator
// name with its modifier suffixes, and the operands in parentheses.
func (c *Code) OpcodeDescription(offset int) string {
	buf := &bytes.Buffer{}

	op := InstOp(c.Codes[offset])
	fmt.Fprintf(buf, "%06d ", offset)

	if opcodeBacktracks(op & Mask) {
		buf.WriteString("*")
	} else {
		buf.WriteString(" ")
	}
	buf.WriteString(operatorDescription(op))
	buf.WriteString("(")
	// From here on only the bare opcode matters; the modifier bits have
	// already been rendered by operatorDescription.
	op &= Mask

	// First operand: what it means depends on the opcode family.
	switch op {
	case One, Notone, Onerep, Notonerep, Oneloop, Notoneloop, Onelazy, Notonelazy:
		buf.WriteString("Ch = ")
		buf.WriteString(CharDescription(rune(c.Codes[offset+1])))

	case Set, Setrep, Setloop, Setlazy:
		buf.WriteString("Set = ")
		buf.WriteString(c.Sets[c.Codes[offset+1]].String())

	case Multi:
		fmt.Fprintf(buf, "String = %s", string(c.Strings[c.Codes[offset+1]]))

	case Ref, Testref:
		fmt.Fprintf(buf, "Index = %d", c.Codes[offset+1])

	case Capturemark:
		fmt.Fprintf(buf, "Index = %d", c.Codes[offset+1])
		// A second operand of -1 is not printed.
		if c.Codes[offset+2] != -1 {
			fmt.Fprintf(buf, ", Unindex = %d", c.Codes[offset+2])
		}

	case Nullcount, Setcount:
		fmt.Fprintf(buf, "Value = %d", c.Codes[offset+1])

	case Goto, Lazybranch, Branchmark, Lazybranchmark, Branchcount, Lazybranchcount:
		fmt.Fprintf(buf, "Addr = %d", c.Codes[offset+1])
	}

	// Second operand: a repeat count or a branch limit, where math.MaxInt32
	// is printed as "inf".
	switch op {
	case Onerep, Notonerep, Oneloop, Notoneloop, Onelazy, Notonelazy, Setrep, Setloop, Setlazy:
		buf.WriteString(", Rep = ")
		if c.Codes[offset+2] == math.MaxInt32 {
			buf.WriteString("inf")
		} else {
			fmt.Fprintf(buf, "%d", c.Codes[offset+2])
		}

	case Branchcount, Lazybranchcount:
		buf.WriteString(", Limit = ")
		if c.Codes[offset+2] == math.MaxInt32 {
			buf.WriteString("inf")
		} else {
			fmt.Fprintf(buf, "%d", c.Codes[offset+2])
		}

	}

	buf.WriteString(")")

	return buf.String()
}
241 | 
242 | func (c *Code) Dump() string {
243 | 	buf := &bytes.Buffer{}
244 | 
245 | 	if c.RightToLeft {
246 | 		fmt.Fprintln(buf, "Direction:  right-to-left")
247 | 	} else {
248 | 		fmt.Fprintln(buf, "Direction:  left-to-right")
249 | 	}
250 | 	if c.FcPrefix == nil {
251 | 		fmt.Fprintln(buf, "Firstchars: n/a")
252 | 	} else {
253 | 		fmt.Fprintf(buf, "Firstchars: %v\n", c.FcPrefix.PrefixSet.String())
254 | 	}
255 | 
256 | 	if c.BmPrefix == nil {
257 | 		fmt.Fprintln(buf, "Prefix:     n/a")
258 | 	} else {
259 | 		fmt.Fprintf(buf, "Prefix:     %v\n", Escape(c.BmPrefix.String()))
260 | 	}
261 | 
262 | 	fmt.Fprintf(buf, "Anchors:    %v\n", c.Anchors)
263 | 	fmt.Fprintln(buf)
264 | 
265 | 	if c.BmPrefix != nil {
266 | 		fmt.Fprintln(buf, "BoyerMoore:")
267 | 		fmt.Fprintln(buf, c.BmPrefix.Dump("    "))
268 | 	}
269 | 	for i := 0; i < len(c.Codes); i += opcodeSize(InstOp(c.Codes[i])) {
270 | 		fmt.Fprintln(buf, c.OpcodeDescription(i))
271 | 	}
272 | 
273 | 	return buf.String()
274 | }
275 | 


--------------------------------------------------------------------------------
/vendor/gopkg.in/yaml.v3/resolve.go:
--------------------------------------------------------------------------------
  1 | //
  2 | // Copyright (c) 2011-2019 Canonical Ltd
  3 | //
  4 | // Licensed under the Apache License, Version 2.0 (the "License");
  5 | // you may not use this file except in compliance with the License.
  6 | // You may obtain a copy of the License at
  7 | //
  8 | //     http://www.apache.org/licenses/LICENSE-2.0
  9 | //
 10 | // Unless required by applicable law or agreed to in writing, software
 11 | // distributed under the License is distributed on an "AS IS" BASIS,
 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 | // See the License for the specific language governing permissions and
 14 | // limitations under the License.
 15 | 
 16 | package yaml
 17 | 
 18 | import (
 19 | 	"encoding/base64"
 20 | 	"math"
 21 | 	"regexp"
 22 | 	"strconv"
 23 | 	"strings"
 24 | 	"time"
 25 | )
 26 | 
// resolveMapItem is the decoded value and short tag stored in resolveMap for
// one literal scalar spelling.
type resolveMapItem struct {
	value interface{}
	tag   string
}

// resolveTable maps the first byte of a scalar to a one-letter hint that
// resolve switches on; bytes left at zero carry no hint. It is filled in by
// init.
var resolveTable = make([]byte, 256)

// resolveMap maps literal spellings such as "true" or ".inf" to their value
// and tag. It is filled in by init.
var resolveMap = make(map[string]resolveMapItem)
 34 | 
 35 | func init() {
 36 | 	t := resolveTable
 37 | 	t[int('+')] = 'S' // Sign
 38 | 	t[int('-')] = 'S'
 39 | 	for _, c := range "0123456789" {
 40 | 		t[int(c)] = 'D' // Digit
 41 | 	}
 42 | 	for _, c := range "yYnNtTfFoO~" {
 43 | 		t[int(c)] = 'M' // In map
 44 | 	}
 45 | 	t[int('.')] = '.' // Float (potentially in map)
 46 | 
 47 | 	var resolveMapList = []struct {
 48 | 		v   interface{}
 49 | 		tag string
 50 | 		l   []string
 51 | 	}{
 52 | 		{true, boolTag, []string{"true", "True", "TRUE"}},
 53 | 		{false, boolTag, []string{"false", "False", "FALSE"}},
 54 | 		{nil, nullTag, []string{"", "~", "null", "Null", "NULL"}},
 55 | 		{math.NaN(), floatTag, []string{".nan", ".NaN", ".NAN"}},
 56 | 		{math.Inf(+1), floatTag, []string{".inf", ".Inf", ".INF"}},
 57 | 		{math.Inf(+1), floatTag, []string{"+.inf", "+.Inf", "+.INF"}},
 58 | 		{math.Inf(-1), floatTag, []string{"-.inf", "-.Inf", "-.INF"}},
 59 | 		{"<<", mergeTag, []string{"<<"}},
 60 | 	}
 61 | 
 62 | 	m := resolveMap
 63 | 	for _, item := range resolveMapList {
 64 | 		for _, s := range item.l {
 65 | 			m[s] = resolveMapItem{item.v, item.tag}
 66 | 		}
 67 | 	}
 68 | }
 69 | 
// Short ("!!"-prefixed) forms of the YAML tags handled by this file. longTag
// and shortTag convert between these and the "tag:yaml.org,2002:" forms.
const (
	nullTag      = "!!null"
	boolTag      = "!!bool"
	strTag       = "!!str"
	intTag       = "!!int"
	floatTag     = "!!float"
	timestampTag = "!!timestamp"
	seqTag       = "!!seq"
	mapTag       = "!!map"
	binaryTag    = "!!binary"
	mergeTag     = "!!merge"
)
 82 | 
 83 | var longTags = make(map[string]string)
 84 | var shortTags = make(map[string]string)
 85 | 
 86 | func init() {
 87 | 	for _, stag := range []string{nullTag, boolTag, strTag, intTag, floatTag, timestampTag, seqTag, mapTag, binaryTag, mergeTag} {
 88 | 		ltag := longTag(stag)
 89 | 		longTags[stag] = ltag
 90 | 		shortTags[ltag] = stag
 91 | 	}
 92 | }
 93 | 
 94 | const longTagPrefix = "tag:yaml.org,2002:"
 95 | 
 96 | func shortTag(tag string) string {
 97 | 	if strings.HasPrefix(tag, longTagPrefix) {
 98 | 		if stag, ok := shortTags[tag]; ok {
 99 | 			return stag
100 | 		}
101 | 		return "!!" + tag[len(longTagPrefix):]
102 | 	}
103 | 	return tag
104 | }
105 | 
106 | func longTag(tag string) string {
107 | 	if strings.HasPrefix(tag, "!!") {
108 | 		if ltag, ok := longTags[tag]; ok {
109 | 			return ltag
110 | 		}
111 | 		return longTagPrefix + tag[2:]
112 | 	}
113 | 	return tag
114 | }
115 | 
116 | func resolvableTag(tag string) bool {
117 | 	switch tag {
118 | 	case "", strTag, boolTag, intTag, floatTag, nullTag, timestampTag:
119 | 		return true
120 | 	}
121 | 	return false
122 | }
123 | 
124 | var yamlStyleFloat = regexp.MustCompile(`^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$`)
125 | 
126 | func resolve(tag string, in string) (rtag string, out interface{}) {
127 | 	tag = shortTag(tag)
128 | 	if !resolvableTag(tag) {
129 | 		return tag, in
130 | 	}
131 | 
132 | 	defer func() {
133 | 		switch tag {
134 | 		case "", rtag, strTag, binaryTag:
135 | 			return
136 | 		case floatTag:
137 | 			if rtag == intTag {
138 | 				switch v := out.(type) {
139 | 				case int64:
140 | 					rtag = floatTag
141 | 					out = float64(v)
142 | 					return
143 | 				case int:
144 | 					rtag = floatTag
145 | 					out = float64(v)
146 | 					return
147 | 				}
148 | 			}
149 | 		}
150 | 		failf("cannot decode %s `%s` as a %s", shortTag(rtag), in, shortTag(tag))
151 | 	}()
152 | 
153 | 	// Any data is accepted as a !!str or !!binary.
154 | 	// Otherwise, the prefix is enough of a hint about what it might be.
155 | 	hint := byte('N')
156 | 	if in != "" {
157 | 		hint = resolveTable[in[0]]
158 | 	}
159 | 	if hint != 0 && tag != strTag && tag != binaryTag {
160 | 		// Handle things we can lookup in a map.
161 | 		if item, ok := resolveMap[in]; ok {
162 | 			return item.tag, item.value
163 | 		}
164 | 
165 | 		// Base 60 floats are a bad idea, were dropped in YAML 1.2, and
166 | 		// are purposefully unsupported here. They're still quoted on
167 | 		// the way out for compatibility with other parser, though.
168 | 
169 | 		switch hint {
170 | 		case 'M':
171 | 			// We've already checked the map above.
172 | 
173 | 		case '.':
174 | 			// Not in the map, so maybe a normal float.
175 | 			floatv, err := strconv.ParseFloat(in, 64)
176 | 			if err == nil {
177 | 				return floatTag, floatv
178 | 			}
179 | 
180 | 		case 'D', 'S':
181 | 			// Int, float, or timestamp.
182 | 			// Only try values as a timestamp if the value is unquoted or there's an explicit
183 | 			// !!timestamp tag.
184 | 			if tag == "" || tag == timestampTag {
185 | 				t, ok := parseTimestamp(in)
186 | 				if ok {
187 | 					return timestampTag, t
188 | 				}
189 | 			}
190 | 
191 | 			plain := strings.Replace(in, "_", "", -1)
192 | 			intv, err := strconv.ParseInt(plain, 0, 64)
193 | 			if err == nil {
194 | 				if intv == int64(int(intv)) {
195 | 					return intTag, int(intv)
196 | 				} else {
197 | 					return intTag, intv
198 | 				}
199 | 			}
200 | 			uintv, err := strconv.ParseUint(plain, 0, 64)
201 | 			if err == nil {
202 | 				return intTag, uintv
203 | 			}
204 | 			if yamlStyleFloat.MatchString(plain) {
205 | 				floatv, err := strconv.ParseFloat(plain, 64)
206 | 				if err == nil {
207 | 					return floatTag, floatv
208 | 				}
209 | 			}
210 | 			if strings.HasPrefix(plain, "0b") {
211 | 				intv, err := strconv.ParseInt(plain[2:], 2, 64)
212 | 				if err == nil {
213 | 					if intv == int64(int(intv)) {
214 | 						return intTag, int(intv)
215 | 					} else {
216 | 						return intTag, intv
217 | 					}
218 | 				}
219 | 				uintv, err := strconv.ParseUint(plain[2:], 2, 64)
220 | 				if err == nil {
221 | 					return intTag, uintv
222 | 				}
223 | 			} else if strings.HasPrefix(plain, "-0b") {
224 | 				intv, err := strconv.ParseInt("-"+plain[3:], 2, 64)
225 | 				if err == nil {
226 | 					if true || intv == int64(int(intv)) {
227 | 						return intTag, int(intv)
228 | 					} else {
229 | 						return intTag, intv
230 | 					}
231 | 				}
232 | 			}
233 | 			// Octals as introduced in version 1.2 of the spec.
234 | 			// Octals from the 1.1 spec, spelled as 0777, are still
235 | 			// decoded by default in v3 as well for compatibility.
236 | 			// May be dropped in v4 depending on how usage evolves.
237 | 			if strings.HasPrefix(plain, "0o") {
238 | 				intv, err := strconv.ParseInt(plain[2:], 8, 64)
239 | 				if err == nil {
240 | 					if intv == int64(int(intv)) {
241 | 						return intTag, int(intv)
242 | 					} else {
243 | 						return intTag, intv
244 | 					}
245 | 				}
246 | 				uintv, err := strconv.ParseUint(plain[2:], 8, 64)
247 | 				if err == nil {
248 | 					return intTag, uintv
249 | 				}
250 | 			} else if strings.HasPrefix(plain, "-0o") {
251 | 				intv, err := strconv.ParseInt("-"+plain[3:], 8, 64)
252 | 				if err == nil {
253 | 					if true || intv == int64(int(intv)) {
254 | 						return intTag, int(intv)
255 | 					} else {
256 | 						return intTag, intv
257 | 					}
258 | 				}
259 | 			}
260 | 		default:
261 | 			panic("internal error: missing handler for resolver table: " + string(rune(hint)) + " (with " + in + ")")
262 | 		}
263 | 	}
264 | 	return strTag, in
265 | }
266 | 
// encodeBase64 returns the standard base64 encoding of s. When the encoded
// text is at least lineLen characters long it is wrapped into lines of at
// most lineLen characters, each one (including the last) terminated by '\n'.
// Shorter output is returned as a single line with no trailing newline.
func encodeBase64(s string) string {
	const lineLen = 70

	encoded := make([]byte, base64.StdEncoding.EncodedLen(len(s)))
	base64.StdEncoding.Encode(encoded, []byte(s))

	// Wrapping only kicks in once there is at least one full line.
	wrap := len(encoded)/lineLen >= 1
	wrapped := make([]byte, 0, len(encoded)+len(encoded)/lineLen+1)
	for rest := encoded; len(rest) > 0; {
		n := lineLen
		if n > len(rest) {
			n = len(rest)
		}
		wrapped = append(wrapped, rest[:n]...)
		if wrap {
			wrapped = append(wrapped, '\n')
		}
		rest = rest[n:]
	}
	return string(wrapped)
}
291 | 
292 | // This is a subset of the formats allowed by the regular expression
293 | // defined at http://yaml.org/type/timestamp.html.
294 | var allowedTimestampFormats = []string{
295 | 	"2006-1-2T15:4:5.999999999Z07:00", // RCF3339Nano with short date fields.
296 | 	"2006-1-2t15:4:5.999999999Z07:00", // RFC3339Nano with short date fields and lower-case "t".
297 | 	"2006-1-2 15:4:5.999999999",       // space separated with no time zone
298 | 	"2006-1-2",                        // date only
299 | 	// Notable exception: time.Parse cannot handle: "2001-12-14 21:59:43.10 -5"
300 | 	// from the set of examples.
301 | }
302 | 
303 | // parseTimestamp parses s as a timestamp string and
304 | // returns the timestamp and reports whether it succeeded.
305 | // Timestamp formats are defined at http://yaml.org/type/timestamp.html
306 | func parseTimestamp(s string) (time.Time, bool) {
307 | 	// TODO write code to check all the formats supported by
308 | 	// http://yaml.org/type/timestamp.html instead of using time.Parse.
309 | 
310 | 	// Quick check: all date formats start with YYYY-.
311 | 	i := 0
312 | 	for ; i < len(s); i++ {
313 | 		if c := s[i]; c < '0' || c > '9' {
314 | 			break
315 | 		}
316 | 	}
317 | 	if i != 4 || i == len(s) || s[i] != '-' {
318 | 		return time.Time{}, false
319 | 	}
320 | 	for _, format := range allowedTimestampFormats {
321 | 		if t, err := time.Parse(format, s); err == nil {
322 | 			return t, true
323 | 		}
324 | 	}
325 | 	return time.Time{}, false
326 | }
327 | 


--------------------------------------------------------------------------------
/vendor/github.com/dlclark/regexp2/match.go:
--------------------------------------------------------------------------------
  1 | package regexp2
  2 | 
  3 | import (
  4 | 	"bytes"
  5 | 	"fmt"
  6 | )
  7 | 
// Match is a single regex result match that contains groups and repeated captures
//
//	-Groups
//	   -Capture
//
// The embedded Group is group 0 (the whole match); the remaining groups are
// exposed through GroupByNumber, GroupByName and Groups.
type Match struct {
	Group // embedded group 0

	// regex is used for group name/number lookups.
	regex *Regexp
	// otherGroups holds groups 1..n; it is built lazily by populateOtherGroups.
	otherGroups []Group

	// input to the match
	// textpos is the position handed to tidy; FindNextMatch resumes from it.
	// textstart is the start position given to newMatch/reset.
	textpos   int
	textstart int

	// capcount is a copy of matchcount[0], taken in tidy.
	// sparseCaps, when non-nil, translates a requested group number into a
	// slot index (see GroupByNumber).
	capcount   int
	caps       []int
	sparseCaps map[int]int

	// output from the match
	// matches[g] is a flat list of (index, length) pairs appended by addMatch;
	// matchcount[g] is the number of pairs currently in use for group g.
	matches    [][]int
	matchcount []int

	// whether we've done any balancing with this match.  If we
	// have done balancing, we'll need to do extra work in Tidy().
	balancing bool
}
 33 | 
// Group is an explicit or implicit (group 0) matched group within the pattern
type Group struct {
	Capture // the last capture of this group is embedded for ease of use

	Name     string    // group name
	Captures []Capture // captures of this group
}
 41 | 
 42 | // Capture is a single capture of text within the larger original string
 43 | type Capture struct {
 44 | 	// the original string
 45 | 	text []rune
 46 | 	// the position in the original string where the first character of
 47 | 	// captured substring was found.
 48 | 	Index int
 49 | 	// the length of the captured substring.
 50 | 	Length int
 51 | }
 52 | 
 53 | // String returns the captured text as a String
 54 | func (c *Capture) String() string {
 55 | 	return string(c.text[c.Index : c.Index+c.Length])
 56 | }
 57 | 
 58 | // Runes returns the captured text as a rune slice
 59 | func (c *Capture) Runes() []rune {
 60 | 	return c.text[c.Index : c.Index+c.Length]
 61 | }
 62 | 
 63 | func newMatch(regex *Regexp, capcount int, text []rune, startpos int) *Match {
 64 | 	m := Match{
 65 | 		regex:      regex,
 66 | 		matchcount: make([]int, capcount),
 67 | 		matches:    make([][]int, capcount),
 68 | 		textstart:  startpos,
 69 | 		balancing:  false,
 70 | 	}
 71 | 	m.Name = "0"
 72 | 	m.text = text
 73 | 	m.matches[0] = make([]int, 2)
 74 | 	return &m
 75 | }
 76 | 
 77 | func newMatchSparse(regex *Regexp, caps map[int]int, capcount int, text []rune, startpos int) *Match {
 78 | 	m := newMatch(regex, capcount, text, startpos)
 79 | 	m.sparseCaps = caps
 80 | 	return m
 81 | }
 82 | 
 83 | func (m *Match) reset(text []rune, textstart int) {
 84 | 	m.text = text
 85 | 	m.textstart = textstart
 86 | 	for i := 0; i < len(m.matchcount); i++ {
 87 | 		m.matchcount[i] = 0
 88 | 	}
 89 | 	m.balancing = false
 90 | }
 91 | 
// tidy finalizes the match once matching has finished: it copies group 0's
// first (index, length) pair into the embedded Capture, records textpos and
// the group 0 capture count, and - if balanceMatch was used - compacts every
// group's capture list so that only real (non-negative) captures remain.
func (m *Match) tidy(textpos int) {

	interval := m.matches[0]
	m.Index = interval[0]
	m.Length = interval[1]
	m.textpos = textpos
	m.capcount = m.matchcount[0]
	//copy our root capture to the list
	m.Group.Captures = []Capture{m.Group.Capture}

	if m.balancing {
		// The idea here is that we want to compact all of our unbalanced captures.  To do that we
		// use j basically as a count of how many unbalanced captures we have at any given time
		// (really j is an index, but j/2 is the count).  First we skip past all of the real captures
		// until we find a balance capture.  Then we check each subsequent entry.  If it's a balance
		// capture (it's negative), we decrement j.  If it's a real capture, we increment j and copy
		// it down to the last free position.
		//
		// Note: the loop variable "cap" shadows the builtin cap() inside this loop.
		for cap := 0; cap < len(m.matchcount); cap++ {
			// limit is the number of ints (two per capture) in use for this group
			limit := m.matchcount[cap] * 2
			matcharray := m.matches[cap]

			var i, j int

			// find the first balancing (negative) entry, if any
			for i = 0; i < limit; i++ {
				if matcharray[i] < 0 {
					break
				}
			}

			// compact everything from that point on, in place
			for j = i; i < limit; i++ {
				if matcharray[i] < 0 {
					// skip negative values
					j--
				} else {
					// but if we find something positive (an actual capture), copy it back to the last
					// unbalanced position.
					if i != j {
						matcharray[j] = matcharray[i]
					}
					j++
				}
			}

			// j counts ints; two ints per surviving capture
			m.matchcount[cap] = j / 2
		}

		m.balancing = false
	}
}
141 | 
142 | // isMatched tells if a group was matched by capnum
143 | func (m *Match) isMatched(cap int) bool {
144 | 	return cap < len(m.matchcount) && m.matchcount[cap] > 0 && m.matches[cap][m.matchcount[cap]*2-1] != (-3+1)
145 | }
146 | 
147 | // matchIndex returns the index of the last specified matched group by capnum
148 | func (m *Match) matchIndex(cap int) int {
149 | 	i := m.matches[cap][m.matchcount[cap]*2-2]
150 | 	if i >= 0 {
151 | 		return i
152 | 	}
153 | 
154 | 	return m.matches[cap][-3-i]
155 | }
156 | 
157 | // matchLength returns the length of the last specified matched group by capnum
158 | func (m *Match) matchLength(cap int) int {
159 | 	i := m.matches[cap][m.matchcount[cap]*2-1]
160 | 	if i >= 0 {
161 | 		return i
162 | 	}
163 | 
164 | 	return m.matches[cap][-3-i]
165 | }
166 | 
167 | // Nonpublic builder: add a capture to the group specified by "c"
168 | func (m *Match) addMatch(c, start, l int) {
169 | 
170 | 	if m.matches[c] == nil {
171 | 		m.matches[c] = make([]int, 2)
172 | 	}
173 | 
174 | 	capcount := m.matchcount[c]
175 | 
176 | 	if capcount*2+2 > len(m.matches[c]) {
177 | 		oldmatches := m.matches[c]
178 | 		newmatches := make([]int, capcount*8)
179 | 		copy(newmatches, oldmatches[:capcount*2])
180 | 		m.matches[c] = newmatches
181 | 	}
182 | 
183 | 	m.matches[c][capcount*2] = start
184 | 	m.matches[c][capcount*2+1] = l
185 | 	m.matchcount[c] = capcount + 1
186 | 	//log.Printf("addMatch: c=%v, i=%v, l=%v ... matches: %v", c, start, l, m.matches)
187 | }
188 | 
189 | // Nonpublic builder: Add a capture to balance the specified group.  This is used by the
190 | //                     balanced match construct. (?<foo-foo2>...)
191 | //
192 | // If there were no such thing as backtracking, this would be as simple as calling RemoveMatch(c).
193 | // However, since we have backtracking, we need to keep track of everything.
194 | func (m *Match) balanceMatch(c int) {
195 | 	m.balancing = true
196 | 
197 | 	// we'll look at the last capture first
198 | 	capcount := m.matchcount[c]
199 | 	target := capcount*2 - 2
200 | 
201 | 	// first see if it is negative, and therefore is a reference to the next available
202 | 	// capture group for balancing.  If it is, we'll reset target to point to that capture.
203 | 	if m.matches[c][target] < 0 {
204 | 		target = -3 - m.matches[c][target]
205 | 	}
206 | 
207 | 	// move back to the previous capture
208 | 	target -= 2
209 | 
210 | 	// if the previous capture is a reference, just copy that reference to the end.  Otherwise, point to it.
211 | 	if target >= 0 && m.matches[c][target] < 0 {
212 | 		m.addMatch(c, m.matches[c][target], m.matches[c][target+1])
213 | 	} else {
214 | 		m.addMatch(c, -3-target, -4-target /* == -3 - (target + 1) */)
215 | 	}
216 | }
217 | 
218 | // Nonpublic builder: removes a group match by capnum
219 | func (m *Match) removeMatch(c int) {
220 | 	m.matchcount[c]--
221 | }
222 | 
223 | // GroupCount returns the number of groups this match has matched
224 | func (m *Match) GroupCount() int {
225 | 	return len(m.matchcount)
226 | }
227 | 
228 | // GroupByName returns a group based on the name of the group, or nil if the group name does not exist
229 | func (m *Match) GroupByName(name string) *Group {
230 | 	num := m.regex.GroupNumberFromName(name)
231 | 	if num < 0 {
232 | 		return nil
233 | 	}
234 | 	return m.GroupByNumber(num)
235 | }
236 | 
237 | // GroupByNumber returns a group based on the number of the group, or nil if the group number does not exist
238 | func (m *Match) GroupByNumber(num int) *Group {
239 | 	// check our sparse map
240 | 	if m.sparseCaps != nil {
241 | 		if newNum, ok := m.sparseCaps[num]; ok {
242 | 			num = newNum
243 | 		}
244 | 	}
245 | 	if num >= len(m.matchcount) || num < 0 {
246 | 		return nil
247 | 	}
248 | 
249 | 	if num == 0 {
250 | 		return &m.Group
251 | 	}
252 | 
253 | 	m.populateOtherGroups()
254 | 
255 | 	return &m.otherGroups[num-1]
256 | }
257 | 
258 | // Groups returns all the capture groups, starting with group 0 (the full match)
259 | func (m *Match) Groups() []Group {
260 | 	m.populateOtherGroups()
261 | 	g := make([]Group, len(m.otherGroups)+1)
262 | 	g[0] = m.Group
263 | 	copy(g[1:], m.otherGroups)
264 | 	return g
265 | }
266 | 
267 | func (m *Match) populateOtherGroups() {
268 | 	// Construct all the Group objects first time called
269 | 	if m.otherGroups == nil {
270 | 		m.otherGroups = make([]Group, len(m.matchcount)-1)
271 | 		for i := 0; i < len(m.otherGroups); i++ {
272 | 			m.otherGroups[i] = newGroup(m.regex.GroupNameFromNumber(i+1), m.text, m.matches[i+1], m.matchcount[i+1])
273 | 		}
274 | 	}
275 | }
276 | 
277 | func (m *Match) groupValueAppendToBuf(groupnum int, buf *bytes.Buffer) {
278 | 	c := m.matchcount[groupnum]
279 | 	if c == 0 {
280 | 		return
281 | 	}
282 | 
283 | 	matches := m.matches[groupnum]
284 | 
285 | 	index := matches[(c-1)*2]
286 | 	last := index + matches[(c*2)-1]
287 | 
288 | 	for ; index < last; index++ {
289 | 		buf.WriteRune(m.text[index])
290 | 	}
291 | }
292 | 
293 | func newGroup(name string, text []rune, caps []int, capcount int) Group {
294 | 	g := Group{}
295 | 	g.text = text
296 | 	if capcount > 0 {
297 | 		g.Index = caps[(capcount-1)*2]
298 | 		g.Length = caps[(capcount*2)-1]
299 | 	}
300 | 	g.Name = name
301 | 	g.Captures = make([]Capture, capcount)
302 | 	for i := 0; i < capcount; i++ {
303 | 		g.Captures[i] = Capture{
304 | 			text:   text,
305 | 			Index:  caps[i*2],
306 | 			Length: caps[i*2+1],
307 | 		}
308 | 	}
309 | 	//log.Printf("newGroup! capcount %v, %+v", capcount, g)
310 | 
311 | 	return g
312 | }
313 | 
314 | func (m *Match) dump() string {
315 | 	buf := &bytes.Buffer{}
316 | 	buf.WriteRune('\n')
317 | 	if len(m.sparseCaps) > 0 {
318 | 		for k, v := range m.sparseCaps {
319 | 			fmt.Fprintf(buf, "Slot %v -> %v\n", k, v)
320 | 		}
321 | 	}
322 | 
323 | 	for i, g := range m.Groups() {
324 | 		fmt.Fprintf(buf, "Group %v (%v), %v caps:\n", i, g.Name, len(g.Captures))
325 | 
326 | 		for _, c := range g.Captures {
327 | 			fmt.Fprintf(buf, "  (%v, %v) %v\n", c.Index, c.Length, c.String())
328 | 		}
329 | 	}
330 | 	/*
331 | 		for i := 0; i < len(m.matchcount); i++ {
332 | 			fmt.Fprintf(buf, "\nGroup %v (%v):\n", i, m.regex.GroupNameFromNumber(i))
333 | 
334 | 			for j := 0; j < m.matchcount[i]; j++ {
335 | 				text := ""
336 | 
337 | 				if m.matches[i][j*2] >= 0 {
338 | 					start := m.matches[i][j*2]
339 | 					text = m.text[start : start+m.matches[i][j*2+1]]
340 | 				}
341 | 
342 | 				fmt.Fprintf(buf, "  (%v, %v) %v\n", m.matches[i][j*2], m.matches[i][j*2+1], text)
343 | 			}
344 | 		}
345 | 	*/
346 | 	return buf.String()
347 | }
348 | 


--------------------------------------------------------------------------------
/vendor/github.com/dlclark/regexp2/regexp.go:
--------------------------------------------------------------------------------
  1 | /*
  2 | Package regexp2 is a regexp package that has an interface similar to Go's framework regexp engine but uses a
  3 | more feature full regex engine behind the scenes.
  4 | 
  5 | It doesn't have constant time guarantees, but it allows backtracking and is compatible with Perl5 and .NET.
  6 | You'll likely be better off with the RE2 engine from the regexp package and should only use this if you
  7 | need to write very complex patterns or require compatibility with .NET.
  8 | */
  9 | package regexp2
 10 | 
 11 | import (
 12 | 	"errors"
 13 | 	"math"
 14 | 	"strconv"
 15 | 	"sync"
 16 | 	"time"
 17 | 
 18 | 	"github.com/dlclark/regexp2/syntax"
 19 | )
 20 | 
// DefaultMatchTimeout is the MatchTimeout given to every Regexp returned by
// Compile. Its value is the largest time.Duration, i.e. effectively "forever".
var DefaultMatchTimeout = time.Duration(math.MaxInt64)
 23 | 
// Regexp is the representation of a compiled regular expression.
// A Regexp is safe for concurrent use by multiple goroutines.
type Regexp struct {
	// timeout when trying to find matches; Compile initializes it to
	// DefaultMatchTimeout
	MatchTimeout time.Duration

	// read-only after Compile
	pattern string       // as passed to Compile
	options RegexOptions // options

	caps     map[int]int    // capnum->index
	capnames map[string]int // capture group name -> index
	capslist []string       // sorted list of capture group names
	capsize  int            // size of the capture array

	code *syntax.Code // compiled program

	// cache of machines for running regexp
	muRun  sync.Mutex
	runner []*runner
}
 45 | 
 46 | // Compile parses a regular expression and returns, if successful,
 47 | // a Regexp object that can be used to match against text.
 48 | func Compile(expr string, opt RegexOptions) (*Regexp, error) {
 49 | 	// parse it
 50 | 	tree, err := syntax.Parse(expr, syntax.RegexOptions(opt))
 51 | 	if err != nil {
 52 | 		return nil, err
 53 | 	}
 54 | 
 55 | 	// translate it to code
 56 | 	code, err := syntax.Write(tree)
 57 | 	if err != nil {
 58 | 		return nil, err
 59 | 	}
 60 | 
 61 | 	// return it
 62 | 	return &Regexp{
 63 | 		pattern:      expr,
 64 | 		options:      opt,
 65 | 		caps:         code.Caps,
 66 | 		capnames:     tree.Capnames,
 67 | 		capslist:     tree.Caplist,
 68 | 		capsize:      code.Capsize,
 69 | 		code:         code,
 70 | 		MatchTimeout: DefaultMatchTimeout,
 71 | 	}, nil
 72 | }
 73 | 
 74 | // MustCompile is like Compile but panics if the expression cannot be parsed.
 75 | // It simplifies safe initialization of global variables holding compiled regular
 76 | // expressions.
 77 | func MustCompile(str string, opt RegexOptions) *Regexp {
 78 | 	regexp, error := Compile(str, opt)
 79 | 	if error != nil {
 80 | 		panic(`regexp2: Compile(` + quote(str) + `): ` + error.Error())
 81 | 	}
 82 | 	return regexp
 83 | }
 84 | 
 85 | // Escape adds backslashes to any special characters in the input string
 86 | func Escape(input string) string {
 87 | 	return syntax.Escape(input)
 88 | }
 89 | 
 90 | // Unescape removes any backslashes from previously-escaped special characters in the input string
 91 | func Unescape(input string) (string, error) {
 92 | 	return syntax.Unescape(input)
 93 | }
 94 | 
 95 | // String returns the source text used to compile the regular expression.
 96 | func (re *Regexp) String() string {
 97 | 	return re.pattern
 98 | }
 99 | 
// quote renders s as a Go string literal for use in messages: a raw
// back-quoted literal when s can be represented that way, otherwise a
// double-quoted literal with escapes.
func quote(s string) string {
	if !strconv.CanBackquote(s) {
		return strconv.Quote(s)
	}
	return "`" + s + "`"
}
106 | 
// RegexOptions impact the runtime and parsing behavior
// for each specific regex.  They are settable in code as well
// as in the regex pattern itself.
type RegexOptions int32

// Option flags. Each flag occupies its own bit so flags can be combined
// with bitwise OR. Only None is declared with the RegexOptions type; the
// remaining constants are untyped and take on RegexOptions where they are
// used as one.
const (
	None                    RegexOptions = 0x0
	IgnoreCase                           = 0x0001 // "i"
	Multiline                            = 0x0002 // "m"
	ExplicitCapture                      = 0x0004 // "n"
	Compiled                             = 0x0008 // "c"
	Singleline                           = 0x0010 // "s"
	IgnorePatternWhitespace              = 0x0020 // "x"
	RightToLeft                          = 0x0040 // "r"
	Debug                                = 0x0080 // "d"
	ECMAScript                           = 0x0100 // "e"
	RE2                                  = 0x0200 // RE2 (regexp package) compatibility mode
)
125 | 
126 | func (re *Regexp) RightToLeft() bool {
127 | 	return re.options&RightToLeft != 0
128 | }
129 | 
130 | func (re *Regexp) Debug() bool {
131 | 	return re.options&Debug != 0
132 | }
133 | 
134 | // Replace searches the input string and replaces each match found with the replacement text.
135 | // Count will limit the number of matches attempted and startAt will allow
136 | // us to skip past possible matches at the start of the input (left or right depending on RightToLeft option).
137 | // Set startAt and count to -1 to go through the whole string
138 | func (re *Regexp) Replace(input, replacement string, startAt, count int) (string, error) {
139 | 	data, err := syntax.NewReplacerData(replacement, re.caps, re.capsize, re.capnames, syntax.RegexOptions(re.options))
140 | 	if err != nil {
141 | 		return "", err
142 | 	}
143 | 	//TODO: cache ReplacerData
144 | 
145 | 	return replace(re, data, nil, input, startAt, count)
146 | }
147 | 
148 | // ReplaceFunc searches the input string and replaces each match found using the string from the evaluator
149 | // Count will limit the number of matches attempted and startAt will allow
150 | // us to skip past possible matches at the start of the input (left or right depending on RightToLeft option).
151 | // Set startAt and count to -1 to go through the whole string.
152 | func (re *Regexp) ReplaceFunc(input string, evaluator MatchEvaluator, startAt, count int) (string, error) {
153 | 	return replace(re, nil, evaluator, input, startAt, count)
154 | }
155 | 
156 | // FindStringMatch searches the input string for a Regexp match
157 | func (re *Regexp) FindStringMatch(s string) (*Match, error) {
158 | 	// convert string to runes
159 | 	return re.run(false, -1, getRunes(s))
160 | }
161 | 
162 | // FindRunesMatch searches the input rune slice for a Regexp match
163 | func (re *Regexp) FindRunesMatch(r []rune) (*Match, error) {
164 | 	return re.run(false, -1, r)
165 | }
166 | 
167 | // FindStringMatchStartingAt searches the input string for a Regexp match starting at the startAt index
168 | func (re *Regexp) FindStringMatchStartingAt(s string, startAt int) (*Match, error) {
169 | 	if startAt > len(s) {
170 | 		return nil, errors.New("startAt must be less than the length of the input string")
171 | 	}
172 | 	r, startAt := re.getRunesAndStart(s, startAt)
173 | 	if startAt == -1 {
174 | 		// we didn't find our start index in the string -- that's a problem
175 | 		return nil, errors.New("startAt must align to the start of a valid rune in the input string")
176 | 	}
177 | 
178 | 	return re.run(false, startAt, r)
179 | }
180 | 
181 | // FindRunesMatchStartingAt searches the input rune slice for a Regexp match starting at the startAt index
182 | func (re *Regexp) FindRunesMatchStartingAt(r []rune, startAt int) (*Match, error) {
183 | 	return re.run(false, startAt, r)
184 | }
185 | 
186 | // FindNextMatch returns the next match in the same input string as the match parameter.
187 | // Will return nil if there is no next match or if given a nil match.
188 | func (re *Regexp) FindNextMatch(m *Match) (*Match, error) {
189 | 	if m == nil {
190 | 		return nil, nil
191 | 	}
192 | 
193 | 	// If previous match was empty, advance by one before matching to prevent
194 | 	// infinite loop
195 | 	startAt := m.textpos
196 | 	if m.Length == 0 {
197 | 		if m.textpos == len(m.text) {
198 | 			return nil, nil
199 | 		}
200 | 
201 | 		if re.RightToLeft() {
202 | 			startAt--
203 | 		} else {
204 | 			startAt++
205 | 		}
206 | 	}
207 | 	return re.run(false, startAt, m.text)
208 | }
209 | 
210 | // MatchString return true if the string matches the regex
211 | // error will be set if a timeout occurs
212 | func (re *Regexp) MatchString(s string) (bool, error) {
213 | 	m, err := re.run(true, -1, getRunes(s))
214 | 	if err != nil {
215 | 		return false, err
216 | 	}
217 | 	return m != nil, nil
218 | }
219 | 
220 | func (re *Regexp) getRunesAndStart(s string, startAt int) ([]rune, int) {
221 | 	if startAt < 0 {
222 | 		if re.RightToLeft() {
223 | 			r := getRunes(s)
224 | 			return r, len(r)
225 | 		}
226 | 		return getRunes(s), 0
227 | 	}
228 | 	ret := make([]rune, len(s))
229 | 	i := 0
230 | 	runeIdx := -1
231 | 	for strIdx, r := range s {
232 | 		if strIdx == startAt {
233 | 			runeIdx = i
234 | 		}
235 | 		ret[i] = r
236 | 		i++
237 | 	}
238 | 	if startAt == len(s) {
239 | 		runeIdx = i
240 | 	}
241 | 	return ret[:i], runeIdx
242 | }
243 | 
// getRunes converts s into a slice of its runes.
func getRunes(s string) []rune {
	runes := []rune(s)
	return runes
}
247 | 
248 | // MatchRunes return true if the runes matches the regex
249 | // error will be set if a timeout occurs
250 | func (re *Regexp) MatchRunes(r []rune) (bool, error) {
251 | 	m, err := re.run(true, -1, r)
252 | 	if err != nil {
253 | 		return false, err
254 | 	}
255 | 	return m != nil, nil
256 | }
257 | 
258 | // GetGroupNames Returns the set of strings used to name capturing groups in the expression.
259 | func (re *Regexp) GetGroupNames() []string {
260 | 	var result []string
261 | 
262 | 	if re.capslist == nil {
263 | 		result = make([]string, re.capsize)
264 | 
265 | 		for i := 0; i < len(result); i++ {
266 | 			result[i] = strconv.Itoa(i)
267 | 		}
268 | 	} else {
269 | 		result = make([]string, len(re.capslist))
270 | 		copy(result, re.capslist)
271 | 	}
272 | 
273 | 	return result
274 | }
275 | 
276 | // GetGroupNumbers returns the integer group numbers corresponding to a group name.
277 | func (re *Regexp) GetGroupNumbers() []int {
278 | 	var result []int
279 | 
280 | 	if re.caps == nil {
281 | 		result = make([]int, re.capsize)
282 | 
283 | 		for i := 0; i < len(result); i++ {
284 | 			result[i] = i
285 | 		}
286 | 	} else {
287 | 		result = make([]int, len(re.caps))
288 | 
289 | 		for k, v := range re.caps {
290 | 			result[v] = k
291 | 		}
292 | 	}
293 | 
294 | 	return result
295 | }
296 | 
297 | // GroupNameFromNumber retrieves a group name that corresponds to a group number.
298 | // It will return "" for and unknown group number.  Unnamed groups automatically
299 | // receive a name that is the decimal string equivalent of its number.
300 | func (re *Regexp) GroupNameFromNumber(i int) string {
301 | 	if re.capslist == nil {
302 | 		if i >= 0 && i < re.capsize {
303 | 			return strconv.Itoa(i)
304 | 		}
305 | 
306 | 		return ""
307 | 	}
308 | 
309 | 	if re.caps != nil {
310 | 		var ok bool
311 | 		if i, ok = re.caps[i]; !ok {
312 | 			return ""
313 | 		}
314 | 	}
315 | 
316 | 	if i >= 0 && i < len(re.capslist) {
317 | 		return re.capslist[i]
318 | 	}
319 | 
320 | 	return ""
321 | }
322 | 
323 | // GroupNumberFromName returns a group number that corresponds to a group name.
324 | // Returns -1 if the name is not a recognized group name.  Numbered groups
325 | // automatically get a group name that is the decimal string equivalent of its number.
326 | func (re *Regexp) GroupNumberFromName(name string) int {
327 | 	// look up name if we have a hashtable of names
328 | 	if re.capnames != nil {
329 | 		if k, ok := re.capnames[name]; ok {
330 | 			return k
331 | 		}
332 | 
333 | 		return -1
334 | 	}
335 | 
336 | 	// convert to an int if it looks like a number
337 | 	result := 0
338 | 	for i := 0; i < len(name); i++ {
339 | 		ch := name[i]
340 | 
341 | 		if ch > '9' || ch < '0' {
342 | 			return -1
343 | 		}
344 | 
345 | 		result *= 10
346 | 		result += int(ch - '0')
347 | 	}
348 | 
349 | 	// return int if it's in range
350 | 	if result >= 0 && result < re.capsize {
351 | 		return result
352 | 	}
353 | 
354 | 	return -1
355 | }
356 | 


--------------------------------------------------------------------------------
/vendor/github.com/davecgh/go-spew/spew/common.go:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (c) 2013-2016 Dave Collins <dave@davec.name>
  3 |  *
  4 |  * Permission to use, copy, modify, and distribute this software for any
  5 |  * purpose with or without fee is hereby granted, provided that the above
  6 |  * copyright notice and this permission notice appear in all copies.
  7 |  *
  8 |  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  9 |  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 10 |  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 11 |  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 12 |  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 13 |  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 14 |  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 15 |  */
 16 | 
 17 | package spew
 18 | 
 19 | import (
 20 | 	"bytes"
 21 | 	"fmt"
 22 | 	"io"
 23 | 	"reflect"
 24 | 	"sort"
 25 | 	"strconv"
 26 | )
 27 | 
// Some constants in the form of bytes to avoid string overhead.  This mirrors
// the technique used in the fmt package.  Each value is written to the
// output as-is by the formatting helpers in this package.
var (
	panicBytes            = []byte("(PANIC=")
	plusBytes             = []byte("+")
	iBytes                = []byte("i")
	trueBytes             = []byte("true")
	falseBytes            = []byte("false")
	interfaceBytes        = []byte("(interface {})")
	commaNewlineBytes     = []byte(",\n")
	newlineBytes          = []byte("\n")
	openBraceBytes        = []byte("{")
	openBraceNewlineBytes = []byte("{\n")
	closeBraceBytes       = []byte("}")
	asteriskBytes         = []byte("*")
	colonBytes            = []byte(":")
	colonSpaceBytes       = []byte(": ")
	openParenBytes        = []byte("(")
	closeParenBytes       = []byte(")")
	spaceBytes            = []byte(" ")
	pointerChainBytes     = []byte("->")
	nilAngleBytes         = []byte("<nil>")
	maxNewlineBytes       = []byte("<max depth reached>\n")
	maxShortBytes         = []byte("<max>")
	circularBytes         = []byte("<already shown>")
	circularShortBytes    = []byte("<shown>")
	invalidAngleBytes     = []byte("<invalid>")
	openBracketBytes      = []byte("[")
	closeBracketBytes     = []byte("]")
	percentBytes          = []byte("%")
	precisionBytes        = []byte(".")
	openAngleBytes        = []byte("<")
	closeAngleBytes       = []byte(">")
	openMapBytes          = []byte("map[")
	closeMapBytes         = []byte("]")
	lenEqualsBytes        = []byte("len=")
	capEqualsBytes        = []byte("cap=")
)

// hexDigits maps a value in the range 0-15 to its lower-case hex digit;
// printHexPtr indexes it one nibble at a time.
var hexDigits = "0123456789abcdef"
 69 | 
 70 | // catchPanic handles any panics that might occur during the handleMethods
 71 | // calls.
 72 | func catchPanic(w io.Writer, v reflect.Value) {
 73 | 	if err := recover(); err != nil {
 74 | 		w.Write(panicBytes)
 75 | 		fmt.Fprintf(w, "%v", err)
 76 | 		w.Write(closeParenBytes)
 77 | 	}
 78 | }
 79 | 
// handleMethods attempts to call the Error and String methods on the underlying
// type the passed reflect.Value represents and outputs the result to Writer w.
//
// It handles panics in any called methods by catching and displaying the error
// as the formatted value.
//
// The return value reports whether the value has been fully handled. When
// cs.ContinueOnMethod is set, the method result is written in parentheses
// followed by a space and false is returned, so the caller carries on
// formatting the value itself.
func handleMethods(cs *ConfigState, w io.Writer, v reflect.Value) (handled bool) {
	// We need an interface to check if the type implements the error or
	// Stringer interface.  However, the reflect package won't give us an
	// interface on certain things like unexported struct fields in order
	// to enforce visibility rules.  We use unsafe, when it's available,
	// to bypass these restrictions since this package does not mutate the
	// values.
	if !v.CanInterface() {
		if UnsafeDisabled {
			return false
		}

		v = unsafeReflectValue(v)
	}

	// Choose whether or not to do error and Stringer interface lookups against
	// the base type or a pointer to the base type depending on settings.
	// Technically calling one of these methods with a pointer receiver can
	// mutate the value, however, types which choose to satisfy an error or
	// Stringer interface with a pointer receiver should not be mutating their
	// state inside these interface methods.
	if !cs.DisablePointerMethods && !UnsafeDisabled && !v.CanAddr() {
		v = unsafeReflectValue(v)
	}
	if v.CanAddr() {
		v = v.Addr()
	}

	// Is it an error or Stringer?
	// error is checked first, so a type implementing both is shown via Error().
	switch iface := v.Interface().(type) {
	case error:
		// catchPanic turns a panic inside Error() into "(PANIC=...)" output.
		defer catchPanic(w, v)
		if cs.ContinueOnMethod {
			w.Write(openParenBytes)
			w.Write([]byte(iface.Error()))
			w.Write(closeParenBytes)
			w.Write(spaceBytes)
			return false
		}

		w.Write([]byte(iface.Error()))
		return true

	case fmt.Stringer:
		// catchPanic turns a panic inside String() into "(PANIC=...)" output.
		defer catchPanic(w, v)
		if cs.ContinueOnMethod {
			w.Write(openParenBytes)
			w.Write([]byte(iface.String()))
			w.Write(closeParenBytes)
			w.Write(spaceBytes)
			return false
		}
		w.Write([]byte(iface.String()))
		return true
	}
	return false
}
142 | 
143 | // printBool outputs a boolean value as true or false to Writer w.
144 | func printBool(w io.Writer, val bool) {
145 | 	if val {
146 | 		w.Write(trueBytes)
147 | 	} else {
148 | 		w.Write(falseBytes)
149 | 	}
150 | }
151 | 
152 | // printInt outputs a signed integer value to Writer w.
153 | func printInt(w io.Writer, val int64, base int) {
154 | 	w.Write([]byte(strconv.FormatInt(val, base)))
155 | }
156 | 
157 | // printUint outputs an unsigned integer value to Writer w.
158 | func printUint(w io.Writer, val uint64, base int) {
159 | 	w.Write([]byte(strconv.FormatUint(val, base)))
160 | }
161 | 
162 | // printFloat outputs a floating point value using the specified precision,
163 | // which is expected to be 32 or 64bit, to Writer w.
164 | func printFloat(w io.Writer, val float64, precision int) {
165 | 	w.Write([]byte(strconv.FormatFloat(val, 'g', -1, precision)))
166 | }
167 | 
168 | // printComplex outputs a complex value using the specified float precision
169 | // for the real and imaginary parts to Writer w.
170 | func printComplex(w io.Writer, c complex128, floatPrecision int) {
171 | 	r := real(c)
172 | 	w.Write(openParenBytes)
173 | 	w.Write([]byte(strconv.FormatFloat(r, 'g', -1, floatPrecision)))
174 | 	i := imag(c)
175 | 	if i >= 0 {
176 | 		w.Write(plusBytes)
177 | 	}
178 | 	w.Write([]byte(strconv.FormatFloat(i, 'g', -1, floatPrecision)))
179 | 	w.Write(iBytes)
180 | 	w.Write(closeParenBytes)
181 | }
182 | 
183 | // printHexPtr outputs a uintptr formatted as hexidecimal with a leading '0x'
184 | // prefix to Writer w.
185 | func printHexPtr(w io.Writer, p uintptr) {
186 | 	// Null pointer.
187 | 	num := uint64(p)
188 | 	if num == 0 {
189 | 		w.Write(nilAngleBytes)
190 | 		return
191 | 	}
192 | 
193 | 	// Max uint64 is 16 bytes in hex + 2 bytes for '0x' prefix
194 | 	buf := make([]byte, 18)
195 | 
196 | 	// It's simpler to construct the hex string right to left.
197 | 	base := uint64(16)
198 | 	i := len(buf) - 1
199 | 	for num >= base {
200 | 		buf[i] = hexDigits[num%base]
201 | 		num /= base
202 | 		i--
203 | 	}
204 | 	buf[i] = hexDigits[num]
205 | 
206 | 	// Add '0x' prefix.
207 | 	i--
208 | 	buf[i] = 'x'
209 | 	i--
210 | 	buf[i] = '0'
211 | 
212 | 	// Strip unused leading bytes.
213 | 	buf = buf[i:]
214 | 	w.Write(buf)
215 | }
216 | 
217 | // valuesSorter implements sort.Interface to allow a slice of reflect.Value
218 | // elements to be sorted.
219 | type valuesSorter struct {
220 | 	values  []reflect.Value
221 | 	strings []string // either nil or same len and values
222 | 	cs      *ConfigState
223 | }
224 | 
225 | // newValuesSorter initializes a valuesSorter instance, which holds a set of
226 | // surrogate keys on which the data should be sorted.  It uses flags in
227 | // ConfigState to decide if and how to populate those surrogate keys.
228 | func newValuesSorter(values []reflect.Value, cs *ConfigState) sort.Interface {
229 | 	vs := &valuesSorter{values: values, cs: cs}
230 | 	if canSortSimply(vs.values[0].Kind()) {
231 | 		return vs
232 | 	}
233 | 	if !cs.DisableMethods {
234 | 		vs.strings = make([]string, len(values))
235 | 		for i := range vs.values {
236 | 			b := bytes.Buffer{}
237 | 			if !handleMethods(cs, &b, vs.values[i]) {
238 | 				vs.strings = nil
239 | 				break
240 | 			}
241 | 			vs.strings[i] = b.String()
242 | 		}
243 | 	}
244 | 	if vs.strings == nil && cs.SpewKeys {
245 | 		vs.strings = make([]string, len(values))
246 | 		for i := range vs.values {
247 | 			vs.strings[i] = Sprintf("%#v", vs.values[i].Interface())
248 | 		}
249 | 	}
250 | 	return vs
251 | }
252 | 
253 | // canSortSimply tests whether a reflect.Kind is a primitive that can be sorted
254 | // directly, or whether it should be considered for sorting by surrogate keys
255 | // (if the ConfigState allows it).
256 | func canSortSimply(kind reflect.Kind) bool {
257 | 	// This switch parallels valueSortLess, except for the default case.
258 | 	switch kind {
259 | 	case reflect.Bool:
260 | 		return true
261 | 	case reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, reflect.Int:
262 | 		return true
263 | 	case reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uint:
264 | 		return true
265 | 	case reflect.Float32, reflect.Float64:
266 | 		return true
267 | 	case reflect.String:
268 | 		return true
269 | 	case reflect.Uintptr:
270 | 		return true
271 | 	case reflect.Array:
272 | 		return true
273 | 	}
274 | 	return false
275 | }
276 | 
277 | // Len returns the number of values in the slice.  It is part of the
278 | // sort.Interface implementation.
279 | func (s *valuesSorter) Len() int {
280 | 	return len(s.values)
281 | }
282 | 
283 | // Swap swaps the values at the passed indices.  It is part of the
284 | // sort.Interface implementation.
285 | func (s *valuesSorter) Swap(i, j int) {
286 | 	s.values[i], s.values[j] = s.values[j], s.values[i]
287 | 	if s.strings != nil {
288 | 		s.strings[i], s.strings[j] = s.strings[j], s.strings[i]
289 | 	}
290 | }
291 | 
292 | // valueSortLess returns whether the first value should sort before the second
293 | // value.  It is used by valueSorter.Less as part of the sort.Interface
294 | // implementation.
295 | func valueSortLess(a, b reflect.Value) bool {
296 | 	switch a.Kind() {
297 | 	case reflect.Bool:
298 | 		return !a.Bool() && b.Bool()
299 | 	case reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, reflect.Int:
300 | 		return a.Int() < b.Int()
301 | 	case reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uint:
302 | 		return a.Uint() < b.Uint()
303 | 	case reflect.Float32, reflect.Float64:
304 | 		return a.Float() < b.Float()
305 | 	case reflect.String:
306 | 		return a.String() < b.String()
307 | 	case reflect.Uintptr:
308 | 		return a.Uint() < b.Uint()
309 | 	case reflect.Array:
310 | 		// Compare the contents of both arrays.
311 | 		l := a.Len()
312 | 		for i := 0; i < l; i++ {
313 | 			av := a.Index(i)
314 | 			bv := b.Index(i)
315 | 			if av.Interface() == bv.Interface() {
316 | 				continue
317 | 			}
318 | 			return valueSortLess(av, bv)
319 | 		}
320 | 	}
321 | 	return a.String() < b.String()
322 | }
323 | 
324 | // Less returns whether the value at index i should sort before the
325 | // value at index j.  It is part of the sort.Interface implementation.
326 | func (s *valuesSorter) Less(i, j int) bool {
327 | 	if s.strings == nil {
328 | 		return valueSortLess(s.values[i], s.values[j])
329 | 	}
330 | 	return s.strings[i] < s.strings[j]
331 | }
332 | 
333 | // sortValues is a sort function that handles both native types and any type that
334 | // can be converted to error or Stringer.  Other inputs are sorted according to
335 | // their Value.String() value to ensure display stability.
336 | func sortValues(values []reflect.Value, cs *ConfigState) {
337 | 	if len(values) == 0 {
338 | 		return
339 | 	}
340 | 	sort.Sort(newValuesSorter(values, cs))
341 | }
342 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 |                                  Apache License
  2 |                            Version 2.0, January 2004
  3 |                         http://www.apache.org/licenses/
  4 | 
  5 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  6 | 
  7 |    1. Definitions.
  8 | 
  9 |       "License" shall mean the terms and conditions for use, reproduction,
 10 |       and distribution as defined by Sections 1 through 9 of this document.
 11 | 
 12 |       "Licensor" shall mean the copyright owner or entity authorized by
 13 |       the copyright owner that is granting the License.
 14 | 
 15 |       "Legal Entity" shall mean the union of the acting entity and all
 16 |       other entities that control, are controlled by, or are under common
 17 |       control with that entity. For the purposes of this definition,
 18 |       "control" means (i) the power, direct or indirect, to cause the
 19 |       direction or management of such entity, whether by contract or
 20 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 21 |       outstanding shares, or (iii) beneficial ownership of such entity.
 22 | 
 23 |       "You" (or "Your") shall mean an individual or Legal Entity
 24 |       exercising permissions granted by this License.
 25 | 
 26 |       "Source" form shall mean the preferred form for making modifications,
 27 |       including but not limited to software source code, documentation
 28 |       source, and configuration files.
 29 | 
 30 |       "Object" form shall mean any form resulting from mechanical
 31 |       transformation or translation of a Source form, including but
 32 |       not limited to compiled object code, generated documentation,
 33 |       and conversions to other media types.
 34 | 
 35 |       "Work" shall mean the work of authorship, whether in Source or
 36 |       Object form, made available under the License, as indicated by a
 37 |       copyright notice that is included in or attached to the work
 38 |       (an example is provided in the Appendix below).
 39 | 
 40 |       "Derivative Works" shall mean any work, whether in Source or Object
 41 |       form, that is based on (or derived from) the Work and for which the
 42 |       editorial revisions, annotations, elaborations, or other modifications
 43 |       represent, as a whole, an original work of authorship. For the purposes
 44 |       of this License, Derivative Works shall not include works that remain
 45 |       separable from, or merely link (or bind by name) to the interfaces of,
 46 |       the Work and Derivative Works thereof.
 47 | 
 48 |       "Contribution" shall mean any work of authorship, including
 49 |       the original version of the Work and any modifications or additions
 50 |       to that Work or Derivative Works thereof, that is intentionally
 51 |       submitted to Licensor for inclusion in the Work by the copyright owner
 52 |       or by an individual or Legal Entity authorized to submit on behalf of
 53 |       the copyright owner. For the purposes of this definition, "submitted"
 54 |       means any form of electronic, verbal, or written communication sent
 55 |       to the Licensor or its representatives, including but not limited to
 56 |       communication on electronic mailing lists, source code control systems,
 57 |       and issue tracking systems that are managed by, or on behalf of, the
 58 |       Licensor for the purpose of discussing and improving the Work, but
 59 |       excluding communication that is conspicuously marked or otherwise
 60 |       designated in writing by the copyright owner as "Not a Contribution."
 61 | 
 62 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 63 |       on behalf of whom a Contribution has been received by Licensor and
 64 |       subsequently incorporated within the Work.
 65 | 
 66 |    2. Grant of Copyright License. Subject to the terms and conditions of
 67 |       this License, each Contributor hereby grants to You a perpetual,
 68 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 69 |       copyright license to reproduce, prepare Derivative Works of,
 70 |       publicly display, publicly perform, sublicense, and distribute the
 71 |       Work and such Derivative Works in Source or Object form.
 72 | 
 73 |    3. Grant of Patent License. Subject to the terms and conditions of
 74 |       this License, each Contributor hereby grants to You a perpetual,
 75 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 76 |       (except as stated in this section) patent license to make, have made,
 77 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 78 |       where such license applies only to those patent claims licensable
 79 |       by such Contributor that are necessarily infringed by their
 80 |       Contribution(s) alone or by combination of their Contribution(s)
 81 |       with the Work to which such Contribution(s) was submitted. If You
 82 |       institute patent litigation against any entity (including a
 83 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 84 |       or a Contribution incorporated within the Work constitutes direct
 85 |       or contributory patent infringement, then any patent licenses
 86 |       granted to You under this License for that Work shall terminate
 87 |       as of the date such litigation is filed.
 88 | 
 89 |    4. Redistribution. You may reproduce and distribute copies of the
 90 |       Work or Derivative Works thereof in any medium, with or without
 91 |       modifications, and in Source or Object form, provided that You
 92 |       meet the following conditions:
 93 | 
 94 |       (a) You must give any other recipients of the Work or
 95 |           Derivative Works a copy of this License; and
 96 | 
 97 |       (b) You must cause any modified files to carry prominent notices
 98 |           stating that You changed the files; and
 99 | 
100 |       (c) You must retain, in the Source form of any Derivative Works
101 |           that You distribute, all copyright, patent, trademark, and
102 |           attribution notices from the Source form of the Work,
103 |           excluding those notices that do not pertain to any part of
104 |           the Derivative Works; and
105 | 
106 |       (d) If the Work includes a "NOTICE" text file as part of its
107 |           distribution, then any Derivative Works that You distribute must
108 |           include a readable copy of the attribution notices contained
109 |           within such NOTICE file, excluding those notices that do not
110 |           pertain to any part of the Derivative Works, in at least one
111 |           of the following places: within a NOTICE text file distributed
112 |           as part of the Derivative Works; within the Source form or
113 |           documentation, if provided along with the Derivative Works; or,
114 |           within a display generated by the Derivative Works, if and
115 |           wherever such third-party notices normally appear. The contents
116 |           of the NOTICE file are for informational purposes only and
117 |           do not modify the License. You may add Your own attribution
118 |           notices within Derivative Works that You distribute, alongside
119 |           or as an addendum to the NOTICE text from the Work, provided
120 |           that such additional attribution notices cannot be construed
121 |           as modifying the License.
122 | 
123 |       You may add Your own copyright statement to Your modifications and
124 |       may provide additional or different license terms and conditions
125 |       for use, reproduction, or distribution of Your modifications, or
126 |       for any such Derivative Works as a whole, provided Your use,
127 |       reproduction, and distribution of the Work otherwise complies with
128 |       the conditions stated in this License.
129 | 
130 |    5. Submission of Contributions. Unless You explicitly state otherwise,
131 |       any Contribution intentionally submitted for inclusion in the Work
132 |       by You to the Licensor shall be under the terms and conditions of
133 |       this License, without any additional terms or conditions.
134 |       Notwithstanding the above, nothing herein shall supersede or modify
135 |       the terms of any separate license agreement you may have executed
136 |       with Licensor regarding such Contributions.
137 | 
138 |    6. Trademarks. This License does not grant permission to use the trade
139 |       names, trademarks, service marks, or product names of the Licensor,
140 |       except as required for reasonable and customary use in describing the
141 |       origin of the Work and reproducing the content of the NOTICE file.
142 | 
143 |    7. Disclaimer of Warranty. Unless required by applicable law or
144 |       agreed to in writing, Licensor provides the Work (and each
145 |       Contributor provides its Contributions) on an "AS IS" BASIS,
146 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 |       implied, including, without limitation, any warranties or conditions
148 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 |       PARTICULAR PURPOSE. You are solely responsible for determining the
150 |       appropriateness of using or redistributing the Work and assume any
151 |       risks associated with Your exercise of permissions under this License.
152 | 
153 |    8. Limitation of Liability. In no event and under no legal theory,
154 |       whether in tort (including negligence), contract, or otherwise,
155 |       unless required by applicable law (such as deliberate and grossly
156 |       negligent acts) or agreed to in writing, shall any Contributor be
157 |       liable to You for damages, including any direct, indirect, special,
158 |       incidental, or consequential damages of any character arising as a
159 |       result of this License or out of the use or inability to use the
160 |       Work (including but not limited to damages for loss of goodwill,
161 |       work stoppage, computer failure or malfunction, or any and all
162 |       other commercial damages or losses), even if such Contributor
163 |       has been advised of the possibility of such damages.
164 | 
165 |    9. Accepting Warranty or Additional Liability. While redistributing
166 |       the Work or Derivative Works thereof, You may choose to offer,
167 |       and charge a fee for, acceptance of support, warranty, indemnity,
168 |       or other liability obligations and/or rights consistent with this
169 |       License. However, in accepting such obligations, You may act only
170 |       on Your own behalf and on Your sole responsibility, not on behalf
171 |       of any other Contributor, and only if You agree to indemnify,
172 |       defend, and hold each Contributor harmless for any liability
173 |       incurred by, or claims asserted against, such Contributor by reason
174 |       of your accepting any such warranty or additional liability.
175 | 
176 |    END OF TERMS AND CONDITIONS
177 | 
178 |    APPENDIX: How to apply the Apache License to your work.
179 | 
180 |       To apply the Apache License to your work, attach the following
181 |       boilerplate notice, with the fields enclosed by brackets "[]"
182 |       replaced with your own identifying information. (Don't include
183 |       the brackets!)  The text should be enclosed in the appropriate
184 |       comment syntax for the file format. We also recommend that a
185 |       file or class name and description of purpose be included on the
186 |       same "printed page" as the copyright notice for easier
187 |       identification within third-party archives.
188 | 
189 |    Copyright [yyyy] [name of copyright owner]
190 | 
191 |    Licensed under the Apache License, Version 2.0 (the "License");
192 |    you may not use this file except in compliance with the License.
193 |    You may obtain a copy of the License at
194 | 
195 |        http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 |    Unless required by applicable law or agreed to in writing, software
198 |    distributed under the License is distributed on an "AS IS" BASIS,
199 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 |    See the License for the specific language governing permissions and
201 |    limitations under the License.
202 | 


--------------------------------------------------------------------------------