├── test ├── testdata │ └── .gitkeep ├── download_text8.sh └── e2e.go ├── .dockerignore ├── .github ├── pull_request_template.md ├── ISSUE_TEMPLATE │ └── issue.md └── workflows │ └── go.yml ├── .gitignore ├── Dockerfile ├── go.mod ├── pkg ├── embedding │ ├── embutil │ │ ├── embutil.go │ │ └── embutil_test.go │ ├── embedding_test.go │ └── embedding.go ├── util │ ├── verbose │ │ └── verbose.go │ └── clock │ │ └── clock.go ├── search │ ├── searchutil │ │ ├── searchutil.go │ │ └── searchutil_test.go │ ├── console │ │ ├── op.go │ │ └── console.go │ ├── search.go │ └── search_test.go ├── model │ ├── word2vec │ │ ├── sigmoid_table_test.go │ │ ├── sigmoid_table.go │ │ ├── optimizer.go │ │ ├── model.go │ │ ├── word2vec.go │ │ └── options.go │ ├── model.go │ ├── modelutil │ │ ├── matrix │ │ │ └── matrix.go │ │ ├── modelutil.go │ │ ├── subsample │ │ │ └── subsample.go │ │ └── vector │ │ │ └── vector.go │ ├── glove │ │ ├── item.go │ │ ├── solver.go │ │ ├── glove.go │ │ └── options.go │ └── lexvec │ │ ├── item.go │ │ ├── options.go │ │ └── lexvec.go └── corpus │ ├── cooccurrence │ ├── cooccurrence_test.go │ ├── encode │ │ └── encode.go │ └── cooccurrence.go │ ├── corpus.go │ ├── dictionary │ ├── node │ │ └── node.go │ ├── huffman.go │ └── dictionary.go │ ├── options.go │ ├── cpsutil │ ├── cpsutil_test.go │ └── cpsutil.go │ ├── filter │ └── options.go │ ├── memory │ └── memory.go │ └── fs │ └── fs.go ├── cmd ├── query │ ├── cmdutil │ │ └── cmdutil.go │ ├── console │ │ └── console.go │ └── query.go └── model │ ├── cmdutil │ └── cmdutil.go │ ├── glove │ └── glove.go │ ├── word2vec │ └── word2vec.go │ └── lexvec │ └── lexvec.go ├── examples ├── query │ └── main.go └── word2vec │ └── main.go ├── wego.go ├── CODE_OF_CONDUCT.md ├── README.md ├── LICENSE └── go.sum /test/testdata/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.dockerignore: 
-------------------------------------------------------------------------------- 1 | vendor/ 2 | -------------------------------------------------------------------------------- /.github/pull_request_template.md: -------------------------------------------------------------------------------- 1 | ### Overview 2 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/issue.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Issue 3 | about: Report an issue 4 | --- 5 | 6 | ### Overview 7 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .idea/ 2 | vendor/ 3 | *.txt 4 | 5 | text8 6 | text8.zip 7 | 8 | wego 9 | cpu.prof 10 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM golang:1.14.3-alpine3.11 AS builder 2 | 3 | ENV CGO_ENABLED=0 4 | ENV GOOS=linux 5 | ENV GOARCH=amd64 6 | 7 | WORKDIR /go/src/github.com/ynqa/wego 8 | COPY . . 9 | RUN go build -v -o wego . 10 | 11 | FROM busybox 12 | COPY --from=builder /go/src/github.com/ynqa/wego/wego /usr/local/bin/wego 13 | 14 | ENTRYPOINT ["wego"] 15 | CMD ["help"] 16 | -------------------------------------------------------------------------------- /test/download_text8.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | 3 | WORKSPACE="$(cd $(dirname $0); pwd)/testdata/" 4 | 5 | if [ ! 
-e "${WORKSPACE}/text8" ]; then 6 | echo "Download text8 corpus" 7 | if hash wget 2>/dev/null; then 8 | wget -q --show-progress -P "${WORKSPACE}" http://mattmahoney.net/dc/text8.zip 9 | else 10 | curl --progress-bar -o "${WORKSPACE}/text8.zip" -O http://mattmahoney.net/dc/text8.zip 11 | fi 12 | 13 | echo "Unzip text8.zip" 14 | unzip "${WORKSPACE}/text8.zip" -d ${WORKSPACE} 15 | rm "${WORKSPACE}/text8.zip" 16 | else 17 | echo "Text8 has been already downloaded" 18 | fi 19 | -------------------------------------------------------------------------------- /.github/workflows/go.yml: -------------------------------------------------------------------------------- 1 | # This workflow will build a golang project 2 | # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-go 3 | 4 | name: Go 5 | 6 | on: 7 | push: 8 | branches: [ "master" ] 9 | pull_request: 10 | branches: [ "master" ] 11 | 12 | jobs: 13 | 14 | build: 15 | runs-on: ubuntu-latest 16 | steps: 17 | - uses: actions/checkout@v3 18 | 19 | - name: Set up Go 20 | uses: actions/setup-go@v3 21 | with: 22 | go-version: 1.19 23 | 24 | - name: Build 25 | run: go build -v ./... 26 | 27 | - name: Test 28 | run: go test -v ./... 
29 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/ynqa/wego 2 | 3 | go 1.18 4 | 5 | require ( 6 | github.com/olekukonko/tablewriter v0.0.4 7 | github.com/peterh/liner v1.2.0 8 | github.com/pkg/errors v0.9.1 9 | github.com/spf13/cobra v1.1.1 10 | github.com/stretchr/testify v1.6.1 11 | golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9 12 | ) 13 | 14 | require ( 15 | github.com/davecgh/go-spew v1.1.1 // indirect 16 | github.com/inconshreveable/mousetrap v1.0.0 // indirect 17 | github.com/mattn/go-runewidth v0.0.7 // indirect 18 | github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e // indirect 19 | github.com/pmezard/go-difflib v1.0.0 // indirect 20 | github.com/spf13/pflag v1.0.5 // indirect 21 | gopkg.in/check.v1 v1.0.0-20200902074654-038fdea0a05b // indirect 22 | gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c // indirect 23 | ) 24 | -------------------------------------------------------------------------------- /pkg/embedding/embutil/embutil.go: -------------------------------------------------------------------------------- 1 | // Copyright © 2020 wego authors 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
// Norm returns the Euclidean (L2) norm of vec: the square root of the sum of
// its squared components. A nil or empty slice yields 0.
func Norm(vec []float64) float64 {
	sumSquares := 0.0
	for i := 0; i < len(vec); i++ {
		sumSquares += vec[i] * vec[i]
	}
	return math.Sqrt(sumSquares)
}

// Verbose gates the execution of callbacks behind a boolean flag.
type Verbose struct {
	flag bool
}

// New builds a Verbose whose Do method runs callbacks only when flag is true.
func New(flag bool) *Verbose {
	v := new(Verbose)
	v.flag = flag
	return v
}

// Do invokes fn when the flag given to New was true and does nothing
// otherwise.
func (v *Verbose) Do(fn func()) {
	if !v.flag {
		return
	}
	fn()
}
// Cosine returns the cosine similarity of v1 and v2 given their precomputed
// norms n1 and n2. It returns 0 when either norm is 0. The dot product is
// taken over the indices of v1, so v2 must be at least as long as v1.
func Cosine(v1, v2 []float64, n1, n2 float64) float64 {
	switch {
	case n1 == 0, n2 == 0:
		return 0
	}
	inner := 0.0
	for idx, a := range v1 {
		inner += a * v2[idx]
	}
	return (inner / n1) / n2
}
14 | 15 | package word2vec 16 | 17 | import ( 18 | "testing" 19 | ) 20 | 21 | func TestSigmoid(t *testing.T) { 22 | table := newSigmoidTable() 23 | f := table.sigmoid(3) 24 | if !(f >= 0 || f <= 1) { 25 | t.Errorf("Expected range is 0 < sigmoid(x) < 1, but got %v", f) 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /pkg/model/model.go: -------------------------------------------------------------------------------- 1 | // Copyright © 2020 wego authors 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package model 16 | 17 | import ( 18 | "io" 19 | 20 | "github.com/ynqa/wego/pkg/model/modelutil/matrix" 21 | "github.com/ynqa/wego/pkg/model/modelutil/vector" 22 | ) 23 | 24 | type Model interface { 25 | Train(io.ReadSeeker) error 26 | Save(io.Writer, vector.Type) error 27 | WordVector(vector.Type) *matrix.Matrix 28 | } 29 | -------------------------------------------------------------------------------- /pkg/util/clock/clock.go: -------------------------------------------------------------------------------- 1 | // Copyright © 2020 wego authors 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
// Clock records the moment it was created so elapsed time can be reported.
type Clock struct {
	// start is the creation time; last is initialised to the same instant and
	// is not read by any code in this file.
	start, last time.Time
}

// New returns a Clock started at the current time.
func New() *Clock {
	now := time.Now()
	return &Clock{
		start: now,
		last:  now,
	}
}

// AllElapsed returns the time elapsed since the Clock was created.
func (c *Clock) AllElapsed() time.Duration {
	return time.Since(c.start)
}
14 | 15 | package co 16 | 17 | import ( 18 | "testing" 19 | 20 | "github.com/stretchr/testify/assert" 21 | ) 22 | 23 | func TestCooccurrence(t *testing.T) { 24 | pw, err := New(Increment) 25 | assert.NoError(t, err) 26 | assert.NoError(t, pw.Add(1, 2)) 27 | assert.Equal(t, 1, len(pw.EncodedMatrix())) 28 | } 29 | 30 | func TestCooccurrenceWithInvalidCountType(t *testing.T) { 31 | _, err := New(CountType("invalid type")) 32 | assert.Error(t, err) 33 | } 34 | -------------------------------------------------------------------------------- /cmd/query/cmdutil/cmdutil.go: -------------------------------------------------------------------------------- 1 | // Copyright © 2020 wego authors 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package cmdutil 16 | 17 | import ( 18 | "github.com/spf13/cobra" 19 | ) 20 | 21 | const ( 22 | defaultInputFile = "example/word_vectors.txt" 23 | defaultRank = 10 24 | ) 25 | 26 | func AddInputFlags(cmd *cobra.Command, input *string) { 27 | cmd.Flags().StringVarP(input, "input", "i", defaultInputFile, "input file path for trained word vector") 28 | } 29 | 30 | func AddRankFlags(cmd *cobra.Command, rank *int) { 31 | cmd.Flags().IntVarP(rank, "rank", "r", defaultRank, "how many similar words will be displayed") 32 | } 33 | -------------------------------------------------------------------------------- /pkg/embedding/embutil/embutil_test.go: -------------------------------------------------------------------------------- 1 | // Copyright © 2020 wego authors 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package embutil 16 | 17 | import ( 18 | "testing" 19 | 20 | "github.com/stretchr/testify/assert" 21 | ) 22 | 23 | func TestNorm(t *testing.T) { 24 | testCases := []struct { 25 | name string 26 | vec []float64 27 | expect float64 28 | }{ 29 | { 30 | name: "norm", 31 | vec: []float64{1, 1, 1, 1, 0, 0}, 32 | expect: 2., 33 | }, 34 | } 35 | 36 | for _, tc := range testCases { 37 | t.Run(tc.name, func(t *testing.T) { 38 | assert.Equal(t, tc.expect, Norm(tc.vec)) 39 | }) 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /pkg/corpus/corpus.go: -------------------------------------------------------------------------------- 1 | // Copyright © 2020 wego authors 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package corpus 16 | 17 | import ( 18 | co "github.com/ynqa/wego/pkg/corpus/cooccurrence" 19 | "github.com/ynqa/wego/pkg/corpus/dictionary" 20 | "github.com/ynqa/wego/pkg/util/verbose" 21 | ) 22 | 23 | type Corpus interface { 24 | IndexedDoc() []int 25 | BatchWords(chan []int, int) error 26 | Dictionary() *dictionary.Dictionary 27 | Cooccurrence() *co.Cooccurrence 28 | Len() int 29 | Load(*WithCooccurrence, *verbose.Verbose, int) error 30 | } 31 | 32 | type WithCooccurrence struct { 33 | CountType co.CountType 34 | Window int 35 | } 36 | -------------------------------------------------------------------------------- /pkg/corpus/cooccurrence/encode/encode.go: -------------------------------------------------------------------------------- 1 | // Copyright © 2020 wego authors 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package encode 16 | 17 | // data structure for co-occurrence mapping: 18 | // - https://blog.chewxy.com/2017/07/12/21-bits-english/ 19 | 20 | // EncodeBigram creates id between two words. 21 | func EncodeBigram(l1, l2 uint64) uint64 { 22 | if l1 < l2 { 23 | return encode(l1, l2) 24 | } else { 25 | return encode(l2, l1) 26 | } 27 | } 28 | 29 | func encode(l1, l2 uint64) uint64 { 30 | return l1 | (l2 << 32) 31 | } 32 | 33 | // DecodeBigram reverts pair id to two word ids. 
34 | func DecodeBigram(pid uint64) (uint64, uint64) { 35 | f := pid >> 32 36 | return pid - (f << 32), f 37 | } 38 | -------------------------------------------------------------------------------- /examples/query/main.go: -------------------------------------------------------------------------------- 1 | // Copyright © 2020 wego authors 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package main 16 | 17 | import ( 18 | "log" 19 | "os" 20 | 21 | "github.com/ynqa/wego/pkg/embedding" 22 | "github.com/ynqa/wego/pkg/search" 23 | ) 24 | 25 | func main() { 26 | input, err := os.Open("word_vector.txt") 27 | if err != nil { 28 | log.Fatal(err) 29 | } 30 | defer input.Close() 31 | embs, err := embedding.Load(input) 32 | if err != nil { 33 | log.Fatal(err) 34 | } 35 | searcher, err := search.New(embs...) 36 | if err != nil { 37 | log.Fatal(err) 38 | } 39 | neighbors, err := searcher.SearchInternal("given_word", 10) 40 | if err != nil { 41 | log.Fatal(err) 42 | } 43 | neighbors.Describe() 44 | } 45 | -------------------------------------------------------------------------------- /pkg/corpus/dictionary/node/node.go: -------------------------------------------------------------------------------- 1 | // Copyright © 2020 wego authors 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
// Node is an element of a parent-linked tree.
type Node struct {
	cache  []*Node // memoised root-to-node path, built lazily by GetPath
	Parent *Node
	Val    int

	Code   int
	Vector []float64
}

// GetPath returns the first depth nodes on the path from the root down to n
// (root first, n last). A depth larger than the path length is clamped to the
// full path. The path is computed once and memoised on the node.
//
// The result is capped with a full slice expression so that an append by the
// caller reallocates instead of overwriting the memoised path.
//
// NOTE(review): the lazy initialisation of the cache is not synchronised;
// confirm callers do not race on first use.
func (n *Node) GetPath(depth int) []*Node {
	if n.cache == nil {
		// Collect n and its ancestors (leaf to root), then reverse in place.
		path := make([]*Node, 0)
		for p := n; p != nil; p = p.Parent {
			path = append(path, p)
		}
		for i, j := 0, len(path)-1; i < j; i, j = i+1, j-1 {
			path[i], path[j] = path[j], path[i]
		}
		n.cache = path
	}
	if depth > len(n.cache) {
		depth = len(n.cache)
	}
	return n.cache[:depth:depth]
}
14 | 15 | package main 16 | 17 | import ( 18 | "os" 19 | 20 | "github.com/ynqa/wego/pkg/model/modelutil/vector" 21 | "github.com/ynqa/wego/pkg/model/word2vec" 22 | ) 23 | 24 | func main() { 25 | model, err := word2vec.New( 26 | word2vec.Window(5), 27 | word2vec.Model(word2vec.Cbow), 28 | word2vec.Optimizer(word2vec.NegativeSampling), 29 | word2vec.NegativeSampleSize(5), 30 | word2vec.Verbose(), 31 | ) 32 | if err != nil { 33 | // failed to create word2vec. 34 | } 35 | 36 | input, _ := os.Open("text8") 37 | defer input.Close() 38 | if err = model.Train(input); err != nil { 39 | // failed to train. 40 | } 41 | 42 | // write word vector. 43 | model.Save(os.Stdin, vector.Agg) 44 | } 45 | -------------------------------------------------------------------------------- /pkg/corpus/options.go: -------------------------------------------------------------------------------- 1 | // Copyright © 2020 wego authors 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package corpus 16 | 17 | import ( 18 | "github.com/spf13/cobra" 19 | ) 20 | 21 | const ( 22 | defaultDocInMemory = false 23 | defaultToLower = false 24 | ) 25 | 26 | type Options struct { 27 | DocInMemory bool 28 | ToLower bool 29 | } 30 | 31 | func DefaultOptions() Options { 32 | return Options{ 33 | DocInMemory: defaultDocInMemory, 34 | ToLower: defaultToLower, 35 | } 36 | } 37 | 38 | func LoadForCmd(cmd *cobra.Command, opts *Options) { 39 | cmd.Flags().BoolVar(&opts.DocInMemory, "in-memory", defaultDocInMemory, "whether to store the doc in memory") 40 | cmd.Flags().BoolVar(&opts.ToLower, "lower", defaultToLower, "whether the words on corpus convert to lowercase or not") 41 | } 42 | -------------------------------------------------------------------------------- /pkg/search/searchutil/searchutil_test.go: -------------------------------------------------------------------------------- 1 | // Copyright © 2020 wego authors 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package searchutil 16 | 17 | import ( 18 | "testing" 19 | 20 | "github.com/stretchr/testify/assert" 21 | 22 | "github.com/ynqa/wego/pkg/embedding/embutil" 23 | ) 24 | 25 | func TestCosine(t *testing.T) { 26 | testCases := []struct { 27 | name string 28 | v1 []float64 29 | v2 []float64 30 | expect float64 31 | }{ 32 | { 33 | name: "cosine", 34 | v1: []float64{1, 1, 1, 1, 0, 0}, 35 | v2: []float64{1, 1, 0, 0, 1, 1}, 36 | expect: 0.5, 37 | }, 38 | } 39 | 40 | for _, tc := range testCases { 41 | t.Run(tc.name, func(t *testing.T) { 42 | assert.Equal(t, tc.expect, Cosine(tc.v1, tc.v2, embutil.Norm(tc.v1), embutil.Norm(tc.v2))) 43 | }) 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /pkg/model/modelutil/matrix/matrix.go: -------------------------------------------------------------------------------- 1 | // Copyright © 2020 wego authors 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
// Matrix is a dense row-major matrix backed by a single flat slice.
type Matrix struct {
	array []float64
	row   int
	col   int
}

// New allocates a row x col matrix and calls fn once per row, in order, with
// the row index and that row's slice so the caller can initialise it in place.
func New(row, col int, fn func(int, []float64)) *Matrix {
	m := &Matrix{row: row, col: col}
	m.array = make([]float64, row*col)
	for r := 0; r < row; r++ {
		fn(r, m.Slice(r))
	}
	return m
}

// startIndex returns the offset of row id inside the flat backing slice.
func (m *Matrix) startIndex(id int) int {
	return m.col * id
}

// Row returns the number of rows.
func (m *Matrix) Row() int {
	return m.row
}

// Col returns the number of columns.
func (m *Matrix) Col() int {
	return m.col
}

// Slice returns row id as a view into the backing slice; writes through the
// returned slice modify the matrix.
func (m *Matrix) Slice(id int) []float64 {
	lo := m.startIndex(id)
	hi := lo + m.col
	return m.array[lo:hi]
}

// next is the state of the generator behind NextRandom.
var (
	next uint64 = 1
)

// NextRandom is linear congruential generator (rand.Intn). It advances the
// package-level state and returns the new state modulo value.
func NextRandom(value int) int {
	const (
		multiplier uint64 = 25214903917
		increment  uint64 = 11
	)
	next = next*multiplier + increment
	bound := uint64(value)
	return int(next % bound)
}
32 | func IndexPerThread(threadSize, dataSize int) []int { 33 | indexPerThread := make([]int, threadSize+1) 34 | indexPerThread[0] = 0 35 | indexPerThread[threadSize] = dataSize 36 | for i := 1; i < threadSize; i++ { 37 | indexPerThread[i] = indexPerThread[i-1] + 38 | int(math.Trunc(float64((dataSize+i)/threadSize))) 39 | } 40 | return indexPerThread 41 | } 42 | -------------------------------------------------------------------------------- /pkg/model/modelutil/subsample/subsample.go: -------------------------------------------------------------------------------- 1 | // Copyright © 2020 wego authors 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package subsample 16 | 17 | import ( 18 | "math" 19 | "math/rand" 20 | 21 | "github.com/ynqa/wego/pkg/corpus/dictionary" 22 | ) 23 | 24 | type Subsampler struct { 25 | samples []float64 26 | } 27 | 28 | func New( 29 | dic *dictionary.Dictionary, 30 | threshold float64, 31 | ) *Subsampler { 32 | samples := make([]float64, dic.Len()) 33 | for i := 0; i < dic.Len(); i++ { 34 | z := 1. 
- math.Sqrt(threshold/float64(dic.IDFreq(i))) 35 | if z < 0 { 36 | z = 0 37 | } 38 | samples[i] = z 39 | } 40 | return &Subsampler{ 41 | samples: samples, 42 | } 43 | } 44 | 45 | func (s *Subsampler) Trial(id int) bool { 46 | bernoulliTrial := rand.Float64() 47 | var ok bool 48 | if s.samples[id] > bernoulliTrial { 49 | ok = true 50 | } 51 | return ok 52 | } 53 | -------------------------------------------------------------------------------- /pkg/corpus/cpsutil/cpsutil_test.go: -------------------------------------------------------------------------------- 1 | // Copyright © 2020 wego authors 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package cpsutil 16 | 17 | import ( 18 | "strings" 19 | "testing" 20 | 21 | "github.com/stretchr/testify/assert" 22 | ) 23 | 24 | func TestReadWord(t *testing.T) { 25 | var dic []string 26 | fn := func(w string) (err error) { 27 | dic = append(dic, w) 28 | return 29 | } 30 | 31 | r := strings.NewReader("a bc def") 32 | expected := []string{"a", "bc", "def"} 33 | assert.NoError(t, ReadWord(r, fn)) 34 | assert.Equal(t, expected, dic) 35 | } 36 | 37 | func TestReadWordWithForwardContext(t *testing.T) { 38 | var dic []string 39 | fn := func(w1, w2 string) (err error) { 40 | dic = append(dic, w1+w2) 41 | return 42 | } 43 | 44 | r := strings.NewReader("a b c d e") 45 | expected := []string{"ab", "ac", "bc", "bd", "cd", "ce", "de"} 46 | assert.NoError(t, ReadWordWithForwardContext(r, 2, fn)) 47 | assert.Equal(t, expected, dic) 48 | } 49 | -------------------------------------------------------------------------------- /pkg/model/word2vec/sigmoid_table.go: -------------------------------------------------------------------------------- 1 | // Copyright © 2020 wego authors 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
// sigmoidTable is a precomputed lookup table of the logistic function
// 1 / (1 + exp(-x)) sampled at expTableSize evenly spaced points starting
// at -maxExp.
type sigmoidTable struct {
	expTable     []float64
	expTableSize int
	maxExp       float64
	// cache is expTableSize / maxExp / 2, the factor that maps x+maxExp
	// onto a table index.
	cache float64
}

// newSigmoidTable builds a table of 1000 entries covering [-6, 6).
func newSigmoidTable() *sigmoidTable {
	s := new(sigmoidTable)
	s.expTableSize = 1000
	s.maxExp = 6.0
	s.cache = float64(s.expTableSize) / s.maxExp / 2.0
	s.expTable = make([]float64, s.expTableSize)
	for i := 0; i < s.expTableSize; i++ {
		expval := math.Exp((float64(i)/float64(s.expTableSize)*2. - 1.) * s.maxExp)
		s.expTable[i] = expval / (expval + 1.)
	}
	return s
}

// sigmoid returns the tabulated logistic value for x. The entry is found at
// index (x + maxExp) * (expTableSize / maxExp / 2).
//
// Inputs at or beyond ±maxExp saturate to the first or last table entry
// instead of indexing out of range, so the call never panics on large
// activations.
func (s *sigmoidTable) sigmoid(x float64) float64 {
	last := s.expTableSize - 1
	switch {
	case x <= -s.maxExp:
		return s.expTable[0]
	case x >= s.maxExp:
		return s.expTable[last]
	}
	idx := int((x + s.maxExp) * s.cache)
	// Floating-point rounding just below maxExp can still land on
	// expTableSize, so clamp the computed index as well.
	if idx < 0 {
		idx = 0
	} else if idx > last {
		idx = last
	}
	return s.expTable[idx]
}
14 | 15 | package console 16 | 17 | import ( 18 | "github.com/pkg/errors" 19 | ) 20 | 21 | type Operator func(float64, float64) float64 22 | 23 | func elementWise(v1, v2 []float64, op Operator) ([]float64, error) { 24 | if len(v1) != len(v2) { 25 | return nil, errors.Errorf("Both lengths of vector must be the same, got %d and %d", len(v1), len(v2)) 26 | } 27 | v := make([]float64, len(v1)) 28 | for i := 0; i < len(v1); i++ { 29 | v[i] = op(v1[i], v2[i]) 30 | } 31 | return v, nil 32 | } 33 | 34 | func add(v1, v2 []float64) ([]float64, error) { 35 | return elementWise(v1, v2, addOp()) 36 | } 37 | 38 | func addOp() Operator { 39 | return Operator(func(x, y float64) float64 { 40 | return x + y 41 | }) 42 | } 43 | 44 | func sub(v1, v2 []float64) ([]float64, error) { 45 | return elementWise(v1, v2, subOp()) 46 | } 47 | 48 | func subOp() Operator { 49 | return Operator(func(x, y float64) float64 { 50 | return x - y 51 | }) 52 | } 53 | -------------------------------------------------------------------------------- /pkg/corpus/dictionary/huffman.go: -------------------------------------------------------------------------------- 1 | // Copyright © 2020 wego authors 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package dictionary 16 | 17 | import ( 18 | "sort" 19 | 20 | "github.com/ynqa/wego/pkg/corpus/dictionary/node" 21 | ) 22 | 23 | func (d *Dictionary) HuffnamTree(dim int) []*node.Node { 24 | nodes := make([]*node.Node, d.maxid) 25 | set := make([]*node.Node, d.maxid) 26 | for i := 0; i < d.maxid; i++ { 27 | n := &node.Node{ 28 | Val: d.IDFreq(i), 29 | } 30 | nodes[i] = n 31 | set[i] = n 32 | } 33 | 34 | sort.SliceStable(nodes, func(i, j int) bool { 35 | return nodes[i].Val < nodes[j].Val 36 | }) 37 | for len(nodes) > 1 { 38 | left, right := nodes[0], nodes[1] 39 | merged := &node.Node{ 40 | Val: left.Val + right.Val, 41 | Vector: make([]float64, dim), 42 | } 43 | left.Code, right.Code = 0, 1 44 | left.Parent, right.Parent = merged, merged 45 | 46 | nodes = nodes[2:] 47 | idx := sort.Search(len(nodes), func(i int) bool { 48 | return nodes[i].Val >= merged.Val 49 | }) 50 | 51 | nodes = append(nodes, &node.Node{}) 52 | copy(nodes[idx+1:], nodes[idx:]) 53 | nodes[idx] = merged 54 | } 55 | 56 | return set 57 | } 58 | -------------------------------------------------------------------------------- /cmd/model/cmdutil/cmdutil.go: -------------------------------------------------------------------------------- 1 | // Copyright © 2020 wego authors 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package cmdutil 16 | 17 | import ( 18 | "fmt" 19 | 20 | "github.com/spf13/cobra" 21 | 22 | "github.com/ynqa/wego/pkg/model/modelutil/vector" 23 | ) 24 | 25 | const ( 26 | defaultInputFile = "example/input.txt" 27 | defaultOutputFile = "example/word_vectors.txt" 28 | defaultProf = false 29 | defaultVectorType = vector.Single 30 | ) 31 | 32 | func AddInputFlags(cmd *cobra.Command, input *string) { 33 | cmd.Flags().StringVarP(input, "input", "i", defaultInputFile, "input file path for corpus") 34 | } 35 | 36 | func AddOutputFlags(cmd *cobra.Command, output *string) { 37 | cmd.Flags().StringVarP(output, "output", "o", defaultOutputFile, "output file path to save word vectors") 38 | } 39 | 40 | func AddProfFlags(cmd *cobra.Command, prof *bool) { 41 | cmd.Flags().BoolVar(prof, "prof", defaultProf, "profiling mode to check the performances") 42 | } 43 | 44 | func AddVectorTypeFlags(cmd *cobra.Command, typ *vector.Type) { 45 | cmd.Flags().StringVar(typ, "vec-type", defaultVectorType, fmt.Sprintf("word vector type. One of: %s|%s", vector.Single, vector.Agg)) 46 | } 47 | -------------------------------------------------------------------------------- /pkg/corpus/filter/options.go: -------------------------------------------------------------------------------- 1 | // Copyright © 2020 wego authors 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package filter 16 | 17 | import ( 18 | "github.com/spf13/cobra" 19 | 20 | "github.com/ynqa/wego/pkg/corpus/dictionary" 21 | ) 22 | 23 | var ( 24 | defaultMaxCount = -1 25 | defaultMinCount = 5 26 | ) 27 | 28 | type Options struct { 29 | MaxCount int 30 | MinCount int 31 | } 32 | 33 | func DefaultOption() *Options { 34 | return &Options{ 35 | MaxCount: defaultMaxCount, 36 | MinCount: defaultMinCount, 37 | } 38 | } 39 | 40 | func LoadForCmd(cmd *cobra.Command, opts *Options) { 41 | cmd.Flags().IntVar(&opts.MaxCount, "max-count", defaultMaxCount, "upper limit to filter words") 42 | cmd.Flags().IntVar(&opts.MinCount, "min-count", defaultMinCount, "lower limit to filter words") 43 | } 44 | 45 | type FilterFn func(id int, dic *dictionary.Dictionary) bool 46 | 47 | func MaxCount(v int) FilterFn { 48 | return FilterFn(func(id int, dic *dictionary.Dictionary) bool { 49 | return 0 < v && v < dic.IDFreq(id) 50 | }) 51 | } 52 | 53 | func MinCount(v int) FilterFn { 54 | return FilterFn(func(id int, dic *dictionary.Dictionary) bool { 55 | return 0 <= v && dic.IDFreq(id) < v 56 | }) 57 | } 58 | -------------------------------------------------------------------------------- /wego.go: -------------------------------------------------------------------------------- 1 | // Copyright © 2020 wego authors 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package main 16 | 17 | import ( 18 | "os" 19 | 20 | "github.com/pkg/errors" 21 | "github.com/spf13/cobra" 22 | 23 | "github.com/ynqa/wego/cmd/model/glove" 24 | "github.com/ynqa/wego/cmd/model/lexvec" 25 | "github.com/ynqa/wego/cmd/model/word2vec" 26 | "github.com/ynqa/wego/cmd/query" 27 | "github.com/ynqa/wego/cmd/query/console" 28 | ) 29 | 30 | func main() { 31 | word2vec := word2vec.New() 32 | glove := glove.New() 33 | lexvec := lexvec.New() 34 | query := query.New() 35 | console := console.New() 36 | 37 | cmd := &cobra.Command{ 38 | Use: "wego", 39 | Short: "tools for embedding words into vector space", 40 | RunE: func(cmd *cobra.Command, args []string) error { 41 | return errors.Errorf("Set sub-command. One of %s|%s|%s|%s|%s", 42 | word2vec.Name(), 43 | glove.Name(), 44 | lexvec.Name(), 45 | query.Name(), 46 | console.Name(), 47 | ) 48 | }, 49 | } 50 | cmd.AddCommand(word2vec) 51 | cmd.AddCommand(glove) 52 | cmd.AddCommand(lexvec) 53 | cmd.AddCommand(query) 54 | cmd.AddCommand(console) 55 | 56 | if err := cmd.Execute(); err != nil { 57 | os.Exit(1) 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /pkg/model/glove/item.go: -------------------------------------------------------------------------------- 1 | // Copyright © 2020 wego authors 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package glove 16 | 17 | import ( 18 | "fmt" 19 | "math" 20 | 21 | co "github.com/ynqa/wego/pkg/corpus/cooccurrence" 22 | "github.com/ynqa/wego/pkg/corpus/cooccurrence/encode" 23 | "github.com/ynqa/wego/pkg/util/clock" 24 | ) 25 | 26 | type item struct { 27 | l1, l2 int 28 | f float64 29 | coef float64 30 | } 31 | 32 | func (g *glove) makeItems(cooc *co.Cooccurrence) []item { 33 | em := cooc.EncodedMatrix() 34 | res, idx, clk := make([]item, len(em)), 0, clock.New() 35 | for enc, f := range em { 36 | u1, u2 := encode.DecodeBigram(enc) 37 | l1, l2 := int(u1), int(u2) 38 | coef := 1. 39 | if f < float64(g.opts.Xmax) { 40 | coef = math.Pow(f/float64(g.opts.Xmax), g.opts.Alpha) 41 | } 42 | res[idx] = item{ 43 | l1: l1, 44 | l2: l2, 45 | f: math.Log(f), 46 | coef: coef, 47 | } 48 | idx++ 49 | g.verbose.Do(func() { 50 | if idx%g.opts.LogBatch == 0 { 51 | fmt.Printf("build %d items %v\r", idx, clk.AllElapsed()) 52 | } 53 | }) 54 | } 55 | g.verbose.Do(func() { 56 | fmt.Printf("build %d items %v\r\n", idx, clk.AllElapsed()) 57 | }) 58 | return res 59 | } 60 | -------------------------------------------------------------------------------- /pkg/corpus/cooccurrence/cooccurrence.go: -------------------------------------------------------------------------------- 1 | // Copyright © 2020 wego authors 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package co 16 | 17 | import ( 18 | "fmt" 19 | "math" 20 | 21 | "github.com/pkg/errors" 22 | 23 | "github.com/ynqa/wego/pkg/corpus/cooccurrence/encode" 24 | ) 25 | 26 | type CountType = string 27 | 28 | const ( 29 | Increment CountType = "inc" 30 | Proximity CountType = "prox" 31 | ) 32 | 33 | func invalidCountTypeError(typ CountType) error { 34 | return fmt.Errorf("invalid relation type: %s not in %s|%s", typ, Increment, Proximity) 35 | } 36 | 37 | type Cooccurrence struct { 38 | typ CountType 39 | 40 | ma map[uint64]float64 41 | } 42 | 43 | func New(typ CountType) (*Cooccurrence, error) { 44 | if typ != Increment && typ != Proximity { 45 | return nil, invalidCountTypeError(typ) 46 | } 47 | return &Cooccurrence{ 48 | typ: typ, 49 | 50 | ma: make(map[uint64]float64), 51 | }, nil 52 | } 53 | 54 | func (c *Cooccurrence) EncodedMatrix() map[uint64]float64 { 55 | return c.ma 56 | } 57 | 58 | func (c *Cooccurrence) Add(left, right int) error { 59 | enc := encode.EncodeBigram(uint64(left), uint64(right)) 60 | var val float64 61 | switch c.typ { 62 | case Increment: 63 | val = 1 64 | case Proximity: 65 | div := left - right 66 | if div == 0 { 67 | return errors.Errorf("Divide by zero on counting co-occurrence") 68 | } 69 | val = 1. / math.Abs(float64(div)) 70 | default: 71 | return invalidCountTypeError(c.typ) 72 | } 73 | c.ma[enc] += val 74 | return nil 75 | } 76 | -------------------------------------------------------------------------------- /pkg/corpus/dictionary/dictionary.go: -------------------------------------------------------------------------------- 1 | // Copyright © 2020 wego authors 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
// inspired by
// - https://github.com/chewxy/lingo/blob/master/corpus/corpus.go
// - https://github.com/RaRe-Technologies/gensim/blob/3.8.1/gensim/corpora/dictionary.py

// Dictionary maps words to dense integer IDs (assigned in order of first
// appearance, starting at 0) and tracks how many times each word was added.
type Dictionary struct {
	word2id map[string]int
	id2word []string

	// cfs[id] is the number of times the word with that ID was added.
	cfs []int

	// maxid is the next ID to assign, i.e. the number of distinct words.
	maxid int
}

// New returns an empty Dictionary.
func New() *Dictionary {
	return &Dictionary{
		word2id: make(map[string]int),
		id2word: make([]string, 0),

		cfs: make([]int, 0),
	}
}

// Len returns the number of distinct words.
func (d *Dictionary) Len() int {
	return d.maxid
}

// ID returns the ID of word and whether the word is known.
func (d *Dictionary) ID(word string) (int, bool) {
	id, ok := d.word2id[word]
	return id, ok
}

// WordFreq returns how many times word was added, or 0 if it is unknown.
func (d *Dictionary) WordFreq(word string) int {
	id, ok := d.word2id[word]
	if !ok {
		return 0
	}
	return d.cfs[id]
}

// Word returns the word registered under id. It returns "", false when id
// is negative or not yet assigned.
func (d *Dictionary) Word(id int) (string, bool) {
	if id < 0 || id >= d.maxid {
		return "", false
	}
	return d.id2word[id], true
}

// IDFreq returns how many times the word with the given id was added. It
// returns 0 when id is negative or not yet assigned.
func (d *Dictionary) IDFreq(id int) int {
	if id < 0 || id >= d.maxid {
		return 0
	}
	return d.cfs[id]
}

// Add registers each word: a known word has its count incremented, a new
// word receives the next free ID with a count of 1.
func (d *Dictionary) Add(words ...string) {
	for _, word := range words {
		if id, ok := d.word2id[word]; ok {
			d.cfs[id]++
			continue
		}
		d.word2id[word] = d.maxid
		d.id2word = append(d.id2word, word)
		d.cfs = append(d.cfs, 1)
		d.maxid++
	}
}
/cmd/query/console/console.go: -------------------------------------------------------------------------------- 1 | // Copyright © 2020 wego authors 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package console 16 | 17 | import ( 18 | "os" 19 | 20 | "github.com/pkg/errors" 21 | "github.com/spf13/cobra" 22 | 23 | "github.com/ynqa/wego/cmd/query/cmdutil" 24 | "github.com/ynqa/wego/pkg/embedding" 25 | "github.com/ynqa/wego/pkg/search" 26 | "github.com/ynqa/wego/pkg/search/console" 27 | ) 28 | 29 | var ( 30 | inputFile string 31 | rank int 32 | ) 33 | 34 | func New() *cobra.Command { 35 | cmd := &cobra.Command{ 36 | Use: "console", 37 | Short: "Console to investigate word vectors", 38 | Example: " wego console -i example/word_vectors.txt\n" + 39 | " >> apple + banana\n" + 40 | " ...", 41 | RunE: func(cmd *cobra.Command, args []string) error { 42 | return execute() 43 | }, 44 | } 45 | cmdutil.AddInputFlags(cmd, &inputFile) 46 | cmdutil.AddRankFlags(cmd, &rank) 47 | return cmd 48 | } 49 | 50 | func fileExists(path string) bool { 51 | _, err := os.Stat(path) 52 | return err == nil 53 | } 54 | 55 | func execute() error { 56 | if !fileExists(inputFile) { 57 | return errors.Errorf("Not such a file %s", inputFile) 58 | } 59 | input, err := os.Open(inputFile) 60 | if err != nil { 61 | return err 62 | } 63 | defer input.Close() 64 | embs, err := embedding.Load(input) 65 | if err != nil { 66 | return err 67 | } 68 | 
searcher, err := search.New(embs...) 69 | if err != nil { 70 | return err 71 | } 72 | console, err := console.New(searcher, rank) 73 | if err != nil { 74 | return err 75 | } 76 | return console.Run() 77 | } 78 | -------------------------------------------------------------------------------- /cmd/query/query.go: -------------------------------------------------------------------------------- 1 | // Copyright © 2020 wego authors 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package query 16 | 17 | import ( 18 | "os" 19 | 20 | "github.com/pkg/errors" 21 | "github.com/spf13/cobra" 22 | 23 | "github.com/ynqa/wego/cmd/query/cmdutil" 24 | "github.com/ynqa/wego/pkg/embedding" 25 | "github.com/ynqa/wego/pkg/search" 26 | ) 27 | 28 | var ( 29 | inputFile string 30 | rank int 31 | ) 32 | 33 | func New() *cobra.Command { 34 | cmd := &cobra.Command{ 35 | Use: "query", 36 | Short: "Query similar words", 37 | Example: " wego query -i example/word_vectors.txt microsoft", 38 | RunE: func(cmd *cobra.Command, args []string) error { 39 | return execute(args) 40 | }, 41 | } 42 | cmdutil.AddInputFlags(cmd, &inputFile) 43 | cmdutil.AddRankFlags(cmd, &rank) 44 | return cmd 45 | } 46 | 47 | func fileExists(path string) bool { 48 | _, err := os.Stat(path) 49 | return err == nil 50 | } 51 | 52 | func execute(args []string) error { 53 | if !fileExists(inputFile) { 54 | return errors.Errorf("Not such a file %s", inputFile) 55 | } else if len(args) != 1 { 56 | return errors.Errorf("Input a single word %v", args) 57 | } 58 | input, err := os.Open(inputFile) 59 | if err != nil { 60 | return err 61 | } 62 | defer input.Close() 63 | embs, err := embedding.Load(input) 64 | if err != nil { 65 | return err 66 | } 67 | searcher, err := search.New(embs...) 68 | if err != nil { 69 | return err 70 | } 71 | neighbors, err := searcher.SearchInternal(args[0], rank) 72 | if err != nil { 73 | return err 74 | } 75 | neighbors.Describe() 76 | return nil 77 | } 78 | -------------------------------------------------------------------------------- /pkg/model/modelutil/vector/vector.go: -------------------------------------------------------------------------------- 1 | // Copyright © 2020 wego authors 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package vector 16 | 17 | import ( 18 | "bufio" 19 | "bytes" 20 | "fmt" 21 | "io" 22 | 23 | "github.com/pkg/errors" 24 | "github.com/ynqa/wego/pkg/corpus/dictionary" 25 | "github.com/ynqa/wego/pkg/model/modelutil/matrix" 26 | "github.com/ynqa/wego/pkg/util/clock" 27 | "github.com/ynqa/wego/pkg/util/verbose" 28 | ) 29 | 30 | func InvalidTypeError(typ Type) error { 31 | return errors.Errorf("invalid vector type: %s not in %s|%s", typ, Single, Agg) 32 | } 33 | 34 | type Type = string 35 | 36 | const ( 37 | Single Type = "single" 38 | Agg Type = "agg" 39 | ) 40 | 41 | func Save(f io.Writer, dic *dictionary.Dictionary, mat *matrix.Matrix, verbose *verbose.Verbose, logBatch int) error { 42 | if dic.Len() != mat.Row() { 43 | return fmt.Errorf("different for length of dic and row of matrix: %d, %d", dic.Len(), mat.Row()) 44 | } 45 | writer := bufio.NewWriter(f) 46 | defer writer.Flush() 47 | 48 | var buf bytes.Buffer 49 | clk := clock.New() 50 | for i := 0; i < dic.Len(); i++ { 51 | word, _ := dic.Word(i) 52 | fmt.Fprintf(&buf, "%v ", word) 53 | for j := 0; j < mat.Col(); j++ { 54 | fmt.Fprintf(&buf, "%f ", mat.Slice(i)[j]) 55 | } 56 | fmt.Fprintln(&buf) 57 | verbose.Do(func() { 58 | if i%logBatch == 0 { 59 | fmt.Printf("saved %d words %v\r", i, clk.AllElapsed()) 60 | } 61 | }) 62 | } 63 | writer.WriteString(fmt.Sprintf("%v", buf.String())) 64 | verbose.Do(func() { 65 | fmt.Printf("saved %d words %v\r\n", dic.Len(), clk.AllElapsed()) 66 | }) 67 | return nil 68 | } 69 | 
-------------------------------------------------------------------------------- /pkg/embedding/embedding_test.go: -------------------------------------------------------------------------------- 1 | // Copyright © 2020 wego authors 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package embedding 16 | 17 | import ( 18 | "bytes" 19 | "io/ioutil" 20 | "reflect" 21 | "testing" 22 | 23 | "github.com/stretchr/testify/assert" 24 | "github.com/ynqa/wego/pkg/embedding/embutil" 25 | ) 26 | 27 | func TestLoad(t *testing.T) { 28 | testCases := []struct { 29 | name string 30 | contents string 31 | itemSize int 32 | }{ 33 | { 34 | name: "read vector file", 35 | contents: `apple 1 1 1 1 1 36 | banana 1 1 1 1 1 37 | chocolate 0 0 0 0 0 38 | dragon -1 -1 -1 -1 -1`, 39 | itemSize: 4, 40 | }, 41 | } 42 | 43 | for _, tc := range testCases { 44 | t.Run(tc.name, func(t *testing.T) { 45 | embs, _ := Load(bytes.NewReader([]byte(tc.contents))) 46 | assert.Equal(t, tc.itemSize, len(embs)) 47 | }) 48 | } 49 | } 50 | 51 | func TestParse(t *testing.T) { 52 | testNumVector := 4 53 | testVectorStr := `apple 1 1 1 1 1 54 | banana 1 1 1 1 1 55 | chocolate 0 0 0 0 0 56 | dragon -1 -1 -1 -1 -1` 57 | 58 | f := ioutil.NopCloser(bytes.NewReader([]byte(testVectorStr))) 59 | defer f.Close() 60 | 61 | embs := make([]Embedding, 0) 62 | op := func(emb Embedding) error { 63 | embs = append(embs, emb) 64 | return nil 65 | } 66 | 67 | 
assert.NoError(t, parse(f, op)) 68 | assert.Equal(t, testNumVector, len(embs)) 69 | } 70 | 71 | func TestParseLine(t *testing.T) { 72 | testCases := []struct { 73 | name string 74 | line string 75 | expected Embedding 76 | }{ 77 | { 78 | name: "parse line into Embedding", 79 | line: "apple 1 1 1 1 1", 80 | expected: Embedding{ 81 | Word: "apple", 82 | Dim: 5, 83 | Vector: []float64{1, 1, 1, 1, 1}, 84 | Norm: embutil.Norm([]float64{1, 1, 1, 1, 1}), 85 | }, 86 | }, 87 | } 88 | 89 | for _, tc := range testCases { 90 | t.Run(tc.name, func(t *testing.T) { 91 | emb, _ := parseLine(tc.line) 92 | assert.Truef(t, reflect.DeepEqual(tc.expected, emb), "Must be equal %v and %v", tc.expected, emb) 93 | }) 94 | } 95 | } 96 | -------------------------------------------------------------------------------- /cmd/model/glove/glove.go: -------------------------------------------------------------------------------- 1 | // Copyright © 2020 wego authors 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package glove 16 | 17 | import ( 18 | "os" 19 | "path/filepath" 20 | "runtime/pprof" 21 | 22 | "github.com/pkg/errors" 23 | "github.com/spf13/cobra" 24 | 25 | "github.com/ynqa/wego/cmd/model/cmdutil" 26 | "github.com/ynqa/wego/pkg/model/glove" 27 | "github.com/ynqa/wego/pkg/model/modelutil/vector" 28 | ) 29 | 30 | var ( 31 | prof bool 32 | inputFile string 33 | outputFile string 34 | vectorType vector.Type 35 | ) 36 | 37 | func New() *cobra.Command { 38 | var opts glove.Options 39 | cmd := &cobra.Command{ 40 | Use: "glove", 41 | Short: "GloVe: Global Vectors for Word Representation", 42 | RunE: func(cmd *cobra.Command, args []string) error { 43 | return execute(opts) 44 | }, 45 | } 46 | 47 | cmdutil.AddInputFlags(cmd, &inputFile) 48 | cmdutil.AddOutputFlags(cmd, &outputFile) 49 | cmdutil.AddProfFlags(cmd, &prof) 50 | cmdutil.AddVectorTypeFlags(cmd, &vectorType) 51 | glove.LoadForCmd(cmd, &opts) 52 | return cmd 53 | } 54 | 55 | func fileExists(path string) bool { 56 | _, err := os.Stat(path) 57 | return err == nil 58 | } 59 | 60 | func execute(opts glove.Options) error { 61 | if prof { 62 | f, err := os.Create("cpu.prof") 63 | if err != nil { 64 | return err 65 | } 66 | pprof.StartCPUProfile(f) 67 | defer pprof.StopCPUProfile() 68 | } 69 | 70 | if fileExists(outputFile) { 71 | return errors.Errorf("%s is already existed", outputFile) 72 | } else if !fileExists(inputFile) { 73 | return errors.Errorf("Not such a file %s", inputFile) 74 | } 75 | if err := os.MkdirAll(filepath.Dir(outputFile), 0777); err != nil { 76 | return err 77 | } 78 | output, err := os.Create(outputFile) 79 | if err != nil { 80 | return err 81 | } 82 | input, err := os.Open(inputFile) 83 | if err != nil { 84 | return err 85 | } 86 | defer input.Close() 87 | mod, err := glove.NewForOptions(opts) 88 | if err != nil { 89 | return err 90 | } 91 | if err := mod.Train(input); err != nil { 92 | return err 93 | } 94 | return mod.Save(output, vectorType) 95 | } 96 | 
-------------------------------------------------------------------------------- /cmd/model/word2vec/word2vec.go: -------------------------------------------------------------------------------- 1 | // Copyright © 2020 wego authors 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package word2vec 16 | 17 | import ( 18 | "os" 19 | "path/filepath" 20 | "runtime/pprof" 21 | 22 | "github.com/pkg/errors" 23 | "github.com/spf13/cobra" 24 | 25 | "github.com/ynqa/wego/cmd/model/cmdutil" 26 | "github.com/ynqa/wego/pkg/model/modelutil/vector" 27 | "github.com/ynqa/wego/pkg/model/word2vec" 28 | ) 29 | 30 | var ( 31 | prof bool 32 | inputFile string 33 | outputFile string 34 | vectorType vector.Type 35 | ) 36 | 37 | func New() *cobra.Command { 38 | var opts word2vec.Options 39 | cmd := &cobra.Command{ 40 | Use: "word2vec", 41 | Short: "Word2Vec: Continuous Bag-of-Words and Skip-gram model", 42 | RunE: func(cmd *cobra.Command, args []string) error { 43 | return execute(opts) 44 | }, 45 | } 46 | 47 | cmdutil.AddInputFlags(cmd, &inputFile) 48 | cmdutil.AddOutputFlags(cmd, &outputFile) 49 | cmdutil.AddProfFlags(cmd, &prof) 50 | cmdutil.AddVectorTypeFlags(cmd, &vectorType) 51 | word2vec.LoadForCmd(cmd, &opts) 52 | return cmd 53 | } 54 | 55 | func fileExists(path string) bool { 56 | _, err := os.Stat(path) 57 | return err == nil 58 | } 59 | 60 | func execute(opts word2vec.Options) error { 61 | if prof { 62 | f, err := 
os.Create("cpu.prof") 63 | if err != nil { 64 | return err 65 | } 66 | pprof.StartCPUProfile(f) 67 | defer pprof.StopCPUProfile() 68 | } 69 | 70 | if fileExists(outputFile) { 71 | return errors.Errorf("%s is already existed", outputFile) 72 | } else if !fileExists(inputFile) { 73 | return errors.Errorf("%s is not found", inputFile) 74 | } 75 | if err := os.MkdirAll(filepath.Dir(outputFile), 0777); err != nil { 76 | return err 77 | } 78 | output, err := os.Create(outputFile) 79 | if err != nil { 80 | return err 81 | } 82 | input, err := os.Open(inputFile) 83 | if err != nil { 84 | return err 85 | } 86 | defer input.Close() 87 | mod, err := word2vec.NewForOptions(opts) 88 | if err != nil { 89 | return err 90 | } 91 | if err := mod.Train(input); err != nil { 92 | return err 93 | } 94 | return mod.Save(output, vectorType) 95 | } 96 | -------------------------------------------------------------------------------- /pkg/model/lexvec/item.go: -------------------------------------------------------------------------------- 1 | // Copyright © 2020 wego authors 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package lexvec 16 | 17 | import ( 18 | "fmt" 19 | "math" 20 | 21 | "github.com/pkg/errors" 22 | co "github.com/ynqa/wego/pkg/corpus/cooccurrence" 23 | "github.com/ynqa/wego/pkg/corpus/cooccurrence/encode" 24 | "github.com/ynqa/wego/pkg/util/clock" 25 | ) 26 | 27 | func (l *lexvec) makeItems(cooc *co.Cooccurrence) (map[uint64]float64, error) { 28 | em := cooc.EncodedMatrix() 29 | res, idx, clk := make(map[uint64]float64), 0, clock.New() 30 | logTotalFreq := math.Log(math.Pow(float64(l.corpus.Len()), l.opts.Smooth)) 31 | for enc, f := range em { 32 | u1, u2 := encode.DecodeBigram(enc) 33 | l1, l2 := int(u1), int(u2) 34 | v, err := l.calculateRelation( 35 | l.opts.RelationType, 36 | l1, l2, 37 | f, logTotalFreq, 38 | ) 39 | if err != nil { 40 | return nil, err 41 | } 42 | res[enc] = v 43 | idx++ 44 | l.verbose.Do(func() { 45 | if idx%l.opts.LogBatch == 0 { 46 | fmt.Printf("build %d items %v\r", idx, clk.AllElapsed()) 47 | } 48 | }) 49 | } 50 | l.verbose.Do(func() { 51 | fmt.Printf("build %d items %v\r\n", idx, clk.AllElapsed()) 52 | }) 53 | return res, nil 54 | } 55 | 56 | func (l *lexvec) calculateRelation( 57 | typ RelationType, 58 | l1, l2 int, 59 | co, logTotalFreq float64, 60 | ) (float64, error) { 61 | dic := l.corpus.Dictionary() 62 | switch typ { 63 | case PPMI: 64 | if co == 0 { 65 | return 0, nil 66 | } 67 | // TODO: avoid log for l1, l2 every time 68 | ppmi := math.Log(co) - math.Log(float64(dic.IDFreq(l1))) - math.Log(math.Pow(float64(dic.IDFreq(l2)), l.opts.Smooth)) + logTotalFreq 69 | if ppmi < 0 { 70 | ppmi = 0 71 | } 72 | return ppmi, nil 73 | case PMI: 74 | if co == 0 { 75 | return 1, nil 76 | } 77 | pmi := math.Log(co) - math.Log(float64(dic.IDFreq(l1))) - math.Log(math.Pow(float64(dic.IDFreq(l2)), l.opts.Smooth)) + logTotalFreq 78 | return pmi, nil 79 | case Collocation: 80 | return co, nil 81 | case LogCollocation: 82 | return math.Log(co), nil 83 | default: 84 | return 0, errors.Errorf("invalid measure type") 85 | } 86 | } 87 | 
-------------------------------------------------------------------------------- /cmd/model/lexvec/lexvec.go: -------------------------------------------------------------------------------- 1 | // Copyright © 2020 wego authors 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package lexvec 16 | 17 | import ( 18 | "os" 19 | "path/filepath" 20 | "runtime/pprof" 21 | 22 | "github.com/pkg/errors" 23 | "github.com/spf13/cobra" 24 | 25 | "github.com/ynqa/wego/cmd/model/cmdutil" 26 | "github.com/ynqa/wego/pkg/model/lexvec" 27 | "github.com/ynqa/wego/pkg/model/modelutil/vector" 28 | ) 29 | 30 | var ( 31 | prof bool 32 | inputFile string 33 | outputFile string 34 | vectorType vector.Type 35 | ) 36 | 37 | func New() *cobra.Command { 38 | var opts lexvec.Options 39 | cmd := &cobra.Command{ 40 | Use: "lexvec", 41 | Short: "Lexvec: Matrix Factorization using Window Sampling and Negative Sampling for Improved Word Representations", 42 | RunE: func(cmd *cobra.Command, args []string) error { 43 | return execute(opts) 44 | }, 45 | } 46 | 47 | cmdutil.AddInputFlags(cmd, &inputFile) 48 | cmdutil.AddOutputFlags(cmd, &outputFile) 49 | cmdutil.AddProfFlags(cmd, &prof) 50 | cmdutil.AddVectorTypeFlags(cmd, &vectorType) 51 | lexvec.LoadForCmd(cmd, &opts) 52 | return cmd 53 | } 54 | 55 | func fileExists(path string) bool { 56 | _, err := os.Stat(path) 57 | return err == nil 58 | } 59 | 60 | func execute(opts lexvec.Options) error { 
61 | if prof { 62 | f, err := os.Create("cpu.prof") 63 | if err != nil { 64 | return err 65 | } 66 | pprof.StartCPUProfile(f) 67 | defer pprof.StopCPUProfile() 68 | } 69 | 70 | if fileExists(outputFile) { 71 | return errors.Errorf("%s is already existed", outputFile) 72 | } else if !fileExists(inputFile) { 73 | return errors.Errorf("Not such a file %s", inputFile) 74 | } 75 | if err := os.MkdirAll(filepath.Dir(outputFile), 0777); err != nil { 76 | return err 77 | } 78 | output, err := os.Create(outputFile) 79 | if err != nil { 80 | return err 81 | } 82 | input, err := os.Open(inputFile) 83 | if err != nil { 84 | return err 85 | } 86 | defer input.Close() 87 | mod, err := lexvec.NewForOptions(opts) 88 | if err != nil { 89 | return err 90 | } 91 | if err := mod.Train(input); err != nil { 92 | return err 93 | } 94 | return mod.Save(output, vectorType) 95 | } 96 | -------------------------------------------------------------------------------- /pkg/model/glove/solver.go: -------------------------------------------------------------------------------- 1 | // Copyright © 2020 wego authors 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
// trainOne applies one update to rows l1 and l2 of param, scaling each step
// by the inverse square root of the per-element accumulator kept in
// sol.gradsq (same row indices as param).
//
// The last element of every row is handled separately from the first
// len(row)-1 elements: it is added to the inner product rather than
// multiplied, i.e. it acts as a bias term. f is subtracted from the
// prediction and the residual is scaled by coef and the initial learning
// rate before being used as the step.
//
// Rows are updated in place. Statement order matters: if l1 == l2 the two
// rows (and the two accumulator rows) are the same backing storage.
func (sol *adaGrad) trainOne(l1, l2 int, param *matrix.Matrix, f, coef float64) {
	v1, v2 := param.Slice(l1), param.Slice(l2)
	g1, g2 := sol.gradsq.Slice(l1), sol.gradsq.Slice(l2)
	// dim is the index of the bias element; diff accumulates the prediction.
	dim, diff := len(v1)-1, 0.
	for i := 0; i < dim; i++ {
		diff += v1[i] * v2[i]
	}
	// prediction = <v1, v2> + bias1 + bias2; residual against the target f.
	diff += v1[dim] + v2[dim] - f
	// Fold the weighting coefficient and learning rate into the residual.
	diff *= coef * sol.initlr
	for i := 0; i < dim; i++ {
		t1, t2 := diff*v2[i], diff*v1[i]
		// Vector elements: the accumulator is increased first, then used to
		// scale the step.
		g1[i] += t1 * t1
		g2[i] += t2 * t2
		t1 /= math.Sqrt(g1[i])
		t2 /= math.Sqrt(g2[i])
		v1[i] -= t1
		v2[i] -= t2
	}
	// Bias elements: the step is scaled with the accumulator value from
	// before this update, and the accumulator is increased afterwards.
	// NOTE(review): this ordering differs from the vector elements above —
	// confirm whether the asymmetry is intentional.
	v1[dim] -= diff / math.Sqrt(g1[dim])
	v2[dim] -= diff / math.Sqrt(g2[dim])
	diff *= diff
	g1[dim] += diff
	g2[dim] += diff
}
// ReadWord rewinds r to its beginning and calls fn once for every
// whitespace-separated word, in the order the words appear.
//
// It returns the error from the rewind if r cannot be repositioned (fn is
// not called in that case), the first non-nil error returned by fn (which
// stops the scan), or any error reported by the underlying scanner.
func ReadWord(r io.ReadSeeker, fn func(string) error) error {
	// A failed rewind would make the scan start at an arbitrary offset and
	// silently skip words, so it must not be ignored.
	if _, err := r.Seek(0, io.SeekStart); err != nil {
		return err
	}

	words := bufio.NewScanner(r)
	words.Split(bufio.ScanWords)
	for words.Scan() {
		if err := fn(words.Text()); err != nil {
			return err
		}
	}

	if err := words.Err(); err != nil && err != io.EOF {
		return err
	}

	return nil
}
111 | }) 112 | } 113 | 114 | func MinCount(v int) FilterFn { 115 | return FilterFn(func(id int, dic *dictionary.Dictionary) bool { 116 | return 0 <= v && dic.IDFreq(id) < v 117 | }) 118 | } 119 | -------------------------------------------------------------------------------- /pkg/embedding/embedding.go: -------------------------------------------------------------------------------- 1 | // Copyright © 2020 wego authors 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package embedding 16 | 17 | import ( 18 | "bufio" 19 | "io" 20 | "strconv" 21 | "strings" 22 | 23 | "github.com/pkg/errors" 24 | 25 | "github.com/ynqa/wego/pkg/embedding/embutil" 26 | ) 27 | 28 | type Embedding struct { 29 | Word string 30 | Dim int 31 | Vector []float64 32 | Norm float64 33 | } 34 | 35 | func (e Embedding) Validate() error { 36 | if e.Word == "" { 37 | return errors.New("Word is empty") 38 | } else if e.Dim == 0 || len(e.Vector) == 0 { 39 | return errors.Errorf("Dim of %s is zero", e.Word) 40 | } else if e.Dim != len(e.Vector) { 41 | return errors.Errorf("Dim and length of Vector must be same, Dim=%d, len(Vec)=%d", e.Dim, len(e.Vector)) 42 | } 43 | return nil 44 | } 45 | 46 | type Embeddings []Embedding 47 | 48 | func (embs Embeddings) Empty() bool { 49 | return len(embs) == 0 50 | } 51 | 52 | func (embs Embeddings) Find(word string) (Embedding, bool) { 53 | for _, emb := range embs { 54 | if word == emb.Word { 55 | return emb, true 56 | } 57 | } 58 | return Embedding{}, false 59 | } 60 | 61 | func (embs Embeddings) Validate() error { 62 | if len(embs) > 0 { 63 | dim := embs[0].Dim 64 | for _, emb := range embs { 65 | if dim != emb.Dim { 66 | return errors.Errorf("dimension for all vectors must be the same: %d but got %d", dim, emb.Dim) 67 | } 68 | } 69 | } 70 | return nil 71 | } 72 | 73 | func Load(r io.Reader) (Embeddings, error) { 74 | var embs Embeddings 75 | if err := parse(r, func(emb Embedding) error { 76 | if err := emb.Validate(); err != nil { 77 | return err 78 | } 79 | embs = append(embs, emb) 80 | return nil 81 | }); err != nil { 82 | return nil, err 83 | } 84 | return embs, nil 85 | } 86 | 87 | func parse(r io.Reader, op func(Embedding) error) error { 88 | s := bufio.NewScanner(r) 89 | for s.Scan() { 90 | line := s.Text() 91 | if strings.HasPrefix(line, " ") { 92 | continue 93 | } 94 | emb, err := parseLine(line) 95 | if err != nil { 96 | return err 97 | } 98 | if err := op(emb); err != nil { 99 | return err 100 | } 101 | } 
102 | if err := s.Err(); err != nil && err != io.EOF { 103 | return errors.Wrapf(err, "failed to scan") 104 | } 105 | return nil 106 | } 107 | 108 | func parseLine(line string) (Embedding, error) { 109 | slice := strings.Fields(line) 110 | if len(slice) < 2 { 111 | return Embedding{}, errors.New("Must be over 2 lenghth for word and vector elems") 112 | } 113 | word := slice[0] 114 | vector := slice[1:] 115 | dim := len(vector) 116 | 117 | vec := make([]float64, dim) 118 | for k, elem := range vector { 119 | val, err := strconv.ParseFloat(elem, 64) 120 | if err != nil { 121 | return Embedding{}, err 122 | } 123 | vec[k] = val 124 | } 125 | return Embedding{ 126 | Word: word, 127 | Dim: dim, 128 | Vector: vec, 129 | Norm: embutil.Norm(vec), 130 | }, nil 131 | } 132 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as 6 | contributors and maintainers pledge to making participation in our project and 7 | our community a harassment-free experience for everyone, regardless of age, body 8 | size, disability, ethnicity, sex characteristics, gender identity and expression, 9 | level of experience, education, socio-economic status, nationality, personal 10 | appearance, race, religion, or sexual identity and orientation. 
11 | 12 | ## Our Standards 13 | 14 | Examples of behavior that contributes to creating a positive environment 15 | include: 16 | 17 | * Using welcoming and inclusive language 18 | * Being respectful of differing viewpoints and experiences 19 | * Gracefully accepting constructive criticism 20 | * Focusing on what is best for the community 21 | * Showing empathy towards other community members 22 | 23 | Examples of unacceptable behavior by participants include: 24 | 25 | * The use of sexualized language or imagery and unwelcome sexual attention or 26 | advances 27 | * Trolling, insulting/derogatory comments, and personal or political attacks 28 | * Public or private harassment 29 | * Publishing others' private information, such as a physical or electronic 30 | address, without explicit permission 31 | * Other conduct which could reasonably be considered inappropriate in a 32 | professional setting 33 | 34 | ## Our Responsibilities 35 | 36 | Project maintainers are responsible for clarifying the standards of acceptable 37 | behavior and are expected to take appropriate and fair corrective action in 38 | response to any instances of unacceptable behavior. 39 | 40 | Project maintainers have the right and responsibility to remove, edit, or 41 | reject comments, commits, code, wiki edits, issues, and other contributions 42 | that are not aligned to this Code of Conduct, or to ban temporarily or 43 | permanently any contributor for other behaviors that they deem inappropriate, 44 | threatening, offensive, or harmful. 45 | 46 | ## Scope 47 | 48 | This Code of Conduct applies both within project spaces and in public spaces 49 | when an individual is representing the project or its community. Examples of 50 | representing a project or community include using an official project e-mail 51 | address, posting via an official social media account, or acting as an appointed 52 | representative at an online or offline event. 
Representation of a project may be 53 | further defined and clarified by project maintainers. 54 | 55 | ## Enforcement 56 | 57 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 58 | reported by contacting the project team at un.pensiero.vano@gmail.com. All 59 | complaints will be reviewed and investigated and will result in a response that 60 | is deemed necessary and appropriate to the circumstances. The project team is 61 | obligated to maintain confidentiality with regard to the reporter of an incident. 62 | Further details of specific enforcement policies may be posted separately. 63 | 64 | Project maintainers who do not follow or enforce the Code of Conduct in good 65 | faith may face temporary or permanent repercussions as determined by other 66 | members of the project's leadership. 67 | 68 | ## Attribution 69 | 70 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, 71 | available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html 72 | 73 | [homepage]: https://www.contributor-covenant.org 74 | 75 | For answers to common questions about this code of conduct, see 76 | https://www.contributor-covenant.org/faq 77 | -------------------------------------------------------------------------------- /pkg/corpus/memory/memory.go: -------------------------------------------------------------------------------- 1 | // Copyright © 2020 wego authors 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package memory 16 | 17 | import ( 18 | "fmt" 19 | "io" 20 | "strings" 21 | 22 | "github.com/ynqa/wego/pkg/corpus" 23 | co "github.com/ynqa/wego/pkg/corpus/cooccurrence" 24 | "github.com/ynqa/wego/pkg/corpus/cpsutil" 25 | "github.com/ynqa/wego/pkg/corpus/dictionary" 26 | "github.com/ynqa/wego/pkg/util/clock" 27 | "github.com/ynqa/wego/pkg/util/verbose" 28 | ) 29 | 30 | type Corpus struct { 31 | doc io.ReadSeeker 32 | 33 | dic *dictionary.Dictionary 34 | cooc *co.Cooccurrence 35 | maxLen int 36 | idoc []int 37 | 38 | toLower bool 39 | filters cpsutil.Filters 40 | } 41 | 42 | func New(doc io.ReadSeeker, toLower bool, maxCount, minCount int) corpus.Corpus { 43 | return &Corpus{ 44 | doc: doc, 45 | dic: dictionary.New(), 46 | idoc: make([]int, 0), 47 | 48 | toLower: toLower, 49 | filters: cpsutil.Filters{ 50 | cpsutil.MaxCount(maxCount), 51 | cpsutil.MinCount(minCount), 52 | }, 53 | } 54 | } 55 | 56 | func (c *Corpus) IndexedDoc() []int { 57 | var res []int 58 | for _, id := range c.idoc { 59 | if c.filters.Any(id, c.dic) { 60 | continue 61 | } 62 | res = append(res, id) 63 | } 64 | return res 65 | } 66 | 67 | func (c *Corpus) BatchWords(chan []int, int) error { 68 | return nil 69 | } 70 | 71 | func (c *Corpus) Dictionary() *dictionary.Dictionary { 72 | return c.dic 73 | } 74 | 75 | func (c *Corpus) Cooccurrence() *co.Cooccurrence { 76 | return c.cooc 77 | } 78 | 79 | func (c *Corpus) Len() int { 80 | return c.maxLen 81 | } 82 | 83 | func (c *Corpus) Load(with *corpus.WithCooccurrence, verbose *verbose.Verbose, logBatch int) error { 84 | clk := clock.New() 85 | if err := cpsutil.ReadWord(c.doc, func(word string) error { 86 | if c.toLower { 87 | word = strings.ToLower(word) 88 | } 89 | 90 | c.dic.Add(word) 91 | id, _ := c.dic.ID(word) 92 | c.maxLen++ 93 | c.idoc = append(c.idoc, id) 94 | verbose.Do(func() { 95 | if c.maxLen%logBatch == 0 { 96 | 
fmt.Printf("read %d words %v\r", c.maxLen, clk.AllElapsed()) 97 | } 98 | }) 99 | 100 | return nil 101 | }); err != nil { 102 | return err 103 | } 104 | verbose.Do(func() { 105 | fmt.Printf("read %d words %v\r\n", c.maxLen, clk.AllElapsed()) 106 | }) 107 | 108 | clk = clock.New() 109 | var ( 110 | err error 111 | cursor int 112 | ) 113 | if with != nil { 114 | c.cooc, err = co.New(with.CountType) 115 | if err != nil { 116 | return err 117 | } 118 | 119 | for i := 0; i < len(c.idoc); i++ { 120 | for j := i + 1; j < len(c.idoc) && j <= i+with.Window; j++ { 121 | if err = c.cooc.Add(c.idoc[i], c.idoc[j]); err != nil { 122 | return err 123 | } 124 | cursor++ 125 | verbose.Do(func() { 126 | if cursor%logBatch == 0 { 127 | fmt.Printf("read %d tuples %v\r", cursor, clk.AllElapsed()) 128 | } 129 | }) 130 | } 131 | } 132 | verbose.Do(func() { 133 | fmt.Printf("read %d tuples %v\r\n", cursor, clk.AllElapsed()) 134 | }) 135 | } 136 | 137 | return nil 138 | } 139 | -------------------------------------------------------------------------------- /pkg/model/word2vec/optimizer.go: -------------------------------------------------------------------------------- 1 | // Copyright © 2020 wego authors 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package word2vec 16 | 17 | import ( 18 | "math/rand" 19 | 20 | "github.com/ynqa/wego/pkg/corpus/dictionary" 21 | "github.com/ynqa/wego/pkg/corpus/dictionary/node" 22 | "github.com/ynqa/wego/pkg/model/modelutil" 23 | "github.com/ynqa/wego/pkg/model/modelutil/matrix" 24 | ) 25 | 26 | type optimizer interface { 27 | optim(id int, lr float64, ctx, tmp []float64) 28 | } 29 | 30 | type negativeSampling struct { 31 | ctx *matrix.Matrix 32 | sigtable *sigmoidTable 33 | sampleSize int 34 | } 35 | 36 | func newNegativeSampling(dic *dictionary.Dictionary, opts Options) optimizer { 37 | return &negativeSampling{ 38 | ctx: matrix.New( 39 | dic.Len(), 40 | opts.Dim, 41 | func(_ int, vec []float64) { 42 | for i := 0; i < opts.Dim; i++ { 43 | vec[i] = (rand.Float64() - 0.5) / float64(opts.Dim) 44 | } 45 | }, 46 | ), 47 | sigtable: newSigmoidTable(), 48 | sampleSize: opts.NegativeSampleSize, 49 | } 50 | } 51 | 52 | func (opt *negativeSampling) optim( 53 | id int, 54 | lr float64, 55 | ctx, tmp []float64, 56 | ) { 57 | var ( 58 | label int 59 | picked int 60 | ) 61 | dim := len(ctx) 62 | for n := -1; n < opt.sampleSize; n++ { 63 | if n == -1 { 64 | label = 1 65 | picked = id 66 | } else { 67 | label = 0 68 | picked = modelutil.NextRandom(opt.ctx.Row()) 69 | if id == picked { 70 | continue 71 | } 72 | } 73 | rnd := opt.ctx.Slice(picked) 74 | var inner float64 75 | for i := 0; i < dim; i++ { 76 | inner += rnd[i] * ctx[i] 77 | } 78 | var g float64 79 | if inner <= -opt.sigtable.maxExp { 80 | g = (float64(label - 0)) * lr 81 | } else if inner >= opt.sigtable.maxExp { 82 | g = (float64(label - 1)) * lr 83 | } else { 84 | g = (float64(label) - opt.sigtable.sigmoid(inner)) * lr 85 | } 86 | for i := 0; i < dim; i++ { 87 | tmp[i] += g * rnd[i] 88 | rnd[i] += g * ctx[i] 89 | } 90 | } 91 | } 92 | 93 | type hierarchicalSoftmax struct { 94 | sigtable *sigmoidTable 95 | nodeset []*node.Node 96 | maxDepth int 97 | } 98 | 99 | func newHierarchicalSoftmax(dic *dictionary.Dictionary, opts 
Options) optimizer { 100 | return &hierarchicalSoftmax{ 101 | sigtable: newSigmoidTable(), 102 | nodeset: dic.HuffnamTree(opts.Dim), 103 | maxDepth: opts.MaxDepth, 104 | } 105 | } 106 | 107 | func (opt *hierarchicalSoftmax) optim( 108 | id int, 109 | lr float64, 110 | ctx, tmp []float64, 111 | ) { 112 | path := opt.nodeset[id].GetPath(opt.maxDepth) 113 | for i := 0; i < len(path)-1; i++ { 114 | p := path[i] 115 | childCode := path[i+1].Code 116 | var inner float64 117 | for j := 0; j < len(p.Vector); j++ { 118 | inner += ctx[j] * p.Vector[j] 119 | } 120 | if inner <= -opt.sigtable.maxExp || inner >= opt.sigtable.maxExp { 121 | return 122 | } 123 | g := (1.0 - float64(childCode) - opt.sigtable.sigmoid(inner)) * lr 124 | for j := 0; j < len(p.Vector); j++ { 125 | tmp[j] += g * p.Vector[j] 126 | p.Vector[j] += g * ctx[j] 127 | } 128 | } 129 | } 130 | -------------------------------------------------------------------------------- /pkg/model/word2vec/model.go: -------------------------------------------------------------------------------- 1 | // Copyright © 2020 wego authors 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package word2vec 16 | 17 | import ( 18 | "github.com/ynqa/wego/pkg/model/modelutil" 19 | "github.com/ynqa/wego/pkg/model/modelutil/matrix" 20 | ) 21 | 22 | type mod interface { 23 | trainOne( 24 | doc []int, 25 | pos int, 26 | lr float64, 27 | param *matrix.Matrix, 28 | optimizer optimizer, 29 | ) 30 | } 31 | 32 | type skipGram struct { 33 | ch chan []float64 34 | window int 35 | } 36 | 37 | func newSkipGram(opts Options) mod { 38 | ch := make(chan []float64, opts.Goroutines) 39 | for i := 0; i < opts.Goroutines; i++ { 40 | ch <- make([]float64, opts.Dim) 41 | } 42 | return &skipGram{ 43 | ch: ch, 44 | window: opts.Window, 45 | } 46 | } 47 | 48 | func (mod *skipGram) trainOne( 49 | doc []int, 50 | pos int, 51 | lr float64, 52 | param *matrix.Matrix, 53 | optimizer optimizer, 54 | ) { 55 | tmp := <-mod.ch 56 | defer func() { 57 | mod.ch <- tmp 58 | }() 59 | del := modelutil.NextRandom(mod.window) 60 | for a := del; a < mod.window*2+1-del; a++ { 61 | if a == mod.window { 62 | continue 63 | } 64 | c := pos - mod.window + a 65 | if c < 0 || c >= len(doc) { 66 | continue 67 | } 68 | for i := 0; i < len(tmp); i++ { 69 | tmp[i] = 0 70 | } 71 | ctxID := doc[c] 72 | ctx := param.Slice(ctxID) 73 | optimizer.optim(doc[pos], lr, ctx, tmp) 74 | for i := 0; i < len(ctx); i++ { 75 | ctx[i] += tmp[i] 76 | } 77 | } 78 | } 79 | 80 | type cbowToken struct { 81 | agg []float64 82 | tmp []float64 83 | } 84 | 85 | type cbow struct { 86 | ch chan cbowToken 87 | window int 88 | } 89 | 90 | func newCbow(opts Options) mod { 91 | ch := make(chan cbowToken, opts.Goroutines) 92 | for i := 0; i < opts.Goroutines; i++ { 93 | ch <- cbowToken{ 94 | agg: make([]float64, opts.Dim), 95 | tmp: make([]float64, opts.Dim), 96 | } 97 | } 98 | return &cbow{ 99 | ch: ch, 100 | window: opts.Window, 101 | } 102 | } 103 | 104 | func (mod *cbow) trainOne( 105 | doc []int, 106 | pos int, 107 | lr float64, 108 | param *matrix.Matrix, 109 | optimizer optimizer, 110 | ) { 111 | token := <-mod.ch 112 | 
agg, tmp := token.agg, token.tmp 113 | defer func() { 114 | token := cbowToken{agg, tmp} 115 | mod.ch <- token 116 | }() 117 | for i := 0; i < len(agg); i++ { 118 | agg[i], tmp[i] = 0, 0 119 | } 120 | mod.dowith(doc, pos, param, agg, tmp, mod.aggregate) 121 | optimizer.optim(doc[pos], lr, agg, tmp) 122 | mod.dowith(doc, pos, param, agg, tmp, mod.update) 123 | } 124 | 125 | func (mod *cbow) dowith( 126 | doc []int, 127 | pos int, 128 | param *matrix.Matrix, 129 | agg, tmp []float64, 130 | fn func(ctx, agg, tmp []float64), 131 | ) { 132 | del := modelutil.NextRandom(mod.window) 133 | for a := del; a < mod.window*2+1-del; a++ { 134 | if a == mod.window { 135 | continue 136 | } 137 | c := pos - mod.window + a 138 | if c < 0 || c >= len(doc) { 139 | continue 140 | } 141 | ctxID := doc[c] 142 | ctx := param.Slice(ctxID) 143 | fn(ctx, agg, tmp) 144 | } 145 | } 146 | 147 | func (c *cbow) aggregate(ctx, agg, _ []float64) { 148 | for i := 0; i < len(ctx); i++ { 149 | agg[i] += ctx[i] 150 | } 151 | } 152 | 153 | func (c *cbow) update(ctx, _, tmp []float64) { 154 | for i := 0; i < len(ctx); i++ { 155 | ctx[i] += tmp[i] 156 | } 157 | } 158 | -------------------------------------------------------------------------------- /pkg/search/search.go: -------------------------------------------------------------------------------- 1 | // Copyright © 2020 wego authors 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package search 16 | 17 | import ( 18 | "fmt" 19 | "os" 20 | 21 | "github.com/olekukonko/tablewriter" 22 | "github.com/pkg/errors" 23 | 24 | "github.com/ynqa/wego/pkg/embedding" 25 | "github.com/ynqa/wego/pkg/embedding/embutil" 26 | "github.com/ynqa/wego/pkg/search/searchutil" 27 | ) 28 | 29 | // Neighbor stores the word with cosine similarity value on the target. 30 | type Neighbor struct { 31 | Word string 32 | Rank uint 33 | Similarity float64 34 | } 35 | 36 | type Neighbors []Neighbor 37 | 38 | func (neighbors Neighbors) Describe() { 39 | table := make([][]string, len(neighbors)) 40 | for i, n := range neighbors { 41 | table[i] = []string{ 42 | fmt.Sprintf("%d", n.Rank), 43 | n.Word, 44 | fmt.Sprintf("%f", n.Similarity), 45 | } 46 | } 47 | 48 | writer := tablewriter.NewWriter(os.Stdout) 49 | writer.SetHeader([]string{"Rank", "Word", "Similarity"}) 50 | writer.SetBorder(false) 51 | writer.AppendBulk(table) 52 | writer.Render() 53 | } 54 | 55 | type Searcher struct { 56 | Items embedding.Embeddings 57 | } 58 | 59 | func New(embs ...embedding.Embedding) (*Searcher, error) { 60 | if err := embedding.Embeddings(embs).Validate(); err != nil { 61 | return nil, err 62 | } 63 | return &Searcher{ 64 | Items: embs, 65 | }, nil 66 | } 67 | 68 | func (s *Searcher) SearchInternal(word string, k int) (Neighbors, error) { 69 | var q embedding.Embedding 70 | for _, item := range s.Items { 71 | if item.Word == word { 72 | q = item 73 | break 74 | } 75 | } 76 | if q.Word == "" { 77 | return nil, errors.Errorf("%s is not found in searcher", word) 78 | } 79 | 80 | neighbors, err := s.Search(q, k, word) 81 | if err != nil { 82 | return nil, err 83 | } 84 | return neighbors, nil 85 | } 86 | 87 | func (s *Searcher) SearchVector(query []float64, k int) (Neighbors, error) { 88 | return s.Search(embedding.Embedding{ 89 | Vector: query, 90 | Norm: embutil.Norm(query), 91 | }, k) 92 | } 93 | 94 | func (s *Searcher) Search(query embedding.Embedding, k int, ignoreWord ...string) 
(Neighbors, error) { 95 | neighbors := make(Neighbors, k) 96 | 97 | // Map to quickly check if a word is to be ignored. 98 | ignoreWords := make(map[string]int, len(ignoreWord)) 99 | for _, word := range ignoreWord { 100 | ignoreWords[word] = 0 101 | } 102 | 103 | // Keep track of lowest similarity score. 104 | low := .0 105 | for _, item := range s.Items { 106 | // Drop iteration if the word is to be ignored. 107 | _, ok := ignoreWords[item.Word] 108 | if ok { 109 | continue 110 | } 111 | 112 | score := searchutil.Cosine(query.Vector, item.Vector, query.Norm, item.Norm) 113 | // ignore current word if it's similarity is below the lowest score. 114 | if score > low { 115 | temp := Neighbor{Word: item.Word, Similarity: score} 116 | // Bubble up the best match. 117 | for i := 0; i < len(neighbors); i++ { 118 | if temp.Similarity > neighbors[i].Similarity { 119 | temp, neighbors[i] = neighbors[i], temp 120 | neighbors[i].Rank = uint(i) + 1 121 | } 122 | } 123 | // # neighbors is aways sorted. 124 | low = neighbors[len(neighbors)-1].Similarity 125 | } 126 | } 127 | 128 | // Guard too few items in model. 129 | for i := 0; i < len(neighbors); i++ { 130 | if neighbors[i].Word == "" { 131 | k = i 132 | } 133 | } 134 | 135 | return neighbors[:k], nil 136 | } 137 | -------------------------------------------------------------------------------- /pkg/corpus/fs/fs.go: -------------------------------------------------------------------------------- 1 | // Copyright © 2020 wego authors 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package fs 16 | 17 | import ( 18 | "fmt" 19 | "io" 20 | "strings" 21 | 22 | "github.com/ynqa/wego/pkg/corpus" 23 | co "github.com/ynqa/wego/pkg/corpus/cooccurrence" 24 | "github.com/ynqa/wego/pkg/corpus/cpsutil" 25 | "github.com/ynqa/wego/pkg/corpus/dictionary" 26 | "github.com/ynqa/wego/pkg/util/clock" 27 | "github.com/ynqa/wego/pkg/util/verbose" 28 | ) 29 | 30 | type Corpus struct { 31 | doc io.ReadSeeker 32 | 33 | dic *dictionary.Dictionary 34 | cooc *co.Cooccurrence 35 | maxLen int 36 | 37 | toLower bool 38 | filters cpsutil.Filters 39 | } 40 | 41 | func New(r io.ReadSeeker, toLower bool, maxCount, minCount int) corpus.Corpus { 42 | return &Corpus{ 43 | doc: r, 44 | dic: dictionary.New(), 45 | 46 | toLower: toLower, 47 | filters: cpsutil.Filters{ 48 | cpsutil.MaxCount(maxCount), 49 | cpsutil.MinCount(minCount), 50 | }, 51 | } 52 | } 53 | 54 | func (c *Corpus) IndexedDoc() []int { 55 | return nil 56 | } 57 | 58 | func (c *Corpus) BatchWords(ch chan []int, batchSize int) error { 59 | cursor, ids := 0, make([]int, batchSize) 60 | if err := cpsutil.ReadWord(c.doc, func(word string) error { 61 | if c.toLower { 62 | word = strings.ToLower(word) 63 | } 64 | 65 | id, _ := c.dic.ID(word) 66 | if c.filters.Any(id, c.dic) { 67 | return nil 68 | } 69 | 70 | ids[cursor] = id 71 | cursor++ 72 | if cursor == batchSize { 73 | ch <- ids 74 | cursor, ids = 0, make([]int, batchSize) 75 | } 76 | return nil 77 | }); err != nil { 78 | return err 79 | } 80 | 81 | // send left words 82 | ch <- ids[:cursor] 83 | close(ch) 84 | return nil 85 | } 86 | 87 | func (c *Corpus) Dictionary() *dictionary.Dictionary { 88 | return c.dic 89 | } 90 | 91 | func (c *Corpus) Cooccurrence() *co.Cooccurrence { 92 | return c.cooc 93 | } 94 | 95 | func (c *Corpus) Len() int { 96 | return c.maxLen 97 | } 98 | 99 | func (c *Corpus) Load(with *corpus.WithCooccurrence, verbose 
*verbose.Verbose, logBatch int) error { 100 | clk := clock.New() 101 | if err := cpsutil.ReadWord(c.doc, func(word string) error { 102 | if c.toLower { 103 | word = strings.ToLower(word) 104 | } 105 | 106 | c.dic.Add(word) 107 | c.maxLen++ 108 | verbose.Do(func() { 109 | if c.maxLen%logBatch == 0 { 110 | fmt.Printf("read %d words %v\r", c.maxLen, clk.AllElapsed()) 111 | } 112 | }) 113 | 114 | return nil 115 | }); err != nil { 116 | return err 117 | } 118 | verbose.Do(func() { 119 | fmt.Printf("read %d words %v\r\n", c.maxLen, clk.AllElapsed()) 120 | }) 121 | 122 | clk = clock.New() 123 | var ( 124 | err error 125 | cursor int 126 | ) 127 | if with != nil { 128 | c.cooc, err = co.New(with.CountType) 129 | if err != nil { 130 | return err 131 | } 132 | 133 | if err = cpsutil.ReadWordWithForwardContext(c.doc, with.Window, func(w1, w2 string) error { 134 | id1, _ := c.dic.ID(w1) 135 | id2, _ := c.dic.ID(w2) 136 | if err := c.cooc.Add(id1, id2); err != nil { 137 | return err 138 | } 139 | cursor++ 140 | verbose.Do(func() { 141 | if cursor%logBatch == 0 { 142 | fmt.Printf("read %d tuples %v\r", cursor, clk.AllElapsed()) 143 | } 144 | }) 145 | return nil 146 | }); err != nil { 147 | return err 148 | } 149 | verbose.Do(func() { 150 | fmt.Printf("read %d tuples %v\r\n", cursor, clk.AllElapsed()) 151 | }) 152 | } 153 | 154 | return nil 155 | } 156 | -------------------------------------------------------------------------------- /pkg/search/search_test.go: -------------------------------------------------------------------------------- 1 | // Copyright © 2020 wego authors 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package search 16 | 17 | import ( 18 | "reflect" 19 | "testing" 20 | 21 | "github.com/stretchr/testify/assert" 22 | 23 | "github.com/ynqa/wego/pkg/embedding" 24 | "github.com/ynqa/wego/pkg/embedding/embutil" 25 | ) 26 | 27 | func TestSearchInternal(t *testing.T) { 28 | type args struct { 29 | word string 30 | k int 31 | } 32 | 33 | testCases := []struct { 34 | name string 35 | items embedding.Embeddings 36 | args args 37 | expect Neighbors 38 | }{ 39 | { 40 | name: "search internal", 41 | items: embedding.Embeddings{ 42 | { 43 | Word: "apple", 44 | Dim: 5, 45 | Vector: []float64{1, 1, 1, 1, 1}, 46 | Norm: embutil.Norm([]float64{1, 1, 1, 1, 1}), 47 | }, 48 | { 49 | Word: "banana", 50 | Dim: 5, 51 | Vector: []float64{1, 1, 1, 1, 1}, 52 | Norm: embutil.Norm([]float64{1, 1, 1, 1, 1}), 53 | }, 54 | { 55 | Word: "chocolate", 56 | Dim: 5, 57 | Vector: []float64{0, 0, 0, 0, 0}, 58 | Norm: embutil.Norm([]float64{0, 0, 0, 0, 0}), 59 | }, 60 | { 61 | Word: "dragon", 62 | Dim: 5, 63 | Vector: []float64{-1, -1, -1, -1, -1}, 64 | Norm: embutil.Norm([]float64{-1, -1, -1, -1, -1}), 65 | }, 66 | }, 67 | args: args{ 68 | word: "apple", 69 | k: 1, 70 | }, 71 | expect: Neighbors{ 72 | { 73 | Word: "banana", 74 | Rank: 1, 75 | Similarity: 1., 76 | }, 77 | }, 78 | }, 79 | } 80 | 81 | for _, tc := range testCases { 82 | t.Run(tc.name, func(t *testing.T) { 83 | s, _ := New(tc.items...) 
84 | neighbors, _ := s.SearchInternal(tc.args.word, tc.args.k) 85 | assert.Truef(t, reflect.DeepEqual(neighbors, tc.expect), "Must be equal %v and %v", neighbors, tc.expect) 86 | }) 87 | } 88 | } 89 | 90 | func TestSearchVector(t *testing.T) { 91 | type args struct { 92 | query []float64 93 | k int 94 | } 95 | 96 | testCases := []struct { 97 | name string 98 | items embedding.Embeddings 99 | args args 100 | expect Neighbors 101 | }{ 102 | { 103 | name: "search vector", 104 | items: embedding.Embeddings{ 105 | { 106 | Word: "apple", 107 | Dim: 5, 108 | Vector: []float64{1, 1, 1, 1, 1}, 109 | Norm: embutil.Norm([]float64{1, 1, 1, 1, 1}), 110 | }, 111 | { 112 | Word: "banana", 113 | Dim: 5, 114 | Vector: []float64{1, 1, 1, 1, 1}, 115 | Norm: embutil.Norm([]float64{1, 1, 1, 1, 1}), 116 | }, 117 | { 118 | Word: "chocolate", 119 | Dim: 5, 120 | Vector: []float64{0, 0, 0, 0, 0}, 121 | Norm: embutil.Norm([]float64{0, 0, 0, 0, 0}), 122 | }, 123 | { 124 | Word: "dragon", 125 | Dim: 5, 126 | Vector: []float64{-1, -1, -1, -1, -1}, 127 | Norm: embutil.Norm([]float64{-1, -1, -1, -1, -1}), 128 | }, 129 | }, 130 | args: args{ 131 | query: []float64{-1, -1, -1, -1, -1}, 132 | k: 1, 133 | }, 134 | expect: Neighbors{ 135 | { 136 | Word: "dragon", 137 | Rank: 1, 138 | Similarity: 1., 139 | }, 140 | }, 141 | }, 142 | } 143 | 144 | for _, tc := range testCases { 145 | t.Run(tc.name, func(t *testing.T) { 146 | s, _ := New(tc.items...) 
147 | neighbors, _ := s.SearchVector(tc.args.query, tc.args.k) 148 | assert.Truef(t, reflect.DeepEqual(tc.expect, neighbors), "Must be equal %v and %v", tc.expect, neighbors) 149 | }) 150 | } 151 | } 152 | -------------------------------------------------------------------------------- /pkg/search/console/console.go: -------------------------------------------------------------------------------- 1 | // Copyright © 2020 wego authors 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package console 16 | 17 | import ( 18 | "fmt" 19 | "go/ast" 20 | "go/parser" 21 | "go/token" 22 | 23 | "github.com/peterh/liner" 24 | "github.com/pkg/errors" 25 | "github.com/ynqa/wego/pkg/embedding" 26 | "github.com/ynqa/wego/pkg/embedding/embutil" 27 | "github.com/ynqa/wego/pkg/search" 28 | ) 29 | 30 | type searchparams struct { 31 | dim int 32 | k int 33 | } 34 | 35 | type searchcursor struct { 36 | w1, w2 string 37 | vector []float64 38 | } 39 | 40 | type Console struct { 41 | *liner.State 42 | searcher *search.Searcher 43 | cursor *searchcursor 44 | params *searchparams 45 | } 46 | 47 | func New(searcher *search.Searcher, k int) (*Console, error) { 48 | if searcher.Items.Empty() { 49 | return nil, errors.New("Number of items for searcher must be over 0") 50 | } 51 | return &Console{ 52 | State: liner.NewLiner(), 53 | searcher: searcher, 54 | cursor: &searchcursor{ 55 | vector: make([]float64, searcher.Items[0].Dim), 56 | }, 57 | params: &searchparams{ 58 | dim: searcher.Items[0].Dim, 59 | k: k, 60 | }, 61 | }, nil 62 | } 63 | 64 | func (c *Console) Run() error { 65 | defer c.Close() 66 | for { 67 | l, err := c.Prompt(">> ") 68 | if err != nil { 69 | fmt.Println("error: ", err) 70 | } 71 | switch l { 72 | case "exit": 73 | return nil 74 | case "": 75 | continue 76 | default: 77 | if err := c.eval(l); err != nil { 78 | fmt.Println(err) 79 | } 80 | } 81 | } 82 | } 83 | 84 | func (c *Console) eval(l string) error { 85 | defer func() { 86 | c.cursor.w1 = "" 87 | c.cursor.w2 = "" 88 | c.cursor.vector = make([]float64, c.params.dim) 89 | }() 90 | 91 | expr, err := parser.ParseExpr(l) 92 | if err != nil { 93 | return err 94 | } 95 | 96 | var neighbors search.Neighbors 97 | switch e := expr.(type) { 98 | case *ast.Ident: 99 | neighbors, err = c.searcher.SearchInternal(e.String(), c.params.k) 100 | if err != nil { 101 | fmt.Printf("failed to search with word=%s\n", e.String()) 102 | } 103 | case *ast.BinaryExpr: 104 | if err := c.evalExpr(expr); err != nil { 
105 | return err 106 | } 107 | neighbors, err = c.searcher.Search(embedding.Embedding{ 108 | Vector: c.cursor.vector, 109 | Norm: embutil.Norm(c.cursor.vector), 110 | }, c.params.k, c.cursor.w1, c.cursor.w2) 111 | if err != nil { 112 | fmt.Printf("failed to search with vector=%v\n", c.cursor.vector) 113 | } 114 | default: 115 | return errors.Errorf("invalid type %v", e) 116 | } 117 | neighbors.Describe() 118 | return nil 119 | } 120 | 121 | func (c *Console) evalExpr(expr ast.Expr) error { 122 | switch e := expr.(type) { 123 | case *ast.BinaryExpr: 124 | return c.evalBinaryExpr(e) 125 | case *ast.Ident: 126 | return nil 127 | default: 128 | return errors.Errorf("invalid type %v", e) 129 | } 130 | } 131 | 132 | func (c *Console) evalBinaryExpr(expr *ast.BinaryExpr) error { 133 | xi, err := c.evalAsEmbedding(expr.X) 134 | if err != nil { 135 | return err 136 | } 137 | yi, err := c.evalAsEmbedding(expr.Y) 138 | if err != nil { 139 | return nil 140 | } 141 | c.cursor.w1 = xi.Word 142 | c.cursor.w2 = yi.Word 143 | c.cursor.vector, err = arithmetic(xi.Vector, expr.Op, yi.Vector) 144 | return err 145 | } 146 | 147 | func (c *Console) evalAsEmbedding(expr ast.Expr) (embedding.Embedding, error) { 148 | if err := c.evalExpr(expr); err != nil { 149 | return embedding.Embedding{}, err 150 | } 151 | v, ok := expr.(*ast.Ident) 152 | if !ok { 153 | return embedding.Embedding{}, errors.Errorf("failed to parse %v", expr) 154 | } 155 | vi, ok := c.searcher.Items.Find(v.String()) 156 | if !ok { 157 | return embedding.Embedding{}, errors.Errorf("not found word=%s in vector map", v.String()) 158 | } else if err := vi.Validate(); err != nil { 159 | return embedding.Embedding{}, err 160 | } 161 | return vi, nil 162 | } 163 | 164 | func arithmetic(v1 []float64, op token.Token, v2 []float64) ([]float64, error) { 165 | switch op { 166 | case token.ADD: 167 | return add(v1, v2) 168 | case token.SUB: 169 | return sub(v1, v2) 170 | default: 171 | return nil, errors.Errorf("invalid operator 
%v", op.String()) 172 | } 173 | } 174 | -------------------------------------------------------------------------------- /test/e2e.go: -------------------------------------------------------------------------------- 1 | // Copyright © 2020 wego authors 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package main 16 | 17 | import ( 18 | "fmt" 19 | "io/ioutil" 20 | "log" 21 | "os" 22 | 23 | "github.com/ynqa/wego/pkg/embedding" 24 | "github.com/ynqa/wego/pkg/model" 25 | "github.com/ynqa/wego/pkg/model/glove" 26 | "github.com/ynqa/wego/pkg/model/lexvec" 27 | "github.com/ynqa/wego/pkg/model/modelutil/vector" 28 | "github.com/ynqa/wego/pkg/model/word2vec" 29 | "github.com/ynqa/wego/pkg/search" 30 | ) 31 | 32 | const ( 33 | text8 = "./test/testdata/text8" 34 | query = "microsoft" 35 | ) 36 | 37 | func unwrap(mod model.Model, err error) model.Model { 38 | return mod 39 | } 40 | 41 | func main() { 42 | testcases := []struct { 43 | title string 44 | mod model.Model 45 | }{ 46 | { 47 | title: "word2vec (model=skip-gram, optimizer=negative sampling)", 48 | mod: unwrap(word2vec.New( 49 | word2vec.BatchSize(10000), 50 | word2vec.Dim(50), 51 | word2vec.Goroutines(20), 52 | word2vec.Iter(1), 53 | word2vec.MinCount(10), 54 | word2vec.Model(word2vec.SkipGram), 55 | word2vec.Optimizer(word2vec.NegativeSampling), 56 | word2vec.Verbose(), 57 | word2vec.Window(5), 58 | )), 59 | }, 60 | { 61 | title: "word2vec (model=skip-gram, 
optimizer=hierarchical softmax)", 62 | mod: unwrap(word2vec.New( 63 | word2vec.BatchSize(10000), 64 | word2vec.Dim(50), 65 | word2vec.Goroutines(20), 66 | word2vec.Iter(1), 67 | word2vec.MinCount(10), 68 | word2vec.Model(word2vec.SkipGram), 69 | word2vec.Optimizer(word2vec.HierarchicalSoftmax), 70 | word2vec.Verbose(), 71 | word2vec.Window(5), 72 | )), 73 | }, 74 | { 75 | title: "word2vec (model=cbow, optimizer=negative sampling)", 76 | mod: unwrap(word2vec.New( 77 | word2vec.BatchSize(10000), 78 | word2vec.Dim(50), 79 | word2vec.Goroutines(20), 80 | word2vec.Iter(1), 81 | word2vec.MinCount(10), 82 | word2vec.Model(word2vec.Cbow), 83 | word2vec.Optimizer(word2vec.NegativeSampling), 84 | word2vec.Verbose(), 85 | word2vec.Window(5), 86 | )), 87 | }, 88 | { 89 | title: "word2vec (model=cbow, optimizer=hierarchical softmax)", 90 | mod: unwrap(word2vec.New( 91 | word2vec.BatchSize(10000), 92 | word2vec.Dim(50), 93 | word2vec.Goroutines(20), 94 | word2vec.Iter(1), 95 | word2vec.MinCount(10), 96 | word2vec.Model(word2vec.Cbow), 97 | word2vec.Optimizer(word2vec.HierarchicalSoftmax), 98 | word2vec.Verbose(), 99 | word2vec.Window(5), 100 | )), 101 | }, 102 | { 103 | title: "glove (solver=sgd)", 104 | mod: unwrap(glove.New( 105 | glove.BatchSize(10000), 106 | glove.Dim(50), 107 | glove.Goroutines(20), 108 | glove.Initlr(0.03), 109 | glove.Iter(3), 110 | glove.MinCount(20), 111 | glove.Solver(glove.Stochastic), 112 | glove.Verbose(), 113 | glove.Window(10), 114 | )), 115 | }, 116 | { 117 | title: "glove (solver=adagrad)", 118 | mod: unwrap(glove.New( 119 | glove.BatchSize(10000), 120 | glove.Dim(50), 121 | glove.Goroutines(20), 122 | glove.Initlr(0.03), 123 | glove.Iter(3), 124 | glove.MinCount(20), 125 | glove.Solver(glove.AdaGrad), 126 | glove.Verbose(), 127 | glove.Window(10), 128 | )), 129 | }, 130 | { 131 | title: "lexvec", 132 | mod: unwrap(lexvec.New( 133 | lexvec.BatchSize(10000), 134 | lexvec.Dim(50), 135 | lexvec.Goroutines(20), 136 | lexvec.Iter(1), 137 | 
lexvec.MinCount(10), 138 | lexvec.Relation(lexvec.PPMI), 139 | lexvec.Verbose(), 140 | lexvec.Window(10), 141 | )), 142 | }, 143 | } 144 | for _, tt := range testcases { 145 | fmt.Printf("test in %s\n", tt.title) 146 | if err := e2e(tt.mod); err != nil { 147 | log.Fatal(err) 148 | } 149 | } 150 | } 151 | 152 | func e2e(mod model.Model) error { 153 | input, err := os.Open(text8) 154 | if err != nil { 155 | return err 156 | } 157 | defer input.Close() 158 | output, err := ioutil.TempFile("", "wego") 159 | if err != nil { 160 | log.Fatal(err) 161 | } 162 | if err := mod.Train(input); err != nil { 163 | return err 164 | } 165 | if err := mod.Save(output, vector.Agg); err != nil { 166 | return err 167 | } 168 | 169 | output.Seek(0, 0) 170 | 171 | embs, err := embedding.Load(output) 172 | if err != nil { 173 | return err 174 | } 175 | searcher, err := search.New(embs...) 176 | if err != nil { 177 | return err 178 | } 179 | neighbors, err := searcher.SearchInternal(query, 10) 180 | if err != nil { 181 | return err 182 | } 183 | neighbors.Describe() 184 | 185 | return nil 186 | } 187 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Word Embeddings in Go 2 | 3 | [![Go](https://github.com/ynqa/wego/actions/workflows/go.yml/badge.svg)](https://github.com/ynqa/wego/actions/workflows/go.yml) 4 | [![GoDoc](https://godoc.org/github.com/ynqa/wego?status.svg)](https://godoc.org/github.com/ynqa/wego) 5 | [![Go Report Card](https://goreportcard.com/badge/github.com/ynqa/wego)](https://goreportcard.com/report/github.com/ynqa/wego) 6 | 7 | *wego* is the implementations **from scratch** for word embeddings (a.k.a word representation) models in Go. 8 | 9 | ## What's word embeddings? 10 | 11 | [Word embeddings](https://en.wikipedia.org/wiki/Word_embeddings) make words' meaning, structure, and concept mapping into vector space with a low dimension. 
For representative instance: 12 | ``` 13 | Vector("King") - Vector("Man") + Vector("Woman") = Vector("Queen") 14 | ``` 15 | Like this example, the models generate word vectors that could calculate word meaning by arithmetic operations for other vectors. 16 | 17 | ## Features 18 | 19 | The following models to capture the word vectors are supported in *wego*: 20 | 21 | - Word2Vec: Distributed Representations of Words and Phrases and their Compositionality [[pdf]](https://papers.nips.cc/paper/5021-distributed-representations-of-words-and-phrases-and-their-compositionality.pdf) 22 | 23 | - GloVe: Global Vectors for Word Representation [[pdf]](http://nlp.stanford.edu/pubs/glove.pdf) 24 | 25 | - LexVec: Matrix Factorization using Window Sampling and Negative Sampling for Improved Word Representations [[pdf]](http://anthology.aclweb.org/P16-2068) 26 | 27 | Also, wego provides nearest neighbor search tools that calculate the distances between word vectors and find the nearest words for the target word. "near" for word vectors means "similar" for words. 28 | 29 | Please see the [Usage](#Usage) section if you want to know how to use these for more details. 30 | 31 | ## Why Go? 32 | 33 | Inspired by [Data Science in Go](https://speakerdeck.com/chewxy/data-science-in-go) @chewxy 34 | 35 | ## Installation 36 | 37 | Use `go` command to get this pkg. 38 | 39 | ``` 40 | $ go get -u github.com/ynqa/wego 41 | $ bin/wego -h 42 | ``` 43 | 44 | ## Usage 45 | 46 | *wego* provides CLI and Go SDK for word embeddings. 
47 | 48 | ### CLI 49 | 50 | ``` 51 | Usage: 52 | wego [flags] 53 | wego [command] 54 | 55 | Available Commands: 56 | console Console to investigate word vectors 57 | glove GloVe: Global Vectors for Word Representation 58 | help Help about any command 59 | lexvec Lexvec: Matrix Factorization using Window Sampling and Negative Sampling for Improved Word Representations 60 | query Query similar words 61 | word2vec Word2Vec: Continuous Bag-of-Words and Skip-gram model 62 | ``` 63 | 64 | `word2vec`, `glove` and `lexvec` execute the workflow to generate word vectors: 65 | 1. Build a dictionary for vocabularies and count word frequencies by scanning a given corpus. 66 | 2. Start training. The execution time depends on the size of the corpus, the hyperparameters (flags), and so on. 67 | 3. Save the words and their vectors as a text file. 68 | 69 | `query` and `console` are the commands which are related to nearest neighbor searching for the trained word vectors. 70 | 71 | `query` outputs the words most similar to a given word, using the word vectors generated by the above models. 72 | 73 | e.g. `wego query -i word_vector.txt microsoft`: 74 | ``` 75 | RANK | WORD | SIMILARITY 76 | -------+-----------+------------- 77 | 1 | hypercard | 0.791492 78 | 2 | xp | 0.768939 79 | 3 | software | 0.763369 80 | 4 | freebsd | 0.761084 81 | 5 | unix | 0.749563 82 | 6 | linux | 0.747327 83 | 7 | ibm | 0.742115 84 | 8 | windows | 0.731136 85 | 9 | desktop | 0.715790 86 | 10 | linspire | 0.711171 87 | ``` 88 | 89 | *wego* does not produce identical word vectors across runs because it adopts the HogWild! algorithm, which updates the parameters (in this case the word vectors) asynchronously. 90 | 91 | `console` is for REPL mode to calculate the basic arithmetic operations (`+` and `-`) for word vectors. 92 | 93 | ### Go SDK 94 | 95 | The hyperparameters of a model are set through functional options. 
96 | 97 | ```go 98 | model, err := word2vec.New( 99 | word2vec.Window(5), 100 | word2vec.Model(word2vec.Cbow), 101 | word2vec.Optimizer(word2vec.NegativeSampling), 102 | word2vec.NegativeSampleSize(5), 103 | word2vec.Verbose(), 104 | ) 105 | ``` 106 | 107 | The models have some methods: 108 | 109 | ```go 110 | type Model interface { 111 | Train(io.ReadSeeker) error 112 | Save(io.Writer, vector.Type) error 113 | WordVector(vector.Type) *matrix.Matrix 114 | } 115 | ``` 116 | 117 | ### Formats 118 | 119 | To train word vectors, wego requires the following file formats for inputs/outputs. 120 | 121 | #### Input 122 | 123 | The input corpus must be a text file in which words are separated by spaces, like [text8](http://mattmahoney.net/dc/textdata.html). 124 | 125 | ``` 126 | word1 word2 word3 ... 127 | ``` 128 | 129 | #### Output 130 | 131 | After training, *wego* saves the word vectors into a txt file with the following format (`N` is the dimension of the word vectors you have given): 132 | 133 | ``` 134 | <word> <value_1> <value_2> ... <value_N> 135 | ``` 136 | -------------------------------------------------------------------------------- /pkg/model/glove/glove.go: -------------------------------------------------------------------------------- 1 | // Copyright © 2020 wego authors 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package glove 16 | 17 | import ( 18 | "context" 19 | "fmt" 20 | "io" 21 | "math/rand" 22 | "sync" 23 | 24 | "golang.org/x/sync/semaphore" 25 | 26 | "github.com/pkg/errors" 27 | "github.com/ynqa/wego/pkg/corpus" 28 | "github.com/ynqa/wego/pkg/corpus/fs" 29 | "github.com/ynqa/wego/pkg/corpus/memory" 30 | "github.com/ynqa/wego/pkg/model" 31 | "github.com/ynqa/wego/pkg/model/modelutil" 32 | "github.com/ynqa/wego/pkg/model/modelutil/matrix" 33 | "github.com/ynqa/wego/pkg/model/modelutil/vector" 34 | "github.com/ynqa/wego/pkg/util/clock" 35 | "github.com/ynqa/wego/pkg/util/verbose" 36 | ) 37 | 38 | type glove struct { 39 | opts Options 40 | 41 | corpus corpus.Corpus 42 | 43 | param *matrix.Matrix 44 | solver solver 45 | 46 | verbose *verbose.Verbose 47 | } 48 | 49 | func New(opts ...ModelOption) (model.Model, error) { 50 | options := DefaultOptions() 51 | for _, fn := range opts { 52 | fn(&options) 53 | } 54 | 55 | return NewForOptions(options) 56 | } 57 | 58 | func NewForOptions(opts Options) (model.Model, error) { 59 | // TODO: validate Options 60 | v := verbose.New(opts.Verbose) 61 | return &glove{ 62 | opts: opts, 63 | 64 | verbose: v, 65 | }, nil 66 | } 67 | 68 | func (g *glove) Train(r io.ReadSeeker) error { 69 | if g.opts.DocInMemory { 70 | g.corpus = memory.New(r, g.opts.ToLower, g.opts.MaxCount, g.opts.MinCount) 71 | } else { 72 | g.corpus = fs.New(r, g.opts.ToLower, g.opts.MaxCount, g.opts.MinCount) 73 | } 74 | 75 | if err := g.corpus.Load( 76 | &corpus.WithCooccurrence{ 77 | CountType: g.opts.CountType, 78 | Window: g.opts.Window, 79 | }, 80 | g.verbose, g.opts.LogBatch, 81 | ); err != nil { 82 | return err 83 | } 84 | 85 | dic, dim := g.corpus.Dictionary(), g.opts.Dim 86 | 87 | dimAndBias := dim + 1 88 | g.param = matrix.New( 89 | dic.Len()*2, 90 | dimAndBias, 91 | func(_ int, vec []float64) { 92 | for i := 0; i < dim+1; i++ { 93 | vec[i] = rand.Float64() / float64(dim) 94 | } 95 | }, 96 | ) 97 | 98 | switch g.opts.SolverType { 99 | case Stochastic: 
100 | g.solver = newStochastic(g.opts) 101 | case AdaGrad: 102 | g.solver = newAdaGrad(dic, g.opts) 103 | default: 104 | return errors.Errorf("invalid solver: %s not in %s|%s", g.opts.SolverType, Stochastic, AdaGrad) 105 | } 106 | 107 | return g.train() 108 | } 109 | 110 | func (g *glove) train() error { 111 | items := g.makeItems(g.corpus.Cooccurrence()) 112 | itemSize := len(items) 113 | indexPerThread := modelutil.IndexPerThread( 114 | g.opts.Goroutines, 115 | itemSize, 116 | ) 117 | 118 | for i := 0; i < g.opts.Iter; i++ { 119 | trained, clk := make(chan struct{}), clock.New() 120 | go g.observe(trained, clk) 121 | 122 | sem := semaphore.NewWeighted(int64(g.opts.Goroutines)) 123 | wg := &sync.WaitGroup{} 124 | 125 | for i := 0; i < g.opts.Goroutines; i++ { 126 | wg.Add(1) 127 | s, e := indexPerThread[i], indexPerThread[i+1] 128 | go g.trainPerThread(items[s:e], trained, sem, wg) 129 | } 130 | 131 | wg.Wait() 132 | close(trained) 133 | } 134 | return nil 135 | } 136 | 137 | func (g *glove) trainPerThread( 138 | items []item, 139 | trained chan struct{}, 140 | sem *semaphore.Weighted, 141 | wg *sync.WaitGroup, 142 | ) error { 143 | defer func() { 144 | wg.Done() 145 | sem.Release(1) 146 | }() 147 | 148 | if err := sem.Acquire(context.Background(), 1); err != nil { 149 | return err 150 | } 151 | 152 | dic := g.corpus.Dictionary() 153 | for _, item := range items { 154 | g.solver.trainOne(item.l1, item.l2+dic.Len(), g.param, item.f, item.coef) 155 | g.solver.trainOne(item.l1+dic.Len(), item.l2, g.param, item.f, item.coef) 156 | trained <- struct{}{} 157 | } 158 | 159 | return nil 160 | } 161 | 162 | func (g *glove) observe(trained chan struct{}, clk *clock.Clock) { 163 | var cnt int 164 | for range trained { 165 | g.verbose.Do(func() { 166 | cnt++ 167 | if cnt%g.opts.LogBatch == 0 { 168 | fmt.Printf("trained %d items %v\r", cnt, clk.AllElapsed()) 169 | } 170 | }) 171 | } 172 | g.verbose.Do(func() { 173 | fmt.Printf("trained %d items %v\r\n", cnt, clk.AllElapsed()) 
174 | }) 175 | } 176 | 177 | func (g *glove) Save(f io.Writer, typ vector.Type) error { 178 | return vector.Save(f, g.corpus.Dictionary(), g.WordVector(typ), g.verbose, g.opts.LogBatch) 179 | } 180 | 181 | func (g *glove) WordVector(typ vector.Type) *matrix.Matrix { 182 | var mat *matrix.Matrix 183 | dic := g.corpus.Dictionary() 184 | if typ == vector.Agg { 185 | mat = matrix.New(dic.Len(), g.opts.Dim, 186 | func(row int, vec []float64) { 187 | for i := 0; i < g.opts.Dim; i++ { 188 | vec[i] = g.param.Slice(row)[i] 189 | } 190 | }, 191 | ) 192 | } else { 193 | mat = matrix.New(dic.Len(), g.opts.Dim, 194 | func(row int, vec []float64) { 195 | for i := 0; i < g.opts.Dim; i++ { 196 | vec[i] = g.param.Slice(row)[i] + g.param.Slice(row + dic.Len())[i] 197 | } 198 | }, 199 | ) 200 | } 201 | return mat 202 | } 203 | -------------------------------------------------------------------------------- /pkg/model/glove/options.go: -------------------------------------------------------------------------------- 1 | // Copyright © 2020 wego authors 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | package glove 15 | 16 | import ( 17 | "fmt" 18 | "runtime" 19 | 20 | "github.com/spf13/cobra" 21 | co "github.com/ynqa/wego/pkg/corpus/cooccurrence" 22 | ) 23 | 24 | type SolverType = string 25 | 26 | const ( 27 | Stochastic SolverType = "sgd" 28 | AdaGrad SolverType = "adagrad" 29 | ) 30 | 31 | var ( 32 | defaultAlpha = 0.75 33 | defaultBatchSize = 10000 34 | defaultCountType = co.Increment 35 | defaultDim = 10 36 | defaultDocInMemory = false 37 | defaultGoroutines = runtime.NumCPU() 38 | defaultInitlr = 0.025 39 | defaultIter = 15 40 | defaultLogBatch = 100000 41 | defaultMaxCount = -1 42 | defaultMinCount = 5 43 | defaultSolverType = Stochastic 44 | defaultSubsampleThreshold = 1.0e-3 45 | defaultToLower = false 46 | defaultVerbose = false 47 | defaultWindow = 5 48 | defaultXmax = 100 49 | ) 50 | 51 | type Options struct { 52 | Alpha float64 53 | BatchSize int 54 | CountType co.CountType 55 | Dim int 56 | DocInMemory bool 57 | Goroutines int 58 | Initlr float64 59 | Iter int 60 | LogBatch int 61 | MaxCount int 62 | MinCount int 63 | SolverType SolverType 64 | SubsampleThreshold float64 65 | ToLower bool 66 | Verbose bool 67 | Window int 68 | Xmax int 69 | } 70 | 71 | func DefaultOptions() Options { 72 | return Options{ 73 | Alpha: defaultAlpha, 74 | BatchSize: defaultBatchSize, 75 | CountType: defaultCountType, 76 | Dim: defaultDim, 77 | DocInMemory: defaultDocInMemory, 78 | Goroutines: defaultGoroutines, 79 | Initlr: defaultInitlr, 80 | Iter: defaultIter, 81 | LogBatch: defaultLogBatch, 82 | MaxCount: defaultMaxCount, 83 | MinCount: defaultMinCount, 84 | SolverType: defaultSolverType, 85 | SubsampleThreshold: defaultSubsampleThreshold, 86 | ToLower: defaultToLower, 87 | Verbose: defaultVerbose, 88 | Window: defaultWindow, 89 | Xmax: defaultXmax, 90 | } 91 | } 92 | 93 | func LoadForCmd(cmd *cobra.Command, opts *Options) { 94 | cmd.Flags().Float64Var(&opts.Alpha, "alpha", defaultAlpha, "exponent of weighting function") 95 | cmd.Flags().IntVar(&opts.BatchSize, 
"batch", defaultBatchSize, "batch size to train") 96 | cmd.Flags().StringVar(&opts.CountType, "cnt", defaultCountType, fmt.Sprintf("count type for co-occurrence words. One of %s|%s", co.Increment, co.Proximity)) 97 | cmd.Flags().IntVarP(&opts.Dim, "dim", "d", defaultDim, "dimension for word vector") 98 | cmd.Flags().IntVar(&opts.Goroutines, "goroutines", defaultGoroutines, "number of goroutine") 99 | cmd.Flags().BoolVar(&opts.DocInMemory, "in-memory", defaultDocInMemory, "whether to store the doc in memory") 100 | cmd.Flags().Float64Var(&opts.Initlr, "initlr", defaultInitlr, "initial learning rate") 101 | cmd.Flags().IntVar(&opts.Iter, "iter", defaultIter, "number of iteration") 102 | cmd.Flags().IntVar(&opts.LogBatch, "log-batch", defaultLogBatch, "batch size to log for counting words") 103 | cmd.Flags().IntVar(&opts.MaxCount, "max-count", defaultMaxCount, "upper limit to filter words") 104 | cmd.Flags().IntVar(&opts.MinCount, "min-count", defaultMinCount, "lower limit to filter words") 105 | cmd.Flags().StringVar(&opts.SolverType, "solver", defaultSolverType, fmt.Sprintf("solver for GloVe objective. 
One of: %s|%s", Stochastic, AdaGrad)) 106 | cmd.Flags().Float64Var(&opts.SubsampleThreshold, "threshold", defaultSubsampleThreshold, "threshold for subsampling") 107 | cmd.Flags().BoolVar(&opts.ToLower, "to-lower", defaultToLower, "whether the words on corpus convert to lowercase or not") 108 | cmd.Flags().BoolVar(&opts.Verbose, "verbose", defaultVerbose, "verbose mode") 109 | cmd.Flags().IntVarP(&opts.Window, "window", "w", defaultWindow, "context window size") 110 | cmd.Flags().IntVar(&opts.Xmax, "xmax", defaultXmax, "specifying cutoff in weighting function") 111 | } 112 | 113 | type ModelOption func(*Options) 114 | 115 | func Alpha(v float64) ModelOption { 116 | return ModelOption(func(opts *Options) { 117 | opts.Alpha = v 118 | }) 119 | } 120 | 121 | func BatchSize(v int) ModelOption { 122 | return ModelOption(func(opts *Options) { 123 | opts.BatchSize = v 124 | }) 125 | } 126 | 127 | func DocInMemory() ModelOption { 128 | return ModelOption(func(opts *Options) { 129 | opts.DocInMemory = true 130 | }) 131 | } 132 | 133 | func Goroutines(v int) ModelOption { 134 | return ModelOption(func(opts *Options) { 135 | opts.Goroutines = v 136 | }) 137 | } 138 | 139 | func Dim(v int) ModelOption { 140 | return ModelOption(func(opts *Options) { 141 | opts.Dim = v 142 | }) 143 | } 144 | 145 | func Initlr(v float64) ModelOption { 146 | return ModelOption(func(opts *Options) { 147 | opts.Initlr = v 148 | }) 149 | } 150 | 151 | func Iter(v int) ModelOption { 152 | return ModelOption(func(opts *Options) { 153 | opts.Iter = v 154 | }) 155 | } 156 | 157 | func MaxCount(v int) ModelOption { 158 | return ModelOption(func(opts *Options) { 159 | opts.MaxCount = v 160 | }) 161 | } 162 | 163 | func MinCount(v int) ModelOption { 164 | return ModelOption(func(opts *Options) { 165 | opts.MinCount = v 166 | }) 167 | } 168 | 169 | func Solver(typ SolverType) ModelOption { 170 | return ModelOption(func(opts *Options) { 171 | opts.SolverType = typ 172 | }) 173 | } 174 | 175 | func 
SubsampleThreshold(v float64) ModelOption { 176 | return ModelOption(func(opts *Options) { 177 | opts.SubsampleThreshold = v 178 | }) 179 | } 180 | 181 | func ToLower() ModelOption { 182 | return ModelOption(func(opts *Options) { 183 | opts.ToLower = true 184 | }) 185 | } 186 | 187 | func Verbose() ModelOption { 188 | return ModelOption(func(opts *Options) { 189 | opts.Verbose = true 190 | }) 191 | } 192 | 193 | func Window(v int) ModelOption { 194 | return ModelOption(func(opts *Options) { 195 | opts.Window = v 196 | }) 197 | } 198 | 199 | func Xmax(v int) ModelOption { 200 | return ModelOption(func(opts *Options) { 201 | opts.Xmax = v 202 | }) 203 | } 204 | -------------------------------------------------------------------------------- /pkg/model/word2vec/word2vec.go: -------------------------------------------------------------------------------- 1 | // Copyright © 2020 wego authors 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package word2vec 16 | 17 | import ( 18 | "context" 19 | "fmt" 20 | "io" 21 | "math/rand" 22 | "sync" 23 | 24 | "golang.org/x/sync/semaphore" 25 | 26 | "github.com/pkg/errors" 27 | "github.com/ynqa/wego/pkg/corpus" 28 | "github.com/ynqa/wego/pkg/corpus/fs" 29 | "github.com/ynqa/wego/pkg/corpus/memory" 30 | "github.com/ynqa/wego/pkg/model" 31 | "github.com/ynqa/wego/pkg/model/modelutil" 32 | "github.com/ynqa/wego/pkg/model/modelutil/matrix" 33 | "github.com/ynqa/wego/pkg/model/modelutil/subsample" 34 | "github.com/ynqa/wego/pkg/model/modelutil/vector" 35 | "github.com/ynqa/wego/pkg/util/clock" 36 | "github.com/ynqa/wego/pkg/util/verbose" 37 | ) 38 | 39 | type word2vec struct { 40 | opts Options 41 | 42 | corpus corpus.Corpus 43 | 44 | param *matrix.Matrix 45 | subsampler *subsample.Subsampler 46 | currentlr float64 47 | mod mod 48 | optimizer optimizer 49 | 50 | verbose *verbose.Verbose 51 | } 52 | 53 | func New(opts ...ModelOption) (model.Model, error) { 54 | options := DefaultOptions() 55 | for _, fn := range opts { 56 | fn(&options) 57 | } 58 | 59 | return NewForOptions(options) 60 | } 61 | 62 | func NewForOptions(opts Options) (model.Model, error) { 63 | // TODO: validate Options 64 | v := verbose.New(opts.Verbose) 65 | return &word2vec{ 66 | opts: opts, 67 | 68 | currentlr: opts.Initlr, 69 | 70 | verbose: v, 71 | }, nil 72 | } 73 | 74 | func (w *word2vec) Train(r io.ReadSeeker) error { 75 | if w.opts.DocInMemory { 76 | w.corpus = memory.New(r, w.opts.ToLower, w.opts.MaxCount, w.opts.MinCount) 77 | } else { 78 | w.corpus = fs.New(r, w.opts.ToLower, w.opts.MaxCount, w.opts.MinCount) 79 | } 80 | 81 | if err := w.corpus.Load(nil, w.verbose, w.opts.LogBatch); err != nil { 82 | return err 83 | } 84 | 85 | dic, dim := w.corpus.Dictionary(), w.opts.Dim 86 | 87 | w.param = matrix.New( 88 | dic.Len(), 89 | dim, 90 | func(_ int, vec []float64) { 91 | for i := 0; i < dim; i++ { 92 | vec[i] = (rand.Float64() - 0.5) / float64(dim) 93 | } 94 | }, 95 | ) 96 | 97 | 
w.subsampler = subsample.New(dic, w.opts.SubsampleThreshold) 98 | 99 | switch w.opts.ModelType { 100 | case SkipGram: 101 | w.mod = newSkipGram(w.opts) 102 | case Cbow: 103 | w.mod = newCbow(w.opts) 104 | default: 105 | return errors.Errorf("invalid model: %s not in %s|%s", w.opts.ModelType, Cbow, SkipGram) 106 | } 107 | 108 | switch w.opts.OptimizerType { 109 | case NegativeSampling: 110 | w.optimizer = newNegativeSampling( 111 | w.corpus.Dictionary(), 112 | w.opts, 113 | ) 114 | case HierarchicalSoftmax: 115 | w.optimizer = newHierarchicalSoftmax( 116 | w.corpus.Dictionary(), 117 | w.opts, 118 | ) 119 | default: 120 | return errors.Errorf("invalid optimizer: %s not in %s|%s", w.opts.OptimizerType, NegativeSampling, HierarchicalSoftmax) 121 | } 122 | 123 | if w.opts.DocInMemory { 124 | if err := w.train(); err != nil { 125 | return err 126 | } 127 | } else { 128 | if err := w.batchTrain(); err != nil { 129 | return err 130 | } 131 | } 132 | return nil 133 | } 134 | 135 | func (w *word2vec) train() error { 136 | doc := w.corpus.IndexedDoc() 137 | indexPerThread := modelutil.IndexPerThread( 138 | w.opts.Goroutines, 139 | len(doc), 140 | ) 141 | 142 | for i := 1; i <= w.opts.Iter; i++ { 143 | trained, clk := make(chan struct{}), clock.New() 144 | go w.observe(trained, clk) 145 | 146 | sem := semaphore.NewWeighted(int64(w.opts.Goroutines)) 147 | wg := &sync.WaitGroup{} 148 | 149 | for i := 0; i < w.opts.Goroutines; i++ { 150 | wg.Add(1) 151 | s, e := indexPerThread[i], indexPerThread[i+1] 152 | go w.trainPerThread(doc[s:e], trained, sem, wg) 153 | } 154 | 155 | wg.Wait() 156 | close(trained) 157 | } 158 | return nil 159 | } 160 | 161 | func (w *word2vec) batchTrain() error { 162 | for i := 1; i <= w.opts.Iter; i++ { 163 | trained, clk := make(chan struct{}), clock.New() 164 | go w.observe(trained, clk) 165 | 166 | sem := semaphore.NewWeighted(int64(w.opts.Goroutines)) 167 | wg := &sync.WaitGroup{} 168 | 169 | in := make(chan []int, w.opts.Goroutines) 170 | go 
w.corpus.BatchWords(in, w.opts.BatchSize) 171 | for doc := range in { 172 | wg.Add(1) 173 | go w.trainPerThread(doc, trained, sem, wg) 174 | } 175 | 176 | wg.Wait() 177 | close(trained) 178 | } 179 | return nil 180 | } 181 | 182 | func (w *word2vec) trainPerThread( 183 | doc []int, 184 | trained chan struct{}, 185 | sem *semaphore.Weighted, 186 | wg *sync.WaitGroup, 187 | ) error { 188 | defer func() { 189 | wg.Done() 190 | sem.Release(1) 191 | }() 192 | 193 | if err := sem.Acquire(context.Background(), 1); err != nil { 194 | return err 195 | } 196 | 197 | for pos, id := range doc { 198 | if w.subsampler.Trial(id) { 199 | w.mod.trainOne(doc, pos, w.currentlr, w.param, w.optimizer) 200 | } 201 | trained <- struct{}{} 202 | } 203 | 204 | return nil 205 | } 206 | 207 | func (w *word2vec) observe(trained chan struct{}, clk *clock.Clock) { 208 | var cnt int 209 | for range trained { 210 | cnt++ 211 | if cnt%w.opts.UpdateLRBatch == 0 { 212 | if w.currentlr < w.opts.MinLR { 213 | w.currentlr = w.opts.MinLR 214 | } else { 215 | w.currentlr = w.opts.Initlr * (1.0 - float64(cnt)/float64(w.corpus.Len())) 216 | } 217 | } 218 | w.verbose.Do(func() { 219 | if cnt%w.opts.LogBatch == 0 { 220 | fmt.Printf("trained %d words %v\r", cnt, clk.AllElapsed()) 221 | } 222 | }) 223 | } 224 | w.verbose.Do(func() { 225 | fmt.Printf("trained %d words %v\r\n", cnt, clk.AllElapsed()) 226 | }) 227 | } 228 | 229 | func (w *word2vec) Save(f io.Writer, typ vector.Type) error { 230 | return vector.Save(f, w.corpus.Dictionary(), w.WordVector(typ), w.verbose, w.opts.LogBatch) 231 | } 232 | 233 | func (w *word2vec) WordVector(typ vector.Type) *matrix.Matrix { 234 | var mat *matrix.Matrix 235 | dic := w.corpus.Dictionary() 236 | ng, ok := w.optimizer.(*negativeSampling) 237 | if typ == vector.Agg && ok { 238 | mat = matrix.New(dic.Len(), w.opts.Dim, 239 | func(row int, vec []float64) { 240 | for i := 0; i < w.opts.Dim; i++ { 241 | vec[i] = w.param.Slice(row)[i] + ng.ctx.Slice(row)[i] 242 | } 243 | }, 244 
| ) 245 | } else { 246 | mat = matrix.New(dic.Len(), w.opts.Dim, 247 | func(row int, vec []float64) { 248 | for i := 0; i < w.opts.Dim; i++ { 249 | vec[i] = w.param.Slice(row)[i] 250 | } 251 | }, 252 | ) 253 | } 254 | return mat 255 | } 256 | -------------------------------------------------------------------------------- /pkg/model/lexvec/options.go: -------------------------------------------------------------------------------- 1 | // Copyright © 2020 wego authors 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package lexvec 16 | 17 | import ( 18 | "fmt" 19 | "runtime" 20 | 21 | "github.com/spf13/cobra" 22 | ) 23 | 24 | type RelationType = string 25 | 26 | const ( 27 | PPMI RelationType = "ppmi" 28 | PMI RelationType = "pmi" 29 | Collocation RelationType = "co" 30 | LogCollocation RelationType = "logco" 31 | ) 32 | 33 | var ( 34 | defaultBatchSize = 10000 35 | defaultDim = 10 36 | defaultDocInMemory = false 37 | defaultGoroutines = runtime.NumCPU() 38 | defaultInitlr = 0.025 39 | defaultIter = 15 40 | defaultLogBatch = 100000 41 | defaultMaxCount = -1 42 | defaultMinCount = 5 43 | defaultMinLR = defaultInitlr * 1.0e-4 44 | defaultNegativeSampleSize = 5 45 | defaultRelationType = PPMI 46 | defaultSmooth = 0.75 47 | defaultSubsampleThreshold = 1.0e-3 48 | defaultToLower = false 49 | defaultUpdateLRBatch = 100000 50 | defaultVerbose = false 51 | defaultWindow = 5 52 | ) 53 | 54 | type Options struct { 55 | BatchSize int 56 | Dim int 57 | DocInMemory bool 58 | Goroutines int 59 | Initlr float64 60 | Iter int 61 | LogBatch int 62 | MaxCount int 63 | MinCount int 64 | MinLR float64 65 | NegativeSampleSize int 66 | RelationType RelationType 67 | Smooth float64 68 | SubsampleThreshold float64 69 | ToLower bool 70 | UpdateLRBatch int 71 | Verbose bool 72 | Window int 73 | } 74 | 75 | func DefaultOptions() Options { 76 | return Options{ 77 | BatchSize: defaultBatchSize, 78 | Dim: defaultDim, 79 | DocInMemory: defaultDocInMemory, 80 | Goroutines: defaultGoroutines, 81 | Initlr: defaultInitlr, 82 | Iter: defaultIter, 83 | LogBatch: defaultLogBatch, 84 | MaxCount: defaultMaxCount, 85 | MinCount: defaultMinCount, 86 | MinLR: defaultMinLR, 87 | NegativeSampleSize: defaultNegativeSampleSize, 88 | RelationType: defaultRelationType, 89 | Smooth: defaultSmooth, 90 | SubsampleThreshold: defaultSubsampleThreshold, 91 | ToLower: defaultToLower, 92 | UpdateLRBatch: defaultUpdateLRBatch, 93 | Verbose: defaultVerbose, 94 | Window: defaultWindow, 95 | } 96 | } 97 | func LoadForCmd(cmd 
*cobra.Command, opts *Options) { 98 | cmd.Flags().IntVar(&opts.BatchSize, "batch", defaultBatchSize, "batch size to train") 99 | cmd.Flags().IntVarP(&opts.Dim, "dim", "d", defaultDim, "dimension for word vector") 100 | cmd.Flags().IntVar(&opts.Goroutines, "goroutines", defaultGoroutines, "number of goroutine") 101 | cmd.Flags().BoolVar(&opts.DocInMemory, "in-memory", defaultDocInMemory, "whether to store the doc in memory") 102 | cmd.Flags().Float64Var(&opts.Initlr, "initlr", defaultInitlr, "initial learning rate") 103 | cmd.Flags().IntVar(&opts.Iter, "iter", defaultIter, "number of iteration") 104 | cmd.Flags().IntVar(&opts.LogBatch, "log-batch", defaultLogBatch, "batch size to log for counting words") 105 | cmd.Flags().IntVar(&opts.MaxCount, "max-count", defaultMaxCount, "upper limit to filter words") 106 | cmd.Flags().IntVar(&opts.MinCount, "min-count", defaultMinCount, "lower limit to filter words") 107 | cmd.Flags().Float64Var(&opts.MinLR, "min-lr", defaultMinLR, "lower limit of learning rate") 108 | cmd.Flags().IntVar(&opts.NegativeSampleSize, "sample", defaultNegativeSampleSize, "negative sample size") 109 | cmd.Flags().StringVar(&opts.RelationType, "rel", defaultRelationType, fmt.Sprintf("relation type for co-occurrence words. 
One of %s|%s|%s|%s", PPMI, PMI, Collocation, LogCollocation)) 110 | cmd.Flags().Float64Var(&opts.Smooth, "smooth", defaultSmooth, "smoothing value for co-occurence value") 111 | cmd.Flags().Float64Var(&opts.SubsampleThreshold, "threshold", defaultSubsampleThreshold, "threshold for subsampling") 112 | cmd.Flags().BoolVar(&opts.ToLower, "to-lower", defaultToLower, "whether the words on corpus convert to lowercase or not") 113 | cmd.Flags().IntVar(&opts.UpdateLRBatch, "update-lr-batch", defaultUpdateLRBatch, "batch size to update learning rate") 114 | cmd.Flags().BoolVar(&opts.Verbose, "verbose", defaultVerbose, "verbose mode") 115 | cmd.Flags().IntVarP(&opts.Window, "window", "w", defaultWindow, "context window size") 116 | 117 | } 118 | 119 | type ModelOption func(*Options) 120 | 121 | func BatchSize(v int) ModelOption { 122 | return ModelOption(func(opts *Options) { 123 | opts.BatchSize = v 124 | }) 125 | } 126 | 127 | func DocInMemory() ModelOption { 128 | return ModelOption(func(opts *Options) { 129 | opts.DocInMemory = true 130 | }) 131 | } 132 | 133 | func Goroutines(v int) ModelOption { 134 | return ModelOption(func(opts *Options) { 135 | opts.Goroutines = v 136 | }) 137 | } 138 | 139 | func Dim(v int) ModelOption { 140 | return ModelOption(func(opts *Options) { 141 | opts.Dim = v 142 | }) 143 | } 144 | 145 | func Initlr(v float64) ModelOption { 146 | return ModelOption(func(opts *Options) { 147 | opts.Initlr = v 148 | }) 149 | } 150 | 151 | func Iter(v int) ModelOption { 152 | return ModelOption(func(opts *Options) { 153 | opts.Iter = v 154 | }) 155 | } 156 | 157 | func LogBatch(v int) ModelOption { 158 | return ModelOption(func(opts *Options) { 159 | opts.LogBatch = v 160 | }) 161 | } 162 | 163 | func MaxCount(v int) ModelOption { 164 | return ModelOption(func(opts *Options) { 165 | opts.MaxCount = v 166 | }) 167 | } 168 | 169 | func MinCount(v int) ModelOption { 170 | return ModelOption(func(opts *Options) { 171 | opts.MinCount = v 172 | }) 173 | } 174 | 
175 | func MinLR(v float64) ModelOption { 176 | return ModelOption(func(opts *Options) { 177 | opts.MinLR = v 178 | }) 179 | } 180 | 181 | func NegativeSampleSize(v int) ModelOption { 182 | return ModelOption(func(opts *Options) { 183 | opts.NegativeSampleSize = v 184 | }) 185 | } 186 | 187 | func Relation(typ RelationType) ModelOption { 188 | return ModelOption(func(opts *Options) { 189 | opts.RelationType = typ 190 | }) 191 | } 192 | 193 | func Smooth(v float64) ModelOption { 194 | return ModelOption(func(opts *Options) { 195 | opts.Smooth = v 196 | }) 197 | } 198 | 199 | func SubsampleThreshold(v float64) ModelOption { 200 | return ModelOption(func(opts *Options) { 201 | opts.SubsampleThreshold = v 202 | }) 203 | } 204 | 205 | func ToLower() ModelOption { 206 | return ModelOption(func(opts *Options) { 207 | opts.ToLower = true 208 | }) 209 | } 210 | 211 | func UpdateLRBatch(v int) ModelOption { 212 | return ModelOption(func(opts *Options) { 213 | opts.UpdateLRBatch = v 214 | }) 215 | } 216 | 217 | func Verbose() ModelOption { 218 | return ModelOption(func(opts *Options) { 219 | opts.Verbose = true 220 | }) 221 | } 222 | 223 | func Window(v int) ModelOption { 224 | return ModelOption(func(opts *Options) { 225 | opts.Window = v 226 | }) 227 | } 228 | -------------------------------------------------------------------------------- /pkg/model/lexvec/lexvec.go: -------------------------------------------------------------------------------- 1 | // Copyright © 2020 wego authors 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package lexvec 16 | 17 | import ( 18 | "context" 19 | "fmt" 20 | "io" 21 | "math/rand" 22 | "sync" 23 | 24 | "golang.org/x/sync/semaphore" 25 | 26 | "github.com/ynqa/wego/pkg/corpus" 27 | co "github.com/ynqa/wego/pkg/corpus/cooccurrence" 28 | "github.com/ynqa/wego/pkg/corpus/cooccurrence/encode" 29 | "github.com/ynqa/wego/pkg/corpus/fs" 30 | "github.com/ynqa/wego/pkg/corpus/memory" 31 | "github.com/ynqa/wego/pkg/model" 32 | "github.com/ynqa/wego/pkg/model/modelutil" 33 | "github.com/ynqa/wego/pkg/model/modelutil/matrix" 34 | "github.com/ynqa/wego/pkg/model/modelutil/subsample" 35 | "github.com/ynqa/wego/pkg/model/modelutil/vector" 36 | "github.com/ynqa/wego/pkg/util/clock" 37 | "github.com/ynqa/wego/pkg/util/verbose" 38 | ) 39 | 40 | type lexvec struct { 41 | opts Options 42 | 43 | corpus corpus.Corpus 44 | 45 | param *matrix.Matrix 46 | subsampler *subsample.Subsampler 47 | currentlr float64 48 | 49 | verbose *verbose.Verbose 50 | } 51 | 52 | func New(opts ...ModelOption) (model.Model, error) { 53 | options := DefaultOptions() 54 | for _, fn := range opts { 55 | fn(&options) 56 | } 57 | 58 | return NewForOptions(options) 59 | } 60 | 61 | func NewForOptions(opts Options) (model.Model, error) { 62 | // TODO: validate Options 63 | v := verbose.New(opts.Verbose) 64 | return &lexvec{ 65 | opts: opts, 66 | 67 | currentlr: opts.Initlr, 68 | 69 | verbose: v, 70 | }, nil 71 | } 72 | 73 | func (l *lexvec) Train(r io.ReadSeeker) error { 74 | if l.opts.DocInMemory { 75 | l.corpus = memory.New(r, l.opts.ToLower, l.opts.MaxCount, l.opts.MinCount) 76 | } else { 77 | l.corpus = fs.New(r, l.opts.ToLower, l.opts.MaxCount, l.opts.MinCount) 78 | } 79 | 80 | if err := l.corpus.Load( 81 | &corpus.WithCooccurrence{ 82 | CountType: co.Increment, 83 | Window: l.opts.Window, 84 | }, 85 | l.verbose, l.opts.BatchSize, 86 | ); err != nil { 87 | return err 88 | } 89 | 90 
| dic, dim := l.corpus.Dictionary(), l.opts.Dim 91 | 92 | l.param = matrix.New( 93 | dic.Len()*2, 94 | dim, 95 | func(_ int, vec []float64) { 96 | for i := 0; i < dim; i++ { 97 | vec[i] = (rand.Float64() - 0.5) / float64(dim) 98 | } 99 | }, 100 | ) 101 | 102 | l.subsampler = subsample.New(dic, l.opts.SubsampleThreshold) 103 | 104 | if l.opts.DocInMemory { 105 | if err := l.train(); err != nil { 106 | return err 107 | } 108 | } else { 109 | if err := l.batchTrain(); err != nil { 110 | return err 111 | } 112 | } 113 | return nil 114 | } 115 | 116 | func (l *lexvec) train() error { 117 | items, err := l.makeItems(l.corpus.Cooccurrence()) 118 | if err != nil { 119 | return err 120 | } 121 | 122 | doc := l.corpus.IndexedDoc() 123 | indexPerThread := modelutil.IndexPerThread( 124 | l.opts.Goroutines, 125 | len(doc), 126 | ) 127 | 128 | for i := 1; i <= l.opts.Iter; i++ { 129 | trained, clk := make(chan struct{}), clock.New() 130 | go l.observe(trained, clk) 131 | 132 | sem := semaphore.NewWeighted(int64(l.opts.Goroutines)) 133 | wg := &sync.WaitGroup{} 134 | 135 | for i := 0; i < l.opts.Goroutines; i++ { 136 | wg.Add(1) 137 | s, e := indexPerThread[i], indexPerThread[i+1] 138 | go l.trainPerThread(doc[s:e], items, trained, sem, wg) 139 | } 140 | 141 | wg.Wait() 142 | close(trained) 143 | } 144 | return nil 145 | } 146 | 147 | func (l *lexvec) batchTrain() error { 148 | items, err := l.makeItems(l.corpus.Cooccurrence()) 149 | if err != nil { 150 | return err 151 | } 152 | 153 | for i := 1; i <= l.opts.Iter; i++ { 154 | trained, clk := make(chan struct{}), clock.New() 155 | go l.observe(trained, clk) 156 | 157 | sem := semaphore.NewWeighted(int64(l.opts.Goroutines)) 158 | wg := &sync.WaitGroup{} 159 | 160 | in := make(chan []int, l.opts.Goroutines) 161 | go l.corpus.BatchWords(in, l.opts.BatchSize) 162 | for doc := range in { 163 | wg.Add(1) 164 | go l.trainPerThread(doc, items, trained, sem, wg) 165 | } 166 | 167 | wg.Wait() 168 | close(trained) 169 | } 170 | return nil 
171 | } 172 | 173 | func (l *lexvec) trainPerThread( 174 | doc []int, 175 | items map[uint64]float64, 176 | trained chan struct{}, 177 | sem *semaphore.Weighted, 178 | wg *sync.WaitGroup, 179 | ) error { 180 | defer func() { 181 | wg.Done() 182 | sem.Release(1) 183 | }() 184 | 185 | if err := sem.Acquire(context.Background(), 1); err != nil { 186 | return err 187 | } 188 | 189 | for pos, id := range doc { 190 | if l.subsampler.Trial(id) { 191 | l.trainOne(doc, pos, items) 192 | } 193 | trained <- struct{}{} 194 | } 195 | 196 | return nil 197 | } 198 | 199 | func (l *lexvec) trainOne(doc []int, pos int, items map[uint64]float64) { 200 | dic := l.corpus.Dictionary() 201 | del := modelutil.NextRandom(l.opts.Window) 202 | for a := del; a < l.opts.Window*2+1-del; a++ { 203 | if a == l.opts.Window { 204 | continue 205 | } 206 | c := pos - l.opts.Window + a 207 | if c < 0 || c >= len(doc) { 208 | continue 209 | } 210 | enc := encode.EncodeBigram(uint64(doc[pos]), uint64(doc[c])) 211 | l.update(doc[pos], doc[c], items[enc]) 212 | for n := 0; n < l.opts.NegativeSampleSize; n++ { 213 | sample := modelutil.NextRandom(dic.Len()) 214 | enc := encode.EncodeBigram(uint64(doc[pos]), uint64(sample)) 215 | l.update(doc[pos], sample+dic.Len(), items[enc]) 216 | } 217 | } 218 | } 219 | 220 | func (l *lexvec) update(l1, l2 int, f float64) { 221 | var diff float64 222 | for i := 0; i < l.opts.Dim; i++ { 223 | diff += l.param.Slice(l1)[i] * l.param.Slice(l2)[i] 224 | } 225 | diff = (diff - f) * l.currentlr 226 | for i := 0; i < l.opts.Dim; i++ { 227 | t1 := diff * l.param.Slice(l2)[i] 228 | t2 := diff * l.param.Slice(l1)[i] 229 | l.param.Slice(l1)[i] -= t1 230 | l.param.Slice(l2)[i] -= t2 231 | } 232 | } 233 | 234 | func (l *lexvec) observe(trained chan struct{}, clk *clock.Clock) { 235 | var cnt int 236 | for range trained { 237 | cnt++ 238 | if cnt%l.opts.UpdateLRBatch == 0 { 239 | if l.currentlr < l.opts.MinLR { 240 | l.currentlr = l.opts.MinLR 241 | } else { 242 | l.currentlr = 
l.opts.Initlr * (1.0 - float64(cnt)/float64(l.corpus.Len())) 243 | } 244 | } 245 | l.verbose.Do(func() { 246 | if cnt%l.opts.LogBatch == 0 { 247 | fmt.Printf("trained %d words %v\r", cnt, clk.AllElapsed()) 248 | } 249 | }) 250 | } 251 | l.verbose.Do(func() { 252 | fmt.Printf("trained %d words %v\r\n", cnt, clk.AllElapsed()) 253 | }) 254 | } 255 | 256 | func (l *lexvec) Save(f io.Writer, typ vector.Type) error { 257 | return vector.Save(f, l.corpus.Dictionary(), l.WordVector(typ), l.verbose, l.opts.LogBatch) 258 | } 259 | 260 | func (l *lexvec) WordVector(typ vector.Type) *matrix.Matrix { 261 | var mat *matrix.Matrix 262 | dic := l.corpus.Dictionary() 263 | if typ == vector.Agg { 264 | mat = matrix.New(dic.Len(), l.opts.Dim, 265 | func(row int, vec []float64) { 266 | for i := 0; i < l.opts.Dim; i++ { 267 | vec[i] = l.param.Slice(row)[i] 268 | } 269 | }, 270 | ) 271 | } else { 272 | dic := l.corpus.Dictionary() 273 | mat = matrix.New(dic.Len(), l.opts.Dim, 274 | func(row int, vec []float64) { 275 | for i := 0; i < l.opts.Dim; i++ { 276 | vec[i] = l.param.Slice(row)[i] + l.param.Slice(row + dic.Len())[i] 277 | } 278 | }, 279 | ) 280 | } 281 | return mat 282 | } 283 | -------------------------------------------------------------------------------- /pkg/model/word2vec/options.go: -------------------------------------------------------------------------------- 1 | // Copyright © 2020 wego authors 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package word2vec 16 | 17 | import ( 18 | "fmt" 19 | "runtime" 20 | 21 | "github.com/spf13/cobra" 22 | ) 23 | 24 | type ModelType = string 25 | 26 | const ( 27 | Cbow ModelType = "cbow" 28 | SkipGram ModelType = "skipgram" 29 | ) 30 | 31 | type OptimizerType = string 32 | 33 | const ( 34 | NegativeSampling OptimizerType = "ns" 35 | HierarchicalSoftmax OptimizerType = "hs" 36 | ) 37 | 38 | var ( 39 | defaultBatchSize = 10000 40 | defaultDim = 10 41 | defaultDocInMemory = false 42 | defaultGoroutines = runtime.NumCPU() 43 | defaultInitlr = 0.025 44 | defaultIter = 15 45 | defaultLogBatch = 100000 46 | defaultMaxCount = -1 47 | defaultMaxDepth = 100 48 | defaultMinCount = 5 49 | defaultMinLR = defaultInitlr * 1.0e-4 50 | defaultModelType = Cbow 51 | defaultNegativeSampleSize = 5 52 | defaultOptimizerType = NegativeSampling 53 | defaultSubsampleThreshold = 1.0e-3 54 | defaultToLower = false 55 | defaultUpdateLRBatch = 100000 56 | defaultVerbose = false 57 | defaultWindow = 5 58 | ) 59 | 60 | type Options struct { 61 | BatchSize int 62 | Dim int 63 | DocInMemory bool 64 | Goroutines int 65 | Initlr float64 66 | Iter int 67 | LogBatch int 68 | MaxCount int 69 | MaxDepth int 70 | MinCount int 71 | MinLR float64 72 | ModelType ModelType 73 | NegativeSampleSize int 74 | OptimizerType OptimizerType 75 | SubsampleThreshold float64 76 | ToLower bool 77 | UpdateLRBatch int 78 | Verbose bool 79 | Window int 80 | } 81 | 82 | func DefaultOptions() Options { 83 | return Options{ 84 | BatchSize: defaultBatchSize, 85 | Dim: defaultDim, 86 | DocInMemory: defaultDocInMemory, 87 | Goroutines: defaultGoroutines, 88 | Initlr: defaultInitlr, 89 | Iter: defaultIter, 90 | LogBatch: defaultLogBatch, 91 | MaxCount: defaultMaxCount, 92 | MaxDepth: defaultMaxDepth, 93 | MinCount: defaultMinCount, 94 | MinLR: defaultMinLR, 95 | ModelType: defaultModelType, 96 | 
NegativeSampleSize: defaultNegativeSampleSize, 97 | OptimizerType: defaultOptimizerType, 98 | SubsampleThreshold: defaultSubsampleThreshold, 99 | ToLower: defaultToLower, 100 | UpdateLRBatch: defaultUpdateLRBatch, 101 | Verbose: defaultVerbose, 102 | Window: defaultWindow, 103 | } 104 | } 105 | 106 | func LoadForCmd(cmd *cobra.Command, opts *Options) { 107 | cmd.Flags().IntVar(&opts.BatchSize, "batch", defaultBatchSize, "batch size to train") 108 | cmd.Flags().IntVarP(&opts.Dim, "dim", "d", defaultDim, "dimension for word vector") 109 | cmd.Flags().IntVar(&opts.Goroutines, "goroutines", defaultGoroutines, "number of goroutine") 110 | cmd.Flags().BoolVar(&opts.DocInMemory, "in-memory", defaultDocInMemory, "whether to store the doc in memory") 111 | cmd.Flags().Float64Var(&opts.Initlr, "initlr", defaultInitlr, "initial learning rate") 112 | cmd.Flags().IntVar(&opts.Iter, "iter", defaultIter, "number of iteration") 113 | cmd.Flags().IntVar(&opts.LogBatch, "log-batch", defaultLogBatch, "batch size to log for counting words") 114 | cmd.Flags().IntVar(&opts.MaxCount, "max-count", defaultMaxCount, "upper limit to filter words") 115 | cmd.Flags().IntVar(&opts.MaxDepth, "max-depth", defaultMaxDepth, "times to track huffman tree, max-depth=0 means to track full path from root to word (for hierarchical softmax only)") 116 | cmd.Flags().IntVar(&opts.MinCount, "min-count", defaultMinCount, "lower limit to filter words") 117 | cmd.Flags().Float64Var(&opts.MinLR, "min-lr", defaultMinLR, "lower limit of learning rate") 118 | cmd.Flags().StringVar(&opts.ModelType, "model", defaultModelType, fmt.Sprintf("which model does it use? one of: %s|%s", Cbow, SkipGram)) 119 | cmd.Flags().IntVar(&opts.NegativeSampleSize, "sample", defaultNegativeSampleSize, "negative sample size(for negative sampling only)") 120 | cmd.Flags().StringVar(&opts.OptimizerType, "optimizer", defaultOptimizerType, fmt.Sprintf("which optimizer does it use? 
one of: %s|%s", HierarchicalSoftmax, NegativeSampling)) 121 | cmd.Flags().Float64Var(&opts.SubsampleThreshold, "threshold", defaultSubsampleThreshold, "threshold for subsampling") 122 | cmd.Flags().BoolVar(&opts.ToLower, "to-lower", defaultToLower, "whether the words on corpus convert to lowercase or not") 123 | cmd.Flags().IntVar(&opts.UpdateLRBatch, "update-lr-batch", defaultUpdateLRBatch, "batch size to update learning rate") 124 | cmd.Flags().BoolVar(&opts.Verbose, "verbose", defaultVerbose, "verbose mode") 125 | cmd.Flags().IntVarP(&opts.Window, "window", "w", defaultWindow, "context window size") 126 | } 127 | 128 | type ModelOption func(*Options) 129 | 130 | func BatchSize(v int) ModelOption { 131 | return ModelOption(func(opts *Options) { 132 | opts.BatchSize = v 133 | }) 134 | } 135 | 136 | func DocInMemory() ModelOption { 137 | return ModelOption(func(opts *Options) { 138 | opts.DocInMemory = true 139 | }) 140 | } 141 | 142 | func Goroutines(v int) ModelOption { 143 | return ModelOption(func(opts *Options) { 144 | opts.Goroutines = v 145 | }) 146 | } 147 | 148 | func Dim(v int) ModelOption { 149 | return ModelOption(func(opts *Options) { 150 | opts.Dim = v 151 | }) 152 | } 153 | 154 | func Initlr(v float64) ModelOption { 155 | return ModelOption(func(opts *Options) { 156 | opts.Initlr = v 157 | }) 158 | } 159 | 160 | func Iter(v int) ModelOption { 161 | return ModelOption(func(opts *Options) { 162 | opts.Iter = v 163 | }) 164 | } 165 | 166 | func LogBatch(v int) ModelOption { 167 | return ModelOption(func(opts *Options) { 168 | opts.LogBatch = v 169 | }) 170 | } 171 | 172 | func MaxCount(v int) ModelOption { 173 | return ModelOption(func(opts *Options) { 174 | opts.MaxCount = v 175 | }) 176 | } 177 | 178 | func MaxDepth(v int) ModelOption { 179 | return ModelOption(func(opts *Options) { 180 | opts.MaxDepth = v 181 | }) 182 | } 183 | 184 | func MinCount(v int) ModelOption { 185 | return ModelOption(func(opts *Options) { 186 | opts.MinCount = v 187 | }) 188 
| } 189 | 190 | func MinLR(v float64) ModelOption { 191 | return ModelOption(func(opts *Options) { 192 | opts.MinLR = v 193 | }) 194 | } 195 | 196 | func Model(typ ModelType) ModelOption { 197 | return ModelOption(func(opts *Options) { 198 | opts.ModelType = typ 199 | }) 200 | } 201 | 202 | func NegativeSampleSize(v int) ModelOption { 203 | return ModelOption(func(opts *Options) { 204 | opts.NegativeSampleSize = v 205 | }) 206 | } 207 | 208 | func Optimizer(typ OptimizerType) ModelOption { 209 | return ModelOption(func(opts *Options) { 210 | opts.OptimizerType = typ 211 | }) 212 | } 213 | 214 | func SubsampleThreshold(v float64) ModelOption { 215 | return ModelOption(func(opts *Options) { 216 | opts.SubsampleThreshold = v 217 | }) 218 | } 219 | 220 | func ToLower() ModelOption { 221 | return ModelOption(func(opts *Options) { 222 | opts.ToLower = true 223 | }) 224 | } 225 | 226 | func UpdateLRBatch(v int) ModelOption { 227 | return ModelOption(func(opts *Options) { 228 | opts.UpdateLRBatch = v 229 | }) 230 | } 231 | 232 | func Verbose() ModelOption { 233 | return ModelOption(func(opts *Options) { 234 | opts.Verbose = true 235 | }) 236 | } 237 | 238 | func Window(v int) ModelOption { 239 | return ModelOption(func(opts *Options) { 240 | opts.Window = v 241 | }) 242 | } 243 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 
14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= 2 | cloud.google.com/go v0.34.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= 3 | cloud.google.com/go v0.38.0/go.mod h1:990N+gfupTy94rShfmMCWGDn0LpTmnzTp2qbd1dvSRU= 4 | cloud.google.com/go v0.44.1/go.mod h1:iSa0KzasP4Uvy3f1mN/7PiObzGgflwredwwASm/v6AU= 5 | cloud.google.com/go v0.44.2/go.mod h1:60680Gw3Yr4ikxnPRS/oxxkBccT6SA1yMk63TGekxKY= 6 | cloud.google.com/go v0.45.1/go.mod h1:RpBamKRgapWJb87xiFSdk4g1CME7QZg3uwTez+TSTjc= 7 | cloud.google.com/go v0.46.3/go.mod h1:a6bKKbmY7er1mI7TEI4lsAkts/mkhTSZK8w33B4RAg0= 8 | cloud.google.com/go/bigquery v1.0.1/go.mod h1:i/xbL2UlR5RvWAURpBYZTtm/cXjCha9lbfbpx4poX+o= 9 | cloud.google.com/go/datastore v1.0.0/go.mod h1:LXYbyblFSglQ5pkeyhO+Qmw7ukd3C+pD7TKLgZqpHYE= 10 | cloud.google.com/go/firestore v1.1.0/go.mod h1:ulACoGHTpvq5r8rxGJ4ddJZBZqakUQqClKRT5SZwBmk= 11 | cloud.google.com/go/pubsub v1.0.1/go.mod 
h1:R0Gpsv3s54REJCy4fxDixWD93lHJMoZTyQ2kNxGRt3I= 12 | cloud.google.com/go/storage v1.0.0/go.mod h1:IhtSnM/ZTZV8YYJWCY8RULGVqBDmpoyjwiyrjsg+URw= 13 | dmitri.shuralyov.com/gpu/mtl v0.0.0-20190408044501-666a987793e9/go.mod h1:H6x//7gZCb22OMCxBHrMx7a5I7Hp++hsVxbQ4BYO7hU= 14 | github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= 15 | github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo= 16 | github.com/OneOfOne/xxhash v1.2.2/go.mod h1:HSdplMjZKSmBqAxg5vPj2TmRDmfkzw+cTzAElWljhcU= 17 | github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= 18 | github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= 19 | github.com/armon/circbuf v0.0.0-20150827004946-bbbad097214e/go.mod h1:3U/XgcO3hCbHZ8TKRvWD2dDTCfh9M9ya+I9JpbB7O8o= 20 | github.com/armon/go-metrics v0.0.0-20180917152333-f0300d1749da/go.mod h1:Q73ZrmVTwzkszR9V5SSuryQ31EELlFMUz1kKyl939pY= 21 | github.com/armon/go-radix v0.0.0-20180808171621-7fddfc383310/go.mod h1:ufUuZ+zHj4x4TnLV4JWEpy2hxWSpsRywHrMgIH9cCH8= 22 | github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q= 23 | github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8= 24 | github.com/bgentry/speakeasy v0.1.0/go.mod h1:+zsyZBPWlz7T6j88CTgSN5bM796AkVf0kBD4zp0CCIs= 25 | github.com/bketelsen/crypt v0.0.3-0.20200106085610-5cbc8cc4026c/go.mod h1:MKsuJmJgSg28kpZDP6UIiPt0e0Oz0kqKNGyRaWEPv84= 26 | github.com/cespare/xxhash v1.1.0/go.mod h1:XrSqR1VqqWfGrhpAt58auRo0WTKS1nRRg3ghfAqPWnc= 27 | github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= 28 | github.com/coreos/bbolt v1.3.2/go.mod h1:iRUV2dpdMOn7Bo10OQBFzIJO9kkE559Wcmn+qkEiiKk= 29 | github.com/coreos/etcd v3.3.13+incompatible/go.mod 
h1:uF7uidLiAD3TWHmW31ZFd/JWoc32PjwdhPthX9715RE= 30 | github.com/coreos/go-semver v0.3.0/go.mod h1:nnelYz7RCh+5ahJtPPxZlU+153eP4D4r3EedlOD2RNk= 31 | github.com/coreos/go-systemd v0.0.0-20190321100706-95778dfbb74e/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4= 32 | github.com/coreos/pkg v0.0.0-20180928190104-399ea9e2e55f/go.mod h1:E3G3o1h8I7cfcXa63jLwjI0eiQQMgzzUDFVpN/nH/eA= 33 | github.com/cpuguy83/go-md2man/v2 v2.0.0/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU= 34 | github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 35 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 36 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 37 | github.com/dgrijalva/jwt-go v3.2.0+incompatible/go.mod h1:E3ru+11k8xSBh+hMPgOLZmtrrCbhqsmaPHjLKYnJCaQ= 38 | github.com/dgryski/go-sip13 v0.0.0-20181026042036-e10d5fee7954/go.mod h1:vAd38F8PWV+bWy6jNmig1y/TA+kYO4g3RSRF0IAv0no= 39 | github.com/fatih/color v1.7.0/go.mod h1:Zm6kSWBoL9eyXnKyktHP6abPY2pDugNf5KwzbycvMj4= 40 | github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= 41 | github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04= 42 | github.com/go-gl/glfw v0.0.0-20190409004039-e6da0acd62b1/go.mod h1:vR7hzQXu2zJy9AVAgeJqvqgH9Q5CA+iKCZ2gyEVpxRU= 43 | github.com/go-kit/kit v0.8.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as= 44 | github.com/go-logfmt/logfmt v0.3.0/go.mod h1:Qt1PoO58o5twSAckw1HlFXLmHsOX5/0LbT9GBnD5lWE= 45 | github.com/go-logfmt/logfmt v0.4.0/go.mod h1:3RMwSq7FuexP4Kalkev3ejPJsZTpXXBr9+V4qmtdjCk= 46 | github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY= 47 | github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ= 48 | github.com/gogo/protobuf v1.2.1/go.mod h1:hp+jE20tsWTFYpLwKvXlhS1hjn+gTNwPg2I6zVXpSg4= 49 | github.com/golang/glog 
v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= 50 | github.com/golang/groupcache v0.0.0-20190129154638-5b532d6fd5ef/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= 51 | github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= 52 | github.com/golang/mock v1.2.0/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= 53 | github.com/golang/mock v1.3.1/go.mod h1:sBzyDLLjw3U8JLTeZvSv8jJB+tU5PVekmnlKIyFUx0Y= 54 | github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= 55 | github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= 56 | github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= 57 | github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= 58 | github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= 59 | github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= 60 | github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= 61 | github.com/google/martian v2.1.0+incompatible/go.mod h1:9I4somxYTbIHy5NJKHRl3wXiIaQGbYVAs8BPL6v8lEs= 62 | github.com/google/pprof v0.0.0-20181206194817-3ea8567a2e57/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc= 63 | github.com/google/pprof v0.0.0-20190515194954-54271f7e092f/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc= 64 | github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI= 65 | github.com/googleapis/gax-go/v2 v2.0.4/go.mod h1:0Wqv26UfaUD9n4G6kQubkQ+KchISgw+vpHVxEJEs9eg= 66 | github.com/googleapis/gax-go/v2 v2.0.5/go.mod h1:DWXyrwAJ9X0FpwwEdw+IPEYBICEFu5mhpdKc/us6bOk= 67 | github.com/gopherjs/gopherjs v0.0.0-20181017120253-0766667cb4d1/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY= 68 | github.com/gorilla/websocket v1.4.2/go.mod 
h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE= 69 | github.com/grpc-ecosystem/go-grpc-middleware v1.0.0/go.mod h1:FiyG127CGDf3tlThmgyCl78X/SZQqEOJBCDaAfeWzPs= 70 | github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0/go.mod h1:8NvIoxWQoOIhqOTXgfV/d3M/q6VIi02HzZEHgUlZvzk= 71 | github.com/grpc-ecosystem/grpc-gateway v1.9.0/go.mod h1:vNeuVxBJEsws4ogUvrchl83t/GYV9WGTSLVdBhOQFDY= 72 | github.com/hashicorp/consul/api v1.1.0/go.mod h1:VmuI/Lkw1nC05EYQWNKwWGbkg+FbDBtguAZLlVdkD9Q= 73 | github.com/hashicorp/consul/sdk v0.1.1/go.mod h1:VKf9jXwCTEY1QZP2MOLRhb5i/I/ssyNV1vwHyQBF0x8= 74 | github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4= 75 | github.com/hashicorp/go-cleanhttp v0.5.1/go.mod h1:JpRdi6/HCYpAwUzNwuwqhbovhLtngrth3wmdIIUrZ80= 76 | github.com/hashicorp/go-immutable-radix v1.0.0/go.mod h1:0y9vanUI8NX6FsYoO3zeMjhV/C5i9g4Q3DwcSNZ4P60= 77 | github.com/hashicorp/go-msgpack v0.5.3/go.mod h1:ahLV/dePpqEmjfWmKiqvPkv/twdG7iPBM1vqhUKIvfM= 78 | github.com/hashicorp/go-multierror v1.0.0/go.mod h1:dHtQlpGsu+cZNNAkkCN/P3hoUDHhCYQXV3UM06sGGrk= 79 | github.com/hashicorp/go-rootcerts v1.0.0/go.mod h1:K6zTfqpRlCUIjkwsN4Z+hiSfzSTQa6eBIzfwKfwNnHU= 80 | github.com/hashicorp/go-sockaddr v1.0.0/go.mod h1:7Xibr9yA9JjQq1JpNB2Vw7kxv8xerXegt+ozgdvDeDU= 81 | github.com/hashicorp/go-syslog v1.0.0/go.mod h1:qPfqrKkXGihmCqbJM2mZgkZGvKG1dFdvsLplgctolz4= 82 | github.com/hashicorp/go-uuid v1.0.0/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro= 83 | github.com/hashicorp/go-uuid v1.0.1/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro= 84 | github.com/hashicorp/go.net v0.0.1/go.mod h1:hjKkEWcCURg++eb33jQU7oqQcI9XDCnUzHA0oac0k90= 85 | github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= 86 | github.com/hashicorp/golang-lru v0.5.1/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= 87 | github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ= 88 | github.com/hashicorp/logutils 
v1.0.0/go.mod h1:QIAnNjmIWmVIIkWDTG1z5v++HQmx9WQRO+LraFDTW64= 89 | github.com/hashicorp/mdns v1.0.0/go.mod h1:tL+uN++7HEJ6SQLQ2/p+z2pH24WQKWjBPkE0mNTz8vQ= 90 | github.com/hashicorp/memberlist v0.1.3/go.mod h1:ajVTdAv/9Im8oMAAj5G31PhhMCZJV2pPBoIllUwCN7I= 91 | github.com/hashicorp/serf v0.8.2/go.mod h1:6hOLApaqBFA1NXqRQAsxw9QxuDEvNxSQRwA/JwenrHc= 92 | github.com/inconshreveable/mousetrap v1.0.0 h1:Z8tu5sraLXCXIcARxBp/8cbvlwVa7Z1NHg9XEKhtSvM= 93 | github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8= 94 | github.com/jonboulle/clockwork v0.1.0/go.mod h1:Ii8DK3G1RaLaWxj9trq07+26W01tbo22gdxWY5EU2bo= 95 | github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU= 96 | github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU= 97 | github.com/jtolds/gls v4.20.0+incompatible/go.mod h1:QJZ7F/aHp+rZTRtaJ1ow/lLfFfVYBRgL+9YlvaHOwJU= 98 | github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7VTCxuUUipMqKk8s4w= 99 | github.com/kisielk/errcheck v1.1.0/go.mod h1:EZBBE59ingxPouuu3KfxchcWSUPOHkagtvWXihfKN4Q= 100 | github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= 101 | github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= 102 | github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc= 103 | github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= 104 | github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= 105 | github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE= 106 | github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= 107 | github.com/magiconair/properties v1.8.1/go.mod h1:PppfXfuXeibc/6YijjN8zIbojt8czPbwD3XqdrwzmxQ= 108 | github.com/mattn/go-colorable v0.0.9/go.mod 
h1:9vuHe8Xs5qXnSaW/c/ABM9alt+Vo+STaOChaDxuIBZU= 109 | github.com/mattn/go-isatty v0.0.3/go.mod h1:M+lRXTBqGeGNdLjl/ufCoiOlB5xdOkqRJdNxMWT7Zi4= 110 | github.com/mattn/go-runewidth v0.0.3/go.mod h1:LwmH8dsx7+W8Uxz3IHJYH5QSwggIsqBzpuz5H//U1FU= 111 | github.com/mattn/go-runewidth v0.0.7 h1:Ei8KR0497xHyKJPAv59M1dkC+rOZCMBJ+t3fZ+twI54= 112 | github.com/mattn/go-runewidth v0.0.7/go.mod h1:H031xJmbD/WCDINGzjvQ9THkh0rPKHF+m2gUSrubnMI= 113 | github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0= 114 | github.com/miekg/dns v1.0.14/go.mod h1:W1PPwlIAgtquWBMBEV9nkV9Cazfe8ScdGz/Lj7v3Nrg= 115 | github.com/mitchellh/cli v1.0.0/go.mod h1:hNIlj7HEI86fIcpObd7a0FcrxTWetlwJDGcceTlRvqc= 116 | github.com/mitchellh/go-homedir v1.0.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= 117 | github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= 118 | github.com/mitchellh/go-testing-interface v1.0.0/go.mod h1:kRemZodwjscx+RGhAo8eIhFbs2+BFgRtFPeD/KE+zxI= 119 | github.com/mitchellh/gox v0.4.0/go.mod h1:Sd9lOJ0+aimLBi73mGofS1ycjY8lL3uZM3JPS42BGNg= 120 | github.com/mitchellh/iochan v1.0.0/go.mod h1:JwYml1nuB7xOzsp52dPpHFffvOCDupsG0QubkSMEySY= 121 | github.com/mitchellh/mapstructure v0.0.0-20160808181253-ca63d7c062ee/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y= 122 | github.com/mitchellh/mapstructure v1.1.2/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y= 123 | github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= 124 | github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= 125 | github.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U= 126 | github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e h1:fD57ERR4JtEqsWbfPhv4DMiApHyliiK5xCTNVSPiaAs= 127 | github.com/niemeyer/pretty 
v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno= 128 | github.com/oklog/ulid v1.3.1/go.mod h1:CirwcVhetQ6Lv90oh/F+FBtV6XMibvdAFo93nm5qn4U= 129 | github.com/olekukonko/tablewriter v0.0.4 h1:vHD/YYe1Wolo78koG299f7V/VAS08c6IpCLn+Ejf/w8= 130 | github.com/olekukonko/tablewriter v0.0.4/go.mod h1:zq6QwlOf5SlnkVbMSr5EoBv3636FWnp+qbPhuoO21uA= 131 | github.com/pascaldekloe/goe v0.0.0-20180627143212-57f6aae5913c/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc= 132 | github.com/pelletier/go-toml v1.2.0/go.mod h1:5z9KED0ma1S8pY6P1sdut58dfprrGBbd/94hg7ilaic= 133 | github.com/peterh/liner v1.2.0 h1:w/UPXyl5GfahFxcTOz2j9wCIHNI+pUPr2laqpojKNCg= 134 | github.com/peterh/liner v1.2.0/go.mod h1:CRroGNssyjTd/qIG2FyxByd2S8JEAZXBl4qUrZf8GS0= 135 | github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= 136 | github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= 137 | github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= 138 | github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= 139 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 140 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 141 | github.com/posener/complete v1.1.1/go.mod h1:em0nMJCgc9GFtwrmVmEMR/ZL6WyhyjMBndrE9hABlRI= 142 | github.com/prometheus/client_golang v0.9.1/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw= 143 | github.com/prometheus/client_golang v0.9.3/go.mod h1:/TN21ttK/J9q6uSwhBd54HahCDft0ttaMvbicHlPoso= 144 | github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo= 145 | github.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= 146 | github.com/prometheus/common v0.0.0-20181113130724-41aa239b4cce/go.mod 
h1:daVV7qP5qjZbuso7PdcryaAu0sAZbrN9i7WWcTMWvro= 147 | github.com/prometheus/common v0.4.0/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y86RQel1bk4= 148 | github.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk= 149 | github.com/prometheus/procfs v0.0.0-20190507164030-5867b95ac084/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA= 150 | github.com/prometheus/tsdb v0.7.1/go.mod h1:qhTCs0VvXwvX/y3TZrWD7rabWM+ijKTux40TwIPHuXU= 151 | github.com/rogpeppe/fastuuid v0.0.0-20150106093220-6724a57986af/go.mod h1:XWv6SoW27p1b0cqNHllgS5HIMJraePCO15w5zCzIWYg= 152 | github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= 153 | github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= 154 | github.com/ryanuber/columnize v0.0.0-20160712163229-9b3edd62028f/go.mod h1:sm1tb6uqfes/u+d4ooFouqFdy9/2g9QGwK3SQygK0Ts= 155 | github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529/go.mod h1:DxrIzT+xaE7yg65j358z/aeFdxmN0P9QXhEzd20vsDc= 156 | github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc= 157 | github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo= 158 | github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d/go.mod h1:OnSkiWE9lh6wB0YB77sQom3nweQdgAjqCqsofrRNTgc= 159 | github.com/smartystreets/goconvey v1.6.4/go.mod h1:syvi0/a8iFYH4r/RixwvyeAJjdLS9QV7WQ/tjFTllLA= 160 | github.com/soheilhy/cmux v0.1.4/go.mod h1:IM3LyeVVIOuxMH7sFAkER9+bJ4dT7Ms6E4xg4kGIyLM= 161 | github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA= 162 | github.com/spf13/afero v1.1.2/go.mod h1:j4pytiNVoe2o6bmDsKpLACNPDBIoEAkihy7loJ1B0CQ= 163 | github.com/spf13/cast v1.3.0/go.mod h1:Qx5cxh0v+4UWYiBimWS+eyWzqEqokIECu5etghLkUJE= 164 | github.com/spf13/cobra v1.1.1 h1:KfztREH0tPxJJ+geloSLaAkaPkr4ki2Er5quFV1TDo4= 165 | 
github.com/spf13/cobra v1.1.1/go.mod h1:WnodtKOvamDL/PwE2M4iKs8aMDBZ5Q5klgD3qfVJQMI= 166 | github.com/spf13/jwalterweatherman v1.0.0/go.mod h1:cQK4TGJAtQXfYWX+Ddv3mKDzgVb68N+wFjFa4jdeBTo= 167 | github.com/spf13/pflag v1.0.3/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4= 168 | github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= 169 | github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= 170 | github.com/spf13/viper v1.7.0/go.mod h1:8WkrPz2fc9jxqZNCJI/76HCieCp4Q8HaLFoCha5qpdg= 171 | github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= 172 | github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= 173 | github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= 174 | github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= 175 | github.com/stretchr/testify v1.6.1 h1:hDPOHmpOpP40lSULcqw7IrRb/u7w6RpDC9399XyoNd0= 176 | github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= 177 | github.com/subosito/gotenv v1.2.0/go.mod h1:N0PQaV/YGNqwC0u51sEeR/aUtSLEXKX9iv69rRypqCw= 178 | github.com/tmc/grpc-websocket-proxy v0.0.0-20190109142713-0ad062ec5ee5/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U= 179 | github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2/go.mod h1:UETIi67q53MR2AWcXfiuqkDkRtnGDLqkBTpCHuJHxtU= 180 | go.etcd.io/bbolt v1.3.2/go.mod h1:IbVyRI1SCnLcuJnV2u8VeU0CEYM7e686BmAb1XKL+uU= 181 | go.opencensus.io v0.21.0/go.mod h1:mSImk1erAIZhrmZN+AvHh14ztQfjbGwt4TtuofqLduU= 182 | go.opencensus.io v0.22.0/go.mod h1:+kGneAE2xo2IficOXnaByMWTGM9T73dGwxeWcUqIpI8= 183 | go.uber.org/atomic v1.4.0/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE= 184 | go.uber.org/multierr v1.1.0/go.mod h1:wR5kodmAFQ0UK8QlbwjlSNy0Z68gJhDJUG5sjR94q/0= 185 | go.uber.org/zap v1.10.0/go.mod h1:vwi/ZaCAaUcBkycHslxD9B2zi4UTXhF60s6SWpuDF0Q= 186 | 
golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= 187 | golang.org/x/crypto v0.0.0-20181029021203-45a5f77698d3/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= 188 | golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= 189 | golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= 190 | golang.org/x/crypto v0.0.0-20190605123033-f99c8df09eb5/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= 191 | golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= 192 | golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= 193 | golang.org/x/exp v0.0.0-20190510132918-efd6b22b2522/go.mod h1:ZjyILWgesfNpC6sMxTJOJm9Kp84zZh5NQWvqDGG3Qr8= 194 | golang.org/x/exp v0.0.0-20190829153037-c13cbed26979/go.mod h1:86+5VVa7VpoJ4kLfm080zCjGlMRFzhUhsZKEZO7MGek= 195 | golang.org/x/exp v0.0.0-20191030013958-a1ab85dbe136/go.mod h1:JXzH8nQsPlswgeRAPE3MuO9GYsAcnJvJ4vnMwN/5qkY= 196 | golang.org/x/image v0.0.0-20190227222117-0694c2d4d067/go.mod h1:kZ7UVZpmo3dzQBMxlp+ypCbDeSB+sBbTgSJuh5dn5js= 197 | golang.org/x/image v0.0.0-20190802002840-cff245a6509b/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= 198 | golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= 199 | golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU= 200 | golang.org/x/lint v0.0.0-20190301231843-5614ed5bae6f/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= 201 | golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= 202 | golang.org/x/lint v0.0.0-20190409202823-959b441ac422/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= 203 | golang.org/x/lint 
v0.0.0-20190909230951-414d861bb4ac/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= 204 | golang.org/x/lint v0.0.0-20190930215403-16217165b5de/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= 205 | golang.org/x/mobile v0.0.0-20190312151609-d3739f865fa6/go.mod h1:z+o9i4GpDbdi3rU15maQ/Ox0txvL9dWGYEHz965HBQE= 206 | golang.org/x/mobile v0.0.0-20190719004257-d2bd2a29d028/go.mod h1:E/iHnbuqvinMTCcRqshq8CkpyQDoeVncDDYHnLhea+o= 207 | golang.org/x/mod v0.0.0-20190513183733-4bf6d317e70e/go.mod h1:mXi4GBBbnImb6dmsKGUJ2LatrhH/nqhxcFungHvyanc= 208 | golang.org/x/mod v0.1.0/go.mod h1:0QHyrYULN0/3qlju5TqG8bIK38QM8yzMo5ekMj3DlcY= 209 | golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= 210 | golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= 211 | golang.org/x/net v0.0.0-20181023162649-9b4f9f5ad519/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= 212 | golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= 213 | golang.org/x/net v0.0.0-20181201002055-351d144fa1fc/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= 214 | golang.org/x/net v0.0.0-20181220203305-927f97764cc3/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= 215 | golang.org/x/net v0.0.0-20190108225652-1e06a53dbb7e/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= 216 | golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= 217 | golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= 218 | golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= 219 | golang.org/x/net v0.0.0-20190501004415-9ce7a6920f09/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= 220 | golang.org/x/net v0.0.0-20190503192946-f4e77d36d62c/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= 221 | 
golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks= 222 | golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= 223 | golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= 224 | golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= 225 | golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= 226 | golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 227 | golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 228 | golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 229 | golang.org/x/sync v0.0.0-20190227155943-e225da77a7e6/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 230 | golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 231 | golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9 h1:SQFwaSi55rU7vdNs9Yr0Z324VNlrF+0wMqRXT4St8ck= 232 | golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 233 | golang.org/x/sys v0.0.0-20180823144017-11551d06cbcc/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 234 | golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 235 | golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 236 | golang.org/x/sys v0.0.0-20181026203630-95b1ffbd15a5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 237 | golang.org/x/sys v0.0.0-20181107165924-66b7b1311ac8/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 238 | golang.org/x/sys v0.0.0-20181116152217-5ac8a444bdc5/go.mod 
h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 239 | golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 240 | golang.org/x/sys v0.0.0-20190312061237-fead79001313/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 241 | golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 242 | golang.org/x/sys v0.0.0-20190502145724-3ef323f4f1fd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 243 | golang.org/x/sys v0.0.0-20190507160741-ecd444e8653b/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 244 | golang.org/x/sys v0.0.0-20190606165138-5da285871e9c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 245 | golang.org/x/sys v0.0.0-20190624142023-c5567b49c5d0/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 246 | golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= 247 | golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= 248 | golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= 249 | golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= 250 | golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= 251 | golang.org/x/tools v0.0.0-20180221164845-07fd8470d635/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= 252 | golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= 253 | golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= 254 | golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY= 255 | golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= 256 | golang.org/x/tools v0.0.0-20190312151545-0bb0c0a6e846/go.mod 
h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= 257 | golang.org/x/tools v0.0.0-20190312170243-e65039ee4138/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= 258 | golang.org/x/tools v0.0.0-20190328211700-ab21143f2384/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= 259 | golang.org/x/tools v0.0.0-20190425150028-36563e24a262/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= 260 | golang.org/x/tools v0.0.0-20190506145303-2d16b83fe98c/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= 261 | golang.org/x/tools v0.0.0-20190606124116-d0a3d012864b/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= 262 | golang.org/x/tools v0.0.0-20190621195816-6e04913cbbac/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= 263 | golang.org/x/tools v0.0.0-20190628153133-6cdbf07be9d0/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= 264 | golang.org/x/tools v0.0.0-20190816200558-6889da9d5479/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= 265 | golang.org/x/tools v0.0.0-20190911174233-4f2ddba30aff/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= 266 | golang.org/x/tools v0.0.0-20191012152004-8de300cfc20a/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= 267 | golang.org/x/tools v0.0.0-20191112195655-aa38f8e97acc/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= 268 | golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 269 | google.golang.org/api v0.4.0/go.mod h1:8k5glujaEP+g9n7WNsDg8QP6cUVNI86fCNMcbazEtwE= 270 | google.golang.org/api v0.7.0/go.mod h1:WtwebWUNSVBH/HAw79HIFXZNqEvBhG+Ra+ax0hx3E3M= 271 | google.golang.org/api v0.8.0/go.mod h1:o4eAsZoiT+ibD93RtjEohWalFOjRDx6CVaqeizhEnKg= 272 | google.golang.org/api v0.9.0/go.mod h1:o4eAsZoiT+ibD93RtjEohWalFOjRDx6CVaqeizhEnKg= 273 | google.golang.org/api v0.13.0/go.mod h1:iLdEw5Ide6rF15KTC1Kkl0iskquN2gFfn9o9XIsbkAI= 274 | google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= 275 | 
google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= 276 | google.golang.org/appengine v1.5.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= 277 | google.golang.org/appengine v1.6.1/go.mod h1:i06prIuMbXzDqacNJfV5OdTW448YApPu5ww/cMBSeb0= 278 | google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= 279 | google.golang.org/genproto v0.0.0-20190307195333-5fe7a883aa19/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= 280 | google.golang.org/genproto v0.0.0-20190418145605-e7d98fc518a7/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= 281 | google.golang.org/genproto v0.0.0-20190425155659-357c62f0e4bb/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= 282 | google.golang.org/genproto v0.0.0-20190502173448-54afdca5d873/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= 283 | google.golang.org/genproto v0.0.0-20190801165951-fa694d86fc64/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc= 284 | google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc= 285 | google.golang.org/genproto v0.0.0-20190911173649-1774047e7e51/go.mod h1:IbNlFCBrqXvoKpeg0TB2l7cyZUmoaFKYIwrEpbDKLA8= 286 | google.golang.org/genproto v0.0.0-20191108220845-16a3f7862a1a/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc= 287 | google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= 288 | google.golang.org/grpc v1.20.1/go.mod h1:10oTOabMzJvdu6/UiuZezV6QK5dSlG84ov/aaiqXj38= 289 | google.golang.org/grpc v1.21.1/go.mod h1:oYelfM1adQP15Ek0mdvEgi9Df8B9CZIaU1084ijfRaM= 290 | gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw= 291 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 292 | gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 293 | 
gopkg.in/check.v1 v1.0.0-20200902074654-038fdea0a05b h1:QRR6H1YWRnHb4Y/HeNFCTJLFVxaq6wH4YuVdsUOr75U= 294 | gopkg.in/check.v1 v1.0.0-20200902074654-038fdea0a05b/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 295 | gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI= 296 | gopkg.in/ini.v1 v1.51.0/go.mod h1:pNLf8WUiyNEtQjuu5G5vTm06TEv9tsIgeAvK8hOrP4k= 297 | gopkg.in/resty.v1 v1.12.0/go.mod h1:mDo4pnntr5jdWRML875a/NmxYqAlA73dVijT2AXvQQo= 298 | gopkg.in/yaml.v2 v2.0.0-20170812160011-eb3733d160e7/go.mod h1:JAlM8MvJe8wmxCU4Bli9HhUf9+ttbYbLASfIpnQbh74= 299 | gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= 300 | gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= 301 | gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= 302 | gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c h1:dUUwHk2QECo/6vqA44rthZ8ie2QXMNeKRTHCNY2nXvo= 303 | gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 304 | honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= 305 | honnef.co/go/tools v0.0.0-20190106161140-3f1c8253044a/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= 306 | honnef.co/go/tools v0.0.0-20190418001031-e561f6794a2a/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= 307 | honnef.co/go/tools v0.0.1-2019.2.3/go.mod h1:a3bituU0lyd329TUQxRnasdCoJDkEUEAqEt0JzvZhAg= 308 | rsc.io/binaryregexp v0.2.0/go.mod h1:qTv7/COck+e2FymRvadv62gMdZztPaShugOCi3I+8D8= 309 | --------------------------------------------------------------------------------