├── data
    └── .gitkeep
├── .gitignore
├── go.mod
├── renovate.json
├── .github
    └── workflows
    │   └── ci.yaml
├── evaluation_test.go
├── sentence.go
├── sentence_test.go
├── feature_test.go
├── decoder.go
├── decoder_test.go
├── Makefile
├── action.go
├── state_test.go
├── word.go
├── LICENSE
├── evaluation.go
├── go.sum
├── state.go
├── action_test.go
├── reader.go
├── README.md
├── perceptron_test.go
├── main.go
├── perceptron.go
└── feature.go


/data/.gitkeep:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | data/.gitkeep
2 | 


--------------------------------------------------------------------------------
/go.mod:
--------------------------------------------------------------------------------
1 | module go-easy-first
2 | 
3 | go 1.13
4 | 
5 | require (
6 | 	github.com/olekukonko/tablewriter v0.0.5
7 | 	github.com/urfave/cli v1.22.5
8 | )
9 | 


--------------------------------------------------------------------------------
/renovate.json:
--------------------------------------------------------------------------------
1 | {
2 |     "$schema": "https://docs.renovatebot.com/renovate-schema.json",
3 |     "extends": [
4 |         "github>syou6162/renovate-config"
5 |     ]
6 | }


--------------------------------------------------------------------------------
/.github/workflows/ci.yaml:
--------------------------------------------------------------------------------
 1 | name: CI
 2 | 
 3 | on: [push, pull_request]
 4 | 
 5 | jobs:
 6 |   test:
 7 |     name: Test
 8 |     runs-on: ubuntu-16.04    
 9 |     steps:
10 |     - name: Checkout code
11 |       uses: actions/checkout@master
12 |     - name: Build
13 |       env:
14 |         GOPATH: /home/runner/work/
15 |       run: make deps bindata build
16 | 


--------------------------------------------------------------------------------
/evaluation_test.go:
--------------------------------------------------------------------------------
 1 | package main
 2 | 
 3 | import (
 4 | 	"testing"
 5 | )
 6 | 
 7 | func TestDependencyAccuracy(t *testing.T) {
 8 | 	g1 := []int{1, 2, 3}
 9 | 	g2 := []int{1, 2, 3}
10 | 	g3 := []int{1, 2, 3, 4}
11 | 	g := [][]int{g1, g2, g3}
12 | 
13 | 	p1 := []int{1, 2, 30}
14 | 	p2 := []int{1, 2, 30}
15 | 	p3 := []int{1, 2, 3, 40}
16 | 	p := [][]int{p1, p2, p3}
17 | 
18 | 	if a, _ := dependencyAccuracy(g, p); a != 0.7 {
19 | 		t.Error("dependency accuracy must be 0.7")
20 | 	}
21 | }
22 | 


--------------------------------------------------------------------------------
/sentence.go:
--------------------------------------------------------------------------------
 1 | package main
 2 | 
// Sentence is an ordered list of words. ExtractHeads and
// ExtractPredictedHeads skip the first entry, treating it as the root.
type Sentence struct {
	words []*Word
}
 6 | 
 7 | // extract heads without root for evaluation
 8 | func (sent *Sentence) ExtractHeads() []int {
 9 | 	heads := make([]int, 0)
10 | 	for _, w := range sent.words[1:] {
11 | 		heads = append(heads, w.head)
12 | 	}
13 | 	return heads
14 | }
15 | 
16 | func (sent *Sentence) ExtractPredictedHeads() []int {
17 | 	heads := make([]int, 0)
18 | 	for _, w := range sent.words[1:] {
19 | 		heads = append(heads, w.predHead)
20 | 	}
21 | 	return heads
22 | }
23 | 


--------------------------------------------------------------------------------
/sentence_test.go:
--------------------------------------------------------------------------------
 1 | package main
 2 | 
 3 | import (
 4 | 	"reflect"
 5 | 	"testing"
 6 | )
 7 | 
 8 | func TestExtractHeads(t *testing.T) {
 9 | 	words := make([]*Word, 0)
10 | 	words = append(words,
11 | 		makeRootWord(),
12 | 		makeWord("ms.", "NNP", 1, 2),
13 | 		makeWord("hang", "NNP", 2, 3),
14 | 		makeWord("plays", "VBZ", 3, 0),
15 | 		makeWord("elianti", "NNP", 4, 3),
16 | 		makeWord(".", ".", 5, 3),
17 | 	)
18 | 	sent := Sentence{words: words}
19 | 	head := sent.ExtractHeads()
20 | 
21 | 	if !reflect.DeepEqual(head, []int{2, 3, 0, 3, 3}) {
22 | 		t.Error("head extraction seems wrong")
23 | 	}
24 | }
25 | 


--------------------------------------------------------------------------------
/feature_test.go:
--------------------------------------------------------------------------------
 1 | package main
 2 | 
 3 | import (
 4 | 	"testing"
 5 | )
 6 | 
 7 | func TestAddUnigramFeatures(t *testing.T) {
 8 | 	words := make([]*Word, 0)
 9 | 	words = append(words,
10 | 		makeRootWord(),
11 | 		makeWord("ms.", "NNP", 1, 2),
12 | 		makeWord("hang", "NNP", 2, 3),
13 | 		makeWord("plays", "VBZ", 3, 0),
14 | 		makeWord("elianti", "NNP", 4, 3),
15 | 		makeWord(".", ".", 5, 3),
16 | 	)
17 | 	s := NewState(words)
18 | 	features := make([]int, 0)
19 | 	AddUnigramFeatures(&features, s, "left", 1)
20 | 
21 | 	if len(features) == 0 {
22 | 		t.Error("length of features must be greater than 0")
23 | 	}
24 | }
25 | 


--------------------------------------------------------------------------------
/decoder.go:
--------------------------------------------------------------------------------
 1 | package main
 2 | 
 3 | func isFinished(state *State) bool {
 4 | 	return len(state.pending) == 1
 5 | }
 6 | 
 7 | func decode(weight *[]float64, state *State) {
 8 | 	if isFinished(state) {
 9 | 		// Do nothing
10 | 	} else {
11 | 		pair := BestActionIndexPair(weight, state)
12 | 		pair.action(state, pair.index)
13 | 		state.ResetFvCache(pair.index)
14 | 		decode(weight, state)
15 | 	}
16 | }
17 | 
18 | func Decode(weight *[]float64, sent *Sentence) {
19 | 	s := NewState(sent.words)
20 | 	decode(weight, s)
21 | 
22 | 	for child, parent := range s.arcs {
23 | 		sent.words[child].predHead = parent
24 | 	}
25 | }
26 | 


--------------------------------------------------------------------------------
/decoder_test.go:
--------------------------------------------------------------------------------
 1 | package main
 2 | 
 3 | import (
 4 | 	"testing"
 5 | )
 6 | 
 7 | func TestDecode(t *testing.T) {
 8 | 	words := make([]*Word, 0)
 9 | 	words = append(words,
10 | 		makeRootWord(),
11 | 		makeWord("ms.", "NNP", 1, 2),
12 | 		makeWord("hang", "NNP", 2, 3),
13 | 		makeWord("plays", "VBZ", 3, 0),
14 | 		makeWord("elianti", "NNP", 4, 3),
15 | 		makeWord(".", ".", 5, 3),
16 | 	)
17 | 	sent := Sentence{words: words}
18 | 	weight := make([]float64, MaxFeatureLength)
19 | 
20 | 	s := NewState(sent.words)
21 | 	decode(&weight, s)
22 | 	if len(s.arcs) == 0 {
23 | 		t.Error("length of arcs must be greater than 0")
24 | 	}
25 | }
26 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | COVERAGE = coverage.out
 2 | 
 3 | all: build
 4 | 
 5 | .PHONY: deps
 6 | deps:
 7 | 	go get github.com/mattn/goveralls
 8 | 	go get -u github.com/jteeuwen/go-bindata/...
 9 | 
10 | .PHONY: bindata
11 | bindata:
12 | 	${GOPATH}/bin/go-bindata -ignore='\.gitkeep' data
13 | 
14 | .PHONY: build
15 | build:
16 | 	go build -v
17 | 
18 | .PHONY: fmt
19 | fmt:
20 | 	gofmt -s -w $$(git ls-files | grep -e '\.go$$' | grep -v -e vendor)
21 | 
22 | .PHONY: test
23 | test:
24 | 	go test -v ./...
25 | 
26 | .PHONY: cover
27 | cover:
28 | 	go test -v -cover -race -coverprofile=${COVERAGE}
29 | 
# `go tool vet` is no longer supported since Go 1.12; use `go vet` instead.
.PHONY: vet
vet:
	go vet ./...
33 | 
34 | .PHONY: test-all
35 | test-all: vet test
36 | 


--------------------------------------------------------------------------------
/action.go:
--------------------------------------------------------------------------------
 1 | package main
 2 | 
// StateAction is the signature of a parser action: it is applied to a state
// at position idx of the state's pending list.
type StateAction func(state *State, idx int)
 4 | 
 5 | // AttachLeft は左側の単語を右側の単語の親にします
 6 | func AttachLeft(state *State, idx int) {
 7 | 	parent := state.pending[idx]
 8 | 	child := state.pending[idx+1]
 9 | 
10 | 	state.deletePending(idx + 1)
11 | 	parent.appendChild(child)
12 | 	state.arcs[child.idx] = parent.idx
13 | }
14 | 
15 | // AttachRight は右側の単語を左側の単語の親にします
16 | func AttachRight(state *State, idx int) {
17 | 	parent := state.pending[idx+1]
18 | 	child := state.pending[idx]
19 | 
20 | 	state.deletePending(idx)
21 | 	parent.prependChild(child)
22 | 	state.arcs[child.idx] = parent.idx
23 | }
24 | 
// StateActions is the set of available actions.
var StateActions = []StateAction{AttachLeft, AttachRight}
27 | 


--------------------------------------------------------------------------------
/state_test.go:
--------------------------------------------------------------------------------
 1 | package main
 2 | 
 3 | import (
 4 | 	"testing"
 5 | )
 6 | 
 7 | func TestDeletePending(t *testing.T) {
 8 | 	words := make([]*Word, 0)
 9 | 	words = append(words,
10 | 		makeRootWord(),
11 | 		makeWord("ms.", "NNP", 0, -1),
12 | 		makeWord("hang", "NNP", 1, 0),
13 | 		makeWord("plays", "VBZ", 2, 1),
14 | 	)
15 | 	s := NewState(words)
16 | 	s.deletePending(2)
17 | 
18 | 	if s.pending[1].surface != "ms." {
19 | 		t.Error("surface must be 'ms.'")
20 | 	}
21 | 	if s.pending[2].surface != "plays" {
22 | 		t.Error("surface must be 'plays'")
23 | 	}
24 | 
25 | 	s.deletePending(1)
26 | 	if s.pending[1].surface != "plays" {
27 | 		t.Error("surface must be 'plays'")
28 | 	}
29 | 
30 | 	if words[1].surface != "ms." {
31 | 		t.Error("surface is wrong!!!" + words[1].surface)
32 | 	}
33 | }
34 | 


--------------------------------------------------------------------------------
/word.go:
--------------------------------------------------------------------------------
 1 | package main
 2 | 
 3 | type Word struct {
 4 | 	surface  string
 5 | 	lemma    string
 6 | 	posTag   string
 7 | 	cposTag  string
 8 | 	idx      int
 9 | 	head     int
10 | 	predHead int
11 | 	children []Word
12 | }
13 | 
14 | func makeWord(surface string, posTag string, idx int, head int) *Word {
15 | 	return &Word{surface, surface, posTag, posTag, idx, head, head, make([]Word, 0)}
16 | }
17 | 
18 | func makeRootWord() *Word {
19 | 	return makeWord("*ROOT*", "*ROOT*", 0, -1)
20 | }
21 | 
22 | func (word *Word) appendChild(c *Word) []Word {
23 | 	word.children = append(word.children, *c)
24 | 	return word.children
25 | }
26 | 
27 | func (word *Word) prependChild(c *Word) []Word {
28 | 	word.children = append([]Word{*c}, word.children...)
29 | 	return word.children
30 | }
31 | 
32 | func (word *Word) LeftMostChild() *Word {
33 | 	if len(word.children) == 0 {
34 | 		return nil
35 | 	} else {
36 | 		return &word.children[0]
37 | 	}
38 | }
39 | 
40 | func (word *Word) RightMostChild() *Word {
41 | 	if len(word.children) == 0 {
42 | 		return nil
43 | 	} else {
44 | 		return &word.children[len(word.children)-1]
45 | 	}
46 | }
47 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2017 Yasuhisa Yoshida <syou6162@gmail.com>
 2 | 
 3 | Permission is hereby granted, free of charge, to any person obtaining a copy
 4 | of this software and associated documentation files (the "Software"), to deal
 5 | in the Software without restriction, including without limitation the rights
 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 7 | copies of the Software, and to permit persons to whom the Software is
 8 | furnished to do so, subject to the following conditions:
 9 | 
10 | The above copyright notice and this permission notice shall be included in
11 | all copies or substantial portions of the Software.
12 | 
13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 | THE SOFTWARE.


--------------------------------------------------------------------------------
/evaluation.go:
--------------------------------------------------------------------------------
 1 | package main
 2 | 
 3 | import (
 4 | 	"errors"
 5 | 	"runtime"
 6 | 	"sync"
 7 | )
 8 | 
 9 | func dependencyAccuracy(golds [][]int, predictions [][]int) (float64, error) {
10 | 	if len(golds) != len(predictions) {
11 | 		return 0.0, errors.New("length of golds and that of predictions is not same")
12 | 	}
13 | 	sum := 0.0
14 | 	count := 0.0
15 | 	for idx, gold := range golds {
16 | 		pred := predictions[idx]
17 | 		if len(gold) != len(pred) {
18 | 			return 0.0, errors.New("length of gold and that of pred is not same")
19 | 		}
20 | 		for i, g := range gold {
21 | 			if g == pred[i] {
22 | 				sum += 1.0
23 | 			}
24 | 			count += 1.0
25 | 		}
26 | 	}
27 | 	return sum / count, nil
28 | }
29 | 
30 | func DependencyAccuracy(w *[]float64, sents []*Sentence) float64 {
31 | 	wg := &sync.WaitGroup{}
32 | 	goldHeads := make([][]int, 0)
33 | 	for _, sent := range sents {
34 | 		goldHeads = append(goldHeads, sent.ExtractHeads())
35 | 	}
36 | 
37 | 	predHeads := make([][]int, 0)
38 | 
39 | 	cpus := runtime.NumCPU()
40 | 	semaphore := make(chan int, cpus)
41 | 	for _, sent := range sents {
42 | 		wg.Add(1)
43 | 		go func(sent *Sentence) {
44 | 			defer wg.Done()
45 | 			semaphore <- 1
46 | 			Decode(w, sent)
47 | 			<-semaphore
48 | 		}(sent)
49 | 	}
50 | 	wg.Wait()
51 | 
52 | 	for _, sent := range sents {
53 | 		predHeads = append(predHeads, sent.ExtractPredictedHeads())
54 | 	}
55 | 	accuracy, _ := dependencyAccuracy(goldHeads, predHeads)
56 | 	return accuracy
57 | }
58 | 


--------------------------------------------------------------------------------
/go.sum:
--------------------------------------------------------------------------------
 1 | github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
 2 | github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d h1:U+s90UTSYgptZMwQh2aRr3LuazLJIa+Pg3Kc1ylSYVY=
 3 | github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU=
 4 | github.com/mattn/go-runewidth v0.0.9 h1:Lm995f3rfxdpd6TSmuVCHVb/QhupuXlYr8sCI/QdE+0=
 5 | github.com/mattn/go-runewidth v0.0.9/go.mod h1:H031xJmbD/WCDINGzjvQ9THkh0rPKHF+m2gUSrubnMI=
 6 | github.com/olekukonko/tablewriter v0.0.5 h1:P2Ga83D34wi1o9J6Wh1mRuqd4mF/x/lgBS7N7AbDhec=
 7 | github.com/olekukonko/tablewriter v0.0.5/go.mod h1:hPp6KlRPjbx+hW8ykQs1w3UBbZlj6HuIJcUGPhkA7kY=
 8 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
 9 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
10 | github.com/russross/blackfriday/v2 v2.0.1 h1:lPqVAte+HuHNfhJ/0LC98ESWRz8afy9tM/0RK8m9o+Q=
11 | github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
12 | github.com/shurcooL/sanitized_anchor_name v1.0.0 h1:PdmoCO6wvbs+7yrJyMORt4/BmY5IYyJwS/kOiWx8mHo=
13 | github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc=
14 | github.com/urfave/cli v1.22.5 h1:lNq9sAHXK2qfdI8W+GRItjCEkI+2oR4d+MEHy1CKXoU=
15 | github.com/urfave/cli v1.22.5/go.mod h1:Gos4lmkARVdJ6EkW0WaNv/tZAAMe9V7XWyB60NtXRu0=
16 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
17 | gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
18 | 


--------------------------------------------------------------------------------
/state.go:
--------------------------------------------------------------------------------
 1 | package main
 2 | 
 3 | import (
 4 | 	"math"
 5 | 	"reflect"
 6 | 	"runtime"
 7 | 	"strconv"
 8 | )
 9 | 
// FvCache maps a cache key (see State.cacheKeyStr) to an extracted feature
// vector.
type FvCache map[string][]int
11 | 
// State holds the parser's working data while a sentence is being decoded.
type State struct {
	// pending lists the words that have not been attached to a parent yet.
	pending []*Word
	// arcs maps a child's word index to its parent's word index.
	arcs map[int]int
	// fvCache stores feature vectors keyed by action and adjacent word pair.
	fvCache FvCache
}
17 | 
18 | func (state *State) cacheKeyStr(pair ActionIndexPair) string {
19 | 	funcName := runtime.FuncForPC(reflect.ValueOf(pair.action).Pointer()).Name()
20 | 	left := state.pending[pair.index]
21 | 	right := state.pending[pair.index+1]
22 | 	return funcName + ":" + strconv.Itoa(left.idx) + "-" + strconv.Itoa(right.idx)
23 | }
24 | 
25 | func (state *State) InitFvCache() {
26 | 	for _, f := range StateActions {
27 | 		for idx := 0; idx < len(state.pending)-1; idx++ {
28 | 			pair := ActionIndexPair{f, idx}
29 | 			fv := ExtractFeatures(state, pair)
30 | 			state.fvCache[state.cacheKeyStr(pair)] = fv
31 | 		}
32 | 	}
33 | }
34 | 
35 | func NewState(pending []*Word) *State {
36 | 	for _, w := range pending {
37 | 		w.children = make([]Word, 0)
38 | 	}
39 | 	p := make([]*Word, len(pending))
40 | 	copy(p, pending)
41 | 	state := State{p, make(map[int]int), FvCache{}}
42 | 	state.InitFvCache()
43 | 	return &state
44 | }
45 | 
46 | func (state *State) deletePending(idx int) []*Word {
47 | 	state.pending = append(state.pending[:idx], state.pending[idx+1:]...)
48 | 	return state.pending
49 | }
50 | 
51 | func (state *State) ResetFvCache(index int) {
52 | 	for _, f := range StateActions {
53 | 		min := int(math.Max(0, float64(index-3)))
54 | 		max := int(math.Min(float64(len(state.pending)-1), float64(index+3)))
55 | 		for idx := min; idx < max; idx++ {
56 | 			pair := ActionIndexPair{f, idx}
57 | 			delete(state.fvCache, state.cacheKeyStr(pair))
58 | 		}
59 | 	}
60 | }
61 | 
62 | func (state *State) GetFvCache(pair ActionIndexPair) []int {
63 | 	key := state.cacheKeyStr(pair)
64 | 	if fv, ok := state.fvCache[key]; ok {
65 | 		return fv
66 | 	} else {
67 | 		fv = ExtractFeatures(state, pair)
68 | 		state.fvCache[key] = fv
69 | 		return fv
70 | 	}
71 | }
72 | 


--------------------------------------------------------------------------------
/action_test.go:
--------------------------------------------------------------------------------
 1 | package main
 2 | 
 3 | import (
 4 | 	"testing"
 5 | )
 6 | 
 7 | func TestAttachLeft(t *testing.T) {
 8 | 	words := make([]*Word, 0)
 9 | 	words = append(words,
10 | 		makeRootWord(),
11 | 		makeWord("ms.", "NNP", 1, 2),
12 | 		makeWord("hang", "NNP", 2, 3),
13 | 		makeWord("plays", "VBZ", 3, 0),
14 | 		makeWord("elianti", "NNP", 4, 3),
15 | 		makeWord(".", ".", 5, 3),
16 | 	)
17 | 	s := NewState(words)
18 | 	AttachLeft(s, 3)
19 | 	p, ok := s.arcs[4]
20 | 	if !ok || p != 3 {
21 | 		t.Error("parent's index must be 3")
22 | 	}
23 | 
24 | 	AttachLeft(s, 3)
25 | 	p, ok = s.arcs[5]
26 | 	if !ok || p != 3 {
27 | 		t.Error("parent's index must be 3")
28 | 	}
29 | 
30 | 	if len(s.pending) != 4 {
31 | 		t.Error("length of pending must be 4")
32 | 	}
33 | }
34 | 
35 | func TestAttachRight(t *testing.T) {
36 | 	words := make([]*Word, 0)
37 | 	words = append(words,
38 | 		makeRootWord(),
39 | 		makeWord("ms.", "NNP", 1, 2),
40 | 		makeWord("hang", "NNP", 2, 3),
41 | 		makeWord("plays", "VBZ", 3, 0),
42 | 		makeWord("elianti", "NNP", 4, 3),
43 | 		makeWord(".", ".", 5, 3),
44 | 	)
45 | 	s := NewState(words)
46 | 	AttachRight(s, 3)
47 | 	p, ok := s.arcs[3]
48 | 	if !ok || p != 4 {
49 | 		t.Error("parent's index must be 4")
50 | 	}
51 | 
52 | 	AttachRight(s, 3)
53 | 	p, ok = s.arcs[4]
54 | 	if !ok || p != 5 {
55 | 		t.Error("parent's index must be 5")
56 | 	}
57 | 
58 | 	if len(s.pending) != 4 {
59 | 		t.Error("length of pending must be 4")
60 | 	}
61 | }
62 | 
63 | func TestAttachLeftAll(t *testing.T) {
64 | 	words := make([]*Word, 0)
65 | 	words = append(words,
66 | 		makeRootWord(),
67 | 		makeWord("ms.", "NNP", 1, 2),
68 | 		makeWord("hang", "NNP", 2, 3),
69 | 		makeWord("plays", "VBZ", 3, 0),
70 | 		makeWord("elianti", "NNP", 4, 3),
71 | 		makeWord(".", ".", 5, 3),
72 | 	)
73 | 	s := NewState(words)
74 | 	AttachLeft(s, 0)
75 | 	AttachLeft(s, 0)
76 | 	AttachLeft(s, 0)
77 | 	AttachLeft(s, 0)
78 | 	AttachLeft(s, 0)
79 | 	if words[1].surface != "ms." {
80 | 		t.Error("surface is wrong")
81 | 	}
82 | }
83 | 


--------------------------------------------------------------------------------
/reader.go:
--------------------------------------------------------------------------------
 1 | package main
 2 | 
 3 | import (
 4 | 	"bufio"
 5 | 	"encoding/gob"
 6 | 	"errors"
 7 | 	"io/ioutil"
 8 | 	"os"
 9 | 	"strconv"
10 | 	"strings"
11 | )
12 | 
13 | func makeSentence(s string) (*Sentence, error) {
14 | 	lines := strings.Split(s, "\n")
15 | 	if len(lines) < 4 {
16 | 		return nil, errors.New("Invalid line")
17 | 	}
18 | 	words := strings.Split(strings.TrimSpace(lines[0]), "\t")
19 | 	posTags := strings.Split(strings.TrimSpace(lines[1]), "\t")
20 | 	heads := strings.Split(strings.TrimSpace(lines[3]), "\t")
21 | 
22 | 	sent := make([]*Word, 0)
23 | 	sent = append(sent, makeRootWord())
24 | 	for i := 0; i < len(words); i++ {
25 | 		head, err := strconv.ParseInt(heads[i], 10, 0)
26 | 		if err != nil {
27 | 			return nil, err
28 | 		}
29 | 		sent = append(sent, makeWord(words[i], posTags[i], i+1, int(head)))
30 | 	}
31 | 	return &Sentence{sent}, nil
32 | }
33 | 
// splitBySentence cuts s into sentence blocks at every blank line, i.e. at
// each occurrence of two consecutive newline characters.
func splitBySentence(s string) []string {
	const blankLine = "\n\n"
	blocks := strings.Split(s, blankLine)
	return blocks
}
37 | 
38 | func ReadData(filename string) ([]*Sentence, error) {
39 | 	file, err := os.Open(filename)
40 | 	if err != nil {
41 | 		return nil, err
42 | 	}
43 | 
44 | 	data, err := ioutil.ReadAll(bufio.NewReader(file))
45 | 	if err != nil {
46 | 		return nil, err
47 | 	}
48 | 
49 | 	sentences := make([]*Sentence, 0)
50 | 	for _, sent := range splitBySentence(string(data)) {
51 | 		s, err := makeSentence(sent)
52 | 		if err != nil {
53 | 			break
54 | 		}
55 | 		sentences = append(sentences, s)
56 | 	}
57 | 	return sentences, nil
58 | }
59 | 
// SaveModel gob-encodes the weight vector into filename, creating the file
// or truncating an existing one. It returns the first error hit while
// creating, encoding or closing the file.
func SaveModel(weight *[]float64, filename string) error {
	file, err := os.Create(filename)
	if err != nil {
		return err
	}

	if err := gob.NewEncoder(file).Encode(&weight); err != nil {
		file.Close() // best effort; the encode error is the one to report
		return err
	}
	// Close may surface a write failure, so its error is returned.
	return file.Close()
}
71 | 
// LoadModel reads a gob-encoded weight vector from filename. It returns nil
// and an error when the file cannot be opened or its content cannot be
// decoded, rather than silently handing back an empty weight vector.
func LoadModel(filename string) (*[]float64, error) {
	file, err := os.Open(filename)
	if err != nil {
		return nil, err
	}
	// The file is only read, so a Close error carries no information.
	defer file.Close()

	var w []float64
	if err := gob.NewDecoder(file).Decode(&w); err != nil {
		return nil, err
	}
	return &w, nil
}
84 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # go-easy-first
 2 | [![CircleCI](https://circleci.com/gh/syou6162/go-easy-first.svg?style=shield)](https://circleci.com/gh/syou6162/go-easy-first)
 3 | [![Go Report Card](https://goreportcard.com/badge/github.com/syou6162/go-easy-first)](https://goreportcard.com/report/github.com/syou6162/go-easy-first)
 4 | [![Coverage Status](https://coveralls.io/repos/github/syou6162/go-easy-first/badge.svg?branch=coveralls)](https://coveralls.io/github/syou6162/go-easy-first?branch=coveralls)
 5 | 
 6 | go-easy-first - Dependency Parser with Easy-First Algorithm (An Efficient Algorithm for Easy-First Non-Directional Dependency Parsing, NAACL-2010, Yoav Goldberg and Michael Elhadad) written in Go.
 7 | 
 8 | # Build from source
 9 | 
10 | ```sh
11 | % git clone https://github.com/syou6162/go-easy-first.git
12 | % cd go-easy-first
13 | % make deps && make bindata && make build
14 | ```
15 | 
16 | # Usage
17 | go-easy-first has a `train` mode (trains a parser) and an `eval` mode (evaluates a trained parser). To see the detailed options, type `./go-easy-first --help`.
18 | 
19 | ## Training a parser
20 | To see the detail options, type `./go-easy-first train --help`.
21 | 
22 | ```sh
23 | % ./go-easy-first train --train-filename path/to/train.txt --dev-filename path/to/dev.txt --max-iter 10 --model-filename model.bin
24 | 0, 0.907, 0.893
25 | 1, 0.920, 0.901
26 | 2, 0.929, 0.904
27 | 3, 0.935, 0.906
28 | 4, 0.940, 0.907
29 | 5, 0.944, 0.907
30 | 6, 0.947, 0.908
31 | 7, 0.950, 0.908
32 | 8, 0.953, 0.908
33 | 9, 0.955, 0.908
34 | ```
35 | 
36 | ## Evaluating a trained parser
37 | To see the detail options, type `./go-easy-first eval --help`.
38 | 
39 | ```sh
40 | % ./go-easy-first eval --test-filename path/to/test.txt --model-filename model.bin
41 | | SENTENCES | SECONDS | ACCURACY |
42 | |-----------|---------|----------|
43 | |      1346 |    4.60 |    0.888 |
44 | ```
45 | 
46 | ## [Experimental] Build a single binary with embedded model parameters
47 | 
48 | ```sh
49 | % ./go-easy-first train --train-filename path/to/train.txt --dev-filename path/to/dev.txt --max-iter 10 --model-filename data/model.bin
50 | % make bindata && make build
51 | % ./go-easy-first decode --test-filename path/to/test.txt
52 | ```
53 | 
54 | # Roadmap
55 | - [ ] Implement PP-Attachment features
56 | - [ ] Beam search with max-violation perceptron
57 | - [ ] Mini-batch update
58 | - [x] Embed weight parameters to a built binary file using go-bindata
59 | 
60 | # Author
61 | Yasuhisa Yoshida
62 | 


--------------------------------------------------------------------------------
/perceptron_test.go:
--------------------------------------------------------------------------------
  1 | package main
  2 | 
  3 | import (
  4 | 	"reflect"
  5 | 	"testing"
  6 | )
  7 | 
  8 | func TestEdgeFor(t *testing.T) {
  9 | 	words := make([]*Word, 0)
 10 | 	words = append(words,
 11 | 		makeWord("ms.", "NNP", 0, -1),
 12 | 		makeWord("hang", "NNP", 1, 0),
 13 | 		makeWord("plays", "VBZ", 2, 1),
 14 | 	)
 15 | 	s := NewState(words)
 16 | 	pair, err := EdgeFor(s, 0, 0)
 17 | 	if err != nil {
 18 | 		t.Error("error should be nil")
 19 | 	}
 20 | 	if !reflect.DeepEqual(pair, []int{0, 1}) {
 21 | 		t.Error("pair shoud be [0, 1] but: ", pair)
 22 | 	}
 23 | }
 24 | 
 25 | func TestIsValidFalse(t *testing.T) {
 26 | 	words := make([]*Word, 0)
 27 | 	words = append(words,
 28 | 		makeWord("ms.", "NNP", 0, -1),
 29 | 		makeWord("hang", "NNP", 1, 0),
 30 | 		makeWord("plays", "VBZ", 2, 1),
 31 | 	)
 32 | 	s := NewState(words)
 33 | 	goldArcs := make(map[int][]int)
 34 | 	goldArcs[-1] = []int{0}
 35 | 	goldArcs[0] = []int{1}
 36 | 	goldArcs[1] = []int{2}
 37 | 	if IsValid(s, 0, 0, goldArcs) != false {
 38 | 		t.Error("should return false")
 39 | 	}
 40 | }
 41 | 
 42 | func TestIsValidTrue(t *testing.T) {
 43 | 	words := make([]*Word, 0)
 44 | 	words = append(words,
 45 | 		makeWord("ms.", "NNP", 0, -1),
 46 | 		makeWord("hang", "NNP", 1, 0),
 47 | 		makeWord("plays", "VBZ", 2, 1),
 48 | 	)
 49 | 
 50 | 	s := NewState(words)
 51 | 	arcs := make(map[int]int)
 52 | 	arcs[2] = 1
 53 | 	s.arcs = arcs
 54 | 	goldArcs := make(map[int][]int)
 55 | 	goldArcs[-1] = []int{0}
 56 | 	goldArcs[0] = []int{1}
 57 | 	goldArcs[1] = []int{2}
 58 | 	if IsValid(s, 0, 0, goldArcs) != true {
 59 | 		t.Error("should return true")
 60 | 	}
 61 | }
 62 | 
 63 | func TestAllowedActions(t *testing.T) {
 64 | 	words := make([]*Word, 0)
 65 | 	words = append(words,
 66 | 		makeRootWord(),
 67 | 		makeWord("ms.", "NNP", 1, 2),
 68 | 		makeWord("hang", "NNP", 2, 3),
 69 | 		makeWord("plays", "VBZ", 3, 0),
 70 | 		makeWord("elianti", "NNP", 4, 3),
 71 | 		makeWord(".", ".", 5, 3),
 72 | 	)
 73 | 	s := NewState(words)
 74 | 	AttachRight(s, 3)
 75 | 
 76 | 	goldArcs := make(map[int][]int)
 77 | 	goldArcs[-1] = []int{0}
 78 | 	goldArcs[0] = []int{1}
 79 | 	goldArcs[1] = []int{2}
 80 | 
 81 | 	if 1 != len(AllowedActions(s, goldArcs)) {
 82 | 		t.Error("length of allowed actions must be 1")
 83 | 	}
 84 | }
 85 | 
 86 | func TestCandidateActions(t *testing.T) {
 87 | 	words := make([]*Word, 0)
 88 | 	words = append(words,
 89 | 		makeRootWord(),
 90 | 		makeWord("ms.", "NNP", 1, 2),
 91 | 		makeWord("hang", "NNP", 2, 3),
 92 | 		makeWord("plays", "VBZ", 3, 0),
 93 | 		makeWord("elianti", "NNP", 4, 3),
 94 | 		makeWord(".", ".", 5, 3),
 95 | 	)
 96 | 	s := NewState(words)
 97 | 
 98 | 	if 10 != len(CandidateActions(s)) {
 99 | 		t.Error("length of candidate actions must be 10")
100 | 	}
101 | }
102 | 
103 | func TestUpdateWeight(t *testing.T) {
104 | 	model := NewModel()
105 | 	gold := []int{1, 2, 3}
106 | 	predict := []int{1, 3, 4}
107 | 	model.updateWeight(&gold, &predict)
108 | 
109 | 	if w := model.weight[1]; w != 0 {
110 | 		t.Error("weight of '1' must be 0")
111 | 	}
112 | 	if w := model.weight[2]; w != 1 {
113 | 		t.Error("weight of '2' must be 1")
114 | 	}
115 | 	if w := model.weight[3]; w != 0 {
116 | 		t.Error("weight of '3' must be 0")
117 | 	}
118 | 	if w := model.weight[4]; w != -1 {
119 | 		t.Error("weight of '4' must be -1")
120 | 	}
121 | 
122 | 	model.updateWeight(&gold, &predict)
123 | 
124 | 	if w := model.cumWeight[1]; w != 0 {
125 | 		t.Error("cumWeight of '1' must be 0")
126 | 	}
127 | 	if w := model.cumWeight[2]; w != 3 {
128 | 		t.Error("cumWeight of '2' must be 3")
129 | 	}
130 | 	if w := model.cumWeight[3]; w != 0 {
131 | 		t.Error("cumWeight of '3' must be 0")
132 | 	}
133 | 	if w := model.cumWeight[4]; w != -3 {
134 | 		t.Error("cumWeight of '4' must be -3")
135 | 	}
136 | }
137 | 
138 | func TestUpdate(t *testing.T) {
139 | 	words := make([]*Word, 0)
140 | 	words = append(words,
141 | 		makeRootWord(),
142 | 		makeWord("ms.", "NNP", 1, 2),
143 | 		makeWord("hang", "NNP", 2, 3),
144 | 		makeWord("plays", "VBZ", 3, 0),
145 | 		makeWord("elianti", "NNP", 4, 3),
146 | 		makeWord(".", ".", 5, 3),
147 | 	)
148 | 	sent := Sentence{words: words}
149 | 	model := NewModel()
150 | 	model.Update(&sent)
151 | 	if model.count == 1 {
152 | 		t.Error("count must be greater than 1")
153 | 	}
154 | }
155 | 


--------------------------------------------------------------------------------
/main.go:
--------------------------------------------------------------------------------
  1 | package main
  2 | 
  3 | import (
  4 | 	"bytes"
  5 | 	"encoding/gob"
  6 | 	"fmt"
  7 | 	"math/rand"
  8 | 	"os"
  9 | 	"runtime"
 10 | 	"time"
 11 | 
 12 | 	"github.com/olekukonko/tablewriter"
 13 | 	"github.com/urfave/cli"
 14 | )
 15 | 
 16 | func shuffle(data []*Sentence) {
 17 | 	n := len(data)
 18 | 	for i := n - 1; i >= 0; i-- {
 19 | 		j := rand.Intn(i + 1)
 20 | 		data[i], data[j] = data[j], data[i]
 21 | 	}
 22 | }
 23 | 
 24 | func printEvaluation(data [][]string) {
 25 | 	table := tablewriter.NewWriter(os.Stdout)
 26 | 	table.SetHeader([]string{"Sentences", "Seconds", "Accuracy"})
 27 | 	table.SetBorders(tablewriter.Border{Left: true, Top: false, Right: true, Bottom: false})
 28 | 	table.SetCenterSeparator("|")
 29 | 	table.AppendBulk(data) // Add Bulk Data
 30 | 	table.Render()
 31 | }
 32 | 
// commandTrain defines the `train` subcommand, handled by doTrain. It takes
// the training, development and model file names plus a max-iter count that
// defaults to 10.
var commandTrain = cli.Command{
	Name:  "train",
	Usage: "Train a parsing model by easy-first algorithm",
	Description: `
Train a parsing model by easy-first algorithm.
`,
	Action: doTrain,
	Flags: []cli.Flag{
		cli.StringFlag{Name: "train-filename"},
		cli.StringFlag{Name: "dev-filename"},
		cli.StringFlag{Name: "model-filename"},
		cli.IntFlag{Name: "max-iter", Value: 10},
	},
}
 47 | 
// commandEval defines the `eval` subcommand, handled by doEval. It takes the
// test file name and the model file name.
var commandEval = cli.Command{
	Name:  "eval",
	Usage: "Evaluate a parsing model by easy-first algorithm",
	Description: `
Evaluate a parsing model by easy-first algorithm.
`,
	Action: doEval,
	Flags: []cli.Flag{
		cli.StringFlag{Name: "test-filename"},
		cli.StringFlag{Name: "model-filename"},
	},
}
 60 | 
 61 | // This is an experimental feature
 62 | var commandDecode = cli.Command{
 63 | 	Name:  "decode",
 64 | 	Usage: "Decode a sentence with an embeded model",
 65 | 	Description: `
 66 | Decode a sentence with an embeded model.
 67 | `,
 68 | 	Action: doDecode,
 69 | 	Flags: []cli.Flag{
 70 | 		cli.StringFlag{Name: "test-filename"},
 71 | 	},
 72 | }
 73 | 
// Commands lists the subcommands defined in this file: train, eval and
// decode.
var Commands = []cli.Command{
	commandTrain,
	commandEval,
	commandDecode,
}
 79 | 
 80 | func doTrain(c *cli.Context) error {
 81 | 	trainFilename := c.String("train-filename")
 82 | 	devFilename := c.String("dev-filename")
 83 | 	modelFilename := c.String("model-filename")
 84 | 	maxIter := c.Int("max-iter")
 85 | 
 86 | 	if trainFilename == "" {
 87 | 		_ = cli.ShowCommandHelp(c, "train")
 88 | 		return cli.NewExitError("`train-filename` is a required field to train a parser.", 1)
 89 | 	}
 90 | 
 91 | 	if devFilename == "" {
 92 | 		_ = cli.ShowCommandHelp(c, "train")
 93 | 		return cli.NewExitError("`dev-filename` is a required field to train a parser.", 1)
 94 | 	}
 95 | 
 96 | 	if modelFilename == "" {
 97 | 		_ = cli.ShowCommandHelp(c, "train")
 98 | 		return cli.NewExitError("`model-filename` is a required field to train a parser.", 1)
 99 | 	}
100 | 
101 | 	goldSents, _ := ReadData(trainFilename)
102 | 	devSents, _ := ReadData(devFilename)
103 | 
104 | 	model := NewModel()
105 | 	for iter := 0; iter < maxIter; iter++ {
106 | 		shuffle(goldSents)
107 | 		for _, sent := range goldSents {
108 | 			model.Update(sent)
109 | 		}
110 | 		w := model.AveragedWeight()
111 | 		trainAccuracy := DependencyAccuracy(&w, goldSents)
112 | 		devAccuracy := DependencyAccuracy(&w, devSents)
113 | 		fmt.Println(fmt.Sprintf("%d, %0.03f, %0.03f", iter, trainAccuracy, devAccuracy))
114 | 	}
115 | 
116 | 	w := model.AveragedWeight()
117 | 	SaveModel(&w, modelFilename)
118 | 	return nil
119 | }
120 | 
121 | func doEval(c *cli.Context) error {
122 | 	testFilename := c.String("test-filename")
123 | 	modelFilename := c.String("model-filename")
124 | 
125 | 	if testFilename == "" {
126 | 		_ = cli.ShowCommandHelp(c, "eval")
127 | 		return cli.NewExitError("`test-filename` is a required field to evaluate a parser.", 1)
128 | 	}
129 | 
130 | 	if modelFilename == "" {
131 | 		_ = cli.ShowCommandHelp(c, "eval")
132 | 		return cli.NewExitError("`model-filename` is a required field to evaluate a parser.", 1)
133 | 	}
134 | 
135 | 	goldSents, _ := ReadData(testFilename)
136 | 	weight, _ := LoadModel(modelFilename)
137 | 	start := time.Now()
138 | 	testAccuracy := DependencyAccuracy(weight, goldSents)
139 | 	end := time.Now().Sub(start).Seconds()
140 | 
141 | 	data := [][]string{
142 | 		{fmt.Sprintf("%d", len(goldSents)), fmt.Sprintf("%0.02f", end), fmt.Sprintf("%0.03f", testAccuracy)},
143 | 	}
144 | 	printEvaluation(data)
145 | 	return nil
146 | }
147 | 
148 | func loadModel(filename string) (*[]float64, error) {
149 | 	var weight []float64
150 | 	var b bytes.Buffer
151 | 	tmp, err := Asset(filename)
152 | 	if err != nil {
153 | 		return nil, err
154 | 	}
155 | 	b.Write(tmp)
156 | 
157 | 	decoder := gob.NewDecoder(&b)
158 | 	decoder.Decode(&weight)
159 | 	return &weight, nil
160 | }
161 | 
162 | func doDecode(c *cli.Context) error {
163 | 	testFilename := c.String("test-filename")
164 | 
165 | 	if testFilename == "" {
166 | 		_ = cli.ShowCommandHelp(c, "decode")
167 | 		return cli.NewExitError("`test-filename` is a required field to decode sentences.", 1)
168 | 	}
169 | 
170 | 	goldSents, _ := ReadData(testFilename)
171 | 
172 | 	weight, err := loadModel("data/model.bin")
173 | 	if err != nil {
174 | 		return err
175 | 	}
176 | 
177 | 	start := time.Now()
178 | 	testAccuracy := DependencyAccuracy(weight, goldSents)
179 | 	end := time.Now().Sub(start).Seconds()
180 | 
181 | 	data := [][]string{
182 | 		{fmt.Sprintf("%d", len(goldSents)), fmt.Sprintf("%0.02f", end), fmt.Sprintf("%0.03f", testAccuracy)},
183 | 	}
184 | 	printEvaluation(data)
185 | 	return nil
186 | }
187 | 
188 | func main() {
189 | 	app := cli.NewApp()
190 | 	app.Name = "easy-first"
191 | 	app.Commands = Commands
192 | 
193 | 	runtime.GOMAXPROCS(runtime.NumCPU())
194 | 
195 | 	app.Run(os.Args)
196 | }
197 | 


--------------------------------------------------------------------------------
/perceptron.go:
--------------------------------------------------------------------------------
  1 | package main
  2 | 
  3 | import (
  4 | 	"errors"
  5 | 	"math"
  6 | 	"reflect"
  7 | )
  8 | 
  9 | // GoldArcs returns map of parent => children
 10 | func GoldArcs(sent *Sentence) map[int][]int {
 11 | 	result := make(map[int][]int)
 12 | 	for idx, w := range sent.words {
 13 | 		head := w.head
 14 | 		if children, ok := result[head]; ok {
 15 | 			result[head] = append(children, idx)
 16 | 		} else {
 17 | 			result[head] = []int{idx}
 18 | 		}
 19 | 	}
 20 | 	return result
 21 | }
 22 | 
 23 | // EdgeFor returns a pair of parent index and child index
 24 | func EdgeFor(state *State, actionID int, idx int) ([]int, error) {
 25 | 	switch actionID {
 26 | 	case 0:
 27 | 		return []int{state.pending[idx].idx, state.pending[idx+1].idx}, nil
 28 | 	case 1:
 29 | 		return []int{state.pending[idx+1].idx, state.pending[idx].idx}, nil
 30 | 	default:
 31 | 		return nil, errors.New("Invalid line")
 32 | 	}
 33 | }
 34 | 
 35 | // IsValid returns the chosen action/location pair is valid
 36 | func IsValid(state *State, actionID int, idx int, goldArcs map[int][]int) bool {
 37 | 	pair, err := EdgeFor(state, actionID, idx)
 38 | 	if err != nil {
 39 | 		return false
 40 | 	}
 41 | 	pIdx := pair[0]
 42 | 	cIdx := pair[1]
 43 | 	containedInGoldArcs := false
 44 | 	for _, i := range goldArcs[pIdx] {
 45 | 		if cIdx == i {
 46 | 			containedInGoldArcs = true
 47 | 			break
 48 | 		}
 49 | 	}
 50 | 	flag := false
 51 | 	for _, cPrime := range goldArcs[cIdx] {
 52 | 		if cIdx != state.arcs[cPrime] {
 53 | 			flag = true
 54 | 			break
 55 | 		}
 56 | 	}
 57 | 	if !containedInGoldArcs || flag {
 58 | 		return false
 59 | 	}
 60 | 	return true
 61 | }
 62 | 
// ActionIndexPair couples a state action with the position in state.pending
// at which it is to be applied.
type ActionIndexPair struct {
	action StateAction // action to apply; invoked as action(state, index)
	index  int         // position in state.pending the action refers to
}
 67 | 
 68 | func (pair1 ActionIndexPair) SameActionIndexPair(pair2 ActionIndexPair) bool {
 69 | 	return pair1.index == pair2.index &&
 70 | 		reflect.ValueOf(pair1.action).Pointer() == reflect.ValueOf(pair2.action).Pointer()
 71 | }
 72 | 
 73 | func AllowedActions(state *State, goldArcs map[int][]int) []ActionIndexPair {
 74 | 	result := make([]ActionIndexPair, 0)
 75 | 	for actionID, f := range StateActions {
 76 | 		for idx := 0; idx < len(state.pending)-1; idx++ {
 77 | 			if IsValid(state, actionID, idx, goldArcs) {
 78 | 				result = append(result, ActionIndexPair{f, idx})
 79 | 			}
 80 | 		}
 81 | 	}
 82 | 	return result
 83 | }
 84 | 
 85 | func CandidateActions(state *State) []ActionIndexPair {
 86 | 	result := make([]ActionIndexPair, 0)
 87 | 	for _, f := range StateActions {
 88 | 		for idx := 0; idx < len(state.pending)-1; idx++ {
 89 | 			result = append(result, ActionIndexPair{f, idx})
 90 | 		}
 91 | 	}
 92 | 	return result
 93 | }
 94 | 
 95 | func DotProduct(weight *[]float64, fv []int) float64 {
 96 | 	sum := 0.0
 97 | 	for _, f := range fv {
 98 | 		sum += (*weight)[f]
 99 | 	}
100 | 	return sum
101 | }
102 | 
103 | func BestActionIndexPair(weight *[]float64, state *State) ActionIndexPair {
104 | 	bestScore := math.Inf(-1)
105 | 	pairs := CandidateActions(state)
106 | 	bestPair := pairs[0]
107 | 	for _, pair := range pairs {
108 | 		fv := state.GetFvCache(pair)
109 | 		score := DotProduct(weight, fv)
110 | 		if score > bestScore {
111 | 			bestPair = pair
112 | 			bestScore = score
113 | 		}
114 | 	}
115 | 	return bestPair
116 | }
117 | 
118 | func BestAllowedActionIndexPair(weight *[]float64, state *State, pairs []ActionIndexPair) ActionIndexPair {
119 | 	bestScore := math.Inf(-1)
120 | 	bestPair := pairs[0]
121 | 	for _, pair := range pairs {
122 | 		fv := state.GetFvCache(pair)
123 | 		score := DotProduct(weight, fv)
124 | 		if score > bestScore {
125 | 			bestPair = pair
126 | 			bestScore = score
127 | 		}
128 | 	}
129 | 	return bestPair
130 | }
131 | 
// Model holds the state of the perceptron used for training.
type Model struct {
	weight    []float64 // current weight of each feature index
	cumWeight []float64 // sum of the applied updates, each scaled by count at update time
	count     int       // update counter; starts at 1 and grows by one per updateWeight call
}
137 | 
138 | func NewModel() Model {
139 | 	return Model{make([]float64, MaxFeatureLength), make([]float64, MaxFeatureLength), 1}
140 | }
141 | 
142 | func (model *Model) updateWeight(goldFeatureVector *[]int, predictFeatureVector *[]int) {
143 | 	for _, feat := range *goldFeatureVector {
144 | 		w := model.weight[feat]
145 | 		cumW := model.cumWeight[feat]
146 | 		model.weight[feat] = w + 1.0
147 | 		model.cumWeight[feat] = cumW + float64(model.count)
148 | 	}
149 | 	for _, feat := range *predictFeatureVector {
150 | 		w := model.weight[feat]
151 | 		cumW := model.cumWeight[feat]
152 | 		model.weight[feat] = w - 1.0
153 | 		model.cumWeight[feat] = cumW - float64(model.count)
154 | 	}
155 | 	model.count += 1
156 | }
157 | 
158 | func (model *Model) Update(gold *Sentence) {
159 | 	state := NewState(gold.words)
160 | 	goldArcs := GoldArcs(gold)
161 | 	iter := 0
162 | 	for {
163 | 		if len(state.pending) <= 1 {
164 | 			break
165 | 		}
166 | 		allow := AllowedActions(state, goldArcs)
167 | 		choice := BestActionIndexPair(&model.weight, state)
168 | 		containChoice := false
169 | 		for _, pair := range allow {
170 | 			if pair.SameActionIndexPair(choice) {
171 | 				containChoice = true
172 | 			}
173 | 		}
174 | 		if containChoice {
175 | 			choice.action(state, choice.index)
176 | 			state.ResetFvCache(choice.index)
177 | 		} else {
178 | 			predFv := state.GetFvCache(choice)
179 | 			good := BestAllowedActionIndexPair(&model.weight, state, allow)
180 | 			goodFv := state.GetFvCache(good)
181 | 			model.updateWeight(&goodFv, &predFv)
182 | 		}
183 | 		iter++
184 | 		if iter > 500 { // for infinite loop
185 | 			break
186 | 		}
187 | 	}
188 | }
189 | 
190 | // w_t - w_cum / t
191 | func (model *Model) AveragedWeight() []float64 {
192 | 	avg := make([]float64, MaxFeatureLength)
193 | 	for k, v := range model.weight {
194 | 		avg[k] = v
195 | 	}
196 | 	for k, v := range model.cumWeight {
197 | 		avg[k] = avg[k] - v/float64(model.count)
198 | 	}
199 | 	return avg
200 | }
201 | 


--------------------------------------------------------------------------------
/feature.go:
--------------------------------------------------------------------------------
  1 | package main
  2 | 
  3 | import (
  4 | 	"math"
  5 | 	"reflect"
  6 | 	"runtime"
  7 | 	"strconv"
  8 | )
  9 | 
 10 | func NilSafePosTag(w *Word) string {
 11 | 	posTag := ""
 12 | 	if w != nil {
 13 | 		posTag = w.posTag
 14 | 	}
 15 | 	return posTag
 16 | }
 17 | 
 18 | func NilSafePendingWord(state *State, idx int) *Word {
 19 | 	if idx < 0 || idx >= len(state.pending) {
 20 | 		return nil
 21 | 	} else {
 22 | 		return state.pending[idx]
 23 | 	}
 24 | }
 25 | 
 26 | func addUnigramFeatures(features *[]int, state *State, actName string, idx int, prefix string) {
 27 | 	if idx < 0 || idx >= len(state.pending) {
 28 | 		return
 29 | 	}
 30 | 	w := state.pending[idx]
 31 | 	lcp := NilSafePosTag(w.LeftMostChild())
 32 | 	rcp := NilSafePosTag(w.RightMostChild())
 33 | 	*features = append(*features,
 34 | 		JenkinsHash(actName+"+"+prefix+"+surface:"+w.surface),
 35 | 		JenkinsHash(actName+"+"+prefix+"+lemma:"+w.lemma),
 36 | 		JenkinsHash(actName+"+"+prefix+"+posTag:"+w.posTag),
 37 | 		JenkinsHash(actName+"+"+prefix+"+cposTag:"+w.cposTag),
 38 | 		JenkinsHash(actName+"+"+prefix+"+posTag:"+w.posTag+"+leftmost:"+lcp),
 39 | 		JenkinsHash(actName+"+"+prefix+"+posTag:"+w.posTag+"+rightmost:"+rcp),
 40 | 		JenkinsHash(actName+"+"+prefix+"+posTag:"+w.posTag+"+leftmost:"+lcp+"+rightmost:"+rcp),
 41 | 	)
 42 | }
 43 | 
 44 | func distStr(dist int) string {
 45 | 	d := "0"
 46 | 	switch dist {
 47 | 	case 1:
 48 | 		d = "1"
 49 | 	case 2:
 50 | 		d = "2"
 51 | 	case 3:
 52 | 		d = "3"
 53 | 	case 4:
 54 | 		d = "4"
 55 | 	default:
 56 | 		d = "5"
 57 | 	}
 58 | 	return d
 59 | }
 60 | 
 61 | func AddBigramFeatures(features *[]int, actName string, parent *Word, child *Word, prefix string) {
 62 | 	if parent == nil || child == nil {
 63 | 		return
 64 | 	}
 65 | 
 66 | 	plcp := NilSafePosTag(parent.LeftMostChild())
 67 | 	prcp := NilSafePosTag(parent.RightMostChild())
 68 | 	clcp := NilSafePosTag(child.LeftMostChild())
 69 | 	crcp := NilSafePosTag(child.RightMostChild())
 70 | 
 71 | 	*features = append(*features,
 72 | 		JenkinsHash(actName+"+"+prefix+"+parent-surface:"+parent.surface+"+child-surface:"+child.surface),
 73 | 		JenkinsHash(actName+"+"+prefix+"+parent-surface:"+parent.surface+"+child-posTag:"+child.posTag),
 74 | 		JenkinsHash(actName+"+"+prefix+"+parent-posTag:"+parent.posTag+"+child-surface:"+child.surface),
 75 | 		JenkinsHash(actName+"+"+prefix+"+parent-lemma:"+parent.lemma+"+child-lemma:"+child.lemma),
 76 | 		JenkinsHash(actName+"+"+prefix+"+parent-posTag:"+parent.posTag+"+child-posTag:"+child.posTag),
 77 | 		JenkinsHash(actName+"+"+prefix+"+parent-cposTag:"+parent.cposTag+"+child-cposTag:"+child.cposTag),
 78 | 		JenkinsHash(actName+"+"+prefix+"+parent-posTag:"+parent.posTag+"+child-posTag:"+child.posTag+"+plcp:"+plcp+"+prcp:"+prcp),
 79 | 		JenkinsHash(actName+"+"+prefix+"+parent-posTag:"+parent.posTag+"+child-posTag:"+child.posTag+"+plcp:"+plcp+"+crcp:"+crcp),
 80 | 		JenkinsHash(actName+"+"+prefix+"+parent-posTag:"+parent.posTag+"+child-posTag:"+child.posTag+"+clcp:"+clcp+"+prcp:"+prcp),
 81 | 		JenkinsHash(actName+"+"+prefix+"+parent-posTag:"+parent.posTag+"+child-posTag:"+child.posTag+"+clcp:"+clcp+"+crcp:"+crcp),
 82 | 	)
 83 | }
 84 | 
 85 | func AddUnigramFeatures(features *[]int, state *State, actName string, idx int) {
 86 | 	addUnigramFeatures(features, state, actName, idx-2, "p_i-2")
 87 | 	addUnigramFeatures(features, state, actName, idx-1, "p_i-1")
 88 | 	addUnigramFeatures(features, state, actName, idx, "p_i")
 89 | 	addUnigramFeatures(features, state, actName, idx+1, "p_i+1")
 90 | 	addUnigramFeatures(features, state, actName, idx+2, "p_i+2")
 91 | 	addUnigramFeatures(features, state, actName, idx+3, "p_i+3")
 92 | }
 93 | 
 94 | func hasNoChildren(w *Word) bool {
 95 | 	return len(w.children) == 0
 96 | }
 97 | 
 98 | func addStructuralSingleFeatures(features *[]int, state *State, actName string, idx int, prefix string) {
 99 | 	if idx < 0 || idx >= len(state.pending) {
100 | 		return
101 | 	}
102 | 	w := state.pending[idx]
103 | 	*features = append(*features,
104 | 		JenkinsHash(actName+"+"+prefix+"+len:"+strconv.Itoa(len(w.children))),
105 | 		JenkinsHash(actName+"+"+prefix+"+no-children:"+strconv.FormatBool(hasNoChildren(w))),
106 | 	)
107 | }
108 | 
109 | func AddStructuralSingleFeatures(features *[]int, state *State, actName string, idx int) {
110 | 	addStructuralSingleFeatures(features, state, actName, idx-2, "p_i-2")
111 | 	addStructuralSingleFeatures(features, state, actName, idx-1, "p_i-1")
112 | 	addStructuralSingleFeatures(features, state, actName, idx, "p_i")
113 | 	addStructuralSingleFeatures(features, state, actName, idx+1, "p_i+1")
114 | 	addStructuralSingleFeatures(features, state, actName, idx+2, "p_i+2")
115 | 	addStructuralSingleFeatures(features, state, actName, idx+3, "p_i+3")
116 | }
117 | 
118 | func addStructuralPairFeatures(features *[]int, actName string, left *Word, right *Word, prefix string) {
119 | 	if left == nil || right == nil {
120 | 		return
121 | 	}
122 | 	dist := int(math.Abs(float64(left.idx - right.idx)))
123 | 
124 | 	*features = append(*features,
125 | 		JenkinsHash(actName+"+"+prefix+"+dist:"+distStr(dist)),
126 | 		JenkinsHash(actName+"+"+prefix+"+dist:"+distStr(dist)+"+leftPos:"+left.posTag+"+rightPos:"+right.posTag),
127 | 	)
128 | }
129 | 
130 | func extractFeatures(state *State, actName string, idx int) []int {
131 | 	features := make([]int, 0)
132 | 	AddUnigramFeatures(&features, state, actName, idx)
133 | 	AddStructuralSingleFeatures(&features, state, actName, idx)
134 | 
135 | 	p0 := NilSafePendingWord(state, idx-1)
136 | 	p1 := NilSafePendingWord(state, idx)
137 | 	p2 := NilSafePendingWord(state, idx+1)
138 | 	p3 := NilSafePendingWord(state, idx+2)
139 | 
140 | 	AddBigramFeatures(&features, actName, p1, p2, "p_i+p_{i+1}")
141 | 	AddBigramFeatures(&features, actName, p1, p3, "p_i+p_{i+2}")
142 | 	AddBigramFeatures(&features, actName, p0, p1, "p_{i-1}+p_i")
143 | 	AddBigramFeatures(&features, actName, p0, p3, "p_{i-1}+p_{i+2}")
144 | 	AddBigramFeatures(&features, actName, p2, p3, "p_{i+1}+p_{i+2}")
145 | 
146 | 	addStructuralPairFeatures(&features, actName, p1, p2, "p_i+p_{i+1}")
147 | 	addStructuralPairFeatures(&features, actName, p1, p3, "p_i+p_{i+2}")
148 | 	addStructuralPairFeatures(&features, actName, p0, p1, "p_{i-1}+p_i")
149 | 	addStructuralPairFeatures(&features, actName, p0, p3, "p_{i-1}+p_{i+2}")
150 | 	addStructuralPairFeatures(&features, actName, p2, p3, "p_{i+1}+p_{i+2}")
151 | 
152 | 	return features
153 | }
154 | 
// mod returns the non-negative remainder of n divided by m, i.e. a value in
// [0, m). m is expected to be positive.
//
// The remainder is taken before any sign correction, so the result is also
// correct for the minimum int value, whose negation overflows.
func mod(n, m int) int {
	r := n % m // Go's % takes the sign of n, so r lies in (-m, m)
	if r < 0 {
		r += m
	}
	return r
}
162 | 
// MaxFeatureLength is the size of the hashed feature space: JenkinsHash
// reduces hashes modulo this value, and NewModel and AveragedWeight allocate
// vectors of this length.
var MaxFeatureLength = 1000000
164 | 
165 | func JenkinsHash(s string) int {
166 | 	hash := 0
167 | 	for _, b := range []byte(s) {
168 | 		hash += int(b)
169 | 		hash += hash << 10
170 | 		hash ^= hash >> 6
171 | 	}
172 | 
173 | 	hash += hash << 3
174 | 	hash ^= hash >> 11
175 | 	hash += hash << 15
176 | 
177 | 	return mod(hash, MaxFeatureLength)
178 | }
179 | 
180 | func ExtractFeatures(state *State, pair ActionIndexPair) []int {
181 | 	actName := runtime.FuncForPC(reflect.ValueOf(pair.action).Pointer()).Name()
182 | 
183 | 	features := extractFeatures(state, actName, pair.index)
184 | 	return features
185 | }
186 | 


--------------------------------------------------------------------------------