├── .gitignore
├── circle.yml
├── crf
    ├── testdata
    │   ├── train_data_3.txt
    │   ├── train_data_4.txt
    │   ├── train_data_1.txt
    │   └── train_data_2.txt
    ├── crf_trainer_test.go
    ├── crf_trainer.go
    ├── crf_test.go
    └── crf.go
├── .vscode
    └── launch.json
├── README.md
├── bin
    └── ci_test.sh
├── LICENSE
└── labelling
    ├── labelling.go
    └── labelling_test.go


/.gitignore:
--------------------------------------------------------------------------------
1 | *.test


--------------------------------------------------------------------------------
/circle.yml:
--------------------------------------------------------------------------------
1 | test:
2 |     override:
3 |         - sh ./bin/ci_test.sh


--------------------------------------------------------------------------------
/crf/testdata/train_data_3.txt:
--------------------------------------------------------------------------------
1 | label1 label2 label3
2 | 
3 | 1 cup water
4 | label1 label2
5 | 


--------------------------------------------------------------------------------
/crf/testdata/train_data_4.txt:
--------------------------------------------------------------------------------
1 | label1 label2 label3
2 | 
3 | 1 cup water
4 | label1 label2 label5
5 | 


--------------------------------------------------------------------------------
/crf/testdata/train_data_1.txt:
--------------------------------------------------------------------------------
 1 | quantity unit name comment
 2 | 
 3 | 1/4 cup milk
 4 | quantity unit name 
 5 | 
 6 | 3 large eggs
 7 | quantity name name 
 8 | 
 9 | 5 peeled carrots
10 | quantity comment name
11 | 


--------------------------------------------------------------------------------
/.vscode/launch.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "version": "0.2.0",
 3 |     "configurations": [
 4 |         {
 5 |             "name": "Debug Tests",
 6 |             "type": "go",
 7 |             "request": "launch",
 8 |             "mode": "test",
 9 |             "remotePath": "",
10 |             "port": 2345,
11 |             "host": "127.0.0.1",
12 |             "program": "${workspaceRoot}",
13 |             "env": {},
14 |             "args": [],
15 |             "showLog": true
16 |         }
17 |     ]
18 | }


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # NLP
 2 | 
 3 | [![CircleCI](https://circleci.com/gh/chriscasola/nlp.svg?style=svg)](https://circleci.com/gh/chriscasola/nlp)
 4 | [![Go Report Card](https://goreportcard.com/badge/github.com/chriscasola/nlp)](https://goreportcard.com/report/github.com/chriscasola/nlp)
 5 | [![GoDoc](https://godoc.org/github.com/chriscasola/nlp?status.svg)](https://godoc.org/github.com/chriscasola/nlp)
 6 | 
 7 | NLP is a Go package meant to contain implementations of common natural language processing algorithms. So far there
 8 | is a naive implementation of conditional random fields.
 9 | 
10 | ## CRF
11 | 
12 | The CRF implementation draws from the following articles:
13 | 
14 | * [Introduction to Conditional Random Fields](http://blog.echen.me/2012/01/03/introduction-to-conditional-random-fields/)
15 | * [An Introduction to Conditional Random Fields](http://homepages.inf.ed.ac.uk/csutton/publications/crftutv2.pdf)
16 | * [An Introduction to Conditional Random Fields for Relational Learning](https://people.cs.umass.edu/~mccallum/papers/crf-tutorial.pdf)
17 | 


--------------------------------------------------------------------------------
/bin/ci_test.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | go vet ./...
 4 | go get -u github.com/jstemmer/go-junit-report
 5 | mkdir -p $CIRCLE_TEST_REPORTS/junit
 6 | mkdir -p $CIRCLE_ARTIFACTS/coverage
 7 | cd ..
 8 | mkdir -p .go_workspace/src/github.com/chriscasola/nlp
 9 | cp -R nlp .go_workspace/src/github.com/chriscasola/
10 | cd .go_workspace/src/github.com/chriscasola/nlp
11 | go test -v ./... | go-junit-report > $CIRCLE_TEST_REPORTS/junit/report.xml
12 | cd crf
13 | go test -covermode=count -coverprofile=$CIRCLE_ARTIFACTS/coverage_crf.out ./...
14 | go tool cover -html=$CIRCLE_ARTIFACTS/coverage_crf.out -o $CIRCLE_ARTIFACTS/coverage_crf.html
15 | go tool cover -func=$CIRCLE_ARTIFACTS/coverage_crf.out -o $CIRCLE_ARTIFACTS/coverage_crf.txt
16 | cd ../labelling
17 | go test -covermode=count -coverprofile=$CIRCLE_ARTIFACTS/coverage_labelling.out ./...
18 | go tool cover -html=$CIRCLE_ARTIFACTS/coverage_labelling.out -o $CIRCLE_ARTIFACTS/coverage_labelling.html
19 | go tool cover -func=$CIRCLE_ARTIFACTS/coverage_labelling.out -o $CIRCLE_ARTIFACTS/coverage_labelling.txt
20 | cd ..
21 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2017 Chris Casola
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/labelling/labelling.go:
--------------------------------------------------------------------------------
 1 | package labelling
 2 | 
// FeatureFunction is a feature function for the model. It receives the
// tokenized sentence, the index i of the word being labelled, the candidate
// label for that word (labelCurr) and the label chosen for the previous word
// (labelPrev), and returns the value of the feature for that combination.
// FindBestLabelling passes an empty labelPrev for the first word.
type FeatureFunction func(sentence []string, i int, labelCurr string, labelPrev string) float64
 5 | 
// Feature includes the weight and feature function for a feature.
type Feature struct {
	// Weight is the factor the feature's value is multiplied by when scoring.
	Weight float64
	// Value is the feature function evaluated for each word/label candidate.
	Value FeatureFunction
}
11 | 
12 | // FindBestLabelling determines the best labeling for the given sentence
13 | func FindBestLabelling(sentence []string, labels []string, features []Feature) []string {
14 | 	labelling := make([]string, 0)
15 | 
16 | 	for i := 0; i < len(sentence); i++ {
17 | 		bestScore, bestLabel, currentScore := -1.0, "", 0.0
18 | 		prevLabel := ""
19 | 		if i > 0 {
20 | 			prevLabel = labelling[i-1]
21 | 		}
22 | 
23 | 		for j := 0; j < len(labels); j++ {
24 | 			for k := 0; k < len(features); k++ {
25 | 				currentScore += (features[k].Weight * features[k].Value(sentence, i, labels[j], prevLabel))
26 | 			}
27 | 
28 | 			if currentScore > bestScore {
29 | 				bestScore = currentScore
30 | 				bestLabel = labels[j]
31 | 			}
32 | 
33 | 			currentScore = 0
34 | 		}
35 | 
36 | 		labelling = append(labelling, bestLabel)
37 | 	}
38 | 
39 | 	return labelling
40 | }
41 | 


--------------------------------------------------------------------------------
/labelling/labelling_test.go:
--------------------------------------------------------------------------------
 1 | package labelling
 2 | 
 3 | import (
 4 | 	"reflect"
 5 | 	"strings"
 6 | 	"testing"
 7 | )
 8 | 
 9 | func stringInArray(list []string, s string) bool {
10 | 	for i := 0; i < len(list); i++ {
11 | 		if strings.ToLower(list[i]) == strings.ToLower(s) {
12 | 			return true
13 | 		}
14 | 	}
15 | 
16 | 	return false
17 | }
18 | 
19 | func isQuantityAtBeginning(sentence []string, i int, labelCurr string, labelPrev string) float64 {
20 | 	if i == 0 && stringInArray([]string{"1", "2", "3", "4", "5", "6", "7", "8", "9"}, sentence[i]) && labelCurr == "quantity" {
21 | 		return 1
22 | 	}
23 | 
24 | 	return 0
25 | }
26 | 
// unitFollowsQuantity is a test feature that returns 1 when a "units" label
// directly follows a "quantity" label, and 0 otherwise.
func unitFollowsQuantity(sentence []string, i int, labelCurr string, labelPrev string) float64 {
	if labelPrev != "quantity" || labelCurr != "units" {
		return 0
	}

	return 1
}
34 | 
// ingredientFollowsUnit is a test feature that returns 1 when an "ingredient"
// label directly follows a "units" label, and 0 otherwise.
func ingredientFollowsUnit(sentence []string, i int, labelCurr string, labelPrev string) float64 {
	if labelPrev != "units" || labelCurr != "ingredient" {
		return 0
	}

	return 1
}
42 | 
43 | func TestFindBestLabelling(t *testing.T) {
44 | 	sentence := strings.Split("1 cup apples", " ")
45 | 	labels := []string{"quantity", "units", "ingredient"}
46 | 	features := []Feature{
47 | 		{1.0, isQuantityAtBeginning},
48 | 		{1.0, unitFollowsQuantity},
49 | 		{1.0, ingredientFollowsUnit},
50 | 	}
51 | 	bestLabelling := FindBestLabelling(sentence, labels, features)
52 | 
53 | 	expectedLabelling := []string{"quantity", "units", "ingredient"}
54 | 
55 | 	if reflect.DeepEqual(bestLabelling, expectedLabelling) != true {
56 | 		t.Errorf("Expected %v to equal %v", bestLabelling, expectedLabelling)
57 | 	}
58 | }
59 | 


--------------------------------------------------------------------------------
/crf/crf_trainer_test.go:
--------------------------------------------------------------------------------
 1 | package crf
 2 | 
 3 | import "testing"
 4 | import "reflect"
 5 | 
 6 | func TestLoadTrainingData(t *testing.T) {
 7 | 	labels, sentences, err := LoadTrainingData("./testdata/train_data_1.txt")
 8 | 
 9 | 	if err != nil {
10 | 		t.Errorf("Unexpected error: %v", err)
11 | 		return
12 | 	}
13 | 
14 | 	expectedLabels := []Label{"quantity", "unit", "name", "comment"}
15 | 
16 | 	expectedSentences := []Sentence{
17 | 		{Words: []string{"1/4", "cup", "milk"}, Labeling: SentenceLabeling{Labels: []Label{"quantity", "unit", "name"}}},
18 | 		{Words: []string{"3", "large", "eggs"}, Labeling: SentenceLabeling{Labels: []Label{"quantity", "name", "name"}}},
19 | 		{Words: []string{"5", "peeled", "carrots"}, Labeling: SentenceLabeling{Labels: []Label{"quantity", "comment", "name"}}},
20 | 	}
21 | 
22 | 	if reflect.DeepEqual(labels, expectedLabels) != true {
23 | 		t.Errorf("Expected %v to equal %v", labels, expectedLabels)
24 | 	}
25 | 
26 | 	if reflect.DeepEqual(sentences, expectedSentences) != true {
27 | 		t.Errorf("Expected %v to equal %v", sentences, expectedSentences)
28 | 	}
29 | }
30 | 
31 | func TestLoadTrainingDataWithErrors(t *testing.T) {
32 | 	cases := map[string]string{
33 | 		"./testdata/train_data_3.txt": "not enough labels (line 4)",
34 | 		"./testdata/train_data_4.txt": "invalid label (line 4)",
35 | 	}
36 | 
37 | 	for filePath, expectedError := range cases {
38 | 		_, _, err := LoadTrainingData(filePath)
39 | 
40 | 		if err == nil {
41 | 			t.Errorf("Expected error for bad training data")
42 | 			return
43 | 		}
44 | 
45 | 		if err.Error() != expectedError {
46 | 			t.Errorf("Expected \"%v\" to be \"%v\"", err.Error(), expectedError)
47 | 		}
48 | 	}
49 | }
50 | 


--------------------------------------------------------------------------------
/crf/crf_trainer.go:
--------------------------------------------------------------------------------
 1 | package crf
 2 | 
 3 | import (
 4 | 	"bufio"
 5 | 	"fmt"
 6 | 	"os"
 7 | 	"strings"
 8 | )
 9 | 
10 | // LoadTrainingData loads training data from a file. The first line of the file should
11 | // be a space separate list of all possible labels. The following lines should
12 | // alternate between a training sentence and the labeling for that sentence. Blank
13 | // lines will be ignored.  Below is an example:
14 | //
15 | // 1/4 cup milk
16 | // quantity unit ingredient
17 | //
18 | // 3 large eggs
19 | // quantity ingredient ingredient
20 | //
21 | func LoadTrainingData(filename string) ([]Label, []Sentence, error) {
22 | 	file, err := os.Open(filename)
23 | 
24 | 	if err != nil {
25 | 		return nil, nil, fmt.Errorf("Unable to open data file: %v", err)
26 | 	}
27 | 
28 | 	defer file.Close()
29 | 
30 | 	lineNum := 0
31 | 	result := make([]Sentence, 0)
32 | 	labelSet := make([]Label, 0)
33 | 	scanner := bufio.NewScanner(file)
34 | 	var currentSentence *Sentence
35 | 
36 | 	if scanner.Scan() {
37 | 		lineNum++
38 | 		labels := strings.Split(scanner.Text(), " ")
39 | 		for _, label := range labels {
40 | 			if label != "" {
41 | 				labelSet = append(labelSet, Label(label))
42 | 			}
43 | 		}
44 | 	}
45 | 
46 | 	for scanner.Scan() {
47 | 		lineNum++
48 | 		line := scanner.Text()
49 | 		if line == "" {
50 | 			continue
51 | 		}
52 | 
53 | 		if currentSentence == nil {
54 | 			currentSentence = MakeSentence(line)
55 | 		} else {
56 | 			sentenceLabels := removeEmptyString(strings.Split(line, " "))
57 | 			if len(sentenceLabels) != len(currentSentence.Words) {
58 | 				return nil, nil, fmt.Errorf("not enough labels (line %v)", lineNum)
59 | 			}
60 | 			for _, label := range sentenceLabels {
61 | 				if !labelExists(labelSet, label) {
62 | 					return nil, nil, fmt.Errorf("invalid label (line %v)", lineNum)
63 | 				}
64 | 				currentSentence.Labeling.Labels = append(currentSentence.Labeling.Labels, Label(label))
65 | 			}
66 | 			result = append(result, *currentSentence)
67 | 			currentSentence = nil
68 | 		}
69 | 	}
70 | 
71 | 	if err := scanner.Err(); err != nil {
72 | 		return nil, nil, fmt.Errorf("Error reading training file: %v", err)
73 | 	}
74 | 
75 | 	return labelSet, result, nil
76 | }
77 | 
78 | func labelExists(labels []Label, label string) bool {
79 | 	for i := 0; i < len(labels); i++ {
80 | 		if string(labels[i]) == label {
81 | 			return true
82 | 		}
83 | 	}
84 | 	return false
85 | }
86 | 


--------------------------------------------------------------------------------
/crf/crf_test.go:
--------------------------------------------------------------------------------
  1 | package crf
  2 | 
  3 | import (
  4 | 	"math"
  5 | 	"math/rand"
  6 | 	"reflect"
  7 | 	"strings"
  8 | 	"testing"
  9 | )
 10 | 
 11 | func featureFuncA(s []string, i int, labelCurr Label, labelPrev Label) bool {
 12 | 	if i%2 == 0 {
 13 | 		return true
 14 | 	}
 15 | 
 16 | 	return false
 17 | }
 18 | 
 19 | func featureFuncB(s []string, i int, labelCurr Label, labelPrev Label) bool {
 20 | 	if strings.ToLower(s[i])[0] == 't' {
 21 | 		return true
 22 | 	}
 23 | 
 24 | 	return false
 25 | }
 26 | 
 27 | func featureFuncC(s []string, i int, labelCurr Label, labelPrev Label) bool {
 28 | 	if labelCurr != labelPrev && labelPrev != "" {
 29 | 		return true
 30 | 	}
 31 | 
 32 | 	return false
 33 | }
 34 | 
 35 | func featureFuncD(s []string, i int, labelCurr Label, labelPrev Label) bool {
 36 | 	if labelCurr == "B" && labelPrev == "A" {
 37 | 		return true
 38 | 	}
 39 | 
 40 | 	return false
 41 | }
 42 | 
 43 | var featureA = Feature{Weight: 0.25, Value: featureFuncA}
 44 | var featureB = Feature{Weight: 0.75, Value: featureFuncB}
 45 | var featureC = Feature{Weight: 0.75, Value: featureFuncC}
 46 | var featureD = Feature{Weight: 0.25, Value: featureFuncD}
 47 | 
 48 | func TestScoreLabeling(t *testing.T) {
 49 | 	sentenceAFeatures := make([]Feature, 0)
 50 | 	sentenceAFeatures = append(sentenceAFeatures, featureA)
 51 | 	sentenceAFeatures = append(sentenceAFeatures, featureB)
 52 | 
 53 | 	sentenceA := MakeSentence("This is a test sentence")
 54 | 	labelingA1 := SentenceLabeling{Labels: []Label{"A", "B", "A", "B", "A"}}
 55 | 
 56 | 	if score := sentenceA.ScoreLabeling(&labelingA1, sentenceAFeatures); score != math.Exp(2.25) {
 57 | 		t.Errorf("Score is %v but should be %v", score, math.Exp(2.25))
 58 | 	}
 59 | }
 60 | 
 61 | func TestGetAllPossibleLabelings(t *testing.T) {
 62 | 	result := getAllPossibleLabelings([]string{"the", "fat", "cat"}, []Label{"a", "b"})
 63 | 	expected := []SentenceLabeling{
 64 | 		{Labels: []Label{"a", "a", "a"}},
 65 | 		{Labels: []Label{"a", "a", "b"}},
 66 | 		{Labels: []Label{"a", "b", "a"}},
 67 | 		{Labels: []Label{"a", "b", "b"}},
 68 | 		{Labels: []Label{"b", "a", "a"}},
 69 | 		{Labels: []Label{"b", "a", "b"}},
 70 | 		{Labels: []Label{"b", "b", "a"}},
 71 | 		{Labels: []Label{"b", "b", "b"}},
 72 | 	}
 73 | 
 74 | 	if reflect.DeepEqual(result, expected) != true {
 75 | 		t.Errorf("Expected %v to be %v", result, expected)
 76 | 	}
 77 | 
 78 | 	result = getAllPossibleLabelings([]string{"the", "fat"}, []Label{"a", "b", "c"})
 79 | 	expected = []SentenceLabeling{
 80 | 		{Labels: []Label{"a", "a"}},
 81 | 		{Labels: []Label{"a", "b"}},
 82 | 		{Labels: []Label{"a", "c"}},
 83 | 		{Labels: []Label{"b", "a"}},
 84 | 		{Labels: []Label{"b", "b"}},
 85 | 		{Labels: []Label{"b", "c"}},
 86 | 		{Labels: []Label{"c", "a"}},
 87 | 		{Labels: []Label{"c", "b"}},
 88 | 		{Labels: []Label{"c", "c"}},
 89 | 	}
 90 | 
 91 | 	if reflect.DeepEqual(result, expected) != true {
 92 | 		t.Errorf("Expected %v to be %v", result, expected)
 93 | 	}
 94 | }
 95 | 
 96 | func TestCalculateBestLabeling(t *testing.T) {
 97 | 	sentenceAFeatures := make([]Feature, 0)
 98 | 	sentenceAFeatures = append(sentenceAFeatures, featureC)
 99 | 	sentenceAFeatures = append(sentenceAFeatures, featureD)
100 | 
101 | 	sentenceA := MakeSentence("This is a test sentence")
102 | 	sentenceA.CalculateBestLabeling(sentenceAFeatures, []Label{"A", "B"})
103 | 
104 | 	expected := []Label{"A", "B", "A", "B", "A"}
105 | 	result := sentenceA.Labeling.Labels
106 | 	if reflect.DeepEqual(result, expected) != true {
107 | 		t.Errorf("Expected %v to be %v", result, expected)
108 | 	}
109 | }
110 | 
111 | func TestGetRandomWeights(t *testing.T) {
112 | 	rand.Seed(1)
113 | 	randWeights := getRandomWeights(5)
114 | 	expected := []float64{0.19682432385076745, 0.3061472185456322, 0.21632243051716366, 0.14248132487885407, 0.1382247022075827}
115 | 	if reflect.DeepEqual(randWeights, expected) != true {
116 | 		t.Errorf("Expected %v to be %v", randWeights, expected)
117 | 	}
118 | 
119 | 	sum := float64(0)
120 | 	for _, num := range randWeights {
121 | 		sum += num
122 | 	}
123 | 
124 | 	if sum != 1 {
125 | 		t.Errorf("Expected %v to be 1", sum)
126 | 	}
127 | }
128 | 
129 | func QuantityAtBeginning(s []string, i int, labelCurr Label, labelPrev Label) bool {
130 | 	if i == 0 && labelCurr == "quantity" {
131 | 		return true
132 | 	}
133 | 
134 | 	return false
135 | }
136 | 
137 | func UnitAfterQuantity(s []string, i int, labelCurr Label, labelPrev Label) bool {
138 | 	if labelPrev == "quantity" && labelCurr == "unit" {
139 | 		return true
140 | 	}
141 | 
142 | 	return false
143 | }
144 | 
145 | func TestLearnWeights(t *testing.T) {
146 | 	t.Skip("Skipping until performance can be improved")
147 | 	features := []Feature{
148 | 		Feature{Value: QuantityAtBeginning},
149 | 		Feature{Value: UnitAfterQuantity},
150 | 	}
151 | 	labels, trainingData, err := LoadTrainingData("./testdata/train_data_2.txt")
152 | 
153 | 	if err != nil {
154 | 		t.Errorf("Unexpected error reading test data file: %v", err)
155 | 		return
156 | 	}
157 | 
158 | 	LearnWeights(features, labels, trainingData)
159 | }
160 | 


--------------------------------------------------------------------------------
/crf/testdata/train_data_2.txt:
--------------------------------------------------------------------------------
  1 | quantity unit name comment separator other
  2 | 
  3 | 1/4 cup milk
  4 | quantity unit name 
  5 | 
  6 | 3 large eggs
  7 | quantity name name 
  8 | 
  9 | 5 peeled carrots
 10 | quantity comment name
 11 | 
 12 | 2 tablespoon soy sauce
 13 | quantity unit name name
 14 | 
 15 | 1 tablespoon dry sherry
 16 | quantity unit name name
 17 | 
 18 | 1 dash sesame oil
 19 | quantity unit name name
 20 | 
 21 | 2 tablespoon all-purpose flour
 22 | quantity unit name name
 23 | 
 24 | 2 tablespoon cornstarch
 25 | quantity unit name
 26 | 
 27 | 1/4 teaspoon baking powder
 28 | quantity unit name name
 29 | 
 30 | 1/4 teaspoon baking soda
 31 | quantity unit name name
 32 | 
 33 | 1 teaspoon canola oil
 34 | quantity unit name name
 35 | 
 36 | 4 (5 ounce) skinless, boneless chicken breast halves, cut into 1-inch cubes
 37 | quantity unit unit name name name name name comment comment comment comment
 38 | 
 39 | 1 quart vegetable oil for frying
 40 | quantity unit name name comment comment
 41 | 
 42 | 1/2 cup water
 43 | quantity unit name
 44 | 
 45 | 1 cup chicken broth
 46 | quantity unit name name
 47 | 
 48 | 1/4 cup distilled white vinegar
 49 | quantity unit name name name
 50 | 
 51 | 1/4 cup cornstarch
 52 | quantity unit name
 53 | 
 54 | 1 teaspoon red chile paste (such as Thai Kitchen)
 55 | quantity unit name name name comment comment comment comment
 56 | 
 57 | 1 clove garlic, minced
 58 | quantity name name comment
 59 | 
 60 | 2 tablespoon toasted sesame seeds
 61 | quantity unit comment name name
 62 | 
 63 | 3 zucchinis - ends trimmed, halved, and cut into 1/2-inch strips
 64 | quantity name separator comment comment comment comment comment comment comment comment
 65 | 
 66 | 2 eggs, beaten
 67 | quantity name comment
 68 | 
 69 | cooking spray
 70 | name name
 71 | 
 72 | 1/4 cup dry milk powder
 73 | quantity unit name name name
 74 | 
 75 | 3 tablespoon white sugar
 76 | quantity unit name name
 77 | 
 78 | 1 cup warm water (110 degrees F/45 degrees C)
 79 | quantity unit comment name comment comment comment comment comment
 80 | 
 81 | 2 tablespoon butter, softened
 82 | quantity unit name comment
 83 | 
 84 | 1 (.25 ounce) package active dry yeast
 85 | quantity unit unit unit name name name
 86 | 
 87 | 1 egg white
 88 | quantity name name
 89 | 
 90 | 1/4 cup all-purpose flour for coating
 91 | quantity unit name name comment comment
 92 | 
 93 | 1/4 teaspoon ground black pepper
 94 | quantity unit name name name
 95 | 
 96 | 1 tablespoon finely chopped Chinese chives
 97 | quantity unit comment comment name name
 98 | 
 99 | 1 teaspoon chile-garlic sauce (such as Sriracha)
100 | quantity unit name name comment comment comment
101 | 
102 | 3 clove garlic, minced
103 | quantity unit name comment
104 | 
105 | 1 egg, beaten
106 | quantity name comment
107 | 
108 | 1 tablespoon minced fresh ginger
109 | quantity unit comment other name
110 | 
111 | 50 dumpling wrappers
112 | quantity name name
113 | 
114 | 1 cup vegetable oil for frying
115 | quantity unit name name comment comment
116 | 
117 | 1 quart water, or more as needed
118 | quantity unit name comment comment comment comment
119 | 
120 | 1 fresh jalapeno pepper, finely diced
121 | quantity other name name comment comment
122 | 
123 | 1 bunch asparagus, trimmed and cut into 1/4-inch pieces
124 | quantity unit name comment comment comment comment comment comment
125 | 
126 | 1 cup shelled fresh or thawed frozen peas
127 | quantity unit comment other other other name name
128 | 
129 | 2 tablespoon chopped fresh mint
130 | quantity unit comment other name
131 | 
132 | salt and freshly ground black pepper to taste
133 | other other other other other other other other
134 | 
135 | 2 (4 ounce) filet mignon steaks
136 | quantity unit unit name name name
137 | 
138 | 1/2 teaspoon freshly ground black pepper to taste
139 | quantity unit comment comment name name comment comment
140 | 
141 | salt to taste
142 | name comment comment
143 | 
144 | 1/4 cup dry red wine
145 | quantity unit name name name
146 | 
147 | 6 cup chicken broth, divided
148 | quantity unit name name comment
149 | 
150 | 3 tablespoon olive oil, divided
151 | quantity unit name name comment
152 | 
153 | 1 pound portobello mushrooms, thinly sliced
154 | quantity unit name name comment comment
155 | 
156 | 1 pound white mushrooms, thinly sliced
157 | quantity unit name name comment comment
158 | 
159 | 2 shallots, diced
160 | quantity name comment
161 | 
162 | 1 1/2 cup Arborio rice
163 | quantity quantity unit name name
164 | 
165 | sea salt to taste
166 | name name comment comment
167 | 
168 | freshly ground black pepper to taste
169 | comment comment name name comment comment
170 | 
171 | 3 tablespoon finely chopped chives
172 | quantity unit comment comment name
173 | 
174 | 4 tablespoon butter
175 | quantity unit name
176 | 
177 | 1/3 cup freshly grated Parmesan cheese
178 | quantity unit comment comment name name
179 | 
180 | 2 (14.5 ounce) cans Italian-style diced tomatoes
181 | quantity unit unit unit name name name
182 | 
183 | 1 (19 ounce) can cannellini beans, drained and rinsed
184 | quantity unit unit unit name name comment comment comment
185 | 
186 | 10 ounces fresh spinach, washed and chopped
187 | quantity unit other name comment comment comment
188 | 
189 | 8 ounces penne pasta
190 | quantity unit name name
191 | 
192 | 1/2 cup crumbled feta cheese
193 | quantity unit comment name name
194 | 
195 | 1 pound sweet Italian sausage, casings removed
196 | quantity unit name name name comment comment
197 | 
198 | 1 cup chopped onion
199 | quantity unit comment name
200 | 
201 | 2 clove garlic, minced
202 | quantity unit name comment
203 | 
204 | 5 cup beef broth
205 | quantity unit name name
206 | 
207 | 1/2 cup water
208 | quantity unit name
209 | 
210 | 4 large tomatoes - peeled, seeded and chopped
211 | quantity name name separator comment comment comment comment
212 | 
213 | 1 cup thinly sliced carrots
214 | quantity unit comment comment name
215 | 
216 | 1/2 tablespoon packed fresh basil leaves
217 | quantity unit unit other name name
218 | 
219 | 1/2 teaspoon dried oregano
220 | quantity unit other name
221 | 
222 | 1 1/2 cup sliced zucchini
223 | quantity quantity unit comment name
224 | 


--------------------------------------------------------------------------------
/crf/crf.go:
--------------------------------------------------------------------------------
  1 | package crf
  2 | 
  3 | import (
  4 | 	"fmt"
  5 | 	"math"
  6 | 	"math/rand"
  7 | 	"strings"
  8 | )
  9 | 
// Label is a label applied to a word in a sentence. The empty Label is passed
// to feature functions as the previous label of the first word.
type Label string
 12 | 
// FeatureFunction is a feature function for linear-chain CRF. It receives the
// words of the sentence, the index i of the word being considered, the label
// of that word (labelCurr) and the label of the preceding word (labelPrev, the
// empty Label for the first word) and reports whether the feature fires.
type FeatureFunction func(s []string, i int, labelCurr Label, labelPrev Label) bool
 15 | 
// Feature includes the weight and feature function for a CRF feature
type Feature struct {
	// Weight is added to a labeling's score each time Value fires.
	Weight float64
	// Value is the feature function evaluated at every word position.
	Value FeatureFunction
}
 21 | 
 22 | // EvaluateFeature evalutes the score of a given labeling using the feature function
 23 | func (f *Feature) EvaluateFeature(s []string, labeling *SentenceLabeling) float64 {
 24 | 	score := float64(0)
 25 | 
 26 | 	if len(s) != len(labeling.Labels) {
 27 | 		panic(fmt.Sprintf("Misaligned labels for \"%v\" labeled with \"%v\"\n", s, labeling.Labels))
 28 | 	}
 29 | 
 30 | 	for i := range s {
 31 | 		var val bool
 32 | 		if i == 0 {
 33 | 			val = f.Value(s, i, labeling.Labels[i], "")
 34 | 		} else {
 35 | 			val = f.Value(s, i, labeling.Labels[i], labeling.Labels[i-1])
 36 | 		}
 37 | 
 38 | 		if val {
 39 | 			score += f.Weight
 40 | 		}
 41 | 	}
 42 | 
 43 | 	return score
 44 | }
 45 | 
// SentenceLabeling is a specific order of labels for a sentence
type SentenceLabeling struct {
	// Labels holds one label per word; EvaluateFeature panics if the count
	// differs from the number of words.
	Labels []Label
	// Score is the value returned by Sentence.ScoreLabeling, filled in by
	// scoreAllLabelings.
	Score float64
	// Probability is Score divided by the sum of the scores of all candidate
	// labelings, filled in by calculateLabelProbabilities.
	Probability float64
}
 52 | 
// Sentence is a sentence to be processed using CRF
type Sentence struct {
	// Words are the tokens of the sentence, in order.
	Words []string
	// Labeling is the labeling attached to the sentence; CalculateBestLabeling
	// overwrites it with the most probable candidate.
	Labeling SentenceLabeling
}
 58 | 
 59 | // MakeSentence makes a new Sentence with the given sentence and features
 60 | func MakeSentence(sentence string) *Sentence {
 61 | 	return &Sentence{Words: removeEmptyString(strings.Split(sentence, " "))}
 62 | }
 63 | 
 64 | // ScoreLabeling determines the score of a given labeling of the sentence
 65 | func (s *Sentence) ScoreLabeling(labeling *SentenceLabeling, features []Feature) float64 {
 66 | 	score := float64(0)
 67 | 
 68 | 	for _, feature := range features {
 69 | 		score += feature.EvaluateFeature(s.Words, labeling)
 70 | 	}
 71 | 
 72 | 	return math.Exp(score)
 73 | }
 74 | 
 75 | func recursivelyLabelWord(words []string, allLabels []Label, appliedLabels []Label) []SentenceLabeling {
 76 | 	var result []SentenceLabeling
 77 | 	if len(words) == len(appliedLabels) {
 78 | 		result = append(result, SentenceLabeling{Labels: appliedLabels})
 79 | 		return result
 80 | 	}
 81 | 
 82 | 	for _, label := range allLabels {
 83 | 		restLabels := append(appliedLabels, label)
 84 | 		subResult := recursivelyLabelWord(words, allLabels, restLabels)
 85 | 		for _, r := range subResult {
 86 | 			result = append(result, r)
 87 | 		}
 88 | 	}
 89 | 
 90 | 	return result
 91 | }
 92 | 
 93 | func getAllPossibleLabelings(words []string, labels []Label) []SentenceLabeling {
 94 | 	var result []SentenceLabeling
 95 | 
 96 | 	for _, label := range labels {
 97 | 		restLabels := []Label{label}
 98 | 		subResult := recursivelyLabelWord(words, labels, restLabels)
 99 | 		for _, r := range subResult {
100 | 			result = append(result, r)
101 | 		}
102 | 	}
103 | 
104 | 	return result
105 | }
106 | 
107 | func (s *Sentence) scoreAllLabelings(features []Feature, labels []Label) []SentenceLabeling {
108 | 	labelings := getAllPossibleLabelings(s.Words, labels)
109 | 
110 | 	for i := range labelings {
111 | 		labelings[i].Score = s.ScoreLabeling(&labelings[i], features)
112 | 	}
113 | 
114 | 	return labelings
115 | }
116 | 
117 | func calculateNormalizationConstant(labelings []SentenceLabeling) float64 {
118 | 	sum := float64(0)
119 | 
120 | 	for _, labeling := range labelings {
121 | 		sum += labeling.Score
122 | 	}
123 | 
124 | 	return sum
125 | }
126 | 
127 | func (s *Sentence) calculateLabelProbabilities(features []Feature, labels []Label) []SentenceLabeling {
128 | 	labelings := s.scoreAllLabelings(features, labels)
129 | 	normalizationConstant := calculateNormalizationConstant(labelings)
130 | 
131 | 	for i := range labelings {
132 | 		labelings[i].Probability = labelings[i].Score / normalizationConstant
133 | 	}
134 | 
135 | 	return labelings
136 | }
137 | 
138 | // CalculateBestLabeling determines the best labeling of the sentence
139 | func (s *Sentence) CalculateBestLabeling(features []Feature, labels []Label) {
140 | 	labelings := s.calculateLabelProbabilities(features, labels)
141 | 
142 | 	currentBestLabel := labelings[0]
143 | 
144 | 	for _, labeling := range labelings {
145 | 		if labeling.Probability > currentBestLabel.Probability {
146 | 			currentBestLabel = labeling
147 | 		}
148 | 	}
149 | 
150 | 	s.Labeling = currentBestLabel
151 | }
152 | 
153 | // LearnWeights attempts to learn the weight to use for each of the given feature functions
154 | // using the provided labels and training data
155 | func LearnWeights(features []Feature, labels []Label, trainingData []Sentence) {
156 | 	randomWeights := getRandomWeights(len(features))
157 | 
158 | 	// assign random weights to each feature function
159 | 	for i := 0; i < len(features); i++ {
160 | 		features[i].Weight = randomWeights[i]
161 | 	}
162 | 
163 | 	// loop through all of the training sentences
164 | 	for i := 0; i < len(trainingData); i++ {
165 | 		fmt.Printf("Analyzing sentence: %v\n", trainingData[i].Words)
166 | 		const threshold = float64(0.01)
167 | 		const learningRate = float64(1)
168 | 		lastChange := float64(1)
169 | 
170 | 		// keep moving the weights until they coalesce on a value
171 | 		for lastChange > threshold {
172 | 			possibleLabelings := getAllPossibleLabelings(trainingData[i].Words, labels)
173 | 
174 | 			// loop through each feature function and calculate the difference between the contribution
175 | 			// of the feature function for the correct labeling and the contribution of the feature function
176 | 			// given the current model
177 | 			for j := 0; j < len(features); j++ {
178 | 				trueValue := features[j].EvaluateFeature(trainingData[i].Words, &trainingData[i].Labeling)
179 | 				expectedContribution := float64(0)
180 | 
181 | 				for k := 0; k < len(possibleLabelings); k++ {
182 | 					expectedContribution += possibleLabelings[k].Probability * features[j].EvaluateFeature(trainingData[i].Words, &possibleLabelings[k])
183 | 				}
184 | 
185 | 				// calculate gradient of the log probability of the training example
186 | 				gradProb := trueValue - expectedContribution
187 | 				lastChange = learningRate * gradProb
188 | 				features[j].Weight += lastChange
189 | 			}
190 | 		}
191 | 	}
192 | }
193 | 
// getRandomWeights returns num pseudo-random weights drawn from math/rand and
// scaled so that they add up to 1.
func getRandomWeights(num int) []float64 {
	weights := make([]float64, num)

	var total float64
	for idx := range weights {
		weights[idx] = rand.Float64()
		total += weights[idx]
	}

	// Normalise so the weights sum to one.
	for idx := range weights {
		weights[idx] = weights[idx] / total
	}

	return weights
}
207 | 
// removeEmptyString returns a new slice holding the non-empty strings of arr
// in their original order. A nil input is returned as nil; any other input
// yields a non-nil slice, even when every element is dropped.
func removeEmptyString(arr []string) []string {
	if arr == nil {
		return nil
	}

	kept := []string{}
	for _, item := range arr {
		if item == "" {
			continue
		}
		kept = append(kept, item)
	}

	return kept
}
220 | 


--------------------------------------------------------------------------------