├── LICENSE
├── README.md
└── summarize
    ├── buffer_pool.go
    ├── stop_words_provider.go
    ├── stop_words_provider_test.go
    ├── summarize.go
    ├── summarize_test.go
    ├── text_counter.go
    ├── text_counter_test.go
    ├── text_splitter.go
    └── text_splitter_test.go


/LICENSE:
--------------------------------------------------------------------------------
 1 | The MIT License (MIT)
 2 | 
 3 | Copyright (c) 2014 Viktor Kojouharov
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
23 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | text-summary
 2 | ============
 3 | 
 4 | Text-summary takes some text and extracts its key points. It's an almost direct port of [PyTeaser](https://github.com/xiaoxu193/PyTeaser).
 5 | 
 6 | # From PyTeaser
 7 | 
 8 | Summaries are created by ranking sentences in a news article according to how relevant they are to the entire text. The top 5 sentences are used to form a "summary". Each sentence is ranked by using four criteria:
 9 | 
10 | - Relevance to the title
11 | - Relevance to keywords in the article
12 | - Position of the sentence
13 | - Length of the sentence
14 | 
15 | # Usage
16 | ```
17 | import "github.com/urandom/text-summary/summarize"
18 | 
19 | ...
20 | 
21 | s := summarize.New("Title for the text", someIOReader)
22 | // or
23 | // s := summarize.NewFromString("Title for the text", "Lengthy text ...")
24 | 
25 | keyPoints := s.KeyPoints()
26 | ```
27 | 


--------------------------------------------------------------------------------
/summarize/buffer_pool.go:
--------------------------------------------------------------------------------
 1 | package summarize
 2 | 
 3 | import (
 4 | 	"bytes"
 5 | 	"sync"
 6 | )
 7 | 
 8 | var bufferPool sync.Pool
 9 | 
10 | func getBuffer() *bytes.Buffer {
11 | 	buffer := bufferPool.Get().(*bytes.Buffer)
12 | 	buffer.Reset()
13 | 
14 | 	return buffer
15 | }
16 | 
17 | func init() {
18 | 	bufferPool = sync.Pool{
19 | 		New: func() interface{} {
20 | 			return new(bytes.Buffer)
21 | 		},
22 | 	}
23 | }
24 | 


--------------------------------------------------------------------------------
/summarize/stop_words_provider.go:
--------------------------------------------------------------------------------
 1 | package summarize
 2 | 
 3 | type StopWordsProvider interface {
 4 | 	SetLanguage(string)
 5 | 	IsStopWord(string) bool
 6 | }
 7 | 
 8 | type DefaultStopWords struct{}
 9 | 
10 | var defaultStopWordsMap = map[string]bool{
11 | 	"a": true, "about": true, "above": true, "after": true, "again": true,
12 | 	"against": true, "all": true, "am": true, "an": true, "and": true, "any": true, "are": true, "as": true, "at": true, "be": true, "because": true,
13 | 	"been": true, "before": true, "being": true, "below": true, "between": true, "both": true, "but": true, "by": true, "can": true, "did": true,
14 | 	"do": true, "does": true, "doing": true, "don": true, "down": true,
15 | 	"during": true, "each": true, "few": true, "for": true, "from": true,
16 | 	"further": true, "had": true, "has": true, "have": true, "having": true,
17 | 	"he": true, "her": true, "here": true, "hers": true, "herself": true,
18 | 	"him": true, "himself": true, "his": true, "how": true, "i": true, "if": true, "in": true, "into": true, "is": true, "it": true, "its": true,
19 | 	"itself": true, "just": true, "me": true, "more": true, "most": true, "my": true, "myself": true, "no": true, "nor": true, "not": true, "now": true,
20 | 	"of": true, "off": true, "on": true, "once": true, "only": true, "or": true, "other": true, "our": true, "ours": true, "ourselves": true, "out": true, "over": true, "own": true, "s": true, "same": true, "she": true,
21 | 	"should": true, "so": true, "some": true, "such": true, "t": true, "than": true, "that": true, "the": true, "their": true, "theirs": true, "them": true, "themselves": true, "then": true, "there": true, "these": true,
22 | 	"they": true, "this": true, "those": true, "through": true, "to": true,
23 | 	"too": true, "under": true, "until": true, "up": true, "very": true, "was": true, "we": true, "were": true, "what": true, "when": true, "where": true,
24 | 	"which": true, "while": true, "who": true, "whom": true, "why": true,
25 | 	"will": true, "with": true, "you": true, "your": true, "yours": true,
26 | 	"yourself": true, "yourselves": true,
27 | }
28 | 
29 | func (d DefaultStopWords) SetLanguage(lang string) {}
30 | 
31 | func (d DefaultStopWords) IsStopWord(word string) bool {
32 | 	_, ok := defaultStopWordsMap[word]
33 | 
34 | 	return ok
35 | }
36 | 


--------------------------------------------------------------------------------
/summarize/stop_words_provider_test.go:
--------------------------------------------------------------------------------
 1 | package summarize
 2 | 
 3 | import "testing"
 4 | 
 5 | func TestDefaultStopWords(t *testing.T) {
 6 | 	d := DefaultStopWords{}
 7 | 
 8 | 	if !d.IsStopWord("a") {
 9 | 		t.Fatal("Expected 'a' to be a stop word")
10 | 	}
11 | }
12 | 


--------------------------------------------------------------------------------
/summarize/summarize.go:
--------------------------------------------------------------------------------
  1 | package summarize
  2 | 
  3 | import (
  4 | 	"io"
  5 | 	"math"
  6 | 	"strings"
  7 | )
  8 | 
// Summarize holds a text, its title and the settings used to extract the
// text's key points. New and NewFromString fill in the defaults.
type Summarize struct {
	// Title is the title of the text; KeyPoints scores sentences by how many
	// of its non-stop words they contain.
	Title             string
	// Text is the body that is split into sentences and ranked.
	Text              string
	// Language is handed to StopWordsProvider.SetLanguage by KeyPoints.
	Language          string
	// StopWordsProvider filters out words that should not count as keywords
	// or title matches.
	StopWordsProvider StopWordsProvider
	// TextSplitter breaks Text into sentences and sentences into words.
	TextSplitter      TextSplitter
	// NOTE(review): EndSentenceRunes is not read by any code visible in this
	// package; confirm whether it is still needed.
	EndSentenceRunes  []rune
	// NOTE(review): QuoteTuples is not read by any code visible in this
	// package; confirm whether it is still needed.
	QuoteTuples       [][]rune
	// IdealWordCount is the sentence length, in words, that receives the best
	// length score.
	IdealWordCount    int
}
 19 | 
 20 | func New(title string, r io.Reader) Summarize {
 21 | 	buffer := getBuffer()
 22 | 	defer bufferPool.Put(buffer)
 23 | 
 24 | 	buffer.ReadFrom(r)
 25 | 
 26 | 	return NewFromString(title, buffer.String())
 27 | }
 28 | 
 29 | func NewFromString(title, text string) Summarize {
 30 | 	return Summarize{
 31 | 		Title:             title,
 32 | 		Text:              text,
 33 | 		Language:          "en",
 34 | 		StopWordsProvider: DefaultStopWords{},
 35 | 		TextSplitter:      DefaultTextSplitter{[]rune{'.', '!', '?'}},
 36 | 		IdealWordCount:    20,
 37 | 	}
 38 | }
 39 | 
 40 | func (s Summarize) KeyPoints() []string {
 41 | 	s.StopWordsProvider.SetLanguage(s.Language)
 42 | 
 43 | 	sentences := s.TextSplitter.Sentences(s.Text)
 44 | 	keywords := s.keywords(s.Text)
 45 | 	titleWords := toLower(s.TextSplitter.Words(s.Title))
 46 | 
 47 | 	if len(sentences) <= 5 {
 48 | 		return sentences
 49 | 	}
 50 | 
 51 | 	ranks := TextCounter{}
 52 | 
 53 | 	titleMap := map[string]bool{}
 54 | 	for _, t := range titleWords {
 55 | 		if !s.StopWordsProvider.IsStopWord(t) {
 56 | 			titleMap[t] = true
 57 | 		}
 58 | 	}
 59 | 
 60 | 	for i, sent := range sentences {
 61 | 		words := toLower(s.TextSplitter.Words(sent))
 62 | 		titleScore := s.titleScore(titleMap, words)
 63 | 		lengthScore := s.lengthScore(words)
 64 | 		positionScore := s.positionScore(i+1, len(sentences))
 65 | 		sbs := s.sbs(words, keywords)
 66 | 		dbs := s.dbs(words, keywords)
 67 | 
 68 | 		freq := (sbs + dbs) / 2 * 10
 69 | 		total := (titleScore*1.5 + freq*2 + lengthScore + positionScore) / 4
 70 | 		ranks.Add(sent, int(total*100))
 71 | 	}
 72 | 
 73 | 	var keyPoints []string
 74 | 
 75 | 	mostCommon := ranks.MostCommon(5)
 76 | 	commonMap := map[string]bool{}
 77 | 
 78 | 	for _, p := range mostCommon {
 79 | 		commonMap[p.Text] = true
 80 | 	}
 81 | 
 82 | 	for _, sent := range sentences {
 83 | 		if commonMap[sent] {
 84 | 			keyPoints = append(keyPoints, sent)
 85 | 		}
 86 | 	}
 87 | 
 88 | 	return keyPoints
 89 | }
 90 | 
 91 | func (s Summarize) keywords(text string) map[string]float64 {
 92 | 	allWords := toLower(s.TextSplitter.Words(text))
 93 | 	allLen := float64(len(allWords))
 94 | 	filteredWords := []string{}
 95 | 
 96 | 	for _, w := range allWords {
 97 | 		if !s.StopWordsProvider.IsStopWord(w) {
 98 | 			filteredWords = append(filteredWords, w)
 99 | 		}
100 | 	}
101 | 
102 | 	freq := NewTextCounterFromSlice(filteredWords)
103 | 
104 | 	pairs := freq.MostCommon(10)
105 | 	keyMap := map[string]float64{}
106 | 
107 | 	for _, p := range pairs {
108 | 		score := float64(p.Count) / allLen
109 | 		keyMap[p.Text] = score*1.5 + 1
110 | 	}
111 | 
112 | 	return keyMap
113 | }
114 | 
115 | func (s Summarize) titleScore(titleMap map[string]bool, words []string) float64 {
116 | 	count := 0.0
117 | 
118 | 	for _, w := range words {
119 | 		if _, ok := titleMap[w]; ok && !s.StopWordsProvider.IsStopWord(w) {
120 | 			count += 1
121 | 		}
122 | 	}
123 | 
124 | 	return count / float64(len(titleMap))
125 | }
126 | 
127 | func (s Summarize) lengthScore(words []string) float64 {
128 | 	return 1 - math.Abs(float64(s.IdealWordCount-len(words)))/float64(s.IdealWordCount)
129 | }
130 | 
131 | func (s Summarize) positionScore(pos, total int) float64 {
132 | 	normalized := float64(pos) / float64(total)
133 | 
134 | 	if normalized < 0 {
135 | 		normalized = 0
136 | 	}
137 | 
138 | 	if normalized <= 0.1 {
139 | 		return 0.17
140 | 	} else if normalized <= 0.2 {
141 | 		return 0.23
142 | 	} else if normalized <= 0.3 {
143 | 		return 0.14
144 | 	} else if normalized <= 0.4 {
145 | 		return 0.08
146 | 	} else if normalized <= 0.5 {
147 | 		return 0.05
148 | 	} else if normalized <= 0.6 {
149 | 		return 0.04
150 | 	} else if normalized <= 0.7 {
151 | 		return 0.06
152 | 	} else if normalized <= 0.8 {
153 | 		return 0.04
154 | 	} else if normalized <= 0.9 {
155 | 		return 0.04
156 | 	} else if normalized <= 1.0 {
157 | 		return 0.15
158 | 	} else {
159 | 		return 0
160 | 	}
161 | }
162 | 
163 | func (s Summarize) sbs(words []string, keywords map[string]float64) float64 {
164 | 	score := 0.0
165 | 
166 | 	if len(words) == 0 {
167 | 		return score
168 | 	}
169 | 
170 | 	for _, w := range words {
171 | 		if c, ok := keywords[w]; ok {
172 | 			score += c
173 | 		}
174 | 	}
175 | 
176 | 	return (1 / float64(len(words)) * score) / 10
177 | }
178 | 
179 | func (s Summarize) dbs(words []string, keywords map[string]float64) float64 {
180 | 	score := 0.0
181 | 
182 | 	if len(words) == 0 {
183 | 		return score
184 | 	}
185 | 
186 | 	summ := 0.0
187 | 	first := [2]float64{}
188 | 	second := [2]float64{}
189 | 
190 | 	uniqueWords := map[string]bool{}
191 | 	for i, w := range words {
192 | 		if c, ok := keywords[w]; ok {
193 | 			if len(first) == 0 {
194 | 				first[0], first[1] = float64(i), c
195 | 			} else {
196 | 				second[0], second[1] = first[0], first[1]
197 | 				first[0], first[1] = float64(i), c
198 | 
199 | 				diff := first[0] - second[0]
200 | 				summ += first[1] * second[1] / math.Pow(diff, 2)
201 | 			}
202 | 
203 | 			uniqueWords[w] = true
204 | 		}
205 | 	}
206 | 
207 | 	k := float64(len(uniqueWords) + 1)
208 | 	return (1 / (k * (k + 1)) * summ)
209 | }
210 | 
// toLower returns a new slice holding the lower-case form of every word, in
// the same order. The input is not modified. Empty input yields nil.
func toLower(words []string) []string {
	if len(words) == 0 {
		return nil
	}

	// The output length is known up front, so allocate it once.
	lower := make([]string, len(words))
	for i, w := range words {
		lower[i] = strings.ToLower(w)
	}

	return lower
}
220 | 


--------------------------------------------------------------------------------
/summarize/summarize_test.go:
--------------------------------------------------------------------------------
 1 | package summarize
 2 | 
 3 | import (
 4 | 	"strings"
 5 | 	"testing"
 6 | )
 7 | 
 8 | func TestSummarize(t *testing.T) {
 9 | 	s := New(title, strings.NewReader(text))
10 | 
11 | 	points := s.KeyPoints()
12 | 	checkKeyPoints(t, points)
13 | }
14 | 
15 | func TestSummarizeFromString(t *testing.T) {
16 | 	s := NewFromString(title, text)
17 | 
18 | 	points := s.KeyPoints()
19 | 	checkKeyPoints(t, points)
20 | }
21 | 
22 | func checkKeyPoints(t *testing.T, points []string) {
23 | 	if len(points) != len(keyPoints) {
24 | 		t.Fatalf("Number of key points differ from expected: %d - %d\n", len(points), len(keyPoints))
25 | 	}
26 | 	for i, k := range keyPoints {
27 | 		if k != points[i] {
28 | 			t.Fatalf("Expected summary point '%s', got '%s'\n", k, points[i])
29 | 		}
30 | 	}
31 | }
32 | 
// Fixtures shared by the summarize tests.
var (
	// title is the title passed to the constructors under test.
	title = `Framework for Partitioning and Execution of Data Stream Applications in Mobile Cloud Computing`
	// text is the body to summarize: ten sentences, one per line, including
	// the hyphenated line-break artifacts ("com- puting") of the source.
	text  = `The contribution of cloud computing and mobile computing technologies lead to the newly emerging mobile cloud com- puting paradigm.
Three major approaches have been pro- posed for mobile cloud applications: 1) extending the access to cloud services to mobile devices; 2) enabling mobile de- vices to work collaboratively as cloud resource providers; 3) augmenting the execution of mobile applications on portable devices using cloud resources.
In this paper, we focus on the third approach in supporting mobile data stream applica- tions.
More specifically, we study how to optimize the com- putation partitioning of a data stream application between mobile and cloud to achieve maximum speed/throughput in processing the streaming data.
To the best of our knowledge, it is the first work to study the partitioning problem for mobile data stream applica- tions, where the optimization is placed on achieving high throughput of processing the streaming data rather than minimizing the makespan of executions as in other appli- cations.
We first propose a framework to provide runtime support for the dynamic computation partitioning and exe- cution of the application.
Different from existing works, the framework not only allows the dynamic partitioning for a single user but also supports the sharing of computation in- stances among multiple users in the cloud to achieve efficient utilization of the underlying cloud resources.
Meanwhile, the framework has better scalability because it is designed on the elastic cloud fabrics.
Based on the framework, we design a genetic algorithm for optimal computation parti- tion.
Both numerical evaluation and real world experiment have been performed, and the results show that the par- titioned application can achieve at least two times better performance in terms of throughput than the application without partitioning.`
	// keyPoints lists the sentences of text that KeyPoints is expected to
	// return, in text order.
	keyPoints = []string{
		`The contribution of cloud computing and mobile computing technologies lead to the newly emerging mobile cloud com- puting paradigm.`,
		`Three major approaches have been pro- posed for mobile cloud applications: 1) extending the access to cloud services to mobile devices; 2) enabling mobile de- vices to work collaboratively as cloud resource providers; 3) augmenting the execution of mobile applications on portable devices using cloud resources.`,
		`In this paper, we focus on the third approach in supporting mobile data stream applica- tions.`,
		`More specifically, we study how to optimize the com- putation partitioning of a data stream application between mobile and cloud to achieve maximum speed/throughput in processing the streaming data.`,
		`We first propose a framework to provide runtime support for the dynamic computation partitioning and exe- cution of the application.`,
	}
)
53 | 


--------------------------------------------------------------------------------
/summarize/text_counter.go:
--------------------------------------------------------------------------------
 1 | package summarize
 2 | 
 3 | import "sort"
 4 | 
 5 | type TextCounter map[string]int
 6 | type CommonPairs []CommonPair
 7 | type CommonPair struct {
 8 | 	Text  string
 9 | 	Count int
10 | }
11 | 
12 | func NewTextCounterFromPairs(pairs CommonPairs) TextCounter {
13 | 	tc := TextCounter{}
14 | 	for _, p := range pairs {
15 | 		tc[p.Text] = p.Count
16 | 	}
17 | 
18 | 	return tc
19 | }
20 | 
21 | func NewTextCounterFromSlice(words []string) TextCounter {
22 | 	tc := TextCounter{}
23 | 	for _, w := range words {
24 | 		tc.Add(w)
25 | 	}
26 | 
27 | 	return tc
28 | }
29 | 
30 | func (tc TextCounter) Add(text string, score ...int) {
31 | 	sc := 1
32 | 	if len(score) > 0 {
33 | 		sc = score[0]
34 | 	}
35 | 
36 | 	if c, ok := tc[text]; ok {
37 | 		tc[text] = c + sc
38 | 	} else {
39 | 		tc[text] = sc
40 | 	}
41 | }
42 | 
43 | func (tc TextCounter) MostCommon(limit ...int) CommonPairs {
44 | 	pairs := CommonPairs{}
45 | 
46 | 	for t, c := range tc {
47 | 		pairs = append(pairs, CommonPair{Text: t, Count: c})
48 | 	}
49 | 
50 | 	sort.Sort(sort.Reverse(pairs))
51 | 
52 | 	if len(limit) > 0 && len(pairs) > limit[0] {
53 | 		return pairs[:limit[0]]
54 | 	}
55 | 
56 | 	return pairs
57 | }
58 | 
59 | func (c CommonPairs) Len() int {
60 | 	return len(c)
61 | }
62 | 
63 | func (c CommonPairs) Less(i, j int) bool {
64 | 	return c[i].Count < c[j].Count
65 | }
66 | 
67 | func (c CommonPairs) Swap(i, j int) {
68 | 	c[i], c[j] = c[j], c[i]
69 | }
70 | 


--------------------------------------------------------------------------------
/summarize/text_counter_test.go:
--------------------------------------------------------------------------------
 1 | package summarize
 2 | 
 3 | import "testing"
 4 | 
 5 | func TestTextCounter(t *testing.T) {
 6 | 	tc := TextCounter{}
 7 | 
 8 | 	tc.Add("foo", 5)
 9 | 	tc.Add("bar")
10 | 	tc.Add("baz", 2)
11 | 	tc.Add("baz", 9)
12 | 
13 | 	expInt := 5
14 | 	if tc["foo"] != expInt {
15 | 		t.Fatalf("Expected count of %d, got %d\n", expInt, tc["foo"])
16 | 	}
17 | 
18 | 	expInt = 1
19 | 	if tc["bar"] != expInt {
20 | 		t.Fatalf("Expected count of %d, got %d\n", expInt, tc["foo"])
21 | 	}
22 | 
23 | 	expInt = 11
24 | 	if tc["baz"] != expInt {
25 | 		t.Fatalf("Expected count of %d, got %d\n", expInt, tc["foo"])
26 | 	}
27 | 
28 | 	common := tc.MostCommon(2)
29 | 	expInt = 2
30 | 	if len(common) != expInt {
31 | 		t.Fatalf("Expected common length of %d, got %d\n", expInt, len(common))
32 | 	}
33 | 
34 | 	expStr := "baz"
35 | 	if common[0].Text != expStr {
36 | 		t.Fatalf("Expected most common text to be '%s', got '%s'\n", expStr, common[0].Text)
37 | 	}
38 | 
39 | 	expInt = 11
40 | 	if common[0].Count != expInt {
41 | 		t.Fatalf("Expected most common count to be '%d', got '%d'\n", expInt, common[0].Count)
42 | 	}
43 | 
44 | 	expStr = "foo"
45 | 	if common[1].Text != expStr {
46 | 		t.Fatalf("Expected second most common text to be '%s', got '%s'\n", expStr, common[1].Text)
47 | 	}
48 | 
49 | 	expInt = 5
50 | 	if common[1].Count != expInt {
51 | 		t.Fatalf("Expected second most common count to be '%d', got '%d'\n", expInt, common[1].Count)
52 | 	}
53 | }
54 | 


--------------------------------------------------------------------------------
/summarize/text_splitter.go:
--------------------------------------------------------------------------------
 1 | package summarize
 2 | 
 3 | import "unicode"
 4 | 
 5 | type TextSplitter interface {
 6 | 	Sentences(string) []string
 7 | 	Words(string) []string
 8 | }
 9 | 
10 | type DefaultTextSplitter struct {
11 | 	Punctuations []rune
12 | }
13 | 
14 | func (d DefaultTextSplitter) Sentences(text string) []string {
15 | 	buf := getBuffer()
16 | 	defer bufferPool.Put(buf)
17 | 
18 | 	sentences := []string{}
19 | 	newSentence := true
20 | 	lastNonWhiteSpace := -1
21 | 
22 | 	for _, r := range text {
23 | 		if oneOfPunct(r, d.Punctuations) {
24 | 			if buf.Len() > 0 {
25 | 				if lastNonWhiteSpace > 0 {
26 | 					buf.Truncate(lastNonWhiteSpace)
27 | 					buf.WriteRune(r)
28 | 					sentences = append(sentences, buf.String())
29 | 				}
30 | 				buf.Reset()
31 | 				newSentence = true
32 | 			}
33 | 		} else {
34 | 			isSpace := unicode.IsSpace(r)
35 | 			if newSentence && isSpace {
36 | 				continue
37 | 			}
38 | 			newSentence = false
39 | 			buf.WriteRune(r)
40 | 			if !isSpace {
41 | 				lastNonWhiteSpace = buf.Len()
42 | 			}
43 | 		}
44 | 	}
45 | 
46 | 	if buf.Len() > 0 && lastNonWhiteSpace > 0 {
47 | 		buf.Truncate(lastNonWhiteSpace)
48 | 		sentences = append(sentences, buf.String())
49 | 		buf.Reset()
50 | 	}
51 | 
52 | 	return sentences
53 | }
54 | 
55 | func (d DefaultTextSplitter) Words(text string) []string {
56 | 	buf := getBuffer()
57 | 	defer bufferPool.Put(buf)
58 | 
59 | 	words := []string{}
60 | 
61 | 	for _, r := range text {
62 | 		if unicode.IsLetter(r) || unicode.IsNumber(r) {
63 | 			buf.WriteRune(r)
64 | 		} else if !unicode.IsOneOf([]*unicode.RangeTable{unicode.Hyphen}, r) {
65 | 			if buf.Len() > 0 {
66 | 				words = append(words, buf.String())
67 | 			}
68 | 			buf.Reset()
69 | 		}
70 | 	}
71 | 
72 | 	if buf.Len() > 0 {
73 | 		words = append(words, buf.String())
74 | 	}
75 | 
76 | 	return words
77 | }
78 | 
// oneOfPunct reports whether r is one of the runes in punct.
func oneOfPunct(r rune, punct []rune) bool {
	for i := 0; i < len(punct); i++ {
		if punct[i] == r {
			return true
		}
	}

	return false
}
87 | 
// oneOfStartQuote returns the index of the first tuple in quotes whose first
// rune is r, or -1 if there is none. Empty tuples have no opening rune and
// are skipped instead of causing an index-out-of-range panic.
func oneOfStartQuote(r rune, quotes [][]rune) int {
	for i, q := range quotes {
		if len(q) > 0 && q[0] == r {
			return i
		}
	}

	return -1
}
96 | 


--------------------------------------------------------------------------------
/summarize/text_splitter_test.go:
--------------------------------------------------------------------------------
 1 | package summarize
 2 | 
 3 | import "testing"
 4 | 
 5 | func TestDefaultSentenceSplitter(t *testing.T) {
 6 | 	d := DefaultTextSplitter{[]rune{'.', '!', '?'}}
 7 | 
 8 | 	exp := []string{"First sentence.", "Second sentence"}
 9 | 	res := d.Sentences(text1)
10 | 
11 | 	checkSentences(t, exp, res)
12 | 
13 | 	exp = []string{"First sentence."}
14 | 	res = d.Sentences(text2)
15 | 
16 | 	checkSentences(t, exp, res)
17 | 
18 | 	exp = []string{`Then he said: " do not feed the sharks"`}
19 | 	res = d.Sentences(text3)
20 | 
21 | 	checkSentences(t, exp, res)
22 | 
23 | 	exp = []string{
24 | 		"The contribution of cloud computing and mobile computing technologies lead to the newly emerging mobile cloud com- puting paradigm.",
25 | 		"Three major approaches have been pro- posed for mobile cloud applications: 1) extending the access to cloud services to mobile devices; 2) enabling mobile de- vices to work collaboratively as cloud resource providers; 3) augmenting the execution of mobile applications on portable devices using cloud resources.",
26 | 		"In this paper, we focus on the third approach in supporting mobile data stream applica- tions.",
27 | 		"More specifically, we study how to optimize the com- putation partitioning of a data stream application between mobile and cloud to achieve maximum speed/throughput in processing the streaming data.",
28 | 		"To the best of our knowledge, it is the first work to study the partitioning problem for mobile data stream applica- tions, where the optimization is placed on achieving high throughput of processing the streaming data rather than minimizing the makespan of executions as in other appli- cations.",
29 | 	}
30 | 	res = d.Sentences(bigText)
31 | 
32 | 	checkSentences(t, exp, res)
33 | }
34 | 
35 | func TestDefaultWordSplitter(t *testing.T) {
36 | 	d := DefaultTextSplitter{[]rune{'.', '!', '?'}}
37 | 
38 | 	exp := []string{"First", "sentence", "Second", "sentence"}
39 | 	res := d.Words(text1)
40 | 
41 | 	checkSentences(t, exp, res)
42 | 
43 | 	exp = []string{"Then", "he", "said", "do", "not", "feed", "the", "sharks"}
44 | 	res = d.Words(text3)
45 | 
46 | 	checkSentences(t, exp, res)
47 | 
48 | 	exp = []string{"Three", "things", "are", "outlined", "1", "first", "2", "second", "3", "third"}
49 | 	res = d.Words(text4)
50 | 
51 | 	checkSentences(t, exp, res)
52 | }
53 | 
// checkSentences fails the test unless res has the same length as exp and
// holds the same strings in the same order.
func checkSentences(t *testing.T, exp, res []string) {
	if got, want := len(res), len(exp); want != got {
		t.Fatalf("Number of sentences differ from expected: %d - %d\n", got, want)
	}

	for i := range exp {
		if exp[i] == res[i] {
			continue
		}
		t.Fatalf("Expected sentence '%s', got '%s'\n", exp[i], res[i])
	}
}
64 | 
// Fixtures shared by the text splitter tests.
var (
	// text1 has whitespace around and between two sentences; the second
	// sentence has no terminator.
	text1   = ` First sentence.   Second sentence  `
	// text2 is a single sentence followed only by whitespace.
	text2   = `First sentence.    `
	// text3 contains a colon and quotes but none of the sentence terminators
	// used in the tests.
	text3   = `Then he said: " do not feed the sharks"`
	// text4 mixes words with numbered list markers and semicolons.
	text4   = `Three things are outlined: 1) first; 2) second; 3) third`
	// bigText is a five-sentence passage, one sentence per line.
	bigText = `The contribution of cloud computing and mobile computing technologies lead to the newly emerging mobile cloud com- puting paradigm.
Three major approaches have been pro- posed for mobile cloud applications: 1) extending the access to cloud services to mobile devices; 2) enabling mobile de- vices to work collaboratively as cloud resource providers; 3) augmenting the execution of mobile applications on portable devices using cloud resources.
In this paper, we focus on the third approach in supporting mobile data stream applica- tions.
More specifically, we study how to optimize the com- putation partitioning of a data stream application between mobile and cloud to achieve maximum speed/throughput in processing the streaming data.
To the best of our knowledge, it is the first work to study the partitioning problem for mobile data stream applica- tions, where the optimization is placed on achieving high throughput of processing the streaming data rather than minimizing the makespan of executions as in other appli- cations.`
)
76 | 


--------------------------------------------------------------------------------