├── LICENSE ├── README.md └── summarize ├── buffer_pool.go ├── stop_words_provider.go ├── stop_words_provider_test.go ├── summarize.go ├── summarize_test.go ├── text_counter.go ├── text_counter_test.go ├── text_splitter.go └── text_splitter_test.go /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2014 Viktor Kojouharov 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | text-summary 2 | ============ 3 | 4 | Text-summary takes some text, and extracts its key points. 
// From summarize/buffer_pool.go.

// bufferPool recycles *bytes.Buffer values so that reading and splitting text
// does not allocate a fresh buffer on every call. It is initialized at its
// declaration (rather than in an init function) so that it is already usable
// while other package-level variables are being initialized.
var bufferPool = sync.Pool{
	New: func() interface{} {
		return new(bytes.Buffer)
	},
}

// getBuffer returns an empty buffer taken from bufferPool. Callers in this
// package hand it back with bufferPool.Put when they are done with it.
func getBuffer() *bytes.Buffer {
	buffer := bufferPool.Get().(*bytes.Buffer)
	buffer.Reset()

	return buffer
}

// From summarize/stop_words_provider.go.

// StopWordsProvider decides which words are ignored when keywords and title
// matches are computed.
type StopWordsProvider interface {
	// SetLanguage tells the provider which language the text is written in.
	SetLanguage(string)
	// IsStopWord reports whether the given word should be ignored.
	IsStopWord(string) bool
}

// DefaultStopWords is a StopWordsProvider backed by a fixed, built-in word
// list. Lookups are exact, so callers are expected to pass lower-cased words.
type DefaultStopWords struct{}

// defaultStopWordsMap is the word list consulted by DefaultStopWords.
var defaultStopWordsMap = map[string]bool{
	"a": true, "about": true, "above": true, "after": true, "again": true,
	"against": true, "all": true, "am": true, "an": true, "and": true, "any": true, "are": true, "as": true, "at": true, "be": true, "because": true,
	"been": true, "before": true, "being": true, "below": true, "between": true, "both": true, "but": true, "by": true, "can": true, "did": true,
	"do": true, "does": true, "doing": true, "don": true, "down": true,
	"during": true, "each": true, "few": true, "for": true, "from": true,
	"further": true, "had": true, "has": true, "have": true, "having": true,
	"he": true, "her": true, "here": true, "hers": true, "herself": true,
	"him": true, "himself": true, "his": true, "how": true, "i": true, "if": true, "in": true, "into": true, "is": true, "it": true, "its": true,
	"itself": true, "just": true, "me": true, "more": true, "most": true, "my": true, "myself": true, "no": true, "nor": true, "not": true, "now": true,
	"of": true, "off": true, "on": true, "once": true, "only": true, "or": true, "other": true, "our": true, "ours": true, "ourselves": true, "out": true, "over": true, "own": true, "s": true, "same": true, "she": true,
	"should": true, "so": true, "some": true, "such": true, "t": true, "than": true, "that": true, "the": true, "their": true, "theirs": true, "them": true, "themselves": true, "then": true, "there": true, "these": true,
	"they": true, "this": true, "those": true, "through": true, "to": true,
	"too": true, "under": true, "until": true, "up": true, "very": true, "was": true, "we": true, "were": true, "what": true, "when": true, "where": true,
	"which": true, "while": true, "who": true, "whom": true, "why": true,
	"will": true, "with": true, "you": true, "your": true, "yours": true,
	"yourself": true, "yourselves": true,
}

// SetLanguage is a no-op: DefaultStopWords only has the one built-in list.
func (d DefaultStopWords) SetLanguage(lang string) {}

// IsStopWord reports whether word is present in the built-in list.
func (d DefaultStopWords) IsStopWord(word string) bool {
	// Every entry is stored as true, so a missing key reads as false.
	return defaultStopWordsMap[word]
}
-------------------------------------------------------------------------------- 1 | package summarize 2 | 3 | import "testing" 4 | 5 | func TestDefaultStopWords(t *testing.T) { 6 | d := DefaultStopWords{} 7 | 8 | if !d.IsStopWord("a") { 9 | t.Fatal("Expected 'a' to be a stop word") 10 | } 11 | } 12 | -------------------------------------------------------------------------------- /summarize/summarize.go: -------------------------------------------------------------------------------- 1 | package summarize 2 | 3 | import ( 4 | "io" 5 | "math" 6 | "strings" 7 | ) 8 | 9 | type Summarize struct { 10 | Title string 11 | Text string 12 | Language string 13 | StopWordsProvider StopWordsProvider 14 | TextSplitter TextSplitter 15 | EndSentenceRunes []rune 16 | QuoteTuples [][]rune 17 | IdealWordCount int 18 | } 19 | 20 | func New(title string, r io.Reader) Summarize { 21 | buffer := getBuffer() 22 | defer bufferPool.Put(buffer) 23 | 24 | buffer.ReadFrom(r) 25 | 26 | return NewFromString(title, buffer.String()) 27 | } 28 | 29 | func NewFromString(title, text string) Summarize { 30 | return Summarize{ 31 | Title: title, 32 | Text: text, 33 | Language: "en", 34 | StopWordsProvider: DefaultStopWords{}, 35 | TextSplitter: DefaultTextSplitter{[]rune{'.', '!', '?'}}, 36 | IdealWordCount: 20, 37 | } 38 | } 39 | 40 | func (s Summarize) KeyPoints() []string { 41 | s.StopWordsProvider.SetLanguage(s.Language) 42 | 43 | sentences := s.TextSplitter.Sentences(s.Text) 44 | keywords := s.keywords(s.Text) 45 | titleWords := toLower(s.TextSplitter.Words(s.Title)) 46 | 47 | if len(sentences) <= 5 { 48 | return sentences 49 | } 50 | 51 | ranks := TextCounter{} 52 | 53 | titleMap := map[string]bool{} 54 | for _, t := range titleWords { 55 | if !s.StopWordsProvider.IsStopWord(t) { 56 | titleMap[t] = true 57 | } 58 | } 59 | 60 | for i, sent := range sentences { 61 | words := toLower(s.TextSplitter.Words(sent)) 62 | titleScore := s.titleScore(titleMap, words) 63 | lengthScore := 
s.lengthScore(words) 64 | positionScore := s.positionScore(i+1, len(sentences)) 65 | sbs := s.sbs(words, keywords) 66 | dbs := s.dbs(words, keywords) 67 | 68 | freq := (sbs + dbs) / 2 * 10 69 | total := (titleScore*1.5 + freq*2 + lengthScore + positionScore) / 4 70 | ranks.Add(sent, int(total*100)) 71 | } 72 | 73 | var keyPoints []string 74 | 75 | mostCommon := ranks.MostCommon(5) 76 | commonMap := map[string]bool{} 77 | 78 | for _, p := range mostCommon { 79 | commonMap[p.Text] = true 80 | } 81 | 82 | for _, sent := range sentences { 83 | if commonMap[sent] { 84 | keyPoints = append(keyPoints, sent) 85 | } 86 | } 87 | 88 | return keyPoints 89 | } 90 | 91 | func (s Summarize) keywords(text string) map[string]float64 { 92 | allWords := toLower(s.TextSplitter.Words(text)) 93 | allLen := float64(len(allWords)) 94 | filteredWords := []string{} 95 | 96 | for _, w := range allWords { 97 | if !s.StopWordsProvider.IsStopWord(w) { 98 | filteredWords = append(filteredWords, w) 99 | } 100 | } 101 | 102 | freq := NewTextCounterFromSlice(filteredWords) 103 | 104 | pairs := freq.MostCommon(10) 105 | keyMap := map[string]float64{} 106 | 107 | for _, p := range pairs { 108 | score := float64(p.Count) / allLen 109 | keyMap[p.Text] = score*1.5 + 1 110 | } 111 | 112 | return keyMap 113 | } 114 | 115 | func (s Summarize) titleScore(titleMap map[string]bool, words []string) float64 { 116 | count := 0.0 117 | 118 | for _, w := range words { 119 | if _, ok := titleMap[w]; ok && !s.StopWordsProvider.IsStopWord(w) { 120 | count += 1 121 | } 122 | } 123 | 124 | return count / float64(len(titleMap)) 125 | } 126 | 127 | func (s Summarize) lengthScore(words []string) float64 { 128 | return 1 - math.Abs(float64(s.IdealWordCount-len(words)))/float64(s.IdealWordCount) 129 | } 130 | 131 | func (s Summarize) positionScore(pos, total int) float64 { 132 | normalized := float64(pos) / float64(total) 133 | 134 | if normalized < 0 { 135 | normalized = 0 136 | } 137 | 138 | if normalized <= 0.1 { 139 | 
return 0.17 140 | } else if normalized <= 0.2 { 141 | return 0.23 142 | } else if normalized <= 0.3 { 143 | return 0.14 144 | } else if normalized <= 0.4 { 145 | return 0.08 146 | } else if normalized <= 0.5 { 147 | return 0.05 148 | } else if normalized <= 0.6 { 149 | return 0.04 150 | } else if normalized <= 0.7 { 151 | return 0.06 152 | } else if normalized <= 0.8 { 153 | return 0.04 154 | } else if normalized <= 0.9 { 155 | return 0.04 156 | } else if normalized <= 1.0 { 157 | return 0.15 158 | } else { 159 | return 0 160 | } 161 | } 162 | 163 | func (s Summarize) sbs(words []string, keywords map[string]float64) float64 { 164 | score := 0.0 165 | 166 | if len(words) == 0 { 167 | return score 168 | } 169 | 170 | for _, w := range words { 171 | if c, ok := keywords[w]; ok { 172 | score += c 173 | } 174 | } 175 | 176 | return (1 / float64(len(words)) * score) / 10 177 | } 178 | 179 | func (s Summarize) dbs(words []string, keywords map[string]float64) float64 { 180 | score := 0.0 181 | 182 | if len(words) == 0 { 183 | return score 184 | } 185 | 186 | summ := 0.0 187 | first := [2]float64{} 188 | second := [2]float64{} 189 | 190 | uniqueWords := map[string]bool{} 191 | for i, w := range words { 192 | if c, ok := keywords[w]; ok { 193 | if len(first) == 0 { 194 | first[0], first[1] = float64(i), c 195 | } else { 196 | second[0], second[1] = first[0], first[1] 197 | first[0], first[1] = float64(i), c 198 | 199 | diff := first[0] - second[0] 200 | summ += first[1] * second[1] / math.Pow(diff, 2) 201 | } 202 | 203 | uniqueWords[w] = true 204 | } 205 | } 206 | 207 | k := float64(len(uniqueWords) + 1) 208 | return (1 / (k * (k + 1)) * summ) 209 | } 210 | 211 | func toLower(words []string) []string { 212 | var lower []string 213 | 214 | for _, w := range words { 215 | lower = append(lower, strings.ToLower(w)) 216 | } 217 | 218 | return lower 219 | } 220 | -------------------------------------------------------------------------------- /summarize/summarize_test.go: 
-------------------------------------------------------------------------------- 1 | package summarize 2 | 3 | import ( 4 | "strings" 5 | "testing" 6 | ) 7 | 8 | func TestSummarize(t *testing.T) { 9 | s := New(title, strings.NewReader(text)) 10 | 11 | points := s.KeyPoints() 12 | checkKeyPoints(t, points) 13 | } 14 | 15 | func TestSummarizeFromString(t *testing.T) { 16 | s := NewFromString(title, text) 17 | 18 | points := s.KeyPoints() 19 | checkKeyPoints(t, points) 20 | } 21 | 22 | func checkKeyPoints(t *testing.T, points []string) { 23 | if len(points) != len(keyPoints) { 24 | t.Fatalf("Number of key points differ from expected: %d - %d\n", len(points), len(keyPoints)) 25 | } 26 | for i, k := range keyPoints { 27 | if k != points[i] { 28 | t.Fatalf("Expected summary point '%s', got '%s'\n", k, points[i]) 29 | } 30 | } 31 | } 32 | 33 | var ( 34 | title = `Framework for Partitioning and Execution of Data Stream Applications in Mobile Cloud Computing` 35 | text = `The contribution of cloud computing and mobile computing technologies lead to the newly emerging mobile cloud com- puting paradigm. 36 | Three major approaches have been pro- posed for mobile cloud applications: 1) extending the access to cloud services to mobile devices; 2) enabling mobile de- vices to work collaboratively as cloud resource providers; 3) augmenting the execution of mobile applications on portable devices using cloud resources. 37 | In this paper, we focus on the third approach in supporting mobile data stream applica- tions. 38 | More specifically, we study how to optimize the com- putation partitioning of a data stream application between mobile and cloud to achieve maximum speed/throughput in processing the streaming data. 
39 | To the best of our knowledge, it is the first work to study the partitioning problem for mobile data stream applica- tions, where the optimization is placed on achieving high throughput of processing the streaming data rather than minimizing the makespan of executions as in other appli- cations. 40 | We first propose a framework to provide runtime support for the dynamic computation partitioning and exe- cution of the application. 41 | Different from existing works, the framework not only allows the dynamic partitioning for a single user but also supports the sharing of computation in- stances among multiple users in the cloud to achieve efficient utilization of the underlying cloud resources. 42 | Meanwhile, the framework has better scalability because it is designed on the elastic cloud fabrics. 43 | Based on the framework, we design a genetic algorithm for optimal computation parti- tion. 44 | Both numerical evaluation and real world experiment have been performed, and the results show that the par- titioned application can achieve at least two times better performance in terms of throughput than the application without partitioning.` 45 | keyPoints = []string{ 46 | `The contribution of cloud computing and mobile computing technologies lead to the newly emerging mobile cloud com- puting paradigm.`, 47 | `Three major approaches have been pro- posed for mobile cloud applications: 1) extending the access to cloud services to mobile devices; 2) enabling mobile de- vices to work collaboratively as cloud resource providers; 3) augmenting the execution of mobile applications on portable devices using cloud resources.`, 48 | `In this paper, we focus on the third approach in supporting mobile data stream applica- tions.`, 49 | `More specifically, we study how to optimize the com- putation partitioning of a data stream application between mobile and cloud to achieve maximum speed/throughput in processing the streaming data.`, 50 | `We first propose a framework to 
// TextCounter maps a piece of text to the score accumulated for it.
type TextCounter map[string]int

// CommonPairs is a list of CommonPair values. It implements sort.Interface,
// ordering the pairs by ascending Count.
type CommonPairs []CommonPair

// CommonPair is a single TextCounter entry: a text and its score.
type CommonPair struct {
	Text  string
	Count int
}

// NewTextCounterFromPairs builds a counter holding the given pairs. When a
// text occurs more than once, the count of its last pair wins.
func NewTextCounterFromPairs(pairs CommonPairs) TextCounter {
	tc := make(TextCounter, len(pairs))
	for _, p := range pairs {
		tc[p.Text] = p.Count
	}

	return tc
}

// NewTextCounterFromSlice builds a counter holding the number of times each
// word occurs in words.
func NewTextCounterFromSlice(words []string) TextCounter {
	tc := TextCounter{}
	for _, w := range words {
		tc.Add(w)
	}

	return tc
}

// Add increases the score of text by score[0], or by 1 when no score is
// given. Any further scores are ignored.
func (tc TextCounter) Add(text string, score ...int) {
	sc := 1
	if len(score) > 0 {
		sc = score[0]
	}

	// A missing key reads as zero, so new and known texts share one path.
	tc[text] += sc
}

// MostCommon returns the entries ordered from highest to lowest count. Equal
// counts are ordered by ascending text, so the result does not depend on the
// random iteration order of the map. When a limit is given, at most limit[0]
// entries are returned; a negative limit yields no entries.
func (tc TextCounter) MostCommon(limit ...int) CommonPairs {
	pairs := make(CommonPairs, 0, len(tc))
	for t, c := range tc {
		pairs = append(pairs, CommonPair{Text: t, Count: c})
	}

	sort.Sort(byCountDesc(pairs))

	if len(limit) > 0 {
		n := limit[0]
		if n < 0 {
			n = 0
		}
		if n < len(pairs) {
			return pairs[:n]
		}
	}

	return pairs
}

// Len returns the number of pairs.
func (c CommonPairs) Len() int {
	return len(c)
}

// Less reports whether pair i has a lower count than pair j.
func (c CommonPairs) Less(i, j int) bool {
	return c[i].Count < c[j].Count
}

// Swap exchanges pairs i and j.
func (c CommonPairs) Swap(i, j int) {
	c[i], c[j] = c[j], c[i]
}

// byCountDesc sorts pairs by descending count and, for equal counts, by
// ascending text. The text tie-break makes the order total, so an unstable
// sort still produces a single possible result.
type byCountDesc CommonPairs

func (b byCountDesc) Len() int { return len(b) }

func (b byCountDesc) Less(i, j int) bool {
	if b[i].Count != b[j].Count {
		return b[i].Count > b[j].Count
	}
	return b[i].Text < b[j].Text
}

func (b byCountDesc) Swap(i, j int) { b[i], b[j] = b[j], b[i] }
tc := TextCounter{} 7 | 8 | tc.Add("foo", 5) 9 | tc.Add("bar") 10 | tc.Add("baz", 2) 11 | tc.Add("baz", 9) 12 | 13 | expInt := 5 14 | if tc["foo"] != expInt { 15 | t.Fatalf("Expected count of %d, got %d\n", expInt, tc["foo"]) 16 | } 17 | 18 | expInt = 1 19 | if tc["bar"] != expInt { 20 | t.Fatalf("Expected count of %d, got %d\n", expInt, tc["foo"]) 21 | } 22 | 23 | expInt = 11 24 | if tc["baz"] != expInt { 25 | t.Fatalf("Expected count of %d, got %d\n", expInt, tc["foo"]) 26 | } 27 | 28 | common := tc.MostCommon(2) 29 | expInt = 2 30 | if len(common) != expInt { 31 | t.Fatalf("Expected common length of %d, got %d\n", expInt, len(common)) 32 | } 33 | 34 | expStr := "baz" 35 | if common[0].Text != expStr { 36 | t.Fatalf("Expected most common text to be '%s', got '%s'\n", expStr, common[0].Text) 37 | } 38 | 39 | expInt = 11 40 | if common[0].Count != expInt { 41 | t.Fatalf("Expected most common count to be '%d', got '%d'\n", expInt, common[0].Count) 42 | } 43 | 44 | expStr = "foo" 45 | if common[1].Text != expStr { 46 | t.Fatalf("Expected second most common text to be '%s', got '%s'\n", expStr, common[1].Text) 47 | } 48 | 49 | expInt = 5 50 | if common[1].Count != expInt { 51 | t.Fatalf("Expected second most common count to be '%d', got '%d'\n", expInt, common[1].Count) 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /summarize/text_splitter.go: -------------------------------------------------------------------------------- 1 | package summarize 2 | 3 | import "unicode" 4 | 5 | type TextSplitter interface { 6 | Sentences(string) []string 7 | Words(string) []string 8 | } 9 | 10 | type DefaultTextSplitter struct { 11 | Punctuations []rune 12 | } 13 | 14 | func (d DefaultTextSplitter) Sentences(text string) []string { 15 | buf := getBuffer() 16 | defer bufferPool.Put(buf) 17 | 18 | sentences := []string{} 19 | newSentence := true 20 | lastNonWhiteSpace := -1 21 | 22 | for _, r := range text { 23 | if oneOfPunct(r, 
d.Punctuations) { 24 | if buf.Len() > 0 { 25 | if lastNonWhiteSpace > 0 { 26 | buf.Truncate(lastNonWhiteSpace) 27 | buf.WriteRune(r) 28 | sentences = append(sentences, buf.String()) 29 | } 30 | buf.Reset() 31 | newSentence = true 32 | } 33 | } else { 34 | isSpace := unicode.IsSpace(r) 35 | if newSentence && isSpace { 36 | continue 37 | } 38 | newSentence = false 39 | buf.WriteRune(r) 40 | if !isSpace { 41 | lastNonWhiteSpace = buf.Len() 42 | } 43 | } 44 | } 45 | 46 | if buf.Len() > 0 && lastNonWhiteSpace > 0 { 47 | buf.Truncate(lastNonWhiteSpace) 48 | sentences = append(sentences, buf.String()) 49 | buf.Reset() 50 | } 51 | 52 | return sentences 53 | } 54 | 55 | func (d DefaultTextSplitter) Words(text string) []string { 56 | buf := getBuffer() 57 | defer bufferPool.Put(buf) 58 | 59 | words := []string{} 60 | 61 | for _, r := range text { 62 | if unicode.IsLetter(r) || unicode.IsNumber(r) { 63 | buf.WriteRune(r) 64 | } else if !unicode.IsOneOf([]*unicode.RangeTable{unicode.Hyphen}, r) { 65 | if buf.Len() > 0 { 66 | words = append(words, buf.String()) 67 | } 68 | buf.Reset() 69 | } 70 | } 71 | 72 | if buf.Len() > 0 { 73 | words = append(words, buf.String()) 74 | } 75 | 76 | return words 77 | } 78 | 79 | func oneOfPunct(r rune, punct []rune) bool { 80 | for _, p := range punct { 81 | if p == r { 82 | return true 83 | } 84 | } 85 | return false 86 | } 87 | 88 | func oneOfStartQuote(r rune, quotes [][]rune) int { 89 | for i, q := range quotes { 90 | if q[0] == r { 91 | return i 92 | } 93 | } 94 | return -1 95 | } 96 | -------------------------------------------------------------------------------- /summarize/text_splitter_test.go: -------------------------------------------------------------------------------- 1 | package summarize 2 | 3 | import "testing" 4 | 5 | func TestDefaultSentenceSplitter(t *testing.T) { 6 | d := DefaultTextSplitter{[]rune{'.', '!', '?'}} 7 | 8 | exp := []string{"First sentence.", "Second sentence"} 9 | res := d.Sentences(text1) 10 | 11 | 
checkSentences(t, exp, res) 12 | 13 | exp = []string{"First sentence."} 14 | res = d.Sentences(text2) 15 | 16 | checkSentences(t, exp, res) 17 | 18 | exp = []string{`Then he said: " do not feed the sharks"`} 19 | res = d.Sentences(text3) 20 | 21 | checkSentences(t, exp, res) 22 | 23 | exp = []string{ 24 | "The contribution of cloud computing and mobile computing technologies lead to the newly emerging mobile cloud com- puting paradigm.", 25 | "Three major approaches have been pro- posed for mobile cloud applications: 1) extending the access to cloud services to mobile devices; 2) enabling mobile de- vices to work collaboratively as cloud resource providers; 3) augmenting the execution of mobile applications on portable devices using cloud resources.", 26 | "In this paper, we focus on the third approach in supporting mobile data stream applica- tions.", 27 | "More specifically, we study how to optimize the com- putation partitioning of a data stream application between mobile and cloud to achieve maximum speed/throughput in processing the streaming data.", 28 | "To the best of our knowledge, it is the first work to study the partitioning problem for mobile data stream applica- tions, where the optimization is placed on achieving high throughput of processing the streaming data rather than minimizing the makespan of executions as in other appli- cations.", 29 | } 30 | res = d.Sentences(bigText) 31 | 32 | checkSentences(t, exp, res) 33 | } 34 | 35 | func TestDefaultWordSplitter(t *testing.T) { 36 | d := DefaultTextSplitter{[]rune{'.', '!', '?'}} 37 | 38 | exp := []string{"First", "sentence", "Second", "sentence"} 39 | res := d.Words(text1) 40 | 41 | checkSentences(t, exp, res) 42 | 43 | exp = []string{"Then", "he", "said", "do", "not", "feed", "the", "sharks"} 44 | res = d.Words(text3) 45 | 46 | checkSentences(t, exp, res) 47 | 48 | exp = []string{"Three", "things", "are", "outlined", "1", "first", "2", "second", "3", "third"} 49 | res = d.Words(text4) 50 | 51 | 
checkSentences(t, exp, res) 52 | } 53 | 54 | func checkSentences(t *testing.T, exp, res []string) { 55 | if len(exp) != len(res) { 56 | t.Fatalf("Number of sentences differ from expected: %d - %d\n", len(res), len(exp)) 57 | } 58 | for i, s := range exp { 59 | if s != res[i] { 60 | t.Fatalf("Expected sentence '%s', got '%s'\n", s, res[i]) 61 | } 62 | } 63 | } 64 | 65 | var ( 66 | text1 = ` First sentence. Second sentence ` 67 | text2 = `First sentence. ` 68 | text3 = `Then he said: " do not feed the sharks"` 69 | text4 = `Three things are outlined: 1) first; 2) second; 3) third` 70 | bigText = `The contribution of cloud computing and mobile computing technologies lead to the newly emerging mobile cloud com- puting paradigm. 71 | Three major approaches have been pro- posed for mobile cloud applications: 1) extending the access to cloud services to mobile devices; 2) enabling mobile de- vices to work collaboratively as cloud resource providers; 3) augmenting the execution of mobile applications on portable devices using cloud resources. 72 | In this paper, we focus on the third approach in supporting mobile data stream applica- tions. 73 | More specifically, we study how to optimize the com- putation partitioning of a data stream application between mobile and cloud to achieve maximum speed/throughput in processing the streaming data. 74 | To the best of our knowledge, it is the first work to study the partitioning problem for mobile data stream applica- tions, where the optimization is placed on achieving high throughput of processing the streaming data rather than minimizing the makespan of executions as in other appli- cations.` 75 | ) 76 | --------------------------------------------------------------------------------