├── README.md
├── data
│   ├── README.md
│   └── dictionary.txt
├── dictionary.go
├── go.mod
├── go.sum
├── license.txt
├── segment.go
├── segmenter.go
├── segmenter_test.go
├── server
│   ├── server.go
│   └── static
│       ├── index.html
│       └── jquery.min.js
├── test_utils.go
├── testdata
│   ├── bailuyuan.txt
│   ├── test_dict1.txt
│   └── test_dict2.txt
├── token.go
├── tools
│   ├── benchmark.go
│   ├── example.go
│   └── goroutines.go
├── utils.go
├── utils_test.go
└── vendor
    ├── github.com
    │   ├── adamzy
    │   │   ├── cedar-go
    │   │   │   ├── LICENSE.md
    │   │   │   ├── README.md
    │   │   │   ├── api.go
    │   │   │   ├── cedar.go
    │   │   │   ├── doc.go
    │   │   │   ├── errors.go
    │   │   │   └── io.go
    │   │   └── sego
    │   │       ├── README.md
    │   │       ├── dictionary.go
    │   │       ├── license.txt
    │   │       ├── segment.go
    │   │       ├── segmenter.go
    │   │       ├── test_utils.go
    │   │       ├── token.go
    │   │       └── utils.go
    │   └── issue9
    │       └── assert
    │           ├── .editorconfig
    │           ├── .gitignore
    │           ├── LICENSE
    │           ├── README.md
    │           ├── assert.go
    │           ├── assertion.go
    │           ├── doc.go
    │           ├── go.mod
    │           └── util.go
    └── modules.txt

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
sego
====

Chinese word segmentation in Go.

The dictionary is implemented as a double-array trie; the segmentation
algorithm is a frequency-based shortest path found by dynamic programming.

sego supports normal and search-engine segmentation modes, user dictionaries
and part-of-speech tagging, and can run as a JSON RPC service.

Segmentation speed is 9 MB/s single-threaded and 42 MB/s with concurrent
goroutines (8-core MacBook Pro).

# Install/update

```
go get -u github.com/huichen/sego
```

# Usage

```go
package main

import (
	"fmt"
	"github.com/huichen/sego"
)

func main() {
	// Load the dictionary
	var segmenter sego.Segmenter
	segmenter.LoadDictionary("github.com/huichen/sego/data/dictionary.txt")

	// Segment the text
	text := []byte("中华人民共和国中央人民政府")
	segments := segmenter.Segment(text)

	// Process the segmentation result.
	// Both normal mode and search mode are supported; see the comment on the
	// SegmentsToString function in the code.
	fmt.Println(sego.SegmentsToString(segments, false))
}
```
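The README's example uses normal mode; a minimal sketch of the difference between the two modes, assuming you run it from a local checkout so that the `data/dictionary.txt` path (an assumption, adjust to your setup) resolves:

```go
package main

import (
	"fmt"

	"github.com/huichen/sego"
)

func main() {
	var segmenter sego.Segmenter
	// Assumed path: run from the repository root so data/dictionary.txt exists.
	segmenter.LoadDictionary("data/dictionary.txt")

	text := []byte("中华人民共和国中央人民政府")
	segments := segmenter.Segment(text)

	// Normal mode: one token per position on the shortest path.
	fmt.Println(sego.SegmentsToString(segments, false))
	// Search-engine mode: additionally expands each token into its
	// sub-tokens, which is what an inverted index usually wants.
	fmt.Println(sego.SegmentsToString(segments, true))
}
```

With the bundled dictionary, the tests in segmenter_test.go show normal mode producing `中华人民共和国中央人民政府/nt` while search mode additionally emits sub-tokens such as `中华/nz` and `人民/n`.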
--------------------------------------------------------------------------------
/data/README.md:
--------------------------------------------------------------------------------
dictionary.txt is copied from github.com/fxsjy/jieba

--------------------------------------------------------------------------------
/dictionary.go:
--------------------------------------------------------------------------------
package sego

import "github.com/adamzy/cedar-go"

// Dictionary implements a prefix trie over strings; a token may end at a leaf
// node or at an interior node.
type Dictionary struct {
	trie           *cedar.Cedar // cedar prefix trie
	maxTokenLength int          // length of the longest token in the dictionary
	tokens         []Token      // all tokens in the dictionary, for easy iteration
	totalFrequency int64        // sum of the frequencies of all tokens
}

func NewDictionary() *Dictionary {
	return &Dictionary{trie: cedar.New()}
}

// MaxTokenLength returns the length of the longest token in the dictionary.
func (dict *Dictionary) MaxTokenLength() int {
	return dict.maxTokenLength
}

// NumTokens returns the number of tokens in the dictionary.
func (dict *Dictionary) NumTokens() int {
	return len(dict.tokens)
}

// TotalFrequency returns the sum of the frequencies of all tokens.
func (dict *Dictionary) TotalFrequency() int64 {
	return dict.totalFrequency
}

// Close releases the dictionary's resources.
func (dict *Dictionary) Close() {
	dict.trie = nil
	dict.maxTokenLength = 0
	dict.tokens = nil
	dict.totalFrequency = int64(0)
}

// addToken adds a token to the dictionary.
func (dict *Dictionary) addToken(token Token) {
	bytes := textSliceToBytes(token.text)
	if _, err := dict.trie.Get(bytes); err == nil {
		// The token is already in the dictionary.
		return
	}

	dict.trie.Insert(bytes, dict.NumTokens())
	dict.tokens = append(dict.tokens, token)
	dict.totalFrequency += int64(token.frequency)
	if len(token.text) > dict.maxTokenLength {
		dict.maxTokenLength = len(token.text)
	}
}

// lookupTokens finds all dictionary tokens that are prefixes of the word
// sequence words, and returns the number of tokens found.
func (dict *Dictionary) lookupTokens(words []Text, tokens []*Token) (numOfTokens int) {
	var id, value int
	var err error
	for _, word := range words {
		id, err = dict.trie.Jump(word, id)
		if err != nil {
			break
		}
		value, err = dict.trie.Value(id)
		if err == nil {
			tokens[numOfTokens] = &dict.tokens[value]
			numOfTokens++
		}
	}
	return
}
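lookupTokens walks the cedar trie incrementally with Jump, so one pass over a window of the text returns every dictionary entry that begins at that position. The dictionary is populated through Segmenter.LoadDictionary; a small sketch of inspecting it through the exported accessors (the dictionary path is an assumption):

```go
package main

import (
	"fmt"

	"github.com/huichen/sego"
)

func main() {
	var seg sego.Segmenter
	seg.LoadDictionary("data/dictionary.txt") // assumed path

	dict := seg.Dictionary()
	fmt.Println("tokens:", dict.NumTokens())          // entries kept after frequency filtering
	fmt.Println("longest:", dict.MaxTokenLength())    // measured in words, not bytes
	fmt.Println("total freq:", dict.TotalFrequency()) // denominator of the path values
}
```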
--------------------------------------------------------------------------------
/go.mod:
--------------------------------------------------------------------------------
module github.com/huichen/sego

go 1.16

require (
	github.com/adamzy/cedar-go v0.0.0-20170805034717-80a9c64b256d
	github.com/adamzy/sego v0.0.0-20151004184924-5eab9a44f8e8
	github.com/issue9/assert v1.4.1
)

--------------------------------------------------------------------------------
/go.sum:
--------------------------------------------------------------------------------
github.com/adamzy/cedar-go v0.0.0-20170805034717-80a9c64b256d h1:ir/IFJU5xbja5UaBEQLjcvn7aAU01nqU/NUyOBEU+ew=
github.com/adamzy/cedar-go v0.0.0-20170805034717-80a9c64b256d/go.mod h1:PRWNwWq0yifz6XDPZu48aSld8BWwBfr2JKB2bGWiEd4=
github.com/adamzy/sego v0.0.0-20151004184924-5eab9a44f8e8 h1:0RkucRD7zjlMC44X9ni444XpR3vjzyUeHRlViwOwjUw=
github.com/adamzy/sego v0.0.0-20151004184924-5eab9a44f8e8/go.mod h1:KQxo+Xesl2wLJ3yJcX443KaoWzXpbPzU1GNRyE8kNEY=
github.com/issue9/assert v1.4.1 h1:gUtOpMTeaE4JTe9kACma5foOHBvVt1p5XTFrULDwdXI=
github.com/issue9/assert v1.4.1/go.mod h1:Yktk83hAVl1SPSYtd9kjhBizuiBIqUQyj+D5SE2yjVY=

--------------------------------------------------------------------------------
/license.txt:
--------------------------------------------------------------------------------
Copyright 2013 Hui Chen

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

--------------------------------------------------------------------------------
/segment.go:
--------------------------------------------------------------------------------
package sego

// Segment is one token occurrence in a text.
type Segment struct {
	// Byte position in the text where the token starts.
	start int

	// Byte position in the text where the token ends (exclusive).
	end int

	// The token itself.
	token *Token
}

// Start returns the byte position in the text where the token starts.
func (s *Segment) Start() int {
	return s.start
}

// End returns the byte position in the text where the token ends (exclusive).
func (s *Segment) End() int {
	return s.end
}

// Token returns the token of this segment.
func (s *Segment) Token() *Token {
	return s.token
}
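Start and End are byte offsets into the original UTF-8 input, not rune indices, so each CJK character spans three bytes. A short sketch (dictionary path assumed, as above):

```go
package main

import (
	"fmt"

	"github.com/huichen/sego"
)

func main() {
	var seg sego.Segmenter
	seg.LoadDictionary("data/dictionary.txt") // assumed path

	text := []byte("中国人口")
	for _, s := range seg.Segment(text) {
		// Slicing the input with the byte offsets recovers the token text.
		fmt.Printf("%s [%d,%d)\n", text[s.Start():s.End()], s.Start(), s.End())
	}
}
```

With the bundled dictionary this prints `中国 [0,6)` and `人口 [6,12)`, matching the offsets asserted in segmenter_test.go.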
--------------------------------------------------------------------------------
/segmenter.go:
--------------------------------------------------------------------------------
// Chinese word segmentation in Go.
package sego

import (
	"bufio"
	"fmt"
	"log"
	"math"
	"os"
	"strconv"
	"strings"
	"unicode"
	"unicode/utf8"
)

const (
	minTokenFrequency = 2 // only read tokens with at least this frequency from the dictionary file
)

// Segmenter is the word segmenter.
type Segmenter struct {
	dict *Dictionary
}

// jumper records the forward jump chosen at one word of the text during the
// Viterbi-style search: the token taken and the shortest-path value from the
// start of the text segment to this word.
type jumper struct {
	minDistance float32
	token       *Token
}

// Dictionary returns the dictionary used by the segmenter.
func (seg *Segmenter) Dictionary() *Dictionary {
	return seg.dict
}

// LoadDictionary loads the dictionary from one or more files.
//
// Multiple dictionary files can be loaded by separating the names with ",";
// files listed first take precedence, e.g.
//	"user_dict.txt,common_dict.txt"
// When a token appears in both the user dictionary and the common dictionary,
// the user dictionary entry wins.
//
// Dictionary format (one token per line):
//	token_text frequency part_of_speech
func (seg *Segmenter) LoadDictionary(files string) {
	seg.dict = NewDictionary()
	for _, file := range strings.Split(files, ",") {
		log.Printf("载入sego词典 %s", file)
		dictFile, err := os.Open(file)
		if err != nil {
			log.Fatalf("无法载入字典文件 \"%s\" \n", file)
		}
		defer dictFile.Close()

		reader := bufio.NewReader(dictFile)
		var text string
		var freqText string
		var frequency int
		var pos string

		// Read the tokens line by line.
		for {
			size, _ := fmt.Fscanln(reader, &text, &freqText, &pos)

			if size == 0 {
				// End of file.
				break
			} else if size < 2 {
				// Invalid line.
				continue
			} else if size == 2 {
				// No part-of-speech tag; use the empty string.
				pos = ""
			}

			// Parse the frequency.
			var err error
			frequency, err = strconv.Atoi(freqText)
			if err != nil {
				continue
			}

			// Skip tokens whose frequency is too low.
			if frequency < minTokenFrequency {
				continue
			}

			// Add the token to the dictionary.
			words := splitTextToWords([]byte(text))
			token := Token{text: words, frequency: frequency, pos: pos}
			seg.dict.addToken(token)
		}
	}

	// Compute each token's path value; see the comment on the Token struct
	// for what the path value means.
	logTotalFrequency := float32(math.Log2(float64(seg.dict.totalFrequency)))
	for i := range seg.dict.tokens {
		token := &seg.dict.tokens[i]
		token.distance = logTotalFrequency - float32(math.Log2(float64(token.frequency)))
	}

	// Subdivide each token further, for search-engine mode; see the comment
	// on the Token struct for how that mode is used.
	for i := range seg.dict.tokens {
		token := &seg.dict.tokens[i]
		segments := seg.segmentWords(token.text, true)

		// Count the sub-tokens that need to be added.
		numTokensToAdd := 0
		for iToken := 0; iToken < len(segments); iToken++ {
			if len(segments[iToken].token.text) > 0 {
				numTokensToAdd++
			}
		}
		token.segments = make([]*Segment, numTokensToAdd)

		// Add the sub-tokens.
		iSegmentsToAdd := 0
		for iToken := 0; iToken < len(segments); iToken++ {
			if len(segments[iToken].token.text) > 0 {
				token.segments[iSegmentsToAdd] = &segments[iToken]
				iSegmentsToAdd++
			}
		}
	}

	log.Println("sego词典载入完毕")
}

// Segment segments a text.
//
// Input:
//	bytes	the UTF-8 text as a byte slice
//
// Output:
//	[]Segment	the resulting segments
func (seg *Segmenter) Segment(bytes []byte) []Segment {
	return seg.internalSegment(bytes, false)
}

func (seg *Segmenter) InternalSegment(bytes []byte, searchMode bool) []Segment {
	return seg.internalSegment(bytes, searchMode)
}

// Close releases the segmenter's resources.
func (seg *Segmenter) Close() {
	if seg.dict != nil {
		seg.dict.Close()
	}
}

func (seg *Segmenter) internalSegment(bytes []byte, searchMode bool) []Segment {
	// Handle the trivial case.
	if len(bytes) == 0 {
		return []Segment{}
	}

	// Split the text into words.
	text := splitTextToWords(bytes)

	return seg.segmentWords(text, searchMode)
}

func (seg *Segmenter) segmentWords(text []Text, searchMode bool) []Segment {
	// In search mode a single-word token cannot be subdivided any further.
	if searchMode && len(text) == 1 {
		return []Segment{}
	}

	// jumpers holds the forward-jump information at each word: the token
	// taken by that jump and the shortest-path value from the start of the
	// text segment to the word.
	jumpers := make([]jumper, len(text))

	tokens := make([]*Token, seg.dict.maxTokenLength)
	for current := 0; current < len(text); current++ {
		// Look up the shortest path at the previous word, as the base for
		// the path values computed below.
		var baseDistance float32
		if current == 0 {
			// At the start of the text the base distance is zero.
			baseDistance = 0
		} else {
			baseDistance = jumpers[current-1].minDistance
		}

		// Find all tokens that start at the current word.
		numTokens := seg.dict.lookupTokens(
			text[current:minInt(current+seg.dict.maxTokenLength, len(text))], tokens)

		// For each candidate token, update the jump information at the word
		// where that token ends.
		for iToken := 0; iToken < numTokens; iToken++ {
			location := current + len(tokens[iToken].text) - 1
			if !searchMode || current != 0 || location != len(text)-1 {
				updateJumper(&jumpers[location], baseDistance, tokens[iToken])
			}
		}

		// If no token matches at the current word, add a pseudo token.
		if numTokens == 0 || len(tokens[0].text) > 1 {
			updateJumper(&jumpers[current], baseDistance,
				&Token{text: []Text{text[current]}, frequency: 1, distance: 32, pos: "x"})
		}
	}

	// First backward scan: count the segments to be emitted.
	numSeg := 0
	for index := len(text) - 1; index >= 0; {
		location := index - len(jumpers[index].token.text) + 1
		numSeg++
		index = location - 1
	}

	// Second backward scan: add the segments to the final result.
	outputSegments := make([]Segment, numSeg)
	for index := len(text) - 1; index >= 0; {
		location := index - len(jumpers[index].token.text) + 1
		numSeg--
		outputSegments[numSeg].token = jumpers[index].token
		index = location - 1
	}

	// Compute the byte position of each segment.
	bytePosition := 0
	for iSeg := 0; iSeg < len(outputSegments); iSeg++ {
		outputSegments[iSeg].start = bytePosition
		bytePosition += textSliceByteLength(outputSegments[iSeg].token.text)
		outputSegments[iSeg].end = bytePosition
	}
	return outputSegments
}

// updateJumper updates the jump information at a position when
//  1. the position has never been visited (jumper.minDistance is zero), or
//  2. the position's current shortest path is longer than the new one,
// setting the position's shortest-path value to baseDistance plus the new
// token's distance.
func updateJumper(jumper *jumper, baseDistance float32, token *Token) {
	newDistance := baseDistance + token.distance
	if jumper.minDistance == 0 || jumper.minDistance > newDistance {
		jumper.minDistance = newDistance
		jumper.token = token
	}
}

// minInt returns the smaller of two ints.
func minInt(a, b int) int {
	if a > b {
		return b
	}
	return a
}

// maxInt returns the larger of two ints.
func maxInt(a, b int) int {
	if a > b {
		return a
	}
	return b
}

// splitTextToWords splits a text into words.
func splitTextToWords(text Text) []Text {
	output := make([]Text, 0, len(text)/3)
	current := 0
	inAlphanumeric := true
	alphanumericStart := 0
	for current < len(text) {
		r, size := utf8.DecodeRune(text[current:])
		if size <= 2 && (unicode.IsLetter(r) || unicode.IsNumber(r)) {
			// The current rune is a Latin letter or digit (not CJK).
			if !inAlphanumeric {
				alphanumericStart = current
				inAlphanumeric = true
			}
		} else {
			if inAlphanumeric {
				inAlphanumeric = false
				if current != 0 {
					output = append(output, toLower(text[alphanumericStart:current]))
				}
			}
			output = append(output, text[current:current+size])
		}
		current += size
	}

	// Handle the case where the last word is alphanumeric.
	if inAlphanumeric {
		if current != 0 {
			output = append(output, toLower(text[alphanumericStart:current]))
		}
	}

	return output
}

// toLower converts an English word to lower case.
func toLower(text []byte) []byte {
	output := make([]byte, len(text))
	for i, t := range text {
		if t >= 'A' && t <= 'Z' {
			output[i] = t - 'A' + 'a'
		} else {
			output[i] = t
		}
	}
	return output
}
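The "text frequency part-of-speech" format documented on LoadDictionary is easy to exercise end to end; a self-contained sketch using a throwaway two-entry dictionary (the entries and frequencies are made up for the illustration):

```go
package main

import (
	"fmt"
	"os"

	"github.com/huichen/sego"
)

func main() {
	// Hypothetical dictionary: one token per line, frequency >= minTokenFrequency (2),
	// with the part-of-speech column optional.
	f, err := os.CreateTemp("", "dict-*.txt")
	if err != nil {
		panic(err)
	}
	defer os.Remove(f.Name())
	if _, err := f.WriteString("中国 100 ns\n人口 80 n\n"); err != nil {
		panic(err)
	}
	f.Close()

	var seg sego.Segmenter
	seg.LoadDictionary(f.Name()) // several files could be joined with ","
	fmt.Println(sego.SegmentsToString(seg.Segment([]byte("中国人口")), false))
	// Prints: 中国/ns 人口/n
}
```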
--------------------------------------------------------------------------------
/segmenter_test.go:
--------------------------------------------------------------------------------
package sego

import (
	"testing"
)

var (
	prodSeg = Segmenter{}
)

func TestSplit(t *testing.T) {
	expect(t, "中/国/有/十/三/亿/人/口/",
		bytesToString(splitTextToWords([]byte(
			"中国有十三亿人口"))))

	expect(t, "github/ /is/ /a/ /web/-/based/ /hosting/ /service/,/ /for/ /software/ /development/ /projects/./",
		bytesToString(splitTextToWords([]byte(
			"GitHub is a web-based hosting service, for software development projects."))))

	expect(t, "中/国/雅/虎/yahoo/!/ /china/致/力/于/,/领/先/的/公/益/民/生/门/户/网/站/。/",
		bytesToString(splitTextToWords([]byte(
			"中国雅虎Yahoo! China致力于,领先的公益民生门户网站。"))))

	expect(t, "こ/ん/に/ち/は/", bytesToString(splitTextToWords([]byte("こんにちは"))))

	expect(t, "안/녕/하/세/요/", bytesToString(splitTextToWords([]byte("안녕하세요"))))

	expect(t, "Я/ /тоже/ /рада/ /Вас/ /видеть/", bytesToString(splitTextToWords([]byte("Я тоже рада Вас видеть"))))

	expect(t, "¿/cómo/ /van/ /las/ /cosas/", bytesToString(splitTextToWords([]byte("¿Cómo van las cosas"))))

	expect(t, "wie/ /geht/ /es/ /ihnen/", bytesToString(splitTextToWords([]byte("Wie geht es Ihnen"))))

	expect(t, "je/ /suis/ /enchanté/ /de/ /cette/ /pièce/",
		bytesToString(splitTextToWords([]byte("Je suis enchanté de cette pièce"))))
}

func TestSegment(t *testing.T) {
	var seg Segmenter
	seg.LoadDictionary("testdata/test_dict1.txt,testdata/test_dict2.txt")
	expect(t, "12", seg.dict.NumTokens())
	segments := seg.Segment([]byte("中国有十三亿人口"))
	expect(t, "中国/ 有/p3 十三亿/ 人口/p12 ", SegmentsToString(segments, false))
	expect(t, "4", len(segments))
	expect(t, "0", segments[0].start)
	expect(t, "6", segments[0].end)
	expect(t, "6", segments[1].start)
	expect(t, "9", segments[1].end)
	expect(t, "9", segments[2].start)
	expect(t, "18", segments[2].end)
	expect(t, "18", segments[3].start)
	expect(t, "24", segments[3].end)
}

func TestLargeDictionary(t *testing.T) {
	prodSeg.LoadDictionary("data/dictionary.txt")
	expect(t, "中国/ns 人口/n ", SegmentsToString(prodSeg.Segment(
		[]byte("中国人口")), false))

	expect(t, "中国/ns 人口/n ", SegmentsToString(prodSeg.internalSegment(
		[]byte("中国人口"), false), false))

	expect(t, "中国/ns 人口/n ", SegmentsToString(prodSeg.internalSegment(
		[]byte("中国人口"), true), false))

	expect(t, "中华人民共和国/ns 中央人民政府/nt ", SegmentsToString(prodSeg.internalSegment(
		[]byte("中华人民共和国中央人民政府"), true), false))

	expect(t, "中华人民共和国中央人民政府/nt ", SegmentsToString(prodSeg.internalSegment(
		[]byte("中华人民共和国中央人民政府"), false), false))

	expect(t, "中华/nz 人民/n 共和/nz 国/n 共和国/ns 人民共和国/nt 中华人民共和国/ns 中央/n 人民/n 政府/n 人民政府/nt 中央人民政府/nt 中华人民共和国中央人民政府/nt ", SegmentsToString(prodSeg.Segment(
		[]byte("中华人民共和国中央人民政府")), true))
}
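The expectations above hinge on the path values assigned in LoadDictionary: distance = log2(totalFrequency) - log2(frequency) = -log2(P(token)), so minimizing the sum of distances maximizes the product of token probabilities, and updateJumper is the relaxation step of a shortest-path pass over that weighted graph. A toy illustration with made-up frequencies:

```go
package main

import (
	"fmt"
	"math"
)

// distance mirrors the weighting in LoadDictionary: -log2 of the token's
// relative frequency.
func distance(frequency, totalFrequency float64) float64 {
	return math.Log2(totalFrequency) - math.Log2(frequency)
}

func main() {
	total := 1000.0 // hypothetical sum of all dictionary frequencies

	common := distance(100, total) // ~3.32
	rare := distance(2, total)     // ~8.97
	fmt.Println(common, rare)

	// Segmenting a span as one common token (cost ~3.32) beats splitting it
	// into two rare tokens (cost ~17.93), which is why frequent words win.
	fmt.Println(common < rare+rare) // true
}
```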
--------------------------------------------------------------------------------
/server/server.go:
--------------------------------------------------------------------------------
/*

The sego segmentation server provides two modes at once:

	"/"     a segmentation demo page
	"/json" a JSON-format RPC service
		input:
			a text parameter, via POST or GET
		output, in JSON format:
			{
				segments:[
					{"text":"服务器", "pos":"n"},
					{"text":"指令", "pos":"n"},
					...
				]
			}

A test server is available at http://sego.weiboglass.com

*/

package main

import (
	"encoding/json"
	"flag"
	"fmt"
	"io"
	"log"
	"net/http"
	"runtime"

	"github.com/huichen/sego"
)

var (
	host         = flag.String("host", "", "HTTP服务器主机名")
	port         = flag.Int("port", 8080, "HTTP服务器端口")
	dict         = flag.String("dict", "../data/dictionary.txt", "词典文件")
	staticFolder = flag.String("static_folder", "static", "静态页面存放的目录")
	segmenter    = sego.Segmenter{}
)

type JsonResponse struct {
	Segments []*Segment `json:"segments"`
}

type Segment struct {
	Text string `json:"text"`
	Pos  string `json:"pos"`
}

func JsonRpcServer(w http.ResponseWriter, req *http.Request) {
	// Get the text to segment.
	text := req.URL.Query().Get("text")
	if text == "" {
		text = req.PostFormValue("text")
	}

	// Segment it.
	segments := segmenter.Segment([]byte(text))

	// Convert to the output format.
	ss := []*Segment{}
	for _, segment := range segments {
		ss = append(ss, &Segment{Text: segment.Token().Text(), Pos: segment.Token().Pos()})
	}
	response, _ := json.Marshal(&JsonResponse{Segments: ss})

	w.Header().Set("Content-Type", "application/json")
	io.WriteString(w, string(response))
}

func main() {
	flag.Parse()

	// Use as many OS threads as there are CPUs.
	runtime.GOMAXPROCS(runtime.NumCPU())

	// Initialize the segmenter.
	segmenter.LoadDictionary(*dict)

	http.HandleFunc("/json", JsonRpcServer)
	http.Handle("/", http.FileServer(http.Dir(*staticFolder)))
	log.Print("服务器启动")
	log.Fatal(http.ListenAndServe(fmt.Sprintf("%s:%d", *host, *port), nil))
}
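A minimal Go client for the /json endpoint, matching the request and response shape in the doc comment above (host and port are the flag defaults; adjust as needed):

```go
package main

import (
	"fmt"
	"io"
	"net/http"
	"net/url"
)

func main() {
	// GET works as well as POST; the text goes in the "text" parameter.
	resp, err := http.Get("http://localhost:8080/json?text=" +
		url.QueryEscape("中华人民共和国中央人民政府"))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	body, _ := io.ReadAll(resp.Body)
	fmt.Println(string(body)) // e.g. {"segments":[{"text":"...","pos":"..."}]}
}
```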
--------------------------------------------------------------------------------
/server/static/index.html:
--------------------------------------------------------------------------------
[Demo page titled "sego中文分词演示" (sego Chinese segmentation demo) that submits
text to the /json service and renders the returned segments; the HTML markup was
lost in extraction, and only the page title survives.]

--------------------------------------------------------------------------------
/server/static/jquery.min.js:
--------------------------------------------------------------------------------
/*! jQuery v1.7.2 jquery.com | jquery.org/license */
[Vendored, minified jQuery 1.7.2 used by the demo page; source omitted.]
a})}(f.ajaxSettings.xhr()),f.support.ajax&&f.ajaxTransport(function(c){if(!c.crossDomain||f.support.cors){var d;return{send:function(e,g){var h=c.xhr(),i,j;c.username?h.open(c.type,c.url,c.async,c.username,c.password):h.open(c.type,c.url,c.async);if(c.xhrFields)for(j in c.xhrFields)h[j]=c.xhrFields[j];c.mimeType&&h.overrideMimeType&&h.overrideMimeType(c.mimeType),!c.crossDomain&&!e["X-Requested-With"]&&(e["X-Requested-With"]="XMLHttpRequest");try{for(j in e)h.setRequestHeader(j,e[j])}catch(k){}h.send(c.hasContent&&c.data||null),d=function(a,e){var j,k,l,m,n;try{if(d&&(e||h.readyState===4)){d=b,i&&(h.onreadystatechange=f.noop,ce&&delete cg[i]);if(e)h.readyState!==4&&h.abort();else{j=h.status,l=h.getAllResponseHeaders(),m={},n=h.responseXML,n&&n.documentElement&&(m.xml=n);try{m.text=h.responseText}catch(a){}try{k=h.statusText}catch(o){k=""}!j&&c.isLocal&&!c.crossDomain?j=m.text?200:404:j===1223&&(j=204)}}}catch(p){e||g(-1,p)}m&&g(j,k,m,l)},!c.async||h.readyState===4?d():(i=++cf,ce&&(cg||(cg={},f(a).unload(ce)),cg[i]=d),h.onreadystatechange=d)},abort:function(){d&&d(0,1)}}}});var cj={},ck,cl,cm=/^(?:toggle|show|hide)$/,cn=/^([+\-]=)?([\d+.\-]+)([a-z%]*)$/i,co,cp=[["height","marginTop","marginBottom","paddingTop","paddingBottom"],["width","marginLeft","marginRight","paddingLeft","paddingRight"],["opacity"]],cq;f.fn.extend({show:function(a,b,c){var d,e;if(a||a===0)return this.animate(ct("show",3),a,b,c);for(var g=0,h=this.length;g=i.duration+this.startTime){this.now=this.end,this.pos=this.state=1,this.update(),i.animatedProperties[this.prop]=!0;for(b in i.animatedProperties)i.animatedProperties[b]!==!0&&(g=!1);if(g){i.overflow!=null&&!f.support.shrinkWrapBlocks&&f.each(["","X","Y"],function(a,b){h.style["overflow"+b]=i.overflow[a]}),i.hide&&f(h).hide();if(i.hide||i.show)for(b in i.animatedProperties)f.style(h,b,i.orig[b]),f.removeData(h,"fxshow"+b,!0),f.removeData(h,"toggle"+b,!0);d=i.complete,d&&(i.complete=!1,d.call(h))}return!1}i.duration==Infinity?this.now=e:(c=e-this.startTime,this.state=c/i.duration,this.pos=f.easing[i.animatedProperties[this.prop]](this.state,c,0,1,i.duration),this.now=this.start+(this.end-this.start)*this.pos),this.update();return!0}},f.extend(f.fx,{tick:function(){var a,b=f.timers,c=0;for(;c-1,k={},l={},m,n;j?(l=e.position(),m=l.top,n=l.left):(m=parseFloat(h)||0,n=parseFloat(i)||0),f.isFunction(b)&&(b=b.call(a,c,g)),b.top!=null&&(k.top=b.top-g.top+m),b.left!=null&&(k.left=b.left-g.left+n),"using"in b?b.using.call(a,k):e.css(k)}},f.fn.extend({position:function(){if(!this[0])return null;var a=this[0],b=this.offsetParent(),c=this.offset(),d=cx.test(b[0].nodeName)?{top:0,left:0}:b.offset();c.top-=parseFloat(f.css(a,"marginTop"))||0,c.left-=parseFloat(f.css(a,"marginLeft"))||0,d.top+=parseFloat(f.css(b[0],"borderTopWidth"))||0,d.left+=parseFloat(f.css(b[0],"borderLeftWidth"))||0;return{top:c.top-d.top,left:c.left-d.left}},offsetParent:function(){return this.map(function(){var a=this.offsetParent||c.body;while(a&&!cx.test(a.nodeName)&&f.css(a,"position")==="static")a=a.offsetParent;return a})}}),f.each({scrollLeft:"pageXOffset",scrollTop:"pageYOffset"},function(a,c){var d=/Y/.test(c);f.fn[a]=function(e){return f.access(this,function(a,e,g){var h=cy(a);if(g===b)return h?c in h?h[c]:f.support.boxModel&&h.document.documentElement[e]||h.document.body[e]:a[e];h?h.scrollTo(d?f(h).scrollLeft():g,d?g:f(h).scrollTop()):a[e]=g},a,e,arguments.length,null)}}),f.each({Height:"height",Width:"width"},function(a,c){var d="client"+a,e="scroll"+a,g="offset"+a;f.fn["inner"+a]=function(){var 
--------------------------------------------------------------------------------
/test_utils.go:
--------------------------------------------------------------------------------
1 | package sego
2 | 
3 | import (
4 | "fmt"
5 | "testing"
6 | )
7 | 
8 | func expect(t *testing.T, expect string, actual interface{}) {
9 | actualString := fmt.Sprint(actual)
10 | if expect != actualString {
11 | t.Errorf("expected=\"%s\", actual=\"%s\"", expect, actualString)
12 | }
13 | }
14 | 
15 | func printTokens(tokens []*Token, numTokens int) (output string) {
16 | for iToken := 0; iToken < numTokens; iToken++ {
17 | for _, word := range tokens[iToken].text {
18 | output += fmt.Sprint(string(word))
19 | }
20 | output += " "
21 | }
22 | return
23 | }
24 | 
25 | func toWords(strings ...string) []Text {
26 | words := []Text{}
27 | for _, s := range strings {
28 | words = append(words, []byte(s))
29 | }
30 | return words
31 | }
32 | 
33 | func bytesToString(bytes []Text) (output string) {
34 | for _, b := range bytes {
35 | output += (string(b) + "/")
36 | }
37 | return
38 | }
39 | 
--------------------------------------------------------------------------------
/testdata/test_dict1.txt:
--------------------------------------------------------------------------------
1 | 中 64 p1
2 | 国 64 p2
3 | 有 64 p3
4 | 三 64
5 | 亿 64 p5
6 | 人 64 p6
7 | 口 64 p7
8 | 
--------------------------------------------------------------------------------
/testdata/test_dict2.txt:
--------------------------------------------------------------------------------
1 | 中国 32
2 | 国有 8 p9
3 | 十三 16 p10
4 | 十三亿 4
5 | 人口 16 p12
6 | 
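The two test dictionaries above illustrate the plain-text dictionary format the segmenter consumes: one entry per line, with a word, its corpus frequency, and an optional part-of-speech tag. As an illustration only (the parseLine helper below is hypothetical, not sego's actual loader), one such line could be parsed like this:

```go
package main

import (
	"fmt"
	"strconv"
	"strings"
)

// entry mirrors one dictionary line: "word frequency [pos]".
type entry struct {
	text      string
	frequency int
	pos       string // may be empty, as in "三 64" above
}

// parseLine is a hypothetical helper, shown only to document the format.
func parseLine(line string) (entry, error) {
	fields := strings.Fields(line)
	if len(fields) < 2 {
		return entry{}, fmt.Errorf("malformed dictionary line: %q", line)
	}
	freq, err := strconv.Atoi(fields[1])
	if err != nil {
		return entry{}, err
	}
	e := entry{text: fields[0], frequency: freq}
	if len(fields) > 2 {
		e.pos = fields[2]
	}
	return e, nil
}

func main() {
	e, _ := parseLine("中 64 p1")
	fmt.Printf("%+v\n", e) // {text:中 frequency:64 pos:p1}
}
```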
--------------------------------------------------------------------------------
/token.go:
--------------------------------------------------------------------------------
1 | package sego
2 | 
3 | // Text is a string type that can represent
4 | // 1. a single character unit, such as "中" or "国"; an English word counts as one unit
5 | // 2. a token, such as "中国" or "人口"
6 | // 3. a piece of text, such as "中国有十三亿人口"
7 | type Text []byte
8 | 
9 | // Token is a single segmented word
10 | type Token struct {
11 | // the token's text, which is actually an array of character units
12 | text []Text
13 | 
14 | // the token's frequency in the corpus
15 | frequency int
16 | 
17 | // log2(total frequency/this token's frequency), i.e. log2(1/p(token)); used as the
18 | // token's path length in the dynamic program. Maximizing prod(p(token)) is the same
19 | // as minimizing sum(distance(token)), which is where "shortest path" comes from.
20 | distance float32
21 | 
22 | // part-of-speech tag
23 | pos string
24 | 
25 | // finer-grained segmentation of this token's text; see the comment on Segments.
26 | segments []*Segment
27 | }
28 | 
29 | // Text returns the token's text
30 | func (token *Token) Text() string {
31 | return textSliceToString(token.text)
32 | }
33 | 
34 | // Frequency returns the token's frequency in the corpus
35 | func (token *Token) Frequency() int {
36 | return token.frequency
37 | }
38 | 
39 | // Pos returns the token's part-of-speech tag
40 | func (token *Token) Pos() string {
41 | return token.pos
42 | }
43 | 
44 | // Segments returns the finer-grained splits of this token's text. For example, the
45 | // token "中华人民共和国中央人民政府" has two sub-tokens, "中华人民共和国" and "中央人民政府".
46 | // Sub-tokens can in turn have sub-tokens of their own, forming a tree; traversing it
47 | // yields all fine-grained splits of the token, mainly used for full-text search.
48 | func (token *Token) Segments() []*Segment {
49 | return token.segments
50 | }
51 | 
52 | // TextEquals reports whether the token's text equals the string s
53 | func (token *Token) TextEquals(s string) bool {
54 | tokenLen := 0
55 | for _, t := range token.text {
56 | tokenLen += len(t)
57 | }
58 | if tokenLen != len(s) {
59 | return false
60 | }
61 | bytStr := []byte(s)
62 | index := 0
63 | for i := 0; i < len(token.text); i++ {
64 | textArray := []byte(token.text[i])
65 | for j := 0; j < len(textArray); j++ {
66 | if textArray[j] != bytStr[index] {
67 | return false
68 | }
69 | index = index + 1
70 | }
71 | }
72 | return true
73 | }
74 | 
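To make the distance definition above concrete, here is a small self-contained sketch; the frequencies are invented for illustration (sego derives the real values from the loaded dictionary):

```go
package main

import (
	"fmt"
	"math"
)

func main() {
	// Hypothetical figures, for illustration only; in sego these
	// come from the dictionary when it is loaded.
	totalFrequency := 100000.0 // sum of all token frequencies in the dictionary
	frequency := 250.0         // this token's corpus frequency

	// distance = log2(total/frequency) = -log2(p(token)). Minimizing the sum of
	// distances along a segmentation maximizes the product of token probabilities.
	distance := float32(math.Log2(totalFrequency / frequency))
	fmt.Println(distance) // log2(400) ≈ 8.64
}
```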
--------------------------------------------------------------------------------
/tools/benchmark.go:
--------------------------------------------------------------------------------
1 | /*
2 | 
3 | Benchmarks sego's segmentation speed:
4 | 
5 | go run benchmark.go
6 | 
7 | To write the segmentation results to a file:
8 | 
9 | go run benchmark.go -output=output.txt
10 | 
11 | To find performance bottlenecks:
12 | 
13 | go build benchmark.go
14 | ./benchmark -cpuprofile=cpu.prof
15 | go tool pprof benchmark cpu.prof
16 | 
17 | To analyze memory usage:
18 | 
19 | go build benchmark.go
20 | ./benchmark -memprofile=mem.prof
21 | go tool pprof benchmark mem.prof
22 | 
23 | */
24 | 
25 | package main
26 | 
27 | import (
28 | "bufio"
29 | "flag"
30 | "fmt"
31 | "github.com/huichen/sego"
32 | "log"
33 | "os"
34 | "runtime"
35 | "runtime/pprof"
36 | "time"
37 | )
38 | 
39 | var (
40 | cpuprofile = flag.String("cpuprofile", "", "CPU profile file")
41 | memprofile = flag.String("memprofile", "", "memory profile file")
42 | output = flag.String("output", "", "write segmentation results to this file")
43 | numRuns = 20
44 | )
45 | 
46 | func main() {
47 | // force a single thread, since Go defaults to multiple threads from 1.5 on
48 | runtime.GOMAXPROCS(1)
49 | 
50 | // parse command-line flags
51 | flag.Parse()
52 | 
53 | // record the time
54 | t0 := time.Now()
55 | 
56 | var segmenter sego.Segmenter
57 | segmenter.LoadDictionary("../data/dictionary.txt")
58 | 
59 | // record the time
60 | t1 := time.Now()
61 | log.Printf("loading the dictionary took %v", t1.Sub(t0))
62 | 
63 | // write the memory profile
64 | if *memprofile != "" {
65 | f, err := os.Create(*memprofile)
66 | if err != nil {
67 | log.Fatal(err)
68 | }
69 | pprof.WriteHeapProfile(f)
70 | defer f.Close()
71 | }
72 | 
73 | // open the file to be segmented
74 | file, err := os.Open("../testdata/bailuyuan.txt")
75 | if err != nil {
76 | log.Fatal(err)
77 | }
78 | defer file.Close()
79 | 
80 | // read it line by line
81 | scanner := bufio.NewScanner(file)
82 | size := 0
83 | lines := [][]byte{}
84 | for scanner.Scan() {
85 | var text string
86 | fmt.Sscanf(scanner.Text(), "%s", &text)
87 | content := []byte(text)
88 | size += len(content)
89 | lines = append(lines, content)
90 | }
91 | 
92 | // open the output file when one is specified
93 | var of *os.File
94 | if *output != "" {
95 | of, err = os.Create(*output)
96 | if err != nil {
97 | log.Fatal(err)
98 | }
99 | defer of.Close()
100 | }
101 | 
102 | // record the time
103 | t2 := time.Now()
104 | 
105 | // start writing the CPU profile
106 | if *cpuprofile != "" {
107 | f, err := os.Create(*cpuprofile)
108 | if err != nil {
109 | log.Fatal(err)
110 | }
111 | pprof.StartCPUProfile(f)
112 | defer pprof.StopCPUProfile()
113 | }
114 | 
115 | // segment
116 | for i := 0; i < numRuns; i++ {
117 | for _, l := range lines {
118 | segments := segmenter.Segment(l)
119 | if *output != "" {
120 | of.WriteString(sego.SegmentsToString(segments, false))
121 | of.WriteString("\n")
122 | }
123 | }
124 | }
125 | 
126 | // stop the CPU profile before taking the final timing
127 | if *cpuprofile != "" {
128 | pprof.StopCPUProfile()
129 | }
130 | 
131 | // record the time and compute the segmentation speed
132 | t3 := time.Now()
133 | log.Printf("segmentation took %v", t3.Sub(t2))
134 | log.Printf("segmentation speed %f MB/s", float64(size*numRuns)/t3.Sub(t2).Seconds()/(1024*1024))
135 | }
136 | 
--------------------------------------------------------------------------------
/tools/example.go:
--------------------------------------------------------------------------------
1 | package main
2 | 
3 | import (
4 | "flag"
5 | "fmt"
6 | 
7 | "github.com/adamzy/sego"
8 | )
9 | 
10 | var (
11 | text = flag.String("text", "中国互联网历史上最大的一笔并购案", "the text to segment")
12 | )
13 | 
14 | func main() {
15 | flag.Parse()
16 | 
17 | var seg sego.Segmenter
18 | seg.LoadDictionary("../data/dictionary.txt")
19 | 
20 | segments := seg.Segment([]byte(*text))
21 | fmt.Println(sego.SegmentsToString(segments, true))
22 | }
23 | 
--------------------------------------------------------------------------------
/tools/goroutines.go:
--------------------------------------------------------------------------------
1 | // Benchmarks sego's parallel segmentation speed
2 | 
3 | package main
4 | 
5 | import (
6 | "bufio"
7 | "fmt"
8 | "github.com/huichen/sego"
9 | "log"
10 | "os"
11 | "runtime"
12 | "time"
13 | )
14 | 
15 | var (
16 | segmenter = sego.Segmenter{}
17 | numThreads = runtime.NumCPU()
18 | task = make(chan []byte, numThreads*40)
19 | done = make(chan bool, numThreads)
20 | numRuns = 50
21 | )
22 | 
23 | func worker() {
24 | for line := range task {
25 | segmenter.Segment(line)
26 | }
27 | done <- true
28 | }
29 | 
30 | func main() {
31 | // set the number of threads to the number of CPUs
32 | runtime.GOMAXPROCS(numThreads)
33 | 
34 | // load the dictionary
35 | segmenter.LoadDictionary("../data/dictionary.txt")
36 | 
37 | // open the file to be segmented
38 | file, err := os.Open("../testdata/bailuyuan.txt")
39 | if err != nil {
40 | log.Fatal(err)
41 | }
42 | defer file.Close()
43 | 
44 | // read it line by line
45 | scanner := bufio.NewScanner(file)
46 | size := 0
47 | lines := [][]byte{}
48 | for scanner.Scan() {
49 | var text string
50 | fmt.Sscanf(scanner.Text(), "%s", &text)
51 | content := []byte(text)
52 | size += len(content)
53 | lines = append(lines, content)
54 | }
55 | 
56 | // start the worker goroutines
57 | for i := 0; i < numThreads; i++ {
58 | go worker()
59 | }
60 | log.Print("starting segmentation")
61 | 
62 | // record the time
63 | t0 := time.Now()
64 | 
65 | // segment in parallel
66 | for i := 0; i < numRuns; i++ {
67 | for _, l := range lines {
68 | task <- l
69 | }
70 | }
71 | close(task)
72 | 
73 | // make sure segmentation has finished
74 | for i := 0; i < numThreads; i++ {
75 | <-done
76 | }
77 | 
78 | // record the time and compute the segmentation speed
79 | t1 := time.Now()
80 | log.Printf("segmentation took %v", t1.Sub(t0))
81 | log.Printf("segmentation speed %f MB/s", float64(size*numRuns)/t1.Sub(t0).Seconds()/(1024*1024))
82 | }
83 | 
--------------------------------------------------------------------------------
/utils.go:
--------------------------------------------------------------------------------
1 | package sego
2 | 
3 | import (
4 | "bytes"
5 | "fmt"
6 | )
7 | 
8 | // SegmentsToString returns the segmentation result as a string.
9 | //
10 | // There are two output modes; take "中华人民共和国" as an example:
11 | //
12 | // normal mode (searchMode=false) outputs the single token "中华人民共和国/ns "
13 | // search mode (searchMode=true) also outputs the finer-grained splits:
14 | // "中华/nz 人民/n 共和/nz 共和国/ns 人民共和国/nt 中华人民共和国/ns "
15 | //
16 | // Search mode mainly serves to give search engines as many keywords as possible; see the comment on the Token struct for details.
17 | func SegmentsToString(segs []Segment, searchMode bool) (output string) {
18 | if searchMode {
19 | for _, seg := range segs {
20 | output += tokenToString(seg.token)
21 | }
22 | } else {
23 | for _, seg := range segs {
24 | output += fmt.Sprintf(
25 | "%s/%s ", textSliceToString(seg.token.text), seg.token.pos)
26 | }
27 | }
28 | return
29 | }
30 | 
31 | func tokenToString(token *Token) (output string) {
32 | hasOnlyTerminalToken := true
33 | for _, s := range token.segments {
34 | if len(s.token.segments) > 1 {
35 | hasOnlyTerminalToken = false
36 | }
37 | }
38 | 
39 | if !hasOnlyTerminalToken {
40 | for _, s := range token.segments {
41 | if s != nil {
42 | output += tokenToString(s.token)
43 | }
44 | }
45 | }
46 | output += fmt.Sprintf("%s/%s ", textSliceToString(token.text), token.pos)
47 | return
48 | }
49 | 
50 | // SegmentsToSlice returns the segmentation result as a slice of strings.
51 | //
52 | // There are two output modes; take "中华人民共和国" as an example:
53 | //
54 | // normal mode (searchMode=false) outputs the single token "[中华人民共和国]"
55 | // search mode (searchMode=true) also outputs the finer-grained splits:
56 | // "[中华 人民 共和 共和国 人民共和国 中华人民共和国]"
57 | //
58 | // Search mode mainly serves to give search engines as many keywords as possible; see the comment on the Token struct for details.
59 | 
60 | func SegmentsToSlice(segs []Segment, searchMode bool) (output []string) {
61 | if searchMode {
62 | for _, seg := range segs {
63 | output = append(output, tokenToSlice(seg.token)...)
64 | }
65 | } else {
66 | for _, seg := range segs {
67 | output = append(output, seg.token.Text())
68 | }
69 | }
70 | return
71 | }
72 | 
73 | func tokenToSlice(token *Token) (output []string) {
74 | hasOnlyTerminalToken := true
75 | for _, s := range token.segments {
76 | if len(s.token.segments) > 1 {
77 | hasOnlyTerminalToken = false
78 | }
79 | }
80 | if !hasOnlyTerminalToken {
81 | for _, s := range token.segments {
82 | output = append(output, tokenToSlice(s.token)...)
83 | }
84 | }
85 | output = append(output, textSliceToString(token.text))
86 | return output
87 | }
88 | 
89 | // textSliceToString joins multiple character units into a single output string
90 | func textSliceToString(text []Text) string {
91 | return Join(text)
92 | }
93 | 
94 | func Join(a []Text) string {
95 | switch len(a) {
96 | case 0:
97 | return ""
98 | case 1:
99 | return string(a[0])
100 | case 2:
101 | // Special case for common small values.
102 | // Remove if golang.org/issue/6714 is fixed
103 | return string(a[0]) + string(a[1])
104 | case 3:
105 | // Special case for common small values.
106 | // Remove if golang.org/issue/6714 is fixed
107 | return string(a[0]) + string(a[1]) + string(a[2])
108 | }
109 | n := 0
110 | for i := 0; i < len(a); i++ {
111 | n += len(a[i])
112 | }
113 | 
114 | b := make([]byte, n)
115 | bp := copy(b, a[0])
116 | for _, s := range a[1:] {
117 | bp += copy(b[bp:], s)
118 | }
119 | return string(b)
120 | }
121 | 
122 | // textSliceByteLength returns the total byte length of multiple character units
123 | func textSliceByteLength(text []Text) (length int) {
124 | for _, word := range text {
125 | length += len(word)
126 | }
127 | return
128 | }
129 | 
130 | func textSliceToBytes(text []Text) []byte {
131 | var buf bytes.Buffer
132 | for _, word := range text {
133 | buf.Write(word)
134 | }
135 | return buf.Bytes()
136 | }
137 | 
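The two output modes documented above can be seen end to end with a short usage sketch; it assumes the repository's data/dictionary.txt is available at the given relative path:

```go
package main

import (
	"fmt"

	"github.com/huichen/sego"
)

func main() {
	var seg sego.Segmenter
	seg.LoadDictionary("data/dictionary.txt") // path assumed; adjust as needed

	segments := seg.Segment([]byte("中华人民共和国"))

	// Normal mode: one token per segment, e.g. "中华人民共和国/ns ".
	fmt.Println(sego.SegmentsToString(segments, false))

	// Search mode: the finer-grained splits as well, as documented above.
	fmt.Println(sego.SegmentsToString(segments, true))

	// The same splits as a []string instead of one formatted string.
	fmt.Println(sego.SegmentsToSlice(segments, true))
}
```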
--------------------------------------------------------------------------------
/utils_test.go:
--------------------------------------------------------------------------------
1 | package sego
2 | 
3 | import (
4 | "fmt"
5 | "testing"
6 | 
7 | "github.com/issue9/assert"
8 | )
9 | 
10 | /*
11 | * Author: Zhang Xiaoming  Date: 2018-06-14
12 | */
13 | 
14 | var (
15 | strs = []Text{
16 | Text("one"),
17 | Text("two"),
18 | Text("three"),
19 | Text("four"),
20 | Text("five"),
21 | Text("six"),
22 | Text("seven"),
23 | Text("eight"),
24 | Text("nine"),
25 | Text("ten"),
26 | }
27 | )
28 | 
29 | func Test_textSliceToString(t *testing.T) {
30 | a := textSliceToString(strs)
31 | b := Join(strs)
32 | assert.Equal(t, a, b)
33 | }
34 | 
35 | func StringsJoin(b *testing.B) {
36 | for i := 0; i < b.N; i++ {
37 | Join(strs)
38 | }
39 | }
40 | 
41 | func TextSliceToString(b *testing.B) {
42 | for i := 0; i < b.N; i++ {
43 | textSliceToString(strs)
44 | }
45 | }
46 | 
47 | func Test_Benchmark(t *testing.T) {
48 | fmt.Println("strings.Join:")
49 | fmt.Println(testing.Benchmark(StringsJoin))
50 | fmt.Println("textSliceToString:")
51 | fmt.Println(testing.Benchmark(TextSliceToString))
52 | }
53 | 
54 | func Test_Token_TextEquals(t *testing.T) {
55 | token := Token{
56 | text: []Text{
57 | []byte("one"),
58 | []byte("two"),
59 | },
60 | }
61 | assert.True(t, token.TextEquals("onetwo"))
62 | }
63 | 
64 | func Test_Token_TextEquals_CN(t *testing.T) {
65 | token := Token{
66 | text: []Text{
67 | []byte("中国"),
68 | []byte("文字"),
69 | },
70 | }
71 | assert.True(t, token.TextEquals("中国文字"))
72 | }
73 | 
74 | func Test_Token_TextNotEquals(t *testing.T) {
75 | token := Token{
76 | text: []Text{
77 | []byte("one"),
78 | []byte("two"),
79 | },
80 | }
81 | assert.False(t, token.TextEquals("one-two"))
82 | }
83 | 
84 | func Test_Token_TextNotEquals_CN(t *testing.T) {
85 | token := Token{
86 | text: []Text{
87 | []byte("中国"),
88 | []byte("文字"),
89 | },
90 | }
91 | assert.False(t, token.TextEquals("中国文字1"))
92 | }
93 | 
94 | func Test_Token_TextNotEquals_CN_B(t *testing.T) {
95 | token := Token{
96 | text: []Text{
97 | []byte("中国"),
98 | []byte("文字"),
99 | },
100 | }
101 | assert.False(t, token.TextEquals("中国文"))
102 | }
103 | 
104 | func Test_Token_Split(t *testing.T) {
105 | probMap := map[string]string{
106 | "衣门襟": "拉链",
107 | "品牌": "天奕",
108 | "图案": "纯色 字母",
109 | "颜色分类": "牛奶白 水粉色 湖水蓝 浅军绿 雅致灰",
110 | "尺码": "大码XL 大码XXL 大码XXXL 大码XXXXL",
111 | "组合形式": "单件",
112 | "面料": "聚酯",
113 | "领型": "连帽",
114 | "服饰工艺": "立体裁剪",
115 | "货号": "YZL-1806052",
116 | "厚薄": "超薄",
117 | "年份季节": "2018年夏季",
118 | "通勤": "韩版",
119 | "服装款式细节": "不对称",
120 | "成分含量": "81%(含)-90%(含)",
121 | "袖型": "常规",
122 | "风格": "通勤",
123 | "适用年龄": "18-24周岁",
124 | "服装版型": "宽松",
125 | "大码女装分类": "其它特大款式",
126 | "衣长": "中长款",
127 | "袖长": "长袖",
128 | "穿着方式": "开衫",
129 | }
130 | word := "卫衣女宽松拉链外套开衫韩版"
131 | 
var segmenter Segmenter 132 | segmenter.LoadDictionary("dictionary.txt") 133 | segments := segmenter.InternalSegment([]byte(word),true) 134 | for _,s := range segments{ 135 | fmt.Println(s.token.Text()) 136 | } 137 | for _, value := range probMap { 138 | for _, s := range segments { 139 | if s.Token().Text() == value { 140 | fmt.Println("=",value) 141 | } 142 | } 143 | } 144 | } 145 | -------------------------------------------------------------------------------- /vendor/github.com/adamzy/cedar-go/LICENSE.md: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 2, June 1991 3 | 4 | Copyright (C) 1989, 1991 Free Software Foundation, Inc., 5 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 6 | Everyone is permitted to copy and distribute verbatim copies 7 | of this license document, but changing it is not allowed. 8 | 9 | Preamble 10 | 11 | The licenses for most software are designed to take away your 12 | freedom to share and change it. By contrast, the GNU General Public 13 | License is intended to guarantee your freedom to share and change free 14 | software--to make sure the software is free for all its users. This 15 | General Public License applies to most of the Free Software 16 | Foundation's software and to any other program whose authors commit to 17 | using it. (Some other Free Software Foundation software is covered by 18 | the GNU Lesser General Public License instead.) You can apply it to 19 | your programs, too. 20 | 21 | When we speak of free software, we are referring to freedom, not 22 | price. Our General Public Licenses are designed to make sure that you 23 | have the freedom to distribute copies of free software (and charge for 24 | this service if you wish), that you receive source code or can get it 25 | if you want it, that you can change the software or use pieces of it 26 | in new free programs; and that you know you can do these things. 27 | 28 | To protect your rights, we need to make restrictions that forbid 29 | anyone to deny you these rights or to ask you to surrender the rights. 30 | These restrictions translate to certain responsibilities for you if you 31 | distribute copies of the software, or if you modify it. 32 | 33 | For example, if you distribute copies of such a program, whether 34 | gratis or for a fee, you must give the recipients all the rights that 35 | you have. You must make sure that they, too, receive or can get the 36 | source code. And you must show them these terms so they know their 37 | rights. 38 | 39 | We protect your rights with two steps: (1) copyright the software, and 40 | (2) offer you this license which gives you legal permission to copy, 41 | distribute and/or modify the software. 42 | 43 | Also, for each author's protection and ours, we want to make certain 44 | that everyone understands that there is no warranty for this free 45 | software. If the software is modified by someone else and passed on, we 46 | want its recipients to know that what they have is not the original, so 47 | that any problems introduced by others will not reflect on the original 48 | authors' reputations. 49 | 50 | Finally, any free program is threatened constantly by software 51 | patents. We wish to avoid the danger that redistributors of a free 52 | program will individually obtain patent licenses, in effect making the 53 | program proprietary. To prevent this, we have made it clear that any 54 | patent must be licensed for everyone's free use or not licensed at all. 
55 | 56 | The precise terms and conditions for copying, distribution and 57 | modification follow. 58 | 59 | GNU GENERAL PUBLIC LICENSE 60 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 61 | 62 | 0. This License applies to any program or other work which contains 63 | a notice placed by the copyright holder saying it may be distributed 64 | under the terms of this General Public License. The "Program", below, 65 | refers to any such program or work, and a "work based on the Program" 66 | means either the Program or any derivative work under copyright law: 67 | that is to say, a work containing the Program or a portion of it, 68 | either verbatim or with modifications and/or translated into another 69 | language. (Hereinafter, translation is included without limitation in 70 | the term "modification".) Each licensee is addressed as "you". 71 | 72 | Activities other than copying, distribution and modification are not 73 | covered by this License; they are outside its scope. The act of 74 | running the Program is not restricted, and the output from the Program 75 | is covered only if its contents constitute a work based on the 76 | Program (independent of having been made by running the Program). 77 | Whether that is true depends on what the Program does. 78 | 79 | 1. You may copy and distribute verbatim copies of the Program's 80 | source code as you receive it, in any medium, provided that you 81 | conspicuously and appropriately publish on each copy an appropriate 82 | copyright notice and disclaimer of warranty; keep intact all the 83 | notices that refer to this License and to the absence of any warranty; 84 | and give any other recipients of the Program a copy of this License 85 | along with the Program. 86 | 87 | You may charge a fee for the physical act of transferring a copy, and 88 | you may at your option offer warranty protection in exchange for a fee. 89 | 90 | 2. You may modify your copy or copies of the Program or any portion 91 | of it, thus forming a work based on the Program, and copy and 92 | distribute such modifications or work under the terms of Section 1 93 | above, provided that you also meet all of these conditions: 94 | 95 | a) You must cause the modified files to carry prominent notices 96 | stating that you changed the files and the date of any change. 97 | 98 | b) You must cause any work that you distribute or publish, that in 99 | whole or in part contains or is derived from the Program or any 100 | part thereof, to be licensed as a whole at no charge to all third 101 | parties under the terms of this License. 102 | 103 | c) If the modified program normally reads commands interactively 104 | when run, you must cause it, when started running for such 105 | interactive use in the most ordinary way, to print or display an 106 | announcement including an appropriate copyright notice and a 107 | notice that there is no warranty (or else, saying that you provide 108 | a warranty) and that users may redistribute the program under 109 | these conditions, and telling the user how to view a copy of this 110 | License. (Exception: if the Program itself is interactive but 111 | does not normally print such an announcement, your work based on 112 | the Program is not required to print an announcement.) 113 | 114 | These requirements apply to the modified work as a whole. 
If 115 | identifiable sections of that work are not derived from the Program, 116 | and can be reasonably considered independent and separate works in 117 | themselves, then this License, and its terms, do not apply to those 118 | sections when you distribute them as separate works. But when you 119 | distribute the same sections as part of a whole which is a work based 120 | on the Program, the distribution of the whole must be on the terms of 121 | this License, whose permissions for other licensees extend to the 122 | entire whole, and thus to each and every part regardless of who wrote it. 123 | 124 | Thus, it is not the intent of this section to claim rights or contest 125 | your rights to work written entirely by you; rather, the intent is to 126 | exercise the right to control the distribution of derivative or 127 | collective works based on the Program. 128 | 129 | In addition, mere aggregation of another work not based on the Program 130 | with the Program (or with a work based on the Program) on a volume of 131 | a storage or distribution medium does not bring the other work under 132 | the scope of this License. 133 | 134 | 3. You may copy and distribute the Program (or a work based on it, 135 | under Section 2) in object code or executable form under the terms of 136 | Sections 1 and 2 above provided that you also do one of the following: 137 | 138 | a) Accompany it with the complete corresponding machine-readable 139 | source code, which must be distributed under the terms of Sections 140 | 1 and 2 above on a medium customarily used for software interchange; or, 141 | 142 | b) Accompany it with a written offer, valid for at least three 143 | years, to give any third party, for a charge no more than your 144 | cost of physically performing source distribution, a complete 145 | machine-readable copy of the corresponding source code, to be 146 | distributed under the terms of Sections 1 and 2 above on a medium 147 | customarily used for software interchange; or, 148 | 149 | c) Accompany it with the information you received as to the offer 150 | to distribute corresponding source code. (This alternative is 151 | allowed only for noncommercial distribution and only if you 152 | received the program in object code or executable form with such 153 | an offer, in accord with Subsection b above.) 154 | 155 | The source code for a work means the preferred form of the work for 156 | making modifications to it. For an executable work, complete source 157 | code means all the source code for all modules it contains, plus any 158 | associated interface definition files, plus the scripts used to 159 | control compilation and installation of the executable. However, as a 160 | special exception, the source code distributed need not include 161 | anything that is normally distributed (in either source or binary 162 | form) with the major components (compiler, kernel, and so on) of the 163 | operating system on which the executable runs, unless that component 164 | itself accompanies the executable. 165 | 166 | If distribution of executable or object code is made by offering 167 | access to copy from a designated place, then offering equivalent 168 | access to copy the source code from the same place counts as 169 | distribution of the source code, even though third parties are not 170 | compelled to copy the source along with the object code. 171 | 172 | 4. You may not copy, modify, sublicense, or distribute the Program 173 | except as expressly provided under this License. 
Any attempt 174 | otherwise to copy, modify, sublicense or distribute the Program is 175 | void, and will automatically terminate your rights under this License. 176 | However, parties who have received copies, or rights, from you under 177 | this License will not have their licenses terminated so long as such 178 | parties remain in full compliance. 179 | 180 | 5. You are not required to accept this License, since you have not 181 | signed it. However, nothing else grants you permission to modify or 182 | distribute the Program or its derivative works. These actions are 183 | prohibited by law if you do not accept this License. Therefore, by 184 | modifying or distributing the Program (or any work based on the 185 | Program), you indicate your acceptance of this License to do so, and 186 | all its terms and conditions for copying, distributing or modifying 187 | the Program or works based on it. 188 | 189 | 6. Each time you redistribute the Program (or any work based on the 190 | Program), the recipient automatically receives a license from the 191 | original licensor to copy, distribute or modify the Program subject to 192 | these terms and conditions. You may not impose any further 193 | restrictions on the recipients' exercise of the rights granted herein. 194 | You are not responsible for enforcing compliance by third parties to 195 | this License. 196 | 197 | 7. If, as a consequence of a court judgment or allegation of patent 198 | infringement or for any other reason (not limited to patent issues), 199 | conditions are imposed on you (whether by court order, agreement or 200 | otherwise) that contradict the conditions of this License, they do not 201 | excuse you from the conditions of this License. If you cannot 202 | distribute so as to satisfy simultaneously your obligations under this 203 | License and any other pertinent obligations, then as a consequence you 204 | may not distribute the Program at all. For example, if a patent 205 | license would not permit royalty-free redistribution of the Program by 206 | all those who receive copies directly or indirectly through you, then 207 | the only way you could satisfy both it and this License would be to 208 | refrain entirely from distribution of the Program. 209 | 210 | If any portion of this section is held invalid or unenforceable under 211 | any particular circumstance, the balance of the section is intended to 212 | apply and the section as a whole is intended to apply in other 213 | circumstances. 214 | 215 | It is not the purpose of this section to induce you to infringe any 216 | patents or other property right claims or to contest validity of any 217 | such claims; this section has the sole purpose of protecting the 218 | integrity of the free software distribution system, which is 219 | implemented by public license practices. Many people have made 220 | generous contributions to the wide range of software distributed 221 | through that system in reliance on consistent application of that 222 | system; it is up to the author/donor to decide if he or she is willing 223 | to distribute software through any other system and a licensee cannot 224 | impose that choice. 225 | 226 | This section is intended to make thoroughly clear what is believed to 227 | be a consequence of the rest of this License. 228 | 229 | 8. 
If the distribution and/or use of the Program is restricted in 230 | certain countries either by patents or by copyrighted interfaces, the 231 | original copyright holder who places the Program under this License 232 | may add an explicit geographical distribution limitation excluding 233 | those countries, so that distribution is permitted only in or among 234 | countries not thus excluded. In such case, this License incorporates 235 | the limitation as if written in the body of this License. 236 | 237 | 9. The Free Software Foundation may publish revised and/or new versions 238 | of the General Public License from time to time. Such new versions will 239 | be similar in spirit to the present version, but may differ in detail to 240 | address new problems or concerns. 241 | 242 | Each version is given a distinguishing version number. If the Program 243 | specifies a version number of this License which applies to it and "any 244 | later version", you have the option of following the terms and conditions 245 | either of that version or of any later version published by the Free 246 | Software Foundation. If the Program does not specify a version number of 247 | this License, you may choose any version ever published by the Free Software 248 | Foundation. 249 | 250 | 10. If you wish to incorporate parts of the Program into other free 251 | programs whose distribution conditions are different, write to the author 252 | to ask for permission. For software which is copyrighted by the Free 253 | Software Foundation, write to the Free Software Foundation; we sometimes 254 | make exceptions for this. Our decision will be guided by the two goals 255 | of preserving the free status of all derivatives of our free software and 256 | of promoting the sharing and reuse of software generally. 257 | 258 | NO WARRANTY 259 | 260 | 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY 261 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN 262 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES 263 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED 264 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 265 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS 266 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE 267 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, 268 | REPAIR OR CORRECTION. 269 | 270 | 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 271 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR 272 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, 273 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING 274 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED 275 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY 276 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER 277 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE 278 | POSSIBILITY OF SUCH DAMAGES. 279 | 280 | END OF TERMS AND CONDITIONS 281 | 282 | How to Apply These Terms to Your New Programs 283 | 284 | If you develop a new program, and you want it to be of the greatest 285 | possible use to the public, the best way to achieve this is to make it 286 | free software which everyone can redistribute and change under these terms. 
287 | 
288 | To do so, attach the following notices to the program. It is safest
289 | to attach them to the start of each source file to most effectively
290 | convey the exclusion of warranty; and each file should have at least
291 | the "copyright" line and a pointer to where the full notice is found.
292 | 
293 | {description}
294 | Copyright (C) {year} {fullname}
295 | 
296 | This program is free software; you can redistribute it and/or modify
297 | it under the terms of the GNU General Public License as published by
298 | the Free Software Foundation; either version 2 of the License, or
299 | (at your option) any later version.
300 | 
301 | This program is distributed in the hope that it will be useful,
302 | but WITHOUT ANY WARRANTY; without even the implied warranty of
303 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
304 | GNU General Public License for more details.
305 | 
306 | You should have received a copy of the GNU General Public License along
307 | with this program; if not, write to the Free Software Foundation, Inc.,
308 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
309 | 
310 | Also add information on how to contact you by electronic and paper mail.
311 | 
312 | If the program is interactive, make it output a short notice like this
313 | when it starts in an interactive mode:
314 | 
315 | Gnomovision version 69, Copyright (C) year name of author
316 | Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
317 | This is free software, and you are welcome to redistribute it
318 | under certain conditions; type `show c' for details.
319 | 
320 | The hypothetical commands `show w' and `show c' should show the appropriate
321 | parts of the General Public License. Of course, the commands you use may
322 | be called something other than `show w' and `show c'; they could even be
323 | mouse-clicks or menu items--whatever suits your program.
324 | 
325 | You should also get your employer (if you work as a programmer) or your
326 | school, if any, to sign a "copyright disclaimer" for the program, if
327 | necessary. Here is a sample; alter the names:
328 | 
329 | Yoyodyne, Inc., hereby disclaims all copyright interest in the program
330 | `Gnomovision' (which makes passes at compilers) written by James Hacker.
331 | 
332 | {signature of Ty Coon}, 1 April 1989
333 | Ty Coon, President of Vice
334 | 
335 | This General Public License does not permit incorporating your program into
336 | proprietary programs. If your program is a subroutine library, you may
337 | consider it more useful to permit linking proprietary applications with the
338 | library. If this is what you want to do, use the GNU Lesser General
339 | Public License instead of this License.
340 | 
--------------------------------------------------------------------------------
/vendor/github.com/adamzy/cedar-go/README.md:
--------------------------------------------------------------------------------
1 | # cedar-go [![GoDoc](https://godoc.org/github.com/adamzy/cedar-go?status.svg)](https://godoc.org/github.com/adamzy/cedar-go)
2 | 
3 | Package `cedar-go` implements a double-array trie.
4 | 
5 | It is a [Golang](https://golang.org/) port of [cedar](http://www.tkl.iis.u-tokyo.ac.jp/~ynaga/cedar) which is written in C++ by Naoki Yoshinaga. `cedar-go` currently implements the `reduced` version of cedar.
6 | This package is not thread safe if any goroutine is doing insertions or deletions.
7 | 
8 | ## Install
9 | ```
10 | go get github.com/adamzy/cedar-go
11 | ```
12 | 
13 | ## Usage
14 | ```go
15 | package main
16 | 
17 | import (
18 | "fmt"
19 | 
20 | "github.com/adamzy/cedar-go"
21 | )
22 | 
23 | func main() {
24 | // create a new cedar trie.
25 | trie := cedar.New()
26 | 
27 | // a helper function to print the id-key-value triple given trie node id
28 | printIdKeyValue := func(id int) {
29 | // the key of node `id`.
30 | key, _ := trie.Key(id)
31 | // the value of node `id`.
32 | value, _ := trie.Value(id)
33 | fmt.Printf("%d\t%s:%v\n", id, key, value)
34 | }
35 | 
36 | // Insert key-value pairs.
37 | // The order of insertion is not important.
38 | trie.Insert([]byte("How many"), 0)
39 | trie.Insert([]byte("How many loved"), 1)
40 | trie.Insert([]byte("How many loved your moments"), 2)
41 | trie.Insert([]byte("How many loved your moments of glad grace"), 3)
42 | trie.Insert([]byte("姑苏"), 4)
43 | trie.Insert([]byte("姑苏城外"), 5)
44 | trie.Insert([]byte("姑苏城外寒山寺"), 6)
45 | 
46 | // Get the associated value of a key directly.
47 | value, _ := trie.Get([]byte("How many loved your moments of glad grace"))
48 | fmt.Println(value)
49 | 
50 | // Or, jump to the node first,
51 | id, _ := trie.Jump([]byte("How many loved your moments"), 0)
52 | // then get the key and the value
53 | printIdKeyValue(id)
54 | 
55 | fmt.Println("\nPrefixMatch\nid\tkey:value")
56 | for _, id := range trie.PrefixMatch([]byte("How many loved your moments of glad grace"), 0) {
57 | printIdKeyValue(id)
58 | }
59 | 
60 | fmt.Println("\nPrefixPredict\nid\tkey:value")
61 | for _, id := range trie.PrefixPredict([]byte("姑苏"), 0) {
62 | printIdKeyValue(id)
63 | }
64 | }
65 | ```
66 | will produce
67 | ```
68 | 3
69 | 281 How many loved your moments:2
70 | 
71 | PrefixMatch
72 | id key:value
73 | 262 How many:0
74 | 268 How many loved:1
75 | 281 How many loved your moments:2
76 | 296 How many loved your moments of glad grace:3
77 | 
78 | PrefixPredict
79 | id key:value
80 | 303 姑苏:4
81 | 309 姑苏城外:5
82 | 318 姑苏城外寒山寺:6
83 | ```
84 | 
--------------------------------------------------------------------------------
/vendor/github.com/adamzy/cedar-go/api.go:
--------------------------------------------------------------------------------
1 | package cedar
2 | 
3 | // Status reports the following statistics of the cedar:
4 | // keys: number of keys that are in the cedar,
5 | // nodes: number of trie nodes (slots in the base array) that have been taken,
6 | // size: the size of the base array used by the cedar,
7 | // capacity: the capacity of the base array used by the cedar.
8 | func (da *Cedar) Status() (keys, nodes, size, capacity int) {
9 | for i := 0; i < da.Size; i++ {
10 | n := da.Array[i]
11 | if n.Check >= 0 {
12 | nodes++
13 | if n.Value >= 0 {
14 | keys++
15 | }
16 | }
17 | }
18 | return keys, nodes, da.Size, da.Capacity
19 | }
20 | 
21 | // Jump travels from a node `from` to another node `to` by following the path `path`.
22 | // For example, if the following keys were inserted: 23 | // id key 24 | // 19 abc 25 | // 23 ab 26 | // 37 abcd 27 | // then 28 | // Jump([]byte("ab"), 0) = 23, nil // reach "ab" from root 29 | // Jump([]byte("c"), 23) = 19, nil // reach "abc" from "ab" 30 | // Jump([]byte("cd"), 23) = 37, nil // reach "abcd" from "ab" 31 | func (da *Cedar) Jump(path []byte, from int) (to int, err error) { 32 | for _, b := range path { 33 | if da.Array[from].Value >= 0 { 34 | return from, ErrNoPath 35 | } 36 | to = da.Array[from].base() ^ int(b) 37 | if da.Array[to].Check != from { 38 | return from, ErrNoPath 39 | } 40 | from = to 41 | } 42 | return to, nil 43 | } 44 | 45 | // Key returns the key of the node with the given `id`. 46 | // It will return ErrNoPath, if the node does not exist. 47 | func (da *Cedar) Key(id int) (key []byte, err error) { 48 | for id > 0 { 49 | from := da.Array[id].Check 50 | if from < 0 { 51 | return nil, ErrNoPath 52 | } 53 | if char := byte(da.Array[from].base() ^ id); char != 0 { 54 | key = append(key, char) 55 | } 56 | id = from 57 | } 58 | if id != 0 || len(key) == 0 { 59 | return nil, ErrInvalidKey 60 | } 61 | for i := 0; i < len(key)/2; i++ { 62 | key[i], key[len(key)-i-1] = key[len(key)-i-1], key[i] 63 | } 64 | return key, nil 65 | } 66 | 67 | // Value returns the value of the node with the given `id`. 68 | // It will return ErrNoValue, if the node does not have a value. 69 | func (da *Cedar) Value(id int) (value int, err error) { 70 | value = da.Array[id].Value 71 | if value >= 0 { 72 | return value, nil 73 | } 74 | to := da.Array[id].base() 75 | if da.Array[to].Check == id && da.Array[to].Value >= 0 { 76 | return da.Array[to].Value, nil 77 | } 78 | return 0, ErrNoValue 79 | } 80 | 81 | // Insert adds a key-value pair into the cedar. 82 | // It will return ErrInvalidValue, if value < 0 or >= ValueLimit. 83 | func (da *Cedar) Insert(key []byte, value int) error { 84 | if value < 0 || value >= ValueLimit { 85 | return ErrInvalidValue 86 | } 87 | p := da.get(key, 0, 0) 88 | *p = value 89 | return nil 90 | } 91 | 92 | // Update increases the value associated with the `key`. 93 | // The `key` will be inserted if it is not in the cedar. 94 | // It will return ErrInvalidValue, if the updated value < 0 or >= ValueLimit. 95 | func (da *Cedar) Update(key []byte, value int) error { 96 | p := da.get(key, 0, 0) 97 | 98 | // key was not inserted 99 | if *p == ValueLimit { 100 | *p = value 101 | return nil 102 | } 103 | 104 | // key was inserted before 105 | if *p+value < 0 || *p+value >= ValueLimit { 106 | return ErrInvalidValue 107 | } 108 | *p += value 109 | return nil 110 | } 111 | 112 | // Delete removes a key-value pair from the cedar. 113 | // It will return ErrNoPath, if the key has not been added. 
114 | func (da *Cedar) Delete(key []byte) error { 115 | // if the path does not exist, or the end is not a leaf, nothing to delete 116 | to, err := da.Jump(key, 0) 117 | if err != nil { 118 | return ErrNoPath 119 | } 120 | 121 | if da.Array[to].Value < 0 { 122 | base := da.Array[to].base() 123 | if da.Array[base].Check == to { 124 | to = base 125 | } 126 | } 127 | 128 | for to > 0 { 129 | from := da.Array[to].Check 130 | base := da.Array[from].base() 131 | label := byte(to ^ base) 132 | 133 | // if `to` has sibling, remove `to` from the sibling list, then stop 134 | if da.Ninfos[to].Sibling != 0 || da.Ninfos[from].Child != label { 135 | // delete the label from the child ring first 136 | da.popSibling(from, base, label) 137 | // then release the current node `to` to the empty node ring 138 | da.pushEnode(to) 139 | break 140 | } 141 | // otherwise, just release the current node `to` to the empty node ring 142 | da.pushEnode(to) 143 | // then check its parent node 144 | to = from 145 | } 146 | return nil 147 | } 148 | 149 | // Get returns the value associated with the given `key`. 150 | // It is equivalent to 151 | // id, err1 = Jump(key) 152 | // value, err2 = Value(id) 153 | // Thus, it may return ErrNoPath or ErrNoValue, 154 | func (da *Cedar) Get(key []byte) (value int, err error) { 155 | to, err := da.Jump(key, 0) 156 | if err != nil { 157 | return 0, err 158 | } 159 | return da.Value(to) 160 | } 161 | 162 | // PrefixMatch returns a list of at most `num` nodes which match the prefix of the key. 163 | // If `num` is 0, it returns all matches. 164 | // For example, if the following keys were inserted: 165 | // id key 166 | // 19 abc 167 | // 23 ab 168 | // 37 abcd 169 | // then 170 | // PrefixMatch([]byte("abc"), 1) = [ 23 ] // match ["ab"] 171 | // PrefixMatch([]byte("abcd"), 0) = [ 23, 19, 37] // match ["ab", "abc", "abcd"] 172 | func (da *Cedar) PrefixMatch(key []byte, num int) (ids []int) { 173 | for from, i := 0, 0; i < len(key); i++ { 174 | to, err := da.Jump(key[i:i+1], from) 175 | if err != nil { 176 | break 177 | } 178 | if _, err := da.Value(to); err == nil { 179 | ids = append(ids, to) 180 | num-- 181 | if num == 0 { 182 | return 183 | } 184 | } 185 | from = to 186 | } 187 | return 188 | } 189 | 190 | // PrefixPredict returns a list of at most `num` nodes which has the key as their prefix. 191 | // These nodes are ordered by their keys. 192 | // If `num` is 0, it returns all matches. 
193 | // For example, if the following keys were inserted: 194 | // id key 195 | // 19 abc 196 | // 23 ab 197 | // 37 abcd 198 | // then 199 | // PrefixPredict([]byte("ab"), 2) = [ 23, 19 ] // predict ["ab", "abc"] 200 | // PrefixPredict([]byte("ab"), 0) = [ 23, 19, 37 ] // predict ["ab", "abc", "abcd"] 201 | func (da *Cedar) PrefixPredict(key []byte, num int) (ids []int) { 202 | root, err := da.Jump(key, 0) 203 | if err != nil { 204 | return 205 | } 206 | for from, err := da.begin(root); err == nil; from, err = da.next(from, root) { 207 | ids = append(ids, from) 208 | num-- 209 | if num == 0 { 210 | return 211 | } 212 | } 213 | return 214 | } 215 | 216 | func (da *Cedar) begin(from int) (to int, err error) { 217 | for c := da.Ninfos[from].Child; c != 0; { 218 | to = da.Array[from].base() ^ int(c) 219 | c = da.Ninfos[to].Child 220 | from = to 221 | } 222 | if da.Array[from].base() > 0 { 223 | return da.Array[from].base(), nil 224 | } 225 | return from, nil 226 | } 227 | 228 | func (da *Cedar) next(from int, root int) (to int, err error) { 229 | c := da.Ninfos[from].Sibling 230 | for c == 0 && from != root && da.Array[from].Check >= 0 { 231 | from = da.Array[from].Check 232 | c = da.Ninfos[from].Sibling 233 | } 234 | if from == root { 235 | return 0, ErrNoPath 236 | } 237 | from = da.Array[da.Array[from].Check].base() ^ int(c) 238 | return da.begin(from) 239 | } 240 | -------------------------------------------------------------------------------- /vendor/github.com/adamzy/cedar-go/cedar.go: -------------------------------------------------------------------------------- 1 | package cedar 2 | 3 | const ValueLimit = int(^uint(0) >> 1) 4 | 5 | type node struct { 6 | Value int 7 | Check int 8 | } 9 | 10 | func (n *node) base() int { return -(n.Value + 1) } 11 | 12 | type ninfo struct { 13 | Sibling, Child byte 14 | } 15 | 16 | type block struct { 17 | Prev, Next, Num, Reject, Trial, Ehead int 18 | } 19 | 20 | func (b *block) init() { 21 | b.Num = 256 22 | b.Reject = 257 23 | } 24 | 25 | type Cedar struct { 26 | *cedar 27 | } 28 | 29 | type cedar struct { 30 | Array []node 31 | Ninfos []ninfo 32 | Blocks []block 33 | Reject [257]int 34 | BheadF int 35 | BheadC int 36 | BheadO int 37 | Capacity int 38 | Size int 39 | Ordered bool 40 | MaxTrial int 41 | } 42 | 43 | func New() *Cedar { 44 | da := cedar{ 45 | Array: make([]node, 256), 46 | Ninfos: make([]ninfo, 256), 47 | Blocks: make([]block, 1), 48 | Capacity: 256, 49 | Size: 256, 50 | Ordered: true, 51 | MaxTrial: 1, 52 | } 53 | 54 | da.Array[0] = node{-2, 0} 55 | for i := 1; i < 256; i++ { 56 | da.Array[i] = node{-(i - 1), -(i + 1)} 57 | } 58 | da.Array[1].Value = -255 59 | da.Array[255].Check = -1 60 | 61 | da.Blocks[0].Ehead = 1 62 | da.Blocks[0].init() 63 | 64 | for i := 0; i <= 256; i++ { 65 | da.Reject[i] = i + 1 66 | } 67 | 68 | return &Cedar{&da} 69 | } 70 | 71 | // Get value by key, insert the key if not exist 72 | func (da *cedar) get(key []byte, from, pos int) *int { 73 | for ; pos < len(key); pos++ { 74 | if value := da.Array[from].Value; value >= 0 && value != ValueLimit { 75 | to := da.follow(from, 0) 76 | da.Array[to].Value = value 77 | } 78 | from = da.follow(from, key[pos]) 79 | } 80 | to := from 81 | if da.Array[from].Value < 0 { 82 | to = da.follow(from, 0) 83 | } 84 | return &da.Array[to].Value 85 | } 86 | 87 | func (da *cedar) follow(from int, label byte) int { 88 | base := da.Array[from].base() 89 | to := base ^ int(label) 90 | if base < 0 || da.Array[to].Check < 0 { 91 | hasChild := false 92 | if base >= 0 { 93 | hasChild 
= (da.Array[base^int(da.Ninfos[from].Child)].Check == from) 94 | } 95 | to = da.popEnode(base, label, from) 96 | da.pushSibling(from, to^int(label), label, hasChild) 97 | } else if da.Array[to].Check != from { 98 | to = da.resolve(from, base, label) 99 | } else if da.Array[to].Check == from { 100 | } else { 101 | panic("cedar: internal error, should not be here") 102 | } 103 | return to 104 | } 105 | 106 | func (da *cedar) popBlock(bi int, head_in *int, last bool) { 107 | if last { 108 | *head_in = 0 109 | } else { 110 | b := &da.Blocks[bi] 111 | da.Blocks[b.Prev].Next = b.Next 112 | da.Blocks[b.Next].Prev = b.Prev 113 | if bi == *head_in { 114 | *head_in = b.Next 115 | } 116 | } 117 | } 118 | 119 | func (da *cedar) pushBlock(bi int, head_out *int, empty bool) { 120 | b := &da.Blocks[bi] 121 | if empty { 122 | *head_out, b.Prev, b.Next = bi, bi, bi 123 | } else { 124 | tail_out := &da.Blocks[*head_out].Prev 125 | b.Prev = *tail_out 126 | b.Next = *head_out 127 | *head_out, *tail_out, da.Blocks[*tail_out].Next = bi, bi, bi 128 | } 129 | } 130 | 131 | func (da *cedar) addBlock() int { 132 | if da.Size == da.Capacity { 133 | da.Capacity *= 2 134 | 135 | oldArray := da.Array 136 | da.Array = make([]node, da.Capacity) 137 | copy(da.Array, oldArray) 138 | 139 | oldNinfo := da.Ninfos 140 | da.Ninfos = make([]ninfo, da.Capacity) 141 | copy(da.Ninfos, oldNinfo) 142 | 143 | oldBlock := da.Blocks 144 | da.Blocks = make([]block, da.Capacity>>8) 145 | copy(da.Blocks, oldBlock) 146 | } 147 | 148 | da.Blocks[da.Size>>8].init() 149 | da.Blocks[da.Size>>8].Ehead = da.Size 150 | 151 | da.Array[da.Size] = node{-(da.Size + 255), -(da.Size + 1)} 152 | for i := da.Size + 1; i < da.Size+255; i++ { 153 | da.Array[i] = node{-(i - 1), -(i + 1)} 154 | } 155 | da.Array[da.Size+255] = node{-(da.Size + 254), -da.Size} 156 | 157 | da.pushBlock(da.Size>>8, &da.BheadO, da.BheadO == 0) 158 | da.Size += 256 159 | return da.Size>>8 - 1 160 | } 161 | 162 | func (da *cedar) transferBlock(bi int, head_in, head_out *int) { 163 | da.popBlock(bi, head_in, bi == da.Blocks[bi].Next) 164 | da.pushBlock(bi, head_out, *head_out == 0 && da.Blocks[bi].Num != 0) 165 | } 166 | 167 | func (da *cedar) popEnode(base int, label byte, from int) int { 168 | e := base ^ int(label) 169 | if base < 0 { 170 | e = da.findPlace() 171 | } 172 | bi := e >> 8 173 | n := &da.Array[e] 174 | b := &da.Blocks[bi] 175 | b.Num-- 176 | if b.Num == 0 { 177 | if bi != 0 { 178 | da.transferBlock(bi, &da.BheadC, &da.BheadF) 179 | } 180 | } else { 181 | da.Array[-n.Value].Check = n.Check 182 | da.Array[-n.Check].Value = n.Value 183 | if e == b.Ehead { 184 | b.Ehead = -n.Check 185 | } 186 | if bi != 0 && b.Num == 1 && b.Trial != da.MaxTrial { 187 | da.transferBlock(bi, &da.BheadO, &da.BheadC) 188 | } 189 | } 190 | n.Value = ValueLimit 191 | n.Check = from 192 | if base < 0 { 193 | da.Array[from].Value = -(e ^ int(label)) - 1 194 | } 195 | return e 196 | } 197 | 198 | func (da *cedar) pushEnode(e int) { 199 | bi := e >> 8 200 | b := &da.Blocks[bi] 201 | b.Num++ 202 | if b.Num == 1 { 203 | b.Ehead = e 204 | da.Array[e] = node{-e, -e} 205 | if bi != 0 { 206 | da.transferBlock(bi, &da.BheadF, &da.BheadC) 207 | } 208 | } else { 209 | prev := b.Ehead 210 | next := -da.Array[prev].Check 211 | da.Array[e] = node{-prev, -next} 212 | da.Array[prev].Check = -e 213 | da.Array[next].Value = -e 214 | if b.Num == 2 || b.Trial == da.MaxTrial { 215 | if bi != 0 { 216 | da.transferBlock(bi, &da.BheadC, &da.BheadO) 217 | } 218 | } 219 | b.Trial = 0 220 | } 221 | if b.Reject < 
da.Reject[b.Num] { 222 | b.Reject = da.Reject[b.Num] 223 | } 224 | da.Ninfos[e] = ninfo{} 225 | } 226 | 227 | // hasChild: wherether the `from` node has children 228 | func (da *cedar) pushSibling(from, base int, label byte, hasChild bool) { 229 | c := &da.Ninfos[from].Child 230 | keepOrder := *c == 0 231 | if da.Ordered { 232 | keepOrder = label > *c 233 | } 234 | if hasChild && keepOrder { 235 | c = &da.Ninfos[base^int(*c)].Sibling 236 | for da.Ordered && *c != 0 && *c < label { 237 | c = &da.Ninfos[base^int(*c)].Sibling 238 | } 239 | } 240 | da.Ninfos[base^int(label)].Sibling = *c 241 | *c = label 242 | } 243 | 244 | func (da *cedar) popSibling(from, base int, label byte) { 245 | c := &da.Ninfos[from].Child 246 | for *c != label { 247 | c = &da.Ninfos[base^int(*c)].Sibling 248 | } 249 | *c = da.Ninfos[base^int(*c)].Sibling 250 | } 251 | 252 | func (da *cedar) consult(base_n, base_p int, c_n, c_p byte) bool { 253 | c_n = da.Ninfos[base_n^int(c_n)].Sibling 254 | c_p = da.Ninfos[base_p^int(c_p)].Sibling 255 | for c_n != 0 && c_p != 0 { 256 | c_n = da.Ninfos[base_n^int(c_n)].Sibling 257 | c_p = da.Ninfos[base_p^int(c_p)].Sibling 258 | } 259 | return c_p != 0 260 | } 261 | 262 | func (da *cedar) setChild(base int, c byte, label byte, flag bool) []byte { 263 | child := make([]byte, 0, 257) 264 | if c == 0 { 265 | child = append(child, c) 266 | c = da.Ninfos[base^int(c)].Sibling 267 | } 268 | if da.Ordered { 269 | for c != 0 && c <= label { 270 | child = append(child, c) 271 | c = da.Ninfos[base^int(c)].Sibling 272 | } 273 | } 274 | if flag { 275 | child = append(child, label) 276 | } 277 | for c != 0 { 278 | child = append(child, c) 279 | c = da.Ninfos[base^int(c)].Sibling 280 | } 281 | return child 282 | } 283 | 284 | func (da *cedar) findPlace() int { 285 | if da.BheadC != 0 { 286 | return da.Blocks[da.BheadC].Ehead 287 | } 288 | if da.BheadO != 0 { 289 | return da.Blocks[da.BheadO].Ehead 290 | } 291 | return da.addBlock() << 8 292 | } 293 | 294 | func (da *cedar) findPlaces(child []byte) int { 295 | bi := da.BheadO 296 | if bi != 0 { 297 | bz := da.Blocks[da.BheadO].Prev 298 | nc := len(child) 299 | for { 300 | b := &da.Blocks[bi] 301 | if b.Num >= nc && nc < b.Reject { 302 | for e := b.Ehead; ; { 303 | base := e ^ int(child[0]) 304 | for i := 0; da.Array[base^int(child[i])].Check < 0; i++ { 305 | if i == len(child)-1 { 306 | b.Ehead = e 307 | return e 308 | } 309 | } 310 | e = -da.Array[e].Check 311 | if e == b.Ehead { 312 | break 313 | } 314 | } 315 | } 316 | b.Reject = nc 317 | if b.Reject < da.Reject[b.Num] { 318 | da.Reject[b.Num] = b.Reject 319 | } 320 | bi_ := b.Next 321 | b.Trial++ 322 | if b.Trial == da.MaxTrial { 323 | da.transferBlock(bi, &da.BheadO, &da.BheadC) 324 | } 325 | if bi == bz { 326 | break 327 | } 328 | bi = bi_ 329 | } 330 | } 331 | return da.addBlock() << 8 332 | } 333 | 334 | func (da *cedar) resolve(from_n, base_n int, label_n byte) int { 335 | to_pn := base_n ^ int(label_n) 336 | from_p := da.Array[to_pn].Check 337 | base_p := da.Array[from_p].base() 338 | 339 | flag := da.consult(base_n, base_p, da.Ninfos[from_n].Child, da.Ninfos[from_p].Child) 340 | var children []byte 341 | if flag { 342 | children = da.setChild(base_n, da.Ninfos[from_n].Child, label_n, true) 343 | } else { 344 | children = da.setChild(base_p, da.Ninfos[from_p].Child, 255, false) 345 | } 346 | var base int 347 | if len(children) == 1 { 348 | base = da.findPlace() 349 | } else { 350 | base = da.findPlaces(children) 351 | } 352 | base ^= int(children[0]) 353 | var from int 354 | var base_ int 
355 | 	if flag {
356 | 		from = from_n
357 | 		base_ = base_n
358 | 	} else {
359 | 		from = from_p
360 | 		base_ = base_p
361 | 	}
362 | 	if flag && children[0] == label_n {
363 | 		da.Ninfos[from].Child = label_n
364 | 	}
365 | 	da.Array[from].Value = -base - 1
366 | 	for i := 0; i < len(children); i++ {
367 | 		to := da.popEnode(base, children[i], from)
368 | 		to_ := base_ ^ int(children[i])
369 | 		if i == len(children)-1 {
370 | 			da.Ninfos[to].Sibling = 0
371 | 		} else {
372 | 			da.Ninfos[to].Sibling = children[i+1]
373 | 		}
374 | 		if flag && to_ == to_pn { // new node has no child
375 | 			continue
376 | 		}
377 | 		n := &da.Array[to]
378 | 		n_ := &da.Array[to_]
379 | 		n.Value = n_.Value
380 | 		if n.Value < 0 && children[i] != 0 {
381 | 			// this node has children, fix their check
382 | 			c := da.Ninfos[to_].Child
383 | 			da.Ninfos[to].Child = c
384 | 			da.Array[n.base()^int(c)].Check = to
385 | 			c = da.Ninfos[n.base()^int(c)].Sibling
386 | 			for c != 0 {
387 | 				da.Array[n.base()^int(c)].Check = to
388 | 				c = da.Ninfos[n.base()^int(c)].Sibling
389 | 			}
390 | 		}
391 | 		if !flag && to_ == from_n { // parent node moved
392 | 			from_n = to
393 | 		}
394 | 		if !flag && to_ == to_pn {
395 | 			da.pushSibling(from_n, to_pn^int(label_n), label_n, true)
396 | 			da.Ninfos[to_].Child = 0
397 | 			n_.Value = ValueLimit
398 | 			n_.Check = from_n
399 | 		} else {
400 | 			da.pushEnode(to_)
401 | 		}
402 | 	}
403 | 	if flag {
404 | 		return base ^ int(label_n)
405 | 	}
406 | 	return to_pn
407 | }
408 | 
--------------------------------------------------------------------------------
/vendor/github.com/adamzy/cedar-go/doc.go:
--------------------------------------------------------------------------------
1 | // Package cedar-go implements a double-array trie.
2 | //
3 | // It is a golang port of cedar (http://www.tkl.iis.u-tokyo.ac.jp/~ynaga/cedar) which is written in C++ by Naoki Yoshinaga.
4 | // Currently cedar-go implements the `reduced` version of cedar.
5 | // This package is not thread safe if any goroutine is doing
6 | // insertions or deletions concurrently with other operations.
7 | //
8 | // Note
9 | //
10 | // key must be `[]byte` without zero items,
11 | // while value must be an integer in the range [0, 2^63-2] or [0, 2^31-2], depending on the platform.
12 | package cedar
--------------------------------------------------------------------------------
/vendor/github.com/adamzy/cedar-go/errors.go:
--------------------------------------------------------------------------------
1 | package cedar
2 | 
3 | import "errors"
4 | 
5 | var (
6 | 	ErrInvalidDataType = errors.New("cedar: invalid datatype")
7 | 	ErrInvalidValue    = errors.New("cedar: invalid value")
8 | 	ErrInvalidKey      = errors.New("cedar: invalid key")
9 | 	ErrNoPath          = errors.New("cedar: no path")
10 | 	ErrNoValue         = errors.New("cedar: no value")
11 | )
12 | 
--------------------------------------------------------------------------------
/vendor/github.com/adamzy/cedar-go/io.go:
--------------------------------------------------------------------------------
1 | package cedar
2 | 
3 | import (
4 | 	"bufio"
5 | 	"encoding/gob"
6 | 	"encoding/json"
7 | 	"io"
8 | 	"os"
9 | )
10 | 
11 | // Save saves the cedar to an io.Writer,
12 | // where dataType is either "json" or "gob".
13 | func (da *Cedar) Save(out io.Writer, dataType string) error {
14 | 	switch dataType {
15 | 	case "gob", "GOB":
16 | 		dataEncoder := gob.NewEncoder(out)
17 | 		return dataEncoder.Encode(da.cedar)
18 | 	case "json", "JSON":
19 | 		dataEncoder := json.NewEncoder(out)
20 | 		return dataEncoder.Encode(da.cedar)
21 | 	}
22 | 	return ErrInvalidDataType
23 | }
24 | 
25 | // SaveToFile saves the cedar to a file,
26 | // where dataType is either "json" or "gob".
27 | func (da *Cedar) SaveToFile(fileName string, dataType string) error {
28 | 	file, err := os.OpenFile(fileName, os.O_CREATE|os.O_WRONLY, 0666)
29 | 	if err != nil {
30 | 		return err
31 | 	}
32 | 	defer file.Close()
33 | 	out := bufio.NewWriter(file)
34 | 	defer out.Flush()
35 | 	// propagate any encoding error from Save
36 | 	return da.Save(out, dataType)
37 | }
38 | 
39 | // Load loads the cedar from an io.Reader,
40 | // where dataType is either "json" or "gob".
41 | func (da *Cedar) Load(in io.Reader, dataType string) error {
42 | 	switch dataType {
43 | 	case "gob", "GOB":
44 | 		dataDecoder := gob.NewDecoder(in)
45 | 		return dataDecoder.Decode(da.cedar)
46 | 	case "json", "JSON":
47 | 		dataDecoder := json.NewDecoder(in)
48 | 		return dataDecoder.Decode(da.cedar)
49 | 	}
50 | 	return ErrInvalidDataType
51 | }
52 | 
53 | // LoadFromFile loads the cedar from a file,
54 | // where dataType is either "json" or "gob".
55 | func (da *Cedar) LoadFromFile(fileName string, dataType string) error {
56 | 	file, err := os.OpenFile(fileName, os.O_RDONLY, 0600)
57 | 	if err != nil {
58 | 		return err
59 | 	}
60 | 	defer file.Close()
61 | 	in := bufio.NewReader(file)
62 | 	return da.Load(in, dataType)
63 | }
64 | 
--------------------------------------------------------------------------------
/vendor/github.com/adamzy/sego/README.md:
--------------------------------------------------------------------------------
1 | sego
2 | ====
3 | 
4 | Go中文分词
5 | 
6 | 词典用前缀树实现,
7 | 分词器算法为基于词频的最短路径加动态规划。
8 | 
9 | 支持普通和搜索引擎两种分词模式,支持用户词典、词性标注,可运行JSON RPC服务。
10 | 
11 | 分词速度单线程2.5MB/s,goroutines并发27MB/s,处理器32核Xeon。
12 | 
13 | # 安装/更新
14 | 
15 | ```
16 | go get -u github.com/huichen/sego
17 | ```
18 | 
19 | # 使用
20 | 
21 | 
22 | ```go
23 | package main
24 | 
25 | import (
26 | 	"fmt"
27 | 	"github.com/huichen/sego"
28 | )
29 | 
30 | func main() {
31 | 	// 载入词典
32 | 	var segmenter sego.Segmenter
33 | 	segmenter.LoadDictionary("github.com/huichen/sego/data/dictionary.txt")
34 | 
35 | 	// 分词
36 | 	text := []byte("中华人民共和国中央人民政府")
37 | 	segments := segmenter.Segment(text)
38 | 
39 | 	// 处理分词结果
40 | 	// 支持普通模式和搜索模式两种分词,见代码中SegmentsToString函数的注释。
41 | 	fmt.Println(sego.SegmentsToString(segments, false))
42 | }
43 | ```
44 | 
--------------------------------------------------------------------------------
/vendor/github.com/adamzy/sego/dictionary.go:
--------------------------------------------------------------------------------
1 | package sego
2 | 
3 | import "github.com/adamzy/cedar-go"
4 | 
5 | // Dictionary结构体实现了一个字串前缀树,一个分词可能出现在叶子节点也有可能出现在非叶节点
6 | type Dictionary struct {
7 | 	trie           *cedar.Cedar // Cedar 前缀树
8 | 	maxTokenLength int          // 词典中最长的分词
9 | 	tokens         []Token      // 词典中所有的分词,方便遍历
10 | 	totalFrequency int64        // 词典中所有分词的频率之和
11 | }
12 | 
13 | func NewDictionary() *Dictionary {
14 | 	return &Dictionary{trie: cedar.New()}
15 | }
16 | 
17 | // 词典中最长的分词
18 | func (dict *Dictionary) MaxTokenLength() int {
19 | 	return dict.maxTokenLength
20 | }
21 | 
22 | // 词典中分词数目
23 | func (dict *Dictionary) NumTokens() int {
24 | 	return len(dict.tokens)
25 | }
26 | 
27 | // 词典中所有分词的频率之和
28 | func (dict *Dictionary) TotalFrequency() int64 {
29 | 	return dict.totalFrequency
30 | }
31 | 
32 | //
向词典中加入一个分词 33 | func (dict *Dictionary) addToken(token Token) { 34 | bytes := textSliceToBytes(token.text) 35 | _, err := dict.trie.Get(bytes) 36 | if err == nil { 37 | return 38 | } 39 | 40 | dict.trie.Insert(bytes, dict.NumTokens()) 41 | dict.tokens = append(dict.tokens, token) 42 | dict.totalFrequency += int64(token.frequency) 43 | if len(token.text) > dict.maxTokenLength { 44 | dict.maxTokenLength = len(token.text) 45 | } 46 | } 47 | 48 | // 在词典中查找和字元组words可以前缀匹配的所有分词 49 | // 返回值为找到的分词数 50 | func (dict *Dictionary) lookupTokens(words []Text, tokens []*Token) (numOfTokens int) { 51 | var id, value int 52 | var err error 53 | for _, word := range words { 54 | id, err = dict.trie.Jump(word, id) 55 | if err != nil { 56 | break 57 | } 58 | value, err = dict.trie.Value(id) 59 | if err == nil { 60 | tokens[numOfTokens] = &dict.tokens[value] 61 | numOfTokens++ 62 | } 63 | } 64 | return 65 | } 66 | -------------------------------------------------------------------------------- /vendor/github.com/adamzy/sego/license.txt: -------------------------------------------------------------------------------- 1 | Copyright 2013 Hui Chen 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | -------------------------------------------------------------------------------- /vendor/github.com/adamzy/sego/segment.go: -------------------------------------------------------------------------------- 1 | package sego 2 | 3 | // 文本中的一个分词 4 | type Segment struct { 5 | // 分词在文本中的起始字节位置 6 | start int 7 | 8 | // 分词在文本中的结束字节位置(不包括该位置) 9 | end int 10 | 11 | // 分词信息 12 | token *Token 13 | } 14 | 15 | // 返回分词在文本中的起始字节位置 16 | func (s *Segment) Start() int { 17 | return s.start 18 | } 19 | 20 | // 返回分词在文本中的结束字节位置(不包括该位置) 21 | func (s *Segment) End() int { 22 | return s.end 23 | } 24 | 25 | // 返回分词信息 26 | func (s *Segment) Token() *Token { 27 | return s.token 28 | } 29 | -------------------------------------------------------------------------------- /vendor/github.com/adamzy/sego/segmenter.go: -------------------------------------------------------------------------------- 1 | //Go中文分词 2 | package sego 3 | 4 | import ( 5 | "bufio" 6 | "fmt" 7 | "log" 8 | "math" 9 | "os" 10 | "strconv" 11 | "strings" 12 | "unicode" 13 | "unicode/utf8" 14 | ) 15 | 16 | const ( 17 | minTokenFrequency = 2 // 仅从字典文件中读取大于等于此频率的分词 18 | ) 19 | 20 | // 分词器结构体 21 | type Segmenter struct { 22 | dict *Dictionary 23 | } 24 | 25 | // 该结构体用于记录Viterbi算法中某字元处的向前分词跳转信息 26 | type jumper struct { 27 | minDistance float32 28 | token *Token 29 | } 30 | 31 | // 返回分词器使用的词典 32 | func (seg *Segmenter) Dictionary() *Dictionary { 33 | return seg.dict 34 | } 35 | 36 | // 从文件中载入词典 37 | // 38 | // 可以载入多个词典文件,文件名用","分隔,排在前面的词典优先载入分词,比如 39 | // "用户词典.txt,通用词典.txt" 40 | // 当一个分词既出现在用户词典也出现在通用词典中,则优先使用用户词典。 41 | // 42 | // 词典的格式为(每个分词一行): 43 | // 分词文本 频率 词性 44 | func (seg *Segmenter) LoadDictionary(files string) { 45 | seg.dict = NewDictionary() 46 | for _, file := range strings.Split(files, ",") { 47 | log.Printf("载入sego词典 %s", file) 48 | dictFile, err := 
os.Open(file) 49 | defer dictFile.Close() 50 | if err != nil { 51 | log.Fatalf("无法载入字典文件 \"%s\" \n", file) 52 | } 53 | 54 | reader := bufio.NewReader(dictFile) 55 | var text string 56 | var freqText string 57 | var frequency int 58 | var pos string 59 | 60 | // 逐行读入分词 61 | for { 62 | size, _ := fmt.Fscanln(reader, &text, &freqText, &pos) 63 | 64 | if size == 0 { 65 | // 文件结束 66 | break 67 | } else if size < 2 { 68 | // 无效行 69 | continue 70 | } else if size == 2 { 71 | // 没有词性标注时设为空字符串 72 | pos = "" 73 | } 74 | 75 | // 解析词频 76 | var err error 77 | frequency, err = strconv.Atoi(freqText) 78 | if err != nil { 79 | continue 80 | } 81 | 82 | // 过滤频率太小的词 83 | if frequency < minTokenFrequency { 84 | continue 85 | } 86 | 87 | // 将分词添加到字典中 88 | words := splitTextToWords([]byte(text)) 89 | token := Token{text: words, frequency: frequency, pos: pos} 90 | seg.dict.addToken(token) 91 | } 92 | } 93 | 94 | // 计算每个分词的路径值,路径值含义见Token结构体的注释 95 | logTotalFrequency := float32(math.Log2(float64(seg.dict.totalFrequency))) 96 | for i := range seg.dict.tokens { 97 | token := &seg.dict.tokens[i] 98 | token.distance = logTotalFrequency - float32(math.Log2(float64(token.frequency))) 99 | } 100 | 101 | // 对每个分词进行细致划分,用于搜索引擎模式,该模式用法见Token结构体的注释。 102 | for i := range seg.dict.tokens { 103 | token := &seg.dict.tokens[i] 104 | segments := seg.segmentWords(token.text, true) 105 | 106 | // 计算需要添加的子分词数目 107 | numTokensToAdd := 0 108 | for iToken := 0; iToken < len(segments); iToken++ { 109 | if len(segments[iToken].token.text) > 1 { 110 | // 略去字元长度为一的分词 111 | // TODO: 这值得进一步推敲,特别是当字典中有英文复合词的时候 112 | numTokensToAdd++ 113 | } 114 | } 115 | token.segments = make([]*Segment, numTokensToAdd) 116 | 117 | // 添加子分词 118 | iSegmentsToAdd := 0 119 | for iToken := 0; iToken < len(segments); iToken++ { 120 | if len(segments[iToken].token.text) > 1 { 121 | token.segments[iSegmentsToAdd] = &segments[iToken] 122 | iSegmentsToAdd++ 123 | } 124 | } 125 | } 126 | 127 | log.Println("sego词典载入完毕") 128 | } 129 | 130 | // 对文本分词 131 | // 132 | // 输入参数: 133 | // bytes UTF8文本的字节数组 134 | // 135 | // 输出: 136 | // []Segment 划分的分词 137 | func (seg *Segmenter) Segment(bytes []byte) []Segment { 138 | return seg.internalSegment(bytes, false) 139 | } 140 | 141 | func (seg *Segmenter) internalSegment(bytes []byte, searchMode bool) []Segment { 142 | // 处理特殊情况 143 | if len(bytes) == 0 { 144 | return []Segment{} 145 | } 146 | 147 | // 划分字元 148 | text := splitTextToWords(bytes) 149 | 150 | return seg.segmentWords(text, searchMode) 151 | } 152 | 153 | func (seg *Segmenter) segmentWords(text []Text, searchMode bool) []Segment { 154 | // 搜索模式下该分词已无继续划分可能的情况 155 | if searchMode && len(text) == 1 { 156 | return []Segment{} 157 | } 158 | 159 | // jumpers定义了每个字元处的向前跳转信息,包括这个跳转对应的分词, 160 | // 以及从文本段开始到该字元的最短路径值 161 | jumpers := make([]jumper, len(text)) 162 | 163 | tokens := make([]*Token, seg.dict.maxTokenLength) 164 | for current := 0; current < len(text); current++ { 165 | // 找到前一个字元处的最短路径,以便计算后续路径值 166 | var baseDistance float32 167 | if current == 0 { 168 | // 当本字元在文本首部时,基础距离应该是零 169 | baseDistance = 0 170 | } else { 171 | baseDistance = jumpers[current-1].minDistance 172 | } 173 | 174 | // 寻找所有以当前字元开头的分词 175 | numTokens := seg.dict.lookupTokens( 176 | text[current:minInt(current+seg.dict.maxTokenLength, len(text))], tokens) 177 | 178 | // 对所有可能的分词,更新分词结束字元处的跳转信息 179 | for iToken := 0; iToken < numTokens; iToken++ { 180 | location := current + len(tokens[iToken].text) - 1 181 | if !searchMode || current != 0 || location != len(text)-1 { 182 | 
updateJumper(&jumpers[location], baseDistance, tokens[iToken]) 183 | } 184 | } 185 | 186 | // 当前字元没有对应分词时补加一个伪分词 187 | if numTokens == 0 || len(tokens[0].text) > 1 { 188 | updateJumper(&jumpers[current], baseDistance, 189 | &Token{text: []Text{text[current]}, frequency: 1, distance: 32, pos: "x"}) 190 | } 191 | } 192 | 193 | // 从后向前扫描第一遍得到需要添加的分词数目 194 | numSeg := 0 195 | for index := len(text) - 1; index >= 0; { 196 | location := index - len(jumpers[index].token.text) + 1 197 | numSeg++ 198 | index = location - 1 199 | } 200 | 201 | // 从后向前扫描第二遍添加分词到最终结果 202 | outputSegments := make([]Segment, numSeg) 203 | for index := len(text) - 1; index >= 0; { 204 | location := index - len(jumpers[index].token.text) + 1 205 | numSeg-- 206 | outputSegments[numSeg].token = jumpers[index].token 207 | index = location - 1 208 | } 209 | 210 | // 计算各个分词的字节位置 211 | bytePosition := 0 212 | for iSeg := 0; iSeg < len(outputSegments); iSeg++ { 213 | outputSegments[iSeg].start = bytePosition 214 | bytePosition += textSliceByteLength(outputSegments[iSeg].token.text) 215 | outputSegments[iSeg].end = bytePosition 216 | } 217 | return outputSegments 218 | } 219 | 220 | // 更新跳转信息: 221 | // 1. 当该位置从未被访问过时(jumper.minDistance为零的情况),或者 222 | // 2. 当该位置的当前最短路径大于新的最短路径时 223 | // 将当前位置的最短路径值更新为baseDistance加上新分词的概率 224 | func updateJumper(jumper *jumper, baseDistance float32, token *Token) { 225 | newDistance := baseDistance + token.distance 226 | if jumper.minDistance == 0 || jumper.minDistance > newDistance { 227 | jumper.minDistance = newDistance 228 | jumper.token = token 229 | } 230 | } 231 | 232 | // 取两整数较小值 233 | func minInt(a, b int) int { 234 | if a > b { 235 | return b 236 | } 237 | return a 238 | } 239 | 240 | // 取两整数较大值 241 | func maxInt(a, b int) int { 242 | if a > b { 243 | return a 244 | } 245 | return b 246 | } 247 | 248 | // 将文本划分成字元 249 | func splitTextToWords(text Text) []Text { 250 | output := make([]Text, 0, len(text)/3) 251 | current := 0 252 | inAlphanumeric := true 253 | alphanumericStart := 0 254 | for current < len(text) { 255 | r, size := utf8.DecodeRune(text[current:]) 256 | if size <= 2 && (unicode.IsLetter(r) || unicode.IsNumber(r)) { 257 | // 当前是拉丁字母或数字(非中日韩文字) 258 | if !inAlphanumeric { 259 | alphanumericStart = current 260 | inAlphanumeric = true 261 | } 262 | } else { 263 | if inAlphanumeric { 264 | inAlphanumeric = false 265 | if current != 0 { 266 | output = append(output, toLower(text[alphanumericStart:current])) 267 | } 268 | } 269 | output = append(output, text[current:current+size]) 270 | } 271 | current += size 272 | } 273 | 274 | // 处理最后一个字元是英文的情况 275 | if inAlphanumeric { 276 | if current != 0 { 277 | output = append(output, toLower(text[alphanumericStart:current])) 278 | } 279 | } 280 | 281 | return output 282 | } 283 | 284 | // 将英文词转化为小写 285 | func toLower(text []byte) []byte { 286 | output := make([]byte, len(text)) 287 | for i, t := range text { 288 | if t >= 'A' && t <= 'Z' { 289 | output[i] = t - 'A' + 'a' 290 | } else { 291 | output[i] = t 292 | } 293 | } 294 | return output 295 | } 296 | -------------------------------------------------------------------------------- /vendor/github.com/adamzy/sego/test_utils.go: -------------------------------------------------------------------------------- 1 | package sego 2 | 3 | import ( 4 | "fmt" 5 | "testing" 6 | ) 7 | 8 | func expect(t *testing.T, expect string, actual interface{}) { 9 | actualString := fmt.Sprint(actual) 10 | if expect != actualString { 11 | t.Errorf("期待值=\"%s\", 实际=\"%s\"", expect, actualString) 12 | } 13 | } 14 | 
15 | func printTokens(tokens []*Token, numTokens int) (output string) { 16 | for iToken := 0; iToken < numTokens; iToken++ { 17 | for _, word := range tokens[iToken].text { 18 | output += fmt.Sprint(string(word)) 19 | } 20 | output += " " 21 | } 22 | return 23 | } 24 | 25 | func toWords(strings ...string) []Text { 26 | words := []Text{} 27 | for _, s := range strings { 28 | words = append(words, []byte(s)) 29 | } 30 | return words 31 | } 32 | 33 | func bytesToString(bytes []Text) (output string) { 34 | for _, b := range bytes { 35 | output += (string(b) + "/") 36 | } 37 | return 38 | } 39 | -------------------------------------------------------------------------------- /vendor/github.com/adamzy/sego/token.go: -------------------------------------------------------------------------------- 1 | package sego 2 | 3 | // 字串类型,可以用来表达 4 | // 1. 一个字元,比如"中"又如"国", 英文的一个字元是一个词 5 | // 2. 一个分词,比如"中国"又如"人口" 6 | // 3. 一段文字,比如"中国有十三亿人口" 7 | type Text []byte 8 | 9 | // 一个分词 10 | type Token struct { 11 | // 分词的字串,这实际上是个字元数组 12 | text []Text 13 | 14 | // 分词在语料库中的词频 15 | frequency int 16 | 17 | // log2(总词频/该分词词频),这相当于log2(1/p(分词)),用作动态规划中 18 | // 该分词的路径长度。求解prod(p(分词))的最大值相当于求解 19 | // sum(distance(分词))的最小值,这就是“最短路径”的来历。 20 | distance float32 21 | 22 | // 词性标注 23 | pos string 24 | 25 | // 该分词文本的进一步分词划分,见Segments函数注释。 26 | segments []*Segment 27 | } 28 | 29 | // 返回分词文本 30 | func (token *Token) Text() string { 31 | return textSliceToString(token.text) 32 | } 33 | 34 | // 返回分词在语料库中的词频 35 | func (token *Token) Frequency() int { 36 | return token.frequency 37 | } 38 | 39 | // 返回分词词性标注 40 | func (token *Token) Pos() string { 41 | return token.pos 42 | } 43 | 44 | // 该分词文本的进一步分词划分,比如"中华人民共和国中央人民政府"这个分词 45 | // 有两个子分词"中华人民共和国"和"中央人民政府"。子分词也可以进一步有子分词 46 | // 形成一个树结构,遍历这个树就可以得到该分词的所有细致分词划分,这主要 47 | // 用于搜索引擎对一段文本进行全文搜索。 48 | func (token *Token) Segments() []*Segment { 49 | return token.segments 50 | } 51 | -------------------------------------------------------------------------------- /vendor/github.com/adamzy/sego/utils.go: -------------------------------------------------------------------------------- 1 | package sego 2 | 3 | import ( 4 | "bytes" 5 | "fmt" 6 | ) 7 | 8 | // 输出分词结果为字符串 9 | // 10 | // 有两种输出模式,以"中华人民共和国"为例 11 | // 12 | // 普通模式(searchMode=false)输出一个分词"中华人民共和国/ns " 13 | // 搜索模式(searchMode=true) 输出普通模式的再细致切分: 14 | // "中华/nz 人民/n 共和/nz 共和国/ns 人民共和国/nt 中华人民共和国/ns " 15 | // 16 | // 搜索模式主要用于给搜索引擎提供尽可能多的关键字,详情请见Token结构体的注释。 17 | func SegmentsToString(segs []Segment, searchMode bool) (output string) { 18 | if searchMode { 19 | for _, seg := range segs { 20 | output += tokenToString(seg.token) 21 | } 22 | } else { 23 | for _, seg := range segs { 24 | output += fmt.Sprintf( 25 | "%s/%s ", textSliceToString(seg.token.text), seg.token.pos) 26 | } 27 | } 28 | return 29 | } 30 | 31 | func tokenToString(token *Token) (output string) { 32 | for _, s := range token.segments { 33 | output += tokenToString(s.token) 34 | } 35 | output += fmt.Sprintf("%s/%s ", textSliceToString(token.text), token.pos) 36 | return 37 | } 38 | 39 | // 输出分词结果到一个字符串slice 40 | // 41 | // 有两种输出模式,以"中华人民共和国"为例 42 | // 43 | // 普通模式(searchMode=false)输出一个分词"[中华人民共和国]" 44 | // 搜索模式(searchMode=true) 输出普通模式的再细致切分: 45 | // "[中华 人民 共和 共和国 人民共和国 中华人民共和国]" 46 | // 47 | // 搜索模式主要用于给搜索引擎提供尽可能多的关键字,详情请见Token结构体的注释。 48 | 49 | func SegmentsToSlice(segs []Segment, searchMode bool) (output []string) { 50 | if searchMode { 51 | for _, seg := range segs { 52 | output = append(output, tokenToSlice(seg.token)...) 
53 | } 54 | } else { 55 | for _, seg := range segs { 56 | output = append(output, seg.token.Text()) 57 | } 58 | } 59 | return 60 | } 61 | 62 | func tokenToSlice(token *Token) (output []string) { 63 | for _, s := range token.segments { 64 | output = append(output, tokenToSlice(s.token)...) 65 | } 66 | output = append(output, textSliceToString(token.text)) 67 | return output 68 | } 69 | 70 | // 将多个字元拼接一个字符串输出 71 | func textSliceToString(text []Text) string { 72 | var output string 73 | for _, word := range text { 74 | output += string(word) 75 | } 76 | return output 77 | } 78 | 79 | // 返回多个字元的字节总长度 80 | func textSliceByteLength(text []Text) (length int) { 81 | for _, word := range text { 82 | length += len(word) 83 | } 84 | return 85 | } 86 | 87 | func textSliceToBytes(text []Text) []byte { 88 | var buf bytes.Buffer 89 | for _, word := range text { 90 | buf.Write(word) 91 | } 92 | return buf.Bytes() 93 | } 94 | -------------------------------------------------------------------------------- /vendor/github.com/issue9/assert/.editorconfig: -------------------------------------------------------------------------------- 1 | # EditorConfig is awesome: http://EditorConfig.org 2 | 3 | # top-most EditorConfig file 4 | root = true 5 | 6 | # Unix-style newlines with a newline ending every file 7 | [*] 8 | end_of_line = lf 9 | insert_final_newline = true 10 | charset = utf-8 11 | 12 | # html 13 | [*.{htm,html,js,css}] 14 | indent_style = space 15 | indent_size = 4 16 | 17 | # 配置文件 18 | [*.{yml,yaml,json}] 19 | indent_style = space 20 | indent_size = 2 21 | -------------------------------------------------------------------------------- /vendor/github.com/issue9/assert/.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled Object files, Static and Dynamic libs (Shared Objects) 2 | *.o 3 | *.a 4 | *.so 5 | 6 | # Folders 7 | _obj 8 | _test 9 | 10 | # Architecture specific extensions/prefixes 11 | *.[568vq] 12 | [568vq].out 13 | 14 | *.cgo1.go 15 | *.cgo2.c 16 | _cgo_defun.c 17 | _cgo_gotypes.go 18 | _cgo_export.* 19 | 20 | _testmain.go 21 | 22 | *.exe 23 | *.test 24 | *.prof 25 | 26 | # vim 27 | *.swp 28 | 29 | # osx 30 | .DS_Store 31 | 32 | .vscode 33 | .idea 34 | -------------------------------------------------------------------------------- /vendor/github.com/issue9/assert/LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2014 caixw 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | -------------------------------------------------------------------------------- /vendor/github.com/issue9/assert/README.md: -------------------------------------------------------------------------------- 1 | assert 2 | [![Go](https://github.com/issue9/assert/workflows/Go/badge.svg)](https://github.com/issue9/assert/actions?query=workflow%3AGo) 3 | [![codecov](https://codecov.io/gh/issue9/assert/branch/master/graph/badge.svg)](https://codecov.io/gh/issue9/assert) 4 | [![license](https://img.shields.io/badge/license-MIT-brightgreen.svg?style=flat)](https://opensource.org/licenses/MIT) 5 | ====== 6 | 7 | assert 包是对 testing 的一个简单扩展,提供的一系列的断言函数, 8 | 方便在测试函数中使用: 9 | 10 | ```go 11 | func TestA(t *testing.T) { 12 | v := true 13 | assert.True(v) 14 | 15 | a := assert.New(t) 16 | a.True(v) 17 | } 18 | 19 | // 也可以对 testing.B 使用 20 | func Benchmark1(b *testing.B) { 21 | a := assert.New(b) 22 | v := false 23 | a.True(v) 24 | for(i:=0; i -1 { 63 | funcName = funcName[:index] 64 | info = funcName + "(" + basename + ":" + strconv.Itoa(line) + ")" 65 | continue 66 | } 67 | 68 | info = funcName + "(" + basename + ":" + strconv.Itoa(line) + ")" 69 | break 70 | } 71 | 72 | if info == "" { 73 | info = "<无法获取调用者信息>" 74 | } 75 | return info 76 | } 77 | 78 | // 格式化错误提示信息 79 | // 80 | // msg1 中的所有参数将依次被传递给 fmt.Sprintf() 函数, 81 | // 所以 msg1[0] 必须可以转换成 string(如:string, []byte, []rune, fmt.Stringer) 82 | // 83 | // msg2 参数格式与 msg1 完全相同,在 msg1 为空的情况下,会使用 msg2 的内容, 84 | // 否则 msg2 不会启作用。 85 | func formatMessage(msg1 []interface{}, msg2 []interface{}) string { 86 | msg := msg1 87 | if len(msg) == 0 { 88 | msg = msg2 89 | } 90 | 91 | if len(msg) == 0 { 92 | return "<未提供任何错误信息>" 93 | } 94 | 95 | if len(msg) == 1 { 96 | return fmt.Sprint(msg[0]) 97 | } 98 | 99 | format := "" 100 | switch v := msg[0].(type) { 101 | case []byte: 102 | format = string(v) 103 | case []rune: 104 | format = string(v) 105 | case string: 106 | format = v 107 | case fmt.Stringer: 108 | format = v.String() 109 | default: 110 | return fmt.Sprintln(msg...) 111 | } 112 | 113 | return fmt.Sprintf(format, msg[1:]...) 
114 | }
115 | 
116 | // assert 在 expr 条件不成立时,输出错误信息并标记测试失败
117 | //
118 | // expr 返回结果值为bool类型的表达式;
119 | // msg1,msg2 输出的错误信息,之所以提供两组信息,是方便在用户没有提供的情况下,
120 | // 可以使用系统内部提供的信息,优先使用 msg1 中的信息,若不存在,则使用 msg2 的内容。
121 | func assert(t testing.TB, expr bool, msg1 []interface{}, msg2 []interface{}) {
122 | 	if !expr {
123 | 		t.Error(formatMessage(msg1, msg2) + "@" + getCallerInfo())
124 | 	}
125 | }
126 | 
127 | // True 断言表达式 expr 为 true
128 | //
129 | // args 对应 fmt.Printf() 函数中的参数,其中 args[0] 对应第一个参数 format,依次类推,
130 | // 具体可参考 formatMessage() 函数的介绍。其它断言函数的 args 参数,功能与此相同。
131 | func True(t testing.TB, expr bool, args ...interface{}) {
132 | 	assert(t, expr, args, []interface{}{"True 失败,实际值为 %#v", expr})
133 | }
134 | 
135 | // False 断言表达式 expr 为 false
136 | func False(t testing.TB, expr bool, args ...interface{}) {
137 | 	assert(t, !expr, args, []interface{}{"False 失败,实际值为 %#v", expr})
138 | }
139 | 
140 | // Nil 断言表达式 expr 为 nil
141 | func Nil(t testing.TB, expr interface{}, args ...interface{}) {
142 | 	assert(t, IsNil(expr), args, []interface{}{"Nil 失败,实际值为 %#v", expr})
143 | }
144 | 
145 | // NotNil 断言表达式 expr 为非 nil 值
146 | func NotNil(t testing.TB, expr interface{}, args ...interface{}) {
147 | 	assert(t, !IsNil(expr), args, []interface{}{"NotNil 失败,实际值为 %#v", expr})
148 | }
149 | 
150 | // Equal 断言 v1 与 v2 两个值相等
151 | func Equal(t testing.TB, v1, v2 interface{}, args ...interface{}) {
152 | 	assert(t, IsEqual(v1, v2), args, []interface{}{"Equal 失败,实际值为\nv1=%#v\nv2=%#v", v1, v2})
153 | }
154 | 
155 | // NotEqual 断言 v1 与 v2 两个值不相等
156 | func NotEqual(t testing.TB, v1, v2 interface{}, args ...interface{}) {
157 | 	assert(t, !IsEqual(v1, v2), args, []interface{}{"NotEqual 失败,实际值为\nv1=%#v\nv2=%#v", v1, v2})
158 | }
159 | 
160 | // Empty 断言 expr 的值为空(nil,"",0,false),否则输出错误信息
161 | func Empty(t testing.TB, expr interface{}, args ...interface{}) {
162 | 	assert(t, IsEmpty(expr), args, []interface{}{"Empty 失败,实际值为 %#v", expr})
163 | }
164 | 
165 | // NotEmpty 断言 expr 的值为非空(除 nil,"",0,false之外),否则输出错误信息
166 | func NotEmpty(t testing.TB, expr interface{}, args ...interface{}) {
167 | 	assert(t, !IsEmpty(expr), args, []interface{}{"NotEmpty 失败,实际值为 %#v", expr})
168 | }
169 | 
170 | // Error 断言有错误发生
171 | //
172 | // 传递未初始化的 error 值(var err error = nil),将断言失败
173 | func Error(t testing.TB, expr interface{}, args ...interface{}) {
174 | 	if IsNil(expr) { // 空值,必定没有错误
175 | 		assert(t, false, args, []interface{}{"Error 失败,实际值为 Nil:[%T]", expr})
176 | 		return
177 | 	}
178 | 
179 | 	_, ok := expr.(error)
180 | 	assert(t, ok, args, []interface{}{"Error 失败,实际类型为[%T]", expr})
181 | }
182 | 
183 | // ErrorString 断言有错误发生且错误信息中包含指定的字符串 str
184 | //
185 | // 传递未初始化的 error 值(var err error = nil),将断言失败
186 | func ErrorString(t testing.TB, expr interface{}, str string, args ...interface{}) {
187 | 	if IsNil(expr) { // 空值,必定没有错误
188 | 		assert(t, false, args, []interface{}{"ErrorString 失败,实际值为 Nil:[%T]", expr})
189 | 		return
190 | 	}
191 | 
192 | 	if err, ok := expr.(error); ok {
193 | 		index := strings.Index(err.Error(), str)
194 | 		assert(t, index >= 0, args, []interface{}{"ErrorString 失败,错误信息[%v]中未包含[%s]", err, str})
195 | 	}
196 | }
197 | 
198 | // ErrorType 断言有错误发生且错误的类型与 typ 的类型相同
199 | //
200 | // 传递未初始化的 error 值(var err error = nil),将断言失败。
201 | //
202 | // 仅对 expr 是否与 typ 为同一类型作简单判断,如果要检测是否是包含关系,可以使用 errors.Is 检测。
203 | func ErrorType(t testing.TB, expr interface{}, typ error, args ...interface{}) {
204 | 	if IsNil(expr) { // 空值,必定没有错误
205 | 		assert(t, false, args, []interface{}{"ErrorType 失败,实际值为 Nil:[%T]", expr})
206 | 		return
207 | 	}
208 | 
209 | 	if _, ok := expr.(error); !ok {
210 | 
assert(t, false, args, []interface{}{"ErrorType 失败,实际类型为[%T],且无法转换成 error 接口", expr}) 211 | return 212 | } 213 | 214 | t1 := reflect.TypeOf(expr) 215 | t2 := reflect.TypeOf(typ) 216 | assert(t, t1 == t2, args, []interface{}{"ErrorType 失败,v1[%v]为一个错误类型,但与v2[%v]的类型不相同", t1, t2}) 217 | } 218 | 219 | // NotError 断言没有错误发生 220 | func NotError(t testing.TB, expr interface{}, args ...interface{}) { 221 | if IsNil(expr) { // 空值必定没有错误 222 | assert(t, true, args, []interface{}{"NotError 失败,实际类型为[%T]", expr}) 223 | return 224 | } 225 | err, ok := expr.(error) 226 | assert(t, !ok, args, []interface{}{"NotError 失败,错误信息为[%v]", err}) 227 | } 228 | 229 | // ErrorIs 断言 expr 为 target 类型 230 | // 231 | // 相当于 True(t, errors.Is(expr, target)) 232 | func ErrorIs(t testing.TB, expr interface{}, target error, args ...interface{}) { 233 | err, ok := expr.(error) 234 | assert(t, ok, args, []interface{}{"ErrorIs 失败,expr 无法转换成 error。"}) 235 | 236 | assert(t, errors.Is(err, target), args, []interface{}{"ErrorIs 失败,expr 不是且不包含 target。"}) 237 | } 238 | 239 | // FileExists 断言文件存在 240 | func FileExists(t testing.TB, path string, args ...interface{}) { 241 | _, err := os.Stat(path) 242 | 243 | if err != nil && !os.IsExist(err) { 244 | assert(t, false, args, []interface{}{"FileExists 失败,且附带以下错误:%v", err}) 245 | } 246 | } 247 | 248 | // FileNotExists 断言文件不存在 249 | func FileNotExists(t testing.TB, path string, args ...interface{}) { 250 | _, err := os.Stat(path) 251 | 252 | if err == nil { 253 | assert(t, false, args, []interface{}{"FileNotExists 失败"}) 254 | } 255 | if os.IsExist(err) { 256 | assert(t, false, args, []interface{}{"FileNotExists 失败,且返回以下错误信息:%v", err}) 257 | } 258 | } 259 | 260 | // Panic 断言函数会发生 panic 261 | func Panic(t testing.TB, fn func(), args ...interface{}) { 262 | has, _ := HasPanic(fn) 263 | assert(t, has, args, []interface{}{"并未发生 panic"}) 264 | } 265 | 266 | // PanicString 断言函数会发生 panic 且 panic 信息中包含指定的字符串内容 267 | func PanicString(t testing.TB, fn func(), str string, args ...interface{}) { 268 | if has, msg := HasPanic(fn); has { 269 | index := strings.Index(fmt.Sprint(msg), str) 270 | assert(t, index >= 0, args, []interface{}{"panic 中并未包含 %s", str}) 271 | return 272 | } 273 | 274 | assert(t, false, args, []interface{}{"并未发生 panic"}) 275 | } 276 | 277 | // PanicType 断言函数会发生 panic 且抛出指定的类型 278 | func PanicType(t testing.TB, fn func(), typ interface{}, args ...interface{}) { 279 | has, msg := HasPanic(fn) 280 | if !has { 281 | return 282 | } 283 | 284 | t1 := reflect.TypeOf(msg) 285 | t2 := reflect.TypeOf(typ) 286 | assert(t, t1 == t2, args, []interface{}{"PanicType 失败,v1[%v]的类型与v2[%v]的类型不相同", t1, t2}) 287 | 288 | } 289 | 290 | // NotPanic 断言函数不会发生 panic 291 | func NotPanic(t testing.TB, fn func(), args ...interface{}) { 292 | has, msg := HasPanic(fn) 293 | assert(t, !has, args, []interface{}{"发生了 panic,其信息为[%v]", msg}) 294 | } 295 | 296 | // Contains 断言 container 包含 item 的或是包含 item 中的所有项 297 | // 298 | // 具体函数说明可参考 IsContains() 299 | func Contains(t testing.TB, container, item interface{}, args ...interface{}) { 300 | assert(t, IsContains(container, item), args, 301 | []interface{}{"container:[%v]并未包含item[%v]", container, item}) 302 | } 303 | 304 | // NotContains 断言 container 不包含 item 的或是不包含 item 中的所有项 305 | func NotContains(t testing.TB, container, item interface{}, args ...interface{}) { 306 | assert(t, !IsContains(container, item), args, 307 | []interface{}{"container:[%v]包含item[%v]", container, item}) 308 | } 309 | -------------------------------------------------------------------------------- 
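The assertion functions above all funnel through the unexported `assert()` helper, which appends the caller position from `getCallerInfo()` to the formatted failure message. A minimal sketch of how the package-level functions might be combined in a test; the asserted values and the sentinel error are illustrative, not from this repository:

```go
package sample

import (
	"errors"
	"fmt"
	"testing"

	"github.com/issue9/assert"
)

// errNotFound is a hypothetical sentinel error, used only for illustration.
var errNotFound = errors.New("not found")

func TestSketch(t *testing.T) {
	// Basic assertions; on failure the message carries the caller info.
	assert.True(t, 1+1 == 2, "arithmetic broke: %d", 1+1)
	assert.Equal(t, "abc", "abc")
	assert.NotNil(t, t)

	// Error assertions: a nil error makes Error/ErrorIs/ErrorString fail.
	err := fmt.Errorf("wrap: %w", errNotFound)
	assert.Error(t, err)
	assert.ErrorIs(t, err, errNotFound)
	assert.ErrorString(t, err, "not found")

	// Panic assertions, backed by HasPanic in util.go.
	assert.Panic(t, func() { panic("boom") })
	assert.PanicString(t, func() { panic("boom") }, "boo")
	assert.NotPanic(t, func() {})
}
```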
/vendor/github.com/issue9/assert/assertion.go: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: MIT 2 | 3 | package assert 4 | 5 | import "testing" 6 | 7 | // Assertion 是对 testing.TB 进行了简单的封装。 8 | // 可以以对象的方式调用包中的各个断言函数。 9 | type Assertion struct { 10 | t testing.TB 11 | } 12 | 13 | // New 返回 Assertion 对象。 14 | func New(t testing.TB) *Assertion { 15 | return &Assertion{t: t} 16 | } 17 | 18 | // TB 返回 testing.TB 接口 19 | func (a *Assertion) TB() testing.TB { 20 | return a.t 21 | } 22 | 23 | // True 参照 assert.True() 函数 24 | func (a *Assertion) True(expr bool, msg ...interface{}) *Assertion { 25 | True(a.t, expr, msg...) 26 | return a 27 | } 28 | 29 | // False 参照 assert.False() 函数 30 | func (a *Assertion) False(expr bool, msg ...interface{}) *Assertion { 31 | False(a.t, expr, msg...) 32 | return a 33 | } 34 | 35 | // Nil 参照 assert.Nil() 函数 36 | func (a *Assertion) Nil(expr interface{}, msg ...interface{}) *Assertion { 37 | Nil(a.t, expr, msg...) 38 | return a 39 | } 40 | 41 | // NotNil 参照 assert.NotNil() 函数 42 | func (a *Assertion) NotNil(expr interface{}, msg ...interface{}) *Assertion { 43 | NotNil(a.t, expr, msg...) 44 | return a 45 | } 46 | 47 | // Equal 参照 assert.Equal() 函数 48 | func (a *Assertion) Equal(v1, v2 interface{}, msg ...interface{}) *Assertion { 49 | Equal(a.t, v1, v2, msg...) 50 | return a 51 | } 52 | 53 | // NotEqual 参照 assert.NotEqual() 函数 54 | func (a *Assertion) NotEqual(v1, v2 interface{}, msg ...interface{}) *Assertion { 55 | NotEqual(a.t, v1, v2, msg...) 56 | return a 57 | } 58 | 59 | // Empty 参照 assert.Empty() 函数 60 | func (a *Assertion) Empty(expr interface{}, msg ...interface{}) *Assertion { 61 | Empty(a.t, expr, msg...) 62 | return a 63 | } 64 | 65 | // NotEmpty 参照 assert.NotEmpty() 函数 66 | func (a *Assertion) NotEmpty(expr interface{}, msg ...interface{}) *Assertion { 67 | NotEmpty(a.t, expr, msg...) 68 | return a 69 | } 70 | 71 | // Error 参照 assert.Error() 函数 72 | func (a *Assertion) Error(expr interface{}, msg ...interface{}) *Assertion { 73 | Error(a.t, expr, msg...) 74 | return a 75 | } 76 | 77 | // ErrorString 参照 assert.ErrorString() 函数 78 | func (a *Assertion) ErrorString(expr interface{}, str string, msg ...interface{}) *Assertion { 79 | ErrorString(a.t, expr, str, msg...) 80 | return a 81 | } 82 | 83 | // ErrorType 参照 assert.ErrorType() 函数 84 | func (a *Assertion) ErrorType(expr interface{}, typ error, msg ...interface{}) *Assertion { 85 | ErrorType(a.t, expr, typ, msg...) 86 | return a 87 | } 88 | 89 | // NotError 参照 assert.NotError() 函数 90 | func (a *Assertion) NotError(expr interface{}, msg ...interface{}) *Assertion { 91 | NotError(a.t, expr, msg...) 92 | return a 93 | } 94 | 95 | // ErrorIs 断言 expr 为 target 类型 96 | // 97 | // 相当于 a.True(errors.Is(expr, target)) 98 | func (a *Assertion) ErrorIs(expr interface{}, target error, msg ...interface{}) *Assertion { 99 | ErrorIs(a.t, expr, target, msg...) 100 | return a 101 | } 102 | 103 | // FileExists 参照 assert.FileExists() 函数 104 | func (a *Assertion) FileExists(path string, msg ...interface{}) *Assertion { 105 | FileExists(a.t, path, msg...) 106 | return a 107 | } 108 | 109 | // FileNotExists 参照 assert.FileNotExists() 函数 110 | func (a *Assertion) FileNotExists(path string, msg ...interface{}) *Assertion { 111 | FileNotExists(a.t, path, msg...) 112 | return a 113 | } 114 | 115 | // Panic 参照 assert.Panic() 函数 116 | func (a *Assertion) Panic(fn func(), msg ...interface{}) *Assertion { 117 | Panic(a.t, fn, msg...) 
118 | return a 119 | } 120 | 121 | // PanicString 参照 assert.PanicString() 函数 122 | func (a *Assertion) PanicString(fn func(), str string, msg ...interface{}) *Assertion { 123 | PanicString(a.t, fn, str, msg...) 124 | return a 125 | } 126 | 127 | // PanicType 参照 assert.PanicType() 函数 128 | func (a *Assertion) PanicType(fn func(), typ interface{}, msg ...interface{}) *Assertion { 129 | PanicType(a.t, fn, typ, msg...) 130 | return a 131 | } 132 | 133 | // NotPanic 参照 assert.NotPanic() 函数 134 | func (a *Assertion) NotPanic(fn func(), msg ...interface{}) *Assertion { 135 | NotPanic(a.t, fn, msg...) 136 | return a 137 | } 138 | 139 | // Contains 参照 assert.Contains() 函数 140 | func (a *Assertion) Contains(container, item interface{}, msg ...interface{}) *Assertion { 141 | Contains(a.t, container, item, msg...) 142 | return a 143 | } 144 | 145 | // NotContains 参照 assert.NotContains() 函数 146 | func (a *Assertion) NotContains(container, item interface{}, msg ...interface{}) *Assertion { 147 | NotContains(a.t, container, item, msg...) 148 | return a 149 | } 150 | -------------------------------------------------------------------------------- /vendor/github.com/issue9/assert/doc.go: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: MIT 2 | 3 | // Package assert 是对 testing 包的一些简单包装 4 | // 5 | // 提供了两种操作方式:直接调用包函数;或是使用 Assertion 对象。 6 | // 两种方式完全等价,可以根据自己需要,选择一种。 7 | // func TestAssert(t *testing.T) { 8 | // var v interface{} = 5 9 | // 10 | // // 直接调用包函数 11 | // assert.True(t, v == 5, "v的值[%v]不等于5", v) 12 | // assert.Equal(t, 5, v, "v的值[%v]不等于5", v) 13 | // assert.Nil(t, v) 14 | // 15 | // // 以 Assertion 对象方式使用 16 | // a := assert.New(t) 17 | // a.True(v==5, "v的值[%v]不等于5", v) 18 | // a.Equal(5, v, "v的值[%v]不等于5", v) 19 | // a.Nil(v) 20 | // a.TB().Log("success") 21 | // 22 | // // 以函数链的形式调用 Assertion 对象的方法 23 | // a.True(false).Equal(5,6) 24 | // } 25 | // 26 | // // 也可以对 testing.B 使用 27 | // func Benchmark1(b *testing.B) { 28 | // a := assert.New(b) 29 | // a.True(false) 30 | // for(i:=0; i= reflect.Chan && k <= reflect.Slice && v.IsNil() 80 | } 81 | 82 | // IsEqual 判断两个值是否相等。 83 | // 84 | // 除了通过 reflect.DeepEqual() 判断值是否相等之外,一些类似 85 | // 可转换的数值也能正确判断,比如以下值也将会被判断为相等: 86 | // int8(5) == int(5) 87 | // []int{1,2} == []int8{1,2} 88 | // []int{1,2} == [2]int8{1,2} 89 | // []int{1,2} == []float32{1,2} 90 | // map[string]int{"1":"2":2} == map[string]int8{"1":1,"2":2} 91 | // 92 | // // map 的键值不同,即使可相互转换也判断不相等。 93 | // map[int]int{1:1,2:2} != map[int8]int{1:1,2:2} 94 | func IsEqual(v1, v2 interface{}) bool { 95 | if reflect.DeepEqual(v1, v2) { 96 | return true 97 | } 98 | 99 | vv1 := reflect.ValueOf(v1) 100 | vv2 := reflect.ValueOf(v2) 101 | 102 | // NOTE: 这里返回 false,而不是 true 103 | if !vv1.IsValid() || !vv2.IsValid() { 104 | return false 105 | } 106 | 107 | if vv1 == vv2 { 108 | return true 109 | } 110 | 111 | vv1Type := vv1.Type() 112 | vv2Type := vv2.Type() 113 | 114 | // 过滤掉已经在 reflect.DeepEqual() 进行处理的类型 115 | switch vv1Type.Kind() { 116 | case reflect.Struct, reflect.Ptr, reflect.Func, reflect.Interface: 117 | return false 118 | case reflect.Slice, reflect.Array: 119 | // vv2.Kind() 与 vv1 的不相同 120 | if vv2.Kind() != reflect.Slice && vv2.Kind() != reflect.Array { 121 | // 虽然类型不同,但可以相互转换成 vv1 的,如:vv2 是 string,vv2 是 []byte, 122 | if vv2Type.ConvertibleTo(vv1Type) { 123 | return IsEqual(vv1.Interface(), vv2.Convert(vv1Type).Interface()) 124 | } 125 | return false 126 | } 127 | 128 | // reflect.DeepEqual() 未考虑类型不同但是类型可转换的情况,比如: 129 | // 
[]int{8,9} == []int8{8,9},此处重新对 slice 和 array 做比较处理。 130 | if vv1.Len() != vv2.Len() { 131 | return false 132 | } 133 | 134 | for i := 0; i < vv1.Len(); i++ { 135 | if !IsEqual(vv1.Index(i).Interface(), vv2.Index(i).Interface()) { 136 | return false 137 | } 138 | } 139 | return true // for 中所有的值比较都相等,返回 true 140 | case reflect.Map: 141 | if vv2.Kind() != reflect.Map { 142 | return false 143 | } 144 | 145 | if vv1.IsNil() != vv2.IsNil() { 146 | return false 147 | } 148 | if vv1.Len() != vv2.Len() { 149 | return false 150 | } 151 | if vv1.Pointer() == vv2.Pointer() { 152 | return true 153 | } 154 | 155 | // 两个 map 的键名类型不同 156 | if vv2Type.Key().Kind() != vv1Type.Key().Kind() { 157 | return false 158 | } 159 | 160 | for _, index := range vv1.MapKeys() { 161 | vv2Index := vv2.MapIndex(index) 162 | if !vv2Index.IsValid() { 163 | return false 164 | } 165 | 166 | if !IsEqual(vv1.MapIndex(index).Interface(), vv2Index.Interface()) { 167 | return false 168 | } 169 | } 170 | return true // for 中所有的值比较都相等,返回 true 171 | case reflect.String: 172 | if vv2.Kind() == reflect.String { 173 | return vv1.String() == vv2.String() 174 | } 175 | if vv2Type.ConvertibleTo(vv1Type) { // 考虑 v1 是 string,v2 是 []byte 的情况 176 | return IsEqual(vv1.Interface(), vv2.Convert(vv1Type).Interface()) 177 | } 178 | 179 | return false 180 | } 181 | 182 | if vv1Type.ConvertibleTo(vv2Type) { 183 | return vv2.Interface() == vv1.Convert(vv2Type).Interface() 184 | } else if vv2Type.ConvertibleTo(vv1Type) { 185 | return vv1.Interface() == vv2.Convert(vv1Type).Interface() 186 | } 187 | 188 | return false 189 | } 190 | 191 | // HasPanic 判断 fn 函数是否会发生 panic 192 | // 若发生了 panic,将把 msg 一起返回。 193 | func HasPanic(fn func()) (has bool, msg interface{}) { 194 | defer func() { 195 | if msg = recover(); msg != nil { 196 | has = true 197 | } 198 | }() 199 | fn() 200 | 201 | return 202 | } 203 | 204 | // IsContains 判断 container 是否包含了 item 的内容。若是指针,会判断指针指向的内容, 205 | // 但是不支持多重指针。 206 | // 207 | // 若 container 是字符串(string、[]byte 和 []rune,不包含 fmt.Stringer 接口), 208 | // 都将会以字符串的形式判断其是否包含 item。 209 | // 若 container 是个列表(array、slice、map)则判断其元素中是否包含 item 中的 210 | // 的所有项,或是 item 本身就是 container 中的一个元素。 211 | func IsContains(container, item interface{}) bool { 212 | if container == nil { // nil不包含任何东西 213 | return false 214 | } 215 | 216 | cv := reflect.ValueOf(container) 217 | iv := reflect.ValueOf(item) 218 | 219 | for cv.Kind() == reflect.Ptr { 220 | cv = cv.Elem() 221 | } 222 | 223 | for iv.Kind() == reflect.Ptr { 224 | iv = iv.Elem() 225 | } 226 | 227 | if IsEqual(container, item) { 228 | return true 229 | } 230 | 231 | // 判断是字符串的情况 232 | switch c := cv.Interface().(type) { 233 | case string: 234 | switch i := iv.Interface().(type) { 235 | case string: 236 | return strings.Contains(c, i) 237 | case []byte: 238 | return strings.Contains(c, string(i)) 239 | case []rune: 240 | return strings.Contains(c, string(i)) 241 | case byte: 242 | return bytes.IndexByte([]byte(c), i) != -1 243 | case rune: 244 | return bytes.IndexRune([]byte(c), i) != -1 245 | } 246 | case []byte: 247 | switch i := iv.Interface().(type) { 248 | case string: 249 | return bytes.Contains(c, []byte(i)) 250 | case []byte: 251 | return bytes.Contains(c, i) 252 | case []rune: 253 | return strings.Contains(string(c), string(i)) 254 | case byte: 255 | return bytes.IndexByte(c, i) != -1 256 | case rune: 257 | return bytes.IndexRune(c, i) != -1 258 | } 259 | case []rune: 260 | switch i := iv.Interface().(type) { 261 | case string: 262 | return strings.Contains(string(c), string(i)) 263 | case 
[]byte: 264 | return strings.Contains(string(c), string(i)) 265 | case []rune: 266 | return strings.Contains(string(c), string(i)) 267 | case byte: 268 | return strings.IndexByte(string(c), i) != -1 269 | case rune: 270 | return strings.IndexRune(string(c), i) != -1 271 | } 272 | } 273 | 274 | if (cv.Kind() == reflect.Slice) || (cv.Kind() == reflect.Array) { 275 | if !cv.IsValid() || cv.Len() == 0 { // 空的,就不算包含另一个,即使另一个也是空值。 276 | return false 277 | } 278 | 279 | if !iv.IsValid() { 280 | return false 281 | } 282 | 283 | // item 是 container 的一个元素 284 | for i := 0; i < cv.Len(); i++ { 285 | if IsEqual(cv.Index(i).Interface(), iv.Interface()) { 286 | return true 287 | } 288 | } 289 | 290 | // 开始判断 item 的元素是否与 container 中的元素相等。 291 | 292 | // 若 item 的长度为 0,表示不包含 293 | if (iv.Kind() != reflect.Slice) || (iv.Len() == 0) { 294 | return false 295 | } 296 | 297 | // item 的元素比 container 的元素多 298 | if iv.Len() > cv.Len() { 299 | return false 300 | } 301 | 302 | // 依次比较 item 的各个子元素是否都存在于 container,且下标都相同 303 | ivIndex := 0 304 | for i := 0; i < cv.Len(); i++ { 305 | if IsEqual(cv.Index(i).Interface(), iv.Index(ivIndex).Interface()) { 306 | if (ivIndex == 0) && (i+iv.Len() > cv.Len()) { 307 | return false 308 | } 309 | ivIndex++ 310 | if ivIndex == iv.Len() { // 已经遍历完 iv 311 | return true 312 | } 313 | } else if ivIndex > 0 { 314 | return false 315 | } 316 | } 317 | return false 318 | } // end cv.Kind == reflect.Slice and reflect.Array 319 | 320 | if cv.Kind() == reflect.Map { 321 | if cv.Len() == 0 { 322 | return false 323 | } 324 | 325 | if (iv.Kind() != reflect.Map) || (iv.Len() == 0) { 326 | return false 327 | } 328 | 329 | if iv.Len() > cv.Len() { 330 | return false 331 | } 332 | 333 | // 判断所有 item 的项都存在于 container 中 334 | for _, key := range iv.MapKeys() { 335 | cvItem := cv.MapIndex(key) 336 | if !cvItem.IsValid() { // container 中不包含该值。 337 | return false 338 | } 339 | if !IsEqual(cvItem.Interface(), iv.MapIndex(key).Interface()) { 340 | return false 341 | } 342 | } 343 | // for 中的所有判断都成立,返回 true 344 | return true 345 | } 346 | 347 | return false 348 | } 349 | -------------------------------------------------------------------------------- /vendor/modules.txt: -------------------------------------------------------------------------------- 1 | # github.com/adamzy/cedar-go v0.0.0-20170805034717-80a9c64b256d 2 | ## explicit 3 | github.com/adamzy/cedar-go 4 | # github.com/adamzy/sego v0.0.0-20151004184924-5eab9a44f8e8 5 | ## explicit 6 | github.com/adamzy/sego 7 | # github.com/issue9/assert v1.4.1 8 | ## explicit 9 | github.com/issue9/assert 10 | --------------------------------------------------------------------------------
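For reference, the vendored cedar-go files above combine into a small workflow: build the trie with `Insert` (as `Dictionary.addToken` does), read it back with `Get`/`Value`/`PrefixPredict`, and persist it with the `Save`/`Load` pair from io.go. A minimal sketch under those assumptions; the keys are illustrative, and `Insert`/`Get` are assumed to return `error` / `(int, error)` respectively, as their call sites in dictionary.go suggest:

```go
package main

import (
	"bytes"
	"fmt"

	"github.com/adamzy/cedar-go"
)

func main() {
	trie := cedar.New()

	// Key -> running index, the same scheme Dictionary.addToken uses.
	for i, key := range []string{"ab", "abc", "abcd"} {
		if err := trie.Insert([]byte(key), i); err != nil {
			panic(err)
		}
	}

	// num=0 walks every key that "ab" prefixes, in the order
	// described by the PrefixPredict comment in api.go.
	for _, id := range trie.PrefixPredict([]byte("ab"), 0) {
		if v, err := trie.Value(id); err == nil {
			fmt.Println("predicted value:", v)
		}
	}

	// Round-trip the trie through the gob branch of Save/Load.
	var buf bytes.Buffer
	if err := trie.Save(&buf, "gob"); err != nil {
		panic(err)
	}
	restored := cedar.New()
	if err := restored.Load(&buf, "gob"); err != nil {
		panic(err)
	}
	if v, err := restored.Get([]byte("abc")); err == nil {
		fmt.Println("restored value for abc:", v) // 1
	}
}
```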
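The search mode documented in utils.go is what distinguishes `SegmentsToSlice(segs, true)` from the plain mode: each token is recursively expanded through `Token.Segments()` before the token itself is emitted. A short sketch against the vendored package; the dictionary path is illustrative, and any file in the "分词文本 频率 词性" format accepted by `LoadDictionary` will do:

```go
package main

import (
	"fmt"

	"github.com/adamzy/sego"
)

func main() {
	var segmenter sego.Segmenter
	// Illustrative path; LoadDictionary accepts comma-separated files,
	// with earlier (e.g. user) dictionaries taking precedence.
	segmenter.LoadDictionary("data/dictionary.txt")

	segments := segmenter.Segment([]byte("中华人民共和国中央人民政府"))

	// searchMode=true also emits every sub-token of each segment,
	// giving a search engine more keywords to index.
	for _, word := range sego.SegmentsToSlice(segments, true) {
		fmt.Println(word)
	}
}
```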
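Finally, the conversion-aware rules documented on `IsEqual` and `IsContains` are easiest to see on concrete values. A small sketch; the expected results in the comments follow from the documented rules and the code above:

```go
package main

import (
	"fmt"

	"github.com/issue9/assert"
)

func main() {
	// IsEqual tolerates convertible element types, unlike reflect.DeepEqual.
	fmt.Println(assert.IsEqual([]int{1, 2}, []int8{1, 2})) // true
	fmt.Println(assert.IsEqual(int8(5), 5))                // true

	// Map keys of different kinds never compare equal, even when convertible.
	fmt.Println(assert.IsEqual(map[int]int{1: 1}, map[int8]int{1: 1})) // false

	// String-like containers reduce to substring checks...
	fmt.Println(assert.IsContains("中文abc", []byte("abc"))) // true

	// ...while slices check for an ordered, contiguous sub-slice.
	fmt.Println(assert.IsContains([]int{1, 2, 3}, []int{2, 3})) // true
	fmt.Println(assert.IsContains([]int{1, 2, 3}, []int{3, 2})) // false
}
```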