├── README.md
├── data
│   ├── README.md
│   └── dictionary.txt
├── dictionary.go
├── go.mod
├── go.sum
├── license.txt
├── segment.go
├── segmenter.go
├── segmenter_test.go
├── server
│   ├── server.go
│   └── static
│       ├── index.html
│       └── jquery.min.js
├── test_utils.go
├── testdata
│   ├── bailuyuan.txt
│   ├── test_dict1.txt
│   └── test_dict2.txt
├── token.go
├── tools
│   ├── benchmark.go
│   ├── example.go
│   └── goroutines.go
├── utils.go
├── utils_test.go
└── vendor
    ├── github.com
    │   ├── adamzy
    │   │   ├── cedar-go
    │   │   │   ├── LICENSE.md
    │   │   │   ├── README.md
    │   │   │   ├── api.go
    │   │   │   ├── cedar.go
    │   │   │   ├── doc.go
    │   │   │   ├── errors.go
    │   │   │   └── io.go
    │   │   └── sego
    │   │       ├── README.md
    │   │       ├── dictionary.go
    │   │       ├── license.txt
    │   │       ├── segment.go
    │   │       ├── segmenter.go
    │   │       ├── test_utils.go
    │   │       ├── token.go
    │   │       └── utils.go
    │   └── issue9
    │       └── assert
    │           ├── .editorconfig
    │           ├── .gitignore
    │           ├── LICENSE
    │           ├── README.md
    │           ├── assert.go
    │           ├── assertion.go
    │           ├── doc.go
    │           ├── go.mod
    │           └── util.go
    └── modules.txt

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
sego
====

Chinese word segmentation in Go.

The dictionary is implemented as a double-array trie; the segmentation
algorithm is a frequency-based shortest path found by dynamic programming.

sego supports normal and search-engine segmentation modes, user dictionaries
and part-of-speech tagging, and can run as a JSON RPC service.

Segmentation speed is 9 MB/s single-threaded and 42 MB/s with concurrent
goroutines (8-core MacBook Pro).

# Install/update

```
go get -u github.com/huichen/sego
```

# Usage

```go
package main

import (
	"fmt"
	"github.com/huichen/sego"
)

func main() {
	// Load the dictionary
	var segmenter sego.Segmenter
	segmenter.LoadDictionary("github.com/huichen/sego/data/dictionary.txt")

	// Segment the text
	text := []byte("中华人民共和国中央人民政府")
	segments := segmenter.Segment(text)

	// Process the segmentation result.
	// Both normal mode and search mode are supported; see the comment on the
	// SegmentsToString function in the code.
	fmt.Println(sego.SegmentsToString(segments, false))
}
```
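The README's example uses normal mode; a minimal sketch of the difference between the two modes, assuming you run it from a local checkout so that the `data/dictionary.txt` path (an assumption, adjust to your setup) resolves:

```go
package main

import (
	"fmt"

	"github.com/huichen/sego"
)

func main() {
	var segmenter sego.Segmenter
	// Assumed path: run from the repository root so data/dictionary.txt exists.
	segmenter.LoadDictionary("data/dictionary.txt")

	text := []byte("中华人民共和国中央人民政府")
	segments := segmenter.Segment(text)

	// Normal mode: one token per position on the shortest path.
	fmt.Println(sego.SegmentsToString(segments, false))
	// Search-engine mode: additionally expands each token into its
	// sub-tokens, which is what an inverted index usually wants.
	fmt.Println(sego.SegmentsToString(segments, true))
}
```

With the bundled dictionary, the tests in segmenter_test.go show normal mode producing `中华人民共和国中央人民政府/nt` while search mode additionally emits sub-tokens such as `中华/nz` and `人民/n`.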
--------------------------------------------------------------------------------
/data/README.md:
--------------------------------------------------------------------------------
dictionary.txt is copied from github.com/fxsjy/jieba

--------------------------------------------------------------------------------
/dictionary.go:
--------------------------------------------------------------------------------
package sego

import "github.com/adamzy/cedar-go"

// Dictionary implements a prefix trie over strings; a token may end at a leaf
// node or at an interior node.
type Dictionary struct {
	trie           *cedar.Cedar // cedar prefix trie
	maxTokenLength int          // length of the longest token in the dictionary
	tokens         []Token      // all tokens in the dictionary, for easy iteration
	totalFrequency int64        // sum of the frequencies of all tokens
}

func NewDictionary() *Dictionary {
	return &Dictionary{trie: cedar.New()}
}

// MaxTokenLength returns the length of the longest token in the dictionary.
func (dict *Dictionary) MaxTokenLength() int {
	return dict.maxTokenLength
}

// NumTokens returns the number of tokens in the dictionary.
func (dict *Dictionary) NumTokens() int {
	return len(dict.tokens)
}

// TotalFrequency returns the sum of the frequencies of all tokens.
func (dict *Dictionary) TotalFrequency() int64 {
	return dict.totalFrequency
}

// Close releases the dictionary's resources.
func (dict *Dictionary) Close() {
	dict.trie = nil
	dict.maxTokenLength = 0
	dict.tokens = nil
	dict.totalFrequency = int64(0)
}

// addToken adds a token to the dictionary.
func (dict *Dictionary) addToken(token Token) {
	bytes := textSliceToBytes(token.text)
	if _, err := dict.trie.Get(bytes); err == nil {
		// The token is already in the dictionary.
		return
	}

	dict.trie.Insert(bytes, dict.NumTokens())
	dict.tokens = append(dict.tokens, token)
	dict.totalFrequency += int64(token.frequency)
	if len(token.text) > dict.maxTokenLength {
		dict.maxTokenLength = len(token.text)
	}
}

// lookupTokens finds all dictionary tokens that are prefixes of the word
// sequence words, and returns the number of tokens found.
func (dict *Dictionary) lookupTokens(words []Text, tokens []*Token) (numOfTokens int) {
	var id, value int
	var err error
	for _, word := range words {
		id, err = dict.trie.Jump(word, id)
		if err != nil {
			break
		}
		value, err = dict.trie.Value(id)
		if err == nil {
			tokens[numOfTokens] = &dict.tokens[value]
			numOfTokens++
		}
	}
	return
}
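lookupTokens walks the cedar trie incrementally with Jump, so one pass over a window of the text returns every dictionary entry that begins at that position. The dictionary is populated through Segmenter.LoadDictionary; a small sketch of inspecting it through the exported accessors (the dictionary path is an assumption):

```go
package main

import (
	"fmt"

	"github.com/huichen/sego"
)

func main() {
	var seg sego.Segmenter
	seg.LoadDictionary("data/dictionary.txt") // assumed path

	dict := seg.Dictionary()
	fmt.Println("tokens:", dict.NumTokens())          // entries kept after frequency filtering
	fmt.Println("longest:", dict.MaxTokenLength())    // measured in words, not bytes
	fmt.Println("total freq:", dict.TotalFrequency()) // denominator of the path values
}
```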
--------------------------------------------------------------------------------
/go.mod:
--------------------------------------------------------------------------------
module github.com/huichen/sego

go 1.16

require (
	github.com/adamzy/cedar-go v0.0.0-20170805034717-80a9c64b256d
	github.com/adamzy/sego v0.0.0-20151004184924-5eab9a44f8e8
	github.com/issue9/assert v1.4.1
)

--------------------------------------------------------------------------------
/go.sum:
--------------------------------------------------------------------------------
github.com/adamzy/cedar-go v0.0.0-20170805034717-80a9c64b256d h1:ir/IFJU5xbja5UaBEQLjcvn7aAU01nqU/NUyOBEU+ew=
github.com/adamzy/cedar-go v0.0.0-20170805034717-80a9c64b256d/go.mod h1:PRWNwWq0yifz6XDPZu48aSld8BWwBfr2JKB2bGWiEd4=
github.com/adamzy/sego v0.0.0-20151004184924-5eab9a44f8e8 h1:0RkucRD7zjlMC44X9ni444XpR3vjzyUeHRlViwOwjUw=
github.com/adamzy/sego v0.0.0-20151004184924-5eab9a44f8e8/go.mod h1:KQxo+Xesl2wLJ3yJcX443KaoWzXpbPzU1GNRyE8kNEY=
github.com/issue9/assert v1.4.1 h1:gUtOpMTeaE4JTe9kACma5foOHBvVt1p5XTFrULDwdXI=
github.com/issue9/assert v1.4.1/go.mod h1:Yktk83hAVl1SPSYtd9kjhBizuiBIqUQyj+D5SE2yjVY=

--------------------------------------------------------------------------------
/license.txt:
--------------------------------------------------------------------------------
Copyright 2013 Hui Chen

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

--------------------------------------------------------------------------------
/segment.go:
--------------------------------------------------------------------------------
package sego

// Segment is one token occurrence in a text.
type Segment struct {
	// Byte position in the text where the token starts.
	start int

	// Byte position in the text where the token ends (exclusive).
	end int

	// The token itself.
	token *Token
}

// Start returns the byte position in the text where the token starts.
func (s *Segment) Start() int {
	return s.start
}

// End returns the byte position in the text where the token ends (exclusive).
func (s *Segment) End() int {
	return s.end
}

// Token returns the token of this segment.
func (s *Segment) Token() *Token {
	return s.token
}
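Start and End are byte offsets into the original UTF-8 input, not rune indices, so each CJK character spans three bytes. A short sketch (dictionary path assumed, as above):

```go
package main

import (
	"fmt"

	"github.com/huichen/sego"
)

func main() {
	var seg sego.Segmenter
	seg.LoadDictionary("data/dictionary.txt") // assumed path

	text := []byte("中国人口")
	for _, s := range seg.Segment(text) {
		// Slicing the input with the byte offsets recovers the token text.
		fmt.Printf("%s [%d,%d)\n", text[s.Start():s.End()], s.Start(), s.End())
	}
}
```

With the bundled dictionary this prints `中国 [0,6)` and `人口 [6,12)`, matching the offsets asserted in segmenter_test.go.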
--------------------------------------------------------------------------------
/segmenter.go:
--------------------------------------------------------------------------------
// Chinese word segmentation in Go.
package sego

import (
	"bufio"
	"fmt"
	"log"
	"math"
	"os"
	"strconv"
	"strings"
	"unicode"
	"unicode/utf8"
)

const (
	minTokenFrequency = 2 // only read tokens with at least this frequency from the dictionary file
)

// Segmenter is the word segmenter.
type Segmenter struct {
	dict *Dictionary
}

// jumper records the forward jump chosen at one word of the text during the
// Viterbi-style search: the token taken and the shortest-path value from the
// start of the text segment to this word.
type jumper struct {
	minDistance float32
	token       *Token
}

// Dictionary returns the dictionary used by the segmenter.
func (seg *Segmenter) Dictionary() *Dictionary {
	return seg.dict
}

// LoadDictionary loads the dictionary from one or more files.
//
// Multiple dictionary files can be loaded by separating the names with ",";
// files listed first take precedence, e.g.
//	"user_dict.txt,common_dict.txt"
// When a token appears in both the user dictionary and the common dictionary,
// the user dictionary entry wins.
//
// Dictionary format (one token per line):
//	token_text frequency part_of_speech
func (seg *Segmenter) LoadDictionary(files string) {
	seg.dict = NewDictionary()
	for _, file := range strings.Split(files, ",") {
		log.Printf("载入sego词典 %s", file)
		dictFile, err := os.Open(file)
		if err != nil {
			log.Fatalf("无法载入字典文件 \"%s\" \n", file)
		}
		defer dictFile.Close()

		reader := bufio.NewReader(dictFile)
		var text string
		var freqText string
		var frequency int
		var pos string

		// Read the tokens line by line.
		for {
			size, _ := fmt.Fscanln(reader, &text, &freqText, &pos)

			if size == 0 {
				// End of file.
				break
			} else if size < 2 {
				// Invalid line.
				continue
			} else if size == 2 {
				// No part-of-speech tag; use the empty string.
				pos = ""
			}

			// Parse the frequency.
			var err error
			frequency, err = strconv.Atoi(freqText)
			if err != nil {
				continue
			}

			// Skip tokens whose frequency is too low.
			if frequency < minTokenFrequency {
				continue
			}

			// Add the token to the dictionary.
			words := splitTextToWords([]byte(text))
			token := Token{text: words, frequency: frequency, pos: pos}
			seg.dict.addToken(token)
		}
	}

	// Compute each token's path value; see the comment on the Token struct
	// for what the path value means.
	logTotalFrequency := float32(math.Log2(float64(seg.dict.totalFrequency)))
	for i := range seg.dict.tokens {
		token := &seg.dict.tokens[i]
		token.distance = logTotalFrequency - float32(math.Log2(float64(token.frequency)))
	}

	// Subdivide each token further, for search-engine mode; see the comment
	// on the Token struct for how that mode is used.
	for i := range seg.dict.tokens {
		token := &seg.dict.tokens[i]
		segments := seg.segmentWords(token.text, true)

		// Count the sub-tokens that need to be added.
		numTokensToAdd := 0
		for iToken := 0; iToken < len(segments); iToken++ {
			if len(segments[iToken].token.text) > 0 {
				numTokensToAdd++
			}
		}
		token.segments = make([]*Segment, numTokensToAdd)

		// Add the sub-tokens.
		iSegmentsToAdd := 0
		for iToken := 0; iToken < len(segments); iToken++ {
			if len(segments[iToken].token.text) > 0 {
				token.segments[iSegmentsToAdd] = &segments[iToken]
				iSegmentsToAdd++
			}
		}
	}

	log.Println("sego词典载入完毕")
}

// Segment segments a text.
//
// Input:
//	bytes	the UTF-8 text as a byte slice
//
// Output:
//	[]Segment	the resulting segments
func (seg *Segmenter) Segment(bytes []byte) []Segment {
	return seg.internalSegment(bytes, false)
}

func (seg *Segmenter) InternalSegment(bytes []byte, searchMode bool) []Segment {
	return seg.internalSegment(bytes, searchMode)
}

// Close releases the segmenter's resources.
func (seg *Segmenter) Close() {
	if seg.dict != nil {
		seg.dict.Close()
	}
}

func (seg *Segmenter) internalSegment(bytes []byte, searchMode bool) []Segment {
	// Handle the trivial case.
	if len(bytes) == 0 {
		return []Segment{}
	}

	// Split the text into words.
	text := splitTextToWords(bytes)

	return seg.segmentWords(text, searchMode)
}

func (seg *Segmenter) segmentWords(text []Text, searchMode bool) []Segment {
	// In search mode a single-word token cannot be subdivided any further.
	if searchMode && len(text) == 1 {
		return []Segment{}
	}

	// jumpers holds the forward-jump information at each word: the token
	// taken by that jump and the shortest-path value from the start of the
	// text segment to the word.
	jumpers := make([]jumper, len(text))

	tokens := make([]*Token, seg.dict.maxTokenLength)
	for current := 0; current < len(text); current++ {
		// Look up the shortest path at the previous word, as the base for
		// the path values computed below.
		var baseDistance float32
		if current == 0 {
			// At the start of the text the base distance is zero.
			baseDistance = 0
		} else {
			baseDistance = jumpers[current-1].minDistance
		}

		// Find all tokens that start at the current word.
		numTokens := seg.dict.lookupTokens(
			text[current:minInt(current+seg.dict.maxTokenLength, len(text))], tokens)

		// For each candidate token, update the jump information at the word
		// where that token ends.
		for iToken := 0; iToken < numTokens; iToken++ {
			location := current + len(tokens[iToken].text) - 1
			if !searchMode || current != 0 || location != len(text)-1 {
				updateJumper(&jumpers[location], baseDistance, tokens[iToken])
			}
		}

		// If no token matches at the current word, add a pseudo token.
		if numTokens == 0 || len(tokens[0].text) > 1 {
			updateJumper(&jumpers[current], baseDistance,
				&Token{text: []Text{text[current]}, frequency: 1, distance: 32, pos: "x"})
		}
	}

	// First backward scan: count the segments to be emitted.
	numSeg := 0
	for index := len(text) - 1; index >= 0; {
		location := index - len(jumpers[index].token.text) + 1
		numSeg++
		index = location - 1
	}

	// Second backward scan: add the segments to the final result.
	outputSegments := make([]Segment, numSeg)
	for index := len(text) - 1; index >= 0; {
		location := index - len(jumpers[index].token.text) + 1
		numSeg--
		outputSegments[numSeg].token = jumpers[index].token
		index = location - 1
	}

	// Compute the byte position of each segment.
	bytePosition := 0
	for iSeg := 0; iSeg < len(outputSegments); iSeg++ {
		outputSegments[iSeg].start = bytePosition
		bytePosition += textSliceByteLength(outputSegments[iSeg].token.text)
		outputSegments[iSeg].end = bytePosition
	}
	return outputSegments
}

// updateJumper updates the jump information at a position when
//  1. the position has never been visited (jumper.minDistance is zero), or
//  2. the position's current shortest path is longer than the new one,
// setting the position's shortest-path value to baseDistance plus the new
// token's distance.
func updateJumper(jumper *jumper, baseDistance float32, token *Token) {
	newDistance := baseDistance + token.distance
	if jumper.minDistance == 0 || jumper.minDistance > newDistance {
		jumper.minDistance = newDistance
		jumper.token = token
	}
}

// minInt returns the smaller of two ints.
func minInt(a, b int) int {
	if a > b {
		return b
	}
	return a
}

// maxInt returns the larger of two ints.
func maxInt(a, b int) int {
	if a > b {
		return a
	}
	return b
}

// splitTextToWords splits a text into words.
func splitTextToWords(text Text) []Text {
	output := make([]Text, 0, len(text)/3)
	current := 0
	inAlphanumeric := true
	alphanumericStart := 0
	for current < len(text) {
		r, size := utf8.DecodeRune(text[current:])
		if size <= 2 && (unicode.IsLetter(r) || unicode.IsNumber(r)) {
			// The current rune is a Latin letter or digit (not CJK).
			if !inAlphanumeric {
				alphanumericStart = current
				inAlphanumeric = true
			}
		} else {
			if inAlphanumeric {
				inAlphanumeric = false
				if current != 0 {
					output = append(output, toLower(text[alphanumericStart:current]))
				}
			}
			output = append(output, text[current:current+size])
		}
		current += size
	}

	// Handle the case where the last word is alphanumeric.
	if inAlphanumeric {
		if current != 0 {
			output = append(output, toLower(text[alphanumericStart:current]))
		}
	}

	return output
}

// toLower converts an English word to lower case.
func toLower(text []byte) []byte {
	output := make([]byte, len(text))
	for i, t := range text {
		if t >= 'A' && t <= 'Z' {
			output[i] = t - 'A' + 'a'
		} else {
			output[i] = t
		}
	}
	return output
}
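The "text frequency part-of-speech" format documented on LoadDictionary is easy to exercise end to end; a self-contained sketch using a throwaway two-entry dictionary (the entries and frequencies are made up for the illustration):

```go
package main

import (
	"fmt"
	"os"

	"github.com/huichen/sego"
)

func main() {
	// Hypothetical dictionary: one token per line, frequency >= minTokenFrequency (2),
	// with the part-of-speech column optional.
	f, err := os.CreateTemp("", "dict-*.txt")
	if err != nil {
		panic(err)
	}
	defer os.Remove(f.Name())
	if _, err := f.WriteString("中国 100 ns\n人口 80 n\n"); err != nil {
		panic(err)
	}
	f.Close()

	var seg sego.Segmenter
	seg.LoadDictionary(f.Name()) // several files could be joined with ","
	fmt.Println(sego.SegmentsToString(seg.Segment([]byte("中国人口")), false))
	// Prints: 中国/ns 人口/n
}
```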
--------------------------------------------------------------------------------
/segmenter_test.go:
--------------------------------------------------------------------------------
package sego

import (
	"testing"
)

var (
	prodSeg = Segmenter{}
)

func TestSplit(t *testing.T) {
	expect(t, "中/国/有/十/三/亿/人/口/",
		bytesToString(splitTextToWords([]byte(
			"中国有十三亿人口"))))

	expect(t, "github/ /is/ /a/ /web/-/based/ /hosting/ /service/,/ /for/ /software/ /development/ /projects/./",
		bytesToString(splitTextToWords([]byte(
			"GitHub is a web-based hosting service, for software development projects."))))

	expect(t, "中/国/雅/虎/yahoo/!/ /china/致/力/于/,/领/先/的/公/益/民/生/门/户/网/站/。/",
		bytesToString(splitTextToWords([]byte(
			"中国雅虎Yahoo! China致力于,领先的公益民生门户网站。"))))

	expect(t, "こ/ん/に/ち/は/", bytesToString(splitTextToWords([]byte("こんにちは"))))

	expect(t, "안/녕/하/세/요/", bytesToString(splitTextToWords([]byte("안녕하세요"))))

	expect(t, "Я/ /тоже/ /рада/ /Вас/ /видеть/", bytesToString(splitTextToWords([]byte("Я тоже рада Вас видеть"))))

	expect(t, "¿/cómo/ /van/ /las/ /cosas/", bytesToString(splitTextToWords([]byte("¿Cómo van las cosas"))))

	expect(t, "wie/ /geht/ /es/ /ihnen/", bytesToString(splitTextToWords([]byte("Wie geht es Ihnen"))))

	expect(t, "je/ /suis/ /enchanté/ /de/ /cette/ /pièce/",
		bytesToString(splitTextToWords([]byte("Je suis enchanté de cette pièce"))))
}

func TestSegment(t *testing.T) {
	var seg Segmenter
	seg.LoadDictionary("testdata/test_dict1.txt,testdata/test_dict2.txt")
	expect(t, "12", seg.dict.NumTokens())
	segments := seg.Segment([]byte("中国有十三亿人口"))
	expect(t, "中国/ 有/p3 十三亿/ 人口/p12 ", SegmentsToString(segments, false))
	expect(t, "4", len(segments))
	expect(t, "0", segments[0].start)
	expect(t, "6", segments[0].end)
	expect(t, "6", segments[1].start)
	expect(t, "9", segments[1].end)
	expect(t, "9", segments[2].start)
	expect(t, "18", segments[2].end)
	expect(t, "18", segments[3].start)
	expect(t, "24", segments[3].end)
}

func TestLargeDictionary(t *testing.T) {
	prodSeg.LoadDictionary("data/dictionary.txt")
	expect(t, "中国/ns 人口/n ", SegmentsToString(prodSeg.Segment(
		[]byte("中国人口")), false))

	expect(t, "中国/ns 人口/n ", SegmentsToString(prodSeg.internalSegment(
		[]byte("中国人口"), false), false))

	expect(t, "中国/ns 人口/n ", SegmentsToString(prodSeg.internalSegment(
		[]byte("中国人口"), true), false))

	expect(t, "中华人民共和国/ns 中央人民政府/nt ", SegmentsToString(prodSeg.internalSegment(
		[]byte("中华人民共和国中央人民政府"), true), false))

	expect(t, "中华人民共和国中央人民政府/nt ", SegmentsToString(prodSeg.internalSegment(
		[]byte("中华人民共和国中央人民政府"), false), false))

	expect(t, "中华/nz 人民/n 共和/nz 国/n 共和国/ns 人民共和国/nt 中华人民共和国/ns 中央/n 人民/n 政府/n 人民政府/nt 中央人民政府/nt 中华人民共和国中央人民政府/nt ", SegmentsToString(prodSeg.Segment(
		[]byte("中华人民共和国中央人民政府")), true))
}
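The expectations above hinge on the path values assigned in LoadDictionary: distance = log2(totalFrequency) - log2(frequency) = -log2(P(token)), so minimizing the sum of distances maximizes the product of token probabilities, and updateJumper is the relaxation step of a shortest-path pass over that weighted graph. A toy illustration with made-up frequencies:

```go
package main

import (
	"fmt"
	"math"
)

// distance mirrors the weighting in LoadDictionary: -log2 of the token's
// relative frequency.
func distance(frequency, totalFrequency float64) float64 {
	return math.Log2(totalFrequency) - math.Log2(frequency)
}

func main() {
	total := 1000.0 // hypothetical sum of all dictionary frequencies

	common := distance(100, total) // ~3.32
	rare := distance(2, total)     // ~8.97
	fmt.Println(common, rare)

	// Segmenting a span as one common token (cost ~3.32) beats splitting it
	// into two rare tokens (cost ~17.93), which is why frequent words win.
	fmt.Println(common < rare+rare) // true
}
```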
--------------------------------------------------------------------------------
/server/server.go:
--------------------------------------------------------------------------------
/*

The sego segmentation server provides two modes at once:

	"/"     a segmentation demo page
	"/json" a JSON-format RPC service
		input:
			a text parameter, via POST or GET
		output, in JSON format:
			{
				segments:[
					{"text":"服务器", "pos":"n"},
					{"text":"指令", "pos":"n"},
					...
				]
			}

A test server is available at http://sego.weiboglass.com

*/

package main

import (
	"encoding/json"
	"flag"
	"fmt"
	"io"
	"log"
	"net/http"
	"runtime"

	"github.com/huichen/sego"
)

var (
	host         = flag.String("host", "", "HTTP服务器主机名")
	port         = flag.Int("port", 8080, "HTTP服务器端口")
	dict         = flag.String("dict", "../data/dictionary.txt", "词典文件")
	staticFolder = flag.String("static_folder", "static", "静态页面存放的目录")
	segmenter    = sego.Segmenter{}
)

type JsonResponse struct {
	Segments []*Segment `json:"segments"`
}

type Segment struct {
	Text string `json:"text"`
	Pos  string `json:"pos"`
}

func JsonRpcServer(w http.ResponseWriter, req *http.Request) {
	// Get the text to segment.
	text := req.URL.Query().Get("text")
	if text == "" {
		text = req.PostFormValue("text")
	}

	// Segment it.
	segments := segmenter.Segment([]byte(text))

	// Convert to the output format.
	ss := []*Segment{}
	for _, segment := range segments {
		ss = append(ss, &Segment{Text: segment.Token().Text(), Pos: segment.Token().Pos()})
	}
	response, _ := json.Marshal(&JsonResponse{Segments: ss})

	w.Header().Set("Content-Type", "application/json")
	io.WriteString(w, string(response))
}

func main() {
	flag.Parse()

	// Use as many OS threads as there are CPUs.
	runtime.GOMAXPROCS(runtime.NumCPU())

	// Initialize the segmenter.
	segmenter.LoadDictionary(*dict)

	http.HandleFunc("/json", JsonRpcServer)
	http.Handle("/", http.FileServer(http.Dir(*staticFolder)))
	log.Print("服务器启动")
	log.Fatal(http.ListenAndServe(fmt.Sprintf("%s:%d", *host, *port), nil))
}
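A minimal Go client for the /json endpoint, matching the request and response shape in the doc comment above (host and port are the flag defaults; adjust as needed):

```go
package main

import (
	"fmt"
	"io"
	"net/http"
	"net/url"
)

func main() {
	// GET works as well as POST; the text goes in the "text" parameter.
	resp, err := http.Get("http://localhost:8080/json?text=" +
		url.QueryEscape("中华人民共和国中央人民政府"))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	body, _ := io.ReadAll(resp.Body)
	fmt.Println(string(body)) // e.g. {"segments":[{"text":"...","pos":"..."}]}
}
```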
--------------------------------------------------------------------------------
/server/static/index.html:
--------------------------------------------------------------------------------
[Demo page titled "sego中文分词演示" (sego Chinese segmentation demo) that submits
text to the /json service and renders the returned segments; the HTML markup was
lost in extraction, and only the page title survives.]

--------------------------------------------------------------------------------
/server/static/jquery.min.js:
--------------------------------------------------------------------------------
/*! jQuery v1.7.2 jquery.com | jquery.org/license */
[Vendored, minified jQuery 1.7.2 used by the demo page; source omitted.]
a})}(f.ajaxSettings.xhr()),f.support.ajax&&f.ajaxTransport(function(c){if(!c.crossDomain||f.support.cors){var d;return{send:function(e,g){var h=c.xhr(),i,j;c.username?h.open(c.type,c.url,c.async,c.username,c.password):h.open(c.type,c.url,c.async);if(c.xhrFields)for(j in c.xhrFields)h[j]=c.xhrFields[j];c.mimeType&&h.overrideMimeType&&h.overrideMimeType(c.mimeType),!c.crossDomain&&!e["X-Requested-With"]&&(e["X-Requested-With"]="XMLHttpRequest");try{for(j in e)h.setRequestHeader(j,e[j])}catch(k){}h.send(c.hasContent&&c.data||null),d=function(a,e){var j,k,l,m,n;try{if(d&&(e||h.readyState===4)){d=b,i&&(h.onreadystatechange=f.noop,ce&&delete cg[i]);if(e)h.readyState!==4&&h.abort();else{j=h.status,l=h.getAllResponseHeaders(),m={},n=h.responseXML,n&&n.documentElement&&(m.xml=n);try{m.text=h.responseText}catch(a){}try{k=h.statusText}catch(o){k=""}!j&&c.isLocal&&!c.crossDomain?j=m.text?200:404:j===1223&&(j=204)}}}catch(p){e||g(-1,p)}m&&g(j,k,m,l)},!c.async||h.readyState===4?d():(i=++cf,ce&&(cg||(cg={},f(a).unload(ce)),cg[i]=d),h.onreadystatechange=d)},abort:function(){d&&d(0,1)}}}});var cj={},ck,cl,cm=/^(?:toggle|show|hide)$/,cn=/^([+\-]=)?([\d+.\-]+)([a-z%]*)$/i,co,cp=[["height","marginTop","marginBottom","paddingTop","paddingBottom"],["width","marginLeft","marginRight","paddingLeft","paddingRight"],["opacity"]],cq;f.fn.extend({show:function(a,b,c){var d,e;if(a||a===0)return this.animate(ct("show",3),a,b,c);for(var g=0,h=this.length;g=i.duration+this.startTime){this.now=this.end,this.pos=this.state=1,this.update(),i.animatedProperties[this.prop]=!0;for(b in i.animatedProperties)i.animatedProperties[b]!==!0&&(g=!1);if(g){i.overflow!=null&&!f.support.shrinkWrapBlocks&&f.each(["","X","Y"],function(a,b){h.style["overflow"+b]=i.overflow[a]}),i.hide&&f(h).hide();if(i.hide||i.show)for(b in i.animatedProperties)f.style(h,b,i.orig[b]),f.removeData(h,"fxshow"+b,!0),f.removeData(h,"toggle"+b,!0);d=i.complete,d&&(i.complete=!1,d.call(h))}return!1}i.duration==Infinity?this.now=e:(c=e-this.startTime,this.state=c/i.duration,this.pos=f.easing[i.animatedProperties[this.prop]](this.state,c,0,1,i.duration),this.now=this.start+(this.end-this.start)*this.pos),this.update();return!0}},f.extend(f.fx,{tick:function(){var a,b=f.timers,c=0;for(;c-1,k={},l={},m,n;j?(l=e.position(),m=l.top,n=l.left):(m=parseFloat(h)||0,n=parseFloat(i)||0),f.isFunction(b)&&(b=b.call(a,c,g)),b.top!=null&&(k.top=b.top-g.top+m),b.left!=null&&(k.left=b.left-g.left+n),"using"in b?b.using.call(a,k):e.css(k)}},f.fn.extend({position:function(){if(!this[0])return null;var a=this[0],b=this.offsetParent(),c=this.offset(),d=cx.test(b[0].nodeName)?{top:0,left:0}:b.offset();c.top-=parseFloat(f.css(a,"marginTop"))||0,c.left-=parseFloat(f.css(a,"marginLeft"))||0,d.top+=parseFloat(f.css(b[0],"borderTopWidth"))||0,d.left+=parseFloat(f.css(b[0],"borderLeftWidth"))||0;return{top:c.top-d.top,left:c.left-d.left}},offsetParent:function(){return this.map(function(){var a=this.offsetParent||c.body;while(a&&!cx.test(a.nodeName)&&f.css(a,"position")==="static")a=a.offsetParent;return a})}}),f.each({scrollLeft:"pageXOffset",scrollTop:"pageYOffset"},function(a,c){var d=/Y/.test(c);f.fn[a]=function(e){return f.access(this,function(a,e,g){var h=cy(a);if(g===b)return h?c in h?h[c]:f.support.boxModel&&h.document.documentElement[e]||h.document.body[e]:a[e];h?h.scrollTo(d?f(h).scrollLeft():g,d?g:f(h).scrollTop()):a[e]=g},a,e,arguments.length,null)}}),f.each({Height:"height",Width:"width"},function(a,c){var d="client"+a,e="scroll"+a,g="offset"+a;f.fn["inner"+a]=function(){var 
--------------------------------------------------------------------------------
/test_utils.go:
--------------------------------------------------------------------------------
1 | package sego
2 | 
3 | import (
4 | "fmt"
5 | "testing"
6 | )
7 | 
8 | func expect(t *testing.T, expect string, actual interface{}) {
9 | actualString := fmt.Sprint(actual)
10 | if expect != actualString {
11 | t.Errorf("expected=\"%s\", actual=\"%s\"", expect, actualString)
12 | }
13 | }
14 | 
15 | func printTokens(tokens []*Token, numTokens int) (output string) {
16 | for iToken := 0; iToken < numTokens; iToken++ {
17 | for _, word := range tokens[iToken].text {
18 | output += fmt.Sprint(string(word))
19 | }
20 | output += " "
21 | }
22 | return
23 | }
24 | 
25 | func toWords(strings ...string) []Text {
26 | words := []Text{}
27 | for _, s := range strings {
28 | words = append(words, []byte(s))
29 | }
30 | return words
31 | }
32 | 
33 | func bytesToString(bytes []Text) (output string) {
34 | for _, b := range bytes {
35 | output += (string(b) + "/")
36 | }
37 | return
38 | }
39 | 
--------------------------------------------------------------------------------
/testdata/test_dict1.txt:
--------------------------------------------------------------------------------
1 | 中 64 p1
2 | 国 64 p2
3 | 有 64 p3
4 | 三 64
5 | 亿 64 p5
6 | 人 64 p6
7 | 口 64 p7
8 | 
--------------------------------------------------------------------------------
/testdata/test_dict2.txt:
--------------------------------------------------------------------------------
1 | 中国 32
2 | 国有 8 p9
3 | 十三 16 p10
4 | 十三亿 4
5 | 人口 16 p12
6 | 
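The two test dictionaries above illustrate the plain-text dictionary format the segmenter consumes: one entry per line, with a word, its corpus frequency, and an optional part-of-speech tag. As an illustration only (the parseLine helper below is hypothetical, not sego's actual loader), one such line could be parsed like this:

```go
package main

import (
	"fmt"
	"strconv"
	"strings"
)

// entry mirrors one dictionary line: "word frequency [pos]".
type entry struct {
	text      string
	frequency int
	pos       string // may be empty, as in "三 64" above
}

// parseLine is a hypothetical helper, shown only to document the format.
func parseLine(line string) (entry, error) {
	fields := strings.Fields(line)
	if len(fields) < 2 {
		return entry{}, fmt.Errorf("malformed dictionary line: %q", line)
	}
	freq, err := strconv.Atoi(fields[1])
	if err != nil {
		return entry{}, err
	}
	e := entry{text: fields[0], frequency: freq}
	if len(fields) > 2 {
		e.pos = fields[2]
	}
	return e, nil
}

func main() {
	e, _ := parseLine("中 64 p1")
	fmt.Printf("%+v\n", e) // {text:中 frequency:64 pos:p1}
}
```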
--------------------------------------------------------------------------------
/token.go:
--------------------------------------------------------------------------------
1 | package sego
2 | 
3 | // Text is a string type that can represent
4 | // 1. a single character unit, such as "中" or "国"; an English word counts as one unit
5 | // 2. a token, such as "中国" or "人口"
6 | // 3. a piece of text, such as "中国有十三亿人口"
7 | type Text []byte
8 | 
9 | // Token is a single segmented word
10 | type Token struct {
11 | // the token's text, which is actually an array of character units
12 | text []Text
13 | 
14 | // the token's frequency in the corpus
15 | frequency int
16 | 
17 | // log2(total frequency/this token's frequency), i.e. log2(1/p(token)); used as the
18 | // token's path length in the dynamic program. Maximizing prod(p(token)) is the same
19 | // as minimizing sum(distance(token)), which is where "shortest path" comes from.
20 | distance float32
21 | 
22 | // part-of-speech tag
23 | pos string
24 | 
25 | // finer-grained segmentation of this token's text; see the comment on Segments.
26 | segments []*Segment
27 | }
28 | 
29 | // Text returns the token's text
30 | func (token *Token) Text() string {
31 | return textSliceToString(token.text)
32 | }
33 | 
34 | // Frequency returns the token's frequency in the corpus
35 | func (token *Token) Frequency() int {
36 | return token.frequency
37 | }
38 | 
39 | // Pos returns the token's part-of-speech tag
40 | func (token *Token) Pos() string {
41 | return token.pos
42 | }
43 | 
44 | // Segments returns the finer-grained splits of this token's text. For example, the
45 | // token "中华人民共和国中央人民政府" has two sub-tokens, "中华人民共和国" and "中央人民政府".
46 | // Sub-tokens can in turn have sub-tokens of their own, forming a tree; traversing it
47 | // yields all fine-grained splits of the token, mainly used for full-text search.
48 | func (token *Token) Segments() []*Segment {
49 | return token.segments
50 | }
51 | 
52 | // TextEquals reports whether the token's text equals the string s
53 | func (token *Token) TextEquals(s string) bool {
54 | tokenLen := 0
55 | for _, t := range token.text {
56 | tokenLen += len(t)
57 | }
58 | if tokenLen != len(s) {
59 | return false
60 | }
61 | bytStr := []byte(s)
62 | index := 0
63 | for i := 0; i < len(token.text); i++ {
64 | textArray := []byte(token.text[i])
65 | for j := 0; j < len(textArray); j++ {
66 | if textArray[j] != bytStr[index] {
67 | return false
68 | }
69 | index = index + 1
70 | }
71 | }
72 | return true
73 | }
74 | 
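To make the distance definition above concrete, here is a small self-contained sketch; the frequencies are invented for illustration (sego derives the real values from the loaded dictionary):

```go
package main

import (
	"fmt"
	"math"
)

func main() {
	// Hypothetical figures, for illustration only; in sego these
	// come from the dictionary when it is loaded.
	totalFrequency := 100000.0 // sum of all token frequencies in the dictionary
	frequency := 250.0         // this token's corpus frequency

	// distance = log2(total/frequency) = -log2(p(token)). Minimizing the sum of
	// distances along a segmentation maximizes the product of token probabilities.
	distance := float32(math.Log2(totalFrequency / frequency))
	fmt.Println(distance) // log2(400) ≈ 8.64
}
```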
--------------------------------------------------------------------------------
/tools/benchmark.go:
--------------------------------------------------------------------------------
1 | /*
2 | 
3 | Benchmarks sego's segmentation speed:
4 | 
5 | go run benchmark.go
6 | 
7 | To write the segmentation results to a file:
8 | 
9 | go run benchmark.go -output=output.txt
10 | 
11 | To find performance bottlenecks:
12 | 
13 | go build benchmark.go
14 | ./benchmark -cpuprofile=cpu.prof
15 | go tool pprof benchmark cpu.prof
16 | 
17 | To analyze memory usage:
18 | 
19 | go build benchmark.go
20 | ./benchmark -memprofile=mem.prof
21 | go tool pprof benchmark mem.prof
22 | 
23 | */
24 | 
25 | package main
26 | 
27 | import (
28 | "bufio"
29 | "flag"
30 | "fmt"
31 | "github.com/huichen/sego"
32 | "log"
33 | "os"
34 | "runtime"
35 | "runtime/pprof"
36 | "time"
37 | )
38 | 
39 | var (
40 | cpuprofile = flag.String("cpuprofile", "", "CPU profile file")
41 | memprofile = flag.String("memprofile", "", "memory profile file")
42 | output = flag.String("output", "", "write segmentation results to this file")
43 | numRuns = 20
44 | )
45 | 
46 | func main() {
47 | // force a single thread, since Go defaults to multiple threads from 1.5 on
48 | runtime.GOMAXPROCS(1)
49 | 
50 | // parse command-line flags
51 | flag.Parse()
52 | 
53 | // record the time
54 | t0 := time.Now()
55 | 
56 | var segmenter sego.Segmenter
57 | segmenter.LoadDictionary("../data/dictionary.txt")
58 | 
59 | // record the time
60 | t1 := time.Now()
61 | log.Printf("loading the dictionary took %v", t1.Sub(t0))
62 | 
63 | // write the memory profile
64 | if *memprofile != "" {
65 | f, err := os.Create(*memprofile)
66 | if err != nil {
67 | log.Fatal(err)
68 | }
69 | pprof.WriteHeapProfile(f)
70 | defer f.Close()
71 | }
72 | 
73 | // open the file to be segmented
74 | file, err := os.Open("../testdata/bailuyuan.txt")
75 | if err != nil {
76 | log.Fatal(err)
77 | }
78 | defer file.Close()
79 | 
80 | // read it line by line
81 | scanner := bufio.NewScanner(file)
82 | size := 0
83 | lines := [][]byte{}
84 | for scanner.Scan() {
85 | var text string
86 | fmt.Sscanf(scanner.Text(), "%s", &text)
87 | content := []byte(text)
88 | size += len(content)
89 | lines = append(lines, content)
90 | }
91 | 
92 | // open the output file when one is specified
93 | var of *os.File
94 | if *output != "" {
95 | of, err = os.Create(*output)
96 | if err != nil {
97 | log.Fatal(err)
98 | }
99 | defer of.Close()
100 | }
101 | 
102 | // record the time
103 | t2 := time.Now()
104 | 
105 | // start writing the CPU profile
106 | if *cpuprofile != "" {
107 | f, err := os.Create(*cpuprofile)
108 | if err != nil {
109 | log.Fatal(err)
110 | }
111 | pprof.StartCPUProfile(f)
112 | defer pprof.StopCPUProfile()
113 | }
114 | 
115 | // segment
116 | for i := 0; i < numRuns; i++ {
117 | for _, l := range lines {
118 | segments := segmenter.Segment(l)
119 | if *output != "" {
120 | of.WriteString(sego.SegmentsToString(segments, false))
121 | of.WriteString("\n")
122 | }
123 | }
124 | }
125 | 
126 | // stop the CPU profile before taking the final timing
127 | if *cpuprofile != "" {
128 | pprof.StopCPUProfile()
129 | }
130 | 
131 | // record the time and compute the segmentation speed
132 | t3 := time.Now()
133 | log.Printf("segmentation took %v", t3.Sub(t2))
134 | log.Printf("segmentation speed %f MB/s", float64(size*numRuns)/t3.Sub(t2).Seconds()/(1024*1024))
135 | }
136 | 
--------------------------------------------------------------------------------
/tools/example.go:
--------------------------------------------------------------------------------
1 | package main
2 | 
3 | import (
4 | "flag"
5 | "fmt"
6 | 
7 | "github.com/adamzy/sego"
8 | )
9 | 
10 | var (
11 | text = flag.String("text", "中国互联网历史上最大的一笔并购案", "the text to segment")
12 | )
13 | 
14 | func main() {
15 | flag.Parse()
16 | 
17 | var seg sego.Segmenter
18 | seg.LoadDictionary("../data/dictionary.txt")
19 | 
20 | segments := seg.Segment([]byte(*text))
21 | fmt.Println(sego.SegmentsToString(segments, true))
22 | }
23 | 
--------------------------------------------------------------------------------
/tools/goroutines.go:
--------------------------------------------------------------------------------
1 | // Benchmarks sego's parallel segmentation speed
2 | 
3 | package main
4 | 
5 | import (
6 | "bufio"
7 | "fmt"
8 | "github.com/huichen/sego"
9 | "log"
10 | "os"
11 | "runtime"
12 | "time"
13 | )
14 | 
15 | var (
16 | segmenter = sego.Segmenter{}
17 | numThreads = runtime.NumCPU()
18 | task = make(chan []byte, numThreads*40)
19 | done = make(chan bool, numThreads)
20 | numRuns = 50
21 | )
22 | 
23 | func worker() {
24 | for line := range task {
25 | segmenter.Segment(line)
26 | }
27 | done <- true
28 | }
29 | 
30 | func main() {
31 | // set the number of threads to the number of CPUs
32 | runtime.GOMAXPROCS(numThreads)
33 | 
34 | // load the dictionary
35 | segmenter.LoadDictionary("../data/dictionary.txt")
36 | 
37 | // open the file to be segmented
38 | file, err := os.Open("../testdata/bailuyuan.txt")
39 | if err != nil {
40 | log.Fatal(err)
41 | }
42 | defer file.Close()
43 | 
44 | // read it line by line
45 | scanner := bufio.NewScanner(file)
46 | size := 0
47 | lines := [][]byte{}
48 | for scanner.Scan() {
49 | var text string
50 | fmt.Sscanf(scanner.Text(), "%s", &text)
51 | content := []byte(text)
52 | size += len(content)
53 | lines = append(lines, content)
54 | }
55 | 
56 | // start the worker goroutines
57 | for i := 0; i < numThreads; i++ {
58 | go worker()
59 | }
60 | log.Print("starting segmentation")
61 | 
62 | // record the time
63 | t0 := time.Now()
64 | 
65 | // segment in parallel
66 | for i := 0; i < numRuns; i++ {
67 | for _, l := range lines {
68 | task <- l
69 | }
70 | }
71 | close(task)
72 | 
73 | // make sure segmentation has finished
74 | for i := 0; i < numThreads; i++ {
75 | <-done
76 | }
77 | 
78 | // record the time and compute the segmentation speed
79 | t1 := time.Now()
80 | log.Printf("segmentation took %v", t1.Sub(t0))
81 | log.Printf("segmentation speed %f MB/s", float64(size*numRuns)/t1.Sub(t0).Seconds()/(1024*1024))
82 | }
83 | 
--------------------------------------------------------------------------------
/utils.go:
--------------------------------------------------------------------------------
1 | package sego
2 | 
3 | import (
4 | "bytes"
5 | "fmt"
6 | )
7 | 
8 | // SegmentsToString returns the segmentation result as a string.
9 | //
10 | // There are two output modes; take "中华人民共和国" as an example:
11 | //
12 | // normal mode (searchMode=false) outputs the single token "中华人民共和国/ns "
13 | // search mode (searchMode=true) also outputs the finer-grained splits:
14 | // "中华/nz 人民/n 共和/nz 共和国/ns 人民共和国/nt 中华人民共和国/ns "
15 | //
16 | // Search mode mainly serves to give search engines as many keywords as possible; see the comment on the Token struct for details.
17 | func SegmentsToString(segs []Segment, searchMode bool) (output string) {
18 | if searchMode {
19 | for _, seg := range segs {
20 | output += tokenToString(seg.token)
21 | }
22 | } else {
23 | for _, seg := range segs {
24 | output += fmt.Sprintf(
25 | "%s/%s ", textSliceToString(seg.token.text), seg.token.pos)
26 | }
27 | }
28 | return
29 | }
30 | 
31 | func tokenToString(token *Token) (output string) {
32 | hasOnlyTerminalToken := true
33 | for _, s := range token.segments {
34 | if len(s.token.segments) > 1 {
35 | hasOnlyTerminalToken = false
36 | }
37 | }
38 | 
39 | if !hasOnlyTerminalToken {
40 | for _, s := range token.segments {
41 | if s != nil {
42 | output += tokenToString(s.token)
43 | }
44 | }
45 | }
46 | output += fmt.Sprintf("%s/%s ", textSliceToString(token.text), token.pos)
47 | return
48 | }
49 | 
50 | // SegmentsToSlice returns the segmentation result as a slice of strings.
51 | //
52 | // There are two output modes; take "中华人民共和国" as an example:
53 | //
54 | // normal mode (searchMode=false) outputs the single token "[中华人民共和国]"
55 | // search mode (searchMode=true) also outputs the finer-grained splits:
56 | // "[中华 人民 共和 共和国 人民共和国 中华人民共和国]"
57 | //
58 | // Search mode mainly serves to give search engines as many keywords as possible; see the comment on the Token struct for details.
59 | 
60 | func SegmentsToSlice(segs []Segment, searchMode bool) (output []string) {
61 | if searchMode {
62 | for _, seg := range segs {
63 | output = append(output, tokenToSlice(seg.token)...)
64 | }
65 | } else {
66 | for _, seg := range segs {
67 | output = append(output, seg.token.Text())
68 | }
69 | }
70 | return
71 | }
72 | 
73 | func tokenToSlice(token *Token) (output []string) {
74 | hasOnlyTerminalToken := true
75 | for _, s := range token.segments {
76 | if len(s.token.segments) > 1 {
77 | hasOnlyTerminalToken = false
78 | }
79 | }
80 | if !hasOnlyTerminalToken {
81 | for _, s := range token.segments {
82 | output = append(output, tokenToSlice(s.token)...)
83 | }
84 | }
85 | output = append(output, textSliceToString(token.text))
86 | return output
87 | }
88 | 
89 | // textSliceToString joins multiple character units into a single output string
90 | func textSliceToString(text []Text) string {
91 | return Join(text)
92 | }
93 | 
94 | func Join(a []Text) string {
95 | switch len(a) {
96 | case 0:
97 | return ""
98 | case 1:
99 | return string(a[0])
100 | case 2:
101 | // Special case for common small values.
102 | // Remove if golang.org/issue/6714 is fixed
103 | return string(a[0]) + string(a[1])
104 | case 3:
105 | // Special case for common small values.
106 | // Remove if golang.org/issue/6714 is fixed
107 | return string(a[0]) + string(a[1]) + string(a[2])
108 | }
109 | n := 0
110 | for i := 0; i < len(a); i++ {
111 | n += len(a[i])
112 | }
113 | 
114 | b := make([]byte, n)
115 | bp := copy(b, a[0])
116 | for _, s := range a[1:] {
117 | bp += copy(b[bp:], s)
118 | }
119 | return string(b)
120 | }
121 | 
122 | // textSliceByteLength returns the total byte length of multiple character units
123 | func textSliceByteLength(text []Text) (length int) {
124 | for _, word := range text {
125 | length += len(word)
126 | }
127 | return
128 | }
129 | 
130 | func textSliceToBytes(text []Text) []byte {
131 | var buf bytes.Buffer
132 | for _, word := range text {
133 | buf.Write(word)
134 | }
135 | return buf.Bytes()
136 | }
137 | 
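The two output modes documented above can be seen end to end with a short usage sketch; it assumes the repository's data/dictionary.txt is available at the given relative path:

```go
package main

import (
	"fmt"

	"github.com/huichen/sego"
)

func main() {
	var seg sego.Segmenter
	seg.LoadDictionary("data/dictionary.txt") // path assumed; adjust as needed

	segments := seg.Segment([]byte("中华人民共和国"))

	// Normal mode: one token per segment, e.g. "中华人民共和国/ns ".
	fmt.Println(sego.SegmentsToString(segments, false))

	// Search mode: the finer-grained splits as well, as documented above.
	fmt.Println(sego.SegmentsToString(segments, true))

	// The same splits as a []string instead of one formatted string.
	fmt.Println(sego.SegmentsToSlice(segments, true))
}
```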
--------------------------------------------------------------------------------
/utils_test.go:
--------------------------------------------------------------------------------
1 | package sego
2 | 
3 | import (
4 | "fmt"
5 | "testing"
6 | 
7 | "github.com/issue9/assert"
8 | )
9 | 
10 | /*
11 | * Author: Zhang Xiaoming  Date: 2018-06-14
12 | */
13 | 
14 | var (
15 | strs = []Text{
16 | Text("one"),
17 | Text("two"),
18 | Text("three"),
19 | Text("four"),
20 | Text("five"),
21 | Text("six"),
22 | Text("seven"),
23 | Text("eight"),
24 | Text("nine"),
25 | Text("ten"),
26 | }
27 | )
28 | 
29 | func Test_textSliceToString(t *testing.T) {
30 | a := textSliceToString(strs)
31 | b := Join(strs)
32 | assert.Equal(t, a, b)
33 | }
34 | 
35 | func StringsJoin(b *testing.B) {
36 | for i := 0; i < b.N; i++ {
37 | Join(strs)
38 | }
39 | }
40 | 
41 | func TextSliceToString(b *testing.B) {
42 | for i := 0; i < b.N; i++ {
43 | textSliceToString(strs)
44 | }
45 | }
46 | 
47 | func Test_Benchmark(t *testing.T) {
48 | fmt.Println("strings.Join:")
49 | fmt.Println(testing.Benchmark(StringsJoin))
50 | fmt.Println("textSliceToString:")
51 | fmt.Println(testing.Benchmark(TextSliceToString))
52 | }
53 | 
54 | func Test_Token_TextEquals(t *testing.T) {
55 | token := Token{
56 | text: []Text{
57 | []byte("one"),
58 | []byte("two"),
59 | },
60 | }
61 | assert.True(t, token.TextEquals("onetwo"))
62 | }
63 | 
64 | func Test_Token_TextEquals_CN(t *testing.T) {
65 | token := Token{
66 | text: []Text{
67 | []byte("中国"),
68 | []byte("文字"),
69 | },
70 | }
71 | assert.True(t, token.TextEquals("中国文字"))
72 | }
73 | 
74 | func Test_Token_TextNotEquals(t *testing.T) {
75 | token := Token{
76 | text: []Text{
77 | []byte("one"),
78 | []byte("two"),
79 | },
80 | }
81 | assert.False(t, token.TextEquals("one-two"))
82 | }
83 | 
84 | func Test_Token_TextNotEquals_CN(t *testing.T) {
85 | token := Token{
86 | text: []Text{
87 | []byte("中国"),
88 | []byte("文字"),
89 | },
90 | }
91 | assert.False(t, token.TextEquals("中国文字1"))
92 | }
93 | 
94 | func Test_Token_TextNotEquals_CN_B(t *testing.T) {
95 | token := Token{
96 | text: []Text{
97 | []byte("中国"),
98 | []byte("文字"),
99 | },
100 | }
101 | assert.False(t, token.TextEquals("中国文"))
102 | }
103 | 
104 | func Test_Token_Split(t *testing.T) {
105 | probMap := map[string]string{
106 | "衣门襟": "拉链",
107 | "品牌": "天奕",
108 | "图案": "纯色 字母",
109 | "颜色分类": "牛奶白 水粉色 湖水蓝 浅军绿 雅致灰",
110 | "尺码": "大码XL 大码XXL 大码XXXL 大码XXXXL",
111 | "组合形式": "单件",
112 | "面料": "聚酯",
113 | "领型": "连帽",
114 | "服饰工艺": "立体裁剪",
115 | "货号": "YZL-1806052",
116 | "厚薄": "超薄",
117 | "年份季节": "2018年夏季",
118 | "通勤": "韩版",
119 | "服装款式细节": "不对称",
120 | "成分含量": "81%(含)-90%(含)",
121 | "袖型": "常规",
122 | "风格": "通勤",
123 | "适用年龄": "18-24周岁",
124 | "服装版型": "宽松",
125 | "大码女装分类": "其它特大款式",
126 | "衣长": "中长款",
127 | "袖长": "长袖",
128 | "穿着方式": "开衫",
129 | }
130 | word := "卫衣女宽松拉链外套开衫韩版"
131 | 
var segmenter Segmenter 132 | segmenter.LoadDictionary("dictionary.txt") 133 | segments := segmenter.InternalSegment([]byte(word),true) 134 | for _,s := range segments{ 135 | fmt.Println(s.token.Text()) 136 | } 137 | for _, value := range probMap { 138 | for _, s := range segments { 139 | if s.Token().Text() == value { 140 | fmt.Println("=",value) 141 | } 142 | } 143 | } 144 | } 145 | -------------------------------------------------------------------------------- /vendor/github.com/adamzy/cedar-go/LICENSE.md: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 2, June 1991 3 | 4 | Copyright (C) 1989, 1991 Free Software Foundation, Inc., 5 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 6 | Everyone is permitted to copy and distribute verbatim copies 7 | of this license document, but changing it is not allowed. 8 | 9 | Preamble 10 | 11 | The licenses for most software are designed to take away your 12 | freedom to share and change it. By contrast, the GNU General Public 13 | License is intended to guarantee your freedom to share and change free 14 | software--to make sure the software is free for all its users. This 15 | General Public License applies to most of the Free Software 16 | Foundation's software and to any other program whose authors commit to 17 | using it. (Some other Free Software Foundation software is covered by 18 | the GNU Lesser General Public License instead.) You can apply it to 19 | your programs, too. 20 | 21 | When we speak of free software, we are referring to freedom, not 22 | price. Our General Public Licenses are designed to make sure that you 23 | have the freedom to distribute copies of free software (and charge for 24 | this service if you wish), that you receive source code or can get it 25 | if you want it, that you can change the software or use pieces of it 26 | in new free programs; and that you know you can do these things. 27 | 28 | To protect your rights, we need to make restrictions that forbid 29 | anyone to deny you these rights or to ask you to surrender the rights. 30 | These restrictions translate to certain responsibilities for you if you 31 | distribute copies of the software, or if you modify it. 32 | 33 | For example, if you distribute copies of such a program, whether 34 | gratis or for a fee, you must give the recipients all the rights that 35 | you have. You must make sure that they, too, receive or can get the 36 | source code. And you must show them these terms so they know their 37 | rights. 38 | 39 | We protect your rights with two steps: (1) copyright the software, and 40 | (2) offer you this license which gives you legal permission to copy, 41 | distribute and/or modify the software. 42 | 43 | Also, for each author's protection and ours, we want to make certain 44 | that everyone understands that there is no warranty for this free 45 | software. If the software is modified by someone else and passed on, we 46 | want its recipients to know that what they have is not the original, so 47 | that any problems introduced by others will not reflect on the original 48 | authors' reputations. 49 | 50 | Finally, any free program is threatened constantly by software 51 | patents. We wish to avoid the danger that redistributors of a free 52 | program will individually obtain patent licenses, in effect making the 53 | program proprietary. To prevent this, we have made it clear that any 54 | patent must be licensed for everyone's free use or not licensed at all. 
55 | 56 | The precise terms and conditions for copying, distribution and 57 | modification follow. 58 | 59 | GNU GENERAL PUBLIC LICENSE 60 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 61 | 62 | 0. This License applies to any program or other work which contains 63 | a notice placed by the copyright holder saying it may be distributed 64 | under the terms of this General Public License. The "Program", below, 65 | refers to any such program or work, and a "work based on the Program" 66 | means either the Program or any derivative work under copyright law: 67 | that is to say, a work containing the Program or a portion of it, 68 | either verbatim or with modifications and/or translated into another 69 | language. (Hereinafter, translation is included without limitation in 70 | the term "modification".) Each licensee is addressed as "you". 71 | 72 | Activities other than copying, distribution and modification are not 73 | covered by this License; they are outside its scope. The act of 74 | running the Program is not restricted, and the output from the Program 75 | is covered only if its contents constitute a work based on the 76 | Program (independent of having been made by running the Program). 77 | Whether that is true depends on what the Program does. 78 | 79 | 1. You may copy and distribute verbatim copies of the Program's 80 | source code as you receive it, in any medium, provided that you 81 | conspicuously and appropriately publish on each copy an appropriate 82 | copyright notice and disclaimer of warranty; keep intact all the 83 | notices that refer to this License and to the absence of any warranty; 84 | and give any other recipients of the Program a copy of this License 85 | along with the Program. 86 | 87 | You may charge a fee for the physical act of transferring a copy, and 88 | you may at your option offer warranty protection in exchange for a fee. 89 | 90 | 2. You may modify your copy or copies of the Program or any portion 91 | of it, thus forming a work based on the Program, and copy and 92 | distribute such modifications or work under the terms of Section 1 93 | above, provided that you also meet all of these conditions: 94 | 95 | a) You must cause the modified files to carry prominent notices 96 | stating that you changed the files and the date of any change. 97 | 98 | b) You must cause any work that you distribute or publish, that in 99 | whole or in part contains or is derived from the Program or any 100 | part thereof, to be licensed as a whole at no charge to all third 101 | parties under the terms of this License. 102 | 103 | c) If the modified program normally reads commands interactively 104 | when run, you must cause it, when started running for such 105 | interactive use in the most ordinary way, to print or display an 106 | announcement including an appropriate copyright notice and a 107 | notice that there is no warranty (or else, saying that you provide 108 | a warranty) and that users may redistribute the program under 109 | these conditions, and telling the user how to view a copy of this 110 | License. (Exception: if the Program itself is interactive but 111 | does not normally print such an announcement, your work based on 112 | the Program is not required to print an announcement.) 113 | 114 | These requirements apply to the modified work as a whole. 
If 115 | identifiable sections of that work are not derived from the Program, 116 | and can be reasonably considered independent and separate works in 117 | themselves, then this License, and its terms, do not apply to those 118 | sections when you distribute them as separate works. But when you 119 | distribute the same sections as part of a whole which is a work based 120 | on the Program, the distribution of the whole must be on the terms of 121 | this License, whose permissions for other licensees extend to the 122 | entire whole, and thus to each and every part regardless of who wrote it. 123 | 124 | Thus, it is not the intent of this section to claim rights or contest 125 | your rights to work written entirely by you; rather, the intent is to 126 | exercise the right to control the distribution of derivative or 127 | collective works based on the Program. 128 | 129 | In addition, mere aggregation of another work not based on the Program 130 | with the Program (or with a work based on the Program) on a volume of 131 | a storage or distribution medium does not bring the other work under 132 | the scope of this License. 133 | 134 | 3. You may copy and distribute the Program (or a work based on it, 135 | under Section 2) in object code or executable form under the terms of 136 | Sections 1 and 2 above provided that you also do one of the following: 137 | 138 | a) Accompany it with the complete corresponding machine-readable 139 | source code, which must be distributed under the terms of Sections 140 | 1 and 2 above on a medium customarily used for software interchange; or, 141 | 142 | b) Accompany it with a written offer, valid for at least three 143 | years, to give any third party, for a charge no more than your 144 | cost of physically performing source distribution, a complete 145 | machine-readable copy of the corresponding source code, to be 146 | distributed under the terms of Sections 1 and 2 above on a medium 147 | customarily used for software interchange; or, 148 | 149 | c) Accompany it with the information you received as to the offer 150 | to distribute corresponding source code. (This alternative is 151 | allowed only for noncommercial distribution and only if you 152 | received the program in object code or executable form with such 153 | an offer, in accord with Subsection b above.) 154 | 155 | The source code for a work means the preferred form of the work for 156 | making modifications to it. For an executable work, complete source 157 | code means all the source code for all modules it contains, plus any 158 | associated interface definition files, plus the scripts used to 159 | control compilation and installation of the executable. However, as a 160 | special exception, the source code distributed need not include 161 | anything that is normally distributed (in either source or binary 162 | form) with the major components (compiler, kernel, and so on) of the 163 | operating system on which the executable runs, unless that component 164 | itself accompanies the executable. 165 | 166 | If distribution of executable or object code is made by offering 167 | access to copy from a designated place, then offering equivalent 168 | access to copy the source code from the same place counts as 169 | distribution of the source code, even though third parties are not 170 | compelled to copy the source along with the object code. 171 | 172 | 4. You may not copy, modify, sublicense, or distribute the Program 173 | except as expressly provided under this License. 
Any attempt 174 | otherwise to copy, modify, sublicense or distribute the Program is 175 | void, and will automatically terminate your rights under this License. 176 | However, parties who have received copies, or rights, from you under 177 | this License will not have their licenses terminated so long as such 178 | parties remain in full compliance. 179 | 180 | 5. You are not required to accept this License, since you have not 181 | signed it. However, nothing else grants you permission to modify or 182 | distribute the Program or its derivative works. These actions are 183 | prohibited by law if you do not accept this License. Therefore, by 184 | modifying or distributing the Program (or any work based on the 185 | Program), you indicate your acceptance of this License to do so, and 186 | all its terms and conditions for copying, distributing or modifying 187 | the Program or works based on it. 188 | 189 | 6. Each time you redistribute the Program (or any work based on the 190 | Program), the recipient automatically receives a license from the 191 | original licensor to copy, distribute or modify the Program subject to 192 | these terms and conditions. You may not impose any further 193 | restrictions on the recipients' exercise of the rights granted herein. 194 | You are not responsible for enforcing compliance by third parties to 195 | this License. 196 | 197 | 7. If, as a consequence of a court judgment or allegation of patent 198 | infringement or for any other reason (not limited to patent issues), 199 | conditions are imposed on you (whether by court order, agreement or 200 | otherwise) that contradict the conditions of this License, they do not 201 | excuse you from the conditions of this License. If you cannot 202 | distribute so as to satisfy simultaneously your obligations under this 203 | License and any other pertinent obligations, then as a consequence you 204 | may not distribute the Program at all. For example, if a patent 205 | license would not permit royalty-free redistribution of the Program by 206 | all those who receive copies directly or indirectly through you, then 207 | the only way you could satisfy both it and this License would be to 208 | refrain entirely from distribution of the Program. 209 | 210 | If any portion of this section is held invalid or unenforceable under 211 | any particular circumstance, the balance of the section is intended to 212 | apply and the section as a whole is intended to apply in other 213 | circumstances. 214 | 215 | It is not the purpose of this section to induce you to infringe any 216 | patents or other property right claims or to contest validity of any 217 | such claims; this section has the sole purpose of protecting the 218 | integrity of the free software distribution system, which is 219 | implemented by public license practices. Many people have made 220 | generous contributions to the wide range of software distributed 221 | through that system in reliance on consistent application of that 222 | system; it is up to the author/donor to decide if he or she is willing 223 | to distribute software through any other system and a licensee cannot 224 | impose that choice. 225 | 226 | This section is intended to make thoroughly clear what is believed to 227 | be a consequence of the rest of this License. 228 | 229 | 8. 
If the distribution and/or use of the Program is restricted in 230 | certain countries either by patents or by copyrighted interfaces, the 231 | original copyright holder who places the Program under this License 232 | may add an explicit geographical distribution limitation excluding 233 | those countries, so that distribution is permitted only in or among 234 | countries not thus excluded. In such case, this License incorporates 235 | the limitation as if written in the body of this License. 236 | 237 | 9. The Free Software Foundation may publish revised and/or new versions 238 | of the General Public License from time to time. Such new versions will 239 | be similar in spirit to the present version, but may differ in detail to 240 | address new problems or concerns. 241 | 242 | Each version is given a distinguishing version number. If the Program 243 | specifies a version number of this License which applies to it and "any 244 | later version", you have the option of following the terms and conditions 245 | either of that version or of any later version published by the Free 246 | Software Foundation. If the Program does not specify a version number of 247 | this License, you may choose any version ever published by the Free Software 248 | Foundation. 249 | 250 | 10. If you wish to incorporate parts of the Program into other free 251 | programs whose distribution conditions are different, write to the author 252 | to ask for permission. For software which is copyrighted by the Free 253 | Software Foundation, write to the Free Software Foundation; we sometimes 254 | make exceptions for this. Our decision will be guided by the two goals 255 | of preserving the free status of all derivatives of our free software and 256 | of promoting the sharing and reuse of software generally. 257 | 258 | NO WARRANTY 259 | 260 | 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY 261 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN 262 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES 263 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED 264 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 265 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS 266 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE 267 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, 268 | REPAIR OR CORRECTION. 269 | 270 | 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 271 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR 272 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, 273 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING 274 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED 275 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY 276 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER 277 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE 278 | POSSIBILITY OF SUCH DAMAGES. 279 | 280 | END OF TERMS AND CONDITIONS 281 | 282 | How to Apply These Terms to Your New Programs 283 | 284 | If you develop a new program, and you want it to be of the greatest 285 | possible use to the public, the best way to achieve this is to make it 286 | free software which everyone can redistribute and change under these terms. 
287 | 
288 | To do so, attach the following notices to the program. It is safest
289 | to attach them to the start of each source file to most effectively
290 | convey the exclusion of warranty; and each file should have at least
291 | the "copyright" line and a pointer to where the full notice is found.
292 | 
293 | {description}
294 | Copyright (C) {year} {fullname}
295 | 
296 | This program is free software; you can redistribute it and/or modify
297 | it under the terms of the GNU General Public License as published by
298 | the Free Software Foundation; either version 2 of the License, or
299 | (at your option) any later version.
300 | 
301 | This program is distributed in the hope that it will be useful,
302 | but WITHOUT ANY WARRANTY; without even the implied warranty of
303 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
304 | GNU General Public License for more details.
305 | 
306 | You should have received a copy of the GNU General Public License along
307 | with this program; if not, write to the Free Software Foundation, Inc.,
308 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
309 | 
310 | Also add information on how to contact you by electronic and paper mail.
311 | 
312 | If the program is interactive, make it output a short notice like this
313 | when it starts in an interactive mode:
314 | 
315 | Gnomovision version 69, Copyright (C) year name of author
316 | Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
317 | This is free software, and you are welcome to redistribute it
318 | under certain conditions; type `show c' for details.
319 | 
320 | The hypothetical commands `show w' and `show c' should show the appropriate
321 | parts of the General Public License. Of course, the commands you use may
322 | be called something other than `show w' and `show c'; they could even be
323 | mouse-clicks or menu items--whatever suits your program.
324 | 
325 | You should also get your employer (if you work as a programmer) or your
326 | school, if any, to sign a "copyright disclaimer" for the program, if
327 | necessary. Here is a sample; alter the names:
328 | 
329 | Yoyodyne, Inc., hereby disclaims all copyright interest in the program
330 | `Gnomovision' (which makes passes at compilers) written by James Hacker.
331 | 
332 | {signature of Ty Coon}, 1 April 1989
333 | Ty Coon, President of Vice
334 | 
335 | This General Public License does not permit incorporating your program into
336 | proprietary programs. If your program is a subroutine library, you may
337 | consider it more useful to permit linking proprietary applications with the
338 | library. If this is what you want to do, use the GNU Lesser General
339 | Public License instead of this License.
340 | 
--------------------------------------------------------------------------------
/vendor/github.com/adamzy/cedar-go/README.md:
--------------------------------------------------------------------------------
1 | # cedar-go [![GoDoc](https://godoc.org/github.com/adamzy/cedar-go?status.svg)](https://godoc.org/github.com/adamzy/cedar-go)
2 | 
3 | Package `cedar-go` implements a double-array trie.
4 | 
5 | It is a [Golang](https://golang.org/) port of [cedar](http://www.tkl.iis.u-tokyo.ac.jp/~ynaga/cedar) which is written in C++ by Naoki Yoshinaga. `cedar-go` currently implements the `reduced` version of cedar.
6 | This package is not thread safe if any goroutine is doing insertions or deletions.
7 | 
8 | ## Install
9 | ```
10 | go get github.com/adamzy/cedar-go
11 | ```
12 | 
13 | ## Usage
14 | ```go
15 | package main
16 | 
17 | import (
18 | "fmt"
19 | 
20 | "github.com/adamzy/cedar-go"
21 | )
22 | 
23 | func main() {
24 | // create a new cedar trie.
25 | trie := cedar.New()
26 | 
27 | // a helper function to print the id-key-value triple given trie node id
28 | printIdKeyValue := func(id int) {
29 | // the key of node `id`.
30 | key, _ := trie.Key(id)
31 | // the value of node `id`.
32 | value, _ := trie.Value(id)
33 | fmt.Printf("%d\t%s:%v\n", id, key, value)
34 | }
35 | 
36 | // Insert key-value pairs.
37 | // The order of insertion is not important.
38 | trie.Insert([]byte("How many"), 0)
39 | trie.Insert([]byte("How many loved"), 1)
40 | trie.Insert([]byte("How many loved your moments"), 2)
41 | trie.Insert([]byte("How many loved your moments of glad grace"), 3)
42 | trie.Insert([]byte("姑苏"), 4)
43 | trie.Insert([]byte("姑苏城外"), 5)
44 | trie.Insert([]byte("姑苏城外寒山寺"), 6)
45 | 
46 | // Get the associated value of a key directly.
47 | value, _ := trie.Get([]byte("How many loved your moments of glad grace"))
48 | fmt.Println(value)
49 | 
50 | // Or, jump to the node first,
51 | id, _ := trie.Jump([]byte("How many loved your moments"), 0)
52 | // then get the key and the value
53 | printIdKeyValue(id)
54 | 
55 | fmt.Println("\nPrefixMatch\nid\tkey:value")
56 | for _, id := range trie.PrefixMatch([]byte("How many loved your moments of glad grace"), 0) {
57 | printIdKeyValue(id)
58 | }
59 | 
60 | fmt.Println("\nPrefixPredict\nid\tkey:value")
61 | for _, id := range trie.PrefixPredict([]byte("姑苏"), 0) {
62 | printIdKeyValue(id)
63 | }
64 | }
65 | ```
66 | will produce
67 | ```
68 | 3
69 | 281 How many loved your moments:2
70 | 
71 | PrefixMatch
72 | id key:value
73 | 262 How many:0
74 | 268 How many loved:1
75 | 281 How many loved your moments:2
76 | 296 How many loved your moments of glad grace:3
77 | 
78 | PrefixPredict
79 | id key:value
80 | 303 姑苏:4
81 | 309 姑苏城外:5
82 | 318 姑苏城外寒山寺:6
83 | ```
84 | 
--------------------------------------------------------------------------------
/vendor/github.com/adamzy/cedar-go/api.go:
--------------------------------------------------------------------------------
1 | package cedar
2 | 
3 | // Status reports the following statistics of the cedar:
4 | // keys: number of keys that are in the cedar,
5 | // nodes: number of trie nodes (slots in the base array) that have been taken,
6 | // size: the size of the base array used by the cedar,
7 | // capacity: the capacity of the base array used by the cedar.
8 | func (da *Cedar) Status() (keys, nodes, size, capacity int) {
9 | for i := 0; i < da.Size; i++ {
10 | n := da.Array[i]
11 | if n.Check >= 0 {
12 | nodes++
13 | if n.Value >= 0 {
14 | keys++
15 | }
16 | }
17 | }
18 | return keys, nodes, da.Size, da.Capacity
19 | }
20 | 
21 | // Jump travels from a node `from` to another node `to` by following the path `path`.
22 | // For example, if the following keys were inserted: 23 | // id key 24 | // 19 abc 25 | // 23 ab 26 | // 37 abcd 27 | // then 28 | // Jump([]byte("ab"), 0) = 23, nil // reach "ab" from root 29 | // Jump([]byte("c"), 23) = 19, nil // reach "abc" from "ab" 30 | // Jump([]byte("cd"), 23) = 37, nil // reach "abcd" from "ab" 31 | func (da *Cedar) Jump(path []byte, from int) (to int, err error) { 32 | for _, b := range path { 33 | if da.Array[from].Value >= 0 { 34 | return from, ErrNoPath 35 | } 36 | to = da.Array[from].base() ^ int(b) 37 | if da.Array[to].Check != from { 38 | return from, ErrNoPath 39 | } 40 | from = to 41 | } 42 | return to, nil 43 | } 44 | 45 | // Key returns the key of the node with the given `id`. 46 | // It will return ErrNoPath, if the node does not exist. 47 | func (da *Cedar) Key(id int) (key []byte, err error) { 48 | for id > 0 { 49 | from := da.Array[id].Check 50 | if from < 0 { 51 | return nil, ErrNoPath 52 | } 53 | if char := byte(da.Array[from].base() ^ id); char != 0 { 54 | key = append(key, char) 55 | } 56 | id = from 57 | } 58 | if id != 0 || len(key) == 0 { 59 | return nil, ErrInvalidKey 60 | } 61 | for i := 0; i < len(key)/2; i++ { 62 | key[i], key[len(key)-i-1] = key[len(key)-i-1], key[i] 63 | } 64 | return key, nil 65 | } 66 | 67 | // Value returns the value of the node with the given `id`. 68 | // It will return ErrNoValue, if the node does not have a value. 69 | func (da *Cedar) Value(id int) (value int, err error) { 70 | value = da.Array[id].Value 71 | if value >= 0 { 72 | return value, nil 73 | } 74 | to := da.Array[id].base() 75 | if da.Array[to].Check == id && da.Array[to].Value >= 0 { 76 | return da.Array[to].Value, nil 77 | } 78 | return 0, ErrNoValue 79 | } 80 | 81 | // Insert adds a key-value pair into the cedar. 82 | // It will return ErrInvalidValue, if value < 0 or >= ValueLimit. 83 | func (da *Cedar) Insert(key []byte, value int) error { 84 | if value < 0 || value >= ValueLimit { 85 | return ErrInvalidValue 86 | } 87 | p := da.get(key, 0, 0) 88 | *p = value 89 | return nil 90 | } 91 | 92 | // Update increases the value associated with the `key`. 93 | // The `key` will be inserted if it is not in the cedar. 94 | // It will return ErrInvalidValue, if the updated value < 0 or >= ValueLimit. 95 | func (da *Cedar) Update(key []byte, value int) error { 96 | p := da.get(key, 0, 0) 97 | 98 | // key was not inserted 99 | if *p == ValueLimit { 100 | *p = value 101 | return nil 102 | } 103 | 104 | // key was inserted before 105 | if *p+value < 0 || *p+value >= ValueLimit { 106 | return ErrInvalidValue 107 | } 108 | *p += value 109 | return nil 110 | } 111 | 112 | // Delete removes a key-value pair from the cedar. 113 | // It will return ErrNoPath, if the key has not been added. 
114 | func (da *Cedar) Delete(key []byte) error { 115 | // if the path does not exist, or the end is not a leaf, nothing to delete 116 | to, err := da.Jump(key, 0) 117 | if err != nil { 118 | return ErrNoPath 119 | } 120 | 121 | if da.Array[to].Value < 0 { 122 | base := da.Array[to].base() 123 | if da.Array[base].Check == to { 124 | to = base 125 | } 126 | } 127 | 128 | for to > 0 { 129 | from := da.Array[to].Check 130 | base := da.Array[from].base() 131 | label := byte(to ^ base) 132 | 133 | // if `to` has sibling, remove `to` from the sibling list, then stop 134 | if da.Ninfos[to].Sibling != 0 || da.Ninfos[from].Child != label { 135 | // delete the label from the child ring first 136 | da.popSibling(from, base, label) 137 | // then release the current node `to` to the empty node ring 138 | da.pushEnode(to) 139 | break 140 | } 141 | // otherwise, just release the current node `to` to the empty node ring 142 | da.pushEnode(to) 143 | // then check its parent node 144 | to = from 145 | } 146 | return nil 147 | } 148 | 149 | // Get returns the value associated with the given `key`. 150 | // It is equivalent to 151 | // id, err1 = Jump(key) 152 | // value, err2 = Value(id) 153 | // Thus, it may return ErrNoPath or ErrNoValue, 154 | func (da *Cedar) Get(key []byte) (value int, err error) { 155 | to, err := da.Jump(key, 0) 156 | if err != nil { 157 | return 0, err 158 | } 159 | return da.Value(to) 160 | } 161 | 162 | // PrefixMatch returns a list of at most `num` nodes which match the prefix of the key. 163 | // If `num` is 0, it returns all matches. 164 | // For example, if the following keys were inserted: 165 | // id key 166 | // 19 abc 167 | // 23 ab 168 | // 37 abcd 169 | // then 170 | // PrefixMatch([]byte("abc"), 1) = [ 23 ] // match ["ab"] 171 | // PrefixMatch([]byte("abcd"), 0) = [ 23, 19, 37] // match ["ab", "abc", "abcd"] 172 | func (da *Cedar) PrefixMatch(key []byte, num int) (ids []int) { 173 | for from, i := 0, 0; i < len(key); i++ { 174 | to, err := da.Jump(key[i:i+1], from) 175 | if err != nil { 176 | break 177 | } 178 | if _, err := da.Value(to); err == nil { 179 | ids = append(ids, to) 180 | num-- 181 | if num == 0 { 182 | return 183 | } 184 | } 185 | from = to 186 | } 187 | return 188 | } 189 | 190 | // PrefixPredict returns a list of at most `num` nodes which has the key as their prefix. 191 | // These nodes are ordered by their keys. 192 | // If `num` is 0, it returns all matches. 
193 | // For example, if the following keys were inserted: 194 | // id key 195 | // 19 abc 196 | // 23 ab 197 | // 37 abcd 198 | // then 199 | // PrefixPredict([]byte("ab"), 2) = [ 23, 19 ] // predict ["ab", "abc"] 200 | // PrefixPredict([]byte("ab"), 0) = [ 23, 19, 37 ] // predict ["ab", "abc", "abcd"] 201 | func (da *Cedar) PrefixPredict(key []byte, num int) (ids []int) { 202 | root, err := da.Jump(key, 0) 203 | if err != nil { 204 | return 205 | } 206 | for from, err := da.begin(root); err == nil; from, err = da.next(from, root) { 207 | ids = append(ids, from) 208 | num-- 209 | if num == 0 { 210 | return 211 | } 212 | } 213 | return 214 | } 215 | 216 | func (da *Cedar) begin(from int) (to int, err error) { 217 | for c := da.Ninfos[from].Child; c != 0; { 218 | to = da.Array[from].base() ^ int(c) 219 | c = da.Ninfos[to].Child 220 | from = to 221 | } 222 | if da.Array[from].base() > 0 { 223 | return da.Array[from].base(), nil 224 | } 225 | return from, nil 226 | } 227 | 228 | func (da *Cedar) next(from int, root int) (to int, err error) { 229 | c := da.Ninfos[from].Sibling 230 | for c == 0 && from != root && da.Array[from].Check >= 0 { 231 | from = da.Array[from].Check 232 | c = da.Ninfos[from].Sibling 233 | } 234 | if from == root { 235 | return 0, ErrNoPath 236 | } 237 | from = da.Array[da.Array[from].Check].base() ^ int(c) 238 | return da.begin(from) 239 | } 240 | -------------------------------------------------------------------------------- /vendor/github.com/adamzy/cedar-go/cedar.go: -------------------------------------------------------------------------------- 1 | package cedar 2 | 3 | const ValueLimit = int(^uint(0) >> 1) 4 | 5 | type node struct { 6 | Value int 7 | Check int 8 | } 9 | 10 | func (n *node) base() int { return -(n.Value + 1) } 11 | 12 | type ninfo struct { 13 | Sibling, Child byte 14 | } 15 | 16 | type block struct { 17 | Prev, Next, Num, Reject, Trial, Ehead int 18 | } 19 | 20 | func (b *block) init() { 21 | b.Num = 256 22 | b.Reject = 257 23 | } 24 | 25 | type Cedar struct { 26 | *cedar 27 | } 28 | 29 | type cedar struct { 30 | Array []node 31 | Ninfos []ninfo 32 | Blocks []block 33 | Reject [257]int 34 | BheadF int 35 | BheadC int 36 | BheadO int 37 | Capacity int 38 | Size int 39 | Ordered bool 40 | MaxTrial int 41 | } 42 | 43 | func New() *Cedar { 44 | da := cedar{ 45 | Array: make([]node, 256), 46 | Ninfos: make([]ninfo, 256), 47 | Blocks: make([]block, 1), 48 | Capacity: 256, 49 | Size: 256, 50 | Ordered: true, 51 | MaxTrial: 1, 52 | } 53 | 54 | da.Array[0] = node{-2, 0} 55 | for i := 1; i < 256; i++ { 56 | da.Array[i] = node{-(i - 1), -(i + 1)} 57 | } 58 | da.Array[1].Value = -255 59 | da.Array[255].Check = -1 60 | 61 | da.Blocks[0].Ehead = 1 62 | da.Blocks[0].init() 63 | 64 | for i := 0; i <= 256; i++ { 65 | da.Reject[i] = i + 1 66 | } 67 | 68 | return &Cedar{&da} 69 | } 70 | 71 | // Get value by key, insert the key if not exist 72 | func (da *cedar) get(key []byte, from, pos int) *int { 73 | for ; pos < len(key); pos++ { 74 | if value := da.Array[from].Value; value >= 0 && value != ValueLimit { 75 | to := da.follow(from, 0) 76 | da.Array[to].Value = value 77 | } 78 | from = da.follow(from, key[pos]) 79 | } 80 | to := from 81 | if da.Array[from].Value < 0 { 82 | to = da.follow(from, 0) 83 | } 84 | return &da.Array[to].Value 85 | } 86 | 87 | func (da *cedar) follow(from int, label byte) int { 88 | base := da.Array[from].base() 89 | to := base ^ int(label) 90 | if base < 0 || da.Array[to].Check < 0 { 91 | hasChild := false 92 | if base >= 0 { 93 | hasChild 
= (da.Array[base^int(da.Ninfos[from].Child)].Check == from) 94 | } 95 | to = da.popEnode(base, label, from) 96 | da.pushSibling(from, to^int(label), label, hasChild) 97 | } else if da.Array[to].Check != from { 98 | to = da.resolve(from, base, label) 99 | } else if da.Array[to].Check == from { 100 | } else { 101 | panic("cedar: internal error, should not be here") 102 | } 103 | return to 104 | } 105 | 106 | func (da *cedar) popBlock(bi int, head_in *int, last bool) { 107 | if last { 108 | *head_in = 0 109 | } else { 110 | b := &da.Blocks[bi] 111 | da.Blocks[b.Prev].Next = b.Next 112 | da.Blocks[b.Next].Prev = b.Prev 113 | if bi == *head_in { 114 | *head_in = b.Next 115 | } 116 | } 117 | } 118 | 119 | func (da *cedar) pushBlock(bi int, head_out *int, empty bool) { 120 | b := &da.Blocks[bi] 121 | if empty { 122 | *head_out, b.Prev, b.Next = bi, bi, bi 123 | } else { 124 | tail_out := &da.Blocks[*head_out].Prev 125 | b.Prev = *tail_out 126 | b.Next = *head_out 127 | *head_out, *tail_out, da.Blocks[*tail_out].Next = bi, bi, bi 128 | } 129 | } 130 | 131 | func (da *cedar) addBlock() int { 132 | if da.Size == da.Capacity { 133 | da.Capacity *= 2 134 | 135 | oldArray := da.Array 136 | da.Array = make([]node, da.Capacity) 137 | copy(da.Array, oldArray) 138 | 139 | oldNinfo := da.Ninfos 140 | da.Ninfos = make([]ninfo, da.Capacity) 141 | copy(da.Ninfos, oldNinfo) 142 | 143 | oldBlock := da.Blocks 144 | da.Blocks = make([]block, da.Capacity>>8) 145 | copy(da.Blocks, oldBlock) 146 | } 147 | 148 | da.Blocks[da.Size>>8].init() 149 | da.Blocks[da.Size>>8].Ehead = da.Size 150 | 151 | da.Array[da.Size] = node{-(da.Size + 255), -(da.Size + 1)} 152 | for i := da.Size + 1; i < da.Size+255; i++ { 153 | da.Array[i] = node{-(i - 1), -(i + 1)} 154 | } 155 | da.Array[da.Size+255] = node{-(da.Size + 254), -da.Size} 156 | 157 | da.pushBlock(da.Size>>8, &da.BheadO, da.BheadO == 0) 158 | da.Size += 256 159 | return da.Size>>8 - 1 160 | } 161 | 162 | func (da *cedar) transferBlock(bi int, head_in, head_out *int) { 163 | da.popBlock(bi, head_in, bi == da.Blocks[bi].Next) 164 | da.pushBlock(bi, head_out, *head_out == 0 && da.Blocks[bi].Num != 0) 165 | } 166 | 167 | func (da *cedar) popEnode(base int, label byte, from int) int { 168 | e := base ^ int(label) 169 | if base < 0 { 170 | e = da.findPlace() 171 | } 172 | bi := e >> 8 173 | n := &da.Array[e] 174 | b := &da.Blocks[bi] 175 | b.Num-- 176 | if b.Num == 0 { 177 | if bi != 0 { 178 | da.transferBlock(bi, &da.BheadC, &da.BheadF) 179 | } 180 | } else { 181 | da.Array[-n.Value].Check = n.Check 182 | da.Array[-n.Check].Value = n.Value 183 | if e == b.Ehead { 184 | b.Ehead = -n.Check 185 | } 186 | if bi != 0 && b.Num == 1 && b.Trial != da.MaxTrial { 187 | da.transferBlock(bi, &da.BheadO, &da.BheadC) 188 | } 189 | } 190 | n.Value = ValueLimit 191 | n.Check = from 192 | if base < 0 { 193 | da.Array[from].Value = -(e ^ int(label)) - 1 194 | } 195 | return e 196 | } 197 | 198 | func (da *cedar) pushEnode(e int) { 199 | bi := e >> 8 200 | b := &da.Blocks[bi] 201 | b.Num++ 202 | if b.Num == 1 { 203 | b.Ehead = e 204 | da.Array[e] = node{-e, -e} 205 | if bi != 0 { 206 | da.transferBlock(bi, &da.BheadF, &da.BheadC) 207 | } 208 | } else { 209 | prev := b.Ehead 210 | next := -da.Array[prev].Check 211 | da.Array[e] = node{-prev, -next} 212 | da.Array[prev].Check = -e 213 | da.Array[next].Value = -e 214 | if b.Num == 2 || b.Trial == da.MaxTrial { 215 | if bi != 0 { 216 | da.transferBlock(bi, &da.BheadC, &da.BheadO) 217 | } 218 | } 219 | b.Trial = 0 220 | } 221 | if b.Reject < 
da.Reject[b.Num] { 222 | b.Reject = da.Reject[b.Num] 223 | } 224 | da.Ninfos[e] = ninfo{} 225 | } 226 | 227 | // hasChild: wherether the `from` node has children 228 | func (da *cedar) pushSibling(from, base int, label byte, hasChild bool) { 229 | c := &da.Ninfos[from].Child 230 | keepOrder := *c == 0 231 | if da.Ordered { 232 | keepOrder = label > *c 233 | } 234 | if hasChild && keepOrder { 235 | c = &da.Ninfos[base^int(*c)].Sibling 236 | for da.Ordered && *c != 0 && *c < label { 237 | c = &da.Ninfos[base^int(*c)].Sibling 238 | } 239 | } 240 | da.Ninfos[base^int(label)].Sibling = *c 241 | *c = label 242 | } 243 | 244 | func (da *cedar) popSibling(from, base int, label byte) { 245 | c := &da.Ninfos[from].Child 246 | for *c != label { 247 | c = &da.Ninfos[base^int(*c)].Sibling 248 | } 249 | *c = da.Ninfos[base^int(*c)].Sibling 250 | } 251 | 252 | func (da *cedar) consult(base_n, base_p int, c_n, c_p byte) bool { 253 | c_n = da.Ninfos[base_n^int(c_n)].Sibling 254 | c_p = da.Ninfos[base_p^int(c_p)].Sibling 255 | for c_n != 0 && c_p != 0 { 256 | c_n = da.Ninfos[base_n^int(c_n)].Sibling 257 | c_p = da.Ninfos[base_p^int(c_p)].Sibling 258 | } 259 | return c_p != 0 260 | } 261 | 262 | func (da *cedar) setChild(base int, c byte, label byte, flag bool) []byte { 263 | child := make([]byte, 0, 257) 264 | if c == 0 { 265 | child = append(child, c) 266 | c = da.Ninfos[base^int(c)].Sibling 267 | } 268 | if da.Ordered { 269 | for c != 0 && c <= label { 270 | child = append(child, c) 271 | c = da.Ninfos[base^int(c)].Sibling 272 | } 273 | } 274 | if flag { 275 | child = append(child, label) 276 | } 277 | for c != 0 { 278 | child = append(child, c) 279 | c = da.Ninfos[base^int(c)].Sibling 280 | } 281 | return child 282 | } 283 | 284 | func (da *cedar) findPlace() int { 285 | if da.BheadC != 0 { 286 | return da.Blocks[da.BheadC].Ehead 287 | } 288 | if da.BheadO != 0 { 289 | return da.Blocks[da.BheadO].Ehead 290 | } 291 | return da.addBlock() << 8 292 | } 293 | 294 | func (da *cedar) findPlaces(child []byte) int { 295 | bi := da.BheadO 296 | if bi != 0 { 297 | bz := da.Blocks[da.BheadO].Prev 298 | nc := len(child) 299 | for { 300 | b := &da.Blocks[bi] 301 | if b.Num >= nc && nc < b.Reject { 302 | for e := b.Ehead; ; { 303 | base := e ^ int(child[0]) 304 | for i := 0; da.Array[base^int(child[i])].Check < 0; i++ { 305 | if i == len(child)-1 { 306 | b.Ehead = e 307 | return e 308 | } 309 | } 310 | e = -da.Array[e].Check 311 | if e == b.Ehead { 312 | break 313 | } 314 | } 315 | } 316 | b.Reject = nc 317 | if b.Reject < da.Reject[b.Num] { 318 | da.Reject[b.Num] = b.Reject 319 | } 320 | bi_ := b.Next 321 | b.Trial++ 322 | if b.Trial == da.MaxTrial { 323 | da.transferBlock(bi, &da.BheadO, &da.BheadC) 324 | } 325 | if bi == bz { 326 | break 327 | } 328 | bi = bi_ 329 | } 330 | } 331 | return da.addBlock() << 8 332 | } 333 | 334 | func (da *cedar) resolve(from_n, base_n int, label_n byte) int { 335 | to_pn := base_n ^ int(label_n) 336 | from_p := da.Array[to_pn].Check 337 | base_p := da.Array[from_p].base() 338 | 339 | flag := da.consult(base_n, base_p, da.Ninfos[from_n].Child, da.Ninfos[from_p].Child) 340 | var children []byte 341 | if flag { 342 | children = da.setChild(base_n, da.Ninfos[from_n].Child, label_n, true) 343 | } else { 344 | children = da.setChild(base_p, da.Ninfos[from_p].Child, 255, false) 345 | } 346 | var base int 347 | if len(children) == 1 { 348 | base = da.findPlace() 349 | } else { 350 | base = da.findPlaces(children) 351 | } 352 | base ^= int(children[0]) 353 | var from int 354 | var base_ int 
355 | 	if flag {
356 | 		from = from_n
357 | 		base_ = base_n
358 | 	} else {
359 | 		from = from_p
360 | 		base_ = base_p
361 | 	}
362 | 	if flag && children[0] == label_n {
363 | 		da.Ninfos[from].Child = label_n
364 | 	}
365 | 	da.Array[from].Value = -base - 1
366 | 	for i := 0; i < len(children); i++ {
367 | 		to := da.popEnode(base, children[i], from)
368 | 		to_ := base_ ^ int(children[i])
369 | 		if i == len(children)-1 {
370 | 			da.Ninfos[to].Sibling = 0
371 | 		} else {
372 | 			da.Ninfos[to].Sibling = children[i+1]
373 | 		}
374 | 		if flag && to_ == to_pn { // new node has no child
375 | 			continue
376 | 		}
377 | 		n := &da.Array[to]
378 | 		n_ := &da.Array[to_]
379 | 		n.Value = n_.Value
380 | 		if n.Value < 0 && children[i] != 0 {
381 | 			// this node has children, fix their check
382 | 			c := da.Ninfos[to_].Child
383 | 			da.Ninfos[to].Child = c
384 | 			da.Array[n.base()^int(c)].Check = to
385 | 			c = da.Ninfos[n.base()^int(c)].Sibling
386 | 			for c != 0 {
387 | 				da.Array[n.base()^int(c)].Check = to
388 | 				c = da.Ninfos[n.base()^int(c)].Sibling
389 | 			}
390 | 		}
391 | 		if !flag && to_ == from_n { // parent node moved
392 | 			from_n = to
393 | 		}
394 | 		if !flag && to_ == to_pn {
395 | 			da.pushSibling(from_n, to_pn^int(label_n), label_n, true)
396 | 			da.Ninfos[to_].Child = 0
397 | 			n_.Value = ValueLimit
398 | 			n_.Check = from_n
399 | 		} else {
400 | 			da.pushEnode(to_)
401 | 		}
402 | 	}
403 | 	if flag {
404 | 		return base ^ int(label_n)
405 | 	}
406 | 	return to_pn
407 | }
408 | 
--------------------------------------------------------------------------------
/vendor/github.com/adamzy/cedar-go/doc.go:
--------------------------------------------------------------------------------
1 | // Package cedar-go implements a double-array trie.
2 | //
3 | // It is a golang port of cedar (http://www.tkl.iis.u-tokyo.ac.jp/~ynaga/cedar) which is written in C++ by Naoki Yoshinaga.
4 | // Currently cedar-go implements the `reduced` version of cedar.
5 | // This package is not thread safe if any goroutine is doing
6 | // insertions or deletions concurrently with other operations.
7 | //
8 | // Note
9 | //
10 | // key must be `[]byte` without zero items,
11 | // while value must be an integer in the range [0, 2^63-2] or [0, 2^31-2], depending on the platform.
12 | package cedar
--------------------------------------------------------------------------------
/vendor/github.com/adamzy/cedar-go/errors.go:
--------------------------------------------------------------------------------
1 | package cedar
2 | 
3 | import "errors"
4 | 
5 | var (
6 | 	ErrInvalidDataType = errors.New("cedar: invalid datatype")
7 | 	ErrInvalidValue    = errors.New("cedar: invalid value")
8 | 	ErrInvalidKey      = errors.New("cedar: invalid key")
9 | 	ErrNoPath          = errors.New("cedar: no path")
10 | 	ErrNoValue         = errors.New("cedar: no value")
11 | )
12 | 
--------------------------------------------------------------------------------
/vendor/github.com/adamzy/cedar-go/io.go:
--------------------------------------------------------------------------------
1 | package cedar
2 | 
3 | import (
4 | 	"bufio"
5 | 	"encoding/gob"
6 | 	"encoding/json"
7 | 	"io"
8 | 	"os"
9 | )
10 | 
11 | // Save saves the cedar to an io.Writer,
12 | // where dataType is either "json" or "gob".
13 | func (da *Cedar) Save(out io.Writer, dataType string) error {
14 | 	switch dataType {
15 | 	case "gob", "GOB":
16 | 		dataEncoder := gob.NewEncoder(out)
17 | 		return dataEncoder.Encode(da.cedar)
18 | 	case "json", "JSON":
19 | 		dataEncoder := json.NewEncoder(out)
20 | 		return dataEncoder.Encode(da.cedar)
21 | 	}
22 | 	return ErrInvalidDataType
23 | }
24 | 
25 | // SaveToFile saves the cedar to a file,
26 | // where dataType is either "json" or "gob".
27 | func (da *Cedar) SaveToFile(fileName string, dataType string) error {
28 | 	file, err := os.OpenFile(fileName, os.O_CREATE|os.O_WRONLY, 0666)
29 | 	if err != nil {
30 | 		return err
31 | 	}
32 | 	defer file.Close()
33 | 	out := bufio.NewWriter(file)
34 | 	defer out.Flush()
35 | 	// propagate any encoding error from Save
36 | 	return da.Save(out, dataType)
37 | }
38 | 
39 | // Load loads the cedar from an io.Reader,
40 | // where dataType is either "json" or "gob".
41 | func (da *Cedar) Load(in io.Reader, dataType string) error {
42 | 	switch dataType {
43 | 	case "gob", "GOB":
44 | 		dataDecoder := gob.NewDecoder(in)
45 | 		return dataDecoder.Decode(da.cedar)
46 | 	case "json", "JSON":
47 | 		dataDecoder := json.NewDecoder(in)
48 | 		return dataDecoder.Decode(da.cedar)
49 | 	}
50 | 	return ErrInvalidDataType
51 | }
52 | 
53 | // LoadFromFile loads the cedar from a file,
54 | // where dataType is either "json" or "gob".
55 | func (da *Cedar) LoadFromFile(fileName string, dataType string) error {
56 | 	file, err := os.OpenFile(fileName, os.O_RDONLY, 0600)
57 | 	if err != nil {
58 | 		return err
59 | 	}
60 | 	defer file.Close()
61 | 	in := bufio.NewReader(file)
62 | 	return da.Load(in, dataType)
63 | }
64 | 
--------------------------------------------------------------------------------
/vendor/github.com/adamzy/sego/README.md:
--------------------------------------------------------------------------------
1 | sego
2 | ====
3 | 
4 | Go中文分词
5 | 
6 | 词典用前缀树实现,
7 | 分词器算法为基于词频的最短路径加动态规划。
8 | 
9 | 支持普通和搜索引擎两种分词模式,支持用户词典、词性标注,可运行JSON RPC服务。
10 | 
11 | 分词速度单线程2.5MB/s,goroutines并发27MB/s,处理器32核Xeon。
12 | 
13 | # 安装/更新
14 | 
15 | ```
16 | go get -u github.com/huichen/sego
17 | ```
18 | 
19 | # 使用
20 | 
21 | 
22 | ```go
23 | package main
24 | 
25 | import (
26 | 	"fmt"
27 | 	"github.com/huichen/sego"
28 | )
29 | 
30 | func main() {
31 | 	// 载入词典
32 | 	var segmenter sego.Segmenter
33 | 	segmenter.LoadDictionary("github.com/huichen/sego/data/dictionary.txt")
34 | 
35 | 	// 分词
36 | 	text := []byte("中华人民共和国中央人民政府")
37 | 	segments := segmenter.Segment(text)
38 | 
39 | 	// 处理分词结果
40 | 	// 支持普通模式和搜索模式两种分词,见代码中SegmentsToString函数的注释。
41 | 	fmt.Println(sego.SegmentsToString(segments, false))
42 | }
43 | ```
44 | 
--------------------------------------------------------------------------------
/vendor/github.com/adamzy/sego/dictionary.go:
--------------------------------------------------------------------------------
1 | package sego
2 | 
3 | import "github.com/adamzy/cedar-go"
4 | 
5 | // Dictionary结构体实现了一个字串前缀树,一个分词可能出现在叶子节点也有可能出现在非叶节点
6 | type Dictionary struct {
7 | 	trie           *cedar.Cedar // Cedar 前缀树
8 | 	maxTokenLength int          // 词典中最长的分词
9 | 	tokens         []Token      // 词典中所有的分词,方便遍历
10 | 	totalFrequency int64        // 词典中所有分词的频率之和
11 | }
12 | 
13 | func NewDictionary() *Dictionary {
14 | 	return &Dictionary{trie: cedar.New()}
15 | }
16 | 
17 | // 词典中最长的分词
18 | func (dict *Dictionary) MaxTokenLength() int {
19 | 	return dict.maxTokenLength
20 | }
21 | 
22 | // 词典中分词数目
23 | func (dict *Dictionary) NumTokens() int {
24 | 	return len(dict.tokens)
25 | }
26 | 
27 | // 词典中所有分词的频率之和
28 | func (dict *Dictionary) TotalFrequency() int64 {
29 | 	return dict.totalFrequency
30 | }
31 | 
32 | //
向词典中加入一个分词 33 | func (dict *Dictionary) addToken(token Token) { 34 | bytes := textSliceToBytes(token.text) 35 | _, err := dict.trie.Get(bytes) 36 | if err == nil { 37 | return 38 | } 39 | 40 | dict.trie.Insert(bytes, dict.NumTokens()) 41 | dict.tokens = append(dict.tokens, token) 42 | dict.totalFrequency += int64(token.frequency) 43 | if len(token.text) > dict.maxTokenLength { 44 | dict.maxTokenLength = len(token.text) 45 | } 46 | } 47 | 48 | // 在词典中查找和字元组words可以前缀匹配的所有分词 49 | // 返回值为找到的分词数 50 | func (dict *Dictionary) lookupTokens(words []Text, tokens []*Token) (numOfTokens int) { 51 | var id, value int 52 | var err error 53 | for _, word := range words { 54 | id, err = dict.trie.Jump(word, id) 55 | if err != nil { 56 | break 57 | } 58 | value, err = dict.trie.Value(id) 59 | if err == nil { 60 | tokens[numOfTokens] = &dict.tokens[value] 61 | numOfTokens++ 62 | } 63 | } 64 | return 65 | } 66 | -------------------------------------------------------------------------------- /vendor/github.com/adamzy/sego/license.txt: -------------------------------------------------------------------------------- 1 | Copyright 2013 Hui Chen 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | -------------------------------------------------------------------------------- /vendor/github.com/adamzy/sego/segment.go: -------------------------------------------------------------------------------- 1 | package sego 2 | 3 | // 文本中的一个分词 4 | type Segment struct { 5 | // 分词在文本中的起始字节位置 6 | start int 7 | 8 | // 分词在文本中的结束字节位置(不包括该位置) 9 | end int 10 | 11 | // 分词信息 12 | token *Token 13 | } 14 | 15 | // 返回分词在文本中的起始字节位置 16 | func (s *Segment) Start() int { 17 | return s.start 18 | } 19 | 20 | // 返回分词在文本中的结束字节位置(不包括该位置) 21 | func (s *Segment) End() int { 22 | return s.end 23 | } 24 | 25 | // 返回分词信息 26 | func (s *Segment) Token() *Token { 27 | return s.token 28 | } 29 | -------------------------------------------------------------------------------- /vendor/github.com/adamzy/sego/segmenter.go: -------------------------------------------------------------------------------- 1 | //Go中文分词 2 | package sego 3 | 4 | import ( 5 | "bufio" 6 | "fmt" 7 | "log" 8 | "math" 9 | "os" 10 | "strconv" 11 | "strings" 12 | "unicode" 13 | "unicode/utf8" 14 | ) 15 | 16 | const ( 17 | minTokenFrequency = 2 // 仅从字典文件中读取大于等于此频率的分词 18 | ) 19 | 20 | // 分词器结构体 21 | type Segmenter struct { 22 | dict *Dictionary 23 | } 24 | 25 | // 该结构体用于记录Viterbi算法中某字元处的向前分词跳转信息 26 | type jumper struct { 27 | minDistance float32 28 | token *Token 29 | } 30 | 31 | // 返回分词器使用的词典 32 | func (seg *Segmenter) Dictionary() *Dictionary { 33 | return seg.dict 34 | } 35 | 36 | // 从文件中载入词典 37 | // 38 | // 可以载入多个词典文件,文件名用","分隔,排在前面的词典优先载入分词,比如 39 | // "用户词典.txt,通用词典.txt" 40 | // 当一个分词既出现在用户词典也出现在通用词典中,则优先使用用户词典。 41 | // 42 | // 词典的格式为(每个分词一行): 43 | // 分词文本 频率 词性 44 | func (seg *Segmenter) LoadDictionary(files string) { 45 | seg.dict = NewDictionary() 46 | for _, file := range strings.Split(files, ",") { 47 | log.Printf("载入sego词典 %s", file) 48 | dictFile, err := 
os.Open(file) 49 | defer dictFile.Close() 50 | if err != nil { 51 | log.Fatalf("无法载入字典文件 \"%s\" \n", file) 52 | } 53 | 54 | reader := bufio.NewReader(dictFile) 55 | var text string 56 | var freqText string 57 | var frequency int 58 | var pos string 59 | 60 | // 逐行读入分词 61 | for { 62 | size, _ := fmt.Fscanln(reader, &text, &freqText, &pos) 63 | 64 | if size == 0 { 65 | // 文件结束 66 | break 67 | } else if size < 2 { 68 | // 无效行 69 | continue 70 | } else if size == 2 { 71 | // 没有词性标注时设为空字符串 72 | pos = "" 73 | } 74 | 75 | // 解析词频 76 | var err error 77 | frequency, err = strconv.Atoi(freqText) 78 | if err != nil { 79 | continue 80 | } 81 | 82 | // 过滤频率太小的词 83 | if frequency < minTokenFrequency { 84 | continue 85 | } 86 | 87 | // 将分词添加到字典中 88 | words := splitTextToWords([]byte(text)) 89 | token := Token{text: words, frequency: frequency, pos: pos} 90 | seg.dict.addToken(token) 91 | } 92 | } 93 | 94 | // 计算每个分词的路径值,路径值含义见Token结构体的注释 95 | logTotalFrequency := float32(math.Log2(float64(seg.dict.totalFrequency))) 96 | for i := range seg.dict.tokens { 97 | token := &seg.dict.tokens[i] 98 | token.distance = logTotalFrequency - float32(math.Log2(float64(token.frequency))) 99 | } 100 | 101 | // 对每个分词进行细致划分,用于搜索引擎模式,该模式用法见Token结构体的注释。 102 | for i := range seg.dict.tokens { 103 | token := &seg.dict.tokens[i] 104 | segments := seg.segmentWords(token.text, true) 105 | 106 | // 计算需要添加的子分词数目 107 | numTokensToAdd := 0 108 | for iToken := 0; iToken < len(segments); iToken++ { 109 | if len(segments[iToken].token.text) > 1 { 110 | // 略去字元长度为一的分词 111 | // TODO: 这值得进一步推敲,特别是当字典中有英文复合词的时候 112 | numTokensToAdd++ 113 | } 114 | } 115 | token.segments = make([]*Segment, numTokensToAdd) 116 | 117 | // 添加子分词 118 | iSegmentsToAdd := 0 119 | for iToken := 0; iToken < len(segments); iToken++ { 120 | if len(segments[iToken].token.text) > 1 { 121 | token.segments[iSegmentsToAdd] = &segments[iToken] 122 | iSegmentsToAdd++ 123 | } 124 | } 125 | } 126 | 127 | log.Println("sego词典载入完毕") 128 | } 129 | 130 | // 对文本分词 131 | // 132 | // 输入参数: 133 | // bytes UTF8文本的字节数组 134 | // 135 | // 输出: 136 | // []Segment 划分的分词 137 | func (seg *Segmenter) Segment(bytes []byte) []Segment { 138 | return seg.internalSegment(bytes, false) 139 | } 140 | 141 | func (seg *Segmenter) internalSegment(bytes []byte, searchMode bool) []Segment { 142 | // 处理特殊情况 143 | if len(bytes) == 0 { 144 | return []Segment{} 145 | } 146 | 147 | // 划分字元 148 | text := splitTextToWords(bytes) 149 | 150 | return seg.segmentWords(text, searchMode) 151 | } 152 | 153 | func (seg *Segmenter) segmentWords(text []Text, searchMode bool) []Segment { 154 | // 搜索模式下该分词已无继续划分可能的情况 155 | if searchMode && len(text) == 1 { 156 | return []Segment{} 157 | } 158 | 159 | // jumpers定义了每个字元处的向前跳转信息,包括这个跳转对应的分词, 160 | // 以及从文本段开始到该字元的最短路径值 161 | jumpers := make([]jumper, len(text)) 162 | 163 | tokens := make([]*Token, seg.dict.maxTokenLength) 164 | for current := 0; current < len(text); current++ { 165 | // 找到前一个字元处的最短路径,以便计算后续路径值 166 | var baseDistance float32 167 | if current == 0 { 168 | // 当本字元在文本首部时,基础距离应该是零 169 | baseDistance = 0 170 | } else { 171 | baseDistance = jumpers[current-1].minDistance 172 | } 173 | 174 | // 寻找所有以当前字元开头的分词 175 | numTokens := seg.dict.lookupTokens( 176 | text[current:minInt(current+seg.dict.maxTokenLength, len(text))], tokens) 177 | 178 | // 对所有可能的分词,更新分词结束字元处的跳转信息 179 | for iToken := 0; iToken < numTokens; iToken++ { 180 | location := current + len(tokens[iToken].text) - 1 181 | if !searchMode || current != 0 || location != len(text)-1 { 182 | 
updateJumper(&jumpers[location], baseDistance, tokens[iToken]) 183 | } 184 | } 185 | 186 | // 当前字元没有对应分词时补加一个伪分词 187 | if numTokens == 0 || len(tokens[0].text) > 1 { 188 | updateJumper(&jumpers[current], baseDistance, 189 | &Token{text: []Text{text[current]}, frequency: 1, distance: 32, pos: "x"}) 190 | } 191 | } 192 | 193 | // 从后向前扫描第一遍得到需要添加的分词数目 194 | numSeg := 0 195 | for index := len(text) - 1; index >= 0; { 196 | location := index - len(jumpers[index].token.text) + 1 197 | numSeg++ 198 | index = location - 1 199 | } 200 | 201 | // 从后向前扫描第二遍添加分词到最终结果 202 | outputSegments := make([]Segment, numSeg) 203 | for index := len(text) - 1; index >= 0; { 204 | location := index - len(jumpers[index].token.text) + 1 205 | numSeg-- 206 | outputSegments[numSeg].token = jumpers[index].token 207 | index = location - 1 208 | } 209 | 210 | // 计算各个分词的字节位置 211 | bytePosition := 0 212 | for iSeg := 0; iSeg < len(outputSegments); iSeg++ { 213 | outputSegments[iSeg].start = bytePosition 214 | bytePosition += textSliceByteLength(outputSegments[iSeg].token.text) 215 | outputSegments[iSeg].end = bytePosition 216 | } 217 | return outputSegments 218 | } 219 | 220 | // 更新跳转信息: 221 | // 1. 当该位置从未被访问过时(jumper.minDistance为零的情况),或者 222 | // 2. 当该位置的当前最短路径大于新的最短路径时 223 | // 将当前位置的最短路径值更新为baseDistance加上新分词的概率 224 | func updateJumper(jumper *jumper, baseDistance float32, token *Token) { 225 | newDistance := baseDistance + token.distance 226 | if jumper.minDistance == 0 || jumper.minDistance > newDistance { 227 | jumper.minDistance = newDistance 228 | jumper.token = token 229 | } 230 | } 231 | 232 | // 取两整数较小值 233 | func minInt(a, b int) int { 234 | if a > b { 235 | return b 236 | } 237 | return a 238 | } 239 | 240 | // 取两整数较大值 241 | func maxInt(a, b int) int { 242 | if a > b { 243 | return a 244 | } 245 | return b 246 | } 247 | 248 | // 将文本划分成字元 249 | func splitTextToWords(text Text) []Text { 250 | output := make([]Text, 0, len(text)/3) 251 | current := 0 252 | inAlphanumeric := true 253 | alphanumericStart := 0 254 | for current < len(text) { 255 | r, size := utf8.DecodeRune(text[current:]) 256 | if size <= 2 && (unicode.IsLetter(r) || unicode.IsNumber(r)) { 257 | // 当前是拉丁字母或数字(非中日韩文字) 258 | if !inAlphanumeric { 259 | alphanumericStart = current 260 | inAlphanumeric = true 261 | } 262 | } else { 263 | if inAlphanumeric { 264 | inAlphanumeric = false 265 | if current != 0 { 266 | output = append(output, toLower(text[alphanumericStart:current])) 267 | } 268 | } 269 | output = append(output, text[current:current+size]) 270 | } 271 | current += size 272 | } 273 | 274 | // 处理最后一个字元是英文的情况 275 | if inAlphanumeric { 276 | if current != 0 { 277 | output = append(output, toLower(text[alphanumericStart:current])) 278 | } 279 | } 280 | 281 | return output 282 | } 283 | 284 | // 将英文词转化为小写 285 | func toLower(text []byte) []byte { 286 | output := make([]byte, len(text)) 287 | for i, t := range text { 288 | if t >= 'A' && t <= 'Z' { 289 | output[i] = t - 'A' + 'a' 290 | } else { 291 | output[i] = t 292 | } 293 | } 294 | return output 295 | } 296 | -------------------------------------------------------------------------------- /vendor/github.com/adamzy/sego/test_utils.go: -------------------------------------------------------------------------------- 1 | package sego 2 | 3 | import ( 4 | "fmt" 5 | "testing" 6 | ) 7 | 8 | func expect(t *testing.T, expect string, actual interface{}) { 9 | actualString := fmt.Sprint(actual) 10 | if expect != actualString { 11 | t.Errorf("期待值=\"%s\", 实际=\"%s\"", expect, actualString) 12 | } 13 | } 14 | 
15 | func printTokens(tokens []*Token, numTokens int) (output string) { 16 | for iToken := 0; iToken < numTokens; iToken++ { 17 | for _, word := range tokens[iToken].text { 18 | output += fmt.Sprint(string(word)) 19 | } 20 | output += " " 21 | } 22 | return 23 | } 24 | 25 | func toWords(strings ...string) []Text { 26 | words := []Text{} 27 | for _, s := range strings { 28 | words = append(words, []byte(s)) 29 | } 30 | return words 31 | } 32 | 33 | func bytesToString(bytes []Text) (output string) { 34 | for _, b := range bytes { 35 | output += (string(b) + "/") 36 | } 37 | return 38 | } 39 | -------------------------------------------------------------------------------- /vendor/github.com/adamzy/sego/token.go: -------------------------------------------------------------------------------- 1 | package sego 2 | 3 | // 字串类型,可以用来表达 4 | // 1. 一个字元,比如"中"又如"国", 英文的一个字元是一个词 5 | // 2. 一个分词,比如"中国"又如"人口" 6 | // 3. 一段文字,比如"中国有十三亿人口" 7 | type Text []byte 8 | 9 | // 一个分词 10 | type Token struct { 11 | // 分词的字串,这实际上是个字元数组 12 | text []Text 13 | 14 | // 分词在语料库中的词频 15 | frequency int 16 | 17 | // log2(总词频/该分词词频),这相当于log2(1/p(分词)),用作动态规划中 18 | // 该分词的路径长度。求解prod(p(分词))的最大值相当于求解 19 | // sum(distance(分词))的最小值,这就是“最短路径”的来历。 20 | distance float32 21 | 22 | // 词性标注 23 | pos string 24 | 25 | // 该分词文本的进一步分词划分,见Segments函数注释。 26 | segments []*Segment 27 | } 28 | 29 | // 返回分词文本 30 | func (token *Token) Text() string { 31 | return textSliceToString(token.text) 32 | } 33 | 34 | // 返回分词在语料库中的词频 35 | func (token *Token) Frequency() int { 36 | return token.frequency 37 | } 38 | 39 | // 返回分词词性标注 40 | func (token *Token) Pos() string { 41 | return token.pos 42 | } 43 | 44 | // 该分词文本的进一步分词划分,比如"中华人民共和国中央人民政府"这个分词 45 | // 有两个子分词"中华人民共和国"和"中央人民政府"。子分词也可以进一步有子分词 46 | // 形成一个树结构,遍历这个树就可以得到该分词的所有细致分词划分,这主要 47 | // 用于搜索引擎对一段文本进行全文搜索。 48 | func (token *Token) Segments() []*Segment { 49 | return token.segments 50 | } 51 | -------------------------------------------------------------------------------- /vendor/github.com/adamzy/sego/utils.go: -------------------------------------------------------------------------------- 1 | package sego 2 | 3 | import ( 4 | "bytes" 5 | "fmt" 6 | ) 7 | 8 | // 输出分词结果为字符串 9 | // 10 | // 有两种输出模式,以"中华人民共和国"为例 11 | // 12 | // 普通模式(searchMode=false)输出一个分词"中华人民共和国/ns " 13 | // 搜索模式(searchMode=true) 输出普通模式的再细致切分: 14 | // "中华/nz 人民/n 共和/nz 共和国/ns 人民共和国/nt 中华人民共和国/ns " 15 | // 16 | // 搜索模式主要用于给搜索引擎提供尽可能多的关键字,详情请见Token结构体的注释。 17 | func SegmentsToString(segs []Segment, searchMode bool) (output string) { 18 | if searchMode { 19 | for _, seg := range segs { 20 | output += tokenToString(seg.token) 21 | } 22 | } else { 23 | for _, seg := range segs { 24 | output += fmt.Sprintf( 25 | "%s/%s ", textSliceToString(seg.token.text), seg.token.pos) 26 | } 27 | } 28 | return 29 | } 30 | 31 | func tokenToString(token *Token) (output string) { 32 | for _, s := range token.segments { 33 | output += tokenToString(s.token) 34 | } 35 | output += fmt.Sprintf("%s/%s ", textSliceToString(token.text), token.pos) 36 | return 37 | } 38 | 39 | // 输出分词结果到一个字符串slice 40 | // 41 | // 有两种输出模式,以"中华人民共和国"为例 42 | // 43 | // 普通模式(searchMode=false)输出一个分词"[中华人民共和国]" 44 | // 搜索模式(searchMode=true) 输出普通模式的再细致切分: 45 | // "[中华 人民 共和 共和国 人民共和国 中华人民共和国]" 46 | // 47 | // 搜索模式主要用于给搜索引擎提供尽可能多的关键字,详情请见Token结构体的注释。 48 | 49 | func SegmentsToSlice(segs []Segment, searchMode bool) (output []string) { 50 | if searchMode { 51 | for _, seg := range segs { 52 | output = append(output, tokenToSlice(seg.token)...) 
53 | } 54 | } else { 55 | for _, seg := range segs { 56 | output = append(output, seg.token.Text()) 57 | } 58 | } 59 | return 60 | } 61 | 62 | func tokenToSlice(token *Token) (output []string) { 63 | for _, s := range token.segments { 64 | output = append(output, tokenToSlice(s.token)...) 65 | } 66 | output = append(output, textSliceToString(token.text)) 67 | return output 68 | } 69 | 70 | // 将多个字元拼接一个字符串输出 71 | func textSliceToString(text []Text) string { 72 | var output string 73 | for _, word := range text { 74 | output += string(word) 75 | } 76 | return output 77 | } 78 | 79 | // 返回多个字元的字节总长度 80 | func textSliceByteLength(text []Text) (length int) { 81 | for _, word := range text { 82 | length += len(word) 83 | } 84 | return 85 | } 86 | 87 | func textSliceToBytes(text []Text) []byte { 88 | var buf bytes.Buffer 89 | for _, word := range text { 90 | buf.Write(word) 91 | } 92 | return buf.Bytes() 93 | } 94 | -------------------------------------------------------------------------------- /vendor/github.com/issue9/assert/.editorconfig: -------------------------------------------------------------------------------- 1 | # EditorConfig is awesome: http://EditorConfig.org 2 | 3 | # top-most EditorConfig file 4 | root = true 5 | 6 | # Unix-style newlines with a newline ending every file 7 | [*] 8 | end_of_line = lf 9 | insert_final_newline = true 10 | charset = utf-8 11 | 12 | # html 13 | [*.{htm,html,js,css}] 14 | indent_style = space 15 | indent_size = 4 16 | 17 | # 配置文件 18 | [*.{yml,yaml,json}] 19 | indent_style = space 20 | indent_size = 2 21 | -------------------------------------------------------------------------------- /vendor/github.com/issue9/assert/.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled Object files, Static and Dynamic libs (Shared Objects) 2 | *.o 3 | *.a 4 | *.so 5 | 6 | # Folders 7 | _obj 8 | _test 9 | 10 | # Architecture specific extensions/prefixes 11 | *.[568vq] 12 | [568vq].out 13 | 14 | *.cgo1.go 15 | *.cgo2.c 16 | _cgo_defun.c 17 | _cgo_gotypes.go 18 | _cgo_export.* 19 | 20 | _testmain.go 21 | 22 | *.exe 23 | *.test 24 | *.prof 25 | 26 | # vim 27 | *.swp 28 | 29 | # osx 30 | .DS_Store 31 | 32 | .vscode 33 | .idea 34 | -------------------------------------------------------------------------------- /vendor/github.com/issue9/assert/LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2014 caixw 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | -------------------------------------------------------------------------------- /vendor/github.com/issue9/assert/README.md: -------------------------------------------------------------------------------- 1 | assert 2 | [![Go](https://github.com/issue9/assert/workflows/Go/badge.svg)](https://github.com/issue9/assert/actions?query=workflow%3AGo) 3 | [![codecov](https://codecov.io/gh/issue9/assert/branch/master/graph/badge.svg)](https://codecov.io/gh/issue9/assert) 4 | [![license](https://img.shields.io/badge/license-MIT-brightgreen.svg?style=flat)](https://opensource.org/licenses/MIT) 5 | ====== 6 | 7 | assert 包是对 testing 的一个简单扩展,提供的一系列的断言函数, 8 | 方便在测试函数中使用: 9 | 10 | ```go 11 | func TestA(t *testing.T) { 12 | v := true 13 | assert.True(v) 14 | 15 | a := assert.New(t) 16 | a.True(v) 17 | } 18 | 19 | // 也可以对 testing.B 使用 20 | func Benchmark1(b *testing.B) { 21 | a := assert.New(b) 22 | v := false 23 | a.True(v) 24 | for(i:=0; i -1 { 63 | funcName = funcName[:index] 64 | info = funcName + "(" + basename + ":" + strconv.Itoa(line) + ")" 65 | continue 66 | } 67 | 68 | info = funcName + "(" + basename + ":" + strconv.Itoa(line) + ")" 69 | break 70 | } 71 | 72 | if info == "" { 73 | info = "<无法获取调用者信息>" 74 | } 75 | return info 76 | } 77 | 78 | // 格式化错误提示信息 79 | // 80 | // msg1 中的所有参数将依次被传递给 fmt.Sprintf() 函数, 81 | // 所以 msg1[0] 必须可以转换成 string(如:string, []byte, []rune, fmt.Stringer) 82 | // 83 | // msg2 参数格式与 msg1 完全相同,在 msg1 为空的情况下,会使用 msg2 的内容, 84 | // 否则 msg2 不会启作用。 85 | func formatMessage(msg1 []interface{}, msg2 []interface{}) string { 86 | msg := msg1 87 | if len(msg) == 0 { 88 | msg = msg2 89 | } 90 | 91 | if len(msg) == 0 { 92 | return "<未提供任何错误信息>" 93 | } 94 | 95 | if len(msg) == 1 { 96 | return fmt.Sprint(msg[0]) 97 | } 98 | 99 | format := "" 100 | switch v := msg[0].(type) { 101 | case []byte: 102 | format = string(v) 103 | case []rune: 104 | format = string(v) 105 | case string: 106 | format = v 107 | case fmt.Stringer: 108 | format = v.String() 109 | default: 110 | return fmt.Sprintln(msg...) 111 | } 112 | 113 | return fmt.Sprintf(format, msg[1:]...) 
114 | }
115 | 
116 | // assert 在 expr 条件不成立时,输出错误信息并标记测试失败
117 | //
118 | // expr 返回结果值为bool类型的表达式;
119 | // msg1,msg2 输出的错误信息,之所以提供两组信息,是方便在用户没有提供的情况下,
120 | // 可以使用系统内部提供的信息,优先使用 msg1 中的信息,若不存在,则使用 msg2 的内容。
121 | func assert(t testing.TB, expr bool, msg1 []interface{}, msg2 []interface{}) {
122 | 	if !expr {
123 | 		t.Error(formatMessage(msg1, msg2) + "@" + getCallerInfo())
124 | 	}
125 | }
126 | 
127 | // True 断言表达式 expr 为 true
128 | //
129 | // args 对应 fmt.Printf() 函数中的参数,其中 args[0] 对应第一个参数 format,依次类推,
130 | // 具体可参考 formatMessage() 函数的介绍。其它断言函数的 args 参数,功能与此相同。
131 | func True(t testing.TB, expr bool, args ...interface{}) {
132 | 	assert(t, expr, args, []interface{}{"True 失败,实际值为 %#v", expr})
133 | }
134 | 
135 | // False 断言表达式 expr 为 false
136 | func False(t testing.TB, expr bool, args ...interface{}) {
137 | 	assert(t, !expr, args, []interface{}{"False 失败,实际值为 %#v", expr})
138 | }
139 | 
140 | // Nil 断言表达式 expr 为 nil
141 | func Nil(t testing.TB, expr interface{}, args ...interface{}) {
142 | 	assert(t, IsNil(expr), args, []interface{}{"Nil 失败,实际值为 %#v", expr})
143 | }
144 | 
145 | // NotNil 断言表达式 expr 为非 nil 值
146 | func NotNil(t testing.TB, expr interface{}, args ...interface{}) {
147 | 	assert(t, !IsNil(expr), args, []interface{}{"NotNil 失败,实际值为 %#v", expr})
148 | }
149 | 
150 | // Equal 断言 v1 与 v2 两个值相等
151 | func Equal(t testing.TB, v1, v2 interface{}, args ...interface{}) {
152 | 	assert(t, IsEqual(v1, v2), args, []interface{}{"Equal 失败,实际值为\nv1=%#v\nv2=%#v", v1, v2})
153 | }
154 | 
155 | // NotEqual 断言 v1 与 v2 两个值不相等
156 | func NotEqual(t testing.TB, v1, v2 interface{}, args ...interface{}) {
157 | 	assert(t, !IsEqual(v1, v2), args, []interface{}{"NotEqual 失败,实际值为\nv1=%#v\nv2=%#v", v1, v2})
158 | }
159 | 
160 | // Empty 断言 expr 的值为空(nil,"",0,false),否则输出错误信息
161 | func Empty(t testing.TB, expr interface{}, args ...interface{}) {
162 | 	assert(t, IsEmpty(expr), args, []interface{}{"Empty 失败,实际值为 %#v", expr})
163 | }
164 | 
165 | // NotEmpty 断言 expr 的值为非空(除 nil,"",0,false之外),否则输出错误信息
166 | func NotEmpty(t testing.TB, expr interface{}, args ...interface{}) {
167 | 	assert(t, !IsEmpty(expr), args, []interface{}{"NotEmpty 失败,实际值为 %#v", expr})
168 | }
169 | 
170 | // Error 断言有错误发生
171 | //
172 | // 传递未初始化的 error 值(var err error = nil),将断言失败
173 | func Error(t testing.TB, expr interface{}, args ...interface{}) {
174 | 	if IsNil(expr) { // 空值,必定没有错误
175 | 		assert(t, false, args, []interface{}{"Error 失败,实际值为 Nil:[%T]", expr})
176 | 		return
177 | 	}
178 | 
179 | 	_, ok := expr.(error)
180 | 	assert(t, ok, args, []interface{}{"Error 失败,实际类型为[%T]", expr})
181 | }
182 | 
183 | // ErrorString 断言有错误发生且错误信息中包含指定的字符串 str
184 | //
185 | // 传递未初始化的 error 值(var err error = nil),将断言失败
186 | func ErrorString(t testing.TB, expr interface{}, str string, args ...interface{}) {
187 | 	if IsNil(expr) { // 空值,必定没有错误
188 | 		assert(t, false, args, []interface{}{"ErrorString 失败,实际值为 Nil:[%T]", expr})
189 | 		return
190 | 	}
191 | 
192 | 	if err, ok := expr.(error); ok {
193 | 		index := strings.Index(err.Error(), str)
194 | 		assert(t, index >= 0, args, []interface{}{"ErrorString 失败,错误信息[%v]中未包含[%s]", err, str})
195 | 	}
196 | }
197 | 
198 | // ErrorType 断言有错误发生且错误的类型与 typ 的类型相同
199 | //
200 | // 传递未初始化的 error 值(var err error = nil),将断言失败。
201 | //
202 | // 仅对 expr 是否与 typ 为同一类型作简单判断,如果要检测是否是包含关系,可以使用 errors.Is 检测。
203 | func ErrorType(t testing.TB, expr interface{}, typ error, args ...interface{}) {
204 | 	if IsNil(expr) { // 空值,必定没有错误
205 | 		assert(t, false, args, []interface{}{"ErrorType 失败,实际值为 Nil:[%T]", expr})
206 | 		return
207 | 	}
208 | 
209 | 	if _, ok := expr.(error); !ok {
210 | 
assert(t, false, args, []interface{}{"ErrorType 失败,实际类型为[%T],且无法转换成 error 接口", expr}) 211 | return 212 | } 213 | 214 | t1 := reflect.TypeOf(expr) 215 | t2 := reflect.TypeOf(typ) 216 | assert(t, t1 == t2, args, []interface{}{"ErrorType 失败,v1[%v]为一个错误类型,但与v2[%v]的类型不相同", t1, t2}) 217 | } 218 | 219 | // NotError 断言没有错误发生 220 | func NotError(t testing.TB, expr interface{}, args ...interface{}) { 221 | if IsNil(expr) { // 空值必定没有错误 222 | assert(t, true, args, []interface{}{"NotError 失败,实际类型为[%T]", expr}) 223 | return 224 | } 225 | err, ok := expr.(error) 226 | assert(t, !ok, args, []interface{}{"NotError 失败,错误信息为[%v]", err}) 227 | } 228 | 229 | // ErrorIs 断言 expr 为 target 类型 230 | // 231 | // 相当于 True(t, errors.Is(expr, target)) 232 | func ErrorIs(t testing.TB, expr interface{}, target error, args ...interface{}) { 233 | err, ok := expr.(error) 234 | assert(t, ok, args, []interface{}{"ErrorIs 失败,expr 无法转换成 error。"}) 235 | 236 | assert(t, errors.Is(err, target), args, []interface{}{"ErrorIs 失败,expr 不是且不包含 target。"}) 237 | } 238 | 239 | // FileExists 断言文件存在 240 | func FileExists(t testing.TB, path string, args ...interface{}) { 241 | _, err := os.Stat(path) 242 | 243 | if err != nil && !os.IsExist(err) { 244 | assert(t, false, args, []interface{}{"FileExists 失败,且附带以下错误:%v", err}) 245 | } 246 | } 247 | 248 | // FileNotExists 断言文件不存在 249 | func FileNotExists(t testing.TB, path string, args ...interface{}) { 250 | _, err := os.Stat(path) 251 | 252 | if err == nil { 253 | assert(t, false, args, []interface{}{"FileNotExists 失败"}) 254 | } 255 | if os.IsExist(err) { 256 | assert(t, false, args, []interface{}{"FileNotExists 失败,且返回以下错误信息:%v", err}) 257 | } 258 | } 259 | 260 | // Panic 断言函数会发生 panic 261 | func Panic(t testing.TB, fn func(), args ...interface{}) { 262 | has, _ := HasPanic(fn) 263 | assert(t, has, args, []interface{}{"并未发生 panic"}) 264 | } 265 | 266 | // PanicString 断言函数会发生 panic 且 panic 信息中包含指定的字符串内容 267 | func PanicString(t testing.TB, fn func(), str string, args ...interface{}) { 268 | if has, msg := HasPanic(fn); has { 269 | index := strings.Index(fmt.Sprint(msg), str) 270 | assert(t, index >= 0, args, []interface{}{"panic 中并未包含 %s", str}) 271 | return 272 | } 273 | 274 | assert(t, false, args, []interface{}{"并未发生 panic"}) 275 | } 276 | 277 | // PanicType 断言函数会发生 panic 且抛出指定的类型 278 | func PanicType(t testing.TB, fn func(), typ interface{}, args ...interface{}) { 279 | has, msg := HasPanic(fn) 280 | if !has { 281 | return 282 | } 283 | 284 | t1 := reflect.TypeOf(msg) 285 | t2 := reflect.TypeOf(typ) 286 | assert(t, t1 == t2, args, []interface{}{"PanicType 失败,v1[%v]的类型与v2[%v]的类型不相同", t1, t2}) 287 | 288 | } 289 | 290 | // NotPanic 断言函数不会发生 panic 291 | func NotPanic(t testing.TB, fn func(), args ...interface{}) { 292 | has, msg := HasPanic(fn) 293 | assert(t, !has, args, []interface{}{"发生了 panic,其信息为[%v]", msg}) 294 | } 295 | 296 | // Contains 断言 container 包含 item 的或是包含 item 中的所有项 297 | // 298 | // 具体函数说明可参考 IsContains() 299 | func Contains(t testing.TB, container, item interface{}, args ...interface{}) { 300 | assert(t, IsContains(container, item), args, 301 | []interface{}{"container:[%v]并未包含item[%v]", container, item}) 302 | } 303 | 304 | // NotContains 断言 container 不包含 item 的或是不包含 item 中的所有项 305 | func NotContains(t testing.TB, container, item interface{}, args ...interface{}) { 306 | assert(t, !IsContains(container, item), args, 307 | []interface{}{"container:[%v]包含item[%v]", container, item}) 308 | } 309 | -------------------------------------------------------------------------------- 
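The assertion functions above all funnel through the unexported `assert()` helper, which appends the caller position from `getCallerInfo()` to the formatted failure message. A minimal sketch of how the package-level functions might be combined in a test; the asserted values and the sentinel error are illustrative, not from this repository:

```go
package sample

import (
	"errors"
	"fmt"
	"testing"

	"github.com/issue9/assert"
)

// errNotFound is a hypothetical sentinel error, used only for illustration.
var errNotFound = errors.New("not found")

func TestSketch(t *testing.T) {
	// Basic assertions; on failure the message carries the caller info.
	assert.True(t, 1+1 == 2, "arithmetic broke: %d", 1+1)
	assert.Equal(t, "abc", "abc")
	assert.NotNil(t, t)

	// Error assertions: a nil error makes Error/ErrorIs/ErrorString fail.
	err := fmt.Errorf("wrap: %w", errNotFound)
	assert.Error(t, err)
	assert.ErrorIs(t, err, errNotFound)
	assert.ErrorString(t, err, "not found")

	// Panic assertions, backed by HasPanic in util.go.
	assert.Panic(t, func() { panic("boom") })
	assert.PanicString(t, func() { panic("boom") }, "boo")
	assert.NotPanic(t, func() {})
}
```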
/vendor/github.com/issue9/assert/assertion.go: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: MIT 2 | 3 | package assert 4 | 5 | import "testing" 6 | 7 | // Assertion 是对 testing.TB 进行了简单的封装。 8 | // 可以以对象的方式调用包中的各个断言函数。 9 | type Assertion struct { 10 | t testing.TB 11 | } 12 | 13 | // New 返回 Assertion 对象。 14 | func New(t testing.TB) *Assertion { 15 | return &Assertion{t: t} 16 | } 17 | 18 | // TB 返回 testing.TB 接口 19 | func (a *Assertion) TB() testing.TB { 20 | return a.t 21 | } 22 | 23 | // True 参照 assert.True() 函数 24 | func (a *Assertion) True(expr bool, msg ...interface{}) *Assertion { 25 | True(a.t, expr, msg...) 26 | return a 27 | } 28 | 29 | // False 参照 assert.False() 函数 30 | func (a *Assertion) False(expr bool, msg ...interface{}) *Assertion { 31 | False(a.t, expr, msg...) 32 | return a 33 | } 34 | 35 | // Nil 参照 assert.Nil() 函数 36 | func (a *Assertion) Nil(expr interface{}, msg ...interface{}) *Assertion { 37 | Nil(a.t, expr, msg...) 38 | return a 39 | } 40 | 41 | // NotNil 参照 assert.NotNil() 函数 42 | func (a *Assertion) NotNil(expr interface{}, msg ...interface{}) *Assertion { 43 | NotNil(a.t, expr, msg...) 44 | return a 45 | } 46 | 47 | // Equal 参照 assert.Equal() 函数 48 | func (a *Assertion) Equal(v1, v2 interface{}, msg ...interface{}) *Assertion { 49 | Equal(a.t, v1, v2, msg...) 50 | return a 51 | } 52 | 53 | // NotEqual 参照 assert.NotEqual() 函数 54 | func (a *Assertion) NotEqual(v1, v2 interface{}, msg ...interface{}) *Assertion { 55 | NotEqual(a.t, v1, v2, msg...) 56 | return a 57 | } 58 | 59 | // Empty 参照 assert.Empty() 函数 60 | func (a *Assertion) Empty(expr interface{}, msg ...interface{}) *Assertion { 61 | Empty(a.t, expr, msg...) 62 | return a 63 | } 64 | 65 | // NotEmpty 参照 assert.NotEmpty() 函数 66 | func (a *Assertion) NotEmpty(expr interface{}, msg ...interface{}) *Assertion { 67 | NotEmpty(a.t, expr, msg...) 68 | return a 69 | } 70 | 71 | // Error 参照 assert.Error() 函数 72 | func (a *Assertion) Error(expr interface{}, msg ...interface{}) *Assertion { 73 | Error(a.t, expr, msg...) 74 | return a 75 | } 76 | 77 | // ErrorString 参照 assert.ErrorString() 函数 78 | func (a *Assertion) ErrorString(expr interface{}, str string, msg ...interface{}) *Assertion { 79 | ErrorString(a.t, expr, str, msg...) 80 | return a 81 | } 82 | 83 | // ErrorType 参照 assert.ErrorType() 函数 84 | func (a *Assertion) ErrorType(expr interface{}, typ error, msg ...interface{}) *Assertion { 85 | ErrorType(a.t, expr, typ, msg...) 86 | return a 87 | } 88 | 89 | // NotError 参照 assert.NotError() 函数 90 | func (a *Assertion) NotError(expr interface{}, msg ...interface{}) *Assertion { 91 | NotError(a.t, expr, msg...) 92 | return a 93 | } 94 | 95 | // ErrorIs 断言 expr 为 target 类型 96 | // 97 | // 相当于 a.True(errors.Is(expr, target)) 98 | func (a *Assertion) ErrorIs(expr interface{}, target error, msg ...interface{}) *Assertion { 99 | ErrorIs(a.t, expr, target, msg...) 100 | return a 101 | } 102 | 103 | // FileExists 参照 assert.FileExists() 函数 104 | func (a *Assertion) FileExists(path string, msg ...interface{}) *Assertion { 105 | FileExists(a.t, path, msg...) 106 | return a 107 | } 108 | 109 | // FileNotExists 参照 assert.FileNotExists() 函数 110 | func (a *Assertion) FileNotExists(path string, msg ...interface{}) *Assertion { 111 | FileNotExists(a.t, path, msg...) 112 | return a 113 | } 114 | 115 | // Panic 参照 assert.Panic() 函数 116 | func (a *Assertion) Panic(fn func(), msg ...interface{}) *Assertion { 117 | Panic(a.t, fn, msg...) 
118 | return a 119 | } 120 | 121 | // PanicString 参照 assert.PanicString() 函数 122 | func (a *Assertion) PanicString(fn func(), str string, msg ...interface{}) *Assertion { 123 | PanicString(a.t, fn, str, msg...) 124 | return a 125 | } 126 | 127 | // PanicType 参照 assert.PanicType() 函数 128 | func (a *Assertion) PanicType(fn func(), typ interface{}, msg ...interface{}) *Assertion { 129 | PanicType(a.t, fn, typ, msg...) 130 | return a 131 | } 132 | 133 | // NotPanic 参照 assert.NotPanic() 函数 134 | func (a *Assertion) NotPanic(fn func(), msg ...interface{}) *Assertion { 135 | NotPanic(a.t, fn, msg...) 136 | return a 137 | } 138 | 139 | // Contains 参照 assert.Contains() 函数 140 | func (a *Assertion) Contains(container, item interface{}, msg ...interface{}) *Assertion { 141 | Contains(a.t, container, item, msg...) 142 | return a 143 | } 144 | 145 | // NotContains 参照 assert.NotContains() 函数 146 | func (a *Assertion) NotContains(container, item interface{}, msg ...interface{}) *Assertion { 147 | NotContains(a.t, container, item, msg...) 148 | return a 149 | } 150 | -------------------------------------------------------------------------------- /vendor/github.com/issue9/assert/doc.go: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: MIT 2 | 3 | // Package assert 是对 testing 包的一些简单包装 4 | // 5 | // 提供了两种操作方式:直接调用包函数;或是使用 Assertion 对象。 6 | // 两种方式完全等价,可以根据自己需要,选择一种。 7 | // func TestAssert(t *testing.T) { 8 | // var v interface{} = 5 9 | // 10 | // // 直接调用包函数 11 | // assert.True(t, v == 5, "v的值[%v]不等于5", v) 12 | // assert.Equal(t, 5, v, "v的值[%v]不等于5", v) 13 | // assert.Nil(t, v) 14 | // 15 | // // 以 Assertion 对象方式使用 16 | // a := assert.New(t) 17 | // a.True(v==5, "v的值[%v]不等于5", v) 18 | // a.Equal(5, v, "v的值[%v]不等于5", v) 19 | // a.Nil(v) 20 | // a.TB().Log("success") 21 | // 22 | // // 以函数链的形式调用 Assertion 对象的方法 23 | // a.True(false).Equal(5,6) 24 | // } 25 | // 26 | // // 也可以对 testing.B 使用 27 | // func Benchmark1(b *testing.B) { 28 | // a := assert.New(b) 29 | // a.True(false) 30 | // for(i:=0; i= reflect.Chan && k <= reflect.Slice && v.IsNil() 80 | } 81 | 82 | // IsEqual 判断两个值是否相等。 83 | // 84 | // 除了通过 reflect.DeepEqual() 判断值是否相等之外,一些类似 85 | // 可转换的数值也能正确判断,比如以下值也将会被判断为相等: 86 | // int8(5) == int(5) 87 | // []int{1,2} == []int8{1,2} 88 | // []int{1,2} == [2]int8{1,2} 89 | // []int{1,2} == []float32{1,2} 90 | // map[string]int{"1":"2":2} == map[string]int8{"1":1,"2":2} 91 | // 92 | // // map 的键值不同,即使可相互转换也判断不相等。 93 | // map[int]int{1:1,2:2} != map[int8]int{1:1,2:2} 94 | func IsEqual(v1, v2 interface{}) bool { 95 | if reflect.DeepEqual(v1, v2) { 96 | return true 97 | } 98 | 99 | vv1 := reflect.ValueOf(v1) 100 | vv2 := reflect.ValueOf(v2) 101 | 102 | // NOTE: 这里返回 false,而不是 true 103 | if !vv1.IsValid() || !vv2.IsValid() { 104 | return false 105 | } 106 | 107 | if vv1 == vv2 { 108 | return true 109 | } 110 | 111 | vv1Type := vv1.Type() 112 | vv2Type := vv2.Type() 113 | 114 | // 过滤掉已经在 reflect.DeepEqual() 进行处理的类型 115 | switch vv1Type.Kind() { 116 | case reflect.Struct, reflect.Ptr, reflect.Func, reflect.Interface: 117 | return false 118 | case reflect.Slice, reflect.Array: 119 | // vv2.Kind() 与 vv1 的不相同 120 | if vv2.Kind() != reflect.Slice && vv2.Kind() != reflect.Array { 121 | // 虽然类型不同,但可以相互转换成 vv1 的,如:vv2 是 string,vv2 是 []byte, 122 | if vv2Type.ConvertibleTo(vv1Type) { 123 | return IsEqual(vv1.Interface(), vv2.Convert(vv1Type).Interface()) 124 | } 125 | return false 126 | } 127 | 128 | // reflect.DeepEqual() 未考虑类型不同但是类型可转换的情况,比如: 129 | // 
[]int{8,9} == []int8{8,9},此处重新对 slice 和 array 做比较处理。 130 | if vv1.Len() != vv2.Len() { 131 | return false 132 | } 133 | 134 | for i := 0; i < vv1.Len(); i++ { 135 | if !IsEqual(vv1.Index(i).Interface(), vv2.Index(i).Interface()) { 136 | return false 137 | } 138 | } 139 | return true // for 中所有的值比较都相等,返回 true 140 | case reflect.Map: 141 | if vv2.Kind() != reflect.Map { 142 | return false 143 | } 144 | 145 | if vv1.IsNil() != vv2.IsNil() { 146 | return false 147 | } 148 | if vv1.Len() != vv2.Len() { 149 | return false 150 | } 151 | if vv1.Pointer() == vv2.Pointer() { 152 | return true 153 | } 154 | 155 | // 两个 map 的键名类型不同 156 | if vv2Type.Key().Kind() != vv1Type.Key().Kind() { 157 | return false 158 | } 159 | 160 | for _, index := range vv1.MapKeys() { 161 | vv2Index := vv2.MapIndex(index) 162 | if !vv2Index.IsValid() { 163 | return false 164 | } 165 | 166 | if !IsEqual(vv1.MapIndex(index).Interface(), vv2Index.Interface()) { 167 | return false 168 | } 169 | } 170 | return true // for 中所有的值比较都相等,返回 true 171 | case reflect.String: 172 | if vv2.Kind() == reflect.String { 173 | return vv1.String() == vv2.String() 174 | } 175 | if vv2Type.ConvertibleTo(vv1Type) { // 考虑 v1 是 string,v2 是 []byte 的情况 176 | return IsEqual(vv1.Interface(), vv2.Convert(vv1Type).Interface()) 177 | } 178 | 179 | return false 180 | } 181 | 182 | if vv1Type.ConvertibleTo(vv2Type) { 183 | return vv2.Interface() == vv1.Convert(vv2Type).Interface() 184 | } else if vv2Type.ConvertibleTo(vv1Type) { 185 | return vv1.Interface() == vv2.Convert(vv1Type).Interface() 186 | } 187 | 188 | return false 189 | } 190 | 191 | // HasPanic 判断 fn 函数是否会发生 panic 192 | // 若发生了 panic,将把 msg 一起返回。 193 | func HasPanic(fn func()) (has bool, msg interface{}) { 194 | defer func() { 195 | if msg = recover(); msg != nil { 196 | has = true 197 | } 198 | }() 199 | fn() 200 | 201 | return 202 | } 203 | 204 | // IsContains 判断 container 是否包含了 item 的内容。若是指针,会判断指针指向的内容, 205 | // 但是不支持多重指针。 206 | // 207 | // 若 container 是字符串(string、[]byte 和 []rune,不包含 fmt.Stringer 接口), 208 | // 都将会以字符串的形式判断其是否包含 item。 209 | // 若 container 是个列表(array、slice、map)则判断其元素中是否包含 item 中的 210 | // 的所有项,或是 item 本身就是 container 中的一个元素。 211 | func IsContains(container, item interface{}) bool { 212 | if container == nil { // nil不包含任何东西 213 | return false 214 | } 215 | 216 | cv := reflect.ValueOf(container) 217 | iv := reflect.ValueOf(item) 218 | 219 | for cv.Kind() == reflect.Ptr { 220 | cv = cv.Elem() 221 | } 222 | 223 | for iv.Kind() == reflect.Ptr { 224 | iv = iv.Elem() 225 | } 226 | 227 | if IsEqual(container, item) { 228 | return true 229 | } 230 | 231 | // 判断是字符串的情况 232 | switch c := cv.Interface().(type) { 233 | case string: 234 | switch i := iv.Interface().(type) { 235 | case string: 236 | return strings.Contains(c, i) 237 | case []byte: 238 | return strings.Contains(c, string(i)) 239 | case []rune: 240 | return strings.Contains(c, string(i)) 241 | case byte: 242 | return bytes.IndexByte([]byte(c), i) != -1 243 | case rune: 244 | return bytes.IndexRune([]byte(c), i) != -1 245 | } 246 | case []byte: 247 | switch i := iv.Interface().(type) { 248 | case string: 249 | return bytes.Contains(c, []byte(i)) 250 | case []byte: 251 | return bytes.Contains(c, i) 252 | case []rune: 253 | return strings.Contains(string(c), string(i)) 254 | case byte: 255 | return bytes.IndexByte(c, i) != -1 256 | case rune: 257 | return bytes.IndexRune(c, i) != -1 258 | } 259 | case []rune: 260 | switch i := iv.Interface().(type) { 261 | case string: 262 | return strings.Contains(string(c), string(i)) 263 | case 
[]byte: 264 | return strings.Contains(string(c), string(i)) 265 | case []rune: 266 | return strings.Contains(string(c), string(i)) 267 | case byte: 268 | return strings.IndexByte(string(c), i) != -1 269 | case rune: 270 | return strings.IndexRune(string(c), i) != -1 271 | } 272 | } 273 | 274 | if (cv.Kind() == reflect.Slice) || (cv.Kind() == reflect.Array) { 275 | if !cv.IsValid() || cv.Len() == 0 { // 空的,就不算包含另一个,即使另一个也是空值。 276 | return false 277 | } 278 | 279 | if !iv.IsValid() { 280 | return false 281 | } 282 | 283 | // item 是 container 的一个元素 284 | for i := 0; i < cv.Len(); i++ { 285 | if IsEqual(cv.Index(i).Interface(), iv.Interface()) { 286 | return true 287 | } 288 | } 289 | 290 | // 开始判断 item 的元素是否与 container 中的元素相等。 291 | 292 | // 若 item 的长度为 0,表示不包含 293 | if (iv.Kind() != reflect.Slice) || (iv.Len() == 0) { 294 | return false 295 | } 296 | 297 | // item 的元素比 container 的元素多 298 | if iv.Len() > cv.Len() { 299 | return false 300 | } 301 | 302 | // 依次比较 item 的各个子元素是否都存在于 container,且下标都相同 303 | ivIndex := 0 304 | for i := 0; i < cv.Len(); i++ { 305 | if IsEqual(cv.Index(i).Interface(), iv.Index(ivIndex).Interface()) { 306 | if (ivIndex == 0) && (i+iv.Len() > cv.Len()) { 307 | return false 308 | } 309 | ivIndex++ 310 | if ivIndex == iv.Len() { // 已经遍历完 iv 311 | return true 312 | } 313 | } else if ivIndex > 0 { 314 | return false 315 | } 316 | } 317 | return false 318 | } // end cv.Kind == reflect.Slice and reflect.Array 319 | 320 | if cv.Kind() == reflect.Map { 321 | if cv.Len() == 0 { 322 | return false 323 | } 324 | 325 | if (iv.Kind() != reflect.Map) || (iv.Len() == 0) { 326 | return false 327 | } 328 | 329 | if iv.Len() > cv.Len() { 330 | return false 331 | } 332 | 333 | // 判断所有 item 的项都存在于 container 中 334 | for _, key := range iv.MapKeys() { 335 | cvItem := cv.MapIndex(key) 336 | if !cvItem.IsValid() { // container 中不包含该值。 337 | return false 338 | } 339 | if !IsEqual(cvItem.Interface(), iv.MapIndex(key).Interface()) { 340 | return false 341 | } 342 | } 343 | // for 中的所有判断都成立,返回 true 344 | return true 345 | } 346 | 347 | return false 348 | } 349 | -------------------------------------------------------------------------------- /vendor/modules.txt: -------------------------------------------------------------------------------- 1 | # github.com/adamzy/cedar-go v0.0.0-20170805034717-80a9c64b256d 2 | ## explicit 3 | github.com/adamzy/cedar-go 4 | # github.com/adamzy/sego v0.0.0-20151004184924-5eab9a44f8e8 5 | ## explicit 6 | github.com/adamzy/sego 7 | # github.com/issue9/assert v1.4.1 8 | ## explicit 9 | github.com/issue9/assert 10 | --------------------------------------------------------------------------------
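For reference, the vendored cedar-go files above combine into a small workflow: build the trie with `Insert` (as `Dictionary.addToken` does), read it back with `Get`/`Value`/`PrefixPredict`, and persist it with the `Save`/`Load` pair from io.go. A minimal sketch under those assumptions; the keys are illustrative, and `Insert`/`Get` are assumed to return `error` / `(int, error)` respectively, as their call sites in dictionary.go suggest:

```go
package main

import (
	"bytes"
	"fmt"

	"github.com/adamzy/cedar-go"
)

func main() {
	trie := cedar.New()

	// Key -> running index, the same scheme Dictionary.addToken uses.
	for i, key := range []string{"ab", "abc", "abcd"} {
		if err := trie.Insert([]byte(key), i); err != nil {
			panic(err)
		}
	}

	// num=0 walks every key that "ab" prefixes, in the order
	// described by the PrefixPredict comment in api.go.
	for _, id := range trie.PrefixPredict([]byte("ab"), 0) {
		if v, err := trie.Value(id); err == nil {
			fmt.Println("predicted value:", v)
		}
	}

	// Round-trip the trie through the gob branch of Save/Load.
	var buf bytes.Buffer
	if err := trie.Save(&buf, "gob"); err != nil {
		panic(err)
	}
	restored := cedar.New()
	if err := restored.Load(&buf, "gob"); err != nil {
		panic(err)
	}
	if v, err := restored.Get([]byte("abc")); err == nil {
		fmt.Println("restored value for abc:", v) // 1
	}
}
```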
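The search mode documented in utils.go is what distinguishes `SegmentsToSlice(segs, true)` from the plain mode: each token is recursively expanded through `Token.Segments()` before the token itself is emitted. A short sketch against the vendored package; the dictionary path is illustrative, and any file in the "分词文本 频率 词性" format accepted by `LoadDictionary` will do:

```go
package main

import (
	"fmt"

	"github.com/adamzy/sego"
)

func main() {
	var segmenter sego.Segmenter
	// Illustrative path; LoadDictionary accepts comma-separated files,
	// with earlier (e.g. user) dictionaries taking precedence.
	segmenter.LoadDictionary("data/dictionary.txt")

	segments := segmenter.Segment([]byte("中华人民共和国中央人民政府"))

	// searchMode=true also emits every sub-token of each segment,
	// giving a search engine more keywords to index.
	for _, word := range sego.SegmentsToSlice(segments, true) {
		fmt.Println(word)
	}
}
```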
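Finally, the conversion-aware rules documented on `IsEqual` and `IsContains` are easiest to see on concrete values. A small sketch; the expected results in the comments follow from the documented rules and the code above:

```go
package main

import (
	"fmt"

	"github.com/issue9/assert"
)

func main() {
	// IsEqual tolerates convertible element types, unlike reflect.DeepEqual.
	fmt.Println(assert.IsEqual([]int{1, 2}, []int8{1, 2})) // true
	fmt.Println(assert.IsEqual(int8(5), 5))                // true

	// Map keys of different kinds never compare equal, even when convertible.
	fmt.Println(assert.IsEqual(map[int]int{1: 1}, map[int8]int{1: 1})) // false

	// String-like containers reduce to substring checks...
	fmt.Println(assert.IsContains("中文abc", []byte("abc"))) // true

	// ...while slices check for an ordered, contiguous sub-slice.
	fmt.Println(assert.IsContains([]int{1, 2, 3}, []int{2, 3})) // true
	fmt.Println(assert.IsContains([]int{1, 2, 3}, []int{3, 2})) // false
}
```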