├── match.go ├── README.md ├── radix.go ├── all_test.go └── pattern.go /match.go: -------------------------------------------------------------------------------- 1 | package radix 2 | 3 | type Pattern struct { 4 | trie PatternTrie 5 | } 6 | 7 | // Compile compiles several alternative patterns into one. 8 | func Compile(patterns ...string) *Pattern { 9 | p := &Pattern{PatternTrie{}} 10 | for _, pattern := range patterns { 11 | p.trie.Add(pattern, struct{}{}) 12 | } 13 | return p 14 | } 15 | 16 | // Match tests whether s matches any patterns in p. 17 | func (p *Pattern) Match(s string) bool { 18 | _, ok := p.trie.Lookup(s) 19 | return ok 20 | } 21 | 22 | // Match tests whether s matches pattern. 23 | func Match(pattern, s string) bool { 24 | return Compile(pattern).Match(s) 25 | } 26 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Radix [![Circle CI](https://circleci.com/gh/fanyang01/radix.svg?style=svg)](https://circleci.com/gh/fanyang01/radix) [![GoDoc](https://godoc.org/github.com/fanyang01/radix?status.svg)](https://godoc.org/github.com/fanyang01/radix) [![Coverage Status](https://coveralls.io/repos/fanyang01/radix/badge.svg?branch=master&service=github)](https://coveralls.io/github/fanyang01/radix?branch=master) 2 | 3 | Package glob provides a trie(also known as prefix-tree) that supports wildcard character '\*'. 4 | 5 | ```go 6 | func TestTree(t *testing.T) { 7 | patterns := []struct { 8 | s string 9 | i interface{} 10 | }{ 11 | {"*abcd*ef*", 1}, 12 | {"*.google.com", 2}, 13 | {"http://example.com/books/*", 3}, 14 | {"*://example.com/movies", 4}, 15 | {`http://example.com/\*`, 5}, 16 | {`http://example.com/*`, 6}, 17 | {"你好*世界*", 7}, 18 | {`foo\`, 8}, 19 | {`b\ar`, 9}, 20 | } 21 | data := []struct { 22 | s string 23 | v interface{} 24 | }{ 25 | {"abcdef", 1}, 26 | {"abcdefef", 1}, 27 | {"abcabcdefgef", 1}, 28 | {"google.com", nil}, 29 | {"www.google.com", 2}, 30 | {"http://example.com/books/", 3}, 31 | {"http://example.com/", 6}, 32 | {"http://example.com/*", 5}, 33 | {"你好世界", 7}, 34 | {"你你好世界", nil}, 35 | {"你好世界世界界界", 7}, 36 | {"你好,世界", 7}, 37 | {"你好,世界。", 7}, 38 | {`foo\`, nil}, 39 | {`foo`, 8}, 40 | {`b\ar`, nil}, 41 | {`bar`, 9}, 42 | } 43 | 44 | tr := &Trie{} 45 | for _, p := range patterns { 46 | tr.Add(p.s, p.i) 47 | } 48 | 49 | for _, data := range data { 50 | v, ok := tr.Lookup(data.s) 51 | if data.v == nil { 52 | assert.False(t, ok) 53 | assert.Nil(t, v) 54 | } else { 55 | assert.True(t, ok) 56 | assert.Equal(t, data.v, v) 57 | } 58 | } 59 | 60 | } 61 | ``` 62 | -------------------------------------------------------------------------------- /radix.go: -------------------------------------------------------------------------------- 1 | package radix 2 | 3 | type Trie struct { 4 | root *node 5 | dump bool 6 | } 7 | 8 | type noneType int 9 | 10 | const vNONE noneType = 0 11 | 12 | type node struct { 13 | child []*node 14 | childidx []byte // first byte of each child 15 | s string 16 | v interface{} 17 | } 18 | 19 | // NewTrie creates a new trie. 20 | func NewTrie(dump bool) *Trie { 21 | return &Trie{ 22 | dump: dump, 23 | } 24 | } 25 | 26 | func dump(s string) string { 27 | ss := make([]byte, len(s)) 28 | copy(ss, s) 29 | return string(ss) 30 | } 31 | 32 | func (t *Trie) newSubTree(s string, v interface{}) *node { 33 | n := &node{ 34 | v: v, 35 | } 36 | t.setS(n, s) 37 | return n 38 | } 39 | 40 | func (t *Trie) setS(n *node, s string) { 41 | if t.dump { 42 | n.s = dump(s) 43 | } else { 44 | n.s = s 45 | } 46 | } 47 | 48 | func (n *node) setV(v interface{}) (ov interface{}, ok bool) { 49 | ov = n.v 50 | if _, ok = ov.(noneType); ok { 51 | ov, ok = nil, false 52 | } 53 | n.v = v 54 | return 55 | } 56 | 57 | // Add inserts a key-value pair into trie. If there is an old value for the 58 | // key, old value will be returned and 'has' will be true. 59 | func (t *Trie) Add(s string, v interface{}) (ov interface{}, has bool) { 60 | if s == "" { 61 | return 62 | } 63 | if t.root == nil { 64 | t.root = t.newSubTree(s, v) 65 | return 66 | } 67 | n := t.root 68 | INSERT: 69 | for { 70 | var l int 71 | min := len(s) 72 | if min > len(n.s) { 73 | min = len(n.s) 74 | } 75 | for ; l < min; l++ { 76 | if s[l] != n.s[l] { 77 | break 78 | } 79 | } 80 | switch { 81 | case l == len(n.s): // totally match this node 82 | s = s[l:] 83 | if len(s) == 0 { // end 84 | return n.setV(v) 85 | } 86 | first := 0 87 | if len(s[first:]) > 0 { 88 | for i := 0; i < len(n.childidx); i++ { 89 | if n.childidx[i] == s[first] { 90 | n = n.child[i] 91 | continue INSERT 92 | } 93 | } 94 | } 95 | default: // split 96 | prefix, suffix := n.s[:l], n.s[l:] 97 | child := &node{ 98 | child: n.child, 99 | childidx: n.childidx, 100 | v: n.v, 101 | } 102 | t.setS(child, suffix) 103 | *n = node{} 104 | t.setS(n, prefix) 105 | n.child = []*node{child} 106 | n.childidx = []byte{child.s[0]} 107 | n.v = vNONE 108 | s = s[l:] 109 | if len(s) == 0 { // end 110 | return n.setV(v) 111 | } 112 | } 113 | // construct a new subtree using rest of pattern and 114 | // append it to the child list of this node 115 | child := t.newSubTree(s, v) 116 | n.child = append(n.child, child) 117 | n.childidx = append(n.childidx, child.s[0]) 118 | return 119 | } 120 | } 121 | 122 | // Lookup searchs the trie to find an exact match and returns the 123 | // associated value. If not found, ok will be false. 124 | func (t *Trie) Lookup(s string) (v interface{}, ok bool) { 125 | n := t.root.lookup(s) 126 | if n == nil { 127 | v, ok = nil, false 128 | } else { 129 | v, ok = n.v, true 130 | if _, ok = v.(noneType); ok { 131 | v, ok = nil, false 132 | } 133 | } 134 | return 135 | } 136 | 137 | func (n *node) lookup(s string) *node { 138 | if n == nil { 139 | return nil 140 | } 141 | 142 | minLen := len(s) 143 | if minLen > len(n.s) { 144 | minLen = len(n.s) 145 | } 146 | var l int // length of longest common prefix 147 | for l = 0; l < minLen && s[l] == n.s[l]; l++ { 148 | } // at the end of loop: pattern[:l] == n.s[:l] 149 | switch l { 150 | case len(n.s): // totally match this node 151 | s = s[l:] 152 | if len(s) == 0 { // end 153 | return n 154 | } 155 | // go down 156 | var k int 157 | for k = 0; k < len(n.childidx); k++ { 158 | if n.childidx[k] == s[0] { 159 | if end := n.child[k].lookup(s); end != nil { 160 | return end 161 | } 162 | break 163 | } 164 | } 165 | fallthrough 166 | default: 167 | return nil 168 | } 169 | } 170 | -------------------------------------------------------------------------------- /all_test.go: -------------------------------------------------------------------------------- 1 | package radix 2 | 3 | import ( 4 | "bufio" 5 | "os" 6 | "testing" 7 | 8 | "github.com/armon/go-radix" 9 | "github.com/stretchr/testify/assert" 10 | ) 11 | 12 | func TestPattern(t *testing.T) { 13 | patterns := []struct { 14 | s string 15 | i interface{} 16 | }{ 17 | {"*", 0}, 18 | {"*abcd*ef*", 1}, 19 | {"*.google.com", 2}, 20 | {"http://example.com/books/*", 3}, 21 | {"*://example.com/movies", 4}, 22 | {`http://example.com/\*`, 5}, 23 | {`http://example.com/*`, 6}, 24 | {"你好*世界*", 7}, 25 | {`foo\`, 8}, 26 | {`b\ar`, 9}, 27 | } 28 | data := []struct { 29 | s string 30 | v interface{} 31 | }{ 32 | {"abcdef", 1}, 33 | {"abcdefef", 1}, 34 | {"abcabcdefgef", 1}, 35 | {"google.com", 0}, 36 | {"www.google.com", 2}, 37 | {"http://example.com/books/", 3}, 38 | {"http://example.com/", 6}, 39 | {"http://example.com/*", 5}, 40 | {"你好世界", 7}, 41 | {"你你好世界", 0}, 42 | {"你好世界世界界界", 7}, 43 | {"你好,世界", 7}, 44 | {"你好,世界。", 7}, 45 | {`foo\`, 0}, 46 | {`foo`, 8}, 47 | {`b\ar`, 0}, 48 | {`bar`, 9}, 49 | } 50 | 51 | tr := &PatternTrie{} 52 | for _, p := range patterns { 53 | tr.Add(p.s, p.i) 54 | } 55 | 56 | for _, data := range data { 57 | v, ok := tr.Lookup(data.s) 58 | assert.True(t, ok) 59 | assert.Equal(t, data.v, v) 60 | } 61 | 62 | } 63 | 64 | func TestMatch(t *testing.T) { 65 | patterns := []string{ 66 | "hello*world", 67 | "Hello,*world", 68 | "*foo*bar", 69 | } 70 | pattern := Compile(patterns...) 71 | assert.True(t, pattern.Match("hello,world")) 72 | assert.True(t, pattern.Match("Hello,world")) 73 | assert.False(t, pattern.Match("Helloworld")) 74 | assert.True(t, pattern.Match("foobar")) 75 | assert.False(t, pattern.Match("foobar,")) 76 | 77 | assert.False(t, Match(`\*mark\*`, "mark")) 78 | assert.True(t, Match(`\*mark\*`, "*mark*")) 79 | assert.True(t, Match(`*abc*`, "aabccc")) 80 | assert.True(t, Match(`*abc*`, "abc")) 81 | assert.True(t, Match(`*abc*`, "abcabc")) 82 | assert.True(t, Match(`*abc*`, "abbabcc")) 83 | 84 | assert.True(t, Match(`*`, "foobar")) 85 | assert.True(t, Match(`*`, "")) 86 | } 87 | 88 | func BenchmarkInsert(b *testing.B) { 89 | var urls []string 90 | tr := NewTrie(false) 91 | f, err := os.Open("testdata/url.txt") 92 | if err != nil { 93 | b.Fatal(err) 94 | } 95 | defer f.Close() 96 | scanner := bufio.NewScanner(f) 97 | for scanner.Scan() { 98 | s := scanner.Text() 99 | urls = append(urls, s) 100 | } 101 | if err := scanner.Err(); err != nil { 102 | b.Fatal(err) 103 | } 104 | b.N = len(urls) 105 | b.ResetTimer() 106 | for i := 0; i < b.N; i++ { 107 | tr.Add(urls[i], i) 108 | } 109 | } 110 | 111 | func BenchmarkGoRadixInsert(b *testing.B) { 112 | var urls []string 113 | tr := radix.New() 114 | f, err := os.Open("testdata/url.txt") 115 | if err != nil { 116 | b.Fatal(err) 117 | } 118 | defer f.Close() 119 | scanner := bufio.NewScanner(f) 120 | for scanner.Scan() { 121 | s := scanner.Text() 122 | urls = append(urls, s) 123 | } 124 | if err := scanner.Err(); err != nil { 125 | b.Fatal(err) 126 | } 127 | b.N = len(urls) 128 | b.ResetTimer() 129 | for i := 0; i < b.N; i++ { 130 | tr.Insert(urls[i], i) 131 | } 132 | } 133 | 134 | func BenchmarkMapInsert(b *testing.B) { 135 | var urls []string 136 | m := map[string]int{} 137 | f, err := os.Open("testdata/url.txt") 138 | if err != nil { 139 | b.Fatal(err) 140 | } 141 | defer f.Close() 142 | scanner := bufio.NewScanner(f) 143 | for scanner.Scan() { 144 | s := scanner.Text() 145 | urls = append(urls, s) 146 | } 147 | if err := scanner.Err(); err != nil { 148 | b.Fatal(err) 149 | } 150 | b.N = len(urls) 151 | b.ResetTimer() 152 | for i := 0; i < b.N; i++ { 153 | m[urls[i]] = i 154 | } 155 | } 156 | 157 | func BenchmarkLookup(b *testing.B) { 158 | var urls []string 159 | tr := NewTrie(false) 160 | f, err := os.Open("testdata/url.txt") 161 | if err != nil { 162 | b.Fatal(err) 163 | } 164 | defer f.Close() 165 | scanner := bufio.NewScanner(f) 166 | for scanner.Scan() { 167 | s := scanner.Text() 168 | tr.Add(s, len(urls)) 169 | urls = append(urls, s) 170 | } 171 | if err := scanner.Err(); err != nil { 172 | b.Fatal(err) 173 | } 174 | b.N = len(urls) 175 | b.ResetTimer() 176 | for i := 0; i < b.N; i++ { 177 | if v, _ := tr.Lookup(urls[i]); v != i { 178 | b.Errorf("expect %d, got %d\n", i, v) 179 | } 180 | } 181 | } 182 | 183 | func BenchmarkGoRadixLookup(b *testing.B) { 184 | var urls []string 185 | tr := radix.New() 186 | f, err := os.Open("testdata/url.txt") 187 | if err != nil { 188 | b.Fatal(err) 189 | } 190 | defer f.Close() 191 | scanner := bufio.NewScanner(f) 192 | for scanner.Scan() { 193 | s := scanner.Text() 194 | tr.Insert(s, len(urls)) 195 | urls = append(urls, s) 196 | } 197 | if err := scanner.Err(); err != nil { 198 | b.Fatal(err) 199 | } 200 | b.N = len(urls) 201 | b.ResetTimer() 202 | for i := 0; i < b.N; i++ { 203 | if v, _ := tr.Get(urls[i]); v != i { 204 | b.Errorf("expect %d, got %d\n", i, v) 205 | } 206 | } 207 | } 208 | 209 | func BenchmarkMapLookup(b *testing.B) { 210 | var urls []string 211 | m := map[string]int{} 212 | f, err := os.Open("testdata/url.txt") 213 | if err != nil { 214 | b.Fatal(err) 215 | } 216 | defer f.Close() 217 | scanner := bufio.NewScanner(f) 218 | for scanner.Scan() { 219 | s := scanner.Text() 220 | m[s] = len(urls) 221 | urls = append(urls, s) 222 | } 223 | if err := scanner.Err(); err != nil { 224 | b.Fatal(err) 225 | } 226 | b.N = len(urls) 227 | b.ResetTimer() 228 | for i := 0; i < b.N; i++ { 229 | if v, ok := m[urls[i]]; !ok || v != i { 230 | b.Errorf("expect %d, got %d\n", i, v) 231 | } 232 | } 233 | } 234 | -------------------------------------------------------------------------------- /pattern.go: -------------------------------------------------------------------------------- 1 | /* 2 | Package glob provides a trie(also known as prefix-tree) that supports wildcard *. 3 | */ 4 | package radix 5 | 6 | type nodeType int 7 | 8 | const ( 9 | nodeCOMMON nodeType = iota // common 10 | nodeWILDCARD // wildcard character 11 | ) 12 | 13 | // Trie stores a value for each pattern. 14 | type PatternTrie struct { 15 | root *pnode 16 | } 17 | 18 | type pnode struct { 19 | child []*pnode 20 | childidx []byte // first byte of each child 21 | wcard *pnode 22 | s string 23 | v interface{} 24 | typ nodeType 25 | end bool 26 | } 27 | 28 | // NewPatternTrie returns a new pattern trie. 29 | func NewPatternTrie() *PatternTrie { return &PatternTrie{} } 30 | 31 | func newTree(pattern string, v interface{}) *pnode { 32 | var root, n, child *pnode 33 | var j int 34 | for i := 0; i < len(pattern); { 35 | s, escape := []byte{}, false 36 | FIND_AST: 37 | for j = 0; j < len(pattern[i:]); j++ { 38 | switch pattern[i+j] { 39 | case '\\': 40 | if escape = !escape; escape { 41 | continue FIND_AST 42 | } 43 | case '*': 44 | if !escape { 45 | break FIND_AST 46 | } 47 | } 48 | escape = false 49 | s = append(s, pattern[i+j]) 50 | } 51 | switch j { 52 | case 0: 53 | child = &pnode{ 54 | s: "*", 55 | typ: nodeWILDCARD, 56 | } 57 | i++ 58 | default: 59 | child = &pnode{ 60 | s: string(s), 61 | typ: nodeCOMMON, 62 | } 63 | i = i + j 64 | } 65 | if n != nil { 66 | switch child.typ { 67 | case nodeWILDCARD: 68 | n.wcard = child 69 | case nodeCOMMON: 70 | n.child = []*pnode{child} 71 | n.childidx = []byte{child.s[0]} 72 | } 73 | } else { 74 | root = child 75 | } 76 | n = child 77 | } 78 | n.v = v 79 | n.end = true 80 | return root 81 | } 82 | 83 | func (n *pnode) setV(v interface{}) (ov interface{}, is bool) { 84 | ov, is = n.v, n.end 85 | n.v, n.end = v, true 86 | return 87 | } 88 | 89 | // Add inserts pattern into trie. If there is an old value for this pattern, 90 | // old value will be returned and 'has' is set to true. 91 | func (t *PatternTrie) Add(pattern string, v interface{}) (ov interface{}, has bool) { 92 | if pattern == "" { 93 | return 94 | } 95 | if t.root == nil { 96 | t.root = newTree(pattern, v) 97 | return 98 | } 99 | n := t.root 100 | INSERT: 101 | for { 102 | var i, l int 103 | var wmatch, escape bool 104 | 105 | if n.typ == nodeWILDCARD { 106 | if len(pattern) > 0 && pattern[0] == '*' { 107 | wmatch = true 108 | } 109 | // else l == 0 thus l != len(n.s) 110 | goto SWITCH 111 | } 112 | 113 | for i < len(pattern) && l < len(n.s) { 114 | if pattern[i] == '\\' { 115 | if escape = !escape; escape { 116 | i++ 117 | continue 118 | } 119 | } 120 | if !escape && pattern[i] == '*' { 121 | break 122 | } 123 | if pattern[i] != n.s[l] { 124 | break 125 | } 126 | escape = false 127 | i, l = i+1, l+1 128 | } 129 | if escape { 130 | i-- 131 | escape = false 132 | } 133 | SWITCH: 134 | switch { 135 | case wmatch: 136 | i = 1 137 | fallthrough 138 | case l == len(n.s): // totally match this node 139 | pattern = pattern[i:] 140 | if len(pattern) == 0 { // end 141 | return n.setV(v) 142 | } 143 | if pattern[0] == '*' { 144 | if n.wcard == nil { 145 | n.wcard = newTree(pattern, v) 146 | return 147 | } else { 148 | n = n.wcard 149 | continue INSERT 150 | } 151 | } 152 | 153 | first := 0 154 | if pattern[0] == '\\' { 155 | first = 1 156 | } 157 | if len(pattern[first:]) > 0 { 158 | for i := 0; i < len(n.childidx); i++ { 159 | if n.childidx[i] == pattern[first] { 160 | n = n.child[i] 161 | continue INSERT 162 | } 163 | } 164 | } 165 | // not found 166 | case n.typ == nodeWILDCARD: 167 | i, l = 0, 0 168 | fallthrough 169 | default: // split 170 | prefix, suffix := n.s[:l], n.s[l:] 171 | child := &pnode{ 172 | s: suffix, 173 | typ: n.typ, 174 | child: n.child, 175 | childidx: n.childidx, 176 | wcard: n.wcard, 177 | v: n.v, 178 | end: n.end, 179 | } 180 | *n = pnode{} 181 | n.s = prefix 182 | n.typ = nodeCOMMON 183 | if child.typ == nodeWILDCARD { 184 | n.wcard = child 185 | } else { 186 | n.child = []*pnode{child} 187 | n.childidx = []byte{child.s[0]} 188 | } 189 | pattern = pattern[i:] 190 | if len(pattern) == 0 { // end 191 | return n.setV(v) 192 | } 193 | } 194 | // construct a new subtree using rest of pattern and 195 | // append it to the child list of this node 196 | child := newTree(pattern, v) 197 | switch child.typ { 198 | case nodeCOMMON: 199 | n.child = append(n.child, child) 200 | n.childidx = append(n.childidx, child.s[0]) 201 | case nodeWILDCARD: 202 | n.wcard = child 203 | } 204 | return 205 | } 206 | } 207 | 208 | // Lookup searchs pattern matching s most precisely and returns value associated with it. 209 | // If not found, ok will be set to false. 210 | func (t *PatternTrie) Lookup(s string) (v interface{}, ok bool) { 211 | n := lookup(t.root, s) 212 | if n != nil { 213 | v, ok = n.v, n.end 214 | } 215 | return 216 | } 217 | 218 | func lookup(n *pnode, s string) *pnode { 219 | if n == nil { 220 | return nil 221 | } 222 | if n.typ == nodeWILDCARD { 223 | for capture := 0; capture <= len(s); capture++ { 224 | if end := lookupW(n, s[capture:]); end != nil { 225 | return end 226 | } 227 | } 228 | return nil 229 | } 230 | 231 | minLen := len(s) 232 | if minLen > len(n.s) { 233 | minLen = len(n.s) 234 | } 235 | var l int // length of longest common prefix 236 | for l = 0; l < minLen && s[l] == n.s[l]; l++ { 237 | } // at the end of loop: pattern[:l] == n.s[:l] 238 | switch l { 239 | case len(n.s): // totally match this node 240 | s = s[l:] 241 | if len(s) == 0 { // end 242 | if n.end { 243 | return n 244 | } else if end := lookup(n.wcard, s); end != nil { 245 | return end 246 | } 247 | return n 248 | } 249 | // go down 250 | var k int 251 | for k = 0; k < len(n.childidx); k++ { 252 | if n.childidx[k] == s[0] { 253 | if end := lookup(n.child[k], s); end != nil { 254 | return end 255 | } 256 | break 257 | } 258 | } 259 | // try '*' 260 | return lookup(n.wcard, s) 261 | default: 262 | return nil 263 | } 264 | } 265 | 266 | // n must be a wildcard node 267 | func lookupW(n *pnode, s string) *pnode { 268 | if s == "" { 269 | return n 270 | } 271 | var end *pnode 272 | for i := 0; i < len(n.childidx); i++ { 273 | if n.childidx[i] == s[0] { 274 | if end = lookup(n.child[i], s); end != nil { 275 | return end 276 | } 277 | break 278 | } 279 | } 280 | // try '*' 281 | return lookup(n.wcard, s) 282 | } 283 | --------------------------------------------------------------------------------