├── go.sum ├── go.mod ├── LICENSE ├── README.md ├── scanner.go └── scanner_test.go /go.sum: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/yulrizka/rxscan 2 | 3 | go 1.15 4 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Ahmy Yulrizka 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # rxscan 2 | 3 | rxscan provides functionality to scan text to variables using regular expression capture group. 4 | 5 | This library is still experimental, use at your own risk. Contributions are always welcome and please 6 | submit an issue if you find any problem. 7 | 8 | ## Examples 9 | 10 | **Scanning a string** 11 | ```go 12 | input := "bright white bags contain 9 shiny gold bag." 13 | rx := regexp.MustCompile(`([\w ]+) bags contain (\d+) ([\w ]+) bag.`) 14 | var ( 15 | bag1, bag2 string 16 | i int 17 | ) 18 | n, err := rxscan.Scan(rx, input, &bag1, &i, &bag2) 19 | if err != nil { 20 | panic(err) 21 | } 22 | 23 | fmt.Printf("parsed %d arguments: %s -> (%d) %s", n, bag1, i, bag2) 24 | 25 | // Output: parsed 3 arguments: bright white -> (9) shiny gold 26 | ``` 27 | 28 | **Scanning repeated pattern** 29 | ```go 30 | input := `light red bags contain 1 bright white bag, 2 muted yellow bags. 31 | dark orange bags contain 3 bright white bags, 4 muted yellow bags. 32 | bright white bags contain 1 shiny gold bag. 33 | muted yellow bags contain 2 shiny gold bags, 9 faded blue bags. 34 | shiny gold bags contain 1 dark olive bag, 2 vibrant plum bags. 35 | dark olive bags contain 3 faded blue bags, 4 dotted black bags. 36 | vibrant plum bags contain 5 faded blue bags, 6 dotted black bags. 37 | faded blue bags contain no other bags. 38 | dotted black bags contain no other bags.` 39 | 40 | rx := regexp.MustCompile(`(\d+) ([\w ]+) bag`) 41 | 42 | sc := rxscan.NewScanner(rx, input) 43 | for sc.More() { 44 | var count int 45 | var color string 46 | _, err := sc.Scan(&count, &color) 47 | if err != nil { 48 | panic(err) 49 | } 50 | 51 | fmt.Printf("- (%d) %s\n", count, color) 52 | } 53 | if err := sc.Error(); err != nil { 54 | panic(err) 55 | } 56 | 57 | // Output: 58 | //- (1) bright white 59 | //- (2) muted yellow 60 | //- (3) bright white 61 | //- (4) muted yellow 62 | //- (1) shiny gold 63 | //- (2) shiny gold 64 | //- (9) faded blue 65 | //- (1) dark olive 66 | //- (2) vibrant plum 67 | //- (3) faded blue 68 | //- (4) dotted black 69 | //- (5) faded blue 70 | //- (6) dotted black 71 | ``` 72 | -------------------------------------------------------------------------------- /scanner.go: -------------------------------------------------------------------------------- 1 | package rxscan 2 | 3 | import ( 4 | "errors" 5 | "reflect" 6 | "regexp" 7 | "strconv" 8 | ) 9 | 10 | func parse(match string, arg interface{}) (err error) { 11 | switch v := arg.(type) { 12 | case *bool: 13 | *v, err = strconv.ParseBool(match) 14 | //case *complex64: 15 | //vv, err := strconv.ParseComplex(match, 64) 16 | //if err != nil { 17 | // return err 18 | //} 19 | //*v = complex64(vv) 20 | //case *complex128: 21 | //vv, err := strconv.ParseComplex(match, 128) 22 | //if err != nil { 23 | // return err 24 | //} 25 | //*v = vv 26 | case *int: 27 | *v, err = strconv.Atoi(match) 28 | case *int8: 29 | vv, err := strconv.ParseInt(match, 10, 8) 30 | if err != nil { 31 | return err 32 | } 33 | *v = int8(vv) 34 | case *int16: 35 | vv, err := strconv.ParseInt(match, 10, 16) 36 | if err != nil { 37 | return err 38 | } 39 | *v = int16(vv) 40 | case *int32: 41 | vv, err := strconv.ParseInt(match, 10, 32) 42 | if err != nil { 43 | return err 44 | } 45 | *v = int32(vv) 46 | case *int64: 47 | vv, err := strconv.ParseInt(match, 10, 64) 48 | if err != nil { 49 | return err 50 | } 51 | *v = vv 52 | case *uint: 53 | vv, err := strconv.ParseUint(match, 10, 64) 54 | if err != nil { 55 | return err 56 | } 57 | *v = uint(vv) 58 | case *uint8: 59 | vv, err := strconv.ParseUint(match, 10, 8) 60 | if err != nil { 61 | return err 62 | } 63 | *v = uint8(vv) 64 | case *uint16: 65 | vv, err := strconv.ParseUint(match, 10, 16) 66 | if err != nil { 67 | return err 68 | } 69 | *v = uint16(vv) 70 | case *uint32: 71 | vv, err := strconv.ParseUint(match, 10, 32) 72 | if err != nil { 73 | return err 74 | } 75 | *v = uint32(vv) 76 | case *uint64: 77 | vv, err := strconv.ParseUint(match, 10, 64) 78 | if err != nil { 79 | return err 80 | } 81 | *v = vv 82 | case *uintptr: 83 | err = errors.New("uintptr is not supported yet") 84 | case *float32: 85 | vv, err := strconv.ParseFloat(match, 32) 86 | if err != nil { 87 | return err 88 | } 89 | *v = float32(vv) 90 | case *float64: 91 | vv, err := strconv.ParseFloat(match, 64) 92 | if err != nil { 93 | return err 94 | } 95 | *v = vv 96 | case *string: 97 | *v = match 98 | case *[]byte: 99 | *v = []byte(match) 100 | default: 101 | err = errors.New("can't scan type: " + reflect.TypeOf(arg).String()) 102 | } 103 | 104 | return err 105 | } 106 | 107 | // Scan string using regular expression to variables arguments. 108 | // It returns the number variables successfully parsed. Variable arguments can be less than 109 | // the capture group but it will return an error if variables are more than the capture group 110 | func Scan(re *regexp.Regexp, s string, args ...interface{}) (n int, err error) { 111 | matches := re.FindStringSubmatch(s) 112 | if len(matches) <= 1 { 113 | return 0, nil 114 | } 115 | 116 | if len(args) > len(matches)-1 { 117 | return 0, errors.New("got " + strconv.Itoa(len(args)) + " arguments for " + strconv.Itoa(len(matches)-1) + " matches") 118 | } 119 | 120 | for i, arg := range args { 121 | if arg == nil { 122 | continue 123 | } 124 | if err := parse(matches[i+1], arg); err != nil { 125 | return 0, err 126 | } 127 | n++ 128 | } 129 | return n, err 130 | } 131 | 132 | type Scanner struct { 133 | matches [][]string 134 | i int 135 | args []interface{} 136 | err error 137 | } 138 | 139 | func (s *Scanner) Error() error { 140 | return s.err 141 | } 142 | 143 | // NewScanner returns a scanner that can scan all repeating regular expression within a text 144 | func NewScanner(re *regexp.Regexp, s string) *Scanner { 145 | return &Scanner{ 146 | matches: re.FindAllStringSubmatch(s, -1), 147 | } 148 | } 149 | 150 | // More returns true if there is more matches 151 | func (s *Scanner) More() bool { 152 | return s.err == nil && s.i < len(s.matches) 153 | } 154 | 155 | // Scan matched regular expresion to the variables. 156 | // It returns the number variables successfully parsed. Variable arguments can be less than 157 | // the capture group but it will return an error if variables are more than the capture group 158 | func (s *Scanner) Scan(args ...interface{}) (int, error) { 159 | m := s.matches[s.i] 160 | if len(s.args) > len(m)-1 { 161 | s.err = errors.New("got " + strconv.Itoa(len(s.args)) + " arguments for " + strconv.Itoa(len(m)-1) + " matches") 162 | return 0, s.err 163 | } 164 | 165 | parsed := 0 166 | if len(m) > 1 { 167 | for i, arg := range args { 168 | if arg == nil { 169 | continue 170 | } 171 | if s.err = parse(m[i+1], arg); s.err != nil { 172 | return parsed, s.err 173 | } 174 | parsed++ 175 | } 176 | } 177 | 178 | s.i++ 179 | 180 | return parsed, s.err 181 | } 182 | -------------------------------------------------------------------------------- /scanner_test.go: -------------------------------------------------------------------------------- 1 | package rxscan_test 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | "reflect" 7 | "regexp" 8 | "testing" 9 | 10 | "github.com/yulrizka/rxscan" 11 | ) 12 | 13 | func TestScan(t *testing.T) { 14 | var ( 15 | b bool 16 | 17 | //c64 complex64 18 | //c128 complex128 19 | 20 | i int 21 | i8 int8 22 | i16 int16 23 | i32 int32 24 | i64 int64 25 | 26 | ui uint 27 | ui8 uint8 28 | ui16 uint16 29 | ui32 uint32 30 | ui64 uint64 31 | 32 | f32 float32 33 | f64 float64 34 | 35 | s string 36 | bytes []byte 37 | ) 38 | n, err := rxscan.Scan(regexp.MustCompile(`it is (\w+)$`), "it is true", &b) 39 | if err != nil { 40 | t.Fatal(err) 41 | } 42 | if !b { 43 | t.Fatal("want b to be true") 44 | } 45 | if n != 1 { 46 | t.Fatalf("n want 1 got %d", n) 47 | } 48 | 49 | // boolean 50 | wantB := true 51 | ok(t, regexp.MustCompile(`it is (\w+)$`), "it is true", []interface{}{&b}, []interface{}{&wantB}) 52 | wantB = false 53 | ok(t, regexp.MustCompile(`it is (\w+)$`), "it is false", []interface{}{&b}, []interface{}{&wantB}) 54 | 55 | // complex 56 | //wantC64 := complex64(3 + 5.5i) 57 | //ok(t, regexp.MustCompile(`it is (.+)$`), "it is (3.0+5.5i)", []interface{}{&c64}, []interface{}{&wantC64}) 58 | //wantC128 := 3 + 5.5i 59 | //ok(t, regexp.MustCompile(`it is (.+)$`), "it is (3.0+5.5i)", []interface{}{&c128}, []interface{}{&wantC128}) 60 | 61 | // int 62 | wantI := 11 63 | ok(t, regexp.MustCompile(`it is (\w+)$`), "it is 11", []interface{}{&i}, []interface{}{&wantI}) 64 | wantI8 := int8(127) 65 | ok(t, regexp.MustCompile(`it is (\w+)$`), "it is 127", []interface{}{&i8}, []interface{}{&wantI8}) 66 | wantI16 := int16(32767) 67 | ok(t, regexp.MustCompile(`it is (\w+)$`), "it is 32767", []interface{}{&i16}, []interface{}{&wantI16}) 68 | wantI32 := int32(2147483647) 69 | ok(t, regexp.MustCompile(`it is (\w+)$`), "it is 2147483647", []interface{}{&i32}, []interface{}{&wantI32}) 70 | wantI64 := int64(9223372036854775807) 71 | ok(t, regexp.MustCompile(`it is (.+)$`), "it is 9223372036854775807", []interface{}{&i64}, []interface{}{&wantI64}) 72 | 73 | // uint 74 | wantUI := uint(11) 75 | ok(t, regexp.MustCompile(`it is (\w+)$`), "it is 11", []interface{}{&ui}, []interface{}{&wantUI}) 76 | wantUI8 := uint8(255) 77 | ok(t, regexp.MustCompile(`it is (\w+)$`), "it is 255", []interface{}{&ui8}, []interface{}{&wantUI8}) 78 | wantUI16 := uint16(65535) 79 | ok(t, regexp.MustCompile(`it is (\w+)$`), "it is 65535", []interface{}{&ui16}, []interface{}{&wantUI16}) 80 | wantUI32 := uint32(4294967295) 81 | ok(t, regexp.MustCompile(`it is (\w+)$`), "it is 4294967295", []interface{}{&ui32}, []interface{}{&wantUI32}) 82 | wantUI64 := uint64(18446744073709551615) 83 | ok(t, regexp.MustCompile(`it is (.+)$`), "it is 18446744073709551615", []interface{}{&ui64}, []interface{}{&wantUI64}) 84 | 85 | // float 86 | wantFloat32 := float32(0.123456) 87 | ok(t, regexp.MustCompile(`it is (.+)$`), "it is 0.123456", []interface{}{&f32}, []interface{}{&wantFloat32}) 88 | wantFloat64 := 0.123456 89 | ok(t, regexp.MustCompile(`it is (.+)$`), "it is 0.123456", []interface{}{&f64}, []interface{}{&wantFloat64}) 90 | 91 | wantS := "some cool text" 92 | ok(t, regexp.MustCompile(`it is (.+)$`), "it is some cool text", []interface{}{&s}, []interface{}{&wantS}) 93 | wantBytes := []byte("some cool text") 94 | ok(t, regexp.MustCompile(`it is (.+)$`), "it is some cool text", []interface{}{&bytes}, []interface{}{&wantBytes}) 95 | } 96 | 97 | func ok(t *testing.T, re *regexp.Regexp, s string, args []interface{}, want []interface{}) { 98 | t.Helper() 99 | _, err := rxscan.Scan(re, s, args...) 100 | if err != nil { 101 | t.Fatal(err) 102 | } 103 | 104 | for i, v := range want { 105 | if !reflect.DeepEqual(v, args[i]) { 106 | t.Fatalf("got %+v want %+v", reflect.ValueOf(args[i]).Elem(), reflect.ValueOf(v).Elem()) 107 | } 108 | } 109 | } 110 | 111 | func ExampleScan() { 112 | input := "bright white bags contain 9 shiny gold bag." 113 | rx := regexp.MustCompile(`([\w ]+) bags contain (\d+) ([\w ]+) bag.`) 114 | var ( 115 | bag1, bag2 string 116 | i int 117 | ) 118 | n, err := rxscan.Scan(rx, input, &bag1, &i, &bag2) 119 | if err != nil { 120 | panic(err) 121 | } 122 | 123 | fmt.Printf("parsed %d arguments: %s -> (%d) %s", n, bag1, i, bag2) 124 | // Output: parsed 3 arguments: bright white -> (9) shiny gold 125 | } 126 | 127 | func ExampleScanner() { 128 | input := `light red bags contain 1 bright white bag, 2 muted yellow bags. 129 | dark orange bags contain 3 bright white bags, 4 muted yellow bags. 130 | bright white bags contain 1 shiny gold bag. 131 | muted yellow bags contain 2 shiny gold bags, 9 faded blue bags. 132 | shiny gold bags contain 1 dark olive bag, 2 vibrant plum bags. 133 | dark olive bags contain 3 faded blue bags, 4 dotted black bags. 134 | vibrant plum bags contain 5 faded blue bags, 6 dotted black bags. 135 | faded blue bags contain no other bags. 136 | dotted black bags contain no other bags.` 137 | 138 | rx := regexp.MustCompile(`(\d+) ([\w ]+) bag`) 139 | 140 | sc := rxscan.NewScanner(rx, input) 141 | for sc.More() { 142 | var count int 143 | var color string 144 | _, err := sc.Scan(&count, &color) 145 | if err != nil { 146 | panic(err) 147 | } 148 | 149 | fmt.Printf("- (%d) %s\n", count, color) 150 | } 151 | if err := sc.Error(); err != nil { 152 | panic(err) 153 | } 154 | 155 | // Output: 156 | //- (1) bright white 157 | //- (2) muted yellow 158 | //- (3) bright white 159 | //- (4) muted yellow 160 | //- (1) shiny gold 161 | //- (2) shiny gold 162 | //- (9) faded blue 163 | //- (1) dark olive 164 | //- (2) vibrant plum 165 | //- (3) faded blue 166 | //- (4) dotted black 167 | //- (5) faded blue 168 | //- (6) dotted black 169 | 170 | } 171 | 172 | var ( 173 | text = "jmp +32" 174 | rx = regexp.MustCompile(`(\w+) (\w+)`) 175 | ) 176 | 177 | func withScan() { 178 | var op string 179 | var arg int64 180 | _, _ = fmt.Sscanf(text, "%s %d", &op, &arg) 181 | } 182 | 183 | func withRegex() { 184 | var op string 185 | var arg int64 186 | _, _ = rxscan.Scan(rx, text, &op, &arg) 187 | } 188 | 189 | // $ benchstat scan.txt regex.txt 190 | // name old time/op new time/op delta 191 | // Scan-8 108µs ± 1% 107µs ± 1% ~ (p=0.222 n=5+5) 192 | func BenchmarkScan(b *testing.B) { 193 | for i := 0; i < b.N; i++ { 194 | if os.Getenv("TEST_SCAN") != "" { 195 | withScan() 196 | } else { 197 | withRegex() 198 | } 199 | } 200 | } 201 | --------------------------------------------------------------------------------