├── .gitignore ├── README.md ├── v.mod ├── vain.v └── vain_test.v /.gitignore: -------------------------------------------------------------------------------- 1 | vain_test 2 | main 3 | *.exe 4 | *.so 5 | *.dylib 6 | *.dll 7 | 8 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # vain 2 | An extremely simple, tiny and easy to use V library for writing lexers. 3 | 4 | # Usage 5 | Vain lets you create lexer objects which act on an input string and follow a pre-defined set of rules you create to match tokens. 6 | On top of that, it allows you to process tokens on the spot (using callback functions) to convert them to a format you're more comfortable with. 7 | 8 | Here's a [usage example](vain_test.v), which outputs: 9 | ``` 10 | // (WORD: My) 11 | // (WHITESPACE: ) 12 | // (NUMBER: 100) 13 | // (PERCENT: %) 14 | // (WHITESPACE: ) 15 | // (WORD: awesome) 16 | // (WHITESPACE: ) 17 | // (WORD: string) 18 | // (EXCLAMATION: !) 
```
--------------------------------------------------------------------------------
/v.mod:
--------------------------------------------------------------------------------
Module {
	name: 'vain',
	description: 'An extremely simple, tiny and easy to use V library for writing lexers.',
	dependencies: []
}
--------------------------------------------------------------------------------
/vain.v:
--------------------------------------------------------------------------------
module vain

import regex

// Called when no rule matches; receives the remaining (unlexed) input.
type FNErrorCallback = fn (arg_1 string)

// Transforms a matched token's text before it is handed back to the caller.
type FNString2String = fn (arg_1 string) string

// One lexing rule: either a literal string or a regex, plus a callback
// that post-processes the matched text.
struct LexRule {
	id       string          // token identifier reported to the caller
	str_rule string          // the literal text, or the regex source string
	callback FNString2String // applied to the matched text before returning
	is_regex bool            // true -> match with `re`, false -> compare `str_rule`
	re       regex.RE        // compiled regex; only meaningful when is_regex is true
}

// Lexer scans `input` from `pos`, trying each rule in order at every step.
struct Lexer {
	rules        []LexRule
	err_callback FNErrorCallback
mut:
	pos   int
	input string
}

// next returns the next (rule id, token text) pair, or none once the input
// is exhausted. Rules are tried in declaration order and the first rule
// matching at the current position wins. If no rule matches, the error
// callback receives the remaining input and none is returned, so callers
// iterating with `or { break }` terminate instead of looping forever.
pub fn (mut lexer Lexer) next() ?(string, string) {
	if lexer.pos == lexer.input.len {
		return none
	}
	for rule in lexer.rules {
		if rule.is_regex {
			// copy so match_string's mutation does not touch the stored rule
			mut re := rule.re
			start, end := re.match_string(lexer.input[lexer.pos..])
			// only accept matches anchored at the current position
			if start != 0 {
				continue
			}
			token := rule.callback(lexer.input[lexer.pos + start..lexer.pos + end])
			lexer.pos += end
			return rule.id, token
		} else {
			// a literal longer than the remaining input cannot match;
			// skipping it avoids slicing past the end of the string
			if lexer.pos + rule.str_rule.len > lexer.input.len {
				continue
			}
			read := lexer.input[lexer.pos..lexer.pos + rule.str_rule.len]
			if rule.str_rule != read {
				continue
			}
			token := rule.callback(read)
			lexer.pos += rule.str_rule.len
			return rule.id, token
		}
	}
	// no rule matched: report the unlexable tail and stop
	lexer.err_callback(lexer.input[lexer.pos..])
	return none
}

// Default callback: returns the matched text unchanged.
fn do_nothing(str string) string {
	return str
}

// literal builds a rule matching the exact string `str`.
pub fn literal(tok_id, str string) LexRule {
	return LexRule{
		id: tok_id
		str_rule: str
		callback: do_nothing
		is_regex: false
	}
}

// literal_callback builds a literal rule whose match is transformed by `cb`.
pub fn literal_callback(tok_id, str string, cb FNString2String) LexRule {
	return LexRule{
		id: tok_id
		str_rule: str
		callback: cb
		is_regex: false
	}
}

// regexstring2re compiles `restring`, panicking with a descriptive message
// on an invalid pattern (rule construction is programmer input, so failing
// fast beats silently dropping the rule).
fn regexstring2re(restring string) regex.RE {
	re, err, err_pos := regex.regex(restring)
	if err != 0 {
		panic('invalid regex $restring at position $err_pos, errcode: $err')
	}
	return re
}

// regex builds a rule matching the regular expression `str`.
pub fn regex(tok_id, str string) LexRule {
	return LexRule{
		id: tok_id
		str_rule: str
		callback: do_nothing
		is_regex: true
		re: regexstring2re(str)
	}
}

// regex_callback builds a regex rule whose match is transformed by `cb`.
pub fn regex_callback(tok_id, str string, cb FNString2String) LexRule {
	return LexRule{
		id: tok_id
		str_rule: str
		callback: cb
		is_regex: true
		re: regexstring2re(str)
	}
}

// make_lexer constructs a Lexer over `input` with the given rules and
// error callback, positioned at the start of the input.
pub fn make_lexer(input string, rules []LexRule, err_cb FNErrorCallback) Lexer {
	return Lexer{
		rules: rules
		err_callback: err_cb
		pos: 0
		input: input
	}
}
--------------------------------------------------------------------------------
/vain_test.v:
--------------------------------------------------------------------------------
import vain

fn cb_reverse(str string) string {
	return str.reverse() // here, we reverse any number token we match
}

fn test_lexing() {
	input := 'My 001% awesome string!'
	mut output := []string{}
	// vain will try each rule from top to bottom so it is good practice to order them by token importance!
	// when you specify a rule, the first argument is the token identifier which can be anything you want.
	// it is good practice to make it SCREAMING_CASE
	mut lexer := vain.make_lexer(input, [
		vain.regex('WHITESPACE', '( |\t|\n|\r)+'), // any whitespace. \s does not currently work with V
		vain.regex_callback('NUMBER', '[0-9]+', cb_reverse),
		vain.regex('WORD', '[a-zA-Z]+'), // match any letter
		vain.literal('PERCENT', '%'), // %
		vain.literal('EXCLAMATION', '!'), // !
	], fn (str string) {
		// this is the error callback - the function here is executed if the lexer
		// fails to understand a token. this can happen due to not enough rules,
		// or certain rules being invalid.
		println('error when tokenizing. remaining input: $str')
	})
	for {
		// lexer.next() will grab the next token and its id
		// once there are no tokens left, the lexer will return none
		id, token := lexer.next() or { break }
		res := '($id: $token)'
		output << res
		println(res)
	}
	expected := [
		'(WORD: My)',
		'(WHITESPACE: )',
		'(NUMBER: 100)',
		'(PERCENT: %)',
		'(WHITESPACE: )',
		'(WORD: awesome)',
		'(WHITESPACE: )',
		'(WORD: string)',
		'(EXCLAMATION: !)'
	]
	assert expected.len <= output.len
	for i in 0 .. expected.len {
		assert output[i] == expected[i]
	}
}
--------------------------------------------------------------------------------