├── .gitignore ├── README.md ├── v.mod ├── vain.v └── vain_test.v /.gitignore: -------------------------------------------------------------------------------- 1 | vain_test 2 | main 3 | *.exe 4 | *.so 5 | *.dylib 6 | *.dll 7 | 8 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # vain 2 | An extremely simple, tiny and easy to use V library for writing lexers. 3 | 4 | # Usage 5 | Vain lets you create lexer objects which act on an input string and follow a pre-defined set of rules you create to match tokens. 6 | On top of that, it allows you to process tokens on the spot (using callback functions) to convert them to a format you're more comfortable with. 7 | 8 | Here's a [usage example](vain_test.v), which outputs: 9 | ``` 10 | // (WORD: My) 11 | // (WHITESPACE: ) 12 | // (NUMBER: 100) 13 | // (PERCENT: %) 14 | // (WHITESPACE: ) 15 | // (WORD: awesome) 16 | // (WHITESPACE: ) 17 | // (WORD: string) 18 | // (EXCLAMATION: !) 
```
--------------------------------------------------------------------------------
/v.mod:
--------------------------------------------------------------------------------
Module {
	name: 'vain',
	description: 'An extremely simple, tiny and easy to use V library for writing lexers.',
	dependencies: []
}
--------------------------------------------------------------------------------
/vain.v:
--------------------------------------------------------------------------------
module vain

import regex

// Called when no rule matches; receives the remaining (unlexed) input.
type FNErrorCallback = fn (arg_1 string)

// Transforms a matched token's text before it is handed back to the caller.
type FNString2String = fn (arg_1 string) string

// One lexing rule: either a literal string or a regex, plus a callback
// that post-processes the matched text.
struct LexRule {
	id       string          // token identifier reported to the caller
	str_rule string          // the literal text, or the regex source string
	callback FNString2String // applied to the matched text before returning
	is_regex bool            // true -> match with `re`, false -> compare `str_rule`
	re       regex.RE        // compiled regex; only meaningful when is_regex is true
}

// Lexer scans `input` from `pos`, trying each rule in order at every step.
struct Lexer {
	rules        []LexRule
	err_callback FNErrorCallback
mut:
	pos   int
	input string
}

// next returns the next (rule id, token text) pair, or none once the input
// is exhausted. Rules are tried in declaration order and the first rule
// matching at the current position wins. If no rule matches, the error
// callback receives the remaining input and none is returned, so callers
// iterating with `or { break }` terminate instead of looping forever.
pub fn (mut lexer Lexer) next() ?(string, string) {
	if lexer.pos == lexer.input.len {
		return none
	}
	for rule in lexer.rules {
		if rule.is_regex {
			// copy so match_string's mutation does not touch the stored rule
			mut re := rule.re
			start, end := re.match_string(lexer.input[lexer.pos..])
			// only accept matches anchored at the current position
			if start != 0 {
				continue
			}
			token := rule.callback(lexer.input[lexer.pos + start..lexer.pos + end])
			lexer.pos += end
			return rule.id, token
		} else {
			// a literal longer than the remaining input cannot match;
			// skipping it avoids slicing past the end of the string
			if lexer.pos + rule.str_rule.len > lexer.input.len {
				continue
			}
			read := lexer.input[lexer.pos..lexer.pos + rule.str_rule.len]
			if rule.str_rule != read {
				continue
			}
			token := rule.callback(read)
			lexer.pos += rule.str_rule.len
			return rule.id, token
		}
	}
	// no rule matched: report the unlexable tail and stop
	lexer.err_callback(lexer.input[lexer.pos..])
	return none
}

// Default callback: returns the matched text unchanged.
fn do_nothing(str string) string {
	return str
}

// literal builds a rule matching the exact string `str`.
pub fn literal(tok_id, str string) LexRule {
	return LexRule{
		id: tok_id
		str_rule: str
		callback: do_nothing
		is_regex: false
	}
}

// literal_callback builds a literal rule whose match is transformed by `cb`.
pub fn literal_callback(tok_id, str string, cb FNString2String) LexRule {
	return LexRule{
		id: tok_id
		str_rule: str
		callback: cb
		is_regex: false
	}
}

// regexstring2re compiles `restring`, panicking with a descriptive message
// on an invalid pattern (rule construction is programmer input, so failing
// fast beats silently dropping the rule).
fn regexstring2re(restring string) regex.RE {
	re, err, err_pos := regex.regex(restring)
	if err != 0 {
		panic('invalid regex $restring at position $err_pos, errcode: $err')
	}
	return re
}

// regex builds a rule matching the regular expression `str`.
pub fn regex(tok_id, str string) LexRule {
	return LexRule{
		id: tok_id
		str_rule: str
		callback: do_nothing
		is_regex: true
		re: regexstring2re(str)
	}
}

// regex_callback builds a regex rule whose match is transformed by `cb`.
pub fn regex_callback(tok_id, str string, cb FNString2String) LexRule {
	return LexRule{
		id: tok_id
		str_rule: str
		callback: cb
		is_regex: true
		re: regexstring2re(str)
	}
}

// make_lexer constructs a Lexer over `input` with the given rules and
// error callback, positioned at the start of the input.
pub fn make_lexer(input string, rules []LexRule, err_cb FNErrorCallback) Lexer {
	return Lexer{
		rules: rules
		err_callback: err_cb
		pos: 0
		input: input
	}
}
--------------------------------------------------------------------------------
/vain_test.v:
--------------------------------------------------------------------------------
import vain

fn cb_reverse(str string) string {
	return str.reverse() // here, we reverse any number token we match
}

fn test_lexing() {
	input := 'My 001% awesome string!'
	mut output := []string{}
	// vain will try each rule from top to bottom so it is good practice to order them by token importance!
	// when you specify a rule, the first argument is the token identifier which can be anything you want.
	// it is good practice to make it SCREAMING_CASE
	mut lexer := vain.make_lexer(input, [
		vain.regex('WHITESPACE', '( |\t|\n|\r)+'), // any whitespace. \s does not currently work with V
		vain.regex_callback('NUMBER', '[0-9]+', cb_reverse),
		vain.regex('WORD', '[a-zA-Z]+'), // match any letter
		vain.literal('PERCENT', '%'), // %
		vain.literal('EXCLAMATION', '!'), // !
	], fn (str string) {
		// this is the error callback - the function here is executed if the lexer
		// fails to understand a token. this can happen due to not enough rules,
		// or certain rules being invalid.
		println('error when tokenizing. remaining input: $str')
	})
	for {
		// lexer.next() will grab the next token and its id
		// once there are no tokens left, the lexer will return none
		id, token := lexer.next() or { break }
		res := '($id: $token)'
		output << res
		println(res)
	}
	expected := [
		'(WORD: My)',
		'(WHITESPACE: )',
		'(NUMBER: 100)',
		'(PERCENT: %)',
		'(WHITESPACE: )',
		'(WORD: awesome)',
		'(WHITESPACE: )',
		'(WORD: string)',
		'(EXCLAMATION: !)'
	]
	assert expected.len <= output.len
	for i in 0 .. expected.len {
		assert output[i] == expected[i]
	}
}
--------------------------------------------------------------------------------