├── test.xml ├── Makefile ├── v.mod ├── main.v ├── README.md └── sax └── lib.v /test.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 |

Title

4 | Text is here 5 |
6 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | all: 2 | v -o vsax main.v 3 | v fmt -w *.v sax/*.v 4 | -./vsax 5 | ./vsax test.xml 6 | 7 | clean: 8 | rm -f vsax main 9 | -------------------------------------------------------------------------------- /v.mod: -------------------------------------------------------------------------------- 1 | Module { 2 | name: 'sax' 3 | description: 'Native XML Stream Parser' 4 | version: '0.2.0' 5 | license: 'MIT' 6 | repo_url: 'https://github.com/trufae/v-sax' 7 | dependencies: [] 8 | tags: ['XML', 'SAX'] 9 | } 10 | -------------------------------------------------------------------------------- /main.v: --------------------------------------------------------------------------------
// import pdf
import sax
import os

// XmlToHtml is the demo SAX handler: it logs every parser event to stdout,
// tracks the element nesting depth and accumulates output text in `res`.
struct XmlToHtml {
mut:
	depth int
	res   string
}

// document_start is called once before any other event.
fn (mut mp XmlToHtml) document_start(mut st sax.Parser) ! {
	mp.res += 'INIT\n'
	println('init')
}

// document_end is called once after the whole input has been consumed.
fn (mut mp XmlToHtml) document_end(mut st sax.Parser) ! {
	println('fini')
}

// element_start logs the opening tag and its attributes.
// It returns an error when the outermost element is not `xmldoc`.
fn (mut mp XmlToHtml) element_start(mut st sax.Parser, name string, attrs []sax.Attribute) ! {
	if mp.depth == 0 {
		if name != 'xmldoc' {
			return error('Invalid root element ${name}')
		}
	}
	println('tag_open: <${name}>')
	for a in attrs {
		println(' KV (${a.key}) = (${a.val})')
	}
	mp.depth++
}

// element_end logs the closing tag.
// It returns an error when there is no open element left to close, so that
// `depth` can never become negative.
fn (mut mp XmlToHtml) element_end(mut st sax.Parser, name string) ! {
	if mp.depth < 1 {
		return error('unexpected closing tag </${name}>')
	}
	println('tag_close: </${name}>')
	mp.depth--
}

// comment logs the text of a `<!...>` construct.
fn (mut mp XmlToHtml) comment(mut st sax.Parser, text string) ! {
	println('comment: ${text}')
}

// characters logs a run of text found between tags.
fn (mut mp XmlToHtml) characters(mut st sax.Parser, text string) ! {
	println('text: ${text}')
}

// main reads the file named by the first command line argument, feeds it to
// the SAX parser and prints whatever the handler accumulated.
fn main() {
	if os.args.len < 2 {
		println('vsax file.xml')
		exit(1)
	}
	argv := os.args[1]
	file_xml := os.read_file(argv)!

	mut to_html := XmlToHtml{}
	mut p := sax.new_parser(mut to_html)
	p.parse(file_xml) or { println('parsing failed ${err}') }
	println(to_html.res)
}
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Stream XML parser for the V language 2 | 3 | This module implements a simple and fast way to parse xml files. 4 | 5 | ## Author 6 | 7 | pancake@nopcode.org 8 | 9 | ## Known Issues and limitations 10 | 11 | This library is in a very early stage of development, so things can change and bugs can appear. 12 | 13 | * [ ] Not suitable for large files (input data is copied into a string) 14 | * [ ] use a callback or reader api to peek() from somewhere else 15 | * [ ] Only expect utf8 encodings 16 | 17 | ## Usage 18 | 19 | See [main.v](main.v) 20 | 21 | ```sh 22 | $ v -o vsax main.v 23 | $ ./vsax test.xml 24 | ``` 25 | 26 | ```v 27 | import sax 28 | import os 29 | 30 | struct XmlToHtml { 31 | mut: 32 | depth int 33 | res string 34 | } 35 | 36 | fn (mut mp XmlToHtml) document_start(mut st sax.Parser) ! { 37 | ... 38 | } 39 | 40 | fn (mut mp XmlToHtml) element_start(mut st sax.Parser, name string, attrs []sax.Attribute) ! { 41 | mp.depth++ 42 | } 43 | 44 | fn (mut mp XmlToHtml) element_end(mut st sax.Parser, name string) ! { 45 | if mp.depth < 1 { 46 | return error ("cant close so many nodes") 47 | } 48 | mp.depth-- 49 | } 50 | ... 51 | 52 | fn main() { 53 | if os.args.len < 2 { 54 | println('vsax file.xml') 55 | exit(1) 56 | } 57 | file_xml := os.read_file(os.args[1])! 
58 | 59 | mut to_html := XmlToHtml{} 60 | mut p := sax.new_parser(mut to_html) 61 | p.parse(file_xml) or { println('parsing failed ${err}') } 62 | println(to_html.res) 63 | } 64 | ``` 65 | -------------------------------------------------------------------------------- /sax/lib.v: --------------------------------------------------------------------------------
module sax

import strings

// Attribute is one `key="val"` pair found inside an opening tag.
pub struct Attribute {
pub:
	key string
	val string
}

// SaxCallbacks is the set of event handlers the parser invokes while it walks
// the input. Any handler may return an error, which aborts parsing and is
// propagated to the caller of `Parser.parse`.
pub interface SaxCallbacks {
mut:
	document_start(mut Parser) !
	document_end(mut Parser) !
	element_start(mut Parser, string, []Attribute) !
	element_end(mut Parser, string) !
	comment(mut Parser, string) !
	characters(mut Parser, string) !
}

// Parser holds the input text, the read position and the pending character
// data that has not been reported through `characters` yet.
pub struct Parser {
mut:
	data   string
	sb     strings.Builder
	cursor int
	line   int
	on     SaxCallbacks
}

// new_parser creates a parser that reports its events to `callbacks`.
pub fn new_parser(mut callbacks SaxCallbacks) Parser {
	return Parser{
		on: callbacks
	}
}

// is_xml_space reports whether `ch` is one of the XML whitespace characters.
[inline]
fn is_xml_space(ch rune) bool {
	return ch == ` ` || ch == `\t` || ch == `\n` || ch == `\r`
}

// peek consumes and returns the next character of the input (despite its
// name it always advances the cursor). Multi-byte UTF-8 sequences are decoded
// into a single rune, so that `write_rune` reproduces the original text
// instead of re-encoding every byte on its own. A byte that does not start a
// well-formed sequence is returned as a rune of the same numeric value.
// It returns an error once the input is exhausted.
[direct_array_access; inline]
fn (mut parser Parser) peek() !rune {
	if parser.cursor >= parser.data.len {
		// end of document
		return error('unexpected end of document')
	}
	lead := parser.data[parser.cursor]
	if lead < 0x80 {
		// plain ASCII, the common case
		parser.cursor++
		if lead == `\n` {
			parser.line++
		}
		return rune(lead)
	}
	mut size := 1
	mut code := u32(lead)
	if (lead & 0xE0) == 0xC0 {
		size = 2
		code = u32(lead & 0x1F)
	} else if (lead & 0xF0) == 0xE0 {
		size = 3
		code = u32(lead & 0x0F)
	} else if (lead & 0xF8) == 0xF0 {
		size = 4
		code = u32(lead & 0x07)
	}
	if parser.cursor + size > parser.data.len {
		// sequence truncated by the end of the input: pass the byte through
		size = 1
		code = u32(lead)
	}
	for i in 1 .. size {
		cont := parser.data[parser.cursor + i]
		if (cont & 0xC0) != 0x80 {
			// not a continuation byte: pass the lead byte through on its own
			size = 1
			code = u32(lead)
			break
		}
		code = (code << 6) | u32(cont & 0x3F)
	}
	parser.cursor += size
	return rune(code)
}

// parse_comment consumes the input up to and including the next `>` and
// returns everything before it. It is not used by the parser itself, which
// relies on `read_declaration` so that a `>` inside a comment is preserved.
pub fn (mut parser Parser) parse_comment() !string {
	mut msg := strings.new_builder(1024)
	for {
		ch := parser.peek()!
		if ch == `>` {
			// end of comment
			break
		} else {
			msg.write_rune(ch)
		}
	}
	return msg.str()
}

// read_declaration consumes the remainder of a `<!...>` construct (the `<!`
// has already been read) and returns its text without the closing `>`.
// When the text starts with `--` it is a comment and only `-->` terminates
// it; the returned text keeps the leading and trailing `--`. Any other
// declaration ends at the first `>`.
fn (mut parser Parser) read_declaration() !string {
	mut text := strings.new_builder(128)
	mut count := 0 // characters stored so far
	mut leading_dashes := 0 // length of the run of `-` at the very start
	mut trailing_dashes := 0 // length of the run of `-` just before the cursor
	for {
		ch := parser.peek()!
		if ch == `>` {
			is_comment := leading_dashes >= 2
			// `count >= 4` keeps the opening `--` from doubling as the closing one
			if !is_comment || (trailing_dashes >= 2 && count >= 4) {
				break
			}
		}
		if ch == `-` {
			trailing_dashes++
			if leading_dashes == count {
				leading_dashes++
			}
		} else {
			trailing_dashes = 0
		}
		text.write_rune(ch)
		count++
	}
	return text.str()
}

// peek_until consumes the input up to and including the first unescaped `x`
// and returns the text before it. A backslash keeps the character that
// follows it (both are copied to the result), which allows `\"` inside
// attribute values.
fn peek_until(mut parser Parser, x rune) !string {
	mut text := strings.new_builder(128)
	for {
		ch := parser.peek()!
		if ch == `\\` {
			text.write_rune(ch)
			text.write_rune(parser.peek()!)
		} else if ch == x {
			break
		} else {
			text.write_rune(ch)
		}
	}
	return text.str()
}

// parse_attributes reads `key="val"` / `key='val'` pairs up to and including
// the `>` that closes the tag and returns them in document order.
// Whitespace (space, tab, newline, carriage return) between attributes and
// around `=` is ignored. A value that is not quoted is skipped.
pub fn (mut parser Parser) parse_attributes() ![]Attribute {
	mut attrs := []Attribute{}
	mut key := strings.new_builder(128)
	for {
		ch := parser.peek()!
		match ch {
			` `, `\t`, `\n`, `\r` {}
			`>` {
				break
			}
			`=` {
				mut quote := parser.peek()!
				for is_xml_space(quote) {
					quote = parser.peek()!
				}
				if quote == `"` || quote == `'` {
					// the value ends at the same kind of quote that opened it
					val := peek_until(mut parser, quote)!
					attrs << Attribute{
						key: key.str()
						val: val
					}
					key.clear()
				}
			}
			else {
				key.write_rune(ch)
			}
		}
	}
	return attrs
}

// parse_tag handles everything between `<` (already consumed) and the
// matching `>` and fires the corresponding callback:
//   `<!...>`  -> comment
//   `</name>` -> element_end
//   `<name ...>` -> element_start
//   `<name .../>` -> element_start immediately followed by element_end
// NOTE: there is no dedicated branch for `<?...?>`; such input is reported
// through element_start like any other tag.
pub fn (mut parser Parser) parse_tag() ! {
	mut ch := parser.peek()!
	match ch {
		`!` {
			text := parser.read_declaration()!
			parser.on.comment(mut parser, text)!
		}
		`/` {
			name := peek_until(mut parser, `>`)!
			parser.on.element_end(mut parser, name.trim_space())!
		}
		else {
			mut attrs := []Attribute{}
			mut name := strings.new_builder(100)
			for {
				if ch == `>` {
					break
				}
				if is_xml_space(ch) {
					attrs = parser.parse_attributes()!
					break
				}
				name.write_rune(ch)
				ch = parser.peek()!
			}
			// Both exits of the loop have just consumed the closing `>`, so the
			// byte in front of it sits at cursor - 2; a `/` there means `<name/>`.
			self_closing := parser.cursor >= 2 && parser.data[parser.cursor - 2] == `/`
			tag := name.str().trim_right('/')
			parser.on.element_start(mut parser, tag, attrs)!
			if self_closing {
				parser.on.element_end(mut parser, tag)!
			}
		}
	}
}

// flush_chars reports the text collected since the last tag through the
// `characters` callback; it does nothing when no text is pending.
fn (mut parser Parser) flush_chars() ! {
	if parser.sb.len > 0 {
		chars := parser.sb.str()
		parser.on.characters(mut parser, chars)!
		parser.sb.clear()
	}
}

// parse runs the parser over `input`, firing document_start, then one event
// per tag / text run, then document_end. The read position is reset first, so
// the same Parser can be used for several documents. An error returned by a
// callback, or input that ends inside a tag, aborts parsing with that error.
pub fn (mut parser Parser) parse(input string) ! {
	parser.sb = strings.new_builder(1000)
	parser.data = input
	parser.cursor = 0
	parser.line = 0
	parser.on.document_start(mut parser)!
	for {
		// running out of input between tags is the normal end of the document
		ch := parser.peek() or { break }
		if ch == `<` {
			parser.flush_chars()!
			parser.parse_tag()!
		} else {
			parser.sb.write_rune(ch)
		}
	}
	parser.flush_chars()!
	parser.on.document_end(mut parser)!
}
--------------------------------------------------------------------------------