├── LICENSE ├── README.md ├── parser.go ├── parser_test.go ├── scanner.go ├── scanner_test.go └── token.go /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2014 Ben Johnson 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of 6 | this software and associated documentation files (the "Software"), to deal in 7 | the Software without restriction, including without limitation the rights to 8 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 9 | the Software, and to permit persons to whom the Software is furnished to do so, 10 | subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 17 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 18 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 19 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 20 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 21 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## SQL Parser 2 | 3 | This repository contains a toy parser for parsing very simple SQL SELECT statements. 4 | -------------------------------------------------------------------------------- /parser.go: -------------------------------------------------------------------------------- 1 | package sql 2 | 3 | import ( 4 | "fmt" 5 | "io" 6 | ) 7 | 8 | // SelectStatement represents a SQL SELECT statement. 9 | type SelectStatement struct { 10 | Fields []string 11 | TableName string 12 | } 13 | 14 | // Parser represents a parser. 15 | type Parser struct { 16 | s *Scanner 17 | buf struct { 18 | tok Token // last read token 19 | lit string // last read literal 20 | n int // buffer size (max=1) 21 | } 22 | } 23 | 24 | // NewParser returns a new instance of Parser. 25 | func NewParser(r io.Reader) *Parser { 26 | return &Parser{s: NewScanner(r)} 27 | } 28 | 29 | // Parse parses a SQL SELECT statement. 30 | func (p *Parser) Parse() (*SelectStatement, error) { 31 | stmt := &SelectStatement{} 32 | 33 | // First token should be a "SELECT" keyword. 34 | if tok, lit := p.scanIgnoreWhitespace(); tok != SELECT { 35 | return nil, fmt.Errorf("found %q, expected SELECT", lit) 36 | } 37 | 38 | // Next we should loop over all our comma-delimited fields. 39 | for { 40 | // Read a field. 41 | tok, lit := p.scanIgnoreWhitespace() 42 | if tok != IDENT && tok != ASTERISK { 43 | return nil, fmt.Errorf("found %q, expected field", lit) 44 | } 45 | stmt.Fields = append(stmt.Fields, lit) 46 | 47 | // If the next token is not a comma then break the loop. 48 | if tok, _ := p.scanIgnoreWhitespace(); tok != COMMA { 49 | p.unscan() 50 | break 51 | } 52 | } 53 | 54 | // Next we should see the "FROM" keyword. 55 | if tok, lit := p.scanIgnoreWhitespace(); tok != FROM { 56 | return nil, fmt.Errorf("found %q, expected FROM", lit) 57 | } 58 | 59 | // Finally we should read the table name. 60 | tok, lit := p.scanIgnoreWhitespace() 61 | if tok != IDENT { 62 | return nil, fmt.Errorf("found %q, expected table name", lit) 63 | } 64 | stmt.TableName = lit 65 | 66 | // Return the successfully parsed statement. 67 | return stmt, nil 68 | } 69 | 70 | // scan returns the next token from the underlying scanner. 71 | // If a token has been unscanned then read that instead. 72 | func (p *Parser) scan() (tok Token, lit string) { 73 | // If we have a token on the buffer, then return it. 74 | if p.buf.n != 0 { 75 | p.buf.n = 0 76 | return p.buf.tok, p.buf.lit 77 | } 78 | 79 | // Otherwise read the next token from the scanner. 80 | tok, lit = p.s.Scan() 81 | 82 | // Save it to the buffer in case we unscan later. 83 | p.buf.tok, p.buf.lit = tok, lit 84 | 85 | return 86 | } 87 | 88 | // scanIgnoreWhitespace scans the next non-whitespace token. 89 | func (p *Parser) scanIgnoreWhitespace() (tok Token, lit string) { 90 | tok, lit = p.scan() 91 | if tok == WS { 92 | tok, lit = p.scan() 93 | } 94 | return 95 | } 96 | 97 | // unscan pushes the previously read token back onto the buffer. 98 | func (p *Parser) unscan() { p.buf.n = 1 } 99 | -------------------------------------------------------------------------------- /parser_test.go: -------------------------------------------------------------------------------- 1 | package sql_test 2 | 3 | import ( 4 | "reflect" 5 | "strings" 6 | "testing" 7 | 8 | "github.com/benbjohnson/sql-parser" 9 | ) 10 | 11 | // Ensure the parser can parse strings into Statement ASTs. 12 | func TestParser_ParseStatement(t *testing.T) { 13 | var tests = []struct { 14 | s string 15 | stmt *sql.SelectStatement 16 | err string 17 | }{ 18 | // Single field statement 19 | { 20 | s: `SELECT name FROM tbl`, 21 | stmt: &sql.SelectStatement{ 22 | Fields: []string{"name"}, 23 | TableName: "tbl", 24 | }, 25 | }, 26 | 27 | // Multi-field statement 28 | { 29 | s: `SELECT first_name, last_name, age FROM my_table`, 30 | stmt: &sql.SelectStatement{ 31 | Fields: []string{"first_name", "last_name", "age"}, 32 | TableName: "my_table", 33 | }, 34 | }, 35 | 36 | // Select all statement 37 | { 38 | s: `SELECT * FROM my_table`, 39 | stmt: &sql.SelectStatement{ 40 | Fields: []string{"*"}, 41 | TableName: "my_table", 42 | }, 43 | }, 44 | 45 | // Errors 46 | {s: `foo`, err: `found "foo", expected SELECT`}, 47 | {s: `SELECT !`, err: `found "!", expected field`}, 48 | {s: `SELECT field xxx`, err: `found "xxx", expected FROM`}, 49 | {s: `SELECT field FROM *`, err: `found "*", expected table name`}, 50 | } 51 | 52 | for i, tt := range tests { 53 | stmt, err := sql.NewParser(strings.NewReader(tt.s)).Parse() 54 | if !reflect.DeepEqual(tt.err, errstring(err)) { 55 | t.Errorf("%d. %q: error mismatch:\n exp=%s\n got=%s\n\n", i, tt.s, tt.err, err) 56 | } else if tt.err == "" && !reflect.DeepEqual(tt.stmt, stmt) { 57 | t.Errorf("%d. %q\n\nstmt mismatch:\n\nexp=%#v\n\ngot=%#v\n\n", i, tt.s, tt.stmt, stmt) 58 | } 59 | } 60 | } 61 | 62 | // errstring returns the string representation of an error. 63 | func errstring(err error) string { 64 | if err != nil { 65 | return err.Error() 66 | } 67 | return "" 68 | } 69 | -------------------------------------------------------------------------------- /scanner.go: -------------------------------------------------------------------------------- 1 | package sql 2 | 3 | import ( 4 | "bufio" 5 | "bytes" 6 | "io" 7 | "strings" 8 | ) 9 | 10 | // Scanner represents a lexical scanner. 11 | type Scanner struct { 12 | r *bufio.Reader 13 | } 14 | 15 | // NewScanner returns a new instance of Scanner. 16 | func NewScanner(r io.Reader) *Scanner { 17 | return &Scanner{r: bufio.NewReader(r)} 18 | } 19 | 20 | // Scan returns the next token and literal value. 21 | func (s *Scanner) Scan() (tok Token, lit string) { 22 | // Read the next rune. 23 | ch := s.read() 24 | 25 | // If we see whitespace then consume all contiguous whitespace. 26 | // If we see a letter then consume as an ident or reserved word. 27 | // If we see a digit then consume as a number. 28 | if isWhitespace(ch) { 29 | s.unread() 30 | return s.scanWhitespace() 31 | } else if isLetter(ch) { 32 | s.unread() 33 | return s.scanIdent() 34 | } 35 | 36 | // Otherwise read the individual character. 37 | switch ch { 38 | case eof: 39 | return EOF, "" 40 | case '*': 41 | return ASTERISK, string(ch) 42 | case ',': 43 | return COMMA, string(ch) 44 | } 45 | 46 | return ILLEGAL, string(ch) 47 | } 48 | 49 | // scanWhitespace consumes the current rune and all contiguous whitespace. 50 | func (s *Scanner) scanWhitespace() (tok Token, lit string) { 51 | // Create a buffer and read the current character into it. 52 | var buf bytes.Buffer 53 | buf.WriteRune(s.read()) 54 | 55 | // Read every subsequent whitespace character into the buffer. 56 | // Non-whitespace characters and EOF will cause the loop to exit. 57 | for { 58 | if ch := s.read(); ch == eof { 59 | break 60 | } else if !isWhitespace(ch) { 61 | s.unread() 62 | break 63 | } else { 64 | buf.WriteRune(ch) 65 | } 66 | } 67 | 68 | return WS, buf.String() 69 | } 70 | 71 | // scanIdent consumes the current rune and all contiguous ident runes. 72 | func (s *Scanner) scanIdent() (tok Token, lit string) { 73 | // Create a buffer and read the current character into it. 74 | var buf bytes.Buffer 75 | buf.WriteRune(s.read()) 76 | 77 | // Read every subsequent ident character into the buffer. 78 | // Non-ident characters and EOF will cause the loop to exit. 79 | for { 80 | if ch := s.read(); ch == eof { 81 | break 82 | } else if !isLetter(ch) && !isDigit(ch) && ch != '_' { 83 | s.unread() 84 | break 85 | } else { 86 | _, _ = buf.WriteRune(ch) 87 | } 88 | } 89 | 90 | // If the string matches a keyword then return that keyword. 91 | switch strings.ToUpper(buf.String()) { 92 | case "SELECT": 93 | return SELECT, buf.String() 94 | case "FROM": 95 | return FROM, buf.String() 96 | } 97 | 98 | // Otherwise return as a regular identifier. 99 | return IDENT, buf.String() 100 | } 101 | 102 | // read reads the next rune from the buffered reader. 103 | // Returns the rune(0) if an error occurs (or io.EOF is returned). 104 | func (s *Scanner) read() rune { 105 | ch, _, err := s.r.ReadRune() 106 | if err != nil { 107 | return eof 108 | } 109 | return ch 110 | } 111 | 112 | // unread places the previously read rune back on the reader. 113 | func (s *Scanner) unread() { _ = s.r.UnreadRune() } 114 | 115 | // isWhitespace returns true if the rune is a space, tab, or newline. 116 | func isWhitespace(ch rune) bool { return ch == ' ' || ch == '\t' || ch == '\n' } 117 | 118 | // isLetter returns true if the rune is a letter. 119 | func isLetter(ch rune) bool { return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') } 120 | 121 | // isDigit returns true if the rune is a digit. 122 | func isDigit(ch rune) bool { return (ch >= '0' && ch <= '9') } 123 | 124 | // eof represents a marker rune for the end of the reader. 125 | var eof = rune(0) 126 | -------------------------------------------------------------------------------- /scanner_test.go: -------------------------------------------------------------------------------- 1 | package sql_test 2 | 3 | import ( 4 | "strings" 5 | "testing" 6 | 7 | "github.com/benbjohnson/sql-parser" 8 | ) 9 | 10 | // Ensure the scanner can scan tokens correctly. 11 | func TestScanner_Scan(t *testing.T) { 12 | var tests = []struct { 13 | s string 14 | tok sql.Token 15 | lit string 16 | }{ 17 | // Special tokens (EOF, ILLEGAL, WS) 18 | {s: ``, tok: sql.EOF}, 19 | {s: `#`, tok: sql.ILLEGAL, lit: `#`}, 20 | {s: ` `, tok: sql.WS, lit: " "}, 21 | {s: "\t", tok: sql.WS, lit: "\t"}, 22 | {s: "\n", tok: sql.WS, lit: "\n"}, 23 | 24 | // Misc characters 25 | {s: `*`, tok: sql.ASTERISK, lit: "*"}, 26 | 27 | // Identifiers 28 | {s: `foo`, tok: sql.IDENT, lit: `foo`}, 29 | {s: `Zx12_3U_-`, tok: sql.IDENT, lit: `Zx12_3U_`}, 30 | 31 | // Keywords 32 | {s: `FROM`, tok: sql.FROM, lit: "FROM"}, 33 | {s: `SELECT`, tok: sql.SELECT, lit: "SELECT"}, 34 | } 35 | 36 | for i, tt := range tests { 37 | s := sql.NewScanner(strings.NewReader(tt.s)) 38 | tok, lit := s.Scan() 39 | if tt.tok != tok { 40 | t.Errorf("%d. %q token mismatch: exp=%q got=%q <%q>", i, tt.s, tt.tok, tok, lit) 41 | } else if tt.lit != lit { 42 | t.Errorf("%d. %q literal mismatch: exp=%q got=%q", i, tt.s, tt.lit, lit) 43 | } 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /token.go: -------------------------------------------------------------------------------- 1 | package sql 2 | 3 | // Token represents a lexical token. 4 | type Token int 5 | 6 | const ( 7 | // Special tokens 8 | ILLEGAL Token = iota 9 | EOF 10 | WS 11 | 12 | // Literals 13 | IDENT // main 14 | 15 | // Misc characters 16 | ASTERISK // * 17 | COMMA // , 18 | 19 | // Keywords 20 | SELECT 21 | FROM 22 | ) 23 | --------------------------------------------------------------------------------