├── services ├── lexer │ ├── lexer │ │ ├── LexFn.go │ │ ├── LexEqualSign.go │ │ ├── LexRightBracket.go │ │ ├── LexLeftBracket.go │ │ ├── LexBegin.go │ │ ├── LexValue.go │ │ ├── LexKey.go │ │ ├── LexSection.go │ │ └── Lexer.go │ ├── lexertoken │ │ ├── Tokens.go │ │ ├── TokenType.go │ │ └── Token.go │ ├── errors │ │ └── LexerErrors.go │ └── LexerFactory.go ├── errors │ └── LexerErrors.go └── parser │ └── Parser.go ├── model └── ini │ ├── IniKeyValue.go │ ├── IniFile.go │ └── IniSection.go ├── .gitignore ├── README.md ├── sampleIniParser.go └── LICENSE /services/lexer/lexer/LexFn.go: -------------------------------------------------------------------------------- 1 | package lexer 2 | 3 | type LexFn func(*Lexer) LexFn 4 | -------------------------------------------------------------------------------- /model/ini/IniKeyValue.go: -------------------------------------------------------------------------------- 1 | package ini 2 | 3 | type IniKeyValue struct { 4 | Key string `json:"key"` 5 | Value string `json:"value"` 6 | } 7 | -------------------------------------------------------------------------------- /model/ini/IniFile.go: -------------------------------------------------------------------------------- 1 | package ini 2 | 3 | type IniFile struct { 4 | FileName string `json:"fileName"` 5 | Sections []IniSection `json:"sections"` 6 | } 7 | -------------------------------------------------------------------------------- /model/ini/IniSection.go: -------------------------------------------------------------------------------- 1 | package ini 2 | 3 | type IniSection struct { 4 | Name string `json:"name"` 5 | KeyValuePairs []IniKeyValue `json:"keyValuePairs"` 6 | } 7 | -------------------------------------------------------------------------------- /services/lexer/lexertoken/Tokens.go: -------------------------------------------------------------------------------- 1 | package lexertoken 2 | 3 | const EOF rune = 0 4 | 5 | const LEFT_BRACKET string = "[" 6 | const 
RIGHT_BRACKET string = "]" 7 | const EQUAL_SIGN string = "=" 8 | const NEWLINE string = "\n" 9 | -------------------------------------------------------------------------------- /services/lexer/errors/LexerErrors.go: -------------------------------------------------------------------------------- 1 | package errors 2 | 3 | const ( 4 | LEXER_ERROR_UNEXPECTED_EOF string = "Unexpected end of file" 5 | LEXER_ERROR_MISSING_RIGHT_BRACKET string = "Missing a closing section bracket" 6 | ) 7 | -------------------------------------------------------------------------------- /services/lexer/lexertoken/TokenType.go: -------------------------------------------------------------------------------- 1 | package lexertoken 2 | 3 | type TokenType int 4 | 5 | const ( 6 | TOKEN_ERROR TokenType = iota 7 | TOKEN_EOF 8 | 9 | TOKEN_LEFT_BRACKET 10 | TOKEN_RIGHT_BRACKET 11 | TOKEN_EQUAL_SIGN 12 | TOKEN_NEWLINE 13 | 14 | TOKEN_SECTION 15 | TOKEN_KEY 16 | TOKEN_VALUE 17 | ) 18 | -------------------------------------------------------------------------------- /services/lexer/lexertoken/Token.go: -------------------------------------------------------------------------------- 1 | package lexertoken 2 | 3 | import ( 4 | "fmt" 5 | ) 6 | 7 | type Token struct { 8 | Type TokenType 9 | Value string 10 | } 11 | 12 | func (this Token) String() string { 13 | switch this.Type { 14 | case TOKEN_EOF: 15 | return "EOF" 16 | 17 | case TOKEN_ERROR: 18 | return this.Value 19 | } 20 | 21 | return fmt.Sprintf("%q", this.Value) 22 | } 23 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled Object files, Static and Dynamic libs (Shared Objects) 2 | *.o 3 | *.a 4 | *.so 5 | 6 | # Folders 7 | _obj 8 | _test 9 | 10 | # Architecture specific extensions/prefixes 11 | *.[568vq] 12 | [568vq].out 13 | 14 | *.cgo1.go 15 | *.cgo2.c 16 | _cgo_defun.c 17 | _cgo_gotypes.go 18 | 
_cgo_export.* 19 | 20 | _testmain.go 21 | 22 | *.exe 23 | *.test 24 | *.prof 25 | 26 | sample-ini-parser 27 | -------------------------------------------------------------------------------- /services/lexer/lexer/LexEqualSign.go: -------------------------------------------------------------------------------- 1 | package lexer 2 | 3 | import ( 4 | "github.com/adampresley/sample-ini-parser/services/lexer/lexertoken" 5 | ) 6 | 7 | /* 8 | This lexer function emits a TOKEN_EQUAL_SIGN then returns 9 | the lexer for value. 10 | */ 11 | func LexEqualSign(lexer *Lexer) LexFn { 12 | lexer.Pos += len(lexertoken.EQUAL_SIGN) 13 | lexer.Emit(lexertoken.TOKEN_EQUAL_SIGN) 14 | return LexValue 15 | } 16 | -------------------------------------------------------------------------------- /services/lexer/lexer/LexRightBracket.go: -------------------------------------------------------------------------------- 1 | package lexer 2 | 3 | import ( 4 | "github.com/adampresley/sample-ini-parser/services/lexer/lexertoken" 5 | ) 6 | 7 | /* 8 | This lexer function emits a TOKEN_RIGHT_BRACKET then returns 9 | the lexer for a begin. 10 | */ 11 | func LexRightBracket(lexer *Lexer) LexFn { 12 | lexer.Pos += len(lexertoken.RIGHT_BRACKET) 13 | lexer.Emit(lexertoken.TOKEN_RIGHT_BRACKET) 14 | return LexBegin 15 | } 16 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Sample INI Parser 2 | Sample project for a blog series on lexing and parsing. This project demonstrates creating a lexer and basic parser using techniques presented by Rob Pike from [one of his presentations](http://cuddle.googlecode.com/hg/talk/lex.html#landing-slide). The basic premise revolves around a lexer that starts by executing functions that process tokens, each one returning a function that represents the next expected token state. 
-------------------------------------------------------------------------------- /services/lexer/lexer/LexLeftBracket.go: -------------------------------------------------------------------------------- 1 | package lexer 2 | 3 | import ( 4 | "github.com/adampresley/sample-ini-parser/services/lexer/lexertoken" 5 | ) 6 | 7 | /* 8 | This lexer function emits a TOKEN_LEFT_BRACKET then returns 9 | the lexer for a section header. 10 | */ 11 | func LexLeftBracket(lexer *Lexer) LexFn { 12 | lexer.Pos += len(lexertoken.LEFT_BRACKET) 13 | lexer.Emit(lexertoken.TOKEN_LEFT_BRACKET) 14 | return LexSection 15 | } 16 | -------------------------------------------------------------------------------- /services/errors/LexerErrors.go: -------------------------------------------------------------------------------- 1 | package errors 2 | 3 | const ( 4 | LEXER_ERROR_MISSING_LABEL string = "Your form is missing a label" 5 | LEXER_ERROR_MISSING_OPEN_PAREN string = "Missing open parenthesis after constraint" 6 | LEXER_ERROR_MISSING_CLOSE_PAREN string = "Missing closing parenthesis after constraint" 7 | LEXER_ERROR_MISSING_TYPE string = "Missing data type for this form element" 8 | LEXER_ERROR_INVALID_CONSTRAINT string = "Invalid constraint" 9 | ) 10 | -------------------------------------------------------------------------------- /services/lexer/lexer/LexBegin.go: -------------------------------------------------------------------------------- 1 | package lexer 2 | 3 | import ( 4 | "strings" 5 | 6 | "github.com/adampresley/sample-ini-parser/services/lexer/lexertoken" 7 | ) 8 | 9 | /* 10 | This lexer function starts everything off. It determines if we are 11 | beginning with a key/value assignment or a section. 
12 | */ 13 | func LexBegin(lexer *Lexer) LexFn { 14 | lexer.SkipWhitespace() 15 | 16 | if strings.HasPrefix(lexer.InputToEnd(), lexertoken.LEFT_BRACKET) { 17 | return LexLeftBracket 18 | } else { 19 | return LexKey 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /sampleIniParser.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "encoding/json" 5 | "log" 6 | 7 | "github.com/adampresley/sample-ini-parser/services/parser" 8 | ) 9 | 10 | func main() { 11 | sampleInput := ` 12 | key=abcdefg 13 | 14 | [User] 15 | userName=adampresley 16 | keyFile=~/path/to/keyfile 17 | 18 | [Servers] 19 | server1=localhost:8080 20 | ` 21 | 22 | parsedINIFile := parser.Parse("sample.ini", sampleInput) 23 | prettyJSON, err := json.MarshalIndent(parsedINIFile, "", " ") 24 | 25 | if err != nil { 26 | log.Println("Error marshalling JSON:", err.Error()) 27 | return 28 | } 29 | 30 | log.Println(string(prettyJSON)) 31 | } 32 | -------------------------------------------------------------------------------- /services/lexer/LexerFactory.go: -------------------------------------------------------------------------------- 1 | package lexer 2 | 3 | import ( 4 | "github.com/adampresley/sample-ini-parser/services/lexer/lexer" 5 | "github.com/adampresley/sample-ini-parser/services/lexer/lexertoken" 6 | ) 7 | 8 | /* 9 | Start a new lexer with a given input string. This returns the 10 | instance of the lexer and a channel of tokens. Reading this stream 11 | is the way to parse a given input and perform processing. 
12 | */ 13 | func BeginLexing(name, input string) *lexer.Lexer { 14 | l := &lexer.Lexer{ 15 | Name: name, 16 | Input: input, 17 | State: lexer.LexBegin, 18 | Tokens: make(chan lexertoken.Token, 3), 19 | } 20 | 21 | return l 22 | } 23 | -------------------------------------------------------------------------------- /services/lexer/lexer/LexValue.go: -------------------------------------------------------------------------------- 1 | package lexer 2 | 3 | import ( 4 | "strings" 5 | 6 | "github.com/adampresley/sample-ini-parser/services/lexer/errors" 7 | "github.com/adampresley/sample-ini-parser/services/lexer/lexertoken" 8 | ) 9 | 10 | /* 11 | This lexer function emits a TOKEN_VALUE with the value to be assigned 12 | to a key. 13 | */ 14 | func LexValue(lexer *Lexer) LexFn { 15 | for { 16 | if strings.HasPrefix(lexer.InputToEnd(), lexertoken.NEWLINE) { 17 | lexer.Emit(lexertoken.TOKEN_VALUE) 18 | return LexBegin 19 | } 20 | 21 | lexer.Inc() 22 | 23 | if lexer.IsEOF() { 24 | return lexer.Errorf(errors.LEXER_ERROR_UNEXPECTED_EOF) 25 | } 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /services/lexer/lexer/LexKey.go: -------------------------------------------------------------------------------- 1 | package lexer 2 | 3 | import ( 4 | "strings" 5 | 6 | "github.com/adampresley/sample-ini-parser/services/lexer/errors" 7 | "github.com/adampresley/sample-ini-parser/services/lexer/lexertoken" 8 | ) 9 | 10 | /* 11 | This lexer function emits a TOKEN_KEY with the name of an 12 | key that will be assigned a value. 
13 | */ 14 | func LexKey(lexer *Lexer) LexFn { 15 | for { 16 | if strings.HasPrefix(lexer.InputToEnd(), lexertoken.EQUAL_SIGN) { 17 | lexer.Emit(lexertoken.TOKEN_KEY) 18 | return LexEqualSign 19 | } 20 | 21 | lexer.Inc() 22 | 23 | if lexer.IsEOF() { 24 | return lexer.Errorf(errors.LEXER_ERROR_UNEXPECTED_EOF) 25 | } 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /services/lexer/lexer/LexSection.go: -------------------------------------------------------------------------------- 1 | package lexer 2 | 3 | import ( 4 | "strings" 5 | 6 | "github.com/adampresley/sample-ini-parser/services/lexer/errors" 7 | "github.com/adampresley/sample-ini-parser/services/lexer/lexertoken" 8 | ) 9 | 10 | /* 11 | This lexer function emits a TOKEN_SECTION with the name of an 12 | INI file section header. 13 | */ 14 | func LexSection(lexer *Lexer) LexFn { 15 | for { 16 | if lexer.IsEOF() { 17 | return lexer.Errorf(errors.LEXER_ERROR_MISSING_RIGHT_BRACKET) 18 | } 19 | 20 | if strings.HasPrefix(lexer.InputToEnd(), lexertoken.RIGHT_BRACKET) { 21 | lexer.Emit(lexertoken.TOKEN_SECTION) 22 | return LexRightBracket 23 | } 24 | 25 | lexer.Inc() 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 Adam Presley 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | 
copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | -------------------------------------------------------------------------------- /services/parser/Parser.go: -------------------------------------------------------------------------------- 1 | package parser 2 | 3 | import ( 4 | "log" 5 | "strings" 6 | 7 | "github.com/adampresley/sample-ini-parser/model/ini" 8 | "github.com/adampresley/sample-ini-parser/services/lexer" 9 | "github.com/adampresley/sample-ini-parser/services/lexer/lexertoken" 10 | ) 11 | 12 | func isEOF(token lexertoken.Token) bool { 13 | return token.Type == lexertoken.TOKEN_EOF 14 | } 15 | 16 | func Parse(fileName, input string) ini.IniFile { 17 | output := ini.IniFile{ 18 | FileName: fileName, 19 | Sections: make([]ini.IniSection, 0), 20 | } 21 | 22 | var token lexertoken.Token 23 | var tokenValue string 24 | 25 | /* State variables */ 26 | section := ini.IniSection{} 27 | key := "" 28 | 29 | log.Println("Starting lexer and parser for file", fileName, "...") 30 | 31 | l := lexer.BeginLexing(fileName, input) 32 | 33 | for { 34 | token = l.NextToken() 35 | 36 | if token.Type != lexertoken.TOKEN_VALUE { 37 | tokenValue = strings.TrimSpace(token.Value) 38 | } else { 39 | tokenValue = token.Value 40 | } 41 | 42 | if isEOF(token) { 43 | output.Sections = append(output.Sections, section) 44 | break 45 | } 46 | 47 | switch token.Type { 48 | case lexertoken.TOKEN_SECTION: 49 | /* 50 | * Reset tracking variables 51 | */ 52 | if len(section.KeyValuePairs) > 0 
{ 53 | output.Sections = append(output.Sections, section) 54 | } 55 | 56 | key = "" 57 | 58 | section.Name = tokenValue 59 | section.KeyValuePairs = make([]ini.IniKeyValue, 0) 60 | 61 | case lexertoken.TOKEN_KEY: 62 | key = tokenValue 63 | 64 | case lexertoken.TOKEN_VALUE: 65 | section.KeyValuePairs = append(section.KeyValuePairs, ini.IniKeyValue{Key: key, Value: tokenValue}) 66 | key = "" 67 | } 68 | } 69 | 70 | log.Println("Parser has been shutdown") 71 | return output 72 | } 73 | -------------------------------------------------------------------------------- /services/lexer/lexer/Lexer.go: -------------------------------------------------------------------------------- 1 | package lexer 2 | 3 | import ( 4 | "fmt" 5 | "unicode" 6 | "unicode/utf8" 7 | 8 | "github.com/adampresley/sample-ini-parser/services/lexer/lexertoken" 9 | ) 10 | 11 | /* 12 | Lexer object contains the state of our parser and provides 13 | a stream for accepting tokens. 14 | 15 | Based on work by Rob Pike 16 | http://cuddle.googlecode.com/hg/talk/lex.html#landing-slide 17 | */ 18 | type Lexer struct { 19 | Name string 20 | Input string 21 | Tokens chan lexertoken.Token 22 | State LexFn 23 | 24 | Start int 25 | Pos int 26 | Width int 27 | } 28 | 29 | /* 30 | Backup to the beginning of the last read token. 31 | */ 32 | func (this *Lexer) Backup() { 33 | this.Pos -= this.Width 34 | } 35 | 36 | /* 37 | Returns a slice of the current input from the current lexer start position 38 | to the current position. 39 | */ 40 | func (this *Lexer) CurrentInput() string { 41 | return this.Input[this.Start:this.Pos] 42 | } 43 | 44 | /* 45 | Decrement the position 46 | */ 47 | func (this *Lexer) Dec() { 48 | this.Pos-- 49 | } 50 | 51 | /* 52 | Puts a token onto the token channel. The value of this token is 53 | read from the input based on the current lexer position. 
54 | */ 55 | func (this *Lexer) Emit(tokenType lexertoken.TokenType) { 56 | this.Tokens <- lexertoken.Token{Type: tokenType, Value: this.Input[this.Start:this.Pos]} 57 | this.Start = this.Pos 58 | } 59 | 60 | /* 61 | Returns a token with error information. 62 | */ 63 | func (this *Lexer) Errorf(format string, args ...interface{}) LexFn { 64 | this.Tokens <- lexertoken.Token{ 65 | Type: lexertoken.TOKEN_ERROR, 66 | Value: fmt.Sprintf(format, args...), 67 | } 68 | 69 | return nil 70 | } 71 | 72 | /* 73 | Ignores the current token by setting the lexer's start 74 | position to the current reading position. 75 | */ 76 | func (this *Lexer) Ignore() { 77 | this.Start = this.Pos 78 | } 79 | 80 | /* 81 | Increment the position 82 | */ 83 | func (this *Lexer) Inc() { 84 | this.Pos++ 85 | if this.Pos >= utf8.RuneCountInString(this.Input) { 86 | this.Emit(lexertoken.TOKEN_EOF) 87 | } 88 | } 89 | 90 | /* 91 | Return a slice of the input from the current lexer position 92 | to the end of the input string. 93 | */ 94 | func (this *Lexer) InputToEnd() string { 95 | return this.Input[this.Pos:] 96 | } 97 | 98 | /* 99 | Returns the true/false if the lexer is at the end of the 100 | input stream. 101 | */ 102 | func (this *Lexer) IsEOF() bool { 103 | return this.Pos >= len(this.Input) 104 | } 105 | 106 | /* 107 | Returns true/false if then next character is whitespace 108 | */ 109 | func (this *Lexer) IsWhitespace() bool { 110 | ch, _ := utf8.DecodeRuneInString(this.Input[this.Pos:]) 111 | return unicode.IsSpace(ch) 112 | } 113 | 114 | /* 115 | Reads the next rune (character) from the input stream 116 | and advances the lexer position. 
117 | */ 118 | func (this *Lexer) Next() rune { 119 | if this.Pos >= utf8.RuneCountInString(this.Input) { 120 | this.Width = 0 121 | return lexertoken.EOF 122 | } 123 | 124 | result, width := utf8.DecodeRuneInString(this.Input[this.Pos:]) 125 | 126 | this.Width = width 127 | this.Pos += this.Width 128 | return result 129 | } 130 | 131 | /* 132 | Return the next token from the channel 133 | */ 134 | func (this *Lexer) NextToken() lexertoken.Token { 135 | for { 136 | select { 137 | case token := <-this.Tokens: 138 | return token 139 | default: 140 | this.State = this.State(this) 141 | } 142 | } 143 | 144 | panic("Lexer.NextToken reached an invalid state!!") 145 | } 146 | 147 | /* 148 | Returns the next rune in the stream, then puts the lexer 149 | position back. Basically reads the next rune without consuming 150 | it. 151 | */ 152 | func (this *Lexer) Peek() rune { 153 | rune := this.Next() 154 | this.Backup() 155 | return rune 156 | } 157 | 158 | /* 159 | Starts the lexical analysis and feeding tokens into the 160 | token channel. 161 | */ 162 | func (this *Lexer) Run() { 163 | for state := LexBegin; state != nil; { 164 | state = state(this) 165 | } 166 | 167 | this.Shutdown() 168 | } 169 | 170 | /* 171 | Shuts down the token stream 172 | */ 173 | func (this *Lexer) Shutdown() { 174 | close(this.Tokens) 175 | } 176 | 177 | /* 178 | Skips whitespace until we get something meaningful. 179 | */ 180 | func (this *Lexer) SkipWhitespace() { 181 | for { 182 | ch := this.Next() 183 | 184 | if !unicode.IsSpace(ch) { 185 | this.Dec() 186 | break 187 | } 188 | 189 | if ch == lexertoken.EOF { 190 | this.Emit(lexertoken.TOKEN_EOF) 191 | break 192 | } 193 | } 194 | } 195 | --------------------------------------------------------------------------------