├── .gitignore ├── LICENSE ├── README.md ├── example_test.go ├── go.mod ├── shlex.go └── shlex_test.go /.gitignore: -------------------------------------------------------------------------------- 1 | shlex.test 2 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) anmitsu 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining 4 | a copy of this software and associated documentation files (the 5 | "Software"), to deal in the Software without restriction, including 6 | without limitation the rights to use, copy, modify, merge, publish, 7 | distribute, sublicense, and/or sell copies of the Software, and to 8 | permit persons to whom the Software is furnished to do so, subject to 9 | the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be 12 | included in all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 15 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 16 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 17 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 18 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 19 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 20 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 21 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # go-shlex 2 | 3 | go-shlex is a Go library that provides a lexical analyzer modeled on 4 | the Unix shell. 
5 | 6 | ## Install 7 | 8 | go get -u "github.com/anmitsu/go-shlex" 9 | 10 | ## Usage 11 | 12 | ```go 13 | package main 14 | 15 | import ( 16 | "fmt" 17 | "log" 18 | 19 | "github.com/anmitsu/go-shlex" 20 | ) 21 | 22 | func main() { 23 | cmd := `cp -Rdp "file name" 'file name2' dir\ name` 24 | words, err := shlex.Split(cmd, true) 25 | if err != nil { 26 | log.Fatal(err) 27 | } 28 | 29 | for _, w := range words { 30 | fmt.Println(w) 31 | } 32 | } 33 | ``` 34 | output 35 | 36 | cp 37 | -Rdp 38 | file name 39 | file name2 40 | dir name 41 | 42 | ## Documentation 43 | 44 | http://godoc.org/github.com/anmitsu/go-shlex 45 | 46 | -------------------------------------------------------------------------------- /example_test.go: -------------------------------------------------------------------------------- 1 | package shlex_test 2 | 3 | import ( 4 | "fmt" 5 | "log" 6 | 7 | "github.com/anmitsu/go-shlex" 8 | ) 9 | 10 | func ExampleSplit() { 11 | cmd := `cp -Rdp "file name" 'file name2' dir\ name` 12 | 13 | // Split of cmd with POSIX mode. 14 | words1, err := shlex.Split(cmd, true) 15 | if err != nil { 16 | log.Fatal(err) 17 | } 18 | // Split of cmd with Non-POSIX mode. 
19 | words2, err := shlex.Split(cmd, false) 20 | if err != nil { 21 | log.Fatal(err) 22 | } 23 | 24 | fmt.Println("Source command:") 25 | fmt.Println(`cp -Rdp "file name" 'file name2' dir\ name`) 26 | fmt.Println() 27 | 28 | fmt.Println("POSIX mode:") 29 | for _, word := range words1 { 30 | fmt.Println(word) 31 | } 32 | fmt.Println() 33 | fmt.Println("Non-POSIX mode:") 34 | for _, word := range words2 { 35 | fmt.Println(word) 36 | } 37 | 38 | // Output: 39 | // Source command: 40 | // cp -Rdp "file name" 'file name2' dir\ name 41 | // 42 | // POSIX mode: 43 | // cp 44 | // -Rdp 45 | // file name 46 | // file name2 47 | // dir name 48 | // 49 | // Non-POSIX mode: 50 | // cp 51 | // -Rdp 52 | // "file name" 53 | // 'file name2' 54 | // dir\ 55 | // name 56 | } 57 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/anmitsu/go-shlex 2 | 3 | go 1.13 4 | -------------------------------------------------------------------------------- /shlex.go: -------------------------------------------------------------------------------- 1 | // Package shlex provides a simple lexical analysis like Unix shell. 2 | package shlex 3 | 4 | import ( 5 | "bufio" 6 | "errors" 7 | "io" 8 | "strings" 9 | "unicode" 10 | ) 11 | 12 | var ( 13 | ErrNoClosing = errors.New("No closing quotation") 14 | ErrNoEscaped = errors.New("No escaped character") 15 | ) 16 | 17 | // Tokenizer is the interface that classifies a token according to 18 | // words, whitespaces, quotations, escapes and escaped quotations. 19 | type Tokenizer interface { 20 | IsWord(rune) bool 21 | IsWhitespace(rune) bool 22 | IsQuote(rune) bool 23 | IsEscape(rune) bool 24 | IsEscapedQuote(rune) bool 25 | } 26 | 27 | // DefaultTokenizer implements a simple tokenizer like Unix shell. 
28 | type DefaultTokenizer struct{} 29 | 30 | func (t *DefaultTokenizer) IsWord(r rune) bool { 31 | return r == '_' || unicode.IsLetter(r) || unicode.IsNumber(r) 32 | } 33 | func (t *DefaultTokenizer) IsQuote(r rune) bool { 34 | switch r { 35 | case '\'', '"': 36 | return true 37 | default: 38 | return false 39 | } 40 | } 41 | func (t *DefaultTokenizer) IsWhitespace(r rune) bool { 42 | return unicode.IsSpace(r) 43 | } 44 | func (t *DefaultTokenizer) IsEscape(r rune) bool { 45 | return r == '\\' 46 | } 47 | func (t *DefaultTokenizer) IsEscapedQuote(r rune) bool { 48 | return r == '"' 49 | } 50 | 51 | // Lexer represents a lexical analyzer. 52 | type Lexer struct { 53 | reader *bufio.Reader 54 | tokenizer Tokenizer 55 | posix bool 56 | whitespacesplit bool 57 | } 58 | 59 | // NewLexer creates a new Lexer reading from io.Reader. This Lexer 60 | // has a DefaultTokenizer according to posix and whitespacesplit 61 | // rules. 62 | func NewLexer(r io.Reader, posix, whitespacesplit bool) *Lexer { 63 | return &Lexer{ 64 | reader: bufio.NewReader(r), 65 | tokenizer: &DefaultTokenizer{}, 66 | posix: posix, 67 | whitespacesplit: whitespacesplit, 68 | } 69 | } 70 | 71 | // NewLexerString creates a new Lexer reading from a string. This 72 | // Lexer has a DefaultTokenizer according to posix and whitespacesplit 73 | // rules. 74 | func NewLexerString(s string, posix, whitespacesplit bool) *Lexer { 75 | return NewLexer(strings.NewReader(s), posix, whitespacesplit) 76 | } 77 | 78 | // Split splits a string according to posix or non-posix rules. 79 | func Split(s string, posix bool) ([]string, error) { 80 | return NewLexerString(s, posix, true).Split() 81 | } 82 | 83 | // SetTokenizer sets a Tokenizer. 
84 | func (l *Lexer) SetTokenizer(t Tokenizer) { 85 | l.tokenizer = t 86 | } 87 | 88 | func (l *Lexer) Split() ([]string, error) { 89 | result := make([]string, 0) 90 | for { 91 | token, err := l.readToken() 92 | if token != "" { 93 | result = append(result, token) 94 | } 95 | 96 | if err == io.EOF { 97 | break 98 | } else if err != nil { 99 | return result, err 100 | } 101 | } 102 | return result, nil 103 | } 104 | 105 | func (l *Lexer) readToken() (string, error) { 106 | t := l.tokenizer 107 | token := "" 108 | quoted := false 109 | state := ' ' 110 | escapedstate := ' ' 111 | scanning: 112 | for { 113 | next, _, err := l.reader.ReadRune() 114 | if err != nil { 115 | if t.IsQuote(state) { 116 | return token, ErrNoClosing 117 | } else if t.IsEscape(state) { 118 | return token, ErrNoEscaped 119 | } 120 | return token, err 121 | } 122 | 123 | switch { 124 | case t.IsWhitespace(state): 125 | switch { 126 | case t.IsWhitespace(next): 127 | break scanning 128 | case l.posix && t.IsEscape(next): 129 | escapedstate = 'a' 130 | state = next 131 | case t.IsWord(next): 132 | token += string(next) 133 | state = 'a' 134 | case t.IsQuote(next): 135 | if !l.posix { 136 | token += string(next) 137 | } 138 | state = next 139 | default: 140 | token = string(next) 141 | if l.whitespacesplit { 142 | state = 'a' 143 | } else if token != "" || (l.posix && quoted) { 144 | break scanning 145 | } 146 | } 147 | case t.IsQuote(state): 148 | quoted = true 149 | switch { 150 | case next == state: 151 | if !l.posix { 152 | token += string(next) 153 | break scanning 154 | } else { 155 | state = 'a' 156 | } 157 | case l.posix && t.IsEscape(next) && t.IsEscapedQuote(state): 158 | escapedstate = state 159 | state = next 160 | default: 161 | token += string(next) 162 | } 163 | case t.IsEscape(state): 164 | if t.IsQuote(escapedstate) && next != state && next != escapedstate { 165 | token += string(state) 166 | } 167 | token += string(next) 168 | state = escapedstate 169 | case t.IsWord(state): 170 
| switch { 171 | case t.IsWhitespace(next): 172 | if token != "" || (l.posix && quoted) { 173 | break scanning 174 | } 175 | case l.posix && t.IsQuote(next): 176 | state = next 177 | case l.posix && t.IsEscape(next): 178 | escapedstate = 'a' 179 | state = next 180 | case t.IsWord(next) || t.IsQuote(next): 181 | token += string(next) 182 | default: 183 | if l.whitespacesplit { 184 | token += string(next) 185 | } else if token != "" { 186 | l.reader.UnreadRune() 187 | break scanning 188 | } 189 | } 190 | } 191 | } 192 | return token, nil 193 | } 194 | -------------------------------------------------------------------------------- /shlex_test.go: -------------------------------------------------------------------------------- 1 | package shlex 2 | 3 | import ( 4 | "fmt" 5 | "testing" 6 | ) 7 | 8 | var datanonposix = []struct { 9 | in string 10 | out []string 11 | err error 12 | }{ 13 | {`This string has an embedded apostrophe, doesn't it?`, 14 | []string{ 15 | "This", 16 | "string", 17 | "has", 18 | "an", 19 | "embedded", 20 | "apostrophe", 21 | ",", 22 | "doesn't", 23 | "it", 24 | "?", 25 | }, 26 | nil, 27 | }, 28 | {"This string has embedded \"double quotes\" and 'single quotes' in it,\nand even \"a 'nested example'\".\n", 29 | []string{ 30 | "This", 31 | "string", 32 | "has", 33 | "embedded", 34 | `"double quotes"`, 35 | "and", 36 | `'single quotes'`, 37 | "in", 38 | "it", 39 | ",", 40 | "and", 41 | "even", 42 | `"a 'nested example'"`, 43 | ".", 44 | }, 45 | nil, 46 | }, 47 | {`Hello world!, こんにちは 世界!`, 48 | []string{ 49 | "Hello", 50 | "world", 51 | "!", 52 | ",", 53 | "こんにちは", 54 | "世界", 55 | "!", 56 | }, 57 | nil, 58 | }, 59 | {`Do"Not"Separate`, 60 | []string{`Do"Not"Separate`}, 61 | nil, 62 | }, 63 | {`"Do"Separate`, 64 | []string{`"Do"`, "Separate"}, 65 | nil, 66 | }, 67 | {`Escaped \e Character not in quotes`, 68 | []string{ 69 | "Escaped", 70 | `\`, 71 | "e", 72 | "Character", 73 | "not", 74 | "in", 75 | "quotes", 76 | }, 77 | nil, 78 | }, 79 | {`Escaped 
"\e" Character in double quotes`, 80 | []string{ 81 | "Escaped", 82 | `"\e"`, 83 | "Character", 84 | "in", 85 | "double", 86 | "quotes", 87 | }, 88 | nil, 89 | }, 90 | {`Escaped '\e' Character in single quotes`, 91 | []string{ 92 | "Escaped", 93 | `'\e'`, 94 | "Character", 95 | "in", 96 | "single", 97 | "quotes", 98 | }, 99 | nil, 100 | }, 101 | {`Escaped '\'' \"\'\" single quote`, 102 | []string{ 103 | "Escaped", 104 | `'\'`, 105 | `' \"\'`, 106 | `\`, 107 | `" single quote`, 108 | }, 109 | ErrNoClosing, 110 | }, 111 | {`Escaped "\"" \'\"\' double quote`, 112 | []string{ 113 | "Escaped", 114 | `"\"`, 115 | `" \'\"`, 116 | `\`, 117 | `' double quote`, 118 | }, 119 | ErrNoClosing, 120 | }, 121 | {`"'Strip extra layer of quotes'"`, 122 | []string{`"'Strip extra layer of quotes'"`}, 123 | nil, 124 | }, 125 | } 126 | 127 | var dataposix = []struct { 128 | in string 129 | out []string 130 | err error 131 | }{ 132 | {`This string has an embedded apostrophe, doesn't it?`, 133 | []string{ 134 | "This", 135 | "string", 136 | "has", 137 | "an", 138 | "embedded", 139 | "apostrophe", 140 | ",", 141 | "doesnt it?", 142 | }, 143 | ErrNoClosing, 144 | }, 145 | {"This string has embedded \"double quotes\" and 'single quotes' in it,\nand even \"a 'nested example'\".\n", 146 | []string{ 147 | "This", 148 | "string", 149 | "has", 150 | "embedded", 151 | `double quotes`, 152 | "and", 153 | `single quotes`, 154 | "in", 155 | "it", 156 | ",", 157 | "and", 158 | "even", 159 | `a 'nested example'`, 160 | ".", 161 | }, 162 | nil, 163 | }, 164 | {`Hello world!, こんにちは 世界!`, 165 | []string{ 166 | "Hello", 167 | "world", 168 | "!", 169 | ",", 170 | "こんにちは", 171 | "世界", 172 | "!", 173 | }, 174 | nil, 175 | }, 176 | {`Do"Not"Separate`, 177 | []string{`DoNotSeparate`}, 178 | nil, 179 | }, 180 | {`"Do"Separate`, 181 | []string{"DoSeparate"}, 182 | nil, 183 | }, 184 | {`Escaped \e Character not in quotes`, 185 | []string{ 186 | "Escaped", 187 | "e", 188 | "Character", 189 | "not", 190 | "in", 191 | 
"quotes", 192 | }, 193 | nil, 194 | }, 195 | {`Escaped "\e" Character in double quotes`, 196 | []string{ 197 | "Escaped", 198 | `\e`, 199 | "Character", 200 | "in", 201 | "double", 202 | "quotes", 203 | }, 204 | nil, 205 | }, 206 | {`Escaped '\e' Character in single quotes`, 207 | []string{ 208 | "Escaped", 209 | `\e`, 210 | "Character", 211 | "in", 212 | "single", 213 | "quotes", 214 | }, 215 | nil, 216 | }, 217 | {`Escaped '\'' \"\'\" single quote`, 218 | []string{ 219 | "Escaped", 220 | `\ \"\"`, 221 | "single", 222 | "quote", 223 | }, 224 | nil, 225 | }, 226 | {`Escaped "\"" \'\"\' double quote`, 227 | []string{ 228 | "Escaped", 229 | `"`, 230 | `'"'`, 231 | "double", 232 | "quote", 233 | }, 234 | nil, 235 | }, 236 | {`"'Strip extra layer of quotes'"`, 237 | []string{`'Strip extra layer of quotes'`}, 238 | nil, 239 | }, 240 | } 241 | 242 | func TestSplitNonPOSIX(t *testing.T) { 243 | testSplit(t, false) 244 | } 245 | 246 | func TestSplitPOSIX(t *testing.T) { 247 | testSplit(t, true) 248 | } 249 | 250 | func testSplit(t *testing.T, posix bool) { 251 | var data []struct { 252 | in string 253 | out []string 254 | err error 255 | } 256 | if posix { 257 | data = dataposix 258 | } else { 259 | data = datanonposix 260 | } 261 | 262 | for _, d := range data { 263 | t.Logf("Spliting: `%s'", d.in) 264 | 265 | result, err := NewLexerString(d.in, posix, false).Split() 266 | 267 | // check closing and escaped error 268 | if err != d.err { 269 | printToken(result) 270 | t.Fatalf("Error expected: `%v', but result catched: `%v'", 271 | d.err, err) 272 | } 273 | 274 | // check splited number 275 | if len(result) != len(d.out) { 276 | printToken(result) 277 | t.Fatalf("Split expeced: `%d', but result founds: `%d'", 278 | len(d.out), len(result)) 279 | } 280 | 281 | // check words 282 | for j, out := range d.out { 283 | if result[j] != out { 284 | printToken(result) 285 | t.Fatalf("Word expeced: `%s', but result founds: `%s' in %d", 286 | out, result[j], j) 287 | } 288 | } 289 | 
t.Log("ok") 290 | } 291 | } 292 | 293 | func printToken(s []string) { 294 | for _, token := range s { 295 | fmt.Println(token) 296 | } 297 | } 298 | --------------------------------------------------------------------------------