├── .gitignore
├── LICENSE
├── README.md
├── example_test.go
├── go.mod
├── shlex.go
└── shlex_test.go


/.gitignore:
--------------------------------------------------------------------------------
1 | shlex.test
2 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright (c) anmitsu <anmitsu.s@gmail.com>
 2 | 
 3 | Permission is hereby granted, free of charge, to any person obtaining
 4 | a copy of this software and associated documentation files (the
 5 | "Software"), to deal in the Software without restriction, including
 6 | without limitation the rights to use, copy, modify, merge, publish,
 7 | distribute, sublicense, and/or sell copies of the Software, and to
 8 | permit persons to whom the Software is furnished to do so, subject to
 9 | the following conditions:
10 | 
11 | The above copyright notice and this permission notice shall be
12 | included in all copies or substantial portions of the Software.
13 | 
14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # go-shlex
 2 | 
 3 | go-shlex is a Go library that provides a simple lexical analyzer for
 4 | Unix shell-like command lines.
 5 | 
 6 | ## Install
 7 | 
 8 |     go get -u "github.com/anmitsu/go-shlex"
 9 | 
10 | ## Usage
11 | 
12 | ```go
13 | package main
14 | 
15 | import (
16 |     "fmt"
17 |     "log"
18 | 
19 |     "github.com/anmitsu/go-shlex"
20 | )
21 | 
22 | func main() {
23 |     cmd := `cp -Rdp "file name" 'file name2' dir\ name`
24 |     words, err := shlex.Split(cmd, true)
25 |     if err != nil {
26 |         log.Fatal(err)
27 |     }
28 | 
29 |     for _, w := range words {
30 |         fmt.Println(w)
31 |     }
32 | }
33 | ```
34 | Output:
35 | 
36 |     cp
37 |     -Rdp
38 |     file name
39 |     file name2
40 |     dir name
41 | 
42 | ## Documentation
43 | 
44 | http://godoc.org/github.com/anmitsu/go-shlex
45 | 
46 | 


--------------------------------------------------------------------------------
/example_test.go:
--------------------------------------------------------------------------------
 1 | package shlex_test
 2 | 
 3 | import (
 4 | 	"fmt"
 5 | 	"log"
 6 | 
 7 | 	"github.com/anmitsu/go-shlex"
 8 | )
 9 | 
10 | func ExampleSplit() {
11 | 	cmd := `cp -Rdp "file name" 'file name2' dir\ name`
12 | 
13 | 	// Split of cmd with POSIX mode.
14 | 	words1, err := shlex.Split(cmd, true)
15 | 	if err != nil {
16 | 		log.Fatal(err)
17 | 	}
18 | 	// Split of cmd with Non-POSIX mode.
19 | 	words2, err := shlex.Split(cmd, false)
20 | 	if err != nil {
21 | 		log.Fatal(err)
22 | 	}
23 | 
24 | 	fmt.Println("Source command:")
25 | 	fmt.Println(`cp -Rdp "file name" 'file name2' dir\ name`)
26 | 	fmt.Println()
27 | 
28 | 	fmt.Println("POSIX mode:")
29 | 	for _, word := range words1 {
30 | 		fmt.Println(word)
31 | 	}
32 | 	fmt.Println()
33 | 	fmt.Println("Non-POSIX mode:")
34 | 	for _, word := range words2 {
35 | 		fmt.Println(word)
36 | 	}
37 | 
38 | 	// Output:
39 | 	// Source command:
40 | 	// cp -Rdp "file name" 'file name2' dir\ name
41 | 	//
42 | 	// POSIX mode:
43 | 	// cp
44 | 	// -Rdp
45 | 	// file name
46 | 	// file name2
47 | 	// dir name
48 | 	//
49 | 	// Non-POSIX mode:
50 | 	// cp
51 | 	// -Rdp
52 | 	// "file name"
53 | 	// 'file name2'
54 | 	// dir\
55 | 	// name
56 | }
57 | 


--------------------------------------------------------------------------------
/go.mod:
--------------------------------------------------------------------------------
1 | module github.com/anmitsu/go-shlex
2 | 
3 | go 1.13
4 | 


--------------------------------------------------------------------------------
/shlex.go:
--------------------------------------------------------------------------------
  1 | // Package shlex provides a simple lexical analyzer modeled on the Unix shell.
  2 | package shlex
  3 | 
  4 | import (
  5 | 	"bufio"
  6 | 	"errors"
  7 | 	"io"
  8 | 	"strings"
  9 | 	"unicode"
 10 | )
 11 | 
// Sentinel errors reported by the Lexer when reading stops in the middle
// of a token.
var (
	// ErrNoClosing is returned when reading fails (for example at the end
	// of the input) while the Lexer is still inside a quotation.
	ErrNoClosing = errors.New("No closing quotation")
	// ErrNoEscaped is returned when reading fails directly after an
	// escape character, before the escaped rune has been read.
	ErrNoEscaped = errors.New("No escaped character")
)
 16 | 
// Tokenizer is the interface that classifies a rune as part of a word,
// as whitespace, as a quotation mark, as an escape character or as a
// quotation mark inside which escapes are honored.
type Tokenizer interface {
	// IsWord reports whether the rune belongs to a word.
	IsWord(rune) bool
	// IsWhitespace reports whether the rune separates tokens.
	IsWhitespace(rune) bool
	// IsQuote reports whether the rune opens a quotation; the same rune
	// closes it again.
	IsQuote(rune) bool
	// IsEscape reports whether the rune is an escape character.
	IsEscape(rune) bool
	// IsEscapedQuote reports whether the rune is a quotation mark inside
	// which the Lexer processes escape characters in POSIX mode.
	IsEscapedQuote(rune) bool
}
 26 | 
 27 | // DefaultTokenizer implements a simple tokenizer like Unix shell.
 28 | type DefaultTokenizer struct{}
 29 | 
 30 | func (t *DefaultTokenizer) IsWord(r rune) bool {
 31 | 	return r == '_' || unicode.IsLetter(r) || unicode.IsNumber(r)
 32 | }
 33 | func (t *DefaultTokenizer) IsQuote(r rune) bool {
 34 | 	switch r {
 35 | 	case '\'', '"':
 36 | 		return true
 37 | 	default:
 38 | 		return false
 39 | 	}
 40 | }
 41 | func (t *DefaultTokenizer) IsWhitespace(r rune) bool {
 42 | 	return unicode.IsSpace(r)
 43 | }
 44 | func (t *DefaultTokenizer) IsEscape(r rune) bool {
 45 | 	return r == '\\'
 46 | }
 47 | func (t *DefaultTokenizer) IsEscapedQuote(r rune) bool {
 48 | 	return r == '"'
 49 | }
 50 | 
// Lexer represents a lexical analyzer. It reads runes from a buffered
// reader and groups them into tokens with the help of a Tokenizer.
type Lexer struct {
	// reader is the buffered source the runes are read from.
	reader          *bufio.Reader
	// tokenizer classifies every rune that is read.
	tokenizer       Tokenizer
	// posix selects POSIX mode: quotation marks are dropped from the
	// tokens and escape characters are processed. When false, quotation
	// marks are kept as part of the token.
	posix           bool
	// whitespacesplit makes runes that are neither word, quote nor
	// whitespace runes part of the surrounding token. When false, such
	// a rune ends the current token.
	whitespacesplit bool
}
 58 | 
 59 | // NewLexer creates a new Lexer reading from io.Reader.  This Lexer
 60 | // has a DefaultTokenizer according to posix and whitespacesplit
 61 | // rules.
 62 | func NewLexer(r io.Reader, posix, whitespacesplit bool) *Lexer {
 63 | 	return &Lexer{
 64 | 		reader:          bufio.NewReader(r),
 65 | 		tokenizer:       &DefaultTokenizer{},
 66 | 		posix:           posix,
 67 | 		whitespacesplit: whitespacesplit,
 68 | 	}
 69 | }
 70 | 
 71 | // NewLexerString creates a new Lexer reading from a string.  This
 72 | // Lexer has a DefaultTokenizer according to posix and whitespacesplit
 73 | // rules.
 74 | func NewLexerString(s string, posix, whitespacesplit bool) *Lexer {
 75 | 	return NewLexer(strings.NewReader(s), posix, whitespacesplit)
 76 | }
 77 | 
 78 | // Split splits a string according to posix or non-posix rules.
 79 | func Split(s string, posix bool) ([]string, error) {
 80 | 	return NewLexerString(s, posix, true).Split()
 81 | }
 82 | 
// SetTokenizer replaces the Tokenizer used by the Lexer with t.  The
// value is stored as-is; no nil check is performed here.
func (l *Lexer) SetTokenizer(t Tokenizer) {
	l.tokenizer = t
}
 87 | 
 88 | func (l *Lexer) Split() ([]string, error) {
 89 | 	result := make([]string, 0)
 90 | 	for {
 91 | 		token, err := l.readToken()
 92 | 		if token != "" {
 93 | 			result = append(result, token)
 94 | 		}
 95 | 
 96 | 		if err == io.EOF {
 97 | 			break
 98 | 		} else if err != nil {
 99 | 			return result, err
100 | 		}
101 | 	}
102 | 	return result, nil
103 | }
104 | 
// readToken reads runes from the Lexer's reader and returns the next
// token.  The returned token may be empty, for example when the first
// rune read is whitespace.
//
// When ReadRune fails, the token accumulated so far is returned with
// ErrNoClosing if a quotation is still open, with ErrNoEscaped if the
// last rune was an escape character, and with the reader's own error
// otherwise.
//
// The scanner is a small state machine.  The state is stored as a rune
// and classified with the Tokenizer itself: ' ' stands for "between
// tokens", 'a' for "inside a word", a quote rune for "inside that
// quotation" and an escape rune for "directly after an escape".  A
// custom Tokenizer therefore has to classify ' ' as whitespace and 'a'
// as a word rune for these states to be recognized.
func (l *Lexer) readToken() (string, error) {
	t := l.tokenizer
	token := ""
	// quoted records that a quotation was entered while reading this token.
	quoted := false
	state := ' '
	// escapedstate is the state to return to once the escaped rune has
	// been consumed.
	escapedstate := ' '
scanning:
	for {
		next, _, err := l.reader.ReadRune()
		if err != nil {
			// Any read error ends the token; an open quotation or a
			// dangling escape takes precedence over the reader's error.
			if t.IsQuote(state) {
				return token, ErrNoClosing
			} else if t.IsEscape(state) {
				return token, ErrNoEscaped
			}
			return token, err
		}

		switch {
		case t.IsWhitespace(state):
			// Start of a token: nothing has been accumulated yet.
			switch {
			case t.IsWhitespace(next):
				// Whitespace before any token content yields an empty token.
				break scanning
			case l.posix && t.IsEscape(next):
				escapedstate = 'a'
				state = next
			case t.IsWord(next):
				token += string(next)
				state = 'a'
			case t.IsQuote(next):
				// Non-POSIX mode keeps the opening quote in the token.
				if !l.posix {
					token += string(next)
				}
				state = next
			default:
				// Any other rune either starts a word (whitespacesplit)
				// or forms a token of its own.
				token = string(next)
				if l.whitespacesplit {
					state = 'a'
				} else if token != "" || (l.posix && quoted) {
					break scanning
				}
			}
		case t.IsQuote(state):
			// Inside a quotation; state holds the opening quote rune.
			quoted = true
			switch {
			case next == state:
				// Closing quote: non-POSIX mode keeps it and ends the
				// token, POSIX mode drops it and continues as a word.
				if !l.posix {
					token += string(next)
					break scanning
				} else {
					state = 'a'
				}
			case l.posix && t.IsEscape(next) && t.IsEscapedQuote(state):
				escapedstate = state
				state = next
			default:
				token += string(next)
			}
		case t.IsEscape(state):
			// Directly after an escape character.  Inside a quotation the
			// escape character itself is kept unless it precedes another
			// escape character or the enclosing quote.
			if t.IsQuote(escapedstate) && next != state && next != escapedstate {
				token += string(state)
			}
			token += string(next)
			state = escapedstate
		case t.IsWord(state):
			// Inside a word.
			switch {
			case t.IsWhitespace(next):
				if token != "" || (l.posix && quoted) {
					break scanning
				}
			case l.posix && t.IsQuote(next):
				state = next
			case l.posix && t.IsEscape(next):
				escapedstate = 'a'
				state = next
			case t.IsWord(next) || t.IsQuote(next):
				// Reached for quotes only in non-POSIX mode, where a quote
				// inside a word is kept as an ordinary rune.
				token += string(next)
			default:
				if l.whitespacesplit {
					token += string(next)
				} else if token != "" {
					// Push the rune back so that it starts the next token;
					// the error returned by UnreadRune is ignored.
					l.reader.UnreadRune()
					break scanning
				}
			}
		}
	}
	return token, nil
}
194 | 


--------------------------------------------------------------------------------
/shlex_test.go:
--------------------------------------------------------------------------------
  1 | package shlex
  2 | 
  3 | import (
  4 | 	"fmt"
  5 | 	"testing"
  6 | )
  7 | 
// datanonposix lists the test cases for non-POSIX mode.  Each entry
// holds an input string, the tokens expected from a Lexer created with
// posix and whitespacesplit both disabled, and the expected error.
var datanonposix = []struct {
	in  string
	out []string
	err error
}{
	{`This string has an embedded apostrophe, doesn't it?`,
		[]string{
			"This",
			"string",
			"has",
			"an",
			"embedded",
			"apostrophe",
			",",
			"doesn't",
			"it",
			"?",
		},
		nil,
	},
	{"This string has embedded \"double quotes\" and 'single quotes' in it,\nand even \"a 'nested example'\".\n",
		[]string{
			"This",
			"string",
			"has",
			"embedded",
			`"double quotes"`,
			"and",
			`'single quotes'`,
			"in",
			"it",
			",",
			"and",
			"even",
			`"a 'nested example'"`,
			".",
		},
		nil,
	},
	{`Hello world!, こんにちは　世界！`,
		[]string{
			"Hello",
			"world",
			"!",
			",",
			"こんにちは",
			"世界",
			"！",
		},
		nil,
	},
	{`Do"Not"Separate`,
		[]string{`Do"Not"Separate`},
		nil,
	},
	{`"Do"Separate`,
		[]string{`"Do"`, "Separate"},
		nil,
	},
	{`Escaped \e Character not in quotes`,
		[]string{
			"Escaped",
			`\`,
			"e",
			"Character",
			"not",
			"in",
			"quotes",
		},
		nil,
	},
	{`Escaped "\e" Character in double quotes`,
		[]string{
			"Escaped",
			`"\e"`,
			"Character",
			"in",
			"double",
			"quotes",
		},
		nil,
	},
	{`Escaped '\e' Character in single quotes`,
		[]string{
			"Escaped",
			`'\e'`,
			"Character",
			"in",
			"single",
			"quotes",
		},
		nil,
	},
	{`Escaped '\'' \"\'\" single quote`,
		[]string{
			"Escaped",
			`'\'`,
			`' \"\'`,
			`\`,
			`" single quote`,
		},
		ErrNoClosing,
	},
	{`Escaped "\"" \'\"\' double quote`,
		[]string{
			"Escaped",
			`"\"`,
			`" \'\"`,
			`\`,
			`' double quote`,
		},
		ErrNoClosing,
	},
	{`"'Strip extra layer of quotes'"`,
		[]string{`"'Strip extra layer of quotes'"`},
		nil,
	},
}
126 | 
// dataposix lists the test cases for POSIX mode.  Each entry holds an
// input string, the tokens expected from a Lexer created with posix
// enabled and whitespacesplit disabled, and the expected error.
var dataposix = []struct {
	in  string
	out []string
	err error
}{
	{`This string has an embedded apostrophe, doesn't it?`,
		[]string{
			"This",
			"string",
			"has",
			"an",
			"embedded",
			"apostrophe",
			",",
			"doesnt it?",
		},
		ErrNoClosing,
	},
	{"This string has embedded \"double quotes\" and 'single quotes' in it,\nand even \"a 'nested example'\".\n",
		[]string{
			"This",
			"string",
			"has",
			"embedded",
			`double quotes`,
			"and",
			`single quotes`,
			"in",
			"it",
			",",
			"and",
			"even",
			`a 'nested example'`,
			".",
		},
		nil,
	},
	{`Hello world!, こんにちは　世界！`,
		[]string{
			"Hello",
			"world",
			"!",
			",",
			"こんにちは",
			"世界",
			"！",
		},
		nil,
	},
	{`Do"Not"Separate`,
		[]string{`DoNotSeparate`},
		nil,
	},
	{`"Do"Separate`,
		[]string{"DoSeparate"},
		nil,
	},
	{`Escaped \e Character not in quotes`,
		[]string{
			"Escaped",
			"e",
			"Character",
			"not",
			"in",
			"quotes",
		},
		nil,
	},
	{`Escaped "\e" Character in double quotes`,
		[]string{
			"Escaped",
			`\e`,
			"Character",
			"in",
			"double",
			"quotes",
		},
		nil,
	},
	{`Escaped '\e' Character in single quotes`,
		[]string{
			"Escaped",
			`\e`,
			"Character",
			"in",
			"single",
			"quotes",
		},
		nil,
	},
	{`Escaped '\'' \"\'\" single quote`,
		[]string{
			"Escaped",
			`\ \"\"`,
			"single",
			"quote",
		},
		nil,
	},
	{`Escaped "\"" \'\"\' double quote`,
		[]string{
			"Escaped",
			`"`,
			`'"'`,
			"double",
			"quote",
		},
		nil,
	},
	{`"'Strip extra layer of quotes'"`,
		[]string{`'Strip extra layer of quotes'`},
		nil,
	},
}
241 | 
// TestSplitNonPOSIX runs the shared split checks in non-POSIX mode.
func TestSplitNonPOSIX(t *testing.T) {
	testSplit(t, false)
}
245 | 
// TestSplitPOSIX runs the shared split checks in POSIX mode.
func TestSplitPOSIX(t *testing.T) {
	testSplit(t, true)
}
249 | 
250 | func testSplit(t *testing.T, posix bool) {
251 | 	var data []struct {
252 | 		in  string
253 | 		out []string
254 | 		err error
255 | 	}
256 | 	if posix {
257 | 		data = dataposix
258 | 	} else {
259 | 		data = datanonposix
260 | 	}
261 | 
262 | 	for _, d := range data {
263 | 		t.Logf("Spliting: `%s'", d.in)
264 | 
265 | 		result, err := NewLexerString(d.in, posix, false).Split()
266 | 
267 | 		// check closing and escaped error
268 | 		if err != d.err {
269 | 			printToken(result)
270 | 			t.Fatalf("Error expected: `%v', but result catched: `%v'",
271 | 				d.err, err)
272 | 		}
273 | 
274 | 		// check splited number
275 | 		if len(result) != len(d.out) {
276 | 			printToken(result)
277 | 			t.Fatalf("Split expeced: `%d', but result founds: `%d'",
278 | 				len(d.out), len(result))
279 | 		}
280 | 
281 | 		// check words
282 | 		for j, out := range d.out {
283 | 			if result[j] != out {
284 | 				printToken(result)
285 | 				t.Fatalf("Word expeced: `%s', but result founds: `%s' in %d",
286 | 					out, result[j], j)
287 | 			}
288 | 		}
289 | 		t.Log("ok")
290 | 	}
291 | }
292 | 
// printToken writes every token of s to standard output, one per line.
// testSplit calls it to show the actual result before failing a case.
func printToken(s []string) {
	for i := 0; i < len(s); i++ {
		fmt.Println(s[i])
	}
}
298 | 


--------------------------------------------------------------------------------