├── .travis.yml
├── LICENSE
├── README.md
├── buffruneio.go
└── buffruneio_test.go


/.travis.yml:
--------------------------------------------------------------------------------
 1 | arch:
 2 |     - amd64
 3 |     - ppc64le
 4 | language: go
 5 | sudo: false
 6 | go:
 7 |     - 1.6.4
 8 |     - 1.7.6
 9 |     - 1.8.5
10 |     - 1.9.2
11 |     - tip
12 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2018 Thomas Pelletier
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # buffruneio
 2 | 
 3 | [![Tests Status](https://travis-ci.org/pelletier/go-buffruneio.svg?branch=master)](https://travis-ci.org/pelletier/go-buffruneio)
 4 | [![GoDoc](https://godoc.org/github.com/pelletier/go-buffruneio?status.svg)](https://godoc.org/github.com/pelletier/go-buffruneio)
 5 | 
 6 | Buffruneio provides rune-based buffered input.
 7 | 
 8 | ```go
 9 | import "github.com/pelletier/go-buffruneio"
10 | ```
11 | 
12 | ## Examples
13 | 
14 | ```go
15 | import (
16 |     "fmt"
17 |     "github.com/pelletier/go-buffruneio"
18 |     "strings"
19 | )
20 | 
21 | reader := buffruneio.NewReader(strings.NewReader("abcd"))
22 | fmt.Println(reader.ReadRune()) // 'a'
23 | fmt.Println(reader.ReadRune()) // 'b'
24 | fmt.Println(reader.ReadRune()) // 'c'
25 | reader.UnreadRune()
26 | reader.UnreadRune()
27 | fmt.Println(reader.ReadRune()) // 'b'
28 | fmt.Println(reader.ReadRune()) // 'c'
29 | ```
30 | 
31 | ## Documentation
32 | 
33 | The documentation and additional examples are available at
34 | [godoc.org](http://godoc.org/github.com/pelletier/go-buffruneio).
35 | 
36 | ## Contribute
37 | 
 38 | Feel free to report bugs and submit patches using GitHub's pull request system on
39 | [pelletier/go-buffruneio](https://github.com/pelletier/go-buffruneio). Any feedback is
40 | much appreciated!
41 | 
42 | ## LICENSE
43 | 
44 | Copyright (c) 2016 - 2018 Thomas Pelletier
45 | 
46 | Permission is hereby granted, free of charge, to any person obtaining a copy of
47 | this software and associated documentation files (the "Software"), to deal in
48 | the Software without restriction, including without limitation the rights to
49 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
50 | the Software, and to permit persons to whom the Software is furnished to do so,
51 | subject to the following conditions:
52 | 
53 | The above copyright notice and this permission notice shall be included in all
54 | copies or substantial portions of the Software.
55 | 
56 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
57 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
58 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
59 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
60 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
61 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
62 | 


--------------------------------------------------------------------------------
/buffruneio.go:
--------------------------------------------------------------------------------
  1 | // Package buffruneio provides rune-based buffered input.
  2 | package buffruneio
  3 | 
  4 | import (
  5 | 	"bufio"
  6 | 	"errors"
  7 | 	"io"
  8 | 	"unicode/utf8"
  9 | )
 10 | 
// EOF is a rune value indicating end-of-file.
// ReadRune returns it, and PeekRunes includes it in its result, once the
// underlying input reports io.EOF.
const EOF = -1

// ErrNoRuneToUnread is the error returned when UnreadRune is called with nothing to unread.
var ErrNoRuneToUnread = errors.New("no rune to unwind")
 16 | 
// A Reader implements rune-based input for an underlying byte stream.
// Runes that have been read stay in an internal buffer, so they can be
// unread or peeked, until Forget discards them.
type Reader struct {
	buffer  []rune        // runes decoded so far and not yet forgotten; may contain the EOF and badRune sentinels
	current int           // index into buffer of the next rune ReadRune will return
	input   *bufio.Reader // buffered wrapper around the io.Reader given to NewReader
}
 23 | 
 24 | // NewReader returns a new Reader reading the given input.
 25 | func NewReader(input io.Reader) *Reader {
 26 | 	return &Reader{
 27 | 		input: bufio.NewReader(input),
 28 | 	}
 29 | }
 30 | 
// badRune is the sentinel stored in the rune buffer to represent a RuneError
// of length 1 (a UTF-8 decoding error). Keeping it distinct from
// utf8.RuneError lets ReadRune report size 1 for undecodable bytes while a
// U+FFFD that was actually encoded in the input keeps its real length.
const badRune = -2
 33 | 
 34 | // feedBuffer adds a rune to the buffer.
 35 | // If EOF is reached, it adds EOF to the buffer and returns nil.
 36 | // If a different error is encountered, it returns the error without
 37 | // adding to the buffer.
 38 | func (rd *Reader) feedBuffer() error {
 39 | 	if rd.buffer == nil {
 40 | 		rd.buffer = make([]rune, 0, 256)
 41 | 	}
 42 | 	r, size, err := rd.input.ReadRune()
 43 | 	if err != nil {
 44 | 		if err != io.EOF {
 45 | 			return err
 46 | 		}
 47 | 		r = EOF
 48 | 	}
 49 | 	if r == utf8.RuneError && size == 1 {
 50 | 		r = badRune
 51 | 	}
 52 | 	rd.buffer = append(rd.buffer, r)
 53 | 	return nil
 54 | }
 55 | 
 56 | // ReadRune reads and returns the next rune from the input.
 57 | // The rune is also saved in an internal buffer, in case UnreadRune is called.
 58 | // To avoid unbounded buffer growth, the caller must call Forget at appropriate intervals.
 59 | //
 60 | // At end of file, ReadRune returns EOF, 0, nil.
 61 | // On read errors other than io.EOF, ReadRune returns EOF, 0, err.
 62 | func (rd *Reader) ReadRune() (rune, int, error) {
 63 | 	if rd.current >= len(rd.buffer) {
 64 | 		if err := rd.feedBuffer(); err != nil {
 65 | 			return EOF, 0, err
 66 | 		}
 67 | 	}
 68 | 	r := rd.buffer[rd.current]
 69 | 	rd.current++
 70 | 	if r == badRune {
 71 | 		return utf8.RuneError, 1, nil
 72 | 	}
 73 | 	if r == EOF {
 74 | 		return EOF, 0, nil
 75 | 	}
 76 | 	return r, utf8.RuneLen(r), nil
 77 | }
 78 | 
 79 | // UnreadRune rewinds the input by one rune, undoing the effect of a single ReadRune call.
 80 | // UnreadRune may be called multiple times to rewind a sequence of ReadRune calls,
 81 | // up to the last time Forget was called or the beginning of the input.
 82 | //
 83 | // If there are no ReadRune calls left to undo, UnreadRune returns ErrNoRuneToUnread.
 84 | func (rd *Reader) UnreadRune() error {
 85 | 	if rd.current == 0 {
 86 | 		return ErrNoRuneToUnread
 87 | 	}
 88 | 	rd.current--
 89 | 	return nil
 90 | }
 91 | 
 92 | // Forget discards buffered runes before the current input position.
 93 | // Calling Forget makes it impossible to UnreadRune earlier than the current input position
 94 | // but is necessary to avoid unbounded buffer growth.
 95 | func (rd *Reader) Forget() {
 96 | 	n := copy(rd.buffer, rd.buffer[rd.current:])
 97 | 	rd.current = 0
 98 | 	rd.buffer = rd.buffer[:n]
 99 | }
100 | 
101 | // PeekRunes returns the next n runes in the input,
102 | // without advancing the current input position.
103 | //
104 | // If the input has fewer than n runes and then returns
105 | // an io.EOF error, PeekRune returns a slice containing
106 | // the available runes followed by EOF.
107 | // On other hand, if the input ends early with a non-io.EOF error,
108 | // PeekRune returns a slice containing only the available runes,
109 | // with no terminating EOF.
110 | func (rd *Reader) PeekRunes(n int) []rune {
111 | 	for len(rd.buffer)-rd.current < n && !rd.haveEOF() {
112 | 		if err := rd.feedBuffer(); err != nil {
113 | 			break
114 | 		}
115 | 	}
116 | 
117 | 	res := make([]rune, 0, n)
118 | 	for i := 0; i < n; i++ {
119 | 		if rd.current + i >= len(rd.buffer) {
120 | 			// reached end of buffer before reading as much as we wanted
121 | 			break
122 | 		}
123 | 		r := rd.buffer[rd.current+i]
124 | 		if r == badRune {
125 | 			r = utf8.RuneError
126 | 		}
127 | 		res = append(res, r)
128 | 		if r == EOF {
129 | 			break
130 | 		}
131 | 	}
132 | 	return res
133 | }
134 | 
135 | func (rd *Reader) haveEOF() bool {
136 | 	return rd.current < len(rd.buffer) && rd.buffer[len(rd.buffer)-1] == EOF
137 | }
138 | 


--------------------------------------------------------------------------------
/buffruneio_test.go:
--------------------------------------------------------------------------------
  1 | package buffruneio
  2 | 
  3 | import (
  4 | 	"reflect"
  5 | 	"runtime/debug"
  6 | 	"strings"
  7 | 	"testing"
  8 | 	"unicode/utf8"
  9 | 	"io"
 10 | 	"fmt"
 11 | )
 12 | 
 13 | func assertNoError(t *testing.T, err error) {
 14 | 	if err != nil {
 15 | 		t.Log("unexpected error", err)
 16 | 		debug.PrintStack()
 17 | 		t.FailNow()
 18 | 	}
 19 | }
 20 | 
 21 | func assumeRunesArray(t *testing.T, expected []rune, got []rune) {
 22 | 	if len(expected) != len(got) {
 23 | 		t.Fatal("expected", len(expected), "runes, but got", len(got))
 24 | 	}
 25 | 	for i := 0; i < len(got); i++ {
 26 | 		if expected[i] != got[i] {
 27 | 			t.Fatal("expected rune", expected[i], "at index", i, "but got", got[i])
 28 | 		}
 29 | 	}
 30 | }
 31 | 
 32 | func assumeRune(t *testing.T, rd *Reader, r rune) {
 33 | 	gotRune, size, err := rd.ReadRune()
 34 | 	wantSize := utf8.RuneLen(r)
 35 | 	if wantSize < 0 {
 36 | 		wantSize = 0
 37 | 	}
 38 | 	if gotRune != r || size != wantSize || err != nil {
 39 | 		t.Fatalf("ReadRune() = %q, %d, %v, wanted %q, %d, nil", gotRune, size, err, r, wantSize)
 40 | 	}
 41 | }
 42 | 
 43 | func assumeBadRune(t *testing.T, rd *Reader) {
 44 | 	gotRune, size, err := rd.ReadRune()
 45 | 	if gotRune != utf8.RuneError || size != 1 || err != nil {
 46 | 		t.Fatalf("ReadRune() = %q, %d, %v, wanted %q, 1, nil", gotRune, size, err, utf8.RuneError)
 47 | 	}
 48 | }
 49 | 
 50 | func TestReadString(t *testing.T) {
 51 | 	s := "hello"
 52 | 	rd := NewReader(strings.NewReader(s))
 53 | 
 54 | 	assumeRune(t, rd, 'h')
 55 | 	assumeRune(t, rd, 'e')
 56 | 	assumeRune(t, rd, 'l')
 57 | 	assumeRune(t, rd, 'l')
 58 | 	assumeRune(t, rd, 'o')
 59 | 	assumeRune(t, rd, EOF)
 60 | }
 61 | 
 62 | func TestMultipleEOF(t *testing.T) {
 63 | 	s := ""
 64 | 	rd := NewReader(strings.NewReader(s))
 65 | 
 66 | 	assumeRune(t, rd, EOF)
 67 | 	assumeRune(t, rd, EOF)
 68 | }
 69 | 
 70 | func TestBadRunes(t *testing.T) {
 71 | 	s := "ab\xff\ufffd\xffcd"
 72 | 	rd := NewReader(strings.NewReader(s))
 73 | 
 74 | 	assumeRune(t, rd, 'a')
 75 | 	assumeRune(t, rd, 'b')
 76 | 	assumeBadRune(t, rd)
 77 | 	assumeRune(t, rd, utf8.RuneError)
 78 | 	assumeBadRune(t, rd)
 79 | 	assumeRune(t, rd, 'c')
 80 | 	assumeRune(t, rd, 'd')
 81 | 
 82 | 	for i := 0; i < 6; i++ {
 83 | 		assertNoError(t, rd.UnreadRune())
 84 | 	}
 85 | 	assumeRune(t, rd, 'b')
 86 | 	assumeBadRune(t, rd)
 87 | 	assumeRune(t, rd, utf8.RuneError)
 88 | 	assumeBadRune(t, rd)
 89 | 	assumeRune(t, rd, 'c')
 90 | 	assumeRune(t, rd, 'd')
 91 | }
 92 | 
 93 | func TestUnread(t *testing.T) {
 94 | 	s := "ab"
 95 | 	rd := NewReader(strings.NewReader(s))
 96 | 
 97 | 	assumeRune(t, rd, 'a')
 98 | 	assumeRune(t, rd, 'b')
 99 | 	assertNoError(t, rd.UnreadRune())
100 | 	assumeRune(t, rd, 'b')
101 | 	assumeRune(t, rd, EOF)
102 | }
103 | 
104 | func TestUnreadEOF(t *testing.T) {
105 | 	s := "x"
106 | 	rd := NewReader(strings.NewReader(s))
107 | 
108 | 	_ = rd.UnreadRune()
109 | 	assumeRune(t, rd, 'x')
110 | 	assumeRune(t, rd, EOF)
111 | 	assumeRune(t, rd, EOF)
112 | 	assertNoError(t, rd.UnreadRune())
113 | 	assumeRune(t, rd, EOF)
114 | 	assertNoError(t, rd.UnreadRune())
115 | 	assertNoError(t, rd.UnreadRune())
116 | 	assumeRune(t, rd, EOF)
117 | 	assumeRune(t, rd, EOF)
118 | 	assertNoError(t, rd.UnreadRune())
119 | 	assertNoError(t, rd.UnreadRune())
120 | 	assertNoError(t, rd.UnreadRune())
121 | 	assumeRune(t, rd, 'x')
122 | 	assumeRune(t, rd, EOF)
123 | 	assumeRune(t, rd, EOF)
124 | }
125 | 
126 | func TestForget(t *testing.T) {
127 | 	s := "helio"
128 | 	rd := NewReader(strings.NewReader(s))
129 | 
130 | 	assumeRune(t, rd, 'h')
131 | 	assumeRune(t, rd, 'e')
132 | 	assumeRune(t, rd, 'l')
133 | 	assumeRune(t, rd, 'i')
134 | 	rd.Forget()
135 | 	if rd.UnreadRune() != ErrNoRuneToUnread {
136 | 		t.Fatal("no rune should be available")
137 | 	}
138 | 	assumeRune(t, rd, 'o')
139 | }
140 | 
141 | func TestForgetAfterUnread(t *testing.T) {
142 | 	s := "helio"
143 | 	rd := NewReader(strings.NewReader(s))
144 | 
145 | 	assumeRune(t, rd, 'h')
146 | 	assumeRune(t, rd, 'e')
147 | 	assumeRune(t, rd, 'l')
148 | 	assumeRune(t, rd, 'i')
149 | 	assertNoError(t, rd.UnreadRune())
150 | 	rd.Forget()
151 | 	if rd.UnreadRune() != ErrNoRuneToUnread {
152 | 		t.Fatal("no rune should be available")
153 | 	}
154 | 	assumeRune(t, rd, 'i')
155 | 	assumeRune(t, rd, 'o')
156 | }
157 | 
158 | func TestForgetEmpty(t *testing.T) {
159 | 	s := ""
160 | 	rd := NewReader(strings.NewReader(s))
161 | 
162 | 	rd.Forget()
163 | 	assumeRune(t, rd, EOF)
164 | 	rd.Forget()
165 | }
166 | 
167 | func TestPeekEmpty(t *testing.T) {
168 | 	s := ""
169 | 	rd := NewReader(strings.NewReader(s))
170 | 
171 | 	runes := rd.PeekRunes(1)
172 | 	if len(runes) != 1 {
173 | 		t.Fatal("incorrect number of runes", len(runes))
174 | 	}
175 | 	if runes[0] != EOF {
176 | 		t.Fatal("incorrect rune", runes[0])
177 | 	}
178 | }
179 | 
180 | func TestPeek(t *testing.T) {
181 | 	s := "a"
182 | 	rd := NewReader(strings.NewReader(s))
183 | 
184 | 	runes := rd.PeekRunes(1)
185 | 	assumeRunesArray(t, []rune{'a'}, runes)
186 | 
187 | 	runes = rd.PeekRunes(1)
188 | 	assumeRunesArray(t, []rune{'a'}, runes)
189 | 
190 | 	assumeRune(t, rd, 'a')
191 | 	runes = rd.PeekRunes(1)
192 | 	assumeRunesArray(t, []rune{EOF}, runes)
193 | 
194 | 	assumeRune(t, rd, EOF)
195 | }
196 | 
197 | func TestPeekLarge(t *testing.T) {
198 | 	s := "abcdefg☺\xff☹"
199 | 	rd := NewReader(strings.NewReader(s))
200 | 
201 | 	runes := rd.PeekRunes(100)
202 | 	want := []rune{'a', 'b', 'c', 'd', 'e', 'f', 'g', '☺', utf8.RuneError, '☹', EOF}
203 | 	if !reflect.DeepEqual(runes, want) {
204 | 		t.Fatalf("PeekRunes(100) = %q, want %q", runes, want)
205 | 	}
206 | }
207 | 
// bigString is the benchmark input: 1024 repetitions of a 16-byte pattern
// mixing ASCII, two 3-byte runes and one invalid byte (\xff).
var bigString = strings.Repeat("abcdefghi☺\xff☹", 1024) // 16 kB

// bigStringRunes is the number of runes benchmarkRead reads from bigString
// before it expects EOF (12 per repetition of the pattern).
const bigStringRunes = 12 * 1024 // 12k runes
211 | 
212 | func BenchmarkRead16K(b *testing.B) {
213 | 	// Read 16K with no unread, no forget.
214 | 	benchmarkRead(b, 1, false)
215 | }
216 | 
217 | func BenchmarkReadForget16K(b *testing.B) {
218 | 	// Read 16K, forgetting every 128 runes.
219 | 	benchmarkRead(b, 1, true)
220 | }
221 | 
222 | func BenchmarkReadRewind16K(b *testing.B) {
223 | 	// Read 16K, unread all, read that 16K again.
224 | 	benchmarkRead(b, 2, false)
225 | }
226 | 
227 | func benchmarkRead(b *testing.B, count int, forget bool) {
228 | 	if len(bigString) != 16*1024 {
229 | 		b.Fatal("wrong length for bigString")
230 | 	}
231 | 	sr0 := strings.NewReader(bigString)
232 | 	sr := new(strings.Reader)
233 | 	b.SetBytes(int64(len(bigString)))
234 | 	b.ReportAllocs()
235 | 	for i := 0; i < b.N; i++ {
236 | 		*sr = *sr0
237 | 		rd := NewReader(sr)
238 | 		for repeat := 0; repeat < count; repeat++ {
239 | 			for j := 0; j < bigStringRunes; j++ {
240 | 				r, _, err := rd.ReadRune()
241 | 				if err != nil {
242 | 					b.Fatal(err)
243 | 				}
244 | 				if r == EOF {
245 | 					b.Fatal("unexpected EOF")
246 | 				}
247 | 				if forget && j%128 == 127 {
248 | 					rd.Forget()
249 | 				}
250 | 			}
251 | 			r, _, err := rd.ReadRune()
252 | 			if err != nil {
253 | 				b.Fatal(err)
254 | 			}
255 | 			if r != EOF {
256 | 				b.Fatalf("missing EOF - %q", r)
257 | 			}
258 | 			if repeat == count-1 {
259 | 				break
260 | 			}
261 | 			for rd.UnreadRune() == nil {
262 | 				// keep unreading
263 | 			}
264 | 		}
265 | 	}
266 | }
267 | 
// failingReader is a test reader that passes Read calls through to an
// underlying reader a limited number of times and returns an error on every
// call after that.
type failingReader struct {
	r         io.Reader // underlying reader
	failAfter int       // start failing after that number of reads
	readCount int       // number of reads already done
}
274 | 
275 | func newFailingReaderFromString(s string, failAfter int) *failingReader {
276 | 	return &failingReader{
277 | 		r: strings.NewReader(s),
278 | 		failAfter: failAfter,
279 | 		readCount: 0,
280 | 	}
281 | }
282 | 
283 | func (r *failingReader) Read(b []byte) (n int, err error) {
284 | 	if r.readCount < r.failAfter {
285 | 		n, err = r.r.Read(b)
286 | 		r.readCount++
287 | 		return
288 | 	}
289 | 	return 0, fmt.Errorf("expected read failure")
290 | }
291 | 
292 | func TestReadFails(t *testing.T) {
293 | 	size := 4097 // needs to be more than bufio.defaultBufSize, which is 4096
294 | 	s := make([]byte, size)
295 | 	for i := 0; i < size; i++ {
296 | 		s[i] = 'a'
297 | 	}
298 | 
299 | 	rd := NewReader(newFailingReaderFromString(string(s), 1))
300 | 
301 | 	runes := rd.PeekRunes(256) // first read, ok
302 | 
303 | 	runes = rd.PeekRunes(1) // rune already loaded, ok
304 | 
305 | 	runes = rd.PeekRunes(4097) // forces a new read, fails
306 | 	if len(runes) != 4096 {
307 | 		t.Fatalf("expected %d runes. got %d", 4096, len(runes))
308 | 	}
309 | 	if runes[4095] != 'a' {
310 | 		t.Fatalf("expected last rune to be 'a'. got '%c'", runes[4095])
311 | 	}
312 | 
313 | 
314 | 	rd = NewReader(newFailingReaderFromString(string(s), 1))
315 | 	for i := 0; i < size - 1; i++ {
316 | 		r, size, err := rd.ReadRune() // read all the runes but last
317 | 		if err != nil {
318 | 			t.Fatalf("no error expeceted at that point, got %s", err)
319 | 		}
320 | 		if size != 1 {
321 | 			t.Fatalf("reading runes that should have size 1, got size %d", size)
322 | 		}
323 | 		if r != 'a' {
324 | 			t.Fatalf("reading a string of 'a', got %c", r)
325 | 		}
326 | 	}
327 | 	//  EOF, 0, err
328 | 	r, n, err := rd.ReadRune() // should error
329 | 	if r != EOF {
330 | 		t.Fatalf("expected EOF, got %c", r)
331 | 	}
332 | 	if n != 0 {
333 | 		t.Fatalf("expected size 0, got %d", n)
334 | 	}
335 | 	if err.Error() != "expected read failure" {
336 | 		t.Fatalf("incorrect error: %s", err.Error())
337 | 	}
338 | }
339 | 


--------------------------------------------------------------------------------