├── .travis.yml ├── LICENSE ├── README.md ├── buffruneio.go └── buffruneio_test.go /.travis.yml: -------------------------------------------------------------------------------- 1 | arch: 2 | - amd64 3 | - ppc64le 4 | language: go 5 | sudo: false 6 | go: 7 | - 1.6.4 8 | - 1.7.6 9 | - 1.8.5 10 | - 1.9.2 11 | - tip 12 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Thomas Pelletier 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # buffruneio 2 | 3 | [![Tests Status](https://travis-ci.org/pelletier/go-buffruneio.svg?branch=master)](https://travis-ci.org/pelletier/go-buffruneio) 4 | [![GoDoc](https://godoc.org/github.com/pelletier/go-buffruneio?status.svg)](https://godoc.org/github.com/pelletier/go-buffruneio) 5 | 6 | Buffruneio provides rune-based buffered input. 7 | 8 | ```go 9 | import "github.com/pelletier/go-buffruneio" 10 | ``` 11 | 12 | ## Examples 13 | 14 | ```go 15 | import ( 16 | "fmt" 17 | "github.com/pelletier/go-buffruneio" 18 | "strings" 19 | ) 20 | 21 | reader := buffruneio.NewReader(strings.NewReader("abcd")) 22 | fmt.Println(reader.ReadRune()) // 'a' 23 | fmt.Println(reader.ReadRune()) // 'b' 24 | fmt.Println(reader.ReadRune()) // 'c' 25 | reader.UnreadRune() 26 | reader.UnreadRune() 27 | fmt.Println(reader.ReadRune()) // 'b' 28 | fmt.Println(reader.ReadRune()) // 'c' 29 | ``` 30 | 31 | ## Documentation 32 | 33 | The documentation and additional examples are available at 34 | [godoc.org](http://godoc.org/github.com/pelletier/go-buffruneio). 35 | 36 | ## Contribute 37 | 38 | Feel free to report bugs and patches using GitHub's pull requests system on 39 | [pelletier/go-buffruneio](https://github.com/pelletier/go-buffruneio). Any feedback is 40 | much appreciated! 41 | 42 | ## LICENSE 43 | 44 | Copyright (c) 2016 - 2018 Thomas Pelletier 45 | 46 | Permission is hereby granted, free of charge, to any person obtaining a copy of 47 | this software and associated documentation files (the "Software"), to deal in 48 | the Software without restriction, including without limitation the rights to 49 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 50 | the Software, and to permit persons to whom the Software is furnished to do so, 51 | subject to the following conditions: 52 | 53 | The above copyright notice and this permission notice shall be included in all 54 | copies or substantial portions of the Software. 55 | 56 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 57 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 58 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 59 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 60 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 61 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 62 | -------------------------------------------------------------------------------- /buffruneio.go: -------------------------------------------------------------------------------- 1 | // Package buffruneio provides rune-based buffered input. 2 | package buffruneio 3 | 4 | import ( 5 | "bufio" 6 | "errors" 7 | "io" 8 | "unicode/utf8" 9 | ) 10 | 11 | // EOF is a rune value indicating end-of-file. 12 | const EOF = -1 13 | 14 | // ErrNoRuneToUnread is the error returned when UnreadRune is called with nothing to unread. 15 | var ErrNoRuneToUnread = errors.New("no rune to unwind") 16 | 17 | // A Reader implements rune-based input for an underlying byte stream. 18 | type Reader struct { 19 | buffer []rune 20 | current int 21 | input *bufio.Reader 22 | } 23 | 24 | // NewReader returns a new Reader reading the given input. 25 | func NewReader(input io.Reader) *Reader { 26 | return &Reader{ 27 | input: bufio.NewReader(input), 28 | } 29 | } 30 | 31 | // The rune buffer stores -2 to represent RuneError of length 1 (UTF-8 decoding errors). 32 | const badRune = -2 33 | 34 | // feedBuffer adds a rune to the buffer. 35 | // If EOF is reached, it adds EOF to the buffer and returns nil. 36 | // If a different error is encountered, it returns the error without 37 | // adding to the buffer. 38 | func (rd *Reader) feedBuffer() error { 39 | if rd.buffer == nil { 40 | rd.buffer = make([]rune, 0, 256) 41 | } 42 | r, size, err := rd.input.ReadRune() 43 | if err != nil { 44 | if err != io.EOF { 45 | return err 46 | } 47 | r = EOF 48 | } 49 | if r == utf8.RuneError && size == 1 { 50 | r = badRune 51 | } 52 | rd.buffer = append(rd.buffer, r) 53 | return nil 54 | } 55 | 56 | // ReadRune reads and returns the next rune from the input. 57 | // The rune is also saved in an internal buffer, in case UnreadRune is called. 58 | // To avoid unbounded buffer growth, the caller must call Forget at appropriate intervals. 59 | // 60 | // At end of file, ReadRune returns EOF, 0, nil. 61 | // On read errors other than io.EOF, ReadRune returns EOF, 0, err. 62 | func (rd *Reader) ReadRune() (rune, int, error) { 63 | if rd.current >= len(rd.buffer) { 64 | if err := rd.feedBuffer(); err != nil { 65 | return EOF, 0, err 66 | } 67 | } 68 | r := rd.buffer[rd.current] 69 | rd.current++ 70 | if r == badRune { 71 | return utf8.RuneError, 1, nil 72 | } 73 | if r == EOF { 74 | return EOF, 0, nil 75 | } 76 | return r, utf8.RuneLen(r), nil 77 | } 78 | 79 | // UnreadRune rewinds the input by one rune, undoing the effect of a single ReadRune call. 80 | // UnreadRune may be called multiple times to rewind a sequence of ReadRune calls, 81 | // up to the last time Forget was called or the beginning of the input. 82 | // 83 | // If there are no ReadRune calls left to undo, UnreadRune returns ErrNoRuneToUnread. 84 | func (rd *Reader) UnreadRune() error { 85 | if rd.current == 0 { 86 | return ErrNoRuneToUnread 87 | } 88 | rd.current-- 89 | return nil 90 | } 91 | 92 | // Forget discards buffered runes before the current input position. 93 | // Calling Forget makes it impossible to UnreadRune earlier than the current input position 94 | // but is necessary to avoid unbounded buffer growth. 95 | func (rd *Reader) Forget() { 96 | n := copy(rd.buffer, rd.buffer[rd.current:]) 97 | rd.current = 0 98 | rd.buffer = rd.buffer[:n] 99 | } 100 | 101 | // PeekRunes returns the next n runes in the input, 102 | // without advancing the current input position. 103 | // 104 | // If the input has fewer than n runes and then returns 105 | // an io.EOF error, PeekRune returns a slice containing 106 | // the available runes followed by EOF. 107 | // On other hand, if the input ends early with a non-io.EOF error, 108 | // PeekRune returns a slice containing only the available runes, 109 | // with no terminating EOF. 110 | func (rd *Reader) PeekRunes(n int) []rune { 111 | for len(rd.buffer)-rd.current < n && !rd.haveEOF() { 112 | if err := rd.feedBuffer(); err != nil { 113 | break 114 | } 115 | } 116 | 117 | res := make([]rune, 0, n) 118 | for i := 0; i < n; i++ { 119 | if rd.current + i >= len(rd.buffer) { 120 | // reached end of buffer before reading as much as we wanted 121 | break 122 | } 123 | r := rd.buffer[rd.current+i] 124 | if r == badRune { 125 | r = utf8.RuneError 126 | } 127 | res = append(res, r) 128 | if r == EOF { 129 | break 130 | } 131 | } 132 | return res 133 | } 134 | 135 | func (rd *Reader) haveEOF() bool { 136 | return rd.current < len(rd.buffer) && rd.buffer[len(rd.buffer)-1] == EOF 137 | } 138 | -------------------------------------------------------------------------------- /buffruneio_test.go: -------------------------------------------------------------------------------- 1 | package buffruneio 2 | 3 | import ( 4 | "reflect" 5 | "runtime/debug" 6 | "strings" 7 | "testing" 8 | "unicode/utf8" 9 | "io" 10 | "fmt" 11 | ) 12 | 13 | func assertNoError(t *testing.T, err error) { 14 | if err != nil { 15 | t.Log("unexpected error", err) 16 | debug.PrintStack() 17 | t.FailNow() 18 | } 19 | } 20 | 21 | func assumeRunesArray(t *testing.T, expected []rune, got []rune) { 22 | if len(expected) != len(got) { 23 | t.Fatal("expected", len(expected), "runes, but got", len(got)) 24 | } 25 | for i := 0; i < len(got); i++ { 26 | if expected[i] != got[i] { 27 | t.Fatal("expected rune", expected[i], "at index", i, "but got", got[i]) 28 | } 29 | } 30 | } 31 | 32 | func assumeRune(t *testing.T, rd *Reader, r rune) { 33 | gotRune, size, err := rd.ReadRune() 34 | wantSize := utf8.RuneLen(r) 35 | if wantSize < 0 { 36 | wantSize = 0 37 | } 38 | if gotRune != r || size != wantSize || err != nil { 39 | t.Fatalf("ReadRune() = %q, %d, %v, wanted %q, %d, nil", gotRune, size, err, r, wantSize) 40 | } 41 | } 42 | 43 | func assumeBadRune(t *testing.T, rd *Reader) { 44 | gotRune, size, err := rd.ReadRune() 45 | if gotRune != utf8.RuneError || size != 1 || err != nil { 46 | t.Fatalf("ReadRune() = %q, %d, %v, wanted %q, 1, nil", gotRune, size, err, utf8.RuneError) 47 | } 48 | } 49 | 50 | func TestReadString(t *testing.T) { 51 | s := "hello" 52 | rd := NewReader(strings.NewReader(s)) 53 | 54 | assumeRune(t, rd, 'h') 55 | assumeRune(t, rd, 'e') 56 | assumeRune(t, rd, 'l') 57 | assumeRune(t, rd, 'l') 58 | assumeRune(t, rd, 'o') 59 | assumeRune(t, rd, EOF) 60 | } 61 | 62 | func TestMultipleEOF(t *testing.T) { 63 | s := "" 64 | rd := NewReader(strings.NewReader(s)) 65 | 66 | assumeRune(t, rd, EOF) 67 | assumeRune(t, rd, EOF) 68 | } 69 | 70 | func TestBadRunes(t *testing.T) { 71 | s := "ab\xff\ufffd\xffcd" 72 | rd := NewReader(strings.NewReader(s)) 73 | 74 | assumeRune(t, rd, 'a') 75 | assumeRune(t, rd, 'b') 76 | assumeBadRune(t, rd) 77 | assumeRune(t, rd, utf8.RuneError) 78 | assumeBadRune(t, rd) 79 | assumeRune(t, rd, 'c') 80 | assumeRune(t, rd, 'd') 81 | 82 | for i := 0; i < 6; i++ { 83 | assertNoError(t, rd.UnreadRune()) 84 | } 85 | assumeRune(t, rd, 'b') 86 | assumeBadRune(t, rd) 87 | assumeRune(t, rd, utf8.RuneError) 88 | assumeBadRune(t, rd) 89 | assumeRune(t, rd, 'c') 90 | assumeRune(t, rd, 'd') 91 | } 92 | 93 | func TestUnread(t *testing.T) { 94 | s := "ab" 95 | rd := NewReader(strings.NewReader(s)) 96 | 97 | assumeRune(t, rd, 'a') 98 | assumeRune(t, rd, 'b') 99 | assertNoError(t, rd.UnreadRune()) 100 | assumeRune(t, rd, 'b') 101 | assumeRune(t, rd, EOF) 102 | } 103 | 104 | func TestUnreadEOF(t *testing.T) { 105 | s := "x" 106 | rd := NewReader(strings.NewReader(s)) 107 | 108 | _ = rd.UnreadRune() 109 | assumeRune(t, rd, 'x') 110 | assumeRune(t, rd, EOF) 111 | assumeRune(t, rd, EOF) 112 | assertNoError(t, rd.UnreadRune()) 113 | assumeRune(t, rd, EOF) 114 | assertNoError(t, rd.UnreadRune()) 115 | assertNoError(t, rd.UnreadRune()) 116 | assumeRune(t, rd, EOF) 117 | assumeRune(t, rd, EOF) 118 | assertNoError(t, rd.UnreadRune()) 119 | assertNoError(t, rd.UnreadRune()) 120 | assertNoError(t, rd.UnreadRune()) 121 | assumeRune(t, rd, 'x') 122 | assumeRune(t, rd, EOF) 123 | assumeRune(t, rd, EOF) 124 | } 125 | 126 | func TestForget(t *testing.T) { 127 | s := "helio" 128 | rd := NewReader(strings.NewReader(s)) 129 | 130 | assumeRune(t, rd, 'h') 131 | assumeRune(t, rd, 'e') 132 | assumeRune(t, rd, 'l') 133 | assumeRune(t, rd, 'i') 134 | rd.Forget() 135 | if rd.UnreadRune() != ErrNoRuneToUnread { 136 | t.Fatal("no rune should be available") 137 | } 138 | assumeRune(t, rd, 'o') 139 | } 140 | 141 | func TestForgetAfterUnread(t *testing.T) { 142 | s := "helio" 143 | rd := NewReader(strings.NewReader(s)) 144 | 145 | assumeRune(t, rd, 'h') 146 | assumeRune(t, rd, 'e') 147 | assumeRune(t, rd, 'l') 148 | assumeRune(t, rd, 'i') 149 | assertNoError(t, rd.UnreadRune()) 150 | rd.Forget() 151 | if rd.UnreadRune() != ErrNoRuneToUnread { 152 | t.Fatal("no rune should be available") 153 | } 154 | assumeRune(t, rd, 'i') 155 | assumeRune(t, rd, 'o') 156 | } 157 | 158 | func TestForgetEmpty(t *testing.T) { 159 | s := "" 160 | rd := NewReader(strings.NewReader(s)) 161 | 162 | rd.Forget() 163 | assumeRune(t, rd, EOF) 164 | rd.Forget() 165 | } 166 | 167 | func TestPeekEmpty(t *testing.T) { 168 | s := "" 169 | rd := NewReader(strings.NewReader(s)) 170 | 171 | runes := rd.PeekRunes(1) 172 | if len(runes) != 1 { 173 | t.Fatal("incorrect number of runes", len(runes)) 174 | } 175 | if runes[0] != EOF { 176 | t.Fatal("incorrect rune", runes[0]) 177 | } 178 | } 179 | 180 | func TestPeek(t *testing.T) { 181 | s := "a" 182 | rd := NewReader(strings.NewReader(s)) 183 | 184 | runes := rd.PeekRunes(1) 185 | assumeRunesArray(t, []rune{'a'}, runes) 186 | 187 | runes = rd.PeekRunes(1) 188 | assumeRunesArray(t, []rune{'a'}, runes) 189 | 190 | assumeRune(t, rd, 'a') 191 | runes = rd.PeekRunes(1) 192 | assumeRunesArray(t, []rune{EOF}, runes) 193 | 194 | assumeRune(t, rd, EOF) 195 | } 196 | 197 | func TestPeekLarge(t *testing.T) { 198 | s := "abcdefg☺\xff☹" 199 | rd := NewReader(strings.NewReader(s)) 200 | 201 | runes := rd.PeekRunes(100) 202 | want := []rune{'a', 'b', 'c', 'd', 'e', 'f', 'g', '☺', utf8.RuneError, '☹', EOF} 203 | if !reflect.DeepEqual(runes, want) { 204 | t.Fatalf("PeekRunes(100) = %q, want %q", runes, want) 205 | } 206 | } 207 | 208 | var bigString = strings.Repeat("abcdefghi☺\xff☹", 1024) // 16 kB 209 | 210 | const bigStringRunes = 12 * 1024 // 12k runes 211 | 212 | func BenchmarkRead16K(b *testing.B) { 213 | // Read 16K with no unread, no forget. 214 | benchmarkRead(b, 1, false) 215 | } 216 | 217 | func BenchmarkReadForget16K(b *testing.B) { 218 | // Read 16K, forgetting every 128 runes. 219 | benchmarkRead(b, 1, true) 220 | } 221 | 222 | func BenchmarkReadRewind16K(b *testing.B) { 223 | // Read 16K, unread all, read that 16K again. 224 | benchmarkRead(b, 2, false) 225 | } 226 | 227 | func benchmarkRead(b *testing.B, count int, forget bool) { 228 | if len(bigString) != 16*1024 { 229 | b.Fatal("wrong length for bigString") 230 | } 231 | sr0 := strings.NewReader(bigString) 232 | sr := new(strings.Reader) 233 | b.SetBytes(int64(len(bigString))) 234 | b.ReportAllocs() 235 | for i := 0; i < b.N; i++ { 236 | *sr = *sr0 237 | rd := NewReader(sr) 238 | for repeat := 0; repeat < count; repeat++ { 239 | for j := 0; j < bigStringRunes; j++ { 240 | r, _, err := rd.ReadRune() 241 | if err != nil { 242 | b.Fatal(err) 243 | } 244 | if r == EOF { 245 | b.Fatal("unexpected EOF") 246 | } 247 | if forget && j%128 == 127 { 248 | rd.Forget() 249 | } 250 | } 251 | r, _, err := rd.ReadRune() 252 | if err != nil { 253 | b.Fatal(err) 254 | } 255 | if r != EOF { 256 | b.Fatalf("missing EOF - %q", r) 257 | } 258 | if repeat == count-1 { 259 | break 260 | } 261 | for rd.UnreadRune() == nil { 262 | // keep unreading 263 | } 264 | } 265 | } 266 | } 267 | 268 | // test reader that will fail reading after a given number of reads 269 | type failingReader struct { 270 | r io.Reader // underlying reader 271 | failAfter int // start failing after that number of reads 272 | readCount int // number of reads already done 273 | } 274 | 275 | func newFailingReaderFromString(s string, failAfter int) *failingReader { 276 | return &failingReader{ 277 | r: strings.NewReader(s), 278 | failAfter: failAfter, 279 | readCount: 0, 280 | } 281 | } 282 | 283 | func (r *failingReader) Read(b []byte) (n int, err error) { 284 | if r.readCount < r.failAfter { 285 | n, err = r.r.Read(b) 286 | r.readCount++ 287 | return 288 | } 289 | return 0, fmt.Errorf("expected read failure") 290 | } 291 | 292 | func TestReadFails(t *testing.T) { 293 | size := 4097 // needs to be more than bufio.defaultBufSize, which is 4096 294 | s := make([]byte, size) 295 | for i := 0; i < size; i++ { 296 | s[i] = 'a' 297 | } 298 | 299 | rd := NewReader(newFailingReaderFromString(string(s), 1)) 300 | 301 | runes := rd.PeekRunes(256) // first read, ok 302 | 303 | runes = rd.PeekRunes(1) // rune already loaded, ok 304 | 305 | runes = rd.PeekRunes(4097) // forces a new read, fails 306 | if len(runes) != 4096 { 307 | t.Fatalf("expected %d runes. got %d", 4096, len(runes)) 308 | } 309 | if runes[4095] != 'a' { 310 | t.Fatalf("expected last rune to be 'a'. got '%c'", runes[4095]) 311 | } 312 | 313 | 314 | rd = NewReader(newFailingReaderFromString(string(s), 1)) 315 | for i := 0; i < size - 1; i++ { 316 | r, size, err := rd.ReadRune() // read all the runes but last 317 | if err != nil { 318 | t.Fatalf("no error expeceted at that point, got %s", err) 319 | } 320 | if size != 1 { 321 | t.Fatalf("reading runes that should have size 1, got size %d", size) 322 | } 323 | if r != 'a' { 324 | t.Fatalf("reading a string of 'a', got %c", r) 325 | } 326 | } 327 | // EOF, 0, err 328 | r, n, err := rd.ReadRune() // should error 329 | if r != EOF { 330 | t.Fatalf("expected EOF, got %c", r) 331 | } 332 | if n != 0 { 333 | t.Fatalf("expected size 0, got %d", n) 334 | } 335 | if err.Error() != "expected read failure" { 336 | t.Fatalf("incorrect error: %s", err.Error()) 337 | } 338 | } 339 | --------------------------------------------------------------------------------