├── LICENSE ├── README.md ├── tsvreader.go ├── tsvreader_example_test.go ├── tsvreader_test.go └── tsvreader_timing_test.go /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2017 Aliaksandr Valialkin, VertaMedia 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # tsvreader - fast reader for tab-separated data 2 | 3 | ## Features 4 | 5 | * Optimized for speed. May read more than 20M rows per second on a single 6 | CPU core. 7 | * Compatible with `TSV` (aka `TabSeparated`) format used in [ClickHouse](https://github.com/yandex/ClickHouse) responses. 8 | See [chclient](https://github.com/valyala/chclient) - clickhouse client built on top of `tsvreader`. 9 | * May read rows with variable number of columns using [Reader.HasCols](https://godoc.org/github.com/valyala/tsvreader#Reader.HasCols). 10 | This functionality allows reading [WITH TOTALS](http://clickhouse.readthedocs.io/en/latest/reference_en.html#WITH+TOTALS+modifier) 11 | row from `ClickHouse` responses and [BlockTabSeparated](http://clickhouse.readthedocs.io/en/latest/reference_en.html#BlockTabSeparated) 12 | responses. 13 | 14 | ## Documentation 15 | 16 | See [these docs](https://godoc.org/github.com/valyala/tsvreader). 17 | -------------------------------------------------------------------------------- /tsvreader.go: -------------------------------------------------------------------------------- 1 | package tsvreader 2 | 3 | import ( 4 | "bytes" 5 | "fmt" 6 | "io" 7 | "math" 8 | "strconv" 9 | "time" 10 | "unsafe" 11 | ) 12 | 13 | // New returns new Reader that reads TSV data from r. 14 | func New(r io.Reader) *Reader { 15 | var tr Reader 16 | tr.Reset(r) 17 | return &tr 18 | } 19 | 20 | // Reader reads tab-separated data. 21 | // 22 | // Call New for creating new TSV reader. 23 | // Call Next before reading the next row. 24 | // 25 | // It is expected that columns are separated by tabs while rows 26 | // are separated by newlines. 27 | type Reader struct { 28 | r io.Reader 29 | rb []byte 30 | rErr error 31 | rBuf [4 << 10]byte 32 | 33 | col int 34 | row int 35 | 36 | rowBuf []byte 37 | b []byte 38 | scratch []byte 39 | 40 | err error 41 | needUnescape bool 42 | } 43 | 44 | // Reset resets the reader for reading from r. 45 | func (tr *Reader) Reset(r io.Reader) { 46 | tr.r = r 47 | tr.rb = nil 48 | tr.rErr = nil 49 | 50 | tr.col = 0 51 | tr.row = 0 52 | 53 | tr.rowBuf = nil 54 | tr.b = nil 55 | tr.scratch = tr.scratch[:0] 56 | 57 | tr.err = nil 58 | tr.needUnescape = false 59 | } 60 | 61 | // Error returns the last error. 62 | func (tr *Reader) Error() error { 63 | if tr.err == io.EOF { 64 | return nil 65 | } 66 | return tr.err 67 | } 68 | 69 | // ResetError resets the current error, so the reader could proceed further. 70 | func (tr *Reader) ResetError() { 71 | tr.err = nil 72 | } 73 | 74 | // HasCols returns true if the current row contains unread columns. 75 | // 76 | // An empty row doesn't contain columns. 77 | // 78 | // This function may be used if TSV stream contains rows with different 79 | // number of colums. 80 | func (tr *Reader) HasCols() bool { 81 | return len(tr.rowBuf) > 0 && tr.b != nil 82 | } 83 | 84 | // Next advances to the next row. 85 | // 86 | // Returns true if the next row does exist. 87 | // 88 | // Next must be called after reading all the columns on the previous row. 89 | // Check Error after Next returns false. 90 | // 91 | // HasCols may be used for reading rows with variable number of columns. 92 | func (tr *Reader) Next() bool { 93 | if tr.err != nil { 94 | return false 95 | } 96 | if tr.HasCols() { 97 | tr.err = fmt.Errorf("row #%d %q contains unread columns: %q", tr.row, tr.rowBuf, tr.b) 98 | return false 99 | } 100 | 101 | tr.row++ 102 | tr.col = 0 103 | tr.rowBuf = nil 104 | 105 | for { 106 | if len(tr.rb) == 0 { 107 | // Read buffer is empty. Attempt to fill it. 108 | if tr.rErr != nil { 109 | tr.err = tr.rErr 110 | if tr.err != io.EOF { 111 | tr.err = fmt.Errorf("cannot read row #%d: %s", tr.row, tr.err) 112 | } else if len(tr.scratch) > 0 { 113 | tr.err = fmt.Errorf("cannot find newline at the end of row #%d; row: %q", tr.row, tr.scratch) 114 | } 115 | return false 116 | } 117 | n, err := tr.r.Read(tr.rBuf[:]) 118 | tr.rb = tr.rBuf[:n] 119 | tr.needUnescape = (bytes.IndexByte(tr.rb, '\\') >= 0) 120 | tr.rErr = err 121 | } 122 | 123 | // Search for the end of the current row. 124 | n := bytes.IndexByte(tr.rb, '\n') 125 | if n >= 0 { 126 | // Fast path: the row has been found. 127 | b := tr.rb[:n] 128 | tr.rb = tr.rb[n+1:] 129 | if len(tr.scratch) > 0 { 130 | tr.scratch = append(tr.scratch, b...) 131 | b = tr.scratch 132 | tr.scratch = tr.scratch[:0] 133 | } 134 | tr.rowBuf = b 135 | tr.b = tr.rowBuf 136 | return true 137 | } 138 | 139 | // Slow path: cannot find the end of row. 140 | // Append tr.rb to tr.scratch and repeat. 141 | tr.scratch = append(tr.scratch, tr.rb...) 142 | tr.rb = nil 143 | } 144 | } 145 | 146 | // Int returns the next int column value from the current row. 147 | func (tr *Reader) Int() int { 148 | if tr.err != nil { 149 | return 0 150 | } 151 | b, err := tr.nextCol() 152 | if err != nil { 153 | tr.setColError("cannot read `int`", err) 154 | return 0 155 | } 156 | 157 | n, err := strconv.Atoi(b2s(b)) 158 | if err != nil { 159 | tr.setColError("cannot parse `int`", err) 160 | return 0 161 | } 162 | return n 163 | } 164 | 165 | // Uint returns the next uint column value from the current row. 166 | func (tr *Reader) Uint() uint { 167 | if tr.err != nil { 168 | return 0 169 | } 170 | b, err := tr.nextCol() 171 | if err != nil { 172 | tr.setColError("cannot read `uint`", err) 173 | return 0 174 | } 175 | s := b2s(b) 176 | 177 | // Fast path - attempt to use Atoi 178 | n, err := strconv.Atoi(s) 179 | if err == nil && n >= 0 { 180 | return uint(n) 181 | } 182 | 183 | // Slow path - use ParseUint 184 | nu, err := strconv.ParseUint(s, 10, strconv.IntSize) 185 | if err != nil { 186 | tr.setColError("cannot parse `uint`", err) 187 | return 0 188 | } 189 | return uint(nu) 190 | } 191 | 192 | // Int32 returns the next int32 column value from the current row. 193 | func (tr *Reader) Int32() int32 { 194 | if tr.err != nil { 195 | return 0 196 | } 197 | b, err := tr.nextCol() 198 | if err != nil { 199 | tr.setColError("cannot read `int32`", err) 200 | return 0 201 | } 202 | s := b2s(b) 203 | 204 | // Fast path - attempt to use Atoi 205 | n, err := strconv.Atoi(s) 206 | if err == nil && n >= math.MinInt32 && n <= math.MaxInt32 { 207 | return int32(n) 208 | } 209 | 210 | // Slow path - use ParseInt 211 | n32, err := strconv.ParseInt(s, 10, 32) 212 | if err != nil { 213 | tr.setColError("cannot parse `int32`", err) 214 | return 0 215 | } 216 | return int32(n32) 217 | } 218 | 219 | // Uint32 returns the next uint32 column value from the current row. 220 | func (tr *Reader) Uint32() uint32 { 221 | if tr.err != nil { 222 | return 0 223 | } 224 | b, err := tr.nextCol() 225 | if err != nil { 226 | tr.setColError("cannot read `uint32`", err) 227 | return 0 228 | } 229 | s := b2s(b) 230 | 231 | // Fast path - attempt to use Atoi 232 | n, err := strconv.Atoi(s) 233 | if err == nil && n >= 0 && n <= math.MaxUint32 { 234 | return uint32(n) 235 | } 236 | 237 | // Slow path - use ParseUint 238 | n32, err := strconv.ParseUint(s, 10, 32) 239 | if err != nil { 240 | tr.setColError("cannot parse `uint32`", err) 241 | return 0 242 | } 243 | return uint32(n32) 244 | } 245 | 246 | // Int16 returns the next int16 column value from the current row. 247 | func (tr *Reader) Int16() int16 { 248 | if tr.err != nil { 249 | return 0 250 | } 251 | b, err := tr.nextCol() 252 | if err != nil { 253 | tr.setColError("cannot read `int16`", err) 254 | return 0 255 | } 256 | n, err := strconv.Atoi(b2s(b)) 257 | if err != nil { 258 | tr.setColError("cannot parse `int16`", err) 259 | return 0 260 | } 261 | if n < math.MinInt16 || n > math.MaxInt16 { 262 | tr.setColError("cannot parse `int16`", fmt.Errorf("out of range")) 263 | return 0 264 | } 265 | return int16(n) 266 | } 267 | 268 | // Uint16 returns the next uint16 column value from the current row. 269 | func (tr *Reader) Uint16() uint16 { 270 | if tr.err != nil { 271 | return 0 272 | } 273 | b, err := tr.nextCol() 274 | if err != nil { 275 | tr.setColError("cannot read `uint16`", err) 276 | return 0 277 | } 278 | n, err := strconv.Atoi(b2s(b)) 279 | if err != nil { 280 | tr.setColError("cannot parse `uint16`", err) 281 | return 0 282 | } 283 | if n < 0 { 284 | tr.setColError("cannot parse `uint16`", fmt.Errorf("invalid syntax")) 285 | return 0 286 | } 287 | if n > math.MaxUint16 { 288 | tr.setColError("cannot parse `uint16`", fmt.Errorf("out of range")) 289 | return 0 290 | } 291 | return uint16(n) 292 | } 293 | 294 | // Int8 returns the next int8 column value from the current row. 295 | func (tr *Reader) Int8() int8 { 296 | if tr.err != nil { 297 | return 0 298 | } 299 | b, err := tr.nextCol() 300 | if err != nil { 301 | tr.setColError("cannot read `int8`", err) 302 | return 0 303 | } 304 | n, err := strconv.Atoi(b2s(b)) 305 | if err != nil { 306 | tr.setColError("cannot parse `int8`", err) 307 | return 0 308 | } 309 | if n < math.MinInt8 || n > math.MaxInt8 { 310 | tr.setColError("cannot parse `int8`", fmt.Errorf("out of range")) 311 | return 0 312 | } 313 | return int8(n) 314 | } 315 | 316 | // Uint8 returns the next uint8 column value from the current row. 317 | func (tr *Reader) Uint8() uint8 { 318 | if tr.err != nil { 319 | return 0 320 | } 321 | b, err := tr.nextCol() 322 | if err != nil { 323 | tr.setColError("cannot read `uint8`", err) 324 | return 0 325 | } 326 | n, err := strconv.Atoi(b2s(b)) 327 | if err != nil { 328 | tr.setColError("cannot parse `uint8`", err) 329 | return 0 330 | } 331 | if n < 0 { 332 | tr.setColError("cannot parse `uint8`", fmt.Errorf("invalid syntax")) 333 | return 0 334 | } 335 | if n > math.MaxUint8 { 336 | tr.setColError("cannot parse `uint8`", fmt.Errorf("out of range")) 337 | return 0 338 | } 339 | return uint8(n) 340 | } 341 | 342 | // Int64 returns the next int64 column value from the current row. 343 | func (tr *Reader) Int64() int64 { 344 | if tr.err != nil { 345 | return 0 346 | } 347 | b, err := tr.nextCol() 348 | if err != nil { 349 | tr.setColError("cannot read `int64`", err) 350 | return 0 351 | } 352 | s := b2s(b) 353 | 354 | // Fast path - attempt to use Atoi 355 | n, err := strconv.Atoi(s) 356 | if err == nil && int64(n) >= math.MinInt64 && int64(n) <= math.MaxInt64 { 357 | return int64(n) 358 | } 359 | 360 | // Slow path - use ParseInt 361 | n64, err := strconv.ParseInt(s, 10, 64) 362 | if err != nil { 363 | tr.setColError("cannot parse `int64`", err) 364 | return 0 365 | } 366 | return n64 367 | } 368 | 369 | // Uint64 returns the next uint64 column value from the current row. 370 | func (tr *Reader) Uint64() uint64 { 371 | if tr.err != nil { 372 | return 0 373 | } 374 | b, err := tr.nextCol() 375 | if err != nil { 376 | tr.setColError("cannot read `uint64`", err) 377 | return 0 378 | } 379 | s := b2s(b) 380 | 381 | // Fast path - attempt to use Atoi 382 | n, err := strconv.Atoi(s) 383 | if err == nil && n >= 0 && uint64(n) <= math.MaxUint64 { 384 | return uint64(n) 385 | } 386 | 387 | // Slow path - use ParseUint 388 | n64, err := strconv.ParseUint(s, 10, 64) 389 | if err != nil { 390 | tr.setColError("cannot parse `uint64`", err) 391 | return 0 392 | } 393 | return n64 394 | } 395 | 396 | // Float32 returns the next float32 column value from the current row. 397 | func (tr *Reader) Float32() float32 { 398 | if tr.err != nil { 399 | return 0 400 | } 401 | b, err := tr.nextCol() 402 | if err != nil { 403 | tr.setColError("cannot read `float32`", err) 404 | return 0 405 | } 406 | s := b2s(b) 407 | 408 | f32, err := strconv.ParseFloat(s, 32) 409 | if err != nil { 410 | tr.setColError("cannot parse `float32`", err) 411 | return 0 412 | } 413 | return float32(f32) 414 | } 415 | 416 | // Float64 returns the next float64 column value from the current row. 417 | func (tr *Reader) Float64() float64 { 418 | if tr.err != nil { 419 | return 0 420 | } 421 | b, err := tr.nextCol() 422 | if err != nil { 423 | tr.setColError("cannot read `float64`", err) 424 | return 0 425 | } 426 | s := b2s(b) 427 | 428 | f64, err := strconv.ParseFloat(s, 64) 429 | if err != nil { 430 | tr.setColError("cannot parse `float64`", err) 431 | return 0 432 | } 433 | return f64 434 | } 435 | 436 | // SkipCol skips the next column from the current row. 437 | func (tr *Reader) SkipCol() { 438 | if tr.err != nil { 439 | return 440 | } 441 | _, err := tr.nextCol() 442 | if err != nil { 443 | tr.setColError("cannot skip column", err) 444 | } 445 | } 446 | 447 | // Bytes returns the next bytes column value from the current row. 448 | // 449 | // The returned value is valid until the next call to Reader. 450 | func (tr *Reader) Bytes() []byte { 451 | if tr.err != nil { 452 | return nil 453 | } 454 | b, err := tr.nextCol() 455 | if err != nil { 456 | tr.setColError("cannot read `bytes`", err) 457 | return nil 458 | } 459 | 460 | if !tr.needUnescape { 461 | // Fast path - nothing to unescape. 462 | return b 463 | } 464 | 465 | // Unescape b 466 | n := bytes.IndexByte(b, '\\') 467 | if n < 0 { 468 | // Nothing to unescape in the current column. 469 | return b 470 | } 471 | 472 | // Slow path - in-place unescaping compatible with ClickHouse. 473 | n++ 474 | d := b[:n] 475 | b = b[n:] 476 | for len(b) > 0 { 477 | switch b[0] { 478 | case 'b': 479 | d[len(d)-1] = '\b' 480 | case 'f': 481 | d[len(d)-1] = '\f' 482 | case 'r': 483 | d[len(d)-1] = '\r' 484 | case 'n': 485 | d[len(d)-1] = '\n' 486 | case 't': 487 | d[len(d)-1] = '\t' 488 | case '0': 489 | d[len(d)-1] = 0 490 | case '\'': 491 | d[len(d)-1] = '\'' 492 | case '\\': 493 | d[len(d)-1] = '\\' 494 | default: 495 | d[len(d)-1] = b[0] 496 | } 497 | 498 | b = b[1:] 499 | n = bytes.IndexByte(b, '\\') 500 | if n < 0 { 501 | d = append(d, b...) 502 | break 503 | } 504 | n++ 505 | d = append(d, b[:n]...) 506 | b = b[n:] 507 | } 508 | return d 509 | } 510 | 511 | // String returns the next string column value from the current row. 512 | // 513 | // String allocates memory. Use Bytes to avoid memory allocations. 514 | func (tr *Reader) String() string { 515 | return string(tr.Bytes()) 516 | } 517 | 518 | // Date returns the next date column value from the current row. 519 | // 520 | // date must be in the format YYYY-MM-DD 521 | func (tr *Reader) Date() time.Time { 522 | if tr.err != nil { 523 | return zeroTime 524 | } 525 | b, err := tr.nextCol() 526 | if err != nil { 527 | tr.setColError("cannot read `date`", err) 528 | return zeroTime 529 | } 530 | s := b2s(b) 531 | 532 | y, m, d, err := parseDate(s) 533 | if err != nil { 534 | tr.setColError("cannot parse `date`", err) 535 | return zeroTime 536 | } 537 | if y == 0 && m == 0 && d == 0 { 538 | // special case for ClickHouse 539 | return zeroTime 540 | } 541 | return time.Date(y, time.Month(m), d, 0, 0, 0, 0, time.UTC) 542 | } 543 | 544 | // DateTime returns the next datetime column value from the current row. 545 | // 546 | // datetime must be in the format YYYY-MM-DD hh:mm:ss. 547 | func (tr *Reader) DateTime() time.Time { 548 | if tr.err != nil { 549 | return zeroTime 550 | } 551 | b, err := tr.nextCol() 552 | if err != nil { 553 | tr.setColError("cannot read `datetime`", err) 554 | return zeroTime 555 | } 556 | s := b2s(b) 557 | 558 | dt, err := parseDateTime(s) 559 | if err != nil { 560 | tr.setColError("cannot parse `datetime`", err) 561 | return zeroTime 562 | } 563 | return dt 564 | } 565 | 566 | func parseDateTime(s string) (time.Time, error) { 567 | if len(s) != len("YYYY-MM-DD hh:mm:ss") { 568 | return zeroTime, fmt.Errorf("too short datetime") 569 | } 570 | y, m, d, err := parseDate(s[:len("YYYY-MM-DD")]) 571 | if err != nil { 572 | return zeroTime, err 573 | } 574 | s = s[len("YYYY-MM-DD"):] 575 | if s[0] != ' ' || s[3] != ':' || s[6] != ':' { 576 | return zeroTime, fmt.Errorf("invalid time format. Must be hh:mm:ss") 577 | } 578 | hS := s[1:3] 579 | minS := s[4:6] 580 | secS := s[7:] 581 | h, err := strconv.Atoi(hS) 582 | if err != nil { 583 | return zeroTime, fmt.Errorf("invalid hour: %s", err) 584 | } 585 | min, err := strconv.Atoi(minS) 586 | if err != nil { 587 | return zeroTime, fmt.Errorf("invalid minute: %s", err) 588 | } 589 | sec, err := strconv.Atoi(secS) 590 | if err != nil { 591 | return zeroTime, fmt.Errorf("invalid second: %s", err) 592 | } 593 | if y == 0 && m == 0 && d == 0 { 594 | // Special case for ClickHouse 595 | return zeroTime, nil 596 | } 597 | return time.Date(y, time.Month(m), d, h, min, sec, 0, time.UTC), nil 598 | } 599 | 600 | func parseDate(s string) (y, m, d int, err error) { 601 | if len(s) != len("YYYY-MM-DD") { 602 | err = fmt.Errorf("too short date") 603 | return 604 | } 605 | s = s[:len("YYYY-MM-DD")] 606 | if s[4] != '-' && s[7] != '-' { 607 | err = fmt.Errorf("invalid date format. Must be YYYY-MM-DD") 608 | return 609 | } 610 | yS := s[:4] 611 | mS := s[5:7] 612 | dS := s[8:] 613 | y, err = strconv.Atoi(yS) 614 | if err != nil { 615 | err = fmt.Errorf("invalid year: %s", err) 616 | return 617 | } 618 | m, err = strconv.Atoi(mS) 619 | if err != nil { 620 | err = fmt.Errorf("invalid month: %s", err) 621 | return 622 | } 623 | d, err = strconv.Atoi(dS) 624 | if err != nil { 625 | err = fmt.Errorf("invalid day: %s", err) 626 | return 627 | } 628 | return y, m, d, nil 629 | } 630 | 631 | var zeroTime time.Time 632 | 633 | func (tr *Reader) nextCol() ([]byte, error) { 634 | if tr.row == 0 { 635 | return nil, fmt.Errorf("missing Next call") 636 | } 637 | 638 | tr.col++ 639 | if tr.b == nil { 640 | return nil, fmt.Errorf("no more columns") 641 | } 642 | 643 | n := bytes.IndexByte(tr.b, '\t') 644 | if n < 0 { 645 | // last column 646 | b := tr.b 647 | tr.b = nil 648 | return b, nil 649 | } 650 | 651 | b := tr.b[:n] 652 | tr.b = tr.b[n+1:] 653 | return b, nil 654 | } 655 | 656 | func (tr *Reader) setColError(msg string, err error) { 657 | tr.err = fmt.Errorf("%s at row #%d, col #%d %q: %s", msg, tr.row, tr.col, tr.rowBuf, err) 658 | } 659 | 660 | func b2s(b []byte) string { 661 | return *(*string)(unsafe.Pointer(&b)) 662 | } 663 | -------------------------------------------------------------------------------- /tsvreader_example_test.go: -------------------------------------------------------------------------------- 1 | package tsvreader_test 2 | 3 | import ( 4 | "bytes" 5 | "fmt" 6 | "github.com/valyala/tsvreader" 7 | ) 8 | 9 | func ExampleReader() { 10 | bs := bytes.NewBufferString( 11 | "foo\t42\n" + 12 | "bar\t123\n") 13 | 14 | r := tsvreader.New(bs) 15 | for r.Next() { 16 | col1 := r.String() 17 | col2 := r.Int() 18 | fmt.Printf("col1=%s, col2=%d\n", col1, col2) 19 | } 20 | if err := r.Error(); err != nil { 21 | fmt.Printf("unexpected error: %s", err) 22 | } 23 | 24 | // Output: 25 | // col1=foo, col2=42 26 | // col1=bar, col2=123 27 | } 28 | 29 | func ExampleReader_HasCols() { 30 | bs := bytes.NewBufferString( 31 | "foo\n" + 32 | "bar\tbaz\n" + 33 | "\n" + 34 | "a\tb\tc\n") 35 | 36 | r := tsvreader.New(bs) 37 | for r.Next() { 38 | for r.HasCols() { 39 | s := r.String() 40 | fmt.Printf("%q,", s) 41 | } 42 | fmt.Printf("\n") 43 | } 44 | if err := r.Error(); err != nil { 45 | fmt.Printf("unexpected error: %s", err) 46 | } 47 | 48 | // Output: 49 | // "foo", 50 | // "bar","baz", 51 | // 52 | // "a","b","c", 53 | } 54 | 55 | func ExampleReader_Next() { 56 | bs := bytes.NewBufferString("1\n2\n3\n42\n") 57 | 58 | r := tsvreader.New(bs) 59 | for r.Next() { 60 | n := r.Int() 61 | fmt.Printf("%d\n", n) 62 | } 63 | if err := r.Error(); err != nil { 64 | fmt.Printf("unexpected error: %s", err) 65 | } 66 | 67 | // Output: 68 | // 1 69 | // 2 70 | // 3 71 | // 42 72 | } 73 | -------------------------------------------------------------------------------- /tsvreader_test.go: -------------------------------------------------------------------------------- 1 | package tsvreader 2 | 3 | import ( 4 | "bytes" 5 | "fmt" 6 | "io" 7 | "math" 8 | "math/rand" 9 | "strconv" 10 | "strings" 11 | "testing" 12 | ) 13 | 14 | func TestReaderSkipCol(t *testing.T) { 15 | b := bytes.NewBufferString("foo\tbar2\t42\n") 16 | r := New(b) 17 | if !r.Next() { 18 | t.Fatalf("Next must return true") 19 | } 20 | r.SkipCol() 21 | r.SkipCol() 22 | bb := r.Bytes() 23 | if string(bb) != "42" { 24 | t.Fatalf("unexpected bytes: %q. Expecting %q", bb, "42") 25 | } 26 | if r.HasCols() { 27 | t.Fatalf("HasCols must return false") 28 | } 29 | if err := r.Error(); err != nil { 30 | t.Fatalf("unexpected error: %s", err) 31 | } 32 | 33 | // This must fail, since there are no more columns 34 | r.SkipCol() 35 | err := r.Error() 36 | if err == nil { 37 | t.Fatalf("expecting non-nil error") 38 | } 39 | if errS := err.Error(); !strings.Contains(errS, "no more columns") { 40 | t.Fatalf("unexpected error: %q; must contain `no more columns`", err) 41 | } 42 | } 43 | 44 | func TestReaderHasCols(t *testing.T) { 45 | b := bytes.NewBufferString("foo\t\n\n") 46 | r := New(b) 47 | if r.HasCols() { 48 | t.Fatalf("HasCols must return false before calling Next") 49 | } 50 | 51 | if !r.Next() { 52 | t.Fatalf("Next must return true") 53 | } 54 | bb := r.Bytes() 55 | if string(bb) != "foo" { 56 | t.Fatalf("unexpected bytes: %q. Expecting %q", bb, "foo") 57 | } 58 | if !r.HasCols() { 59 | t.Fatalf("HasCols must return true") 60 | } 61 | bb = r.Bytes() 62 | if len(bb) > 0 { 63 | t.Fatalf("unexpected non-empty bytes: %q", bb) 64 | } 65 | if r.HasCols() { 66 | t.Fatalf("HasCols must return false") 67 | } 68 | 69 | // An empty row is treated as it has a single empty column 70 | if !r.Next() { 71 | t.Fatalf("Next must return true") 72 | } 73 | if r.Error() != nil { 74 | t.Fatalf("unexpected error: %s", r.Error()) 75 | } 76 | if r.HasCols() { 77 | t.Fatalf("HasCols must return false") 78 | } 79 | 80 | // No more rows 81 | if r.Next() { 82 | t.Fatalf("Next must return false") 83 | } 84 | if r.Error() != nil { 85 | t.Fatalf("unexpected error: %s", r.Error()) 86 | } 87 | if r.HasCols() { 88 | t.Fatalf("HasCols must return false") 89 | } 90 | } 91 | 92 | func TestReaderResetError(t *testing.T) { 93 | b := bytes.NewBufferString("foo\tbar\n\nbaz\n") 94 | r := New(b) 95 | if !r.Next() { 96 | t.Fatalf("Next must return true") 97 | } 98 | bb := r.Bytes() 99 | if string(bb) != "foo" { 100 | t.Fatalf("unexpected bytes: %q. Expecting %q", bb, "foo") 101 | } 102 | bb = r.Bytes() 103 | if string(bb) != "bar" { 104 | t.Fatalf("unexpected bytes: %q. Expecting %q", bb, "bar") 105 | } 106 | 107 | // Attempt to read the second (empty) row 108 | if !r.Next() { 109 | t.Fatalf("Next must return true") 110 | } 111 | bb = r.Bytes() 112 | if string(bb) != "" { 113 | t.Fatalf("unexpected non-empty bytes: %q", bb) 114 | } 115 | if r.Error() != nil { 116 | t.Fatalf("unexpected error: %s", r.Error()) 117 | } 118 | bb = r.Bytes() 119 | if string(bb) != "" { 120 | t.Fatalf("unexpected non-empty bytes: %q", bb) 121 | } 122 | err := r.Error() 123 | if err == nil { 124 | t.Fatalf("expecting non-zero error") 125 | } 126 | errS := err.Error() 127 | if !strings.Contains(errS, "no more columns") { 128 | t.Fatalf("unexpected error: %s. Expecting %q", errS, "no more columns") 129 | } 130 | 131 | r.ResetError() 132 | if r.Error() != nil { 133 | t.Fatalf("unexpected error: %s", r.Error()) 134 | } 135 | 136 | // Read the last error 137 | if !r.Next() { 138 | t.Fatalf("Next must return true") 139 | } 140 | bb = r.Bytes() 141 | if string(bb) != "baz" { 142 | t.Fatalf("unexpected bytes: %q. Expecting %q", bb, "baz") 143 | } 144 | if r.Error() != nil { 145 | t.Fatalf("unexpected error: %s", r.Error()) 146 | } 147 | if r.Next() { 148 | t.Fatalf("Next must return false") 149 | } 150 | } 151 | 152 | func TestReaderEmpty(t *testing.T) { 153 | b := bytes.NewBufferString("") 154 | r := New(b) 155 | if r.Next() { 156 | t.Fatalf("Next must return false on empty data") 157 | } 158 | err := r.Error() 159 | if err != nil { 160 | t.Fatalf("unexpected error after reading empty data: %s", err) 161 | } 162 | 163 | // Make sure r.Next() returns false on subsequent calls. 164 | for i := 0; i < 10; i++ { 165 | if r.Next() { 166 | t.Fatalf("Next must return false at the end of data") 167 | } 168 | err = r.Error() 169 | if err != nil { 170 | t.Fatalf("unexpected error at the end of data: %s", err) 171 | } 172 | } 173 | } 174 | 175 | func TestReaderNoNext(t *testing.T) { 176 | b := bytes.NewBufferString("aaa\n") 177 | r := New(b) 178 | 179 | n := r.Int() 180 | if n != 0 { 181 | t.Fatalf("unexpected non-zero int: %d", n) 182 | } 183 | err := r.Error() 184 | if err == nil { 185 | t.Fatalf("expecting non-nil error") 186 | } 187 | errS := err.Error() 188 | if !strings.Contains(errS, "missing Next call") { 189 | t.Fatalf("unexpected error: %s. Must contains %q", errS, "missing Next call") 190 | } 191 | } 192 | 193 | func TestReaderEmptyCol(t *testing.T) { 194 | b := bytes.NewBufferString("\t\tfoobar\t\n") 195 | r := New(b) 196 | if !r.Next() { 197 | t.Fatalf("Next must return true") 198 | } 199 | if r.Error() != nil { 200 | t.Fatalf("unexpected error: %s", r.Error()) 201 | } 202 | 203 | for i := 0; i < 4; i++ { 204 | bb := r.Bytes() 205 | if i == 2 { 206 | if string(bb) != "foobar" { 207 | t.Fatalf("unexpected bytes on col #%d: %q. Expecting %q", i+1, bb, "foobar") 208 | } 209 | } else if len(bb) != 0 { 210 | t.Fatalf("unexpected non-empty bytes on col #%d: %q", i+1, bb) 211 | } 212 | if r.Error() != nil { 213 | t.Fatalf("unexpected error on col #%d: %s", i+1, r.Error()) 214 | } 215 | } 216 | } 217 | 218 | func TestReaderNoNewline(t *testing.T) { 219 | testReaderNoNewline(t, "foobar") 220 | testReaderNoNewline(t, "foo\t") 221 | testReaderNoNewline(t, "\t") 222 | testReaderNoNewline(t, "\tfoo\t\tbar") 223 | testReaderNoNewline(t, "\tfoo") 224 | testReaderNoNewline(t, "\tfoo\t") 225 | testReaderNoNewline(t, "foo\tbar") 226 | testReaderNoNewline(t, "foo\x00bar") 227 | testReaderNoNewline(t, "\x00") 228 | } 229 | 230 | func testReaderNoNewline(t *testing.T, s string) { 231 | t.Helper() 232 | 233 | b := bytes.NewBufferString(s) 234 | r := New(b) 235 | if r.Next() { 236 | t.Fatalf("Next must return false when no newline; s: %q", s) 237 | } 238 | err := r.Error() 239 | if err == nil { 240 | t.Fatalf("expecting error when no newline; s: %q", s) 241 | } 242 | errS := err.Error() 243 | if !strings.Contains(errS, "cannot find newline") { 244 | t.Fatalf("unexpected error: %s; must contain %q", s, "cannot find newline") 245 | } 246 | 247 | // Make sure r.Next() returns false on subsequent calls. 248 | for i := 0; i < 10; i++ { 249 | if r.Next() { 250 | t.Fatalf("Next must return false after error; s: %q", s) 251 | } 252 | err1 := r.Error() 253 | if err1 != err { 254 | t.Fatalf("unexpected error: %v. Expecting %s; s: %q", err1, err, s) 255 | } 256 | } 257 | } 258 | 259 | func TestReaderReset(t *testing.T) { 260 | var r Reader 261 | 262 | for i := 0; i < 10; i++ { 263 | s := fmt.Sprintf("foobar %d\n", i) 264 | b := bytes.NewBufferString(s) 265 | r.Reset(b) 266 | if !r.Next() { 267 | t.Fatalf("Next must return true for TSV %q", s) 268 | } 269 | if r.Error() != nil { 270 | t.Fatalf("unexpected error before reading TSV %q: %s", s, r.Error()) 271 | } 272 | bb := r.Bytes() 273 | if string(bb) != s[:len(s)-1] { 274 | t.Fatalf("unexpected bytes: %q. Expecting %q", bb, s[:len(s)-1]) 275 | } 276 | if r.Error() != nil { 277 | t.Fatalf("unexpected error after reading TSV %q: %s", s, r.Error()) 278 | } 279 | } 280 | } 281 | 282 | func TestReaderSingleRowBytesCol(t *testing.T) { 283 | expectedS := "foobar" 284 | b := bytes.NewBufferString(fmt.Sprintf("%s\n", expectedS)) 285 | r := New(b) 286 | if !r.Next() { 287 | t.Fatalf("Next must return true on the first line. err: %v", r.Error()) 288 | } 289 | err := r.Error() 290 | if err != nil { 291 | t.Fatalf("unexpected error after reading the first line: %s", err) 292 | } 293 | 294 | bb := r.Bytes() 295 | if string(bb) != expectedS { 296 | t.Fatalf("unexpected bytes read: %q. Expecting %q", bb, expectedS) 297 | } 298 | err = r.Error() 299 | if err != nil { 300 | t.Fatalf("unexpected error after reading the first col: %s", err) 301 | } 302 | 303 | // Attempt to read the next col, which doesn't exist. 304 | if r.Next() { 305 | t.Fatalf("Next must return false on a single row") 306 | } 307 | err = r.Error() 308 | if err != nil { 309 | t.Fatalf("unexpected error at the end of data: %s", err) 310 | } 311 | 312 | // Make sure r.Next() returns false on subsequent calls. 313 | for i := 0; i < 10; i++ { 314 | if r.Next() { 315 | t.Fatalf("Next must return false at the end of data") 316 | } 317 | if err != nil { 318 | t.Fatalf("unexpected error at the end of data: %s", err) 319 | } 320 | } 321 | } 322 | 323 | func TestReaderSingleRowIntCol(t *testing.T) { 324 | expectedN := 12346 325 | b := bytes.NewBufferString(fmt.Sprintf("%d\n", expectedN)) 326 | r := New(b) 327 | if !r.Next() { 328 | t.Fatalf("Next must return true on the first line. err: %v", r.Error()) 329 | } 330 | err := r.Error() 331 | if err != nil { 332 | t.Fatalf("unexpected error after reading the first line: %s", err) 333 | } 334 | 335 | n := r.Int() 336 | if n != expectedN { 337 | t.Fatalf("unexpected int read: %d. Expecting %d", n, expectedN) 338 | } 339 | err = r.Error() 340 | if err != nil { 341 | t.Fatalf("unexpected error after reading the first col: %s", err) 342 | } 343 | 344 | // Attempt to read the next col, which doesn't exist. 345 | if r.Next() { 346 | t.Fatalf("Next must return false on a single row") 347 | } 348 | err = r.Error() 349 | if err != nil { 350 | t.Fatalf("unexpected error at the end of data: %s", err) 351 | } 352 | 353 | // Make sure r.Next() returns false on subsequent calls. 354 | for i := 0; i < 10; i++ { 355 | if r.Next() { 356 | t.Fatalf("Next must return false at the end of data") 357 | } 358 | if err != nil { 359 | t.Fatalf("unexpected error at the end of data: %s", err) 360 | } 361 | } 362 | } 363 | 364 | func TestReaderInvalidColType(t *testing.T) { 365 | b := bytes.NewBufferString("foobar\n") 366 | r := New(b) 367 | if !r.Next() { 368 | t.Fatalf("Next must return true on the first line. err: %v", r.Error()) 369 | } 370 | err := r.Error() 371 | if err != nil { 372 | t.Fatalf("unexpected error after reading the first line: %s", err) 373 | } 374 | 375 | n := r.Int() 376 | if n != 0 { 377 | t.Fatalf("unexpected n: %d. Expecting 0", n) 378 | } 379 | err = r.Error() 380 | if err == nil { 381 | t.Fatalf("expecting non-nil error") 382 | } 383 | errS := err.Error() 384 | if !strings.Contains(errS, "cannot parse") { 385 | t.Fatalf("unexpected error: %s. Must contain %q", err, "cannot parse") 386 | } 387 | } 388 | 389 | func TestReaderNoMoreCols(t *testing.T) { 390 | b := bytes.NewBufferString("aaa\n") 391 | r := New(b) 392 | if !r.Next() { 393 | t.Fatalf("Next must return true on the first line. err: %v", r.Error()) 394 | } 395 | err := r.Error() 396 | if err != nil { 397 | t.Fatalf("unexpected error after reading the first line: %s", err) 398 | } 399 | 400 | bb := r.Bytes() 401 | if string(bb) != "aaa" { 402 | t.Fatalf("unexpected bytes value: %q. Expecting %q", bb, "aaa") 403 | } 404 | err = r.Error() 405 | if err != nil { 406 | t.Fatalf("unexpected error after reading the first col: %s", err) 407 | } 408 | 409 | // attempt to read more col 410 | for i := 0; i < 10; i++ { 411 | bb := r.Bytes() 412 | if len(bb) > 0 { 413 | t.Fatalf("unexpected non-empty bytes: %q", bb) 414 | } 415 | err = r.Error() 416 | if err == nil { 417 | t.Fatalf("expecting non-nil error") 418 | } 419 | errS := err.Error() 420 | if !strings.Contains(errS, "no more columns") { 421 | t.Fatalf("unexpected error: %s. Must contain %q", err, "no more columns") 422 | } 423 | 424 | n := r.Int() 425 | if n != 0 { 426 | t.Fatalf("unexpected non-zero int: %d", n) 427 | } 428 | err = r.Error() 429 | if err == nil { 430 | t.Fatalf("expecting non-nil error") 431 | } 432 | errS = err.Error() 433 | if !strings.Contains(errS, "no more columns") { 434 | t.Fatalf("unexpected error: %s. Must contain %q", err, "no more columns") 435 | } 436 | } 437 | 438 | // atempt to read more rows 439 | for i := 0; i < 10; i++ { 440 | if r.Next() { 441 | t.Fatalf("Next must return false") 442 | } 443 | err = r.Error() 444 | if err == nil { 445 | t.Fatalf("expecting non-nil error") 446 | } 447 | errS := err.Error() 448 | if !strings.Contains(errS, "no more columns") { 449 | t.Fatalf("unexpected error: %s. Must contain %q", err, "no more columns") 450 | } 451 | } 452 | } 453 | 454 | func TestReaderSingleRowMultiCols(t *testing.T) { 455 | b := bytes.NewBufferString("foobar\t-42\t3\tbaz\n") 456 | r := New(b) 457 | 458 | if !r.Next() { 459 | t.Fatalf("Next must return true on the first line. err: %v", r.Error()) 460 | } 461 | err := r.Error() 462 | if err != nil { 463 | t.Fatalf("unexpected error after reading the first line: %s", err) 464 | } 465 | 466 | bb := r.Bytes() 467 | if string(bb) != "foobar" { 468 | t.Fatalf("unexpected bytes: %q. Expecting %q", bb, "foobar") 469 | } 470 | err = r.Error() 471 | if err != nil { 472 | t.Fatalf("unexpected error after reading the first col: %s", err) 473 | } 474 | 475 | n := r.Int() 476 | if n != -42 { 477 | t.Fatalf("unexpected int: %d. Expecting %d", n, -42) 478 | } 479 | err = r.Error() 480 | if err != nil { 481 | t.Fatalf("unexpected error after reading the second col: %s", err) 482 | } 483 | 484 | n = r.Int() 485 | if n != 3 { 486 | t.Fatalf("unexpected int: %d. Expecting %d", n, 3) 487 | } 488 | err = r.Error() 489 | if err != nil { 490 | t.Fatalf("unexpected error after reading the third col: %s", err) 491 | } 492 | 493 | bb = r.Bytes() 494 | if string(bb) != "baz" { 495 | t.Fatalf("unexpected bytes: %q. Expecting %q", bb, "baz") 496 | } 497 | err = r.Error() 498 | if err != nil { 499 | t.Fatalf("unexpected error after reading the fourth col: %s", err) 500 | } 501 | 502 | // Attempt to read more rows 503 | for i := 0; i < 10; i++ { 504 | if r.Next() { 505 | t.Fatalf("Next must return false") 506 | } 507 | err = r.Error() 508 | if err != nil { 509 | t.Fatalf("unexpected error: %s", err) 510 | } 511 | } 512 | } 513 | 514 | func TestReaderUnreadColsSingle(t *testing.T) { 515 | b := bytes.NewBufferString("foo\tbar\n") 516 | r := New(b) 517 | if !r.Next() { 518 | t.Fatalf("Next must return true") 519 | } 520 | if r.Error() != nil { 521 | t.Fatalf("unexpected error: %s", r.Error()) 522 | } 523 | 524 | bb := r.Bytes() 525 | if string(bb) != "foo" { 526 | t.Fatalf("unexpected bytes: %q. Expecting %q", bb, "foo") 527 | } 528 | if r.Error() != nil { 529 | t.Fatalf("unexpected error: %s", r.Error()) 530 | } 531 | 532 | // Attempt to read next row while the current row isnt read till the end 533 | for i := 0; i < 10; i++ { 534 | if r.Next() { 535 | t.Fatalf("Next must return false, because the previous row has unread columns") 536 | } 537 | err := r.Error() 538 | if err == nil { 539 | t.Fatalf("expecting non-nil error") 540 | } 541 | errS := err.Error() 542 | if !strings.Contains(errS, "unread columns") { 543 | t.Fatalf("unexpected error: %s. Must contain %q", err, "unread columns") 544 | } 545 | } 546 | } 547 | 548 | func TestReaderUnreadColsAll(t *testing.T) { 549 | b := bytes.NewBufferString("foo\tbar\n") 550 | r := New(b) 551 | if !r.Next() { 552 | t.Fatalf("Next must return true") 553 | } 554 | if r.Error() != nil { 555 | t.Fatalf("unexpected error: %s", r.Error()) 556 | } 557 | 558 | // Attempt to read next row while the current row isnt read till the end 559 | for i := 0; i < 10; i++ { 560 | if r.Next() { 561 | t.Fatalf("Next must return false, because the previous row has unread columns") 562 | } 563 | err := r.Error() 564 | if err == nil { 565 | t.Fatalf("expecting non-nil error") 566 | } 567 | errS := err.Error() 568 | if !strings.Contains(errS, "unread columns") { 569 | t.Fatalf("unexpected error: %s. Must contain %q", err, "unread columns") 570 | } 571 | } 572 | } 573 | 574 | func TestReaderMultiRowsBytesCol(t *testing.T) { 575 | testReaderMultiRowsBytesCol(t, 2) 576 | testReaderMultiRowsBytesCol(t, 10) 577 | testReaderMultiRowsBytesCol(t, 100) 578 | testReaderMultiRowsBytesCol(t, 1000) 579 | testReaderMultiRowsBytesCol(t, 10000) 580 | } 581 | 582 | func testReaderMultiRowsBytesCol(t *testing.T, rows int) { 583 | t.Helper() 584 | 585 | var expected []string 586 | var ss []string 587 | for i := 0; i < rows; i++ { 588 | s := fmt.Sprintf("foo%d bar", rand.Int()) 589 | expected = append(expected, s) 590 | ss = append(ss, fmt.Sprintf("%s\n", s)) 591 | } 592 | 593 | b := bytes.NewBufferString(strings.Join(ss, "")) 594 | r := New(b) 595 | for i, expectedS := range expected { 596 | if !r.Next() { 597 | t.Fatalf("Next must return true when reading %q at row #%d", expectedS, i+1) 598 | } 599 | if r.Error() != nil { 600 | t.Fatalf("unexpected error when reading %q at row #%d: %s", expectedS, i+1, r.Error()) 601 | } 602 | bb := r.Bytes() 603 | if string(bb) != expectedS { 604 | t.Fatalf("unexpected bytes at row #%d: %q. Expecting %q", i+1, bb, expectedS) 605 | } 606 | if r.Error() != nil { 607 | t.Fatalf("unexpected error after reading %q at row #%d: %s", expectedS, i+1, r.Error()) 608 | } 609 | } 610 | 611 | if r.Next() { 612 | t.Fatalf("Next must return false") 613 | } 614 | if r.Error() != nil { 615 | t.Fatalf("unexpected error: %s", r.Error()) 616 | } 617 | } 618 | 619 | func TestReaderMultiRowsIntCol(t *testing.T) { 620 | testReaderMultiRowsIntCol(t, 2) 621 | testReaderMultiRowsIntCol(t, 10) 622 | testReaderMultiRowsIntCol(t, 100) 623 | testReaderMultiRowsIntCol(t, 1000) 624 | testReaderMultiRowsIntCol(t, 10000) 625 | } 626 | 627 | func testReaderMultiRowsIntCol(t *testing.T, rows int) { 628 | t.Helper() 629 | 630 | var expected []int 631 | var ss []string 632 | for i := 0; i < rows; i++ { 633 | n := rand.Int() 634 | if rand.Intn(2) == 0 { 635 | n = -n 636 | } 637 | expected = append(expected, n) 638 | ss = append(ss, fmt.Sprintf("%d\n", n)) 639 | } 640 | 641 | b := bytes.NewBufferString(strings.Join(ss, "")) 642 | r := New(b) 643 | for i, expectedN := range expected { 644 | if !r.Next() { 645 | t.Fatalf("Next must return true when reading %d at row #%d", expectedN, i+1) 646 | } 647 | if r.Error() != nil { 648 | t.Fatalf("unexpected error when reading %d at row #%d: %s", expectedN, i+1, r.Error()) 649 | } 650 | n := r.Int() 651 | if n != expectedN { 652 | t.Fatalf("unexpected int at row #%d: %d. Expecting %d", i+1, n, expectedN) 653 | } 654 | if r.Error() != nil { 655 | t.Fatalf("unexpected error after reading %d at row #%d: %s", expectedN, i+1, r.Error()) 656 | } 657 | } 658 | 659 | if r.Next() { 660 | t.Fatalf("Next must return false") 661 | } 662 | if r.Error() != nil { 663 | t.Fatalf("unexpected error: %s", r.Error()) 664 | } 665 | } 666 | 667 | func TestReaderMultiRowsMultiCols(t *testing.T) { 668 | testReaderMultiRowsMultiCols(t, 2, 2) 669 | testReaderMultiRowsMultiCols(t, 10, 5) 670 | testReaderMultiRowsMultiCols(t, 100, 10) 671 | testReaderMultiRowsMultiCols(t, 1000, 100) 672 | testReaderMultiRowsMultiCols(t, 10000, 3) 673 | testReaderMultiRowsMultiCols(t, 3, 500) 674 | } 675 | 676 | func testReaderMultiRowsMultiCols(t *testing.T, rows int, cols int) { 677 | t.Helper() 678 | 679 | var expected [][]string 680 | var ss []string 681 | for i := 0; i < rows; i++ { 682 | var rowS []string 683 | for j := 0; j < cols; j++ { 684 | s := fmt.Sprintf("foobar%d", j+i*cols) 685 | rowS = append(rowS, s) 686 | } 687 | expected = append(expected, rowS) 688 | ss = append(ss, strings.Join(rowS, "\t")+"\n") 689 | } 690 | 691 | b := bytes.NewBufferString(strings.Join(ss, "")) 692 | r := New(b) 693 | testReaderMultiRowsCols(t, r, expected) 694 | } 695 | 696 | func TestReaderSlowSource(t *testing.T) { 697 | testReaderSlowSource(t, 1, 10000) 698 | testReaderSlowSource(t, 10, 1000) 699 | testReaderSlowSource(t, 100, 100) 700 | testReaderSlowSource(t, 1000, 10) 701 | testReaderSlowSource(t, 10000, 1) 702 | } 703 | 704 | func testReaderSlowSource(t *testing.T, rows, cols int) { 705 | t.Helper() 706 | 707 | var expected [][]string 708 | var ss []string 709 | for i := 0; i < rows; i++ { 710 | var rowS []string 711 | for j := 0; j < cols; j++ { 712 | s := fmt.Sprintf("foo тест %d", j+i*cols) 713 | rowS = append(rowS, s) 714 | } 715 | expected = append(expected, rowS) 716 | ss = append(ss, strings.Join(rowS, "\t")+"\n") 717 | } 718 | 719 | b := &slowSource{ 720 | s: []byte(strings.Join(ss, "")), 721 | } 722 | r := New(b) 723 | testReaderMultiRowsCols(t, r, expected) 724 | } 725 | 726 | func testReaderMultiRowsCols(t *testing.T, r *Reader, expected [][]string) { 727 | t.Helper() 728 | 729 | for i, rowS := range expected { 730 | if !r.Next() { 731 | t.Fatalf("Next must return true when reading row #%d", i+1) 732 | } 733 | if r.Error() != nil { 734 | t.Fatalf("unexpected error when reading row #%d: %s", i+1, r.Error()) 735 | } 736 | if r.row != i+1 { 737 | t.Fatalf("unexpected row number: %d. Expecting %d", r.row, i+1) 738 | } 739 | for j, expectedS := range rowS { 740 | bb := r.Bytes() 741 | if string(bb) != expectedS { 742 | t.Fatalf("unexpected bytes at col #%d, row #%d: %q. Expecting %q", j+1, i+1, bb, expectedS) 743 | } 744 | if r.Error() != nil { 745 | t.Fatalf("unexpected error after reading col #%d, row #%d: %s", j+1, i+1, r.Error()) 746 | } 747 | if r.row != i+1 { 748 | t.Fatalf("unexpected row number: %d. Expecting %d", r.row, i+1) 749 | } 750 | if r.col != j+1 { 751 | t.Fatalf("unexpected col number on row #%d: %d. Expecting %d", i+1, r.col, j+1) 752 | } 753 | } 754 | } 755 | } 756 | 757 | // slowSource returns data by small chunks. 758 | type slowSource struct { 759 | s []byte 760 | } 761 | 762 | func (ss *slowSource) Read(p []byte) (int, error) { 763 | if len(ss.s) == 0 { 764 | return 0, io.EOF 765 | } 766 | 767 | chunkSize := rand.Intn(10) + 1 768 | if chunkSize > len(ss.s) { 769 | chunkSize = len(ss.s) 770 | } 771 | n := copy(p, ss.s[:chunkSize]) 772 | ss.s = ss.s[n:] 773 | return n, nil 774 | } 775 | 776 | func TestReaderUintSuccess(t *testing.T) { 777 | testReaderUintSuccess(t, (1<