├── README.md ├── basic.go ├── comma-separated-string.go ├── filetoread.txt ├── reading-chunkwise-multiple.go ├── reading-chunkwise.go ├── scanner-example.go ├── scanner-word-list-grow.go └── scanner.go /README.md: -------------------------------------------------------------------------------- 1 | Reading files in Go 2 | ------------------- 3 | 4 | 5 | This repo contains runnable files that accompany the blog post ["Reading 6 | Files in Go"](http://kgrz.io/reading-files-in-go-an-overview.html). 7 | 8 | The code style is not representative of idiomatic Go. There is pretty 9 | much no useful error handling. 10 | 11 | 12 | Usage: 13 | 14 | 15 | Run each file as `go run `. 16 | -------------------------------------------------------------------------------- /basic.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | ) 7 | 8 | func main() { 9 | file, err := os.Open("filetoread.txt") 10 | if err != nil { 11 | fmt.Println(err) 12 | return 13 | } 14 | defer file.Close() 15 | 16 | fileinfo, err := file.Stat() 17 | if err != nil { 18 | fmt.Println(err) 19 | return 20 | } 21 | 22 | filesize := fileinfo.Size() 23 | buffer := make([]byte, filesize) 24 | 25 | bytesread, err := file.Read(buffer) 26 | if err != nil { 27 | fmt.Println(err) 28 | return 29 | } 30 | 31 | fmt.Println("bytes read: ", bytesread) 32 | fmt.Println("bytestream to string: ", string(buffer)) 33 | } 34 | -------------------------------------------------------------------------------- /comma-separated-string.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bufio" 5 | "bytes" 6 | "fmt" 7 | "strings" 8 | ) 9 | 10 | func main() { 11 | csvstring := "name, age, occupation" 12 | 13 | // An anonymous function declaration to avoid repeating main() 14 | ScanCSV := func(data []byte, atEOF bool) (advance int, token []byte, err error) { 15 | commaidx := bytes.IndexByte(data, ',') 16 | if commaidx > 0 { 17 | // we need to return the next position 18 | buffer := data[:commaidx] 19 | return commaidx + 1, bytes.TrimSpace(buffer), nil 20 | } 21 | 22 | // if we are at the end of the string, just return the entire buffer 23 | if atEOF { 24 | // but only do that when there is some data. If not, this might mean 25 | // that we've reached the end of our input CSV string 26 | if len(data) > 0 { 27 | return len(data), bytes.TrimSpace(data), nil 28 | } 29 | } 30 | 31 | // when 0, nil, nil is returned, this is a signal to the interface to read 32 | // more data in from the input reader. In this case, this input is our 33 | // string reader and this pretty much will never occur. 34 | return 0, nil, nil 35 | } 36 | 37 | scanner := bufio.NewScanner(strings.NewReader(csvstring)) 38 | scanner.Split(ScanCSV) 39 | 40 | for scanner.Scan() { 41 | fmt.Println(scanner.Text()) 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /filetoread.txt: -------------------------------------------------------------------------------- 1 | Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod 2 | tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At 3 | vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, 4 | no sea takimata sanctus est Lorem ipsum dolor sit amet. 5 | -------------------------------------------------------------------------------- /reading-chunkwise-multiple.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | "sync" 7 | ) 8 | 9 | type chunk struct { 10 | bufsize int 11 | offset int64 12 | } 13 | 14 | func main() { 15 | const BufferSize = 100 16 | file, err := os.Open("filetoread.txt") 17 | if err != nil { 18 | fmt.Println(err) 19 | return 20 | } 21 | defer file.Close() 22 | 23 | fileinfo, err := file.Stat() 24 | if err != nil { 25 | fmt.Println(err) 26 | return 27 | } 28 | 29 | filesize := int(fileinfo.Size()) 30 | // Number of go routines we need to spawn. 31 | concurrency := filesize / BufferSize 32 | // buffer sizes that each of the go routine below should use. ReadAt 33 | // returns an error if the buffer size is larger than the bytes returned 34 | // from the file. 35 | chunksizes := make([]chunk, concurrency) 36 | 37 | // All buffer sizes are the same in the normal case. Offsets depend on the 38 | // index. Second go routine should start at 100, for example, given our 39 | // buffer size of 100. 40 | for i := 0; i < concurrency; i++ { 41 | chunksizes[i].bufsize = BufferSize 42 | chunksizes[i].offset = int64(BufferSize * i) 43 | } 44 | 45 | // check for any left over bytes. Add the residual number of bytes as the 46 | // the last chunk size. 47 | if remainder := filesize % BufferSize; remainder != 0 { 48 | c := chunk{bufsize: remainder, offset: int64(concurrency * BufferSize)} 49 | concurrency++ 50 | chunksizes = append(chunksizes, c) 51 | } 52 | 53 | var wg sync.WaitGroup 54 | wg.Add(concurrency) 55 | 56 | for i := 0; i < concurrency; i++ { 57 | go func(chunksizes []chunk, i int) { 58 | defer wg.Done() 59 | 60 | chunk := chunksizes[i] 61 | buffer := make([]byte, chunk.bufsize) 62 | bytesread, err := file.ReadAt(buffer, chunk.offset) 63 | 64 | if err != nil { 65 | fmt.Println(err) 66 | return 67 | } 68 | 69 | fmt.Println("bytes read, string(bytestream): ", bytesread) 70 | fmt.Println("bytestream to string: ", string(buffer)) 71 | }(chunksizes, i) 72 | } 73 | 74 | wg.Wait() 75 | } 76 | -------------------------------------------------------------------------------- /reading-chunkwise.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "io" 6 | "os" 7 | ) 8 | 9 | func main() { 10 | const BufferSize = 100 11 | file, err := os.Open("filetoread.txt") 12 | if err != nil { 13 | fmt.Println(err) 14 | return 15 | } 16 | defer file.Close() 17 | 18 | buffer := make([]byte, BufferSize) 19 | 20 | for { 21 | bytesread, err := file.Read(buffer) 22 | 23 | // err value can be io.EOF, which means that we reached the end of 24 | // file, and we have to terminate the loop. Note the fmt.Println lines 25 | // will get executed for the last chunk because the io.EOF gets 26 | // returned from the Read function only on the *next* iteration, and 27 | // the bytes returned will be 0 on that read. 28 | if err != nil { 29 | if err != io.EOF { 30 | fmt.Println(err) 31 | } 32 | 33 | break 34 | } 35 | 36 | fmt.Println("bytes read: ", bytesread) 37 | fmt.Println("bytestream to string: ", string(buffer[:bytesread])) 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /scanner-example.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bufio" 5 | "fmt" 6 | "os" 7 | ) 8 | 9 | func main() { 10 | file, err := os.Open("filetoread.txt") 11 | if err != nil { 12 | fmt.Println(err) 13 | return 14 | } 15 | defer file.Close() 16 | 17 | scanner := bufio.NewScanner(file) 18 | scanner.Split(bufio.ScanLines) 19 | 20 | // Returns a boolean based on whether there's a next instance of `\n` 21 | // character in the IO stream. This step also advances the internal pointer 22 | // to the next position (after '\n') if it did find that token. 23 | read := scanner.Scan() 24 | 25 | if read { 26 | fmt.Println("read byte array: ", scanner.Bytes()) 27 | fmt.Println("read string: ", scanner.Text()) 28 | } 29 | 30 | // goto line number 30 and repeat 31 | } 32 | -------------------------------------------------------------------------------- /scanner-word-list-grow.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bufio" 5 | "fmt" 6 | "os" 7 | ) 8 | 9 | func main() { 10 | file, err := os.Open("filetoread.txt") 11 | if err != nil { 12 | fmt.Println(err) 13 | return 14 | } 15 | defer file.Close() 16 | 17 | scanner := bufio.NewScanner(file) 18 | scanner.Split(bufio.ScanWords) 19 | 20 | // initial size of our wordlist 21 | bufferSize := 50 22 | words := make([]string, bufferSize) 23 | pos := 0 24 | 25 | for scanner.Scan() { 26 | if err := scanner.Err(); err != nil { 27 | // This error is a non-EOF error. End the iteration if we encounter 28 | // an error 29 | fmt.Println(err) 30 | break 31 | } 32 | 33 | words[pos] = scanner.Text() 34 | pos++ 35 | 36 | if pos >= len(words) { 37 | // expand the buffer by 100 again 38 | newbuf := make([]string, bufferSize) 39 | words = append(words, newbuf...) 40 | } 41 | } 42 | 43 | fmt.Println("word list:") 44 | // we are iterating only until the value of "pos" because our buffer size 45 | // might be more than the number of words because we increase the length by 46 | // a constant value. Or the scanner loop might've terminated due to an 47 | // error prematurely. In this case the "pos" contains the index of the last 48 | // successful update. 49 | for _, word := range words[:pos] { 50 | fmt.Println(word) 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /scanner.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bufio" 5 | "fmt" 6 | "os" 7 | ) 8 | 9 | func main() { 10 | file, err := os.Open("filetoread.txt") 11 | if err != nil { 12 | fmt.Println(err) 13 | return 14 | } 15 | defer file.Close() 16 | 17 | scanner := bufio.NewScanner(file) 18 | scanner.Split(bufio.ScanLines) 19 | 20 | // This is our buffer now 21 | var lines []string 22 | 23 | for scanner.Scan() { 24 | lines = append(lines, scanner.Text()) 25 | } 26 | 27 | fmt.Println("read lines:") 28 | for _, line := range lines { 29 | fmt.Println(line) 30 | } 31 | } 32 | --------------------------------------------------------------------------------