├── .env ├── .gitignore └── src ├── fileiter └── fileiter.go ├── mapreduce └── mapreduce.go └── wordcount └── wordcount.go /.env: -------------------------------------------------------------------------------- 1 | # install https://github.com/kennethreitz/autoenv - it will change your life 2 | export GOPATH=$(pwd) 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /bin 2 | /pkg 3 | -------------------------------------------------------------------------------- /src/fileiter/fileiter.go: -------------------------------------------------------------------------------- 1 | package fileiter 2 | 3 | import ( 4 | "bufio" 5 | "io" 6 | "os" 7 | ) 8 | 9 | func EachLine(filename string) chan string { 10 | output := make(chan string) 11 | go func() { 12 | file, err := os.Open(filename) 13 | if err != nil { 14 | return 15 | } 16 | defer file.Close() 17 | reader := bufio.NewReader(file) 18 | for { 19 | line, err := reader.ReadString('\n') 20 | output <- line 21 | if err == io.EOF { 22 | break 23 | } 24 | } 25 | close(output) 26 | }() 27 | return output 28 | } 29 | -------------------------------------------------------------------------------- /src/mapreduce/mapreduce.go: -------------------------------------------------------------------------------- 1 | package mapreduce 2 | 3 | func MapReduce(mapper func(interface{}, chan interface{}), 4 | reducer func(chan interface{}, chan interface{}), 5 | input chan interface{}, 6 | pool_size int) interface{} { 7 | reduce_input := make(chan interface{}) 8 | reduce_output := make(chan interface{}) 9 | worker_output := make(chan chan interface{}, pool_size) 10 | go reducer(reduce_input, reduce_output) 11 | go func() { 12 | for worker_chan := range worker_output { 13 | reduce_input <- <-worker_chan 14 | } 15 | close(reduce_input) 16 | }() 17 | go func() { 18 | for item := range input { 19 | my_chan := make(chan interface{}) 20 | go mapper(item, my_chan) 21 | worker_output <- my_chan 22 | } 23 | close(worker_output) 24 | }() 25 | return <-reduce_output 26 | } 27 | -------------------------------------------------------------------------------- /src/wordcount/wordcount.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fileiter" 5 | "fmt" 6 | "mapreduce" 7 | "os" 8 | "regexp" 9 | ) 10 | 11 | func find_files(dirname string) chan interface{} { 12 | output := make(chan interface{}) 13 | go func() { 14 | _find_files(dirname, output) 15 | close(output) 16 | }() 17 | return output 18 | } 19 | 20 | func _find_files(dirname string, output chan interface{}) { 21 | dir, _ := os.Open(dirname) 22 | dirnames, _ := dir.Readdirnames(-1) 23 | for i := 0; i < len(dirnames); i++ { 24 | fullpath := dirname + "/" + dirnames[i] 25 | file, _ := os.Stat(fullpath) 26 | if file.IsDir() { 27 | _find_files(fullpath, output) 28 | } else { 29 | output <- fullpath 30 | } 31 | } 32 | } 33 | 34 | func wordcount(filename interface{}, output chan interface{}) { 35 | results := map[string]int{} 36 | wordsRE := regexp.MustCompile(`[A-Za-z0-9_]*`) 37 | for line := range fileiter.EachLine(filename.(string)) { 38 | for _, match := range wordsRE.FindAllString(line, -1) { 39 | results[match]++ 40 | } 41 | } 42 | output <- results 43 | } 44 | 45 | func reducer(input chan interface{}, output chan interface{}) { 46 | results := map[string]int{} 47 | for new_matches := range input { 48 | for key, value := range new_matches.(map[string]int) { 49 | previous_count, exists := results[key] 50 | if !exists { 51 | results[key] = value 52 | } else { 53 | results[key] = previous_count + value 54 | } 55 | } 56 | } 57 | output <- results 58 | } 59 | 60 | func main() { 61 | fmt.Print(mapreduce.MapReduce(wordcount, reducer, find_files("."), 20)) 62 | } 63 | --------------------------------------------------------------------------------