├── .gitignore ├── .idea ├── misc.xml ├── modules.xml ├── optimizing-go.iml ├── vcs.xml ├── watcherTasks.xml └── workspace.xml ├── code ├── benchmarks │ └── reportAllocs │ │ ├── a_test.go │ │ └── s.sh ├── bounds-check │ ├── a.go │ ├── b.go │ ├── c.go │ ├── d.go │ ├── e.go │ ├── f.go │ ├── g.go │ ├── h.go │ ├── i.go │ └── readme.md ├── coredump │ ├── coredump.go │ └── coredump.md ├── cover │ ├── cover.go │ ├── cover_test.go │ └── readme.md ├── defer │ └── defer_test.go ├── easyjson │ ├── main.go │ ├── main_easyjson.go │ ├── main_test.go │ └── readme.md ├── escape-analysis │ ├── 1.go │ ├── main.c │ └── readme.md ├── file-io │ └── 1-file-io_test.go ├── fmt │ └── main_test.go ├── gogc │ └── mergesort.go ├── gomaxprocs │ ├── 1.go │ ├── mergesort.go │ └── mergesort_test.go ├── inline │ ├── inline.go │ └── readme.md ├── map-access │ └── 1-map_test.go ├── panic │ ├── a-panic-program.go │ └── a-panic-program.md ├── parallelize │ └── rand_strings_test.go ├── profiler-labels │ └── 1_test.go ├── profiler │ ├── main.go │ ├── main_test.go │ └── readme.md ├── regex │ └── 1-regex-compile_test.go ├── responsewriter │ └── main_test.go ├── slices │ ├── 1-array.go │ ├── 2-slice-of-array.go │ ├── 3-slice-backed-by-array.go │ ├── 4-appending-to-slice.go │ └── 5-make-slice.go ├── stack-and-heap │ ├── h.go │ ├── h_test.go │ ├── s.go │ └── s_test.go ├── string-concat │ └── 1-string-concat_test.go ├── sync-once │ ├── 1.go │ ├── 2.go │ └── 3.go ├── sync.pool │ ├── 1_test.go │ ├── 2_test.go │ ├── book1_test.go │ └── book2_test.go ├── testing │ └── search_test.go └── tracing │ ├── mergesort.go │ └── mergesort_test.go ├── images ├── gogc │ ├── gogc-100.png │ ├── gogc-200.png │ ├── gogc-50.png │ └── gogc-off.png ├── gomaxprocs │ ├── gomaxprocs-1.png │ ├── gomaxprocs-18.png │ └── gomaxprocs-8.png └── tracing │ ├── 1-OS-process-and-its-threads.png │ ├── 2-goroutines-on-a-thread.png │ ├── 3-goroutines-on-a-blocking-thread.png │ ├── 4-concurrency-and-parallelism.png │ ├── tracing-gc.png │ ├── view-goroutine.png │ └── view-trace.png └── readme.md /.gitignore: -------------------------------------------------------------------------------- 1 | **/*.out 2 | **/*.o 3 | **/*.prof 4 | **/*.pprof 5 | **/*.test 6 | **/*.trace 7 | -------------------------------------------------------------------------------- /.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 6 | -------------------------------------------------------------------------------- /.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /.idea/optimizing-go.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | -------------------------------------------------------------------------------- /.idea/vcs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /.idea/watcherTasks.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 16 | 28 | 29 | 40 | 52 | 53 | -------------------------------------------------------------------------------- /.idea/workspace.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 15 | 16 | 21 | 
22 | 23 | 24 | OpenFile 25 | 26 | 27 | 28 | 29 | 31 | 32 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 1558416963997 91 | 100 | 101 | 102 | 103 | 105 | 106 | 108 | 109 | 120 | 121 | -------------------------------------------------------------------------------- /code/benchmarks/reportAllocs/a_test.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import "testing" 4 | 5 | const size = 2000000 6 | 7 | func f() ([size]int, [size]int) { 8 | a := [size]int{} 9 | b := [size]int{} 10 | a[19] = 100 11 | return a, b 12 | } 13 | 14 | func f2() [size]int { 15 | a := [size]int{} 16 | a[19] = 100 17 | return a 18 | } 19 | 20 | func BenchmarkHelloWorld(b *testing.B) { 21 | // t.Fatal("not implemented") 22 | b.ReportAllocs() 23 | for i := 0; i < b.N; i++ { 24 | a := f2() 25 | _ = a 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /code/benchmarks/reportAllocs/s.sh: -------------------------------------------------------------------------------- 1 | go test -run=. -bench=. -cpuprofile=cpu.out -benchmem -memprofile=mem.out -trace trace.out 2 | go tool pprof -pdf $FILENAME.test cpu.out > cpu.pdf && open cpu.pdf 3 | go tool pprof -pdf --alloc_space $FILENAME.test mem.out > alloc_space.pdf && open alloc_space.pdf 4 | go tool pprof -pdf --alloc_objects $FILENAME.test mem.out > alloc_objects.pdf && open alloc_objects.pdf 5 | go tool pprof -pdf --inuse_space $FILENAME.test mem.out > inuse_space.pdf && open inuse_space.pdf 6 | go tool pprof -pdf --inuse_objects $FILENAME.test mem.out > inuse_objects.pdf && open inuse_objects.pdf 7 | go tool trace trace.out 8 | 9 | go-torch $FILENAME.test cpu.out -f ${FILENAME}_cpu.svg && open ${FILENAME}_cpu.svg 10 | go-torch --alloc_objects $FILENAME.test mem.out -f ${FILENAME}_alloc_obj.svg && open ${FILENAME}_alloc_obj.svg 11 | go-torch --alloc_space $FILENAME.test mem.out -f ${FILENAME}_alloc_space.svg && open ${FILENAME}_alloc_space.svg 12 | go-torch --inuse_objects $FILENAME.test mem.out -f ${FILENAME}_inuse_obj.svg && open ${FILENAME}_inuse_obj.svg 13 | go-torch --inuse_space $FILENAME.test mem.out -f ${FILENAME}_inuse_space.svg && open ${FILENAME}_inuse_space.svg 14 | 15 | # For live data 16 | 17 | go-torch -u http://localhost:8080 --seconds 32 -f ${FILENAME}_live.svg && open ${FILENAME}_live.svg 18 | 19 | # 20 | 21 | go tool pprof -cum cpu.out 22 | go tool pprof -cum --alloc_space mem.out 23 | go tool pprof -cum --alloc_objects mem.out 24 | go tool pprof -cum --inuse_space mem.out 25 | go tool pprof -cum --inuse_objects mem.out 26 | 27 | # 28 | 29 | go tool pprof $FILENAME.test cpu.out 30 | # (pprof) list 31 | 32 | # 33 | 34 | rm alloc_space.pdf alloc_objects.pdf inuse_space.pdf inuse_objects.pdf cpu.out cpu.pdf mem.out $FILENAME.test ${FILENAME}_cpu.svg ${FILENAME}_alloc_obj.svg ${FILENAME}_alloc_space.svg ${FILENAME}_inuse_obj.svg ${FILENAME}_inuse_space.svg ${FILENAME}_live.svg trace.out 35 | 36 | -------------------------------------------------------------------------------- /code/bounds-check/a.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | func a(a []int) { 4 | n := 6 5 | _ = a[n] 6 | } 7 | -------------------------------------------------------------------------------- /code/bounds-check/b.go: 
-------------------------------------------------------------------------------- 1 | package main 2 | 3 | func b(b [5]int) { 4 | n := len(b) - 1 5 | _ = b[n] 6 | } 7 | -------------------------------------------------------------------------------- /code/bounds-check/c.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | func c(b []int) { 4 | n := len(b) - 1 5 | _ = b[n] 6 | } 7 | -------------------------------------------------------------------------------- /code/bounds-check/d.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | func d(b []byte) { 4 | for i := 0; i < len(b); i++ { 5 | b[i] = 9 6 | } 7 | } 8 | -------------------------------------------------------------------------------- /code/bounds-check/e.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | func e(b []byte, n int) { 4 | for i := 0; i < n; i++ { 5 | b[i] = 9 6 | } 7 | } 8 | -------------------------------------------------------------------------------- /code/bounds-check/f.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | func f(b []byte, n int) { 4 | _ = b[n-1] 5 | for i := 0; i < n; i++ { 6 | b[i] = 9 7 | } 8 | } 9 | -------------------------------------------------------------------------------- /code/bounds-check/g.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import "fmt" 4 | 5 | func g1(b []byte, v uint32) { 6 | b[0] = byte(v + 48) 7 | b[1] = byte(v + 49) 8 | b[2] = byte(v + 50) 9 | b[3] = byte(v + 51) 10 | fmt.Println(b) 11 | } 12 | 13 | func g2(b []byte, v uint32) { 14 | b[3] = byte(v + 51) 15 | b[0] = byte(v + 48) 16 | b[1] = byte(v + 49) 17 | b[2] = byte(v + 50) 18 | fmt.Println(b) 19 | } 20 | 21 | func main() { 22 | b := make([]byte, 4) 23 | g1(b, 10) 24 | g2(b, 10) 25 | } 26 | -------------------------------------------------------------------------------- /code/bounds-check/h.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import "fmt" 4 | 5 | func h1(b []byte, n int) { 6 | b[n+0] = byte(1) // Found IsInBounds 7 | b[n+1] = byte(2) // Found IsInBounds 8 | b[n+2] = byte(3) // Found IsInBounds 9 | b[n+3] = byte(4) // Found IsInBounds 10 | b[n+4] = byte(5) // Found IsInBounds 11 | b[n+5] = byte(6) // Found IsInBounds 12 | fmt.Println("in h1(): ", b) 13 | } 14 | 15 | func h2(b []byte, n int) { 16 | b = b[n : n+6] // Found IsSliceInBounds 17 | b[0] = byte(1) 18 | b[1] = byte(2) 19 | b[2] = byte(3) 20 | b[3] = byte(4) 21 | b[4] = byte(5) 22 | b[5] = byte(6) 23 | fmt.Println("in h2(): ", b) 24 | } 25 | 26 | func main() { 27 | b := make([]byte, 20) 28 | h1(b, 10) 29 | fmt.Println("in main: ", b) 30 | h2(b, 10) 31 | fmt.Println("in main: ", b) 32 | } 33 | -------------------------------------------------------------------------------- /code/bounds-check/i.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | func i1(a, b, c []byte) { 4 | for i := range a { 5 | a[i] = b[i] + c[i] // 5:11 Found IsInBounds and 5:12 Found IsInBounds 6 | } 7 | } 8 | 9 | func i2(a, b, c []byte) { 10 | _ = b[len(a)-1] // Found IsInBounds 11 | _ = c[len(a)-1] // Found IsInBounds 12 | for i := range a { 13 | a[i] = b[i] + c[i] 14 | } 15 | } 16 | -------------------------------------------------------------------------------- 
/code/bounds-check/readme.md: -------------------------------------------------------------------------------- 1 | Let's compare what the compiled output of these two fairly similar programs are. 2 | 3 | ``` 4 | // a.go 5 | 3 func a(a []int) { 6 | 4 n := 6 7 | 5 _ = a[n] 8 | 6 } 9 | ``` 10 | 11 | ``` 12 | // b.go 13 | 3 func b(b [5]int) { 14 | 4 n := len(b) - 1 15 | 5 _ = b[n] 16 | 6 } 17 | ``` 18 | 19 | ``` 20 | $ go tool compile -S a.go > a.co 21 | $ go tool compile -S b.go > b.co 22 | $ vimdiff a.co b.co 23 | ``` 24 | 25 | ``` 26 | "".a STEXT nosplit size=39 args=0x18 locals=0x8 27 | (a.go:3) TEXT "".a(SB), NOSPLIT|ABIInternal, $8-24 28 | (a.go:3) SUBQ $8, SP 29 | (a.go:3) MOVQ BP, (SP) 30 | (a.go:3) LEAQ (SP), BP 31 | (a.go:3) FUNCDATA $0, gclocals·1a65... 32 | (a.go:3) FUNCDATA $1, gclocals·69c1... 33 | (a.go:3) FUNCDATA $3, gclocals·33cd... 34 | (a.go:5) PCDATA $2, $0 35 | (a.go:5) PCDATA $0, $1 36 | (a.go:5) MOVQ "".a+24(SP), AX 37 | (a.go:5) CMPQ AX, $6 38 | (a.go:5) JLS 32 39 | (a.go:6) PCDATA $2, $-2 40 | (a.go:6) PCDATA $0, $-2 41 | (a.go:6) MOVQ (SP), BP 42 | (a.go:6) ADDQ $8, SP 43 | (a.go:6) RET 44 | (a.go:5) PCDATA $2, $0 45 | (a.go:5) PCDATA $0, $1 46 | (a.go:5) CALL runtime.panicindex(SB) 47 | (a.go:5) UNDEF 48 | 0x0000 48 83 ec 08 48 89 2c 24 48 8d 2c 24 48 8b 44 24 H...H.,$H.,$H.D$ 49 | 0x0010 18 48 83 f8 06 76 09 48 8b 2c 24 48 83 c4 08 c3 .H...v.H.,$H.... 50 | 0x0020 e8 00 00 00 00 0f 0b ....... 51 | rel 33+4 t=8 runtime.panicindex+0 52 | ``` 53 | 54 | ``` 55 | // b.co 56 | "".b STEXT nosplit size=1 args=0x28 locals=0x0 57 | (b.go:3) TEXT "".b(SB), NOSPLIT|ABIInternal, $0-40 58 | (b.go:3) FUNCDATA $0, gclocals·33cd... 59 | (b.go:3) FUNCDATA $1, gclocals·33cd... 60 | (b.go:3) FUNCDATA $3, gclocals·33cd... 61 | (b.go:6) RET 62 | ``` 63 | 64 | There seems to be way more happening in a.go than in b.go - about 20+ lines more, which seems surprising. 65 | 66 | A little too much though. That's probably because of optimizations by the compiler. Let's remove those with the -N option. 67 | 68 | ``` 69 | $ go tool compile -S -N a.go > a.co 70 | $ go tool compile -S -N b.go > b.co 71 | $ vimdiff a.co b.co 72 | ``` 73 | 74 | ``` 75 | "".a STEXT nosplit size=49 args=0x18 locals=0x10 76 | (a.go:3) TEXT "".a(SB), NOSPLIT|ABIInternal, $16-24 77 | (a.go:3) SUBQ $16, SP 78 | (a.go:3) MOVQ BP, 8(SP) 79 | (a.go:3) LEAQ 8(SP), BP 80 | (a.go:3) FUNCDATA $0, gclocals·1a65... 81 | (a.go:3) FUNCDATA $1, gclocals·69c1... 82 | (a.go:3) FUNCDATA $3, gclocals·33cd... 83 | (a.go:4) PCDATA $2, $0 84 | (a.go:4) PCDATA $0, $0 85 | (a.go:4) MOVQ $6, "".n(SP) 86 | (a.go:5) PCDATA $0, $1 87 | (a.go:5) CMPQ "".a+32(SP), $6 88 | (a.go:5) JHI 32 89 | (a.go:5) JMP 42 90 | (a.go:6) PCDATA $2, $-2 91 | (a.go:6) PCDATA $0, $-2 92 | (a.go:6) MOVQ 8(SP), BP 93 | (a.go:6) ADDQ $16, SP 94 | (a.go:6) RET 95 | (a.go:5) PCDATA $2, $0 96 | (a.go:5) PCDATA $0, $1 97 | (a.go:5) CALL runtime.panicindex(SB) 98 | (a.go:5) UNDEF 99 | 0x0000 48 83 ... 100 | 0x0010 04 24 ... 101 | 0x0020 48 8b ... 102 | 0x0030 0b 103 | rel 43+4 t=8 runtime.panicindex+0 104 | ``` 105 | 106 | ``` 107 | "".b STEXT nosplit size=34 args=0x28 locals=0x10 108 | (b.go:3) TEXT "".b(SB), NOSPLIT|ABIInternal, $16-40 109 | (b.go:3) SUBQ $16, SP 110 | (b.go:3) MOVQ BP, 8(SP) 111 | (b.go:3) LEAQ 8(SP), BP 112 | (b.go:3) FUNCDATA $0, gclocals·33cd... 113 | (b.go:3) FUNCDATA $1, gclocals·33cd... 114 | (b.go:3) FUNCDATA $3, gclocals·33cd... 
115 | (b.go:4) PCDATA $2, $0 116 | (b.go:4) PCDATA $0, $0 117 | (b.go:4) MOVQ $4, "".n(SP) 118 | (b.go:5) JMP 24 119 | (b.go:6) PCDATA $2, $-2 120 | (b.go:6) PCDATA $0, $-2 121 | (b.go:6) MOVQ 8(SP), BP 122 | (b.go:6) ADDQ $16, SP 123 | (b.go:6) RET 124 | 0x0000 48 83 ... 125 | 0x0010 04 24 ... 126 | 0x0020 10 c3 127 | ``` 128 | 129 | Even without the optimizations, the CPU has more instructions to run in the a.go case {n:=6} than in the b.go case {n:=len(b)-1}. 130 | 131 | There are some interesting differences between the two. The {n:=6} version has a compare statement (CMPQ) and panic statements (runtime.panicindex) while the other version does not have them. 132 | 133 | Let's also compile both with another option and see if we get any clues there. 134 | 135 | ``` 136 | $ go tool compile -d=ssa/check_bce/debug=1 a.go 137 | a.go:5:7: Found IsInBounds 138 | 139 | $ go tool compile -d=ssa/check_bce/debug=1 b.go 140 | ``` 141 | 142 | So, the compile tool shows no output with this option for b.go, while for a.go it says "Found IsInBounds" at line number 5 (\_ = a[n]). 143 | 144 | ### Bounds Check Elimination (bce) 145 | From Wikipedia: bounds-checking elimination is a compiler optimization useful in programming languages or runtimes that enforce bounds checking, the practice of checking every index into an array to verify that the index is within the defined valid range of indexes. Its goal is to detect which of these indexing operations do not need to be validated at runtime, and to eliminate those checks. 146 | 147 | When arrays and slices are accessed, Go provides safety by checking that the index is valid. This implies additional instructions. A language like C does not have this check; instead it is up to the programmer to add the check if required, or to skip it at their own risk. 148 | 149 | Go provides the check but is able to eliminate it in cases where it can prove that the index being accessed is within the allowed range. 150 | 151 | In the function ```func a(a []int) { n := 6; _ = a[n] }```, Go is not able to prove at compile time that the index 6 will be within the slice that is passed. However, in the function ```func b(b [5]int) { n := len(b) - 1; _ = b[n] }```, it is guaranteed that the index will be within the length of the array of size 5. Thus Go is able to optimize by eliminating the bounds check. 152 | 153 | Exercise: What if we passed a slice into b.go instead of an array? Is there still a bounds check? Why or why not? 154 | See c.go 155 | 156 | ``` 157 | 3 func c(b []int) { 158 | 4 n := len(b) - 1 159 | 5 _ = b[n] 160 | 6 } 161 | ``` 162 | 163 | ``` 164 | $ go tool compile -d=ssa/check_bce/debug=1 c.go 165 | c.go:5:7: Found IsInBounds 166 | ``` 167 | 168 | What is the bce output of the case below? Will the compiler be able to eliminate the bounds check? 169 | 170 | ``` 171 | // d.go 172 | func d(b []byte) { 173 | for i := 0; i < len(b); i++ { 174 | b[i] = 9 175 | } 176 | } 177 | ``` 178 | 179 | ``` 180 | $ go tool compile -d=ssa/check_bce/debug=1 d.go 181 | ``` 182 | 183 | When it is certain that the index cannot take a value outside the valid range (at either end), bce can happen.
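Before moving on to hints, it is worth checking whether an eliminated bounds check is even measurable. A benchmark along the following lines can be used; this is a minimal sketch that is not part of this repository (the function names and the 4 KB buffer are only illustrative):

```go
package main

import "testing"

// sumByLen bounds the loop by len(b); check_bce reports no bounds checks here,
// because i is provably in range.
func sumByLen(b []byte) int {
	total := 0
	for i := 0; i < len(b); i++ {
		total += int(b[i])
	}
	return total
}

// sumByN bounds the loop by a caller-supplied n; the compiler cannot prove
// n <= len(b), so an IsInBounds check stays inside the loop.
func sumByN(b []byte, n int) int {
	total := 0
	for i := 0; i < n; i++ {
		total += int(b[i])
	}
	return total
}

// sink keeps the results from being optimized away.
var sink int

func BenchmarkSumByLen(b *testing.B) {
	buf := make([]byte, 4096)
	for i := 0; i < b.N; i++ {
		sink = sumByLen(buf)
	}
}

func BenchmarkSumByN(b *testing.B) {
	buf := make([]byte, 4096)
	for i := 0; i < b.N; i++ {
		sink = sumByN(buf, len(buf))
	}
}
```

The difference is usually small and depends on the CPU and the loop body, so treat the check_bce output as the reliable signal and the benchmark only as confirmation.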
184 | 185 | ### Providing bce Hints 186 | 187 | Example 1 188 | 189 | ``` 190 | // e.go 191 | 3 func e(b []byte, n int) { 192 | 4 for i := 0; i < n; i++ { 193 | 5 b[i] = 9 194 | 6 } 195 | 7 } 196 | ``` 197 | 198 | ``` 199 | $ go tool compile -d=ssa/check_bce/debug=1 e.go 200 | e.go:5:8: Found IsInBounds 201 | ``` 202 | 203 | Given that the indexing happens inside a loop, the bounds check also runs on every iteration. Is there a way to reduce this? Perhaps a single check outside the loop, before it starts? 204 | 205 | ``` 206 | // f.go 207 | 3 func f(b []byte, n int) { 208 | 4 _ = b[n-1] 209 | 5 for i := 0; i < n; i++ { 210 | 6 b[i] = 9 211 | 7 } 212 | 8 } 213 | ``` 214 | 215 | ``` 216 | $ go tool compile -d=ssa/check_bce/debug=1 f.go 217 | f.go:4:7: Found IsInBounds 218 | ``` 219 | 220 | Having done the check once before the loop, the compiler is able to eliminate the remaining checks inside the loop. 221 | 222 | 223 | How about this one? There are 4 bounds checks in g1 below. Can we reduce them? 224 | 225 | Example 2 226 | 227 | ``` 228 | // g.go 229 | func g1(b []byte, v uint32) { 230 | b[0] = byte(v + 48) // Found IsInBounds 231 | b[1] = byte(v + 49) // Found IsInBounds 232 | b[2] = byte(v + 50) // Found IsInBounds 233 | b[3] = byte(v + 51) // Found IsInBounds 234 | } 235 | ``` 236 | 237 | ``` 238 | // g.go 239 | func g2(b []byte, v uint32) { 240 | b[3] = byte(v + 51) // Found IsInBounds 241 | b[0] = byte(v + 48) 242 | b[1] = byte(v + 49) 243 | b[2] = byte(v + 50) 244 | } 245 | ``` 246 | 247 | Example 3 248 | 249 | ``` 250 | // h.go 251 | func h1(b []byte, n int) { 252 | b[n+0] = byte(1) // Found IsInBounds 253 | b[n+1] = byte(2) // Found IsInBounds 254 | b[n+2] = byte(3) // Found IsInBounds 255 | b[n+3] = byte(4) // Found IsInBounds 256 | b[n+4] = byte(5) // Found IsInBounds 257 | b[n+5] = byte(6) // Found IsInBounds 258 | } 259 | ``` 260 | 261 | ``` 262 | func h2(b []byte, n int) { 263 | b = b[n : n+6] // Found IsSliceInBounds 264 | b[0] = byte(1) 265 | b[1] = byte(2) 266 | b[2] = byte(3) 267 | b[3] = byte(4) 268 | b[4] = byte(5) 269 | b[5] = byte(6) 270 | } 271 | ``` 272 | 273 | Example 4 274 | 275 | ``` 276 | func i1(a, b, c []byte) { 277 | for i := range a { 278 | a[i] = b[i] + c[i] // 5:11 Found IsInBounds and 5:12 Found IsInBounds 279 | } 280 | } 281 | ``` 282 | 283 | ``` 284 | func i2(a, b, c []byte) { 285 | _ = b[len(a)-1] // Found IsInBounds 286 | _ = c[len(a)-1] // Found IsInBounds 287 | for i := range a { 288 | a[i] = b[i] + c[i] 289 | } 290 | } 291 | ``` 292 | 293 |
-------------------------------------------------------------------------------- /code/coredump/coredump.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "log" 6 | "net/http" 7 | ) 8 | 9 | func main() { 10 | http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) { 11 | fmt.Fprint(w, "hello world\n") 12 | }) 13 | log.Fatal(http.ListenAndServe("localhost:7777", nil)) 14 | } 15 |
-------------------------------------------------------------------------------- /code/coredump/coredump.md: -------------------------------------------------------------------------------- 1 | Code that we can test a coredump with.
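A rough outline of producing and inspecting a core dump from this server on Linux; the core file name and location depend on the kernel's core_pattern, so the paths here are assumptions, and the full walkthrough is in the reference below:

```
$ ulimit -c unlimited
$ go build coredump.go
$ GOTRACEBACK=crash ./coredump    # press Ctrl+\ (SIGQUIT) while it runs to abort and dump core
$ dlv core ./coredump ./core      # open the dump with delve
```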
2 | 3 | Works on: Linux only 4 | 5 | Ref: 6 | https://rakyll.org/coredumps/ 7 | -------------------------------------------------------------------------------- /code/cover/cover.go: -------------------------------------------------------------------------------- 1 | package size 2 | 3 | func Size(a int) string { 4 | switch { 5 | case a < 0: 6 | return "negative" 7 | case a == 0: 8 | return "zero" 9 | case a < 10: 10 | return "small" 11 | case a < 100: 12 | return "big" 13 | case a < 1000: 14 | return "huge" 15 | } 16 | return "enormous" 17 | } 18 | -------------------------------------------------------------------------------- /code/cover/cover_test.go: -------------------------------------------------------------------------------- 1 | package size 2 | 3 | import "testing" 4 | 5 | type Test struct { 6 | in int 7 | out string 8 | } 9 | 10 | var tests = []Test{ 11 | {-1, "negative"}, 12 | {5, "small"}, 13 | } 14 | 15 | func TestSize(t *testing.T) { 16 | for i, test := range tests { 17 | size := Size(test.in) 18 | if size != test.out { 19 | t.Errorf("#%d: Size(%d)=%s; want %s", i, test.in, size, test.out) 20 | } 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /code/cover/readme.md: -------------------------------------------------------------------------------- 1 | ``` 2 | go test -covermode=count -coverprofile=count.out fmt 3 | go tool cover -html=count.out 4 | ``` 5 | 6 | For current folder: 7 | ``` 8 | go test -covermode=count -coverprofile=count.out 9 | go tool cover -html=count.out 10 | ``` 11 | -------------------------------------------------------------------------------- /code/defer/defer_test.go: -------------------------------------------------------------------------------- 1 | package mydefer 2 | 3 | import ( 4 | "sync" 5 | "testing" 6 | ) 7 | 8 | type T struct { 9 | mu sync.Mutex 10 | n int64 11 | } 12 | 13 | var t T 14 | 15 | func (t *T) CounterA() int64 { 16 | t.mu.Lock() 17 | defer t.mu.Unlock() 18 | return t.n 19 | } 20 | 21 | func (t *T) CounterB() (count int64) { 22 | t.mu.Lock() 23 | count = t.n 24 | t.mu.Unlock() 25 | return 26 | } 27 | 28 | func (t *T) IncreaseA() { 29 | t.mu.Lock() 30 | defer t.mu.Unlock() 31 | t.n++ 32 | } 33 | 34 | func (t *T) IncreaseB() { 35 | t.mu.Lock() 36 | t.n++ // this line will not panic for sure 37 | t.mu.Unlock() 38 | } 39 | 40 | func Benchmark_CounterA(b *testing.B) { 41 | for i := 0; i < b.N; i++ { 42 | t.CounterA() 43 | } 44 | } 45 | 46 | func Benchmark_CounterB(b *testing.B) { 47 | for i := 0; i < b.N; i++ { 48 | t.CounterB() 49 | } 50 | } 51 | 52 | func Benchmark_IncreaseA(b *testing.B) { 53 | for i := 0; i < b.N; i++ { 54 | t.IncreaseA() 55 | } 56 | } 57 | 58 | func Benchmark_IncreaseB(b *testing.B) { 59 | for i := 0; i < b.N; i++ { 60 | t.IncreaseB() 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /code/easyjson/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "encoding/json" 5 | ) 6 | 7 | //easyjson:json 8 | type JSONData struct { 9 | Data []string 10 | } 11 | 12 | func unmarshaljsonFn() { 13 | var j JSONData 14 | json.Unmarshal([]byte(`{"Data" : ["One", "Two", "Three"]} `), &j) 15 | } 16 | 17 | func easyjsonFn() { 18 | d := &JSONData{} 19 | d.UnmarshalJSON([]byte(`{"Data" : ["One", "Two", "Three"]} `)) 20 | } 21 | -------------------------------------------------------------------------------- /code/easyjson/main_easyjson.go: 
-------------------------------------------------------------------------------- 1 | // Code generated by easyjson for marshaling/unmarshaling. DO NOT EDIT. 2 | 3 | package main 4 | 5 | import ( 6 | json "encoding/json" 7 | 8 | easyjson "github.com/mailru/easyjson" 9 | jlexer "github.com/mailru/easyjson/jlexer" 10 | jwriter "github.com/mailru/easyjson/jwriter" 11 | ) 12 | 13 | // suppress unused package warning 14 | var ( 15 | _ *json.RawMessage 16 | _ *jlexer.Lexer 17 | _ *jwriter.Writer 18 | _ easyjson.Marshaler 19 | ) 20 | 21 | func easyjson89aae3efDecodeEasyjson(in *jlexer.Lexer, out *JSONData) { 22 | isTopLevel := in.IsStart() 23 | if in.IsNull() { 24 | if isTopLevel { 25 | in.Consumed() 26 | } 27 | in.Skip() 28 | return 29 | } 30 | in.Delim('{') 31 | for !in.IsDelim('}') { 32 | key := in.UnsafeString() 33 | in.WantColon() 34 | if in.IsNull() { 35 | in.Skip() 36 | in.WantComma() 37 | continue 38 | } 39 | switch key { 40 | case "Data": 41 | if in.IsNull() { 42 | in.Skip() 43 | out.Data = nil 44 | } else { 45 | in.Delim('[') 46 | if out.Data == nil { 47 | if !in.IsDelim(']') { 48 | out.Data = make([]string, 0, 4) 49 | } else { 50 | out.Data = []string{} 51 | } 52 | } else { 53 | out.Data = (out.Data)[:0] 54 | } 55 | for !in.IsDelim(']') { 56 | var v1 string 57 | v1 = string(in.String()) 58 | out.Data = append(out.Data, v1) 59 | in.WantComma() 60 | } 61 | in.Delim(']') 62 | } 63 | default: 64 | in.SkipRecursive() 65 | } 66 | in.WantComma() 67 | } 68 | in.Delim('}') 69 | if isTopLevel { 70 | in.Consumed() 71 | } 72 | } 73 | func easyjson89aae3efEncodeEasyjson(out *jwriter.Writer, in JSONData) { 74 | out.RawByte('{') 75 | first := true 76 | _ = first 77 | { 78 | const prefix string = ",\"Data\":" 79 | if first { 80 | first = false 81 | out.RawString(prefix[1:]) 82 | } else { 83 | out.RawString(prefix) 84 | } 85 | if in.Data == nil && (out.Flags&jwriter.NilSliceAsEmpty) == 0 { 86 | out.RawString("null") 87 | } else { 88 | out.RawByte('[') 89 | for v2, v3 := range in.Data { 90 | if v2 > 0 { 91 | out.RawByte(',') 92 | } 93 | out.String(string(v3)) 94 | } 95 | out.RawByte(']') 96 | } 97 | } 98 | out.RawByte('}') 99 | } 100 | 101 | // MarshalJSON supports json.Marshaler interface 102 | func (v JSONData) MarshalJSON() ([]byte, error) { 103 | w := jwriter.Writer{} 104 | easyjson89aae3efEncodeEasyjson(&w, v) 105 | return w.Buffer.BuildBytes(), w.Error 106 | } 107 | 108 | // MarshalEasyJSON supports easyjson.Marshaler interface 109 | func (v JSONData) MarshalEasyJSON(w *jwriter.Writer) { 110 | easyjson89aae3efEncodeEasyjson(w, v) 111 | } 112 | 113 | // UnmarshalJSON supports json.Unmarshaler interface 114 | func (v *JSONData) UnmarshalJSON(data []byte) error { 115 | r := jlexer.Lexer{Data: data} 116 | easyjson89aae3efDecodeEasyjson(&r, v) 117 | return r.Error() 118 | } 119 | 120 | // UnmarshalEasyJSON supports easyjson.Unmarshaler interface 121 | func (v *JSONData) UnmarshalEasyJSON(l *jlexer.Lexer) { 122 | easyjson89aae3efDecodeEasyjson(l, v) 123 | } 124 | -------------------------------------------------------------------------------- /code/easyjson/main_test.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import "testing" 4 | 5 | func Benchmark_unmarshaljson(b *testing.B) { 6 | for i := 0; i < b.N; i++ { 7 | unmarshaljsonFn() 8 | } 9 | } 10 | 11 | func Benchmark_easyjson(b *testing.B) { 12 | for i := 0; i < b.N; i++ { 13 | easyjsonFn() 14 | } 15 | } 16 | -------------------------------------------------------------------------------- 
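main_test.go above only exercises the unmarshal path. A matching pair for marshaling could look like the sketch below (not part of this repository). Note that json.Marshal would already dispatch to the generated MarshalJSON on JSONData, so a plain struct without generated methods is used here to isolate the reflection path; for the same reason, the existing Benchmark_unmarshaljson mostly measures encoding/json overhead on top of the generated decoder rather than pure reflection.

```go
package main

import (
	"encoding/json"
	"testing"
)

// plainData mirrors JSONData but has no generated methods,
// so encoding/json has to marshal it via reflection.
type plainData struct {
	Data []string
}

var (
	jd = JSONData{Data: []string{"One", "Two", "Three"}}
	pd = plainData{Data: []string{"One", "Two", "Three"}}
)

func Benchmark_marshaljson(b *testing.B) {
	for i := 0; i < b.N; i++ {
		if _, err := json.Marshal(&pd); err != nil {
			b.Fatal(err)
		}
	}
}

func Benchmark_easyjsonMarshal(b *testing.B) {
	for i := 0; i < b.N; i++ {
		if _, err := jd.MarshalJSON(); err != nil {
			b.Fatal(err)
		}
	}
}
```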
/code/easyjson/readme.md: -------------------------------------------------------------------------------- 1 | This requires the paths to be set up properly first: the local code needs to be in GOPATH, and the regular Go GOPATH also needs to be available for the tool. 2 | 3 | Once that is set, run: 4 | ```easyjson -all main.go``` 5 | to get the generated file, which ends in _easyjson.go 6 |
-------------------------------------------------------------------------------- /code/escape-analysis/1.go: -------------------------------------------------------------------------------- 1 | // go build -gcflags='-m' 1.go 2 | // go build -gcflags='-m -l' 1.go to avoid inlining 3 | // go build -gcflags='-m -l -m' 1.go for verbose comments. 4 | 5 | package main 6 | 7 | /* 8 | func f() { 9 | var i = 5 10 | i++ 11 | _ = i 12 | } 13 | 14 | func f_returns() int { 15 | var i = 5 16 | i++ 17 | return i 18 | } 19 | */ 20 | 21 | func f_returns_ptr() *int { 22 | var i = 5 23 | i++ 24 | return &i 25 | } 26 | 27 | func main() { 28 | //f() 29 | //f_returns() 30 | f_returns_ptr() 31 | } 32 |
-------------------------------------------------------------------------------- /code/escape-analysis/main.c: -------------------------------------------------------------------------------- 1 | // online c editor - https://onlinegdb.com/HySykSJoE 2 | 3 | #include <stdio.h> 4 | 5 | int* f() { 6 | int a; 7 | a = 10; 8 | return &a; 9 | } 10 | 11 | void main() 12 | { 13 | int* p = f(); 14 | printf("p is: %x\n", p); // p is 0 15 | printf("*p is: %d\n", *p); // segmentation fault 16 | 17 | // 18 | } 19 | 20 |
-------------------------------------------------------------------------------- /code/escape-analysis/readme.md: -------------------------------------------------------------------------------- 1 | * Returning the address of a local variable in C would cause errors. But it is possible in Go. 2 | * "Note that, unlike in C, it’s perfectly OK to return the address of a local variable; the storage associated with the variable survives after the function returns."
3 | * 4 | 5 | Run: 6 | go run -gcflags '-m -l' 1.go 7 | 8 | References: 9 | [Escape Analysis in Go](https://scvalex.net/posts/29/) 10 | -------------------------------------------------------------------------------- /code/file-io/1-file-io_test.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bufio" 5 | "io" 6 | "os" 7 | "testing" 8 | ) 9 | 10 | func BenchmarkWriteFile(b *testing.B) { 11 | for n := 0; n < b.N; n++ { 12 | f, err := os.Create("/tmp/test.txt") 13 | if err != nil { 14 | panic(err) 15 | } 16 | 17 | for i := 0; i < 100000; i++ { 18 | f.WriteString("some text!\n") 19 | } 20 | 21 | f.Close() 22 | } 23 | } 24 | 25 | func BenchmarkWriteFileBuffered(b *testing.B) { 26 | for n := 0; n < b.N; n++ { 27 | f, err := os.Create("/tmp/test.txt") 28 | if err != nil { 29 | panic(err) 30 | } 31 | 32 | w := bufio.NewWriter(f) 33 | 34 | for i := 0; i < 100000; i++ { 35 | w.WriteString("some text!\n") 36 | } 37 | 38 | w.Flush() 39 | f.Close() 40 | } 41 | } 42 | 43 | func BenchmarkReadFile(b *testing.B) { 44 | for n := 0; n < b.N; n++ { 45 | f, err := os.Open("/tmp/test.txt") 46 | if err != nil { 47 | panic(err) 48 | } 49 | 50 | b := make([]byte, 10) 51 | 52 | _, err = f.Read(b) 53 | for err == nil { 54 | _, err = f.Read(b) 55 | } 56 | if err != io.EOF { 57 | panic(err) 58 | } 59 | 60 | f.Close() 61 | } 62 | } 63 | 64 | func BenchmarkReadFileBuffered(b *testing.B) { 65 | for n := 0; n < b.N; n++ { 66 | f, err := os.Open("/tmp/test.txt") 67 | if err != nil { 68 | panic(err) 69 | } 70 | 71 | r := bufio.NewReader(f) 72 | 73 | _, err = r.ReadString('\n') 74 | for err == nil { 75 | _, err = r.ReadString('\n') 76 | } 77 | if err != io.EOF { 78 | panic(err) 79 | } 80 | 81 | f.Close() 82 | } 83 | } 84 | -------------------------------------------------------------------------------- /code/fmt/main_test.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "strconv" 6 | "testing" 7 | ) 8 | 9 | func fmtFn(i int) string { 10 | return fmt.Sprintf("%d", i) 11 | } 12 | 13 | func Benchmark_fmtFn(b *testing.B) { 14 | for i := 0; i < b.N; i++ { 15 | fmtFn(1234) 16 | } 17 | } 18 | 19 | func strconvFn(i int) string { 20 | return strconv.Itoa(i) 21 | } 22 | func Benchmark_strconvFn(b *testing.B) { 23 | for i := 0; i < b.N; i++ { 24 | strconvFn(1234) 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /code/gogc/mergesort.go: -------------------------------------------------------------------------------- 1 | // ref: https://hackernoon.com/parallel-merge-sort-in-go-fe14c1bc006 2 | 3 | // GOGC=off go run mergesort.go v1 && go tool trace v1.trace 4 | // GOGC=50 go run mergesort.go v1 && go tool trace v1.trace 5 | // GOGC=100 go run mergesort.go v1 && go tool trace v1.trace 6 | // GOGC=200 go run mergesort.go v1 && go tool trace v1.trace 7 | package main 8 | 9 | import ( 10 | "fmt" 11 | "math/rand" 12 | "os" 13 | "runtime/trace" 14 | "sync" 15 | "time" 16 | ) 17 | 18 | const max = 1 << 11 19 | 20 | func merge(s []int, middle int) { 21 | helper := make([]int, len(s)) 22 | copy(helper, s) 23 | 24 | helperLeft := 0 25 | helperRight := middle 26 | current := 0 27 | high := len(s) - 1 28 | 29 | for helperLeft <= middle-1 && helperRight <= high { 30 | if helper[helperLeft] <= helper[helperRight] { 31 | s[current] = helper[helperLeft] 32 | helperLeft++ 33 | } else { 34 | s[current] = helper[helperRight] 35 | helperRight++ 36 | } 
37 | current++ 38 | } 39 | 40 | for helperLeft <= middle-1 { 41 | s[current] = helper[helperLeft] 42 | current++ 43 | helperLeft++ 44 | } 45 | } 46 | 47 | func mergesortv1(s []int) { 48 | len := len(s) 49 | 50 | if len > 1 { 51 | middle := len / 2 52 | 53 | var wg sync.WaitGroup 54 | wg.Add(2) 55 | 56 | // First half 57 | go func() { 58 | defer wg.Done() 59 | mergesortv1(s[:middle]) 60 | }() 61 | 62 | // Second half 63 | go func() { 64 | defer wg.Done() 65 | mergesortv1(s[middle:]) 66 | }() 67 | 68 | // Wait that the two goroutines are completed 69 | wg.Wait() 70 | merge(s, middle) 71 | } 72 | } 73 | 74 | /* Sequential */ 75 | 76 | func mergesort(s []int) { 77 | if len(s) > 1 { 78 | middle := len(s) / 2 79 | mergesort(s[:middle]) 80 | mergesort(s[middle:]) 81 | merge(s, middle) 82 | } 83 | } 84 | 85 | func mergesortv2(s []int) { 86 | len := len(s) 87 | 88 | if len > 1 { 89 | if len <= max { // Sequential 90 | mergesort(s) 91 | } else { // Parallel 92 | middle := len / 2 93 | 94 | var wg sync.WaitGroup 95 | wg.Add(2) 96 | 97 | go func() { 98 | defer wg.Done() 99 | mergesortv2(s[:middle]) 100 | }() 101 | 102 | go func() { 103 | defer wg.Done() 104 | mergesortv2(s[middle:]) 105 | }() 106 | 107 | wg.Wait() 108 | merge(s, middle) 109 | } 110 | } 111 | } 112 | 113 | func mergesortv3(s []int) { 114 | len := len(s) 115 | 116 | if len > 1 { 117 | if len <= max { // Sequential 118 | mergesort(s) 119 | } else { // Parallel 120 | middle := len / 2 121 | 122 | var wg sync.WaitGroup 123 | wg.Add(1) 124 | 125 | go func() { 126 | defer wg.Done() 127 | mergesortv3(s[:middle]) 128 | }() 129 | 130 | mergesortv3(s[middle:]) 131 | 132 | wg.Wait() 133 | merge(s, middle) 134 | } 135 | } 136 | } 137 | 138 | // Generates a slice of size, size filled with random numbers 139 | func generateSlice(size int) []int { 140 | 141 | slice := make([]int, size, size) 142 | rand.Seed(time.Now().UnixNano()) 143 | for i := 0; i < size; i++ { 144 | slice[i] = rand.Intn(999) - rand.Intn(999) 145 | } 146 | return slice 147 | } 148 | 149 | func main() { 150 | version := "v1" 151 | if len(os.Args) == 2 { 152 | version = os.Args[1] 153 | } 154 | 155 | f, err := os.OpenFile(version+".trace", os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0644) 156 | if err != nil { 157 | fmt.Println("Error:", err) 158 | return 159 | } 160 | 161 | trace.Start(f) 162 | defer trace.Stop() 163 | 164 | for i := 0; i < 10000; i++ { 165 | s := generateSlice(10) 166 | 167 | switch version { 168 | case "v1": 169 | mergesortv1(s) 170 | case "v2": 171 | mergesortv2(s) 172 | case "v3": 173 | mergesortv3(s) 174 | } 175 | } 176 | 177 | } 178 | -------------------------------------------------------------------------------- /code/gomaxprocs/1.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "runtime" 6 | ) 7 | 8 | func main() { 9 | fmt.Println("runtime.NumCPU()=", runtime.NumCPU()) 10 | } 11 | -------------------------------------------------------------------------------- /code/gomaxprocs/mergesort.go: -------------------------------------------------------------------------------- 1 | // ref: https://hackernoon.com/parallel-merge-sort-in-go-fe14c1bc006 2 | 3 | // go run main.go [v1 (default) | v2 | v3 ] 4 | // GOMAXPROCS=1 go run mergesort.go v1 && go tool trace v1.trace 5 | // GOMAXPROCS=8 go run mergesort.go v1 && go tool trace v1.trace 6 | // GOMAXPROCS=18 go run mergesort.go v1 && go tool trace v1.trace 7 | package main 8 | 9 | import ( 10 | "fmt" 11 | "os" 12 | "runtime/trace" 13 | "sync" 
14 | ) 15 | 16 | const max = 1 << 11 17 | 18 | var s = []int{ 19 | 89, 123, 12, 9, 198, 1546, 108, 872, 93, 20 | 89, 123, 12, 9, 198, 1546, 108, 872, 93, 21 | 89, 123, 12, 9, 198, 1546, 108, 872, 93, 22 | 89, 123, 12, 9, 198, 1546, 108, 872, 93, 23 | 89, 123, 12, 9, 198, 1546, 108, 872, 93, 24 | 89, 123, 12, 9, 198, 1546, 108, 872, 93, 25 | 89, 123, 12, 9, 198, 1546, 108, 872, 93, 26 | 89, 123, 12, 9, 198, 1546, 108, 872, 93, 27 | 89, 123, 12, 9, 198, 1546, 108, 872, 93, 28 | 89, 123, 12, 9, 198, 1546, 108, 872, 93, 29 | 89, 123, 12, 9, 198, 1546, 108, 872, 93, 30 | 89, 123, 12, 9, 198, 1546, 108, 872, 93, 31 | 89, 123, 12, 9, 198, 1546, 108, 872, 93, 32 | 89, 123, 12, 9, 198, 1546, 108, 872, 93, 33 | 89, 123, 12, 9, 198, 1546, 108, 872, 93, 34 | 89, 123, 12, 9, 198, 1546, 108, 872, 93, 35 | 89, 123, 12, 9, 198, 1546, 108, 872, 93, 36 | 89, 123, 12, 9, 198, 1546, 108, 872, 93, 37 | 89, 123, 12, 9, 198, 1546, 108, 872, 93, 38 | 89, 123, 12, 9, 198, 1546, 108, 872, 93, 39 | 89, 123, 12, 9, 198, 1546, 108, 872, 93, 40 | 89, 123, 12, 9, 198, 1546, 108, 872, 93, 41 | 89, 123, 12, 9, 198, 1546, 108, 872, 93, 42 | 89, 123, 12, 9, 198, 1546, 108, 872, 93, 43 | 89, 123, 12, 9, 198, 1546, 108, 872, 93, 44 | 89, 123, 12, 9, 198, 1546, 108, 872, 93, 45 | 89, 123, 12, 9, 198, 1546, 108, 872, 93, 46 | 89, 123, 12, 9, 198, 1546, 108, 872, 93, 47 | 89, 123, 12, 9, 198, 1546, 108, 872, 93, 48 | 89, 123, 12, 9, 198, 1546, 108, 872, 93, 49 | 89, 123, 12, 9, 198, 1546, 108, 872, 93, 50 | 89, 123, 12, 9, 198, 1546, 108, 872, 93, 51 | 89, 123, 12, 9, 198, 1546, 108, 872, 93, 52 | 89, 123, 12, 9, 198, 1546, 108, 872, 93, 53 | 89, 123, 12, 9, 198, 1546, 108, 872, 93, 54 | 89, 123, 12, 9, 198, 1546, 108, 872, 93, 55 | 89, 123, 12, 9, 198, 1546, 108, 872, 93, 56 | 89, 123, 12, 9, 198, 1546, 108, 872, 93, 57 | 89, 123, 12, 9, 198, 1546, 108, 872, 93, 58 | 89, 123, 12, 9, 198, 1546, 108, 872, 93, 59 | 89, 123, 12, 9, 198, 1546, 108, 872, 93, 60 | 89, 123, 12, 9, 198, 1546, 108, 872, 93, 61 | 89, 123, 12, 9, 198, 1546, 108, 872, 93, 62 | 89, 123, 12, 9, 198, 1546, 108, 872, 93, 63 | 89, 123, 12, 9, 198, 1546, 108, 872, 93, 64 | 89, 123, 12, 9, 198, 1546, 108, 872, 93, 65 | 89, 123, 12, 9, 198, 1546, 108, 872, 93, 66 | 89, 123, 12, 9, 198, 1546, 108, 872, 93, 67 | 89, 123, 12, 9, 198, 1546, 108, 872, 93, 68 | 89, 123, 12, 9, 198, 1546, 108, 872, 93, 69 | 89, 123, 12, 9, 198, 1546, 108, 872, 93, 70 | 89, 123, 12, 9, 198, 1546, 108, 872, 93, 71 | 89, 123, 12, 9, 198, 1546, 108, 872, 93, 72 | 89, 123, 12, 9, 198, 1546, 108, 872, 93, 73 | 89, 123, 12, 9, 198, 1546, 108, 872, 93, 74 | 89, 123, 12, 9, 198, 1546, 108, 872, 93, 75 | 89, 123, 12, 9, 198, 1546, 108, 872, 93, 76 | 89, 123, 12, 9, 198, 1546, 108, 872, 93, 77 | 89, 123, 12, 9, 198, 1546, 108, 872, 93, 78 | 89, 123, 12, 9, 198, 1546, 108, 872, 93, 79 | 89, 123, 12, 9, 198, 1546, 108, 872, 93, 80 | 89, 123, 12, 9, 198, 1546, 108, 872, 93, 81 | 89, 123, 12, 9, 198, 1546, 108, 872, 93, 82 | 89, 123, 12, 9, 198, 1546, 108, 872, 93, 83 | 89, 123, 12, 9, 198, 1546, 108, 872, 93, 84 | 89, 123, 12, 9, 198, 1546, 108, 872, 93, 85 | 89, 123, 12, 9, 198, 1546, 108, 872, 93, 86 | 89, 123, 12, 9, 198, 1546, 108, 872, 93, 87 | 89, 123, 12, 9, 198, 1546, 108, 872, 93, 88 | 89, 123, 12, 9, 198, 1546, 108, 872, 93, 89 | 89, 123, 12, 9, 198, 1546, 108, 872, 93, 90 | 89, 123, 12, 9, 198, 1546, 108, 872, 93, 91 | 89, 123, 12, 9, 198, 1546, 108, 872, 93, 92 | 89, 123, 12, 9, 198, 1546, 108, 872, 93, 93 | 89, 123, 12, 9, 198, 1546, 108, 872, 93, 94 | 89, 123, 12, 9, 198, 1546, 108, 872, 
93, 95 | 89, 123, 12, 9, 198, 1546, 108, 872, 93, 96 | 89, 123, 12, 9, 198, 1546, 108, 872, 93, 97 | 89, 123, 12, 9, 198, 1546, 108, 872, 93, 98 | 89, 123, 12, 9, 198, 1546, 108, 872, 93, 99 | 89, 123, 12, 9, 198, 1546, 108, 872, 93, 100 | 89, 123, 12, 9, 198, 1546, 108, 872, 93, 101 | 89, 123, 12, 9, 198, 1546, 108, 872, 93, 102 | 89, 123, 12, 9, 198, 1546, 108, 872, 93, 103 | 89, 123, 12, 9, 198, 1546, 108, 872, 93, 104 | 89, 123, 12, 9, 198, 1546, 108, 872, 93, 105 | 89, 123, 12, 9, 198, 1546, 108, 872, 93, 106 | 89, 123, 12, 9, 198, 1546, 108, 872, 93, 107 | 89, 123, 12, 9, 198, 1546, 108, 872, 93, 108 | 89, 123, 12, 9, 198, 1546, 108, 872, 93, 109 | 89, 123, 12, 9, 198, 1546, 108, 872, 93, 110 | 89, 123, 12, 9, 198, 1546, 108, 872, 93, 111 | 89, 123, 12, 9, 198, 1546, 108, 872, 93, 112 | 89, 123, 12, 9, 198, 1546, 108, 872, 93, 113 | 89, 123, 12, 9, 198, 1546, 108, 872, 93, 114 | 89, 123, 12, 9, 198, 1546, 108, 872, 93, 115 | 89, 123, 12, 9, 198, 1546, 108, 872, 93, 116 | 89, 123, 12, 9, 198, 1546, 108, 872, 93, 117 | 89, 123, 12, 9, 198, 1546, 108, 872, 93, 118 | 89, 123, 12, 9, 198, 1546, 108, 872, 93, 119 | 89, 123, 12, 9, 198, 1546, 108, 872, 93, 120 | } 121 | 122 | func merge(s []int, middle int) { 123 | helper := make([]int, len(s)) 124 | copy(helper, s) 125 | 126 | helperLeft := 0 127 | helperRight := middle 128 | current := 0 129 | high := len(s) - 1 130 | 131 | for helperLeft <= middle-1 && helperRight <= high { 132 | if helper[helperLeft] <= helper[helperRight] { 133 | s[current] = helper[helperLeft] 134 | helperLeft++ 135 | } else { 136 | s[current] = helper[helperRight] 137 | helperRight++ 138 | } 139 | current++ 140 | } 141 | 142 | for helperLeft <= middle-1 { 143 | s[current] = helper[helperLeft] 144 | current++ 145 | helperLeft++ 146 | } 147 | } 148 | 149 | func mergesortv1(s []int) { 150 | len := len(s) 151 | 152 | if len > 1 { 153 | middle := len / 2 154 | 155 | var wg sync.WaitGroup 156 | wg.Add(2) 157 | 158 | // First half 159 | go func() { 160 | defer wg.Done() 161 | mergesortv1(s[:middle]) 162 | }() 163 | 164 | // Second half 165 | go func() { 166 | defer wg.Done() 167 | mergesortv1(s[middle:]) 168 | }() 169 | 170 | // Wait that the two goroutines are completed 171 | wg.Wait() 172 | merge(s, middle) 173 | } 174 | } 175 | 176 | /* Sequential */ 177 | 178 | func mergesort(s []int) { 179 | if len(s) > 1 { 180 | middle := len(s) / 2 181 | mergesort(s[:middle]) 182 | mergesort(s[middle:]) 183 | merge(s, middle) 184 | } 185 | } 186 | 187 | func mergesortv2(s []int) { 188 | len := len(s) 189 | 190 | if len > 1 { 191 | if len <= max { // Sequential 192 | mergesort(s) 193 | } else { // Parallel 194 | middle := len / 2 195 | 196 | var wg sync.WaitGroup 197 | wg.Add(2) 198 | 199 | go func() { 200 | defer wg.Done() 201 | mergesortv2(s[:middle]) 202 | }() 203 | 204 | go func() { 205 | defer wg.Done() 206 | mergesortv2(s[middle:]) 207 | }() 208 | 209 | wg.Wait() 210 | merge(s, middle) 211 | } 212 | } 213 | } 214 | 215 | func mergesortv3(s []int) { 216 | len := len(s) 217 | 218 | if len > 1 { 219 | if len <= max { // Sequential 220 | mergesort(s) 221 | } else { // Parallel 222 | middle := len / 2 223 | 224 | var wg sync.WaitGroup 225 | wg.Add(1) 226 | 227 | go func() { 228 | defer wg.Done() 229 | mergesortv3(s[:middle]) 230 | }() 231 | 232 | mergesortv3(s[middle:]) 233 | 234 | wg.Wait() 235 | merge(s, middle) 236 | } 237 | } 238 | } 239 | 240 | func main() { 241 | version := "v1" 242 | if len(os.Args) == 2 { 243 | version = os.Args[1] 244 | } 245 | 246 | f, err := 
os.OpenFile(version+".trace", os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0644) 247 | if err != nil { 248 | fmt.Println("Error:", err) 249 | return 250 | } 251 | trace.Start(f) 252 | defer trace.Stop() 253 | 254 | switch version { 255 | case "v1": 256 | mergesortv1(s) 257 | case "v2": 258 | mergesortv2(s) 259 | case "v3": 260 | mergesortv3(s) 261 | } 262 | 263 | } 264 | -------------------------------------------------------------------------------- /code/gomaxprocs/mergesort_test.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import "testing" 4 | 5 | func Benchmark_mergesortv1(b *testing.B) { 6 | for i := 0; i < b.N; i++ { 7 | mergesortv1(s) 8 | } 9 | } 10 | 11 | func Benchmark_mergesortv2(b *testing.B) { 12 | for i := 0; i < b.N; i++ { 13 | mergesortv2(s) 14 | } 15 | } 16 | 17 | 18 | func Benchmark_mergesortv3(b *testing.B) { 19 | for i := 0; i < b.N; i++ { 20 | mergesortv3(s) 21 | } 22 | } -------------------------------------------------------------------------------- /code/inline/inline.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import "fmt" 4 | 5 | func f() int { 6 | return 2 7 | } 8 | 9 | func main() { 10 | x := f() 11 | fmt.Println(x) 12 | } 13 | -------------------------------------------------------------------------------- /code/inline/readme.md: -------------------------------------------------------------------------------- 1 | go build -gcflags="-m" inline.go 2 | -------------------------------------------------------------------------------- /code/map-access/1-map_test.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "math/rand" 5 | "strconv" 6 | "testing" 7 | ) 8 | 9 | var NumItems int = 1000000 10 | 11 | func BenchmarkMapStringKeys(b *testing.B) { 12 | m := make(map[string]string) 13 | k := make([]string, 0) 14 | 15 | for i := 0; i < NumItems; i++ { 16 | key := strconv.Itoa(rand.Intn(NumItems)) 17 | //key += ` is the key value that is being used. ` 18 | key += ` is the key value that is being used and a shakespeare sonnet. ` + sonnet106 19 | m[key] = "value" + strconv.Itoa(i) 20 | k = append(k, key) 21 | } 22 | 23 | i := 0 24 | l := len(m) 25 | 26 | b.ResetTimer() 27 | for n := 0; n < b.N; n++ { 28 | if _, ok := m[k[i]]; ok { 29 | } 30 | 31 | i++ 32 | if i >= l { 33 | i = 0 34 | } 35 | } 36 | } 37 | 38 | func BenchmarkMapIntKeys(b *testing.B) { 39 | m := make(map[int]string) 40 | k := make([]int, 0) 41 | 42 | for i := 0; i < NumItems; i++ { 43 | key := rand.Intn(NumItems) 44 | m[key] = "value" + strconv.Itoa(i) 45 | k = append(k, key) 46 | } 47 | 48 | i := 0 49 | l := len(m) 50 | 51 | b.ResetTimer() 52 | for n := 0; n < b.N; n++ { 53 | if _, ok := m[k[i]]; ok { 54 | } 55 | 56 | i++ 57 | if i >= l { 58 | i = 0 59 | } 60 | } 61 | } 62 | 63 | var sonnet106 = `When in the chronicle of wasted time 64 | I see descriptions of the fairest wights, 65 | And beauty making beautiful old rhyme 66 | In praise of ladies dead, and lovely knights, 67 | Then, in the blazon of sweet beauty’s best, 68 | Of hand, of foot, of lip, of eye, of brow, 69 | I see their antique pen would have express’d 70 | Even such a beauty as you master now. 
71 | So all their praises are but prophecies 72 | Of this our time, all you prefiguring; 73 | And, for they look’d but with divining eyes, 74 | They had not skill enough your worth to sing: 75 | For we, which now behold these present days, 76 | Had eyes to wonder, but lack tongues to praise.` 77 | -------------------------------------------------------------------------------- /code/panic/a-panic-program.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | func main() { 4 | example(make([]string, 2, 4), "hello", 10) 5 | } 6 | 7 | //go:noinline 8 | func example(slice []string, str string, i int) { 9 | panic("Want stack trace") 10 | } 11 | -------------------------------------------------------------------------------- /code/panic/a-panic-program.md: -------------------------------------------------------------------------------- 1 | * code does a panic to show a stack trace. 2 | * shows that the hex value shows the program counter (PC) which points to the instruction after the one that crashed. 3 | 4 | Ref: 5 | https://www.ardanlabs.com/blog/2018/08/scheduling-in-go-part1.html 6 | -------------------------------------------------------------------------------- /code/parallelize/rand_strings_test.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "math/rand" 5 | "sync" 6 | "testing" 7 | "time" 8 | ) 9 | 10 | func init() { 11 | rand.Seed(time.Now().UnixNano()) 12 | } 13 | 14 | var s []string 15 | 16 | func RandString_Sequential() { 17 | for i := 0; i < 1000; i++ { 18 | s = append(s, RandString(100)) 19 | } 20 | } 21 | 22 | func Benchmark_Sequential(b *testing.B) { 23 | for i := 0; i < b.N; i++ { 24 | RandString_Sequential() 25 | } 26 | } 27 | 28 | func RandString_Concurrent() { 29 | for i := 0; i < 100000; i++ { 30 | go func() { 31 | s = append(s, RandString(100)) 32 | }() 33 | } 34 | } 35 | 36 | func Benchmark_Concurrent(b *testing.B) { 37 | for i := 0; i < b.N; i++ { 38 | RandString_Concurrent() 39 | } 40 | } 41 | 42 | var mu sync.Mutex 43 | 44 | func RandString_Locked_Mutex() { 45 | for i := 0; i < 100000; i++ { 46 | go func() { 47 | mu.Lock() 48 | defer mu.Unlock() 49 | 50 | s = append(s, RandString(100)) 51 | }() 52 | } 53 | } 54 | 55 | func Benchmark_Locked_Mutex(b *testing.B) { 56 | for i := 0; i < b.N; i++ { 57 | RandString_Locked_Mutex() 58 | } 59 | } 60 | 61 | var letters = []rune("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ") 62 | 63 | func RandString(n int) string { 64 | b := make([]rune, n) 65 | for i := range b { 66 | b[i] = letters[rand.Intn(len(letters))] 67 | } 68 | //time.Sleep(10 * time.Microsecond) 69 | return string(b) 70 | } 71 | -------------------------------------------------------------------------------- /code/profiler-labels/1_test.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "regexp" 6 | "runtime/pprof" 7 | "testing" 8 | ) 9 | 10 | var ss = []string{ 11 | `^[a-z]+\[[0-9]+\]$`, 12 | `foo.*`, 13 | `foo(.?)`, 14 | `foo.?`, 15 | `a(x*)b(y|z)c`, 16 | } 17 | 18 | func f(s string) { 19 | labels := pprof.Labels("pat", s) 20 | pprof.Do(context.Background(), labels, func(ctx context.Context) { 21 | // Do some work... 22 | r := regexp.MustCompile(s) 23 | _ = r 24 | 25 | //go update(ctx) // propagates labels in ctx. 
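// Added note: CPU profile samples taken while this closure runs carry the "pat" label,
// so `go tool pprof` can break the time down per pattern (the interactive `tags`
// command, or -tagfocus / -tagignore on the command line).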
26 | }) 27 | } 28 | 29 | func bench_f(b *testing.B, s string) { 30 | for i := 0; i < b.N; i++ { 31 | f(s) 32 | } 33 | } 34 | 35 | func Benchmark_0f(b *testing.B) { 36 | bench_f(b, ss[0]) 37 | } 38 | 39 | func Benchmark_1f(b *testing.B) { 40 | bench_f(b, ss[1]) 41 | } 42 | -------------------------------------------------------------------------------- /code/profiler/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | "regexp" 7 | ) 8 | 9 | func main() { 10 | var data string 11 | if len(os.Args) == 2 { 12 | data = os.Args[1] 13 | } 14 | 15 | id, ok := isGopher(data) 16 | if !ok { 17 | id = "stranger" 18 | } 19 | fmt.Printf("hello, %s\n", id) 20 | } 21 | 22 | func isGopher(email string) (string, bool) { 23 | re := regexp.MustCompile("^([[:alpha:]]+)@golang.org$") 24 | match := re.FindStringSubmatch(email) 25 | if len(match) == 2 { 26 | return match[1], true 27 | } 28 | return "", false 29 | } 30 | -------------------------------------------------------------------------------- /code/profiler/main_test.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import "testing" 4 | 5 | func Test_isGopher(t *testing.T) { 6 | 7 | tcs := []struct { 8 | in string 9 | exp bool 10 | expId string 11 | }{ 12 | { 13 | "", 14 | false, 15 | "", 16 | }, 17 | { 18 | "a@email.com", 19 | false, 20 | "", 21 | }, 22 | { 23 | "a@golang.org", 24 | true, 25 | "a", 26 | }, 27 | } 28 | 29 | for _, tc := range tcs { 30 | id, ok := isGopher(tc.in) 31 | if ok != tc.exp { 32 | t.Errorf("For input %s, expected: %t but got: %t", tc.in, tc.exp, ok) 33 | } 34 | if id != tc.expId { 35 | t.Errorf("For input %s, expected: %s but got: %s", tc.in, tc.expId, id) 36 | } 37 | } 38 | } 39 | 40 | func Benchmark_isGopher(b *testing.B) { 41 | 42 | tcs := []struct { 43 | in string 44 | exp bool 45 | expId string 46 | }{ 47 | { 48 | "a@golang.org", 49 | true, 50 | "a", 51 | }, 52 | } 53 | 54 | for i := 0; i < b.N; i++ { 55 | isGopher(tcs[0].in) 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /code/profiler/readme.md: -------------------------------------------------------------------------------- 1 | ``` 2 | go test -bench=. 
-cpuprofile=cpu.pprof 3 | 4 | go tool pprof cpu.pprof 5 | 6 | go-torch --binaryname web.test -b cpu.pprof 7 | 8 | pprof -http=:8080 cpu.pprof 9 | ``` 10 | -------------------------------------------------------------------------------- /code/regex/1-regex-compile_test.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "regexp" 5 | "testing" 6 | ) 7 | 8 | var testRegexp string = `^[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]+$` 9 | 10 | func BenchmarkMatchString(b *testing.B) { 11 | for n := 0; n < b.N; n++ { 12 | _, err := regexp.MatchString(testRegexp, "jsmith@example.com") 13 | if err != nil { 14 | panic(err) 15 | } 16 | } 17 | } 18 | 19 | func BenchmarkMatchStringCompiled(b *testing.B) { 20 | r, err := regexp.Compile(testRegexp) 21 | if err != nil { 22 | panic(err) 23 | } 24 | 25 | b.ResetTimer() 26 | for n := 0; n < b.N; n++ { 27 | r.MatchString("jsmith@example.com") 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /code/responsewriter/main_test.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "net/http" 6 | "net/http/httptest" 7 | "testing" 8 | ) 9 | 10 | func withoutSetHeader(w http.ResponseWriter, r *http.Request) { 11 | fmt.Fprintln(w, "hello, stranger") 12 | } 13 | 14 | func Benchmark_withoutSetHeader(b *testing.B) { 15 | for i := 0; i < b.N; i++ { 16 | req, _ := http.NewRequest("GET", "/", nil) 17 | 18 | rr := httptest.NewRecorder() 19 | handler := http.HandlerFunc(withoutSetHeader) 20 | 21 | handler.ServeHTTP(rr, req) 22 | } 23 | 24 | } 25 | 26 | func withSetHeader(w http.ResponseWriter, r *http.Request) { 27 | w.Header().Set("Content-Type", "text/plain") 28 | fmt.Fprintln(w, "hello, stranger") 29 | } 30 | 31 | func Benchmark_withSetHeader(b *testing.B) { 32 | for i := 0; i < b.N; i++ { 33 | req, _ := http.NewRequest("GET", "/", nil) 34 | 35 | rr := httptest.NewRecorder() 36 | handler := http.HandlerFunc(withSetHeader) 37 | 38 | handler.ServeHTTP(rr, req) 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /code/slices/1-array.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | func main() { 4 | var a [5]int 5 | var b [6]int 6 | 7 | b = a 8 | } 9 | -------------------------------------------------------------------------------- /code/slices/2-slice-of-array.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | func main() { 4 | var a [5]int 5 | s := a[0:3] 6 | s = a[:3] 7 | s = a[3:] 8 | 9 | // negative indexing is not allowed 10 | // s = a[0:-2] // compile error 11 | } 12 | -------------------------------------------------------------------------------- /code/slices/3-slice-backed-by-array.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import "fmt" 4 | 5 | func main() { 6 | a := [5]int{1, 2, 3, 4, 5} 7 | s := a[0:3] 8 | s[0] = 11 9 | fmt.Println(a, s) 10 | 11 | fmt.Printf("%p %p\n", &a, &s) 12 | fmt.Printf("%p %p\n", &a[0], &s[0]) 13 | } 14 | -------------------------------------------------------------------------------- /code/slices/4-appending-to-slice.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import "fmt" 4 | 5 | func main() { 6 | a := [5]int{1, 2, 3, 4, 5} 7 | s := a[0:3] 8 | 
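// Added note: at this point s has len 3 and cap 5 and shares a's backing array,
// so the first two appends below write into a[3] and a[4]; the third append
// exceeds the capacity, so s is moved to a new array and a stops changing.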
fmt.Println(a, s) 9 | 10 | s = append(s, 9) 11 | fmt.Println(a, s) 12 | 13 | s = append(s, 19) 14 | fmt.Println(a, s) 15 | 16 | s = append(s, 99) 17 | fmt.Println(a, s) 18 | } 19 | -------------------------------------------------------------------------------- /code/slices/5-make-slice.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | func main() { 4 | months := make([]int, 0, 12) 5 | months = append(months, 1) 6 | months = append(months, 7) 7 | } 8 | -------------------------------------------------------------------------------- /code/stack-and-heap/h.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | type T struct { 4 | a int 5 | } 6 | 7 | func h() *T { 8 | return &T{} 9 | } 10 | 11 | func main() { 12 | h() 13 | } 14 | -------------------------------------------------------------------------------- /code/stack-and-heap/h_test.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "os" 5 | "runtime/trace" 6 | "testing" 7 | ) 8 | 9 | func Benchmark_h(b *testing.B) { 10 | var t *T 11 | 12 | f, err := os.Create("h.prof") 13 | if err != nil { 14 | panic(err) 15 | } 16 | defer f.Close() 17 | 18 | err = trace.Start(f) 19 | if err != nil { 20 | panic(err) 21 | } 22 | 23 | for i := 0; i < b.N; i++ { 24 | t = h() 25 | } 26 | 27 | trace.Stop() 28 | 29 | b.StopTimer() 30 | 31 | _ = t 32 | } 33 | -------------------------------------------------------------------------------- /code/stack-and-heap/s.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | type T struct { 4 | a int 5 | } 6 | 7 | func s() T { 8 | return T{} 9 | } 10 | 11 | func main() { 12 | s() 13 | } 14 | -------------------------------------------------------------------------------- /code/stack-and-heap/s_test.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "os" 5 | "runtime/trace" 6 | "testing" 7 | ) 8 | 9 | func Benchmark_s(b *testing.B) { 10 | var t T 11 | 12 | f, err := os.Create("s.prof") 13 | if err != nil { 14 | panic(err) 15 | } 16 | defer f.Close() 17 | 18 | err = trace.Start(f) 19 | if err != nil { 20 | panic(err) 21 | } 22 | 23 | for i := 0; i < b.N; i++ { 24 | t = s() 25 | } 26 | 27 | trace.Stop() 28 | 29 | b.StopTimer() 30 | 31 | _ = t 32 | } 33 | -------------------------------------------------------------------------------- /code/string-concat/1-string-concat_test.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bytes" 5 | "strings" 6 | "testing" 7 | ) 8 | 9 | var strLen int = 1000 10 | 11 | func BenchmarkConcatString(b *testing.B) { 12 | var str string 13 | 14 | i := 0 15 | 16 | b.ResetTimer() 17 | for n := 0; n < b.N; n++ { 18 | str += "x" 19 | 20 | i++ 21 | if i >= strLen { 22 | i = 0 23 | str = "" 24 | } 25 | } 26 | } 27 | 28 | func BenchmarkConcatBuffer(b *testing.B) { 29 | var buffer bytes.Buffer 30 | 31 | i := 0 32 | 33 | b.ResetTimer() 34 | for n := 0; n < b.N; n++ { 35 | buffer.WriteString("x") 36 | 37 | i++ 38 | if i >= strLen { 39 | i = 0 40 | buffer = bytes.Buffer{} 41 | } 42 | } 43 | } 44 | 45 | func BenchmarkConcatBuilder(b *testing.B) { 46 | var builder strings.Builder 47 | 48 | i := 0 49 | 50 | b.ResetTimer() 51 | for n := 0; n < b.N; n++ { 52 | builder.WriteString("x") 53 | 54 | i++ 55 | if i >= strLen { 56 | i = 0 
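// Added note: assigning a zero strings.Builder on the next line discards the
// accumulated string so later iterations start fresh; builder.Reset() does the same.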
57 | builder = strings.Builder{} 58 | } 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /code/sync-once/1.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "html/template" 5 | ) 6 | 7 | var s = ` 8 |

{{.PageTitle}}

9 |
    10 | {{range .Todos}} 11 | {{if .Done}} 12 |
  • {{.Title}}
  • 13 | {{else}} 14 |
  • {{.Title}}
  • 15 | {{end}} 16 | {{end}} 17 |
18 | ` 19 | 20 | var t *template.Template 21 | 22 | func f() { 23 | t = template.Must(template.New("").Parse(s)) 24 | _ = t 25 | 26 | // do task with template 27 | } 28 | 29 | func main() { 30 | for i := 0; i < 10000; i++ { 31 | f() 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /code/sync-once/2.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "html/template" 5 | ) 6 | 7 | var s = ` 8 |

{{.PageTitle}}

9 |
    10 | {{range .Todos}} 11 | {{if .Done}} 12 |
  • {{.Title}}
  • 13 | {{else}} 14 |
  • {{.Title}}
  • 15 | {{end}} 16 | {{end}} 17 |
18 | ` 19 | 20 | var t *template.Template 21 | 22 | func f() { 23 | 24 | // do task with template 25 | } 26 | 27 | func main() { 28 | // costs time at load and maybe unused 29 | t = template.Must(template.New("").Parse(s)) 30 | _ = t 31 | 32 | for i := 0; i < 10000; i++ { 33 | f() 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /code/sync-once/3.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "html/template" 6 | "sync" 7 | ) 8 | 9 | var s = ` 10 |

{{.PageTitle}}

11 |
    12 | {{range .Todos}} 13 | {{if .Done}} 14 |
  • {{.Title}}
  • 15 | {{else}} 16 |
  • {{.Title}}
  • 17 | {{end}} 18 | {{end}} 19 |
20 | ` 21 | 22 | var t *template.Template 23 | var o sync.Once 24 | 25 | func g() { 26 | fmt.Println("within g()") 27 | t = template.Must(template.New("").Parse(s)) 28 | _ = t 29 | } 30 | 31 | func f() { 32 | // only done once and when used 33 | o.Do(g) 34 | 35 | // do task with template 36 | 37 | } 38 | 39 | func main() { 40 | for i := 0; i < 10000; i++ { 41 | f() 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /code/sync.pool/1_test.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bytes" 5 | "testing" 6 | ) 7 | 8 | func Benchmark_f1(b *testing.B) { 9 | for i := 0; i < b.N; i++ { 10 | f1() 11 | } 12 | } 13 | 14 | func f1() { 15 | s := &bytes.Buffer{} 16 | s.Write([]byte("dirty")) 17 | 18 | return 19 | } 20 | -------------------------------------------------------------------------------- /code/sync.pool/2_test.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bytes" 5 | "sync" 6 | "testing" 7 | ) 8 | 9 | var pool2 = sync.Pool{ 10 | New: func() interface{} { 11 | return &bytes.Buffer{} 12 | }, 13 | } 14 | 15 | func Benchmark_f2(b *testing.B) { 16 | for i := 0; i < b.N; i++ { 17 | f2() 18 | } 19 | } 20 | 21 | func f2() { 22 | // When getting from a Pool, you need to cast 23 | s := pool2.Get().(*bytes.Buffer) 24 | // We write to the object 25 | s.Write([]byte("dirty")) 26 | // Then put it back 27 | pool2.Put(s) 28 | 29 | return 30 | } 31 | -------------------------------------------------------------------------------- /code/sync.pool/book1_test.go: -------------------------------------------------------------------------------- 1 | // run: go test -bench=write1 -benchmem 2 | // vs 3 | // go test -bench=write2 -benchmem 4 | 5 | // study: difference in allocations and speed between the versions 6 | // expected: the one with sync.Pool should have lesser allocations. 7 | package main 8 | 9 | import ( 10 | "encoding/json" 11 | "testing" 12 | ) 13 | 14 | type Book struct { 15 | Author string 16 | Title string 17 | ISBN string 18 | } 19 | 20 | func write1(a, t string) { 21 | b := &Book{} 22 | b.Author = a 23 | b.Title = t 24 | b.ISBN = "abcd" 25 | data, _ := json.Marshal(b) 26 | _ = data 27 | } 28 | 29 | func Benchmark_write1(b *testing.B) { 30 | for i := 0; i < b.N; i++ { 31 | write1("harry", "rowling") 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /code/sync.pool/book2_test.go: -------------------------------------------------------------------------------- 1 | // run: go test -bench=write1 -benchmem 2 | // vs 3 | // go test -bench=write2 -benchmem 4 | 5 | // study: difference in allocations and speed between the versions 6 | // expected: the one with sync.Pool should have lesser allocations. 
7 | package main 8 | 9 | import ( 10 | "encoding/json" 11 | "sync" 12 | "testing" 13 | ) 14 | 15 | type Book2 struct { 16 | Author string 17 | Title string 18 | ISBN string 19 | } 20 | 21 | var bookPool = sync.Pool{ 22 | New: func() interface{} { 23 | return &Book2{} 24 | }, 25 | } 26 | 27 | func write2(a, t string) { 28 | b := bookPool.Get().(*Book2) 29 | b.Author = a 30 | b.Title = t 31 | b.ISBN = "abcd" 32 | data, _ := json.Marshal(b) 33 | _ = data 34 | 35 | bookPool.Put(b) 36 | } 37 | 38 | func Benchmark_write2(b *testing.B) { 39 | for i := 0; i < b.N; i++ { 40 | write2("harry", "rowling") 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /code/testing/search_test.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "strings" 5 | "testing" 6 | ) 7 | 8 | func BeginsWith(s, pat string) bool { 9 | return strings.HasPrefix(s, pat) 10 | 11 | } 12 | 13 | func Test_BeginsWith(t *testing.T) { 14 | tc := []struct { 15 | s, pat string 16 | exp bool 17 | }{ 18 | {"GoLang", "Go", true}, 19 | {"GoLang", "Java", false}, 20 | {"GoLang is awesome", "awe", false}, 21 | {"awesome is GoLang. - Yoda", "awe", true}, 22 | } 23 | 24 | for _, tt := range tc { 25 | if BeginsWith(tt.s, tt.pat) != tt.exp { 26 | t.Fail() 27 | } 28 | } 29 | } 30 | 31 | func Benchmark_BeginsWith(b *testing.B) { 32 | for i := 0; i < b.N; i++ { 33 | BeginsWith("GoLang", "Go") 34 | } 35 | } 36 | 37 | // forced allocations for benchmem 38 | /* 39 | func x() *string { 40 | s := "hello world there" 41 | return &s 42 | } 43 | 44 | func Benchmark_x(b *testing.B) { 45 | for i := 0; i < b.N; i++ { 46 | a := x() 47 | *a += *a 48 | _ = a 49 | } 50 | } 51 | */ 52 | -------------------------------------------------------------------------------- /code/tracing/mergesort.go: -------------------------------------------------------------------------------- 1 | // ref: https://hackernoon.com/parallel-merge-sort-in-go-fe14c1bc006 2 | 3 | // go run main.go [v1 (default) | v2 | v3 ] 4 | // GOMAXPROCS=1 go run mergesort.go v1 && go tool trace v1.trace 5 | // GOMAXPROCS=8 go run mergesort.go v1 && go tool trace v1.trace 6 | // GOMAXPROCS=18 go run mergesort.go v1 && go tool trace v1.trace 7 | package main 8 | 9 | import ( 10 | "fmt" 11 | "os" 12 | "runtime/trace" 13 | "sync" 14 | ) 15 | 16 | const max = 1 << 11 17 | 18 | var s = []int{ 19 | 89, 123, 12, 9, 198, 1546, 108, 872, 93, 20 | } 21 | 22 | func merge(s []int, middle int) { 23 | helper := make([]int, len(s)) 24 | copy(helper, s) 25 | 26 | helperLeft := 0 27 | helperRight := middle 28 | current := 0 29 | high := len(s) - 1 30 | 31 | for helperLeft <= middle-1 && helperRight <= high { 32 | if helper[helperLeft] <= helper[helperRight] { 33 | s[current] = helper[helperLeft] 34 | helperLeft++ 35 | } else { 36 | s[current] = helper[helperRight] 37 | helperRight++ 38 | } 39 | current++ 40 | } 41 | 42 | for helperLeft <= middle-1 { 43 | s[current] = helper[helperLeft] 44 | current++ 45 | helperLeft++ 46 | } 47 | } 48 | 49 | func mergesortv1(s []int) { 50 | len := len(s) 51 | 52 | if len > 1 { 53 | middle := len / 2 54 | 55 | var wg sync.WaitGroup 56 | wg.Add(2) 57 | 58 | // First half 59 | go func() { 60 | defer wg.Done() 61 | mergesortv1(s[:middle]) 62 | }() 63 | 64 | // Second half 65 | go func() { 66 | defer wg.Done() 67 | mergesortv1(s[middle:]) 68 | }() 69 | 70 | // Wait that the two goroutines are completed 71 | wg.Wait() 72 | merge(s, middle) 73 | } 74 | } 75 | 76 | /* Sequential 
*/ 77 | 78 | func mergesort(s []int) { 79 | if len(s) > 1 { 80 | middle := len(s) / 2 81 | mergesort(s[:middle]) 82 | mergesort(s[middle:]) 83 | merge(s, middle) 84 | } 85 | } 86 | 87 | func mergesortv2(s []int) { 88 | len := len(s) 89 | 90 | if len > 1 { 91 | if len <= max { // Sequential 92 | mergesort(s) 93 | } else { // Parallel 94 | middle := len / 2 95 | 96 | var wg sync.WaitGroup 97 | wg.Add(2) 98 | 99 | go func() { 100 | defer wg.Done() 101 | mergesortv2(s[:middle]) 102 | }() 103 | 104 | go func() { 105 | defer wg.Done() 106 | mergesortv2(s[middle:]) 107 | }() 108 | 109 | wg.Wait() 110 | merge(s, middle) 111 | } 112 | } 113 | } 114 | 115 | func mergesortv3(s []int) { 116 | len := len(s) 117 | 118 | if len > 1 { 119 | if len <= max { // Sequential 120 | mergesort(s) 121 | } else { // Parallel 122 | middle := len / 2 123 | 124 | var wg sync.WaitGroup 125 | wg.Add(1) 126 | 127 | go func() { 128 | defer wg.Done() 129 | mergesortv3(s[:middle]) 130 | }() 131 | 132 | mergesortv3(s[middle:]) 133 | 134 | wg.Wait() 135 | merge(s, middle) 136 | } 137 | } 138 | } 139 | 140 | func main() { 141 | version := "v1" 142 | if len(os.Args) == 2 { 143 | version = os.Args[1] 144 | } 145 | 146 | f, err := os.OpenFile(version+".trace", os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0644) 147 | if err != nil { 148 | fmt.Println("Error:", err) 149 | return 150 | } 151 | trace.Start(f) 152 | defer trace.Stop() 153 | 154 | switch version { 155 | case "v1": 156 | mergesortv1(s) 157 | case "v2": 158 | mergesortv2(s) 159 | case "v3": 160 | mergesortv3(s) 161 | } 162 | 163 | } 164 | -------------------------------------------------------------------------------- /code/tracing/mergesort_test.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import "testing" 4 | 5 | func Benchmark_mergesortv1(b *testing.B) { 6 | for i := 0; i < b.N; i++ { 7 | mergesortv1(s) 8 | } 9 | } 10 | 11 | func Benchmark_mergesortv2(b *testing.B) { 12 | for i := 0; i < b.N; i++ { 13 | mergesortv2(s) 14 | } 15 | } 16 | 17 | func Benchmark_mergesortv3(b *testing.B) { 18 | for i := 0; i < b.N; i++ { 19 | mergesortv3(s) 20 | } 21 | } 22 | 23 | func Test_mergesortv1(t *testing.T) { 24 | inp := []int{89, 123, 12, 9, 198, 1546, 108, 872, 93} 25 | exp := []int{9, 12, 89, 93, 108, 123, 198, 872, 1546} 26 | mergesortv1(inp) 27 | if inp[0] != exp[0] && inp[len(exp)-1] != exp[len(exp)-1] { 28 | t.Errorf("Test failed") 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /images/gogc/gogc-100.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sathishvj/optimizing-go-programs/cad75ef1e99faf139f0e637a5b7bac6b8776306e/images/gogc/gogc-100.png -------------------------------------------------------------------------------- /images/gogc/gogc-200.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sathishvj/optimizing-go-programs/cad75ef1e99faf139f0e637a5b7bac6b8776306e/images/gogc/gogc-200.png -------------------------------------------------------------------------------- /images/gogc/gogc-50.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sathishvj/optimizing-go-programs/cad75ef1e99faf139f0e637a5b7bac6b8776306e/images/gogc/gogc-50.png -------------------------------------------------------------------------------- /images/gogc/gogc-off.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/sathishvj/optimizing-go-programs/cad75ef1e99faf139f0e637a5b7bac6b8776306e/images/gogc/gogc-off.png -------------------------------------------------------------------------------- /images/gomaxprocs/gomaxprocs-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sathishvj/optimizing-go-programs/cad75ef1e99faf139f0e637a5b7bac6b8776306e/images/gomaxprocs/gomaxprocs-1.png -------------------------------------------------------------------------------- /images/gomaxprocs/gomaxprocs-18.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sathishvj/optimizing-go-programs/cad75ef1e99faf139f0e637a5b7bac6b8776306e/images/gomaxprocs/gomaxprocs-18.png -------------------------------------------------------------------------------- /images/gomaxprocs/gomaxprocs-8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sathishvj/optimizing-go-programs/cad75ef1e99faf139f0e637a5b7bac6b8776306e/images/gomaxprocs/gomaxprocs-8.png -------------------------------------------------------------------------------- /images/tracing/1-OS-process-and-its-threads.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sathishvj/optimizing-go-programs/cad75ef1e99faf139f0e637a5b7bac6b8776306e/images/tracing/1-OS-process-and-its-threads.png -------------------------------------------------------------------------------- /images/tracing/2-goroutines-on-a-thread.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sathishvj/optimizing-go-programs/cad75ef1e99faf139f0e637a5b7bac6b8776306e/images/tracing/2-goroutines-on-a-thread.png -------------------------------------------------------------------------------- /images/tracing/3-goroutines-on-a-blocking-thread.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sathishvj/optimizing-go-programs/cad75ef1e99faf139f0e637a5b7bac6b8776306e/images/tracing/3-goroutines-on-a-blocking-thread.png -------------------------------------------------------------------------------- /images/tracing/4-concurrency-and-parallelism.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sathishvj/optimizing-go-programs/cad75ef1e99faf139f0e637a5b7bac6b8776306e/images/tracing/4-concurrency-and-parallelism.png -------------------------------------------------------------------------------- /images/tracing/tracing-gc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sathishvj/optimizing-go-programs/cad75ef1e99faf139f0e637a5b7bac6b8776306e/images/tracing/tracing-gc.png -------------------------------------------------------------------------------- /images/tracing/view-goroutine.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sathishvj/optimizing-go-programs/cad75ef1e99faf139f0e637a5b7bac6b8776306e/images/tracing/view-goroutine.png -------------------------------------------------------------------------------- /images/tracing/view-trace.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/sathishvj/optimizing-go-programs/cad75ef1e99faf139f0e637a5b7bac6b8776306e/images/tracing/view-trace.png -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | ## Optimizing Go Programs 2 | 3 | This is a collation of tools and techniques that will help optimize Go programs. It is divided into two parts - Tools and Techniques. Tools look at the go command line tools that help you instrument your code. Techniques look at a list of ideas that you could potentially use to gain performance. 4 | 5 | At the end are also a large list of references of the posts that I went through to understand it myself. I have personally worked on, reworked, tried, and tested all the code (from about April 2019 to June 2019 - so far). However, as the large list of references show, I am indebted to others. I have often liberally adopted and adapted their thoughts and, occasionally, their notes. 6 | 7 | ## Go Tools for Optimization 8 | * [Testing](#testing) 9 | * [Coverage](#coverage) 10 | * [Benchmarking](#benchmarking) 11 | * [Profiling](#profiling) 12 | * [Tracing](#tracing) 13 | - how to read the views 14 | - tagging sections 15 | * environment variables 16 | - [GOMAXPROCS](#gomaxprocs) 17 | - [GOGC](#gogc) 18 | * go memory analysis 19 | - [stack and heap](#stack-and-heap) 20 | - [escape analysis](#escape-analysis) 21 | * [Inlining](#inlining) 22 | 23 | ## Go Techniques for Optimization 24 | * [Parallelize CPU Work](#parallelize-cpu-work) 25 | * [Bounds Check Elimination](#bounds-check-elimination) 26 | * [sync Pools](#syncpools) 27 | * [sync once and lazy initializations](#synconce-for-lazy-initialization) 28 | * [Arrays and Slices](#arrays-and-slices) 29 | - how do slices work internally. allocation and reuse. 30 | * [String Concatenation](#string-concatenation) 31 | * [Map Keys: int vs string](#map-keys-int-vs-string) 32 | * [JSON Unmarshaling](#json-unmarshaling) 33 | * [File I/O](#file-io) 34 | * [Regexp Compilation](#regexp-compilation) 35 | * [Defer](#defer) 36 | * [fmt vs strconv](#fmt-vs-strconv) 37 | * [Explicitly Set Derived Values](#explicitly-set-derived-values) 38 | 39 | * [Go Performance Patterns](#go-performance-patterns) 40 | 41 | ## Testing 42 | 43 | *What do we need?* The ability to validate and verify our code (before customers test it). 44 | 45 | Unit testing is important enough to be a standard library. 46 | 47 | To write tests in Go: 48 | * the file name must end in ```_test.go``` 49 | * the test function should start with ```Test``` 50 | * the function signature is ```func Test_someFn(t *testing.T) { ... }``` 51 | 52 | 53 | ```code/testing``` 54 | 55 | ``` 56 | func BeginsWith(s, pat string) bool { 57 | return strings.HasPrefix(s, pat) 58 | } 59 | 60 | func Test_BeginsWith(t *testing.T) { 61 | tc := []struct { 62 | s, pat string 63 | exp bool 64 | }{ 65 | {"GoLang", "Go", true}, 66 | {"GoLang", "Java", false}, 67 | {"GoLang is awesome", "awe", false}, 68 | {"awesome is GoLang. - Yoda", "awe", true}, 69 | } 70 | 71 | for _, tt := range tc { 72 | if BeginsWith(tt.s, tt.pat) != tt.exp { 73 | t.Fail() 74 | } 75 | } 76 | } 77 | ``` 78 | 79 | ``` 80 | $ go test -v 81 | === RUN Test_BeginsWith 82 | --- PASS: Test_BeginsWith (0.00s) 83 | PASS 84 | ``` 85 | 86 | Testing validates your code. It checks for correctness. 
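As the table grows, subtests make individual failures easier to pinpoint and run in isolation. A minimal sketch (not part of this repo's code) that wraps the same cases in t.Run:

```
func Test_BeginsWith_Subtests(t *testing.T) {
	tc := []struct {
		name, s, pat string
		exp          bool
	}{
		{"prefix matches", "GoLang", "Go", true},
		{"no match", "GoLang", "Java", false},
	}

	for _, tt := range tc {
		t.Run(tt.name, func(t *testing.T) {
			if got := BeginsWith(tt.s, tt.pat); got != tt.exp {
				t.Errorf("BeginsWith(%q, %q) = %v, want %v", tt.s, tt.pat, got, tt.exp)
			}
		})
	}
}
```

A single case can then be targeted with ```go test -run=Test_BeginsWith_Subtests/no_match -v``` (spaces in subtest names become underscores).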
87 | 88 | ```Tip: unit testing first, always.``` 89 | ```Tip: keep unit testing running and watching for file changes. (see, codeskyblue/fswatch)``` 90 | 91 | p.s. When you run benchmarks, tests are run first. 92 | 93 | ## Coverage 94 | 95 | *What do we need?* So we've written tests, but does it cover all our code? 96 | 97 | The Go tooling also gives you coverage results. Less code is faster code. Tested and covered code is more reliable code. 98 | 99 | ```code/cover``` 100 | 101 | ``` 102 | go test -covermode=count -coverprofile=count.out fmt 103 | go tool cover -html=count.out 104 | ``` 105 | 106 | Red areas have had zero coverage. The brighter green sections have been covered more than the duller green sections. 107 | 108 | For current folder: 109 | ``` 110 | go test -covermode=count -coverprofile=count.out 111 | go tool cover -html=count.out 112 | ``` 113 | 114 | ```Tip: Keep coverage as a check-in metric objective. Or at least track coverage history in your build tool.``` 115 | 116 | ## Benchmarking 117 | 118 | *What do we need?* The ability to instrument specific functions and see where it is spending time or allocating resources. 119 | 120 | Benchmarking checks for optimization. 121 | 122 | ```code/testing``` 123 | 124 | ``` 125 | func Benchmark_BeginsWith(b *testing.B) { 126 | for i := 0; i < b.N; i++ { 127 | BeginsWith("GoLang", "Go") 128 | } 129 | } 130 | ``` 131 | 132 | ``` 133 | $ go test -v -bench=. -benchmem 134 | === RUN Test_BeginsWith 135 | --- PASS: Test_BeginsWith (0.00s) 136 | goos: darwin 137 | goarch: amd64 138 | Benchmark_BeginsWith-8 500000000 3.69 ns/op 0 B/op 0 allocs/op 139 | PASS 140 | ``` 141 | 142 | Benchmarking functions don't always care about the result (that is checked by unit testing). However, the speed/allocations/blocking of a function could be dependent on the inputs - so test different inputs. 143 | 144 | ```Tip: Map optimization goals to business SLOs and SLAs.``` 145 | 146 | ### Benchcmp 147 | 148 | Use benchcmp to easily compare between benchmarks. 149 | 150 | ``` 151 | $ go test -run=NONE -bench=. ./... > old.txt 152 | // make changes 153 | $ go test -run=NONE -bench=. ./... > new.txt 154 | 155 | $ benchcmp old.txt new.txt 156 | 157 | benchmark old ns/op new ns/op delta 158 | BenchmarkConcat 523 68.6 -86.88% 159 | 160 | benchmark old allocs new allocs delta 161 | BenchmarkConcat 3 1 -66.67% 162 | 163 | benchmark old bytes new bytes delta 164 | BenchmarkConcat 80 48 -40.00% 165 | ``` 166 | 167 | ## Profiling 168 | 169 | *What do we need?* The ability to instrument and analyze execution metrics. 170 | 171 | Package pprof writes runtime profiling data in the format expected by the pprof visualization tool. 172 | 173 | The first step to profiling a Go program is to enable profiling. Support for profiling benchmarks built with the standard testing package is built into go test. 174 | 175 | ``` 176 | func isGopher(email string) (string, bool) { 177 | re := regexp.MustCompile("^([[:alpha:]]+)@golang.org$") 178 | match := re.FindStringSubmatch(email) 179 | if len(match) == 2 { 180 | return match[1], true 181 | } 182 | return "", false 183 | } 184 | 185 | func Benchmark_isGopher(b *testing.B) { 186 | 187 | tcs := []struct { 188 | in string 189 | exp bool 190 | expId string 191 | }{ 192 | { 193 | "a@golang.org", 194 | true, 195 | "a", 196 | }, 197 | } 198 | 199 | for i := 0; i < b.N; i++ { 200 | isGopher(tcs[0].in) 201 | } 202 | } 203 | ``` 204 | 205 | ``` 206 | go test -bench=. 
-cpuprofile=cpu.pprof 207 | 208 | go tool pprof cpu.pprof 209 | 210 | go-torch --binaryname web.test -b cpu.pprof 211 | open torch.svg 212 | ``` 213 | 214 | More recently (1.10?), pprof got its own UI. 215 | 216 | ``` 217 | $ go get github.com/google/pprof 218 | ``` 219 | 220 | The tool launches a web UI if -http flag is provided. For example, in order to launch the UI with an existing profile data, run the following command: 221 | 222 | 223 | ``` 224 | pprof -http=:8080 cpu.pprof 225 | ``` 226 | 227 | There is also a standard HTTP interface to profiling data. Adding the following line will install handlers under the /debug/pprof/ URL to download live profiles: 228 | 229 | ``` 230 | import _ "net/http/pprof" 231 | See the net/http/pprof package for more details. 232 | ``` 233 | 234 | ## M, P, G 235 | 236 | *Question*: How does concurrency work in Go? How is it different from threads? 237 | 238 | OS Layout 239 | 240 | ![OS Layout](./images/tracing/1-OS-process-and-its-threads.png) 241 | 242 | Goroutines on a Thread 243 | 244 | ![Goroutines on a Thread](./images/tracing/2-goroutines-on-a-thread.png) 245 | 246 | Goroutines on Blocking Thread 247 | 248 | ![Goroutines on Blocking Thread](./images/tracing/3-goroutines-on-a-blocking-thread.png) 249 | 250 | Concurrency and Parallelism 251 | 252 | ![Concurrency and Parallelism](./images/tracing/4-concurrency-and-parallelism.png) 253 | 254 | ## Tracing 255 | 256 | https://blog.gopheracademy.com/advent-2017/go-execution-tracer/ 257 | 258 | Ever wondered how are your goroutines being scheduled by the go runtime? Ever tried to understand why adding concurrency to your program has not given it better performance? The go execution tracer can help answer these and other questions to help you diagnose performance issues, e.g, latency, contention and poor parallelization. 259 | 260 | Data is collected by the tracer without any kind of aggregation or sampling. In some busy applications this may result in a large file. 261 | 262 | While the CPU profiler does a nice job to telling you what function is spending most CPU time, it does not help you figure out what is preventing a goroutine from running or how are the goroutines being scheduled on the available OS threads. That’s precisely where the tracer really shines. 263 | 264 | ### Ways to get a Trace 265 | 266 | * Using the runtime/trace pkg 267 | This involved calling trace.Start and trace.Stop and was covered in our “Hello, Tracing” example. 268 | 269 | * Using -trace= test flag 270 | This is useful to collect trace information about code being tested and the test itself. 271 | 272 | ```code/tracing``` 273 | ``` 274 | go test -trace=a.out && go tool trace a.out 275 | ``` 276 | 277 | * Using debug/pprof/trace handler 278 | This is the best method to collect tracing from a running web application. 279 | 280 | ### View Trace 281 | 282 | ``` 283 | go tool trace trace_file.out 284 | ``` 285 | 286 | ![View Trace](./images/tracing/view-trace.png) 287 | 288 | 1. Timeline 289 | Shows the time during the execution and the units of time may change depending on the navigation. One can navigate the timeline by using keyboard shortcuts (WASD keys, just like video games). 290 | 2. Heap 291 | Shows memory allocations during the execution, this can be really useful to find memory leaks and to check how much memory the garbage collection is being able to free at each run. 292 | 3. Goroutines 293 | Shows how many goroutines are running and how many are runnable (waiting to be scheduled) at each point in time. 
A high number of runnable goroutines may indicate scheduling contention, e.g, when the program creates too many goroutines and is causing the scheduler to work too hard. 294 | 4. OS Threads 295 | Shows how many OS threads are being used and how many are blocked by syscalls. 296 | 5. Virtual Processors 297 | Shows a line for each virtual processor. The number of virtual processors is controlled by the GOMAXPROCS environment variable (defaulting to the number of cores). 298 | 6. Goroutines and events 299 | Displays where/what goroutine is running on each virtual processor. Lines connecting goroutines represent events. In the example image, we can see that the goroutine “G1 runtime.main” spawned two different goroutines: G6 and G5 (the former is the goroutine responsible for collecting the trace data and the latter is the one we started using the “go” keyword). 300 | A second row per processor may show additional events such as syscalls and runtime events. This also includes some work that the goroutine does on behalf of the runtime (e.g assisting the garbage collector). 301 | 302 | 303 | ### View Goroutine 304 | 305 | ![View Trace](./images/tracing/view-goroutine.png) 306 | 307 | This information includes: 308 | 309 | * Its “name” (Title) 310 | * When it started (Start) 311 | * Its duration (Wall Duration) 312 | * The stack trace when it started 313 | * The stack trace when it finished 314 | * Events generated by this goroutine 315 | 316 | ### Tracing Example 317 | 318 | ```code/tracing``` 319 | 320 | ``` 321 | func main() { 322 | f, _ := os.OpenFile(version+".trace", os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0644) 323 | trace.Start(f) 324 | defer trace.Stop() 325 | 326 | mergesortv1(s) 327 | 328 | } 329 | 330 | ``` 331 | 332 | ``` 333 | go run mergesort.go v1 && go tool trace v1.trace 334 | ``` 335 | 336 | ### Tracing GC 337 | 338 | The trace tool gives you a very good view into when the GC kicks in, when it is run, and how you could potentially optimize for it. 339 | 340 | ![Tracing GC](./images/tracing/tracing-gc.png) 341 | 342 | ### Tracing Conclusion 343 | The tracer is a powerful tool for debugging concurrency issues, e.g, contentions and logical races. But it does not solve all problems: it is not the best tool available to track down what piece of code is spending most CPU time or allocations. The go tool pprof is better suited for these use cases. 344 | 345 | The tool really shines when you want to understand the behavior of a program over time and to know what each goroutine is doing when NOT running. Collecting traces may have some overhead and can generate a high amount of data to be inspected. 346 | 347 | ## GOMAXPROCS 348 | 349 | Discussion: for a program to be more efficient should you have more threads/goroutines or less? 350 | 351 | Discussion: goroutines are kinda sorta similar to threads. So why don't we just use threads instead of goroutines? 352 | 353 | Threads typically take up more resources than goroutines - a minimum thread stack typically is upwards of 1MB. 354 | A goroutine typically starts of at 2kb. So, that's, at a very minimum, a reduction of 500x. Anything else though? 355 | 356 | Context switching in Linux is about 1000ns while in go it is about 200ns - https://eli.thegreenplace.net/2018/measuring-context-switching-and-memory-overheads-for-linux-threads/ 357 | 358 | A primary cost factor is contention. Programs that has parallelism does not necessarily have higher performance because of greater contention for resources. 359 | 360 | ### What is GOMAXPROCS? 
361 | The GOMAXPROCS setting controls how many operating systems threads attempt to execute code simultaneously. For example, if GOMAXPROCS is 4, then the program will only execute code on 4 operating system threads at once, even if there are 1000 goroutines. The limit does not count threads blocked in system calls such as I/O. 362 | 363 | GOMAXPROCS can be set explicitly using the GOMAXPROCS environment variable or by calling runtime.GOMAXPROCS from within a program. 364 | 365 | ```code/gomaxprocs``` 366 | 367 | ``` 368 | func main() { 369 | fmt.Println("runtime.NumCPU()=", runtime.NumCPU()) 370 | } 371 | ``` 372 | 373 | On my quad-core CPU it prints: 374 | ``` 375 | runtime.NumCPU()= 8 376 | ``` 377 | 378 | Why is it showing 8 for NumCPU for a quad-core machine? The Intel chips on my machine is hyperthreaded - for each processor core that is physically present, the operating system addresses two virtual (logical) cores and shares the workload between them when possible. 379 | 380 | ### What should be the value of GOMAXPROCS? 381 | 382 | The default setting of GOMAXPROCS in all Go releases [up to 1.4] is 1, because programs with frequent goroutine switches ran much slower when using multiple threads. It is much cheaper to switch between two goroutines in the same thread than to switch between two goroutines in different threads. 383 | 384 | Goroutine scheduling affinity and other improvements to the scheduler have largely addressed the problem, by keeping goroutines that are concurrent but not parallel in the same thread. 385 | 386 | For Go 1.5, the default setting of GOMAXPROCS to the number of CPUs available, as determined by runtime.NumCPU. 387 | 388 | ### Running with different GOMAXPROCS 389 | 390 | ``` 391 | GOMAXPROCS=1 go run mergesort.go v1 & go tool trace v1.trace 392 | ``` 393 | 394 | ![GOMAXPROCS=1](./images/gomaxprocs/gomaxprocs-1.png) 395 | 396 | ``` 397 | GOMAXPROCS=8 go run mergesort.go v1 & go tool trace v1.trace 398 | ``` 399 | 400 | ![GOMAXPROCS=8](./images/gomaxprocs/gomaxprocs-8.png) 401 | 402 | ``` 403 | GOMAXPROCS=18 go run mergesort.go v1 & go tool trace v1.trace 404 | ``` 405 | 406 | ![GOMAXPROCS=18](./images/gomaxprocs/gomaxprocs-18.png) 407 | 408 | The number is the max possible and it is not required that the Go runtime create as many logical processors as you have specified. 409 | 410 | ### Exercise 411 | ```gocode/gomaxprocs``` 412 | 413 | Run the following and see the differences in the trace. 414 | 415 | ``` 416 | GOMAXPROCS=1 go run mergesort.go v2 && go tool trace v2.trace 417 | GOMAXPROCS=8 go run mergesort.go v2 && go tool trace v2.trace 418 | GOMAXPROCS=18 go run mergesort.go v2 && go tool trace v2.trace 419 | 420 | GOMAXPROCS=1 go run mergesort.go v3 && go tool trace v3.trace 421 | GOMAXPROCS=8 go run mergesort.go v3 && go tool trace v3.trace 422 | GOMAXPROCS=18 go run mergesort.go v3 && go tool trace v3.trace 423 | ``` 424 | 425 | ```Opt Tip: Do not assume that increasing the number of GOMAXPROCS always improves speed.``` 426 | 427 | ## GOGC 428 | 429 | *Question:* If GC is so important, can we adjust GC parameters? Can we change the GC algorithm? 430 | 431 | The GOGC variable sets the initial garbage collection target percentage. A collection is triggered when the ratio of freshly allocated data to live data remaining after the previous collection reaches this percentage. The default is GOGC=100. Setting GOGC=off disables the garbage collector entirely. The runtime/debug package's SetGCPercent function allows changing this percentage at run time. 
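For example, a program can tune this on its own at startup instead of relying on the environment variable. A minimal sketch (not part of this repo's code) using runtime/debug:

```
package main

import (
	"fmt"
	"runtime/debug"
)

func main() {
	// Equivalent to GOGC=200: the next collection is triggered when the heap
	// has grown 200% over the live data left by the previous collection.
	// SetGCPercent returns the previous setting.
	old := debug.SetGCPercent(200)
	fmt.Println("previous GOGC:", old)

	// A negative value disables the collector, like GOGC=off.
	debug.SetGCPercent(-1)
}
```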
432 | 433 | GOGC controls the aggressiveness of the garbage collector. 434 | 435 | Setting this value higher, say GOGC=200, will delay the start of a garbage collection cycle until the live heap has grown to 200% of the previous size. Setting the value lower, say GOGC=20 will cause the garbage collector to be triggered more often as less new data can be allocated on the heap before triggering a collection. 436 | 437 | With the introduction of the low latency collector in Go 1.5, phrases like “trigger a garbage collection cycle” become more fluid, but the underlying message that values of GOGC greater than 100 mean the garbage collector will run less often, and for values of GOGC less than 100, more often 438 | 439 | 440 | ### Exercise 441 | ```gocode/gogc``` 442 | 443 | Run the following and see the differences in the trace for heap and GC. 444 | 445 | ``` 446 | GOGC=off go run mergesort.go v1 & go tool trace v1.trace 447 | GOGC=50 go run mergesort.go v1 & go tool trace v1.trace 448 | GOGC=100 go run mergesort.go v1 & go tool trace v1.trace 449 | GOGC=200 go run mergesort.go v1 & go tool trace v1.trace 450 | ``` 451 | 452 | GOGC=off 453 | ![GOGC=off](./images/gogc/gogc-off.png) 454 | 455 | GOGC=50 456 | ![GOGC=50](./images/gogc/gogc-50.png) 457 | 458 | GOGC=100 459 | ![GOGC=100](./images/gogc/gogc-100.png) 460 | 461 | GOGC=200 462 | ![GOGC=200](./images/gogc/gogc-200.png) 463 | 464 | ```Opt Tip: This helps you analyze your GC patterns but I can't find any posts that recommend this as a good performance tuning strategy.``` 465 | 466 | 467 | ## Stack and Heap 468 | 469 | *Discussion:* where is the stack memory shown in a trace diagram? Is knowing stack and heap allocation important? How about in languages like Python, Java, JavaScript, etc.? 470 | 471 | ref: https://scvalex.net/posts/29/ 472 | 473 | ### Stack Frame 474 | ref: http://www.cs.uwm.edu/classes/cs315/Bacon/Lecture/HTML/ch10s07.html 475 | 476 | The stack frame, also known as activation record is the collection of all data on the stack associated with one subprogram call. 477 | 478 | The stack frame generally includes the following components: 479 | 480 | * The return address 481 | * Argument variables passed on the stack 482 | * Local variables (in HLLs) 483 | * Saved copies of any registers modified by the subprogram that need to be restored 484 | 485 | The Stack 486 | --------- 487 | 488 | ``` 489 | | f() | 490 | | | 491 | +---------------+ 492 | | func f(){ | \ 493 | | g() | } Stack frame of calling function f() 494 | | } | / 495 | +---------------+ 496 | | func g() { | \ 497 | | a := 10 | } Stack frame of called function: g() 498 | | } | / 499 | +---------------+ 500 | ================= // invalid below this 501 | ``` 502 | 503 | As the function call returns, the stack unwinds leaving previous stack frames invalid. 504 | 505 | ``` 506 | | f() | 507 | | | 508 | +---------------+ 509 | | func f(){ | \ 510 | | g() | } Stack frame of calling function f() 511 | | } | / 512 | +---------------+ 513 | ================= // invalid below this 514 | | func g() { | \ 515 | | a := 10 | } Stack frame of called function: g() 516 | | | / 517 | +---------------+ 518 | ``` 519 | 520 | All local variables are no more accessible. In C, returning a pointer to a local variable would cause a segmentation fault. 
521 | 522 | ``` 523 | // online c editor - https://onlinegdb.com/HySykSJoE 524 | 525 | #include 526 | 527 | int* f() { 528 | int a; 529 | a = 10; 530 | return &a; 531 | } 532 | 533 | void main() 534 | { 535 | int* p = f(); 536 | printf("p is: %x\n", p); // p is 0 537 | printf("*p is: %d\n", *p); // segmentation fault 538 | 539 | // 540 | } 541 | ``` 542 | 543 | ## Escape Analysis 544 | 545 | In C, returning the reference of a local variable causes a segfault because that memory is no more valid. 546 | 547 | ``` 548 | // online c editor - https://onlinegdb.com/HySykSJoE 549 | 550 | #include 551 | 552 | int* f() { 553 | int a; 554 | a = 10; 555 | return &a; 556 | } 557 | 558 | void main() 559 | { 560 | int* p = f(); 561 | printf("p is: %x\n", p); // p is 0 562 | printf("*p is: %d\n", *p); // segmentation fault 563 | 564 | // 565 | } 566 | ``` 567 | 568 | In Go, it is allowed to return the reference of a local variable. 569 | 570 | ``` 571 | package main 572 | 573 | import ( 574 | "fmt" 575 | ) 576 | 577 | func f() *int { 578 | x := 10 579 | return &x 580 | } 581 | 582 | func main() { 583 | fmt.Println(*f()) // prints 10 584 | } 585 | ``` 586 | 587 | How is that possible? 588 | 589 | From Effective Go: "Note that, unlike in C, it’s perfectly OK to return the address of a local variable; the storage associated with the variable survives after the function returns." 590 | 591 | "When possible, the Go compilers will allocate variables that are local to a function in that function’s stack frame. However, if the compiler cannot prove that the variable is not referenced after the function returns, then the compiler must allocate the variable on the garbage-collected heap to avoid dangling pointer errors. In the current compilers, if a variable has its address taken, that variable is a candidate for allocation on the heap. However, a basic escape analysis recognizes some cases when such variables will not live past the return from the function and can reside on the stack." 592 | 593 | *Can we figure out when variables escape to the heap?* 594 | 595 | ``` 596 | // go build -gcflags='-m' 1.go 597 | // go build -gcflags='-m -l' 1.go to avoid inlining 598 | // go build -gcflags='-m -l -m' 1.go for verbose comments. 599 | ``` 600 | 601 | ``` 602 | func f() { 603 | var i = 5 604 | i++ 605 | _ = i 606 | } 607 | 608 | func main() { 609 | f() 610 | } 611 | ``` 612 | 613 | ``` 614 | $ go build -gcflags='-m -l -m' 1.go 615 | // returns nothing. 616 | ``` 617 | 618 | ``` 619 | func f_returns() int { 620 | var i = 5 621 | i++ 622 | return i 623 | } 624 | 625 | func main() { 626 | f_returns() 627 | } 628 | ``` 629 | 630 | ``` 631 | $ go build -gcflags='-m -l -m' 1.go 632 | // returns nothing. 633 | ``` 634 | 635 | ``` 636 | func f_returns_ptr() *int { 637 | var i = 5 638 | i++ 639 | return &i 640 | } 641 | 642 | func main() { 643 | f_returns_ptr() 644 | } 645 | ``` 646 | 647 | ``` 648 | $ go build -gcflags='-m -l -m' 1.go 649 | # command-line-arguments 650 | ./1.go:24:9: &i escapes to heap 651 | ./1.go:24:9: from ~r0 (return) at ./1.go:24:2 652 | ./1.go:22:6: moved to heap: 653 | ``` 654 | 655 | Once the variable is on the heap, there is pressure on the Garbage Collector. 656 | 657 | Garbage collection is a convenient feature of Go - automatic memory management makes code cleaner and memory leaks less likely. However, GC also adds overhead as the program periodically needs to stop and collect unused objects. 
The Go compiler is smart enough to automatically decide whether a variable should be allocated on the heap, where it will later need to be garbage collected, or whether it can be allocated as part of the stack frame of the function which declared it. Stack-allocated variables, unlike heap-allocated variables, don’t incur any GC overhead because they’re destroyed when the rest of the stack frame is destroyed - when the function returns. 658 | 659 | To perform escape analysis, Go builds a graph of function calls at compile time, and traces the flow of input arguments and return values. 660 | 661 | However, if there are variables to be shared, it is appropriate for it to be on the heap. 662 | 663 | ```Tip: If you’ve profiled your program’s heap usage and need to reduce GC time, there may be some wins from moving frequently allocated variables off the heap. ``` 664 | 665 | See: https://segment.com/blog/allocation-efficiency-in-high-performance-go-services/ 666 | See: http://www.agardner.me/golang/garbage/collection/gc/escape/analysis/2015/10/18/go-escape-analysis.html 667 | 668 | 669 | ## Inlining 670 | 671 | "By default, the inliner will try to inline leaf function (doesn't call other functions/method/interfaces) that doesn't call panic or recover or select or switch or create closure or go/defer functions (see example below) and which is less than 40 nodes when represented (roughly corresponding to 40 simple operations). But please beware that this only describes the current status quo of the gc compiler, and it will surely improve in the future. Thus please try not to depend on this." 672 | 673 | Inlining is done by the compiler automatically and there is no way to indicate that the piece of code should be inlined. Sometimes, your performance benchmarks might look suspiciously skewed because the code has been inlined. Then use the ```-gcflags="-m"``` option to see if the compiler has inlined your code. 674 | ``` 675 | // code/inline/inline.go 676 | package main 677 | 678 | import "fmt" 679 | 680 | func f() int { 681 | return 2 682 | } 683 | 684 | func main() { 685 | x := f() 686 | fmt.Println(x) 687 | } 688 | ``` 689 | 690 | ``` 691 | $ go build -gcflags="-m" inline.go 692 | # command-line-arguments 693 | ./inline.go:5:6: can inline f 694 | ./inline.go:9:6: can inline main 695 | ./inline.go:10:8: inlining call to f 696 | ./inline.go:11:13: inlining call to fmt.Println 697 | ./inline.go:11:13: x escapes to heap 698 | ./inline.go:11:13: io.Writer(os.Stdout) escapes to heap 699 | ./inline.go:11:13: main []interface {} literal does not escape 700 | :1: os.(*File).close .this does not escape 701 | ``` 702 | 703 | 704 | ## Parallelize CPU work 705 | 706 | ### Queueing theory 707 | Ref: [What happens when you add another teller? - by John D. Cook](https://www.johndcook.com/blog/2008/10/21/what-happens-when-you-add-a-new-teller/) 708 | 709 | "Suppose a small bank has only one teller. Customers take an average of 10 minutes to serve and they arrive at the rate of 5.8 per hour. What will the expected waiting time be? What happens if you add another teller? 710 | 711 | We assume customer arrivals and customer service times are random. With only one teller, customers will have to wait nearly five hours on average before they are served. But if you add a second teller, the average waiting time is not just cut in half; it goes down to about 3 minutes. The waiting time is reduced by a factor of 93x." 
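The numbers in that example follow from standard M/M/c queueing formulas. A small sketch (not part of this repo's code), assuming Poisson arrivals and exponential service times, that reproduces them:

```
package main

import (
	"fmt"
	"math"
)

func factorial(n int) float64 {
	f := 1.0
	for i := 2; i <= n; i++ {
		f *= float64(i)
	}
	return f
}

// erlangC returns the probability that an arrival must wait in an M/M/c queue,
// where a = lambda/mu is the offered load and c is the number of servers.
func erlangC(a float64, c int) float64 {
	sum := 0.0
	for k := 0; k < c; k++ {
		sum += math.Pow(a, float64(k)) / factorial(k)
	}
	top := math.Pow(a, float64(c)) / (factorial(c) * (1 - a/float64(c)))
	return top / (sum + top)
}

func main() {
	lambda := 5.8 // customers per hour
	mu := 6.0     // customers served per hour (10 minutes each)
	a := lambda / mu

	for _, c := range []int{1, 2} {
		wq := erlangC(a, c) / (float64(c)*mu - lambda) // mean wait in queue, hours
		fmt.Printf("tellers=%d: average wait ~ %.1f minutes\n", c, wq*60)
	}
}
```

With one teller the mean wait comes out to roughly 290 minutes, and with two tellers roughly 3 minutes - about a 95x reduction with these formulas, in the same ballpark as the 93x quoted above.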
712 | 713 | When the work can be parallelized without too much synchronization, taking advantage of all available cores can speed up execution linearly to the number of physical cores. 714 | 715 | ```code/parallelize/rand_strings_test.go``` 716 | 717 | ``` 718 | var letters = []rune("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ") 719 | 720 | func RandString(n int) string { 721 | b := make([]rune, n) 722 | for i := range b { 723 | b[i] = letters[rand.Intn(len(letters))] 724 | } 725 | time.Sleep(10 * time.Microsecond) 726 | return string(b) 727 | } 728 | 729 | func RandString_Sequential() { 730 | for i := 0; i < 1000; i++ { 731 | s = append(s, RandString(100)) 732 | } 733 | } 734 | 735 | func RandString_Concurrent() { 736 | for i := 0; i < 1000; i++ { 737 | go func() { 738 | s = append(s, RandString(100)) 739 | }() 740 | } 741 | } 742 | 743 | ``` 744 | 745 | ``` 746 | $ go test -bench=Sequential rand_strings_test.go 747 | Benchmark_Concurrent-8 200 26,936,125 ns/op 748 | ``` 749 | 750 | ``` 751 | $ go test -bench=Concurrent rand_strings_test.go 752 | Benchmark_Concurrent-8 50 9,422,900 ns/op 753 | ``` 754 | 755 | Running the code concurrently has given you a 3x performance improvement. 756 | 757 | Now run it with the //time.Sleep commented out. 758 | 759 | ``` 760 | $ go test -bench=Sequential rand_strings_test.go 761 | Benchmark_Sequential-8 500 3,865,565 ns/op 762 | ``` 763 | 764 | ``` 765 | ok command-line-arguments 2.354s 766 | $ go test -bench=Concurrent rand_strings_test.go 767 | Benchmark_Concurrent-8 200 9,539,612 ns/op 768 | ok command-line-arguments 2.991s 769 | ``` 770 | 771 | Now we see a 3x drop in performance! 772 | 773 | Consider tight loops. Tight loops do not allow the runtime scheduler to schedule goroutines efficiently. 774 | 775 | But consider contention. If concurrent lines of work are stuck waiting for common resources, you're going to have worse performance. 776 | 777 | ```Tip: Concurrency is good. But have 'mechanical sympathy'.``` 778 | 779 | 780 | ## Bounds Check Elimination 781 | 782 | Let's compare what the compiled output of these two fairly similar programs are. 783 | 784 | ``` 785 | // a.go 786 | 3 func a(a []int) { 787 | 4 n := 6 788 | 5 _ = a[n] 789 | 6 } 790 | ``` 791 | 792 | ``` 793 | // b.go 794 | 3 func b(b [5]int) { 795 | 4 n := len(b) - 1 796 | 5 _ = b[n] 797 | 6 } 798 | ``` 799 | 800 | ``` 801 | $ go tool compile -S a.go > a.co 802 | $ go tool compile -S b.go > b.co 803 | $ vimdiff a.co b.co 804 | ``` 805 | 806 | ``` 807 | "".a STEXT nosplit size=39 args=0x18 locals=0x8 808 | (a.go:3) TEXT "".a(SB), NOSPLIT|ABIInternal, $8-24 809 | (a.go:3) SUBQ $8, SP 810 | (a.go:3) MOVQ BP, (SP) 811 | (a.go:3) LEAQ (SP), BP 812 | (a.go:3) FUNCDATA $0, gclocals·1a65... 813 | (a.go:3) FUNCDATA $1, gclocals·69c1... 814 | (a.go:3) FUNCDATA $3, gclocals·33cd... 815 | (a.go:5) PCDATA $2, $0 816 | (a.go:5) PCDATA $0, $1 817 | (a.go:5) MOVQ "".a+24(SP), AX 818 | (a.go:5) CMPQ AX, $6 819 | (a.go:5) JLS 32 820 | (a.go:6) PCDATA $2, $-2 821 | (a.go:6) PCDATA $0, $-2 822 | (a.go:6) MOVQ (SP), BP 823 | (a.go:6) ADDQ $8, SP 824 | (a.go:6) RET 825 | (a.go:5) PCDATA $2, $0 826 | (a.go:5) PCDATA $0, $1 827 | (a.go:5) CALL runtime.panicindex(SB) 828 | (a.go:5) UNDEF 829 | 0x0000 48 83 ec 08 48 89 2c 24 48 8d 2c 24 48 8b 44 24 H...H.,$H.,$H.D$ 830 | 0x0010 18 48 83 f8 06 76 09 48 8b 2c 24 48 83 c4 08 c3 .H...v.H.,$H.... 831 | 0x0020 e8 00 00 00 00 0f 0b ....... 
832 | rel 33+4 t=8 runtime.panicindex+0 833 | ``` 834 | 835 | ``` 836 | // b.co 837 | "".b STEXT nosplit size=1 args=0x28 locals=0x0 838 | (b.go:3) TEXT "".b(SB), NOSPLIT|ABIInternal, $0-40 839 | (b.go:3) FUNCDATA $0, gclocals·33cd... 840 | (b.go:3) FUNCDATA $1, gclocals·33cd... 841 | (b.go:3) FUNCDATA $3, gclocals·33cd... 842 | (b.go:6) RET 843 | ``` 844 | 845 | There seems to be way more happening in a.go than in b.go - about 20+ lines more, which seems surprising. 846 | 847 | A little too much though. That's probably because of optimizations by the compiler. Let's remove those with the -N option. 848 | 849 | ``` 850 | $ go tool compile -S -N a.go > a.co 851 | $ go tool compile -S -N b.go > b.co 852 | $ vimdiff a.co b.co 853 | ``` 854 | 855 | ``` 856 | "".a STEXT nosplit size=49 args=0x18 locals=0x10 857 | (a.go:3) TEXT "".a(SB), NOSPLIT|ABIInternal, $16-24 858 | (a.go:3) SUBQ $16, SP 859 | (a.go:3) MOVQ BP, 8(SP) 860 | (a.go:3) LEAQ 8(SP), BP 861 | (a.go:3) FUNCDATA $0, gclocals·1a65... 862 | (a.go:3) FUNCDATA $1, gclocals·69c1... 863 | (a.go:3) FUNCDATA $3, gclocals·33cd... 864 | (a.go:4) PCDATA $2, $0 865 | (a.go:4) PCDATA $0, $0 866 | (a.go:4) MOVQ $6, "".n(SP) 867 | (a.go:5) PCDATA $0, $1 868 | (a.go:5) CMPQ "".a+32(SP), $6 869 | (a.go:5) JHI 32 870 | (a.go:5) JMP 42 871 | (a.go:6) PCDATA $2, $-2 872 | (a.go:6) PCDATA $0, $-2 873 | (a.go:6) MOVQ 8(SP), BP 874 | (a.go:6) ADDQ $16, SP 875 | (a.go:6) RET 876 | (a.go:5) PCDATA $2, $0 877 | (a.go:5) PCDATA $0, $1 878 | (a.go:5) CALL runtime.panicindex(SB) 879 | (a.go:5) UNDEF 880 | 0x0000 48 83 ... 881 | 0x0010 04 24 ... 882 | 0x0020 48 8b ... 883 | 0x0030 0b 884 | rel 43+4 t=8 runtime.panicindex+0 885 | ``` 886 | 887 | ``` 888 | "".b STEXT nosplit size=34 args=0x28 locals=0x10 889 | (b.go:3) TEXT "".b(SB), NOSPLIT|ABIInternal, $16-40 890 | (b.go:3) SUBQ $16, SP 891 | (b.go:3) MOVQ BP, 8(SP) 892 | (b.go:3) LEAQ 8(SP), BP 893 | (b.go:3) FUNCDATA $0, gclocals·33cd... 894 | (b.go:3) FUNCDATA $1, gclocals·33cd... 895 | (b.go:3) FUNCDATA $3, gclocals·33cd... 896 | (b.go:4) PCDATA $2, $0 897 | (b.go:4) PCDATA $0, $0 898 | (b.go:4) MOVQ $4, "".n(SP) 899 | (b.go:5) JMP 24 900 | (b.go:6) PCDATA $2, $-2 901 | (b.go:6) PCDATA $0, $-2 902 | (b.go:6) MOVQ 8(SP), BP 903 | (b.go:6) ADDQ $16, SP 904 | (b.go:6) RET 905 | 0x0000 48 83 ... 906 | 0x0010 04 24 ... 907 | 0x0020 10 c3 908 | ``` 909 | 910 | Even without the optimizations, there are more instructions that the CPU has to run in the case of a.go {n:=6} more than b.go {n:=len(b)-1}. 911 | 912 | There are some interesting differences between the two. The {n:=6} version has a compare statement (CMPQ) and panic statements (runtime.panicindex) while the other version does not have them. 913 | 914 | Let's also compile both with another option and see if we get any clues there. 915 | 916 | ``` 917 | $ go tool compile -d=ssa/check_bce/debug=1 a.go 918 | a.go:5:7: Found IsInBounds 919 | 920 | $ go tool compile -d=ssa/check_bce/debug=1 b.go 921 | ``` 922 | 923 | So, the compile tool shows no output with this option for b.go while a.go says "Found IsInBounds" at line number 5 (\_ = a[n]). 924 | 925 | ### BCE Definition 926 | From Wikipedia: bounds-checking elimination is a compiler optimization useful in programming languages or runtimes that enforce bounds checking, the practice of checking every index into an array to verify that the index is within the defined valid range of indexes. 
Its goal is to detect which of these indexing operations do not need to be validated at runtime, and to eliminate those checks. 927 | 928 | When arrays and slices are accessed, Go provides safety by checking that the index is valid. This implies additional instructions. A language like C does not have this check; instead it is up to the programmer to add it where required, or to skip it at their own risk. 929 | 930 | Go provides the check but is able to eliminate it in certain cases, when it can prove that the index being accessed is within the allowed range. 931 | 932 | In the function ```func a(a []int) { n := 6; _ = a[n] }```, Go is not able to prove at compile time that the index 6 will be within the slice that is passed. However, in the function ```func b(b [5]int) { n := len(b) - 1; _ = b[n] }```, it is guaranteed that the index will be within the length of the array of size 5. Thus Go is able to optimize by eliminating the bounds check. 933 | 934 | Exercise: What if we passed a slice into b.go instead of an array? Is there still a bounds check? Why or why not? 935 | See c.go 936 | 937 | ``` 938 | 3 func c(b []int) { 939 | 4 n := len(b) - 1 940 | 5 _ = b[n] 941 | 6 } 942 | ``` 943 | 944 | ``` 945 | $ go tool compile -d=ssa/check_bce/debug=1 c.go 946 | c.go:5:7: Found IsInBounds 947 | ``` 948 | 949 | What is the bce output of the case below? Will the compiler be able to eliminate the bounds check? 950 | 951 | ``` 952 | // d.go 953 | func d(b []byte) { 954 | for i := 0; i < len(b); i++ { 955 | b[i] = 9 956 | } 957 | } 958 | ``` 959 | 960 | ``` 961 | $ go tool compile -d=ssa/check_bce/debug=1 d.go 962 | ``` 963 | 964 | When it is definite that the index cannot receive a value outside of the valid range (on either end), bce can happen. 965 | 966 | ### Providing bce Hints 967 | 968 | *Example 1* 969 | 970 | ``` 971 | // e.go 972 | 3 func e(b []byte, n int) { 973 | 4 for i := 0; i < n; i++ { 974 | 5 b[i] = 9 975 | 6 } 976 | 7 } 977 | ``` 978 | 979 | ``` 980 | $ go tool compile -d=ssa/check_bce/debug=1 e.go 981 | e.go:5:8: Found IsInBounds 982 | ``` 983 | 984 | Given that this is inside a loop, the bounds check will run on every iteration. Is there a way to reduce this? Perhaps a check outside the loop, before it runs? 985 | 986 | ``` 987 | // f.go 988 | 3 func f(b []byte, n int) { 989 | 4 _ = b[n-1] 990 | 5 for i := 0; i < n; i++ { 991 | 6 b[i] = 9 992 | 7 } 993 | 8 } 994 | ``` 995 | 996 | ``` 997 | $ go tool compile -d=ssa/check_bce/debug=1 f.go 998 | f.go:4:7: Found IsInBounds 999 | ``` 1000 | 1001 | Having done the check once outside, we are able to eliminate the remaining checks in the loop. 1002 | 1003 | 1004 | How about this one? There are 4 bounds checks. Can we reduce them?
1005 | 1006 | *Example 2* 1007 | 1008 | ``` 1009 | // g.go 1010 | func g1(b []byte, v uint32) { 1011 | b[0] = byte(v + 48) // Found IsInBounds 1012 | b[1] = byte(v + 49) // Found IsInBounds 1013 | b[2] = byte(v + 50) // Found IsInBounds 1014 | b[3] = byte(v + 51) // Found IsInBounds 1015 | } 1016 | ``` 1017 | 1018 | ``` 1019 | // g.go 1020 | func g2(b []byte, v uint32) { 1021 | b[3] = byte(v + 51) // Found IsInBounds 1022 | b[0] = byte(v + 48) 1023 | b[1] = byte(v + 49) 1024 | b[2] = byte(v + 50) 1025 | } 1026 | ``` 1027 | 1028 | *Example 3* 1029 | 1030 | ``` 1031 | // h.go 1032 | func h1(b []byte, n int) { 1033 | b[n+0] = byte(1) // Found IsInBounds 1034 | b[n+1] = byte(2) // Found IsInBounds 1035 | b[n+2] = byte(3) // Found IsInBounds 1036 | b[n+3] = byte(4) // Found IsInBounds 1037 | b[n+4] = byte(5) // Found IsInBounds 1038 | b[n+5] = byte(6) // Found IsInBounds 1039 | } 1040 | ``` 1041 | 1042 | ``` 1043 | func h2(b []byte, n int) { 1044 | b = b[n : n+6] // Found IsSliceInBounds 1045 | b[0] = byte(1) 1046 | b[1] = byte(2) 1047 | b[2] = byte(3) 1048 | b[3] = byte(4) 1049 | b[4] = byte(5) 1050 | b[5] = byte(6) 1051 | } 1052 | ``` 1053 | 1054 | *Example 4* 1055 | 1056 | ``` 1057 | func i1(a, b, c []byte) { 1058 | for i := range a { 1059 | a[i] = b[i] + c[i] // 5:11 Found IsInBounds and 5:12 Found IsInBounds 1060 | } 1061 | } 1062 | ``` 1063 | 1064 | ``` 1065 | func i2(a, b, c []byte) { 1066 | _ = b[len(a)-1] // Found IsInBounds 1067 | _ = c[len(a)-1] // Found IsInBounds 1068 | for i := range a { 1069 | a[i] = b[i] + c[i] 1070 | } 1071 | } 1072 | ``` 1073 | 1074 | ```Tip: bce helps shave off ns. Document your bce hints code.``` 1075 | 1076 | 1077 | ## sync.Pools 1078 | Pool's purpose is to cache allocated but unused items for later reuse, relieving pressure on the garbage collector. That is, it makes it easy to build efficient, thread-safe free lists. However, it is not suitable for all free lists. 1079 | 1080 | A Pool is a set of temporary objects that may be individually saved and retrieved. 1081 | 1082 | Any item stored in the Pool may be removed automatically at any time without notification. If the Pool holds the only reference when this happens, the item might be deallocated. 1083 | 1084 | A Pool is safe for use by multiple goroutines simultaneously. 1085 | 1086 | An appropriate use of a Pool is to manage a group of temporary items silently shared among and potentially reused by concurrent independent clients of a package. Pool provides a way to amortize allocation overhead across many clients. 1087 | 1088 | An example of good use of a Pool is in the fmt package, which maintains a dynamically-sized store of temporary output buffers. The store scales under load (when many goroutines are actively printing) and shrinks when quiescent. 
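One caveat before comparing the two versions below: Put does not clean an object, so whatever the previous user wrote into it is still there on the next Get. Reset pooled objects before reuse. A minimal sketch (hypothetical helper, not part of this repo's code), assuming the usual bytes and sync imports:

```
var bufPool = sync.Pool{
	New: func() interface{} { return &bytes.Buffer{} },
}

// getBuffer hands out a clean buffer; Reset clears any "dirty" state left
// behind by the previous user, which Put does not do for you.
func getBuffer() *bytes.Buffer {
	b := bufPool.Get().(*bytes.Buffer)
	b.Reset()
	return b
}
```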
1089 | 1090 | ``` 1091 | // 1_test.go 1092 | package main 1093 | 1094 | import ( 1095 | "bytes" 1096 | "testing" 1097 | ) 1098 | 1099 | func Benchmark_f1(b *testing.B) { 1100 | for i := 0; i < b.N; i++ { 1101 | f1() 1102 | } 1103 | } 1104 | 1105 | func f1() { 1106 | s := &bytes.Buffer{} 1107 | s.Write([]byte("dirty")) 1108 | return 1109 | } 1110 | 1111 | ``` 1112 | 1113 | ``` 1114 | $ go test -bench=f1 -benchmem 1115 | Benchmark_f1-8 30000000 43.5 ns/op 64 B/op 1 allocs/op 1116 | ``` 1117 | 1118 | ``` 1119 | // 2_test.go 1120 | package main 1121 | 1122 | import ( 1123 | "bytes" 1124 | "sync" 1125 | "testing" 1126 | ) 1127 | 1128 | var pool2 = sync.Pool{ 1129 | New: func() interface{} { 1130 | return &bytes.Buffer{} 1131 | }, 1132 | } 1133 | 1134 | func Benchmark_f2(b *testing.B) { 1135 | for i := 0; i < b.N; i++ { 1136 | f2() 1137 | } 1138 | } 1139 | 1140 | func f2() { 1141 | // When getting from a Pool, you need to cast 1142 | s := pool2.Get().(*bytes.Buffer) 1143 | // We write to the object 1144 | s.Write([]byte("dirty")) 1145 | // Then put it back 1146 | pool2.Put(s) 1147 | 1148 | return 1149 | } 1150 | 1151 | ``` 1152 | 1153 | ``` 1154 | $ go test -bench=f2 -benchmem 1155 | Benchmark_f2-8 50000000 38.2 ns/op 14 B/op 0 allocs/op 1156 | ``` 1157 | 1158 | ```Tip: Use sync.Pool to reduce your memory allocation pressure.``` 1159 | 1160 | ### Exercise: sync.Pool 1161 | A type of data (book) needs to be written to a json file. An ISBN number is added to a new book ({title, author}) and written out to a file. Use sync.Pool to reduce allocations prior to writing. 1162 | See book1_test.go and book2_test.go 1163 | 1164 | ## sync.Once for Lazy Initialization 1165 | 1166 | When programs have costly resources to load, it helps to load them only once. 1167 | 1168 | In version 1 of our code, we have a template that needs to be parsed. This example template is currently being read from memory, but there are usually many templates and they are read from the file system, which can be very slow. 1169 | 1170 | ```code/sync-once``` 1171 | 1172 | In the first naive example, we load the template each time it is required. This is useful in that the template is loaded only when it is needed. 1173 | 1174 | ``` 1175 | // 1.go 1176 | var t *template.Template 1177 | 1178 | func f() { 1179 | t = template.Must(template.New("").Parse(s)) 1180 | _ = t 1181 | 1182 | // do task with template 1183 | } 1184 | 1185 | func main() { 1186 | for i := 0; i < 10000; i++ { 1187 | f() 1188 | } 1189 | } 1190 | 1191 | ``` 1192 | 1193 | The time taken for this is about 0.637 seconds. Can we improve on this? 1194 | ``` 1195 | $ time go run 1.go 1196 | 1197 | real 0m0.637s 1198 | user 0m0.712s 1199 | sys 0m0.346s 1200 | ``` 1201 | 1202 | In version 1, we are re-parsing the template each time, which is unnecessary. In the second version, we load the template only once at the beginning of the program. 1203 | 1204 | ``` 1205 | // 2.go 1206 | func main() { 1207 | // costs time at load and maybe unused 1208 | t = template.Must(template.New("").Parse(s)) 1209 | _ = t 1210 | 1211 | for i := 0; i < 10000; i++ { 1212 | f() 1213 | } 1214 | } 1215 | ``` 1216 | 1217 | This works well, but doing all our initialization at the very beginning will slow down the program's start. It's often the case that there are many templates, but not all of them are needed or used immediately.
Eager initialization is also not ideal when there are multiple copies running in Kubernetes pods and we expect scaling to be very quick. 1218 | 1219 | ``` 1220 | time go run 2.go 1221 | 1222 | real 0m0.365s 1223 | user 0m0.376s 1224 | sys 0m0.198s 1225 | ``` 1226 | 1227 | In version 3 of our code, we use sync.Once to ensure that the initialization runs exactly once, and only at the moment it is first invoked, thus loading the template 'lazily'. 1228 | 1229 | sync.Once is safe for concurrent use: even if several goroutines call Do at the same time, the function is executed only once. 1230 | 1231 | ``` 1232 | // 3.go 1233 | var t *template.Template 1234 | var o sync.Once 1235 | 1236 | func g() { 1237 | fmt.Println("within g()") 1238 | t = template.Must(template.New("").Parse(s)) 1239 | _ = t 1240 | } 1241 | 1242 | func f() { 1243 | // only done once and when used 1244 | o.Do(g) 1245 | 1246 | // do task with template 1247 | 1248 | } 1249 | 1250 | func main() { 1251 | for i := 0; i < 10000; i++ { 1252 | f() 1253 | } 1254 | } 1255 | ``` 1256 | 1257 | You can see that in our very simple program, the difference is not much. But in typical production code, such changes could have a considerable impact. 1258 | 1259 | ``` 1260 | time go run 3.go 1261 | within g() 1262 | 1263 | real 0m0.380s 1264 | user 0m0.392s 1265 | sys 0m0.209s 1266 | ``` 1267 | 1268 | ```Tip: Consider lazily loading your resources using sync.Once at the time of first use.``` 1269 | 1270 | ## Arrays and Slices 1271 | 1272 | Discussion: what are the key characteristics of an array? 1273 | 1274 | In Go, the size of an array is part of its type. An array of one size cannot be assigned to an array of another size. 1275 | 1276 | ```code/slices``` 1277 | 1278 | ``` 1279 | var a [5]int 1280 | var b [6]int 1281 | b = a 1282 | ``` 1283 | 1284 | ``` 1285 | // compile error 1286 | cannot use a (type [5]int) as type [6]int in assignment 1287 | ``` 1288 | 1289 | In Go, arrays are fixed in size. You cannot append to (or delete from) an array. 1290 | 1291 | Slices, in contrast, can grow and shrink. 1292 | A slice variable can also be assigned slices of different lengths. Depending on the lengths of the source and target, the behavior can differ. 1293 | 1294 | Slices can be made from arrays. 1295 | 1296 | ``` 1297 | var a [5]int 1298 | s := a[0:3] 1299 | s = a[:3] 1300 | s = a[3:] 1301 | ``` 1302 | 1303 | Slices point to an array. Always. Within that array, there is a beginning position and a count of contiguous items that the slice refers to. 1304 | 1305 | ``` 1306 | a := [5]int{1, 2, 3, 4, 5} 1307 | s := a[0:3] 1308 | s[0] = 11 1309 | fmt.Println(a, s) 1310 | ``` 1311 | 1312 | ``` 1313 | [11 2 3 4 5] 1314 | [11 2 3] 1315 | ``` 1316 | 1317 | So are the addresses of the array and slice the same? 1318 | ``` 1319 | fmt.Printf("%p %p\n", &a, &s) 1320 | ``` 1321 | 1322 | ``` 1323 | 0xc0000181b0 0xc00000c060 1324 | ``` 1325 | 1326 | The slice has its own data structure that points to the array. 1327 | So then are their element addresses the same? 1328 | 1329 | ``` 1330 | fmt.Printf("%p %p\n", &a[0], &s[0]) 1331 | ``` 1332 | 1333 | ``` 1334 | 0xc0000181b0 0xc0000181b0 1335 | ``` 1336 | 1337 | Yes, they are. A slice has no storage of its own; it merely points to the array. 1338 | 1339 | But why would it be designed that way?
1340 | 1341 | ``` 1342 | a := [5]int{1, 2, 3, 4, 5} 1343 | s := a[0:3] 1344 | fmt.Println(a, s) 1345 | 1346 | s = append(s, 9) 1347 | fmt.Println(a, s) 1348 | 1349 | s = append(s, 19) 1350 | fmt.Println(a, s) 1351 | ``` 1352 | 1353 | ``` 1354 | [1 2 3 4 5] [1 2 3] 1355 | [1 2 3 9 5] [1 2 3 9] 1356 | [1 2 3 9 19] [1 2 3 9 19] 1357 | ``` 1358 | 1359 | What happens when we breach the boundary? 1360 | 1361 | ``` 1362 | s = append(s, 99) 1363 | fmt.Println(a, s) 1364 | ``` 1365 | 1366 | ``` 1367 | [1 2 3 9 19] [1 2 3 9 19 99] 1368 | ``` 1369 | 1370 | Once the boundary is breached (the capacity is exceeded), the original array remains the same. The slice is given new, larger memory elsewhere, backed by a new array. 1371 | 1372 | Why would we do it this way? 1373 | Because memory allocation is very costly. Go allows you to pre-allocate memory to avoid the run-time cost of repeatedly allocating new memory. Repeated reallocation also adds significant GC pressure. 1374 | 1375 | You can pre-allocate the expected size (capacity) of the slice using make. 1376 | 1377 | ``` 1378 | months := make([]int, 0, 12) 1379 | months = append(months, 1) 1380 | months = append(months, 7) 1381 | ``` 1382 | 1383 | You can also limit the capacity of a slice taken in a slicing operation by providing a third index (a full slice expression); append then allocates a new backing array as soon as that capacity is exceeded. 1384 | 1385 | ``` 1386 | s := a[0:3:3] // len 3, cap 3: the next append allocates a new backing array 1387 | ``` 1388 | 1389 | ```Tip: Pre-allocating slices to expected sizes can significantly increase performance.``` 1390 | 1391 | 1392 | ## String Concatenation 1393 | 1394 | Strings are immutable sequences of bytes. Concatenating strings with the + operator causes constant reallocation and GC pressure. 1395 | 1396 | There are two options in the std lib: bytes.Buffer and strings.Builder. Which would you guess performs better? 1397 | 1398 | ``` 1399 | for n := 0; n < b.N; n++ { 1400 | str += "x" 1401 | // vs 1402 | buffer.WriteString("x") 1403 | // vs 1404 | builder.WriteString("x") 1405 | } 1406 | ``` 1407 | 1408 | ``` 1409 | BenchmarkConcatString-8 10000000 128 ns/op 1410 | BenchmarkConcatBuffer-8 200000000 9.54 ns/op 1411 | BenchmarkConcatBuilder-8 1000000000 2.63 ns/op 1412 | ``` 1413 | 1414 | In earlier benchmarks I have seen online, bytes.Buffer and strings.Builder performed roughly the same, but strings.Builder appears to have been further optimized since then. 1415 | 1416 | ```Tip: Use strings.Builder > bytes.Buffer > string concatenation.``` 1417 | 1418 | 1419 | ## Map Keys: int vs string 1420 | 1421 | Which do you think would be faster? 1422 | Pretty obvious, I guess. 1423 | 1424 | ```code/map-access``` 1425 | 1426 | ``` 1427 | key := strconv.Itoa(rand.Intn(NumItems)) 1428 | //vs 1429 | key := rand.Intn(NumItems) 1430 | ``` 1431 | 1432 | ``` 1433 | BenchmarkMapStringKeys-8 20000000 109 ns/op 1434 | BenchmarkMapIntKeys-8 20000000 53.5 ns/op 1435 | ``` 1436 | 1437 | Will the time change if the string is longer? 1438 | 1439 | ``` 1440 | key := strconv.Itoa(rand.Intn(NumItems)) 1441 | key += ` is the key value that is being used. ` 1442 | //vs 1443 | key := rand.Intn(NumItems) 1444 | 1445 | ``` 1446 | 1447 | ``` 1448 | BenchmarkMapStringKeys-8 10000000 120 ns/op 1449 | BenchmarkMapIntKeys-8 30000000 56.9 ns/op 1450 | ``` 1451 | 1452 | Apparently it does. 1453 | 1454 | 1455 | ``` 1456 | key := strconv.Itoa(rand.Intn(NumItems)) 1457 | key += ` is the key value that is being used and a shakespeare sonnet.
` + sonnet106 1458 | //vs 1459 | key := rand.Intn(NumItems) 1460 | ``` 1461 | 1462 | ``` 1463 | BenchmarkMapStringKeys-8 10000000 246 ns/op 1464 | BenchmarkMapIntKeys-8 30000000 50.4 ns/op 1465 | ``` 1466 | 1467 | I found that map access gets slower as the key strings get longer. 1468 | 1469 | ```Tip: use int keys instead of string keys in maps. If strings have to be used, keep them short.``` 1470 | 1471 | 1472 | ## JSON Unmarshaling 1473 | 1474 | JSON unmarshaling uses reflection, which is not very efficient. It is convenient and straightforward, though. 1475 | 1476 | easyjson is an external tool that can be used to generate the unmarshaling code, which means you don't need to write or maintain that code by hand. You will have more (generated) code to compile, but run-time performance can be higher. 1477 | 1478 | ``` 1479 | //main.go 1480 | 1481 | //easyjson:json 1482 | type JSONData struct { 1483 | Data []string 1484 | } 1485 | 1486 | func unmarshaljsonFn() { 1487 | var j JSONData 1488 | json.Unmarshal([]byte(`{"Data" : ["One", "Two", "Three"]} `), &j) 1489 | } 1490 | 1491 | func easyjsonFn() { 1492 | d := &JSONData{} 1493 | d.UnmarshalJSON([]byte(`{"Data" : ["One", "Two", "Three"]} `)) 1494 | } 1495 | ``` 1496 | 1497 | ``` 1498 | $ go get -u github.com/mailru/easyjson/... 1499 | 1500 | $ easyjson -all main.go 1501 | // this generates a file called main_easyjson.go 1502 | ``` 1503 | 1504 | ``` 1505 | go test -bench=. -benchmem 1506 | Benchmark_unmarshaljson-8 2000000 981 ns/op 344 B/op 9 allocs/op 1507 | Benchmark_easyjson-8 5000000 350 ns/op 124 B/op 5 allocs/op 1508 | ``` 1509 | 1510 | ```Tip: if data serialization/deserialization is common, see if you can avoid reflection and interfaces. Generate code at build time instead to reduce performance cost at run time.``` 1511 | 1512 | 1513 | ## File I/O 1514 | 1515 | User buffered I/O, shortened to buffering or buffered I/O, refers to the technique of temporarily storing the results of an I/O operation in user-space before transmitting it to the kernel (in the case of writes) or before providing it to your process (in the case of reads). By so buffering the data, you can minimize the number of system calls and can block-align I/O operations, which may improve the performance of your application. 1516 | 1517 | For example, consider a process that writes one character at a time to a file. This is obviously inefficient: Each write operation corresponds to a write() system call, which means a trip into the kernel, a memory copy (of a single byte!), and a return to user-space, only to repeat the whole ordeal. Worse, filesystems and storage media work in terms of blocks; operations are fastest when aligned to integer multiples of those blocks. Misaligned operations, particularly very small ones, incur additional overhead. 1518 | 1519 | You want unbuffered output whenever you want to ensure that the output has been written before continuing. One example is standard error under a C runtime library - this is usually unbuffered by default. Since errors are (hopefully) infrequent, you want to know about them immediately. On the other hand, standard output is buffered simply because it's assumed there will be far more data going through it. 1520 | 1521 | In addition, it's not just system calls that are minimized but disk I/O as well. Let's say a program reads a file one byte at a time.
With unbuffered input, you will go out to the (relatively very slow) disk for every byte, even though it probably has to read in a whole block anyway (the disk hardware itself may have buffers, but you are still going out to the disk controller, which is slower than in-memory access). By buffering, the whole block is read into the buffer at once, and then the individual bytes are delivered to you from the (in-memory, incredibly fast) buffer area. 1522 | 1523 | Buffering can take many forms, such as in the following example: 1524 | 1525 | ``` 1526 | +-------------------+-------------------+ 1527 | | Process A | Process B | 1528 | +-------------------+-------------------+ 1529 | | C runtime library | C runtime library | C RTL buffers 1530 | +-------------------+-------------------+ 1531 | | OS caches | Operating system buffers 1532 | +---------------------------------------+ 1533 | | Disk controller hardware cache | Disk hardware buffers 1534 | +---------------------------------------+ 1535 | | Disk | 1536 | +---------------------------------------+ 1537 | ``` 1538 | 1539 | ``` 1540 | f, _ := os.Create("/tmp/test.txt") 1541 | for i := 0; i < 100000; i++ { 1542 | f.WriteString("some text!\n") 1543 | } 1544 | 1545 | // vs 1546 | 1547 | f, _ := os.Create("/tmp/test.txt") 1548 | w := bufio.NewWriter(f) 1549 | for i := 0; i < 100000; i++ { 1550 | w.WriteString("some text!\n") 1551 | } 1552 | w.Flush() // flush the buffer so all data actually reaches the file 1553 | ``` 1554 | 1555 | ``` 1556 | BenchmarkWriteFile-8 2 882,154,299 ns/op 1557 | BenchmarkWriteFileBuffered-8 300 4,666,152 ns/op 1558 | 1559 | BenchmarkReadFile-8 3 337,684,006 ns/op 1560 | BenchmarkReadFileBuffered-8 200 6,820,032 ns/op 1561 | ``` 1562 | 1563 | ```Tip: use buffered reads and writes.``` 1564 | 1565 | 1566 | ## Regexp Compilation 1567 | 1568 | Regular expressions are costly. Where possible, avoid them. Where you have to have them, compile them once, up front. 1569 | Also consider combining this with sync.Once. 1570 | 1571 | ``` 1572 | for i:=0; i< b.N; i++ { 1573 | regexp.MatchString(testRegexp, "jsmith@example.com") 1574 | } 1575 | 1576 | // vs 1577 | 1578 | r, _ := regexp.Compile(testRegexp) 1579 | for i:=0; i< b.N; i++ { 1580 | r.MatchString("jsmith@example.com") 1581 | } 1582 | ``` 1583 | 1584 | ``` 1585 | BenchmarkMatchString-8 200000 7195 ns/op 1586 | BenchmarkMatchStringCompiled-8 2000000 630 ns/op 1587 | ``` 1588 | 1589 | ```Tip: prefer the pre-compiled options: compiled regexps, SQL prepared statements, etc.``` 1590 | 1591 | ## Defer 1592 | 1593 | Defer does additional work for you and therefore it is not as fast as straight-line code. 1594 | 1595 | ```code/defer``` 1596 | 1597 | ``` 1598 | func (t *T) CounterA() int64 { 1599 | t.mu.Lock() 1600 | defer t.mu.Unlock() 1601 | return t.n 1602 | } 1603 | 1604 | func (t *T) CounterB() (count int64) { 1605 | t.mu.Lock() 1606 | count = t.n 1607 | t.mu.Unlock() 1608 | return 1609 | } 1610 | 1611 | func (t *T) IncreaseA() { 1612 | t.mu.Lock() 1613 | defer t.mu.Unlock() 1614 | t.n++ 1615 | } 1616 | 1617 | func (t *T) IncreaseB() { 1618 | t.mu.Lock() 1619 | t.n++ // this line will not panic for sure 1620 | t.mu.Unlock() 1621 | } 1622 | ``` 1623 | 1624 | As of Go 1.12, deferred function calls in the official Go compiler still incur a small run-time overhead.
1625 | 1626 | ``` 1627 | Benchmark_CounterA-8 30000000 52.9 ns/op 1628 | Benchmark_CounterB-8 100000000 18.9 ns/op 1629 | Benchmark_IncreaseA-8 30000000 51.9 ns/op 1630 | Benchmark_IncreaseB-8 100000000 19.3 ns/op 1631 | ``` 1632 | 1633 | ```Tip: where performance is a consideration and the code is unlikely to panic/recover, see if defers can be replaced.``` 1634 | 1635 | ## fmt vs strconv 1636 | 1637 | The fmt functions take their parameters as interface{}. It is generally faster to use functions that take concrete types, which need no reflection or type assertions. 1638 | 1639 | ``` 1640 | // fmt/main_test.go 1641 | func fmtFn(i int) string { 1642 | return fmt.Sprintf("%d", i) 1643 | } 1644 | func strconvFn(i int) string { return strconv.Itoa(i) } // the function compared in Benchmark_strconvFn 1645 | func Benchmark_fmtFn(b *testing.B) { 1646 | for i := 0; i < b.N; i++ { 1647 | fmtFn(1234) 1648 | } 1649 | } 1650 | ``` 1651 | 1652 | ``` 1653 | $ go test -bench=. -benchmem 1654 | Benchmark_fmtFn-8 20000000 100 ns/op 16 B/op 2 allocs/op 1655 | Benchmark_strconvFn-8 50000000 31.2 ns/op 4 B/op 1 allocs/op 1656 | ``` 1657 | 1658 | Using fmt can also increase the number of allocations: passing a non-pointer value as an interface{} usually causes a heap allocation. [ref](https://stephen.sh/posts/quick-go-performance-improvements) 1659 | 1660 | ```Tip: consider using functions that take specific data types as opposed to an empty interface, e.g. strconv functions as opposed to fmt.Sprintf.``` 1661 | 1662 | ## Explicitly Set Derived Values 1663 | 1664 | Certain parts of the standard library or external libraries might derive information that is not explicitly set. Using benchmarking, flame graphs, etc., figure out whether explicitly setting values could avoid the cost of deriving them. 1665 | 1666 | ``` 1667 | // responsewriter/main_test.go 1668 | func withoutSetHeader(w http.ResponseWriter, r *http.Request) { 1669 | fmt.Fprintln(w, "hello, stranger") 1670 | } 1671 | 1672 | func withSetHeader(w http.ResponseWriter, r *http.Request) { 1673 | w.Header().Set("Content-Type", "text/plain") 1674 | fmt.Fprintln(w, "hello, stranger") 1675 | } 1676 | ``` 1677 | 1678 | ``` 1679 | $ go test -bench=. 1680 | goos: darwin 1681 | goarch: amd64 1682 | Benchmark_withoutSetHeader-8 1000000 1664 ns/op 1683 | Benchmark_withSetHeader-8 1000000 1183 ns/op 1684 | ``` 1685 | 1686 | If the "Content-Type" header is not set, the first write (Fprintln) to the ResponseWriter sniffs the data to detect the content type. By setting Content-Type explicitly, we skip that detection and improve performance. 1687 | 1688 | ```Tip: Look at the implementation to see if otherwise derived values can be set in advance.``` 1689 | 1690 | 1691 | ## Go Performance Patterns 1692 | When application performance is a critical requirement, the use of built-in or third-party packages and methods should be considered carefully. The cases when a compiler can optimize code automatically are limited. The Go Performance Patterns are benchmark- and practice-based recommendations for choosing the most efficient package, method or implementation technique. 1693 | 1694 | Some points may not be applicable to a particular program; the actual performance optimization benefits depend almost entirely on the application logic and load. 1695 | 1696 | ### Make multiple I/O operations asynchronous 1697 | Network and file I/O (e.g. a database query) is the most common bottleneck in I/O-bound applications. Making independent I/O operations asynchronous, i.e. running them in parallel, can improve downstream latency. Use sync.WaitGroup to synchronize multiple operations, as in the sketch below.
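A minimal sketch of the pattern (the URLs below are placeholders, not part of this repo): independent HTTP requests are fanned out in goroutines and sync.WaitGroup waits for all of them. Each goroutine writes only to its own slot of the results slice, so no extra locking is needed.

```
// Hypothetical example: fetch several independent URLs in parallel.
package main

import (
	"fmt"
	"net/http"
	"sync"
)

func main() {
	urls := []string{
		"https://example.com/a",
		"https://example.com/b",
		"https://example.com/c",
	}

	// one result slot per goroutine, so no mutex is required
	statuses := make([]string, len(urls))

	var wg sync.WaitGroup
	for i, u := range urls {
		wg.Add(1)
		go func(i int, u string) {
			defer wg.Done()
			resp, err := http.Get(u)
			if err != nil {
				statuses[i] = err.Error()
				return
			}
			resp.Body.Close()
			statuses[i] = resp.Status
		}(i, u)
	}
	wg.Wait() // block until every request has completed

	fmt.Println(statuses)
}
```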
1698 | 1699 | ### Avoid memory allocation in hot code 1700 | Object creation not only requires additional CPU cycles, but will also keep the garbage collector busy. It is a good practice to reuse objects whenever possible, especially in program hot spots. You can use sync.Pool for convenience. See also: Object Creation Benchmark 1701 | 1702 | ### Favor lock-free algorithms 1703 | Synchronization often leads to contention. Avoiding mutexes whenever possible will have a positive impact on efficiency as well as latency. Lock-free alternatives to some common data structures are available (e.g. circular buffers). 1704 | 1705 | ### Use read-only locks 1706 | The use of full locks for read-heavy synchronized variables will unnecessarily make reading goroutines wait. Use read-only locks (sync.RWMutex) to avoid it. 1707 | 1708 | ### Use buffered I/O 1709 | Disks operate in blocks of data. Accessing the disk for every byte is inefficient; reading and writing bigger chunks of data greatly improves the speed. 1710 | 1711 | ### Use bytes.Buffer or strings.Builder instead of the += operator 1712 | A new string is allocated on every assignment, which is inefficient and should be avoided. See also: String Concatenation Benchmark. 1713 | 1714 | ### Use compiled regular expressions for repeated matching 1715 | It is inefficient to compile the same regular expression before every matching. While obvious, it is often overlooked. See also: Regexp Benchmark. 1716 | 1717 | ### Preallocate slices 1718 | Go manages dynamically growing slices intelligently; it allocates roughly twice as much memory each time the current capacity is reached. During re-allocation, the underlying array is copied to a new location. To avoid the copying and the extra garbage-collection work, preallocate the slice to its expected size whenever possible. See also: Slice Appending Benchmark. 1719 | 1720 | ### Use Protocol Buffers or MessagePack instead of JSON and Gob 1721 | JSON and Gob use reflection, which is relatively slow due to the amount of work it does. Gob serialization and deserialization is comparably fast, though, and may be preferred as it does not require type generation. 1722 | 1723 | ### Use int keys instead of string keys for maps 1724 | If the program relies heavily on maps, using int keys might be meaningful, if applicable. See also: Map Access Benchmark. 1725 | 1726 | ### Use methods that allow you to pass byte slices 1727 | When using packages, look for methods that allow you to pass a byte slice: these methods usually give you more control over allocation. 1728 | 1729 | time.Format vs. time.AppendFormat is a good example. time.Format returns a string. Under the hood, this allocates a new byte slice and calls time.AppendFormat on it. time.AppendFormat takes a byte buffer, writes the formatted representation of the time, and returns the extended byte slice. This is common in other packages in the standard library: see strconv.AppendFloat, or bytes.NewBuffer. 1730 | 1731 | Why does this give you increased performance? Well, you can now pass byte slices that you've obtained from your sync.Pool, instead of allocating a new buffer every time. Or you can increase the initial buffer size to a value that you know is more suited to your program, to reduce slice re-copying. 1732 | 1733 | ## Avoid using structures containing pointers as map keys for large maps 1734 | 1735 | During a garbage collection, the runtime scans objects containing pointers, and chases them.
If you have a very large map[string]int, the GC has to check every string within the map, every GC, as strings contain pointers. [ref](https://stephen.sh/posts/quick-go-performance-improvements) 1736 | 1737 | # References 1738 | * [Daniel Marti's talk - Optimizing Go Code without a Blindfold](https://www.dotconferences.com/2019/03/daniel-marti-optimizing-go-code-without-a-blindfold) 1739 | * [dave cheney high performance workshop](https://dave.cheney.net/high-performance-go-workshop/dotgo-paris.html) 1740 | * [github - dave cheney high performance workshop](https://github.com/davecheney/high-performance-go-workshop) 1741 | * [don't lock around io](https://commandercoriander.net/blog/2018/04/10/dont-lock-around-io/) 1742 | * [advent 2017 - go execution tracer](https://blog.gopheracademy.com/advent-2017/go-execution-tracer/) 1743 | * [execution tracer design doc](https://docs.google.com/document/u/1/d/1FP5apqzBgr7ahCCgFO-yoVhk4YZrNIDNf9RybngBc14/pub) 1744 | * https://www.alexedwards.net/blog/an-overview-of-go-tooling 1745 | * [configuring sqldb for better performance](https://www.alexedwards.net/blog/configuring-sqldb) 1746 | * [rate limit http requests](https://www.alexedwards.net/blog/how-to-rate-limit-http-requests) 1747 | * https://www.alexedwards.net/blog/understanding-mutexes 1748 | * https://stackimpact.com/docs/go-performance-tuning/ 1749 | * https://stackimpact.com/blog/practical-golang-benchmarks/ 1750 | * https://www.ardanlabs.com/blog/2017/06/design-philosophy-on-data-and-semantics.html 1751 | * https://github.com/ardanlabs/gotraining 1752 | * http://www.doxsey.net/blog/go-and-assembly 1753 | * https://medium.com/observability/debugging-latency-in-go-1-11-9f97a7910d68 1754 | * https://rakyll.org/profiler-labels/ 1755 | * https://stackoverflow.com/questions/45027236/what-differentiates-exception-frames-from-other-data-on-the-return-stack 1756 | * https://www.infoq.com/presentations/self-heal-scalable-system 1757 | * https://dave.cheney.net/paste/clear-is-better-than-clever.pdf 1758 | * https://golang.org/pkg/sync/#Pool, https://dev.to/hsatac/syncpool-34pd 1759 | * http://dominik.honnef.co/go-tip/2014-01-10/#syncpool 1760 | * https://www.quora.com/In-C-what-does-buffering-I-O-or-buffered-I-O-mean 1761 | * https://stackoverflow.com/questions/1450551/buffered-vs-unbuffered-io 1762 | * http://www.agardner.me/golang/garbage/collection/gc/escape/analysis/2015/10/18/go-escape-analysis.html 1763 | * [Performance Optimization Sins - Aliaksandar Valialkin](https://docs.google.com/presentation/d/e/2PACX-1vTxoBN41dYFB8aV8c0SDET3B2htsAavXPAwR-CMyfT2LfARR2KjOt8EPIU1zn8ceSuxrL8BmkOqqL_c/pub?start=false&loop=false&delayms=3000&slide=id.g524654fd95_0_117) 1764 | * https://blog.gopheracademy.com/advent-2018/postmortem-debugging-delve/ 1765 | * https://github.com/golang/go/wiki/DesignDocuments 1766 | * [Go execution modes](https://docs.google.com/document/d/1nr-TQHw_er6GOQRsF6T43GGhFDelrAP0NqSS_00RgZQ/edit) 1767 | * https://rakyll.org/profiler-labels/ 1768 | * https://rakyll.org/pprof-ui/ 1769 | * https://medium.com/@blanchon.vincent/go-should-i-use-a-pointer-instead-of-a-copy-of-my-struct-44b43b104963 1770 | * [Performance tuning Go in GCP](https://www.youtube.com/watch?v=b0o-xeEoug0) 1771 | * https://medium.com/observability/want-to-debug-latency-7aa48ecbe8f7 1772 | * https://medium.com/dm03514-tech-blog/sre-debugging-simple-memory-leaks-in-go-e0a9e6d63d4d 1773 | * https://www.ardanlabs.com/blog/2013/07/understanding-type-in-go.html 1774 | * 
https://www.geeksforgeeks.org/structure-member-alignment-padding-and-data-packing/ 1775 | * https://developers.redhat.com/blog/2016/06/01/how-to-avoid-wasting-megabytes-of-memory-a-few-bytes-at-a-time/ 1776 | * https://go101.org/article/memory-layout.html 1777 | * https://dave.cheney.net/2015/10/09/padding-is-hard 1778 | * http://www.catb.org/esr/structure-packing/ 1779 | * [Escape Analysis in Go](https://scvalex.net/posts/29/) 1780 | * https://www.ardanlabs.com/blog/2018/01/escape-analysis-flaws.html 1781 | * https://www.ardanlabs.com/blog/2017/05/language-mechanics-on-stacks-and-pointers.html 1782 | * https://segment.com/blog/allocation-efficiency-in-high-performance-go-services/ 1783 | * https://godoc.org/golang.org/x/perf/cmd/benchstat 1784 | * https://www.dotconferences.com/2019/03/daniel-marti-optimizing-go-code-without-a-blindfold 1785 | * https://www.youtube.com/watch?v=jiXnzkAzy30 1786 | * [go cpu mem profiling benchmarks gist](https://gist.github.com/arsham/bbc93990d8e5c9b54128a3d88901ab90) 1787 | * https://hashrocket.com/blog/posts/go-performance-observations 1788 | * https://www.ardanlabs.com/blog/2017/05/language-mechanics-on-escape-analysis.html 1789 | * https://dave.cheney.net/2014/06/07/five-things-that-make-go-fast 1790 | * https://stackoverflow.com/questions/2113751/sizeof-struct-in-go 1791 | * https://stackoverflow.com/questions/31496804/how-to-get-the-size-of-struct-and-its-contents-in-bytes-in-golang?rq=1 1792 | * https://github.com/campoy/go-tooling-workshop/tree/master/3-dynamic-analysis 1793 | * https://blog.usejournal.com/why-you-should-like-sync-pool-2c7960c023ba 1794 | * [work stealing scheduler](https://rakyll.org/scheduler/) 1795 | * https://morsmachine.dk/go-scheduler 1796 | * https://www.ardanlabs.com/blog/2018/08/scheduling-in-go-part1.html 1797 | * https://www.ardanlabs.com/blog/2018/08/scheduling-in-go-part2.html 1798 | * https://www.ardanlabs.com/blog/2018/12/scheduling-in-go-part3.html 1799 | * https://www.welcometothejungle.co/fr/articles/languages-software-go-elixir 1800 | * https://eng.uber.com/optimizing-m3/ 1801 | * https://medium.com/@fzambia/bisecting-go-performance-degradation-4d4a7ee83a63 1802 | * https://golang.org/doc/diagnostics.html 1803 | * http://jesseszwedko.com/gsp-go-debugging/#slide1 1804 | * https://fntlnz.wtf/post/gopostmortem/ 1805 | * https://dave.cheney.net/2013/10/15/how-does-the-go-build-command-work 1806 | * https://medium.freecodecamp.org/how-i-investigated-memory-leaks-in-go-using-pprof-on-a-large-codebase-4bec4325e192 1807 | * https://medium.com/@cep21/using-go-1-10-new-trace-features-to-debug-an-integration-test-1dc39e4e812d 1808 | * https://medium.com/golangspec/goroutine-leak-400063aef468 1809 | * https://medium.com/@val_deleplace/go-code-refactoring-the-23x-performance-hunt-156746b522f7 1810 | * https://medium.com/@teivah/good-code-vs-bad-code-in-golang-84cb3c5da49d 1811 | * https://matoski.com/article/golang-profiling-flamegraphs/ 1812 | * https://dzone.com/articles/so-you-wanna-go-fast 1813 | * https://www.slideshare.net/BadooDev/profiling-and-optimizing-go-programs 1814 | * https://about.sourcegraph.com/go/an-introduction-to-go-tool-trace-rhys-hiltner 1815 | * https://speakerdeck.com/rhysh/an-introduction-to-go-tool-trace 1816 | * https://stackimpact.com/blog/go-profiler-internals/ 1817 | * https://syslog.ravelin.com/go-and-memory-layout-6ef30c730d51 1818 | * https://github.com/golang/go/wiki/Performance 1819 | * https://blog.golang.org/ismmkeynote 1820 | * 
https://making.pusher.com/golangs-real-time-gc-in-theory-and-practice/ 1821 | * https://pusher.com/sessions/meetup/the-realtime-guild/golangs-realtime-garbage-collector 1822 | * https://blog.cloudflare.com/go-dont-collect-my-garbage/ 1823 | * https://syslog.ravelin.com/further-dangers-of-large-heaps-in-go-7a267b57d487 1824 | * https://www.akshaydeo.com/blog/2017/12/23/How-did-I-improve-latency-by-700-percent-using-syncPool/ 1825 | * [Go 1.5 GOMAXPROCS default document](https://docs.google.com/document/d/1At2Ls5_fhJQ59kDK2DFVhFu3g5mATSXqqV5QrxinasI/edit) 1826 | * https://dave.cheney.net/2015/11/29/a-whirlwind-tour-of-gos-runtime-environment-variables 1827 | * [https://engineers.sg/video/understanding-allocations-the-stack-and-the-heap-gophercon-sg-2019--3371](https://www.youtube.com/watch?v=ZMZpH4yT7M0) 1828 | * [Getting to Go's Garbage Collector](https://blog.golang.org/ismmkeynote) 1829 | * [Go GC progress in tweets](https://talks.golang.org/2017/state-of-go.slide#34) 1830 | * https://go101.org/article/concurrent-atomic-operation.html 1831 | * https://www.integralist.co.uk/posts/profiling-go/ 1832 | * https://medium.com/golangspec/sync-rwmutex-ca6c6c3208a0 1833 | * https://rakyll.org/mutexprofile/ 1834 | * https://jvns.ca/blog/2017/09/24/profiling-go-with-pprof/ 1835 | * https://blog.gopheracademy.com/advent-2018/avoid-gc-overhead-large-heaps/ 1836 | * [Journey of go's Garbage collector](https://blog.golang.org/ismmkeynote) 1837 | * [Memory Layout and Type Alignment Guarantees](https://go101.org/article/memory-layout.html) 1838 | * https://dougrichardson.org/2016/01/23/go-memory-allocations.html 1839 | * https://segment.com/blog/allocation-efficiency-in-high-performance-go-services/ 1840 | * https://stackimpact.com/docs/go-performance-tuning/ 1841 | * https://hackernoon.com/dancing-with-go-s-mutexes-92407ae927bf 1842 | * [GoLand - Profiling Go Applications and Tests](https://blog.jetbrains.com/go/2019/04/03/profiling-go-applications-and-tests/) 1843 | * https://povilasv.me/go-memory-management/ 1844 | * [gperftools - docs for various profilers](https://github.com/gperftools/gperftools) 1845 | * https://software.intel.com/en-us/blogs/2014/05/10/debugging-performance-issues-in-go-programs 1846 | * [when too much concurrency slows you down](https://medium.com/@_orcaman/when-too-much-concurrency-slows-you-down-golang-9c144ca305a) 1847 | * [defer more](https://go101.org/article/defer-more.html) 1848 | * https://go101.org/article/bounds-check-elimination.html 1849 | * [Agniva's slides from Golang Bangalore meetup](https://drive.google.com/file/d/1nm7QoZe047lfnLXmdKC0s8Ub7A8LzF56/view) 1850 | * [JSON unmarshal vs decode benchmark](https://github.com/kpango/go-json-bench) 1851 | * https://www.darkcoding.net/software/go-the-price-of-interface/ 1852 | * [Russ Cox - Go Data Structures: Interfaces](https://research.swtch.com/interfaces) 1853 | * https://github.com/golang/go/wiki/CompilerOptimizations 1854 | * https://dave.cheney.net/2014/06/07/five-things-that-make-go-fast 1855 | * [Carmen Andoh - The Why of Go](https://www.youtube.com/watch?v=bmZNaUcwBt4) 1856 | --------------------------------------------------------------------------------