120 |
121 |
--------------------------------------------------------------------------------
/code/benchmarks/reportAllocs/a_test.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import "testing"
4 |
5 | const size = 2000000
6 |
7 | func f() ([size]int, [size]int) {
8 | a := [size]int{}
9 | b := [size]int{}
10 | a[19] = 100
11 | return a, b
12 | }
13 |
14 | func f2() [size]int {
15 | a := [size]int{}
16 | a[19] = 100
17 | return a
18 | }
19 |
20 | func BenchmarkHelloWorld(b *testing.B) {
22 | b.ReportAllocs()
23 | for i := 0; i < b.N; i++ {
24 | a := f2()
25 | _ = a
26 | }
27 | }
28 |
--------------------------------------------------------------------------------
/code/benchmarks/reportAllocs/s.sh:
--------------------------------------------------------------------------------
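# NOTE: FILENAME is assumed to be set to the test binary prefix before running the commands below
# (go test keeps the binary as <dir>.test when profiling, e.g. FILENAME=reportAllocs for this directory).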
1 | go test -run=. -bench=. -cpuprofile=cpu.out -benchmem -memprofile=mem.out -trace trace.out
2 | go tool pprof -pdf $FILENAME.test cpu.out > cpu.pdf && open cpu.pdf
3 | go tool pprof -pdf --alloc_space $FILENAME.test mem.out > alloc_space.pdf && open alloc_space.pdf
4 | go tool pprof -pdf --alloc_objects $FILENAME.test mem.out > alloc_objects.pdf && open alloc_objects.pdf
5 | go tool pprof -pdf --inuse_space $FILENAME.test mem.out > inuse_space.pdf && open inuse_space.pdf
6 | go tool pprof -pdf --inuse_objects $FILENAME.test mem.out > inuse_objects.pdf && open inuse_objects.pdf
7 | go tool trace trace.out
8 |
9 | go-torch $FILENAME.test cpu.out -f ${FILENAME}_cpu.svg && open ${FILENAME}_cpu.svg
10 | go-torch --alloc_objects $FILENAME.test mem.out -f ${FILENAME}_alloc_obj.svg && open ${FILENAME}_alloc_obj.svg
11 | go-torch --alloc_space $FILENAME.test mem.out -f ${FILENAME}_alloc_space.svg && open ${FILENAME}_alloc_space.svg
12 | go-torch --inuse_objects $FILENAME.test mem.out -f ${FILENAME}_inuse_obj.svg && open ${FILENAME}_inuse_obj.svg
13 | go-torch --inuse_space $FILENAME.test mem.out -f ${FILENAME}_inuse_space.svg && open ${FILENAME}_inuse_space.svg
14 |
15 | # For live data
16 |
17 | go-torch -u http://localhost:8080 --seconds 32 -f ${FILENAME}_live.svg && open ${FILENAME}_live.svg
18 |
19 | #
20 |
21 | go tool pprof -cum cpu.out
22 | go tool pprof -cum --alloc_space mem.out
23 | go tool pprof -cum --alloc_objects mem.out
24 | go tool pprof -cum --inuse_space mem.out
25 | go tool pprof -cum --inuse_objects mem.out
26 |
27 | #
28 |
29 | go tool pprof $FILENAME.test cpu.out
30 | # (pprof) list
31 |
32 | #
33 |
34 | rm alloc_space.pdf alloc_objects.pdf inuse_space.pdf inuse_objects.pdf cpu.out cpu.pdf mem.out $FILENAME.test ${FILENAME}_cpu.svg ${FILENAME}_alloc_obj.svg ${FILENAME}_alloc_space.svg ${FILENAME}_inuse_obj.svg ${FILENAME}_inuse_space.svg ${FILENAME}_live.svg trace.out
35 |
36 |
--------------------------------------------------------------------------------
/code/bounds-check/a.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | func a(a []int) {
4 | n := 6
5 | _ = a[n]
6 | }
7 |
--------------------------------------------------------------------------------
/code/bounds-check/b.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | func b(b [5]int) {
4 | n := len(b) - 1
5 | _ = b[n]
6 | }
7 |
--------------------------------------------------------------------------------
/code/bounds-check/c.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | func c(b []int) {
4 | n := len(b) - 1
5 | _ = b[n]
6 | }
7 |
--------------------------------------------------------------------------------
/code/bounds-check/d.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | func d(b []byte) {
4 | for i := 0; i < len(b); i++ {
5 | b[i] = 9
6 | }
7 | }
8 |
--------------------------------------------------------------------------------
/code/bounds-check/e.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | func e(b []byte, n int) {
4 | for i := 0; i < n; i++ {
5 | b[i] = 9
6 | }
7 | }
8 |
--------------------------------------------------------------------------------
/code/bounds-check/f.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | func f(b []byte, n int) {
4 | _ = b[n-1]
5 | for i := 0; i < n; i++ {
6 | b[i] = 9
7 | }
8 | }
9 |
--------------------------------------------------------------------------------
/code/bounds-check/g.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import "fmt"
4 |
5 | func g1(b []byte, v uint32) {
6 | b[0] = byte(v + 48)
7 | b[1] = byte(v + 49)
8 | b[2] = byte(v + 50)
9 | b[3] = byte(v + 51)
10 | fmt.Println(b)
11 | }
12 |
13 | func g2(b []byte, v uint32) {
14 | b[3] = byte(v + 51)
15 | b[0] = byte(v + 48)
16 | b[1] = byte(v + 49)
17 | b[2] = byte(v + 50)
18 | fmt.Println(b)
19 | }
20 |
21 | func main() {
22 | b := make([]byte, 4)
23 | g1(b, 10)
24 | g2(b, 10)
25 | }
26 |
--------------------------------------------------------------------------------
/code/bounds-check/h.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import "fmt"
4 |
5 | func h1(b []byte, n int) {
6 | b[n+0] = byte(1) // Found IsInBounds
7 | b[n+1] = byte(2) // Found IsInBounds
8 | b[n+2] = byte(3) // Found IsInBounds
9 | b[n+3] = byte(4) // Found IsInBounds
10 | b[n+4] = byte(5) // Found IsInBounds
11 | b[n+5] = byte(6) // Found IsInBounds
12 | fmt.Println("in h1(): ", b)
13 | }
14 |
15 | func h2(b []byte, n int) {
16 | b = b[n : n+6] // Found IsSliceInBounds
17 | b[0] = byte(1)
18 | b[1] = byte(2)
19 | b[2] = byte(3)
20 | b[3] = byte(4)
21 | b[4] = byte(5)
22 | b[5] = byte(6)
23 | fmt.Println("in h2(): ", b)
24 | }
25 |
26 | func main() {
27 | b := make([]byte, 20)
28 | h1(b, 10)
29 | fmt.Println("in main: ", b)
30 | h2(b, 10)
31 | fmt.Println("in main: ", b)
32 | }
33 |
--------------------------------------------------------------------------------
/code/bounds-check/i.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | func i1(a, b, c []byte) {
4 | for i := range a {
5 | a[i] = b[i] + c[i] // 5:11 Found IsInBounds and 5:12 Found IsInBounds
6 | }
7 | }
8 |
9 | func i2(a, b, c []byte) {
10 | _ = b[len(a)-1] // Found IsInBounds
11 | _ = c[len(a)-1] // Found IsInBounds
12 | for i := range a {
13 | a[i] = b[i] + c[i]
14 | }
15 | }
16 |
--------------------------------------------------------------------------------
/code/bounds-check/readme.md:
--------------------------------------------------------------------------------
1 | Let's compare the compiled output of these two fairly similar programs.
2 |
3 | ```
4 | // a.go
5 | 3 func a(a []int) {
6 | 4 n := 6
7 | 5 _ = a[n]
8 | 6 }
9 | ```
10 |
11 | ```
12 | // b.go
13 | 3 func b(b [5]int) {
14 | 4 n := len(b) - 1
15 | 5 _ = b[n]
16 | 6 }
17 | ```
18 |
19 | ```
20 | $ go tool compile -S a.go > a.co
21 | $ go tool compile -S b.go > b.co
22 | $ vimdiff a.co b.co
23 | ```
24 |
25 | ```
26 | "".a STEXT nosplit size=39 args=0x18 locals=0x8
27 | (a.go:3) TEXT "".a(SB), NOSPLIT|ABIInternal, $8-24
28 | (a.go:3) SUBQ $8, SP
29 | (a.go:3) MOVQ BP, (SP)
30 | (a.go:3) LEAQ (SP), BP
31 | (a.go:3) FUNCDATA $0, gclocals·1a65...
32 | (a.go:3) FUNCDATA $1, gclocals·69c1...
33 | (a.go:3) FUNCDATA $3, gclocals·33cd...
34 | (a.go:5) PCDATA $2, $0
35 | (a.go:5) PCDATA $0, $1
36 | (a.go:5) MOVQ "".a+24(SP), AX
37 | (a.go:5) CMPQ AX, $6
38 | (a.go:5) JLS 32
39 | (a.go:6) PCDATA $2, $-2
40 | (a.go:6) PCDATA $0, $-2
41 | (a.go:6) MOVQ (SP), BP
42 | (a.go:6) ADDQ $8, SP
43 | (a.go:6) RET
44 | (a.go:5) PCDATA $2, $0
45 | (a.go:5) PCDATA $0, $1
46 | (a.go:5) CALL runtime.panicindex(SB)
47 | (a.go:5) UNDEF
48 | 0x0000 48 83 ec 08 48 89 2c 24 48 8d 2c 24 48 8b 44 24 H...H.,$H.,$H.D$
49 | 0x0010 18 48 83 f8 06 76 09 48 8b 2c 24 48 83 c4 08 c3 .H...v.H.,$H....
50 | 0x0020 e8 00 00 00 00 0f 0b .......
51 | rel 33+4 t=8 runtime.panicindex+0
52 | ```
53 |
54 | ```
55 | // b.co
56 | "".b STEXT nosplit size=1 args=0x28 locals=0x0
57 | (b.go:3) TEXT "".b(SB), NOSPLIT|ABIInternal, $0-40
58 | (b.go:3) FUNCDATA $0, gclocals·33cd...
59 | (b.go:3) FUNCDATA $1, gclocals·33cd...
60 | (b.go:3) FUNCDATA $3, gclocals·33cd...
61 | (b.go:6) RET
62 | ```
63 |
64 | There is a lot more happening in a.go than in b.go - about 20 more lines of assembly, which is surprising.
65 |
66 | Some of that difference is due to optimizations by the compiler, so let's turn those off with the -N option and compare again.
67 |
68 | ```
69 | $ go tool compile -S -N a.go > a.co
70 | $ go tool compile -S -N b.go > b.co
71 | $ vimdiff a.co b.co
72 | ```
73 |
74 | ```
75 | "".a STEXT nosplit size=49 args=0x18 locals=0x10
76 | (a.go:3) TEXT "".a(SB), NOSPLIT|ABIInternal, $16-24
77 | (a.go:3) SUBQ $16, SP
78 | (a.go:3) MOVQ BP, 8(SP)
79 | (a.go:3) LEAQ 8(SP), BP
80 | (a.go:3) FUNCDATA $0, gclocals·1a65...
81 | (a.go:3) FUNCDATA $1, gclocals·69c1...
82 | (a.go:3) FUNCDATA $3, gclocals·33cd...
83 | (a.go:4) PCDATA $2, $0
84 | (a.go:4) PCDATA $0, $0
85 | (a.go:4) MOVQ $6, "".n(SP)
86 | (a.go:5) PCDATA $0, $1
87 | (a.go:5) CMPQ "".a+32(SP), $6
88 | (a.go:5) JHI 32
89 | (a.go:5) JMP 42
90 | (a.go:6) PCDATA $2, $-2
91 | (a.go:6) PCDATA $0, $-2
92 | (a.go:6) MOVQ 8(SP), BP
93 | (a.go:6) ADDQ $16, SP
94 | (a.go:6) RET
95 | (a.go:5) PCDATA $2, $0
96 | (a.go:5) PCDATA $0, $1
97 | (a.go:5) CALL runtime.panicindex(SB)
98 | (a.go:5) UNDEF
99 | 0x0000 48 83 ...
100 | 0x0010 04 24 ...
101 | 0x0020 48 8b ...
102 | 0x0030 0b
103 | rel 43+4 t=8 runtime.panicindex+0
104 | ```
105 |
106 | ```
107 | "".b STEXT nosplit size=34 args=0x28 locals=0x10
108 | (b.go:3) TEXT "".b(SB), NOSPLIT|ABIInternal, $16-40
109 | (b.go:3) SUBQ $16, SP
110 | (b.go:3) MOVQ BP, 8(SP)
111 | (b.go:3) LEAQ 8(SP), BP
112 | (b.go:3) FUNCDATA $0, gclocals·33cd...
113 | (b.go:3) FUNCDATA $1, gclocals·33cd...
114 | (b.go:3) FUNCDATA $3, gclocals·33cd...
115 | (b.go:4) PCDATA $2, $0
116 | (b.go:4) PCDATA $0, $0
117 | (b.go:4) MOVQ $4, "".n(SP)
118 | (b.go:5) JMP 24
119 | (b.go:6) PCDATA $2, $-2
120 | (b.go:6) PCDATA $0, $-2
121 | (b.go:6) MOVQ 8(SP), BP
122 | (b.go:6) ADDQ $16, SP
123 | (b.go:6) RET
124 | 0x0000 48 83 ...
125 | 0x0010 04 24 ...
126 | 0x0020 10 c3
127 | ```
128 |
129 | Even without the optimizations, the CPU has to run more instructions in the case of a.go {n:=6} than in b.go {n:=len(b)-1}.
130 |
131 | There are some interesting differences between the two: the {n:=6} version has a compare instruction (CMPQ) and a call to runtime.panicindex, while the other version has neither.
132 |
133 | Let's also compile both with another option and see if we get any clues there.
134 |
135 | ```
136 | $ go tool compile -d=ssa/check_bce/debug=1 a.go
137 | a.go:5:7: Found IsInBounds
138 |
139 | $ go tool compile -d=ssa/check_bce/debug=1 b.go
140 | ```
141 |
142 | So, the compile tool shows no output with this option for b.go, while for a.go it reports "Found IsInBounds" at line number 5 (\_ = a[n]).
143 |
144 | ### Bounds Check Elimination (bce)
145 | From Wikipedia: bounds-checking elimination is a compiler optimization useful in programming languages or runtimes that enforce bounds checking, the practice of checking every index into an array to verify that the index is within the defined valid range of indexes. Its goal is to detect which of these indexing operations do not need to be validated at runtime, and to eliminate those checks.
146 |
147 | When arrays and slices are accessed, Go provides safety by checking that the index is valid. This implies additional instructions. A language like C does not have this check; instead, it is up to the programmer to add it if required, or to skip it at their own risk.
148 |
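For example, indexing past the end of a slice panics at run time instead of reading arbitrary memory; a minimal sketch (not part of this package):

```
package main

func main() {
	s := []int{1, 2, 3}
	i := 5
	_ = s[i] // panics: runtime error: index out of range
}
```
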
149 | Go provides the check but is able to eliminate it in certain cases, when it can prove that the index being accessed is within the allowed range.
150 |
151 | In the function ```func a(a []int) { n := 6; _ = a[n] }```, Go is not able to prove at compile time that index 6 will be within the slice that is passed. However, in the function ```func b(b [5]int) { n := len(b) - 1; _ = b[n] }```, it is guaranteed that the index will be within the length of the array of size 5. Thus Go is able to optimize by eliminating the bounds check.
152 |
153 | Exercise: What if we passed a slice into b.go instead of an array? Is there still a bounds check? Why or why not?
154 | See c.go.
155 |
156 | ```
157 | 3 func c(b []int) {
158 | 4 n := len(b) - 1
159 | 5 _ = b[n]
160 | 6 }
161 | ```
162 |
163 | ```
164 | $ go tool compile -d=ssa/check_bce/debug=1 c.go
165 | c.go:5:7: Found IsInBounds
166 | ```
167 |
168 | What is the bce output of the case below? Will the compiler be able to eliminate the bounds check?
169 |
170 | ```
171 | // d.go
172 | func d(b []byte) {
173 | for i := 0; i < len(b); i++ {
174 | b[i] = 9
175 | }
176 | }
177 | ```
178 |
179 | ```
180 | $ go tool compile -d=ssa/check_bce/debug=1 d.go
181 | ```
182 |
183 | When it is certain that the index cannot take a value outside of the valid range (on either end), bce can happen.
184 |
185 | ### Providing bce Hints
186 |
187 | Example 1
188 |
189 | ```
190 | // e.go
191 | 3 func e(b []byte, n int) {
192 | 4 for i := 0; i < n; i++ {
193 | 5 b[i] = 9
194 | 6 }
195 | 7 }
196 | ```
197 |
198 | ```
199 | $ go tool compile -d=ssa/check_bce/debug=1 e.go
200 | e.go:5:8: Found IsInBounds
201 | ```
202 |
203 | Given that this runs inside a loop, the bounds check also runs on every iteration. Is there a way to reduce this? Perhaps a check outside of, and prior to, the loop?
204 |
205 | ```
206 | // f.go
207 | 3 func f(b []byte, n int) {
208 | 4 _ = b[n-1]
209 | 5 for i := 0; i < n; i++ {
210 | 6 b[i] = 9
211 | 7 }
212 | 8 }
213 | ```
214 |
215 | ```
216 | $ go tool compile -d=ssa/check_bce/debug=1 f.go
217 | f.go:4:7: Found IsInBounds
218 | ```
219 |
220 | Having done the check once outside, we are able to eliminate the remaining checks in the loop.
221 |
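The standard library relies on the same hint: encoding/binary's fixed-width decoders check the highest index once up front. A minimal sketch of that pattern (readUint32 is a name made up here for illustration):

```
func readUint32(b []byte) uint32 {
	_ = b[3] // bounds check hint to the compiler: proves b has at least 4 bytes
	return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24
}
```
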
222 |
223 | How about this one? There are 4 bounds checks. Can we reduce them?
224 |
225 | Example 2
226 |
227 | ```
228 | // g.go
229 | func g1(b []byte, v uint32) {
230 | b[0] = byte(v + 48) // Found IsInBounds
231 | b[1] = byte(v + 49) // Found IsInBounds
232 | b[2] = byte(v + 50) // Found IsInBounds
233 | b[3] = byte(v + 51) // Found IsInBounds
234 | }
235 | ```
236 |
237 | ```
238 | // g.go
239 | func g2(b []byte, v uint32) {
240 | b[3] = byte(v + 51) // Found IsInBounds
241 | b[0] = byte(v + 48)
242 | b[1] = byte(v + 49)
243 | b[2] = byte(v + 50)
244 | }
245 | ```
246 |
247 | Example 3
248 |
249 | ```
250 | // h.go
251 | func h1(b []byte, n int) {
252 | b[n+0] = byte(1) // Found IsInBounds
253 | b[n+1] = byte(2) // Found IsInBounds
254 | b[n+2] = byte(3) // Found IsInBounds
255 | b[n+3] = byte(4) // Found IsInBounds
256 | b[n+4] = byte(5) // Found IsInBounds
257 | b[n+5] = byte(6) // Found IsInBounds
258 | }
259 | ```
260 |
261 | ```
262 | func h2(b []byte, n int) {
263 | b = b[n : n+6] // Found IsSliceInBounds
264 | b[0] = byte(1)
265 | b[1] = byte(2)
266 | b[2] = byte(3)
267 | b[3] = byte(4)
268 | b[4] = byte(5)
269 | b[5] = byte(6)
270 | }
271 | ```
272 |
273 | Example 4
274 |
275 | ```
276 | func i1(a, b, c []byte) {
277 | for i := range a {
278 | a[i] = b[i] + c[i] // 5:11 Found IsInBounds and 5:12 Found IsInBounds
279 | }
280 | }
281 | ```
282 |
283 | ```
284 | func i2(a, b, c []byte) {
285 | _ = b[len(a)-1] // Found IsInBounds
286 | _ = c[len(a)-1] // Found IsInBounds
287 | for i := range a {
288 | a[i] = b[i] + c[i]
289 | }
290 | }
291 | ```
292 |
293 |
--------------------------------------------------------------------------------
/code/coredump/coredump.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "fmt"
5 | "log"
6 | "net/http"
7 | )
8 |
9 | func main() {
10 | http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
11 | fmt.Fprint(w, "hello world\n")
12 | })
13 | log.Fatal(http.ListenAndServe("localhost:7777", nil))
14 | }
15 |
--------------------------------------------------------------------------------
/code/coredump/coredump.md:
--------------------------------------------------------------------------------
1 | Code that we can test a coredump with.
2 |
3 | Works on: Linux only
4 |
5 | Ref:
6 | https://rakyll.org/coredumps/
7 |
--------------------------------------------------------------------------------
/code/cover/cover.go:
--------------------------------------------------------------------------------
1 | package size
2 |
3 | func Size(a int) string {
4 | switch {
5 | case a < 0:
6 | return "negative"
7 | case a == 0:
8 | return "zero"
9 | case a < 10:
10 | return "small"
11 | case a < 100:
12 | return "big"
13 | case a < 1000:
14 | return "huge"
15 | }
16 | return "enormous"
17 | }
18 |
--------------------------------------------------------------------------------
/code/cover/cover_test.go:
--------------------------------------------------------------------------------
1 | package size
2 |
3 | import "testing"
4 |
5 | type Test struct {
6 | in int
7 | out string
8 | }
9 |
10 | var tests = []Test{
11 | {-1, "negative"},
12 | {5, "small"},
13 | }
14 |
15 | func TestSize(t *testing.T) {
16 | for i, test := range tests {
17 | size := Size(test.in)
18 | if size != test.out {
19 | t.Errorf("#%d: Size(%d)=%s; want %s", i, test.in, size, test.out)
20 | }
21 | }
22 | }
23 |
--------------------------------------------------------------------------------
/code/cover/readme.md:
--------------------------------------------------------------------------------
1 | ```
2 | go test -covermode=count -coverprofile=count.out fmt
3 | go tool cover -html=count.out
4 | ```
5 |
6 | For current folder:
7 | ```
8 | go test -covermode=count -coverprofile=count.out
9 | go tool cover -html=count.out
10 | ```
11 |
--------------------------------------------------------------------------------
/code/defer/defer_test.go:
--------------------------------------------------------------------------------
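// Compares the cost of guarding a counter with defer mu.Unlock() (CounterA/IncreaseA)
// against calling mu.Unlock() directly (CounterB/IncreaseB).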
1 | package mydefer
2 |
3 | import (
4 | "sync"
5 | "testing"
6 | )
7 |
8 | type T struct {
9 | mu sync.Mutex
10 | n int64
11 | }
12 |
13 | var t T
14 |
15 | func (t *T) CounterA() int64 {
16 | t.mu.Lock()
17 | defer t.mu.Unlock()
18 | return t.n
19 | }
20 |
21 | func (t *T) CounterB() (count int64) {
22 | t.mu.Lock()
23 | count = t.n
24 | t.mu.Unlock()
25 | return
26 | }
27 |
28 | func (t *T) IncreaseA() {
29 | t.mu.Lock()
30 | defer t.mu.Unlock()
31 | t.n++
32 | }
33 |
34 | func (t *T) IncreaseB() {
35 | t.mu.Lock()
36 | t.n++ // t.n++ cannot panic, so the Unlock below is always reached
37 | t.mu.Unlock()
38 | }
39 |
40 | func Benchmark_CounterA(b *testing.B) {
41 | for i := 0; i < b.N; i++ {
42 | t.CounterA()
43 | }
44 | }
45 |
46 | func Benchmark_CounterB(b *testing.B) {
47 | for i := 0; i < b.N; i++ {
48 | t.CounterB()
49 | }
50 | }
51 |
52 | func Benchmark_IncreaseA(b *testing.B) {
53 | for i := 0; i < b.N; i++ {
54 | t.IncreaseA()
55 | }
56 | }
57 |
58 | func Benchmark_IncreaseB(b *testing.B) {
59 | for i := 0; i < b.N; i++ {
60 | t.IncreaseB()
61 | }
62 | }
63 |
--------------------------------------------------------------------------------
/code/easyjson/main.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "encoding/json"
5 | )
6 |
7 | //easyjson:json
8 | type JSONData struct {
9 | Data []string
10 | }
11 |
12 | func unmarshaljsonFn() {
13 | var j JSONData
14 | json.Unmarshal([]byte(`{"Data" : ["One", "Two", "Three"]} `), &j)
15 | }
16 |
17 | func easyjsonFn() {
18 | d := &JSONData{}
19 | d.UnmarshalJSON([]byte(`{"Data" : ["One", "Two", "Three"]} `))
20 | }
21 |
--------------------------------------------------------------------------------
/code/easyjson/main_easyjson.go:
--------------------------------------------------------------------------------
1 | // Code generated by easyjson for marshaling/unmarshaling. DO NOT EDIT.
2 |
3 | package main
4 |
5 | import (
6 | json "encoding/json"
7 |
8 | easyjson "github.com/mailru/easyjson"
9 | jlexer "github.com/mailru/easyjson/jlexer"
10 | jwriter "github.com/mailru/easyjson/jwriter"
11 | )
12 |
13 | // suppress unused package warning
14 | var (
15 | _ *json.RawMessage
16 | _ *jlexer.Lexer
17 | _ *jwriter.Writer
18 | _ easyjson.Marshaler
19 | )
20 |
21 | func easyjson89aae3efDecodeEasyjson(in *jlexer.Lexer, out *JSONData) {
22 | isTopLevel := in.IsStart()
23 | if in.IsNull() {
24 | if isTopLevel {
25 | in.Consumed()
26 | }
27 | in.Skip()
28 | return
29 | }
30 | in.Delim('{')
31 | for !in.IsDelim('}') {
32 | key := in.UnsafeString()
33 | in.WantColon()
34 | if in.IsNull() {
35 | in.Skip()
36 | in.WantComma()
37 | continue
38 | }
39 | switch key {
40 | case "Data":
41 | if in.IsNull() {
42 | in.Skip()
43 | out.Data = nil
44 | } else {
45 | in.Delim('[')
46 | if out.Data == nil {
47 | if !in.IsDelim(']') {
48 | out.Data = make([]string, 0, 4)
49 | } else {
50 | out.Data = []string{}
51 | }
52 | } else {
53 | out.Data = (out.Data)[:0]
54 | }
55 | for !in.IsDelim(']') {
56 | var v1 string
57 | v1 = string(in.String())
58 | out.Data = append(out.Data, v1)
59 | in.WantComma()
60 | }
61 | in.Delim(']')
62 | }
63 | default:
64 | in.SkipRecursive()
65 | }
66 | in.WantComma()
67 | }
68 | in.Delim('}')
69 | if isTopLevel {
70 | in.Consumed()
71 | }
72 | }
73 | func easyjson89aae3efEncodeEasyjson(out *jwriter.Writer, in JSONData) {
74 | out.RawByte('{')
75 | first := true
76 | _ = first
77 | {
78 | const prefix string = ",\"Data\":"
79 | if first {
80 | first = false
81 | out.RawString(prefix[1:])
82 | } else {
83 | out.RawString(prefix)
84 | }
85 | if in.Data == nil && (out.Flags&jwriter.NilSliceAsEmpty) == 0 {
86 | out.RawString("null")
87 | } else {
88 | out.RawByte('[')
89 | for v2, v3 := range in.Data {
90 | if v2 > 0 {
91 | out.RawByte(',')
92 | }
93 | out.String(string(v3))
94 | }
95 | out.RawByte(']')
96 | }
97 | }
98 | out.RawByte('}')
99 | }
100 |
101 | // MarshalJSON supports json.Marshaler interface
102 | func (v JSONData) MarshalJSON() ([]byte, error) {
103 | w := jwriter.Writer{}
104 | easyjson89aae3efEncodeEasyjson(&w, v)
105 | return w.Buffer.BuildBytes(), w.Error
106 | }
107 |
108 | // MarshalEasyJSON supports easyjson.Marshaler interface
109 | func (v JSONData) MarshalEasyJSON(w *jwriter.Writer) {
110 | easyjson89aae3efEncodeEasyjson(w, v)
111 | }
112 |
113 | // UnmarshalJSON supports json.Unmarshaler interface
114 | func (v *JSONData) UnmarshalJSON(data []byte) error {
115 | r := jlexer.Lexer{Data: data}
116 | easyjson89aae3efDecodeEasyjson(&r, v)
117 | return r.Error()
118 | }
119 |
120 | // UnmarshalEasyJSON supports easyjson.Unmarshaler interface
121 | func (v *JSONData) UnmarshalEasyJSON(l *jlexer.Lexer) {
122 | easyjson89aae3efDecodeEasyjson(l, v)
123 | }
124 |
--------------------------------------------------------------------------------
/code/easyjson/main_test.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import "testing"
4 |
5 | func Benchmark_unmarshaljson(b *testing.B) {
6 | for i := 0; i < b.N; i++ {
7 | unmarshaljsonFn()
8 | }
9 | }
10 |
11 | func Benchmark_easyjson(b *testing.B) {
12 | for i := 0; i < b.N; i++ {
13 | easyjsonFn()
14 | }
15 | }
16 |
--------------------------------------------------------------------------------
/code/easyjson/readme.md:
--------------------------------------------------------------------------------
1 | This requires the paths to be set up properly first: the local code needs to be inside GOPATH, and the regular Go packages under GOPATH also need to be available to the easyjson tool.
2 |
3 | Once that is set, run:
4 | ```easyjson -all main.go```
5 | to get the generated file which ends in _easyjson.go
6 |
--------------------------------------------------------------------------------
/code/escape-analysis/1.go:
--------------------------------------------------------------------------------
1 | // go build -gcflags='-m' 1.go
2 | // go build -gcflags='-m -l' 1.go to avoid inlining
3 | // go build -gcflags='-m -l -m' 1.go for verbose comments.
4 |
5 | package main
6 |
7 | /*
8 | func f() {
9 | var i = 5
10 | i++
11 | _ = i
12 | }
13 |
14 | func f_returns() int {
15 | var i = 5
16 | i++
17 | return i
18 | }
19 | */
20 |
21 | func f_returns_ptr() *int {
22 | var i = 5
23 | i++
24 | return &i
25 | }
26 |
27 | func main() {
28 | //f()
29 | //f_returns()
30 | f_returns_ptr()
31 | }
32 |
--------------------------------------------------------------------------------
/code/escape-analysis/main.c:
--------------------------------------------------------------------------------
1 | // online c editor - https://onlinegdb.com/HySykSJoE
2 |
3 | #include <stdio.h>
4 |
5 | int* f() {
6 | int a;
7 | a = 10;
8 | return &a;
9 | }
10 |
11 | int main(void)
12 | {
13 | int* p = f();
14 | printf("p is: %x\n", p); // p is 0
15 | printf("*p is: %d\n", *p); // segmentation fault
16 |
18 | }
19 |
20 |
--------------------------------------------------------------------------------
/code/escape-analysis/readme.md:
--------------------------------------------------------------------------------
1 | * Returning the address of a local variable in C would cause errors. But it is possible in Go.
2 | * "Note that, unlike in C, it’s perfectly OK to return the address of a local variable; the storage associated with the variable survives after the function returns."
4 |
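A minimal sketch of the Go side (the same idea as f_returns_ptr in 1.go):

```
func f_returns_ptr() *int {
	i := 5
	return &i // escape analysis moves i to the heap, so the returned pointer stays valid
}
```
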
5 | Run:
6 | go run -gcflags '-m -l' 1.go
7 |
8 | References:
9 | [Escape Analysis in Go](https://scvalex.net/posts/29/)
10 |
--------------------------------------------------------------------------------
/code/file-io/1-file-io_test.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "bufio"
5 | "io"
6 | "os"
7 | "testing"
8 | )
9 |
10 | func BenchmarkWriteFile(b *testing.B) {
11 | for n := 0; n < b.N; n++ {
12 | f, err := os.Create("/tmp/test.txt")
13 | if err != nil {
14 | panic(err)
15 | }
16 |
17 | for i := 0; i < 100000; i++ {
18 | f.WriteString("some text!\n")
19 | }
20 |
21 | f.Close()
22 | }
23 | }
24 |
25 | func BenchmarkWriteFileBuffered(b *testing.B) {
26 | for n := 0; n < b.N; n++ {
27 | f, err := os.Create("/tmp/test.txt")
28 | if err != nil {
29 | panic(err)
30 | }
31 |
32 | w := bufio.NewWriter(f)
33 |
34 | for i := 0; i < 100000; i++ {
35 | w.WriteString("some text!\n")
36 | }
37 |
38 | w.Flush()
39 | f.Close()
40 | }
41 | }
42 |
43 | func BenchmarkReadFile(b *testing.B) {
44 | for n := 0; n < b.N; n++ {
45 | f, err := os.Open("/tmp/test.txt")
46 | if err != nil {
47 | panic(err)
48 | }
49 |
50 | buf := make([]byte, 10)
51 |
52 | _, err = f.Read(buf)
53 | for err == nil {
54 | _, err = f.Read(buf)
55 | }
56 | if err != io.EOF {
57 | panic(err)
58 | }
59 |
60 | f.Close()
61 | }
62 | }
63 |
64 | func BenchmarkReadFileBuffered(b *testing.B) {
65 | for n := 0; n < b.N; n++ {
66 | f, err := os.Open("/tmp/test.txt")
67 | if err != nil {
68 | panic(err)
69 | }
70 |
71 | r := bufio.NewReader(f)
72 |
73 | _, err = r.ReadString('\n')
74 | for err == nil {
75 | _, err = r.ReadString('\n')
76 | }
77 | if err != io.EOF {
78 | panic(err)
79 | }
80 |
81 | f.Close()
82 | }
83 | }
84 |
--------------------------------------------------------------------------------
/code/fmt/main_test.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "fmt"
5 | "strconv"
6 | "testing"
7 | )
8 |
9 | func fmtFn(i int) string {
10 | return fmt.Sprintf("%d", i)
11 | }
12 |
13 | func Benchmark_fmtFn(b *testing.B) {
14 | for i := 0; i < b.N; i++ {
15 | fmtFn(1234)
16 | }
17 | }
18 |
19 | func strconvFn(i int) string {
20 | return strconv.Itoa(i)
21 | }
22 | func Benchmark_strconvFn(b *testing.B) {
23 | for i := 0; i < b.N; i++ {
24 | strconvFn(1234)
25 | }
26 | }
27 |
--------------------------------------------------------------------------------
/code/gogc/mergesort.go:
--------------------------------------------------------------------------------
1 | // ref: https://hackernoon.com/parallel-merge-sort-in-go-fe14c1bc006
2 |
3 | // GOGC=off go run mergesort.go v1 && go tool trace v1.trace
4 | // GOGC=50 go run mergesort.go v1 && go tool trace v1.trace
5 | // GOGC=100 go run mergesort.go v1 && go tool trace v1.trace
6 | // GOGC=200 go run mergesort.go v1 && go tool trace v1.trace
7 | package main
8 |
9 | import (
10 | "fmt"
11 | "math/rand"
12 | "os"
13 | "runtime/trace"
14 | "sync"
15 | "time"
16 | )
17 |
18 | const max = 1 << 11
19 |
20 | func merge(s []int, middle int) {
21 | helper := make([]int, len(s))
22 | copy(helper, s)
23 |
24 | helperLeft := 0
25 | helperRight := middle
26 | current := 0
27 | high := len(s) - 1
28 |
29 | for helperLeft <= middle-1 && helperRight <= high {
30 | if helper[helperLeft] <= helper[helperRight] {
31 | s[current] = helper[helperLeft]
32 | helperLeft++
33 | } else {
34 | s[current] = helper[helperRight]
35 | helperRight++
36 | }
37 | current++
38 | }
39 |
40 | for helperLeft <= middle-1 {
41 | s[current] = helper[helperLeft]
42 | current++
43 | helperLeft++
44 | }
45 | }
46 |
47 | func mergesortv1(s []int) {
48 | len := len(s)
49 |
50 | if len > 1 {
51 | middle := len / 2
52 |
53 | var wg sync.WaitGroup
54 | wg.Add(2)
55 |
56 | // First half
57 | go func() {
58 | defer wg.Done()
59 | mergesortv1(s[:middle])
60 | }()
61 |
62 | // Second half
63 | go func() {
64 | defer wg.Done()
65 | mergesortv1(s[middle:])
66 | }()
67 |
68 | // Wait until the two goroutines have completed
69 | wg.Wait()
70 | merge(s, middle)
71 | }
72 | }
73 |
74 | /* Sequential */
75 |
76 | func mergesort(s []int) {
77 | if len(s) > 1 {
78 | middle := len(s) / 2
79 | mergesort(s[:middle])
80 | mergesort(s[middle:])
81 | merge(s, middle)
82 | }
83 | }
84 |
85 | func mergesortv2(s []int) {
86 | len := len(s)
87 |
88 | if len > 1 {
89 | if len <= max { // Sequential
90 | mergesort(s)
91 | } else { // Parallel
92 | middle := len / 2
93 |
94 | var wg sync.WaitGroup
95 | wg.Add(2)
96 |
97 | go func() {
98 | defer wg.Done()
99 | mergesortv2(s[:middle])
100 | }()
101 |
102 | go func() {
103 | defer wg.Done()
104 | mergesortv2(s[middle:])
105 | }()
106 |
107 | wg.Wait()
108 | merge(s, middle)
109 | }
110 | }
111 | }
112 |
113 | func mergesortv3(s []int) {
114 | len := len(s)
115 |
116 | if len > 1 {
117 | if len <= max { // Sequential
118 | mergesort(s)
119 | } else { // Parallel
120 | middle := len / 2
121 |
122 | var wg sync.WaitGroup
123 | wg.Add(1)
124 |
125 | go func() {
126 | defer wg.Done()
127 | mergesortv3(s[:middle])
128 | }()
129 |
130 | mergesortv3(s[middle:])
131 |
132 | wg.Wait()
133 | merge(s, middle)
134 | }
135 | }
136 | }
137 |
138 | // generateSlice generates a slice of length size, filled with random numbers
139 | func generateSlice(size int) []int {
140 |
141 | slice := make([]int, size, size)
142 | rand.Seed(time.Now().UnixNano())
143 | for i := 0; i < size; i++ {
144 | slice[i] = rand.Intn(999) - rand.Intn(999)
145 | }
146 | return slice
147 | }
148 |
149 | func main() {
150 | version := "v1"
151 | if len(os.Args) == 2 {
152 | version = os.Args[1]
153 | }
154 |
155 | f, err := os.OpenFile(version+".trace", os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0644)
156 | if err != nil {
157 | fmt.Println("Error:", err)
158 | return
159 | }
160 |
161 | trace.Start(f)
162 | defer trace.Stop()
163 |
164 | for i := 0; i < 10000; i++ {
165 | s := generateSlice(10)
166 |
167 | switch version {
168 | case "v1":
169 | mergesortv1(s)
170 | case "v2":
171 | mergesortv2(s)
172 | case "v3":
173 | mergesortv3(s)
174 | }
175 | }
176 |
177 | }
178 |
--------------------------------------------------------------------------------
/code/gomaxprocs/1.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "fmt"
5 | "runtime"
6 | )
7 |
8 | func main() {
9 | fmt.Println("runtime.NumCPU()=", runtime.NumCPU())
10 | }
11 |
--------------------------------------------------------------------------------
/code/gomaxprocs/mergesort.go:
--------------------------------------------------------------------------------
1 | // ref: https://hackernoon.com/parallel-merge-sort-in-go-fe14c1bc006
2 |
3 | // go run mergesort.go [v1 (default) | v2 | v3]
4 | // GOMAXPROCS=1 go run mergesort.go v1 && go tool trace v1.trace
5 | // GOMAXPROCS=8 go run mergesort.go v1 && go tool trace v1.trace
6 | // GOMAXPROCS=18 go run mergesort.go v1 && go tool trace v1.trace
7 | package main
8 |
9 | import (
10 | "fmt"
11 | "os"
12 | "runtime/trace"
13 | "sync"
14 | )
15 |
16 | const max = 1 << 11
17 |
18 | var s = []int{
19 | 89, 123, 12, 9, 198, 1546, 108, 872, 93,
20 | 89, 123, 12, 9, 198, 1546, 108, 872, 93,
21 | 89, 123, 12, 9, 198, 1546, 108, 872, 93,
22 | 89, 123, 12, 9, 198, 1546, 108, 872, 93,
23 | 89, 123, 12, 9, 198, 1546, 108, 872, 93,
24 | 89, 123, 12, 9, 198, 1546, 108, 872, 93,
25 | 89, 123, 12, 9, 198, 1546, 108, 872, 93,
26 | 89, 123, 12, 9, 198, 1546, 108, 872, 93,
27 | 89, 123, 12, 9, 198, 1546, 108, 872, 93,
28 | 89, 123, 12, 9, 198, 1546, 108, 872, 93,
29 | 89, 123, 12, 9, 198, 1546, 108, 872, 93,
30 | 89, 123, 12, 9, 198, 1546, 108, 872, 93,
31 | 89, 123, 12, 9, 198, 1546, 108, 872, 93,
32 | 89, 123, 12, 9, 198, 1546, 108, 872, 93,
33 | 89, 123, 12, 9, 198, 1546, 108, 872, 93,
34 | 89, 123, 12, 9, 198, 1546, 108, 872, 93,
35 | 89, 123, 12, 9, 198, 1546, 108, 872, 93,
36 | 89, 123, 12, 9, 198, 1546, 108, 872, 93,
37 | 89, 123, 12, 9, 198, 1546, 108, 872, 93,
38 | 89, 123, 12, 9, 198, 1546, 108, 872, 93,
39 | 89, 123, 12, 9, 198, 1546, 108, 872, 93,
40 | 89, 123, 12, 9, 198, 1546, 108, 872, 93,
41 | 89, 123, 12, 9, 198, 1546, 108, 872, 93,
42 | 89, 123, 12, 9, 198, 1546, 108, 872, 93,
43 | 89, 123, 12, 9, 198, 1546, 108, 872, 93,
44 | 89, 123, 12, 9, 198, 1546, 108, 872, 93,
45 | 89, 123, 12, 9, 198, 1546, 108, 872, 93,
46 | 89, 123, 12, 9, 198, 1546, 108, 872, 93,
47 | 89, 123, 12, 9, 198, 1546, 108, 872, 93,
48 | 89, 123, 12, 9, 198, 1546, 108, 872, 93,
49 | 89, 123, 12, 9, 198, 1546, 108, 872, 93,
50 | 89, 123, 12, 9, 198, 1546, 108, 872, 93,
51 | 89, 123, 12, 9, 198, 1546, 108, 872, 93,
52 | 89, 123, 12, 9, 198, 1546, 108, 872, 93,
53 | 89, 123, 12, 9, 198, 1546, 108, 872, 93,
54 | 89, 123, 12, 9, 198, 1546, 108, 872, 93,
55 | 89, 123, 12, 9, 198, 1546, 108, 872, 93,
56 | 89, 123, 12, 9, 198, 1546, 108, 872, 93,
57 | 89, 123, 12, 9, 198, 1546, 108, 872, 93,
58 | 89, 123, 12, 9, 198, 1546, 108, 872, 93,
59 | 89, 123, 12, 9, 198, 1546, 108, 872, 93,
60 | 89, 123, 12, 9, 198, 1546, 108, 872, 93,
61 | 89, 123, 12, 9, 198, 1546, 108, 872, 93,
62 | 89, 123, 12, 9, 198, 1546, 108, 872, 93,
63 | 89, 123, 12, 9, 198, 1546, 108, 872, 93,
64 | 89, 123, 12, 9, 198, 1546, 108, 872, 93,
65 | 89, 123, 12, 9, 198, 1546, 108, 872, 93,
66 | 89, 123, 12, 9, 198, 1546, 108, 872, 93,
67 | 89, 123, 12, 9, 198, 1546, 108, 872, 93,
68 | 89, 123, 12, 9, 198, 1546, 108, 872, 93,
69 | 89, 123, 12, 9, 198, 1546, 108, 872, 93,
70 | 89, 123, 12, 9, 198, 1546, 108, 872, 93,
71 | 89, 123, 12, 9, 198, 1546, 108, 872, 93,
72 | 89, 123, 12, 9, 198, 1546, 108, 872, 93,
73 | 89, 123, 12, 9, 198, 1546, 108, 872, 93,
74 | 89, 123, 12, 9, 198, 1546, 108, 872, 93,
75 | 89, 123, 12, 9, 198, 1546, 108, 872, 93,
76 | 89, 123, 12, 9, 198, 1546, 108, 872, 93,
77 | 89, 123, 12, 9, 198, 1546, 108, 872, 93,
78 | 89, 123, 12, 9, 198, 1546, 108, 872, 93,
79 | 89, 123, 12, 9, 198, 1546, 108, 872, 93,
80 | 89, 123, 12, 9, 198, 1546, 108, 872, 93,
81 | 89, 123, 12, 9, 198, 1546, 108, 872, 93,
82 | 89, 123, 12, 9, 198, 1546, 108, 872, 93,
83 | 89, 123, 12, 9, 198, 1546, 108, 872, 93,
84 | 89, 123, 12, 9, 198, 1546, 108, 872, 93,
85 | 89, 123, 12, 9, 198, 1546, 108, 872, 93,
86 | 89, 123, 12, 9, 198, 1546, 108, 872, 93,
87 | 89, 123, 12, 9, 198, 1546, 108, 872, 93,
88 | 89, 123, 12, 9, 198, 1546, 108, 872, 93,
89 | 89, 123, 12, 9, 198, 1546, 108, 872, 93,
90 | 89, 123, 12, 9, 198, 1546, 108, 872, 93,
91 | 89, 123, 12, 9, 198, 1546, 108, 872, 93,
92 | 89, 123, 12, 9, 198, 1546, 108, 872, 93,
93 | 89, 123, 12, 9, 198, 1546, 108, 872, 93,
94 | 89, 123, 12, 9, 198, 1546, 108, 872, 93,
95 | 89, 123, 12, 9, 198, 1546, 108, 872, 93,
96 | 89, 123, 12, 9, 198, 1546, 108, 872, 93,
97 | 89, 123, 12, 9, 198, 1546, 108, 872, 93,
98 | 89, 123, 12, 9, 198, 1546, 108, 872, 93,
99 | 89, 123, 12, 9, 198, 1546, 108, 872, 93,
100 | 89, 123, 12, 9, 198, 1546, 108, 872, 93,
101 | 89, 123, 12, 9, 198, 1546, 108, 872, 93,
102 | 89, 123, 12, 9, 198, 1546, 108, 872, 93,
103 | 89, 123, 12, 9, 198, 1546, 108, 872, 93,
104 | 89, 123, 12, 9, 198, 1546, 108, 872, 93,
105 | 89, 123, 12, 9, 198, 1546, 108, 872, 93,
106 | 89, 123, 12, 9, 198, 1546, 108, 872, 93,
107 | 89, 123, 12, 9, 198, 1546, 108, 872, 93,
108 | 89, 123, 12, 9, 198, 1546, 108, 872, 93,
109 | 89, 123, 12, 9, 198, 1546, 108, 872, 93,
110 | 89, 123, 12, 9, 198, 1546, 108, 872, 93,
111 | 89, 123, 12, 9, 198, 1546, 108, 872, 93,
112 | 89, 123, 12, 9, 198, 1546, 108, 872, 93,
113 | 89, 123, 12, 9, 198, 1546, 108, 872, 93,
114 | 89, 123, 12, 9, 198, 1546, 108, 872, 93,
115 | 89, 123, 12, 9, 198, 1546, 108, 872, 93,
116 | 89, 123, 12, 9, 198, 1546, 108, 872, 93,
117 | 89, 123, 12, 9, 198, 1546, 108, 872, 93,
118 | 89, 123, 12, 9, 198, 1546, 108, 872, 93,
119 | 89, 123, 12, 9, 198, 1546, 108, 872, 93,
120 | }
121 |
122 | func merge(s []int, middle int) {
123 | helper := make([]int, len(s))
124 | copy(helper, s)
125 |
126 | helperLeft := 0
127 | helperRight := middle
128 | current := 0
129 | high := len(s) - 1
130 |
131 | for helperLeft <= middle-1 && helperRight <= high {
132 | if helper[helperLeft] <= helper[helperRight] {
133 | s[current] = helper[helperLeft]
134 | helperLeft++
135 | } else {
136 | s[current] = helper[helperRight]
137 | helperRight++
138 | }
139 | current++
140 | }
141 |
142 | for helperLeft <= middle-1 {
143 | s[current] = helper[helperLeft]
144 | current++
145 | helperLeft++
146 | }
147 | }
148 |
149 | func mergesortv1(s []int) {
150 | len := len(s)
151 |
152 | if len > 1 {
153 | middle := len / 2
154 |
155 | var wg sync.WaitGroup
156 | wg.Add(2)
157 |
158 | // First half
159 | go func() {
160 | defer wg.Done()
161 | mergesortv1(s[:middle])
162 | }()
163 |
164 | // Second half
165 | go func() {
166 | defer wg.Done()
167 | mergesortv1(s[middle:])
168 | }()
169 |
170 | // Wait until the two goroutines have completed
171 | wg.Wait()
172 | merge(s, middle)
173 | }
174 | }
175 |
176 | /* Sequential */
177 |
178 | func mergesort(s []int) {
179 | if len(s) > 1 {
180 | middle := len(s) / 2
181 | mergesort(s[:middle])
182 | mergesort(s[middle:])
183 | merge(s, middle)
184 | }
185 | }
186 |
187 | func mergesortv2(s []int) {
188 | len := len(s)
189 |
190 | if len > 1 {
191 | if len <= max { // Sequential
192 | mergesort(s)
193 | } else { // Parallel
194 | middle := len / 2
195 |
196 | var wg sync.WaitGroup
197 | wg.Add(2)
198 |
199 | go func() {
200 | defer wg.Done()
201 | mergesortv2(s[:middle])
202 | }()
203 |
204 | go func() {
205 | defer wg.Done()
206 | mergesortv2(s[middle:])
207 | }()
208 |
209 | wg.Wait()
210 | merge(s, middle)
211 | }
212 | }
213 | }
214 |
215 | func mergesortv3(s []int) {
216 | len := len(s)
217 |
218 | if len > 1 {
219 | if len <= max { // Sequential
220 | mergesort(s)
221 | } else { // Parallel
222 | middle := len / 2
223 |
224 | var wg sync.WaitGroup
225 | wg.Add(1)
226 |
227 | go func() {
228 | defer wg.Done()
229 | mergesortv3(s[:middle])
230 | }()
231 |
232 | mergesortv3(s[middle:])
233 |
234 | wg.Wait()
235 | merge(s, middle)
236 | }
237 | }
238 | }
239 |
240 | func main() {
241 | version := "v1"
242 | if len(os.Args) == 2 {
243 | version = os.Args[1]
244 | }
245 |
246 | f, err := os.OpenFile(version+".trace", os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0644)
247 | if err != nil {
248 | fmt.Println("Error:", err)
249 | return
250 | }
251 | trace.Start(f)
252 | defer trace.Stop()
253 |
254 | switch version {
255 | case "v1":
256 | mergesortv1(s)
257 | case "v2":
258 | mergesortv2(s)
259 | case "v3":
260 | mergesortv3(s)
261 | }
262 |
263 | }
264 |
--------------------------------------------------------------------------------
/code/gomaxprocs/mergesort_test.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import "testing"
4 |
5 | func Benchmark_mergesortv1(b *testing.B) {
6 | for i := 0; i < b.N; i++ {
7 | mergesortv1(s)
8 | }
9 | }
10 |
11 | func Benchmark_mergesortv2(b *testing.B) {
12 | for i := 0; i < b.N; i++ {
13 | mergesortv2(s)
14 | }
15 | }
16 |
17 |
18 | func Benchmark_mergesortv3(b *testing.B) {
19 | for i := 0; i < b.N; i++ {
20 | mergesortv3(s)
21 | }
22 | }
--------------------------------------------------------------------------------
/code/inline/inline.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import "fmt"
4 |
5 | func f() int {
6 | return 2
7 | }
8 |
9 | func main() {
10 | x := f()
11 | fmt.Println(x)
12 | }
13 |
--------------------------------------------------------------------------------
/code/inline/readme.md:
--------------------------------------------------------------------------------
1 | go build -gcflags="-m" inline.go
2 |
--------------------------------------------------------------------------------
/code/map-access/1-map_test.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "math/rand"
5 | "strconv"
6 | "testing"
7 | )
8 |
9 | var NumItems int = 1000000
10 |
11 | func BenchmarkMapStringKeys(b *testing.B) {
12 | m := make(map[string]string)
13 | k := make([]string, 0)
14 |
15 | for i := 0; i < NumItems; i++ {
16 | key := strconv.Itoa(rand.Intn(NumItems))
17 | //key += ` is the key value that is being used. `
18 | key += ` is the key value that is being used and a shakespeare sonnet. ` + sonnet106
19 | m[key] = "value" + strconv.Itoa(i)
20 | k = append(k, key)
21 | }
22 |
23 | i := 0
24 | l := len(m)
25 |
26 | b.ResetTimer()
27 | for n := 0; n < b.N; n++ {
28 | if _, ok := m[k[i]]; ok {
29 | }
30 |
31 | i++
32 | if i >= l {
33 | i = 0
34 | }
35 | }
36 | }
37 |
38 | func BenchmarkMapIntKeys(b *testing.B) {
39 | m := make(map[int]string)
40 | k := make([]int, 0)
41 |
42 | for i := 0; i < NumItems; i++ {
43 | key := rand.Intn(NumItems)
44 | m[key] = "value" + strconv.Itoa(i)
45 | k = append(k, key)
46 | }
47 |
48 | i := 0
49 | l := len(m)
50 |
51 | b.ResetTimer()
52 | for n := 0; n < b.N; n++ {
53 | if _, ok := m[k[i]]; ok {
54 | }
55 |
56 | i++
57 | if i >= l {
58 | i = 0
59 | }
60 | }
61 | }
62 |
63 | var sonnet106 = `When in the chronicle of wasted time
64 | I see descriptions of the fairest wights,
65 | And beauty making beautiful old rhyme
66 | In praise of ladies dead, and lovely knights,
67 | Then, in the blazon of sweet beauty’s best,
68 | Of hand, of foot, of lip, of eye, of brow,
69 | I see their antique pen would have express’d
70 | Even such a beauty as you master now.
71 | So all their praises are but prophecies
72 | Of this our time, all you prefiguring;
73 | And, for they look’d but with divining eyes,
74 | They had not skill enough your worth to sing:
75 | For we, which now behold these present days,
76 | Had eyes to wonder, but lack tongues to praise.`
77 |
--------------------------------------------------------------------------------
/code/panic/a-panic-program.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | func main() {
4 | example(make([]string, 2, 4), "hello", 10)
5 | }
6 |
7 | //go:noinline
8 | func example(slice []string, str string, i int) {
9 | panic("Want stack trace")
10 | }
11 |
--------------------------------------------------------------------------------
/code/panic/a-panic-program.md:
--------------------------------------------------------------------------------
1 | * The code panics deliberately to show a stack trace.
2 | * Shows that the hex value is the program counter (PC), which points to the instruction after the one that crashed.
3 |
4 | Ref:
5 | https://www.ardanlabs.com/blog/2018/08/scheduling-in-go-part1.html
6 |
--------------------------------------------------------------------------------
/code/parallelize/rand_strings_test.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "math/rand"
5 | "sync"
6 | "testing"
7 | "time"
8 | )
9 |
10 | func init() {
11 | rand.Seed(time.Now().UnixNano())
12 | }
13 |
14 | var s []string
15 |
16 | func RandString_Sequential() {
17 | for i := 0; i < 1000; i++ {
18 | s = append(s, RandString(100))
19 | }
20 | }
21 |
22 | func Benchmark_Sequential(b *testing.B) {
23 | for i := 0; i < b.N; i++ {
24 | RandString_Sequential()
25 | }
26 | }
27 |
28 | func RandString_Concurrent() {
29 | for i := 0; i < 100000; i++ {
30 | go func() {
31 | s = append(s, RandString(100)) // unsynchronized append shared across goroutines: this is a data race
32 | }()
33 | }
34 | }
35 |
36 | func Benchmark_Concurrent(b *testing.B) {
37 | for i := 0; i < b.N; i++ {
38 | RandString_Concurrent()
39 | }
40 | }
41 |
42 | var mu sync.Mutex
43 |
44 | func RandString_Locked_Mutex() {
45 | for i := 0; i < 100000; i++ {
46 | go func() {
47 | mu.Lock()
48 | defer mu.Unlock()
49 |
50 | s = append(s, RandString(100))
51 | }()
52 | }
53 | }
54 |
55 | func Benchmark_Locked_Mutex(b *testing.B) {
56 | for i := 0; i < b.N; i++ {
57 | RandString_Locked_Mutex()
58 | }
59 | }
60 |
61 | var letters = []rune("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ")
62 |
63 | func RandString(n int) string {
64 | b := make([]rune, n)
65 | for i := range b {
66 | b[i] = letters[rand.Intn(len(letters))]
67 | }
68 | //time.Sleep(10 * time.Microsecond)
69 | return string(b)
70 | }
71 |
--------------------------------------------------------------------------------
/code/profiler-labels/1_test.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "context"
5 | "regexp"
6 | "runtime/pprof"
7 | "testing"
8 | )
9 |
10 | var ss = []string{
11 | `^[a-z]+\[[0-9]+\]$`,
12 | `foo.*`,
13 | `foo(.?)`,
14 | `foo.?`,
15 | `a(x*)b(y|z)c`,
16 | }
17 |
18 | func f(s string) {
19 | labels := pprof.Labels("pat", s)
20 | pprof.Do(context.Background(), labels, func(ctx context.Context) {
21 | // Do some work...
22 | r := regexp.MustCompile(s)
23 | _ = r
24 |
25 | //go update(ctx) // propagates labels in ctx.
26 | })
27 | }
28 |
29 | func bench_f(b *testing.B, s string) {
30 | for i := 0; i < b.N; i++ {
31 | f(s)
32 | }
33 | }
34 |
35 | func Benchmark_0f(b *testing.B) {
36 | bench_f(b, ss[0])
37 | }
38 |
39 | func Benchmark_1f(b *testing.B) {
40 | bench_f(b, ss[1])
41 | }
42 |
--------------------------------------------------------------------------------
/code/profiler/main.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "fmt"
5 | "os"
6 | "regexp"
7 | )
8 |
9 | func main() {
10 | var data string
11 | if len(os.Args) == 2 {
12 | data = os.Args[1]
13 | }
14 |
15 | id, ok := isGopher(data)
16 | if !ok {
17 | id = "stranger"
18 | }
19 | fmt.Printf("hello, %s\n", id)
20 | }
21 |
22 | func isGopher(email string) (string, bool) {
23 | re := regexp.MustCompile("^([[:alpha:]]+)@golang.org$")
24 | match := re.FindStringSubmatch(email)
25 | if len(match) == 2 {
26 | return match[1], true
27 | }
28 | return "", false
29 | }
30 |
--------------------------------------------------------------------------------
/code/profiler/main_test.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import "testing"
4 |
5 | func Test_isGopher(t *testing.T) {
6 |
7 | tcs := []struct {
8 | in string
9 | exp bool
10 | expId string
11 | }{
12 | {
13 | "",
14 | false,
15 | "",
16 | },
17 | {
18 | "a@email.com",
19 | false,
20 | "",
21 | },
22 | {
23 | "a@golang.org",
24 | true,
25 | "a",
26 | },
27 | }
28 |
29 | for _, tc := range tcs {
30 | id, ok := isGopher(tc.in)
31 | if ok != tc.exp {
32 | t.Errorf("For input %s, expected: %t but got: %t", tc.in, tc.exp, ok)
33 | }
34 | if id != tc.expId {
35 | t.Errorf("For input %s, expected: %s but got: %s", tc.in, tc.expId, id)
36 | }
37 | }
38 | }
39 |
40 | func Benchmark_isGopher(b *testing.B) {
41 |
42 | tcs := []struct {
43 | in string
44 | exp bool
45 | expId string
46 | }{
47 | {
48 | "a@golang.org",
49 | true,
50 | "a",
51 | },
52 | }
53 |
54 | for i := 0; i < b.N; i++ {
55 | isGopher(tcs[0].in)
56 | }
57 | }
58 |
--------------------------------------------------------------------------------
/code/profiler/readme.md:
--------------------------------------------------------------------------------
1 | ```
2 | go test -bench=. -cpuprofile=cpu.pprof
3 |
4 | go tool pprof cpu.pprof
5 |
6 | go-torch --binaryname web.test -b cpu.pprof
7 |
8 | pprof -http=:8080 cpu.pprof
9 | ```
10 |
--------------------------------------------------------------------------------
/code/regex/1-regex-compile_test.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "regexp"
5 | "testing"
6 | )
7 |
8 | var testRegexp string = `^[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]+$`
9 |
10 | func BenchmarkMatchString(b *testing.B) {
11 | for n := 0; n < b.N; n++ {
12 | _, err := regexp.MatchString(testRegexp, "jsmith@example.com")
13 | if err != nil {
14 | panic(err)
15 | }
16 | }
17 | }
18 |
19 | func BenchmarkMatchStringCompiled(b *testing.B) {
20 | r, err := regexp.Compile(testRegexp)
21 | if err != nil {
22 | panic(err)
23 | }
24 |
25 | b.ResetTimer()
26 | for n := 0; n < b.N; n++ {
27 | r.MatchString("jsmith@example.com")
28 | }
29 | }
30 |
--------------------------------------------------------------------------------
/code/responsewriter/main_test.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "fmt"
5 | "net/http"
6 | "net/http/httptest"
7 | "testing"
8 | )
9 |
10 | func withoutSetHeader(w http.ResponseWriter, r *http.Request) {
11 | fmt.Fprintln(w, "hello, stranger")
12 | }
13 |
14 | func Benchmark_withoutSetHeader(b *testing.B) {
15 | for i := 0; i < b.N; i++ {
16 | req, _ := http.NewRequest("GET", "/", nil)
17 |
18 | rr := httptest.NewRecorder()
19 | handler := http.HandlerFunc(withoutSetHeader)
20 |
21 | handler.ServeHTTP(rr, req)
22 | }
23 |
24 | }
25 |
26 | func withSetHeader(w http.ResponseWriter, r *http.Request) {
27 | w.Header().Set("Content-Type", "text/plain")
28 | fmt.Fprintln(w, "hello, stranger")
29 | }
30 |
31 | func Benchmark_withSetHeader(b *testing.B) {
32 | for i := 0; i < b.N; i++ {
33 | req, _ := http.NewRequest("GET", "/", nil)
34 |
35 | rr := httptest.NewRecorder()
36 | handler := http.HandlerFunc(withSetHeader)
37 |
38 | handler.ServeHTTP(rr, req)
39 | }
40 | }
41 |
--------------------------------------------------------------------------------
/code/slices/1-array.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | func main() {
4 | var a [5]int
5 | var b [6]int
6 |
7 | b = a // compile error: [5]int and [6]int are distinct types
8 | }
9 |
--------------------------------------------------------------------------------
/code/slices/2-slice-of-array.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | func main() {
4 | var a [5]int
5 | s := a[0:3]
6 | s = a[:3]
7 | s = a[3:]
_ = s // ensure s is considered used so the file compiles
8 |
9 | // negative indexing is not allowed
10 | // s = a[0:-2] // compile error
11 | }
12 |
--------------------------------------------------------------------------------
/code/slices/3-slice-backed-by-array.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import "fmt"
4 |
5 | func main() {
6 | a := [5]int{1, 2, 3, 4, 5}
7 | s := a[0:3]
8 | s[0] = 11
9 | fmt.Println(a, s)
10 |
11 | fmt.Printf("%p %p\n", &a, &s)
12 | fmt.Printf("%p %p\n", &a[0], &s[0])
13 | }
14 |
--------------------------------------------------------------------------------
/code/slices/4-appending-to-slice.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import "fmt"
4 |
5 | func main() {
6 | a := [5]int{1, 2, 3, 4, 5}
7 | s := a[0:3]
8 | fmt.Println(a, s)
9 |
10 | s = append(s, 9)
11 | fmt.Println(a, s)
12 |
13 | s = append(s, 19)
14 | fmt.Println(a, s)
15 |
16 | s = append(s, 99)
17 | fmt.Println(a, s)
18 | }
19 |
--------------------------------------------------------------------------------
/code/slices/5-make-slice.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | func main() {
4 | months := make([]int, 0, 12)
5 | months = append(months, 1)
6 | months = append(months, 7)
7 | }
8 |
--------------------------------------------------------------------------------
/code/stack-and-heap/h.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | type T struct {
4 | a int
5 | }
6 |
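// h returns a pointer to a local T, so escape analysis allocates the T on the heap.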
7 | func h() *T {
8 | return &T{}
9 | }
10 |
11 | func main() {
12 | h()
13 | }
14 |
--------------------------------------------------------------------------------
/code/stack-and-heap/h_test.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "os"
5 | "runtime/trace"
6 | "testing"
7 | )
8 |
9 | func Benchmark_h(b *testing.B) {
10 | var t *T
11 |
12 | f, err := os.Create("h.prof")
13 | if err != nil {
14 | panic(err)
15 | }
16 | defer f.Close()
17 |
18 | err = trace.Start(f)
19 | if err != nil {
20 | panic(err)
21 | }
22 |
23 | for i := 0; i < b.N; i++ {
24 | t = h()
25 | }
26 |
27 | trace.Stop()
28 |
29 | b.StopTimer()
30 |
31 | _ = t
32 | }
33 |
--------------------------------------------------------------------------------
/code/stack-and-heap/s.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | type T struct {
4 | a int
5 | }
6 |
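// s returns a T by value, so it can stay on the stack and needs no heap allocation.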
7 | func s() T {
8 | return T{}
9 | }
10 |
11 | func main() {
12 | s()
13 | }
14 |
--------------------------------------------------------------------------------
/code/stack-and-heap/s_test.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "os"
5 | "runtime/trace"
6 | "testing"
7 | )
8 |
9 | func Benchmark_s(b *testing.B) {
10 | var t T
11 |
12 | f, err := os.Create("s.prof")
13 | if err != nil {
14 | panic(err)
15 | }
16 | defer f.Close()
17 |
18 | err = trace.Start(f)
19 | if err != nil {
20 | panic(err)
21 | }
22 |
23 | for i := 0; i < b.N; i++ {
24 | t = s()
25 | }
26 |
27 | trace.Stop()
28 |
29 | b.StopTimer()
30 |
31 | _ = t
32 | }
33 |
--------------------------------------------------------------------------------
/code/string-concat/1-string-concat_test.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "bytes"
5 | "strings"
6 | "testing"
7 | )
8 |
9 | var strLen int = 1000
10 |
11 | func BenchmarkConcatString(b *testing.B) {
12 | var str string
13 |
14 | i := 0
15 |
16 | b.ResetTimer()
17 | for n := 0; n < b.N; n++ {
18 | str += "x"
19 |
20 | i++
21 | if i >= strLen {
22 | i = 0
23 | str = ""
24 | }
25 | }
26 | }
27 |
28 | func BenchmarkConcatBuffer(b *testing.B) {
29 | var buffer bytes.Buffer
30 |
31 | i := 0
32 |
33 | b.ResetTimer()
34 | for n := 0; n < b.N; n++ {
35 | buffer.WriteString("x")
36 |
37 | i++
38 | if i >= strLen {
39 | i = 0
40 | buffer = bytes.Buffer{}
41 | }
42 | }
43 | }
44 |
45 | func BenchmarkConcatBuilder(b *testing.B) {
46 | var builder strings.Builder
47 |
48 | i := 0
49 |
50 | b.ResetTimer()
51 | for n := 0; n < b.N; n++ {
52 | builder.WriteString("x")
53 |
54 | i++
55 | if i >= strLen {
56 | i = 0
57 | builder = strings.Builder{}
58 | }
59 | }
60 | }
61 |
--------------------------------------------------------------------------------
/code/sync-once/1.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "html/template"
5 | )
6 |
7 | var s = `
8 | <h1>{{.PageTitle}}</h1>
9 | <ul>
10 | {{range .Todos}}
11 | {{if .Done}}
12 | <li class="done">{{.Title}}</li>
13 | {{else}}
14 | <li>{{.Title}}</li>
15 | {{end}}
16 | {{end}}
17 | </ul>
18 | `
19 |
20 | var t *template.Template
21 |
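// f parses the template on every call; compare with 2.go (parse once in main) and 3.go (sync.Once).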
22 | func f() {
23 | t = template.Must(template.New("").Parse(s))
24 | _ = t
25 |
26 | // do task with template
27 | }
28 |
29 | func main() {
30 | for i := 0; i < 10000; i++ {
31 | f()
32 | }
33 | }
34 |
--------------------------------------------------------------------------------
/code/sync-once/2.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "html/template"
5 | )
6 |
7 | var s = `
8 | {{.PageTitle}}
9 |
10 | {{range .Todos}}
11 | {{if .Done}}
12 | {{.Title}}
13 | {{else}}
14 | {{.Title}}
15 | {{end}}
16 | {{end}}
17 |
18 | `
19 |
20 | var t *template.Template
21 |
22 | func f() {
23 |
24 | // do task with template
25 | }
26 |
27 | func main() {
28 | // costs time at load and maybe unused
29 | t = template.Must(template.New("").Parse(s))
30 | _ = t
31 |
32 | for i := 0; i < 10000; i++ {
33 | f()
34 | }
35 | }
36 |
--------------------------------------------------------------------------------
/code/sync-once/3.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "fmt"
5 | "html/template"
6 | "sync"
7 | )
8 |
9 | var s = `
10 | {{.PageTitle}}
11 |
12 | {{range .Todos}}
13 | {{if .Done}}
14 | {{.Title}}
15 | {{else}}
16 | {{.Title}}
17 | {{end}}
18 | {{end}}
19 |
20 | `
21 |
22 | var t *template.Template
23 | var o sync.Once
24 |
25 | func g() {
26 | fmt.Println("within g()")
27 | t = template.Must(template.New("").Parse(s))
28 | _ = t
29 | }
30 |
31 | func f() {
32 | // only done once and when used
33 | o.Do(g)
34 |
35 | // do task with template
36 |
37 | }
38 |
39 | func main() {
40 | for i := 0; i < 10000; i++ {
41 | f()
42 | }
43 | }
44 |
--------------------------------------------------------------------------------
/code/sync.pool/1_test.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "bytes"
5 | "testing"
6 | )
7 |
8 | func Benchmark_f1(b *testing.B) {
9 | for i := 0; i < b.N; i++ {
10 | f1()
11 | }
12 | }
13 |
14 | func f1() {
15 | s := &bytes.Buffer{}
16 | s.Write([]byte("dirty"))
17 |
18 | return
19 | }
20 |
--------------------------------------------------------------------------------
/code/sync.pool/2_test.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "bytes"
5 | "sync"
6 | "testing"
7 | )
8 |
9 | var pool2 = sync.Pool{
10 | New: func() interface{} {
11 | return &bytes.Buffer{}
12 | },
13 | }
14 |
15 | func Benchmark_f2(b *testing.B) {
16 | for i := 0; i < b.N; i++ {
17 | f2()
18 | }
19 | }
20 |
21 | func f2() {
22 | // When getting from a Pool, you need to cast
23 | s := pool2.Get().(*bytes.Buffer)
24 | // We write to the object
25 | s.Write([]byte("dirty"))
26 | // Then put it back
27 | pool2.Put(s)
28 |
29 | return
30 | }
31 |
--------------------------------------------------------------------------------
/code/sync.pool/book1_test.go:
--------------------------------------------------------------------------------
1 | // run: go test -bench=write1 -benchmem
2 | // vs
3 | // go test -bench=write2 -benchmem
4 |
5 | // study: difference in allocations and speed between the versions
6 | // expected: the one with sync.Pool should have fewer allocations.
7 | package main
8 |
9 | import (
10 | "encoding/json"
11 | "testing"
12 | )
13 |
14 | type Book struct {
15 | Author string
16 | Title string
17 | ISBN string
18 | }
19 |
20 | func write1(a, t string) {
21 | b := &Book{}
22 | b.Author = a
23 | b.Title = t
24 | b.ISBN = "abcd"
25 | data, _ := json.Marshal(b)
26 | _ = data
27 | }
28 |
29 | func Benchmark_write1(b *testing.B) {
30 | for i := 0; i < b.N; i++ {
31 | write1("harry", "rowling")
32 | }
33 | }
34 |
--------------------------------------------------------------------------------
/code/sync.pool/book2_test.go:
--------------------------------------------------------------------------------
1 | // run: go test -bench=write1 -benchmem
2 | // vs
3 | // go test -bench=write2 -benchmem
4 |
5 | // study: difference in allocations and speed between the versions
6 | // expected: the one with sync.Pool should have fewer allocations.
7 | package main
8 |
9 | import (
10 | "encoding/json"
11 | "sync"
12 | "testing"
13 | )
14 |
15 | type Book2 struct {
16 | Author string
17 | Title string
18 | ISBN string
19 | }
20 |
21 | var bookPool = sync.Pool{
22 | New: func() interface{} {
23 | return &Book2{}
24 | },
25 | }
26 |
27 | func write2(a, t string) {
28 | b := bookPool.Get().(*Book2)
29 | b.Author = a
30 | b.Title = t
31 | b.ISBN = "abcd"
32 | data, _ := json.Marshal(b)
33 | _ = data
34 |
35 | bookPool.Put(b)
36 | }
37 |
38 | func Benchmark_write2(b *testing.B) {
39 | for i := 0; i < b.N; i++ {
40 | write2("harry", "rowling")
41 | }
42 | }
43 |
--------------------------------------------------------------------------------
/code/testing/search_test.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "strings"
5 | "testing"
6 | )
7 |
8 | func BeginsWith(s, pat string) bool {
9 | return strings.HasPrefix(s, pat)
10 |
11 | }
12 |
13 | func Test_BeginsWith(t *testing.T) {
14 | tc := []struct {
15 | s, pat string
16 | exp bool
17 | }{
18 | {"GoLang", "Go", true},
19 | {"GoLang", "Java", false},
20 | {"GoLang is awesome", "awe", false},
21 | {"awesome is GoLang. - Yoda", "awe", true},
22 | }
23 |
24 | for _, tt := range tc {
25 | if BeginsWith(tt.s, tt.pat) != tt.exp {
26 | t.Fail()
27 | }
28 | }
29 | }
30 |
31 | func Benchmark_BeginsWith(b *testing.B) {
32 | for i := 0; i < b.N; i++ {
33 | BeginsWith("GoLang", "Go")
34 | }
35 | }
36 |
37 | // forced allocations for benchmem
38 | /*
39 | func x() *string {
40 | s := "hello world there"
41 | return &s
42 | }
43 |
44 | func Benchmark_x(b *testing.B) {
45 | for i := 0; i < b.N; i++ {
46 | a := x()
47 | *a += *a
48 | _ = a
49 | }
50 | }
51 | */
52 |
--------------------------------------------------------------------------------
/code/tracing/mergesort.go:
--------------------------------------------------------------------------------
1 | // ref: https://hackernoon.com/parallel-merge-sort-in-go-fe14c1bc006
2 |
3 | // go run main.go [v1 (default) | v2 | v3 ]
4 | // GOMAXPROCS=1 go run mergesort.go v1 && go tool trace v1.trace
5 | // GOMAXPROCS=8 go run mergesort.go v1 && go tool trace v1.trace
6 | // GOMAXPROCS=18 go run mergesort.go v1 && go tool trace v1.trace
7 | package main
8 |
9 | import (
10 | "fmt"
11 | "os"
12 | "runtime/trace"
13 | "sync"
14 | )
15 |
16 | const max = 1 << 11
17 |
18 | var s = []int{
19 | 89, 123, 12, 9, 198, 1546, 108, 872, 93,
20 | }
21 |
22 | func merge(s []int, middle int) {
23 | helper := make([]int, len(s))
24 | copy(helper, s)
25 |
26 | helperLeft := 0
27 | helperRight := middle
28 | current := 0
29 | high := len(s) - 1
30 |
31 | for helperLeft <= middle-1 && helperRight <= high {
32 | if helper[helperLeft] <= helper[helperRight] {
33 | s[current] = helper[helperLeft]
34 | helperLeft++
35 | } else {
36 | s[current] = helper[helperRight]
37 | helperRight++
38 | }
39 | current++
40 | }
41 |
42 | for helperLeft <= middle-1 {
43 | s[current] = helper[helperLeft]
44 | current++
45 | helperLeft++
46 | }
47 | }
48 |
49 | func mergesortv1(s []int) {
50 | len := len(s)
51 |
52 | if len > 1 {
53 | middle := len / 2
54 |
55 | var wg sync.WaitGroup
56 | wg.Add(2)
57 |
58 | // First half
59 | go func() {
60 | defer wg.Done()
61 | mergesortv1(s[:middle])
62 | }()
63 |
64 | // Second half
65 | go func() {
66 | defer wg.Done()
67 | mergesortv1(s[middle:])
68 | }()
69 |
70 | // Wait that the two goroutines are completed
71 | wg.Wait()
72 | merge(s, middle)
73 | }
74 | }
75 |
76 | /* Sequential */
77 |
78 | func mergesort(s []int) {
79 | if len(s) > 1 {
80 | middle := len(s) / 2
81 | mergesort(s[:middle])
82 | mergesort(s[middle:])
83 | merge(s, middle)
84 | }
85 | }
86 |
87 | func mergesortv2(s []int) {
88 | len := len(s)
89 |
90 | if len > 1 {
91 | if len <= max { // Sequential
92 | mergesort(s)
93 | } else { // Parallel
94 | middle := len / 2
95 |
96 | var wg sync.WaitGroup
97 | wg.Add(2)
98 |
99 | go func() {
100 | defer wg.Done()
101 | mergesortv2(s[:middle])
102 | }()
103 |
104 | go func() {
105 | defer wg.Done()
106 | mergesortv2(s[middle:])
107 | }()
108 |
109 | wg.Wait()
110 | merge(s, middle)
111 | }
112 | }
113 | }
114 |
115 | func mergesortv3(s []int) {
116 | len := len(s)
117 |
118 | if len > 1 {
119 | if len <= max { // Sequential
120 | mergesort(s)
121 | } else { // Parallel
122 | middle := len / 2
123 |
124 | var wg sync.WaitGroup
125 | wg.Add(1)
126 |
127 | go func() {
128 | defer wg.Done()
129 | mergesortv3(s[:middle])
130 | }()
131 |
132 | mergesortv3(s[middle:])
133 |
134 | wg.Wait()
135 | merge(s, middle)
136 | }
137 | }
138 | }
139 |
140 | func main() {
141 | version := "v1"
142 | if len(os.Args) == 2 {
143 | version = os.Args[1]
144 | }
145 |
146 | f, err := os.OpenFile(version+".trace", os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0644)
147 | if err != nil {
148 | fmt.Println("Error:", err)
149 | return
150 | }
151 | trace.Start(f)
152 | defer trace.Stop()
153 |
154 | switch version {
155 | case "v1":
156 | mergesortv1(s)
157 | case "v2":
158 | mergesortv2(s)
159 | case "v3":
160 | mergesortv3(s)
161 | }
162 |
163 | }
164 |
--------------------------------------------------------------------------------
/code/tracing/mergesort_test.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import "testing"
4 |
5 | func Benchmark_mergesortv1(b *testing.B) {
6 | for i := 0; i < b.N; i++ {
7 | mergesortv1(s)
8 | }
9 | }
10 |
11 | func Benchmark_mergesortv2(b *testing.B) {
12 | for i := 0; i < b.N; i++ {
13 | mergesortv2(s)
14 | }
15 | }
16 |
17 | func Benchmark_mergesortv3(b *testing.B) {
18 | for i := 0; i < b.N; i++ {
19 | mergesortv3(s)
20 | }
21 | }
22 |
23 | func Test_mergesortv1(t *testing.T) {
24 | inp := []int{89, 123, 12, 9, 198, 1546, 108, 872, 93}
25 | exp := []int{9, 12, 89, 93, 108, 123, 198, 872, 1546}
26 | mergesortv1(inp)
27 | 	if inp[0] != exp[0] || inp[len(exp)-1] != exp[len(exp)-1] {
28 | t.Errorf("Test failed")
29 | }
30 | }
31 |
--------------------------------------------------------------------------------
/images/gogc/gogc-100.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sathishvj/optimizing-go-programs/cad75ef1e99faf139f0e637a5b7bac6b8776306e/images/gogc/gogc-100.png
--------------------------------------------------------------------------------
/images/gogc/gogc-200.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sathishvj/optimizing-go-programs/cad75ef1e99faf139f0e637a5b7bac6b8776306e/images/gogc/gogc-200.png
--------------------------------------------------------------------------------
/images/gogc/gogc-50.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sathishvj/optimizing-go-programs/cad75ef1e99faf139f0e637a5b7bac6b8776306e/images/gogc/gogc-50.png
--------------------------------------------------------------------------------
/images/gogc/gogc-off.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sathishvj/optimizing-go-programs/cad75ef1e99faf139f0e637a5b7bac6b8776306e/images/gogc/gogc-off.png
--------------------------------------------------------------------------------
/images/gomaxprocs/gomaxprocs-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sathishvj/optimizing-go-programs/cad75ef1e99faf139f0e637a5b7bac6b8776306e/images/gomaxprocs/gomaxprocs-1.png
--------------------------------------------------------------------------------
/images/gomaxprocs/gomaxprocs-18.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sathishvj/optimizing-go-programs/cad75ef1e99faf139f0e637a5b7bac6b8776306e/images/gomaxprocs/gomaxprocs-18.png
--------------------------------------------------------------------------------
/images/gomaxprocs/gomaxprocs-8.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sathishvj/optimizing-go-programs/cad75ef1e99faf139f0e637a5b7bac6b8776306e/images/gomaxprocs/gomaxprocs-8.png
--------------------------------------------------------------------------------
/images/tracing/1-OS-process-and-its-threads.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sathishvj/optimizing-go-programs/cad75ef1e99faf139f0e637a5b7bac6b8776306e/images/tracing/1-OS-process-and-its-threads.png
--------------------------------------------------------------------------------
/images/tracing/2-goroutines-on-a-thread.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sathishvj/optimizing-go-programs/cad75ef1e99faf139f0e637a5b7bac6b8776306e/images/tracing/2-goroutines-on-a-thread.png
--------------------------------------------------------------------------------
/images/tracing/3-goroutines-on-a-blocking-thread.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sathishvj/optimizing-go-programs/cad75ef1e99faf139f0e637a5b7bac6b8776306e/images/tracing/3-goroutines-on-a-blocking-thread.png
--------------------------------------------------------------------------------
/images/tracing/4-concurrency-and-parallelism.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sathishvj/optimizing-go-programs/cad75ef1e99faf139f0e637a5b7bac6b8776306e/images/tracing/4-concurrency-and-parallelism.png
--------------------------------------------------------------------------------
/images/tracing/tracing-gc.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sathishvj/optimizing-go-programs/cad75ef1e99faf139f0e637a5b7bac6b8776306e/images/tracing/tracing-gc.png
--------------------------------------------------------------------------------
/images/tracing/view-goroutine.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sathishvj/optimizing-go-programs/cad75ef1e99faf139f0e637a5b7bac6b8776306e/images/tracing/view-goroutine.png
--------------------------------------------------------------------------------
/images/tracing/view-trace.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sathishvj/optimizing-go-programs/cad75ef1e99faf139f0e637a5b7bac6b8776306e/images/tracing/view-trace.png
--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
1 | ## Optimizing Go Programs
2 |
3 | This is a collation of tools and techniques that will help optimize Go programs. It is divided into two parts - Tools and Techniques. Tools look at the go command line tools that help you instrument your code. Techniques look at a list of ideas that you could potentially use to gain performance.
4 |
5 | At the end is also a large list of references to the posts that I went through to understand it myself. I have personally worked on, reworked, tried, and tested all the code (from about April 2019 to June 2019 - so far). However, as the large list of references shows, I am indebted to others. I have often liberally adopted and adapted their thoughts and, occasionally, their notes.
6 |
7 | ## Go Tools for Optimization
8 | * [Testing](#testing)
9 | * [Coverage](#coverage)
10 | * [Benchmarking](#benchmarking)
11 | * [Profiling](#profiling)
12 | * [Tracing](#tracing)
13 | - how to read the views
14 | - tagging sections
15 | * environment variables
16 | - [GOMAXPROCS](#gomaxprocs)
17 | - [GOGC](#gogc)
18 | * go memory analysis
19 | - [stack and heap](#stack-and-heap)
20 | - [escape analysis](#escape-analysis)
21 | * [Inlining](#inlining)
22 |
23 | ## Go Techniques for Optimization
24 | * [Parallelize CPU Work](#parallelize-cpu-work)
25 | * [Bounds Check Elimination](#bounds-check-elimination)
26 | * [sync Pools](#syncpools)
27 | * [sync once and lazy initializations](#synconce-for-lazy-initialization)
28 | * [Arrays and Slices](#arrays-and-slices)
29 | - how do slices work internally. allocation and reuse.
30 | * [String Concatenation](#string-concatenation)
31 | * [Map Keys: int vs string](#map-keys-int-vs-string)
32 | * [JSON Unmarshaling](#json-unmarshaling)
33 | * [File I/O](#file-io)
34 | * [Regexp Compilation](#regexp-compilation)
35 | * [Defer](#defer)
36 | * [fmt vs strconv](#fmt-vs-strconv)
37 | * [Explicitly Set Derived Values](#explicitly-set-derived-values)
38 |
39 | * [Go Performance Patterns](#go-performance-patterns)
40 |
41 | ## Testing
42 |
43 | *What do we need?* The ability to validate and verify our code (before customers test it).
44 |
45 | Unit testing is important enough to be a standard library.
46 |
47 | To write tests in Go:
48 | * the file name must end in ```_test.go```
49 | * the test function should start with ```Test```
50 | * the function signature is ```func Test_someFn(t *testing.T) { ... }```
51 |
52 |
53 | ```code/testing```
54 |
55 | ```
56 | func BeginsWith(s, pat string) bool {
57 | return strings.HasPrefix(s, pat)
58 | }
59 |
60 | func Test_BeginsWith(t *testing.T) {
61 | tc := []struct {
62 | s, pat string
63 | exp bool
64 | }{
65 | {"GoLang", "Go", true},
66 | {"GoLang", "Java", false},
67 | {"GoLang is awesome", "awe", false},
68 | {"awesome is GoLang. - Yoda", "awe", true},
69 | }
70 |
71 | for _, tt := range tc {
72 | if BeginsWith(tt.s, tt.pat) != tt.exp {
73 | t.Fail()
74 | }
75 | }
76 | }
77 | ```
78 |
79 | ```
80 | $ go test -v
81 | === RUN Test_BeginsWith
82 | --- PASS: Test_BeginsWith (0.00s)
83 | PASS
84 | ```
85 |
86 | Testing validates your code. It checks for correctness.
87 |
88 | ```Tip: unit testing first, always.```
89 | ```Tip: keep unit testing running and watching for file changes. (see, codeskyblue/fswatch)```
90 |
91 | p.s. When you run benchmarks, tests are run first.
92 |
93 | ## Coverage
94 |
95 | *What do we need?* So we've written tests, but does it cover all our code?
96 |
97 | The Go tooling also gives you coverage results. Less code is faster code. Tested and covered code is more reliable code.
98 |
99 | ```code/cover```
100 |
101 | ```
102 | go test -covermode=count -coverprofile=count.out fmt
103 | go tool cover -html=count.out
104 | ```
105 |
106 | Red areas have had zero coverage. The brighter green sections have been covered more than the duller green sections.
107 |
108 | For current folder:
109 | ```
110 | go test -covermode=count -coverprofile=count.out
111 | go tool cover -html=count.out
112 | ```
113 |
114 | ```Tip: Keep coverage as a check-in metric objective. Or at least track coverage history in your build tool.```
115 |
116 | ## Benchmarking
117 |
118 | *What do we need?* The ability to instrument specific functions and see where they are spending time or allocating resources.
119 |
120 | Benchmarking checks for optimization.
121 |
122 | ```code/testing```
123 |
124 | ```
125 | func Benchmark_BeginsWith(b *testing.B) {
126 | for i := 0; i < b.N; i++ {
127 | BeginsWith("GoLang", "Go")
128 | }
129 | }
130 | ```
131 |
132 | ```
133 | $ go test -v -bench=. -benchmem
134 | === RUN Test_BeginsWith
135 | --- PASS: Test_BeginsWith (0.00s)
136 | goos: darwin
137 | goarch: amd64
138 | Benchmark_BeginsWith-8 500000000 3.69 ns/op 0 B/op 0 allocs/op
139 | PASS
140 | ```
141 |
142 | Benchmarking functions don't always care about the result (that is checked by unit testing). However, the speed/allocations/blocking of a function could be dependent on the inputs - so test different inputs.
143 |
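As a minimal sketch of testing different inputs (the case names and inputs here are made up, and it assumes the strings import already present in search_test.go), sub-benchmarks via b.Run keep the results separated per input:

```
func Benchmark_BeginsWith_Inputs(b *testing.B) {
	tcs := []struct {
		name, s, pat string
	}{
		{"hit", "GoLang", "Go"},
		{"miss", "GoLang", "Java"},
		{"long", strings.Repeat("x", 1024) + "Go", "Go"},
	}
	for _, tc := range tcs {
		b.Run(tc.name, func(b *testing.B) {
			for i := 0; i < b.N; i++ {
				BeginsWith(tc.s, tc.pat)
			}
		})
	}
}
```

Each case then shows up separately in the output, e.g. as Benchmark_BeginsWith_Inputs/hit, /miss, and so on.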
144 | ```Tip: Map optimization goals to business SLOs and SLAs.```
145 |
146 | ### Benchcmp
147 |
148 | Use benchcmp to easily compare between benchmarks.
149 |
150 | ```
151 | $ go test -run=NONE -bench=. ./... > old.txt
152 | // make changes
153 | $ go test -run=NONE -bench=. ./... > new.txt
154 |
155 | $ benchcmp old.txt new.txt
156 |
157 | benchmark old ns/op new ns/op delta
158 | BenchmarkConcat 523 68.6 -86.88%
159 |
160 | benchmark old allocs new allocs delta
161 | BenchmarkConcat 3 1 -66.67%
162 |
163 | benchmark old bytes new bytes delta
164 | BenchmarkConcat 80 48 -40.00%
165 | ```
166 |
167 | ## Profiling
168 |
169 | *What do we need?* The ability to instrument and analyze execution metrics.
170 |
171 | Package pprof writes runtime profiling data in the format expected by the pprof visualization tool.
172 |
173 | The first step to profiling a Go program is to enable profiling. Support for profiling benchmarks built with the standard testing package is built into go test.
174 |
175 | ```
176 | func isGopher(email string) (string, bool) {
177 | re := regexp.MustCompile("^([[:alpha:]]+)@golang.org$")
178 | match := re.FindStringSubmatch(email)
179 | if len(match) == 2 {
180 | return match[1], true
181 | }
182 | return "", false
183 | }
184 |
185 | func Benchmark_isGopher(b *testing.B) {
186 |
187 | tcs := []struct {
188 | in string
189 | exp bool
190 | expId string
191 | }{
192 | {
193 | "a@golang.org",
194 | true,
195 | "a",
196 | },
197 | }
198 |
199 | for i := 0; i < b.N; i++ {
200 | isGopher(tcs[0].in)
201 | }
202 | }
203 | ```
204 |
205 | ```
206 | go test -bench=. -cpuprofile=cpu.pprof
207 |
208 | go tool pprof cpu.pprof
209 |
210 | go-torch --binaryname web.test -b cpu.pprof
211 | open torch.svg
212 | ```
213 |
214 | More recently (1.10?), pprof got its own UI.
215 |
216 | ```
217 | $ go get github.com/google/pprof
218 | ```
219 |
220 | The tool launches a web UI if the -http flag is provided. For example, to launch the UI with existing profile data, run the following command:
221 |
222 |
223 | ```
224 | pprof -http=:8080 cpu.pprof
225 | ```
226 |
227 | There is also a standard HTTP interface to profiling data. Adding the following line will install handlers under the /debug/pprof/ URL to download live profiles:
228 |
229 | ```
230 | import _ "net/http/pprof"
231 | ```
232 | See the net/http/pprof package for more details.
233 |
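A minimal sketch of wiring this into a service (the localhost:6060 address is just an example, not something this repo's code uses):

```
package main

import (
	"log"
	"net/http"
	_ "net/http/pprof" // registers the /debug/pprof/ handlers on the default mux
)

func main() {
	// ... the application's real work would run here, typically in other goroutines ...

	// Serve the profiling endpoints.
	log.Fatal(http.ListenAndServe("localhost:6060", nil))
}
```

A live CPU profile can then be pulled with something like ```go tool pprof http://localhost:6060/debug/pprof/profile?seconds=30```.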
234 | ## M, P, G
235 |
236 | *Question*: How does concurrency work in Go? How is it different from threads?
237 |
238 | OS Layout
239 |
240 | 
241 |
242 | Goroutines on a Thread
243 |
244 | 
245 |
246 | Goroutines on Blocking Thread
247 |
248 | 
249 |
250 | Concurrency and Parallelism
251 |
252 | 
253 |
254 | ## Tracing
255 |
256 | https://blog.gopheracademy.com/advent-2017/go-execution-tracer/
257 |
258 | Ever wondered how your goroutines are being scheduled by the Go runtime? Ever tried to understand why adding concurrency to your program has not given it better performance? The Go execution tracer can help answer these and other questions and help you diagnose performance issues, e.g., latency, contention, and poor parallelization.
259 |
260 | Data is collected by the tracer without any kind of aggregation or sampling. In some busy applications this may result in a large file.
261 |
262 | While the CPU profiler does a nice job of telling you which function is spending the most CPU time, it does not help you figure out what is preventing a goroutine from running or how the goroutines are being scheduled on the available OS threads. That’s precisely where the tracer really shines.
263 |
264 | ### Ways to get a Trace
265 |
266 | * Using the runtime/trace pkg
267 | This involves calling trace.Start and trace.Stop, and was covered in our “Hello, Tracing” example.
268 |
269 | * Using -trace= test flag
270 | This is useful to collect trace information about code being tested and the test itself.
271 |
272 | ```code/tracing```
273 | ```
274 | go test -trace=a.out && go tool trace a.out
275 | ```
276 |
277 | * Using debug/pprof/trace handler
278 | This is the best method to collect traces from a running web application; a sample command is sketched after this list.
279 |
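Assuming the net/http/pprof import shown in the Profiling section (the host, port, and duration below are placeholders), a trace can be pulled from a running server and opened like this:

```
curl -o trace.out "http://localhost:6060/debug/pprof/trace?seconds=5"
go tool trace trace.out
```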
280 | ### View Trace
281 |
282 | ```
283 | go tool trace trace_file.out
284 | ```
285 |
286 | 
287 |
288 | 1. Timeline
289 | Shows the time during the execution and the units of time may change depending on the navigation. One can navigate the timeline by using keyboard shortcuts (WASD keys, just like video games).
290 | 2. Heap
291 | Shows memory allocations during the execution; this can be really useful to find memory leaks and to check how much memory the garbage collector is able to free at each run.
292 | 3. Goroutines
293 | Shows how many goroutines are running and how many are runnable (waiting to be scheduled) at each point in time. A high number of runnable goroutines may indicate scheduling contention, e.g., when the program creates too many goroutines and causes the scheduler to work too hard.
294 | 4. OS Threads
295 | Shows how many OS threads are being used and how many are blocked by syscalls.
296 | 5. Virtual Processors
297 | Shows a line for each virtual processor. The number of virtual processors is controlled by the GOMAXPROCS environment variable (defaulting to the number of cores).
298 | 6. Goroutines and events
299 | Displays where/what goroutine is running on each virtual processor. Lines connecting goroutines represent events. In the example image, we can see that the goroutine “G1 runtime.main” spawned two different goroutines: G6 and G5 (the former is the goroutine responsible for collecting the trace data and the latter is the one we started using the “go” keyword).
300 | A second row per processor may show additional events such as syscalls and runtime events. This also includes some work that the goroutine does on behalf of the runtime (e.g. assisting the garbage collector).
301 |
302 |
303 | ### View Goroutine
304 |
305 | 
306 |
307 | This information includes:
308 |
309 | * Its “name” (Title)
310 | * When it started (Start)
311 | * Its duration (Wall Duration)
312 | * The stack trace when it started
313 | * The stack trace when it finished
314 | * Events generated by this goroutine
315 |
316 | ### Tracing Example
317 |
318 | ```code/tracing```
319 |
320 | ```
321 | func main() {
322 | f, _ := os.OpenFile(version+".trace", os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0644)
323 | trace.Start(f)
324 | defer trace.Stop()
325 |
326 | mergesortv1(s)
327 |
328 | }
329 |
330 | ```
331 |
332 | ```
333 | go run mergesort.go v1 && go tool trace v1.trace
334 | ```
335 |
336 | ### Tracing GC
337 |
338 | The trace tool gives you a very good view into when the GC kicks in, when it is run, and how you could potentially optimize for it.
339 |
340 | 
341 |
342 | ### Tracing Conclusion
343 | The tracer is a powerful tool for debugging concurrency issues, e.g., contention and logical races. But it does not solve all problems: it is not the best tool available to track down which piece of code is spending the most CPU time or doing the most allocation. The go tool pprof is better suited for those use cases.
344 |
345 | The tool really shines when you want to understand the behavior of a program over time and to know what each goroutine is doing when NOT running. Collecting traces may have some overhead and can generate a high amount of data to be inspected.
346 |
347 | ## GOMAXPROCS
348 |
349 | Discussion: for a program to be more efficient should you have more threads/goroutines or less?
350 |
351 | Discussion: goroutines are kinda sorta similar to threads. So why don't we just use threads instead of goroutines?
352 |
353 | Threads typically take up more resources than goroutines - a minimum thread stack is typically upwards of 1MB.
354 | A goroutine typically starts off at 2KB. So that's, at a very minimum, a reduction of 500x. Anything else though?
355 |
356 | Context switching in Linux is about 1000ns while in go it is about 200ns - https://eli.thegreenplace.net/2018/measuring-context-switching-and-memory-overheads-for-linux-threads/
357 |
358 | A primary cost factor is contention. Programs that have parallelism do not necessarily have higher performance, because of greater contention for resources.
359 |
360 | ### What is GOMAXPROCS?
361 | The GOMAXPROCS setting controls how many operating system threads attempt to execute code simultaneously. For example, if GOMAXPROCS is 4, then the program will only execute code on 4 operating system threads at once, even if there are 1000 goroutines. The limit does not count threads blocked in system calls such as I/O.
362 |
363 | GOMAXPROCS can be set explicitly using the GOMAXPROCS environment variable or by calling runtime.GOMAXPROCS from within a program.
364 |
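A minimal sketch of the in-program route (the value 2 is arbitrary):

```
package main

import (
	"fmt"
	"runtime"
)

func main() {
	// An argument < 1 does not change the setting; it only reports the current value.
	fmt.Println("GOMAXPROCS was:", runtime.GOMAXPROCS(0))

	// Limit Go code to 2 OS threads executing simultaneously.
	runtime.GOMAXPROCS(2)
	fmt.Println("GOMAXPROCS now:", runtime.GOMAXPROCS(0))
}
```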
365 | ```code/gomaxprocs```
366 |
367 | ```
368 | func main() {
369 | fmt.Println("runtime.NumCPU()=", runtime.NumCPU())
370 | }
371 | ```
372 |
373 | On my quad-core CPU it prints:
374 | ```
375 | runtime.NumCPU()= 8
376 | ```
377 |
378 | Why is it showing 8 for NumCPU on a quad-core machine? The Intel chip on my machine is hyperthreaded - for each processor core that is physically present, the operating system addresses two virtual (logical) cores and shares the workload between them when possible.
379 |
380 | ### What should be the value of GOMAXPROCS?
381 |
382 | The default setting of GOMAXPROCS in all Go releases [up to 1.4] is 1, because programs with frequent goroutine switches ran much slower when using multiple threads. It is much cheaper to switch between two goroutines in the same thread than to switch between two goroutines in different threads.
383 |
384 | Goroutine scheduling affinity and other improvements to the scheduler have largely addressed the problem, by keeping goroutines that are concurrent but not parallel in the same thread.
385 |
386 | Since Go 1.5, the default value of GOMAXPROCS is the number of CPUs available, as determined by runtime.NumCPU.
387 |
388 | ### Running with different GOMAXPROCS
389 |
390 | ```
391 | GOMAXPROCS=1 go run mergesort.go v1 && go tool trace v1.trace
392 | ```
393 |
394 | 
395 |
396 | ```
397 | GOMAXPROCS=8 go run mergesort.go v1 && go tool trace v1.trace
398 | ```
399 |
400 | 
401 |
402 | ```
403 | GOMAXPROCS=18 go run mergesort.go v1 && go tool trace v1.trace
404 | ```
405 |
406 | 
407 |
408 | The number is only an upper bound; the Go runtime is not required to create as many logical processors as you have specified.
409 |
410 | ### Exercise
411 | ```code/gomaxprocs```
412 |
413 | Run the following and see the differences in the trace.
414 |
415 | ```
416 | GOMAXPROCS=1 go run mergesort.go v2 && go tool trace v2.trace
417 | GOMAXPROCS=8 go run mergesort.go v2 && go tool trace v2.trace
418 | GOMAXPROCS=18 go run mergesort.go v2 && go tool trace v2.trace
419 |
420 | GOMAXPROCS=1 go run mergesort.go v3 && go tool trace v3.trace
421 | GOMAXPROCS=8 go run mergesort.go v3 && go tool trace v3.trace
422 | GOMAXPROCS=18 go run mergesort.go v3 && go tool trace v3.trace
423 | ```
424 |
425 | ```Opt Tip: Do not assume that increasing the number of GOMAXPROCS always improves speed.```
426 |
427 | ## GOGC
428 |
429 | *Question:* If GC is so important, can we adjust GC parameters? Can we change the GC algorithm?
430 |
431 | The GOGC variable sets the initial garbage collection target percentage. A collection is triggered when the ratio of freshly allocated data to live data remaining after the previous collection reaches this percentage. The default is GOGC=100. Setting GOGC=off disables the garbage collector entirely. The runtime/debug package's SetGCPercent function allows changing this percentage at run time.
432 |
433 | GOGC controls the aggressiveness of the garbage collector.
434 |
435 | Setting this value higher, say GOGC=200, will delay the start of a garbage collection cycle until the live heap has grown to 200% of the previous size. Setting the value lower, say GOGC=20 will cause the garbage collector to be triggered more often as less new data can be allocated on the heap before triggering a collection.
436 |
437 | With the introduction of the low-latency collector in Go 1.5, phrases like “trigger a garbage collection cycle” become more fluid, but the underlying message remains: values of GOGC greater than 100 mean the garbage collector will run less often, and values of GOGC less than 100 mean it will run more often.
438 |
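The runtime/debug call mentioned above can also be used from code; a minimal sketch (200 is just an example value):

```
package main

import (
	"fmt"
	"runtime/debug"
)

func main() {
	// SetGCPercent returns the previous setting.
	old := debug.SetGCPercent(200)
	fmt.Println("previous GC target percentage:", old)

	// A negative value disables the collector, like GOGC=off.
	// debug.SetGCPercent(-1)
}
```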
439 |
440 | ### Exercise
441 | ```code/gogc```
442 |
443 | Run the following and see the differences in the trace for heap and GC.
444 |
445 | ```
446 | GOGC=off go run mergesort.go v1 && go tool trace v1.trace
447 | GOGC=50 go run mergesort.go v1 && go tool trace v1.trace
448 | GOGC=100 go run mergesort.go v1 && go tool trace v1.trace
449 | GOGC=200 go run mergesort.go v1 && go tool trace v1.trace
450 | ```
451 |
452 | GOGC=off
453 | 
454 |
455 | GOGC=50
456 | 
457 |
458 | GOGC=100
459 | 
460 |
461 | GOGC=200
462 | 
463 |
464 | ```Opt Tip: This helps you analyze your GC patterns but I can't find any posts that recommend this as a good performance tuning strategy.```
465 |
466 |
467 | ## Stack and Heap
468 |
469 | *Discussion:* where is the stack memory shown in a trace diagram? Is knowing stack and heap allocation important? How about in languages like Python, Java, JavaScript, etc.?
470 |
471 | ref: https://scvalex.net/posts/29/
472 |
473 | ### Stack Frame
474 | ref: http://www.cs.uwm.edu/classes/cs315/Bacon/Lecture/HTML/ch10s07.html
475 |
476 | The stack frame, also known as an activation record, is the collection of all data on the stack associated with one subprogram call.
477 |
478 | The stack frame generally includes the following components:
479 |
480 | * The return address
481 | * Argument variables passed on the stack
482 | * Local variables (in HLLs)
483 | * Saved copies of any registers modified by the subprogram that need to be restored
484 |
485 | The Stack
486 | ---------
487 |
488 | ```
489 | | f() |
490 | | |
491 | +---------------+
492 | | func f(){ | \
493 | | g() | } Stack frame of calling function f()
494 | | } | /
495 | +---------------+
496 | | func g() { | \
497 | | a := 10 | } Stack frame of called function: g()
498 | | } | /
499 | +---------------+
500 | ================= // invalid below this
501 | ```
502 |
503 | As the function call returns, the stack unwinds leaving previous stack frames invalid.
504 |
505 | ```
506 | | f() |
507 | | |
508 | +---------------+
509 | | func f(){ | \
510 | | g() | } Stack frame of calling function f()
511 | | } | /
512 | +---------------+
513 | ================= // invalid below this
514 | | func g() { | \
515 | | a := 10 | } Stack frame of called function: g()
516 | | | /
517 | +---------------+
518 | ```
519 |
520 | All local variables are no longer accessible. In C, returning a pointer to a local variable and then dereferencing it would typically cause a segmentation fault.
521 |
522 | ```
523 | // online c editor - https://onlinegdb.com/HySykSJoE
524 |
525 | #include <stdio.h>
526 |
527 | int* f() {
528 | int a;
529 | a = 10;
530 | return &a;
531 | }
532 |
533 | void main()
534 | {
535 | int* p = f();
536 | printf("p is: %x\n", p); // p is 0
537 | printf("*p is: %d\n", *p); // segmentation fault
538 |
539 | //
540 | }
541 | ```
542 |
543 | ## Escape Analysis
544 |
545 | In C, returning the reference of a local variable causes a segfault when it is dereferenced, because that memory is no longer valid.
546 |
547 | ```
548 | // online c editor - https://onlinegdb.com/HySykSJoE
549 |
550 | #include <stdio.h>
551 |
552 | int* f() {
553 | int a;
554 | a = 10;
555 | return &a;
556 | }
557 |
558 | void main()
559 | {
560 | int* p = f();
561 | printf("p is: %x\n", p); // p is 0
562 | printf("*p is: %d\n", *p); // segmentation fault
563 |
564 | //
565 | }
566 | ```
567 |
568 | In Go, it is allowed to return the reference of a local variable.
569 |
570 | ```
571 | package main
572 |
573 | import (
574 | "fmt"
575 | )
576 |
577 | func f() *int {
578 | x := 10
579 | return &x
580 | }
581 |
582 | func main() {
583 | fmt.Println(*f()) // prints 10
584 | }
585 | ```
586 |
587 | How is that possible?
588 |
589 | From Effective Go: "Note that, unlike in C, it’s perfectly OK to return the address of a local variable; the storage associated with the variable survives after the function returns."
590 |
591 | "When possible, the Go compilers will allocate variables that are local to a function in that function’s stack frame. However, if the compiler cannot prove that the variable is not referenced after the function returns, then the compiler must allocate the variable on the garbage-collected heap to avoid dangling pointer errors. In the current compilers, if a variable has its address taken, that variable is a candidate for allocation on the heap. However, a basic escape analysis recognizes some cases when such variables will not live past the return from the function and can reside on the stack."
592 |
593 | *Can we figure out when variables escape to the heap?*
594 |
595 | ```
596 | // go build -gcflags='-m' 1.go
597 | // go build -gcflags='-m -l' 1.go to avoid inlining
598 | // go build -gcflags='-m -l -m' 1.go for verbose comments.
599 | ```
600 |
601 | ```
602 | func f() {
603 | var i = 5
604 | i++
605 | _ = i
606 | }
607 |
608 | func main() {
609 | f()
610 | }
611 | ```
612 |
613 | ```
614 | $ go build -gcflags='-m -l -m' 1.go
615 | // returns nothing.
616 | ```
617 |
618 | ```
619 | func f_returns() int {
620 | var i = 5
621 | i++
622 | return i
623 | }
624 |
625 | func main() {
626 | f_returns()
627 | }
628 | ```
629 |
630 | ```
631 | $ go build -gcflags='-m -l -m' 1.go
632 | // returns nothing.
633 | ```
634 |
635 | ```
636 | func f_returns_ptr() *int {
637 | var i = 5
638 | i++
639 | return &i
640 | }
641 |
642 | func main() {
643 | f_returns_ptr()
644 | }
645 | ```
646 |
647 | ```
648 | $ go build -gcflags='-m -l -m' 1.go
649 | # command-line-arguments
650 | ./1.go:24:9: &i escapes to heap
651 | ./1.go:24:9: from ~r0 (return) at ./1.go:24:2
652 | ./1.go:22:6: moved to heap:
653 | ```
654 |
655 | Once the variable is on the heap, there is pressure on the Garbage Collector.
656 |
657 | Garbage collection is a convenient feature of Go - automatic memory management makes code cleaner and memory leaks less likely. However, GC also adds overhead as the program periodically needs to stop and collect unused objects. The Go compiler is smart enough to automatically decide whether a variable should be allocated on the heap, where it will later need to be garbage collected, or whether it can be allocated as part of the stack frame of the function which declared it. Stack-allocated variables, unlike heap-allocated variables, don’t incur any GC overhead because they’re destroyed when the rest of the stack frame is destroyed - when the function returns.
658 |
659 | To perform escape analysis, Go builds a graph of function calls at compile time, and traces the flow of input arguments and return values.
660 |
661 | However, if there are variables to be shared, it is appropriate for them to be on the heap.
662 |
663 | ```Tip: If you’ve profiled your program’s heap usage and need to reduce GC time, there may be some wins from moving frequently allocated variables off the heap. ```
664 |
665 | See: https://segment.com/blog/allocation-efficiency-in-high-performance-go-services/
666 | See: http://www.agardner.me/golang/garbage/collection/gc/escape/analysis/2015/10/18/go-escape-analysis.html
667 |
668 |
669 | ## Inlining
670 |
671 | "By default, the inliner will try to inline leaf function (doesn't call other functions/method/interfaces) that doesn't call panic or recover or select or switch or create closure or go/defer functions (see example below) and which is less than 40 nodes when represented (roughly corresponding to 40 simple operations). But please beware that this only describes the current status quo of the gc compiler, and it will surely improve in the future. Thus please try not to depend on this."
672 |
673 | Inlining is done by the compiler automatically and there is no way to indicate that the piece of code should be inlined. Sometimes, your performance benchmarks might look suspiciously skewed because the code has been inlined. Then use the ```-gcflags="-m"``` option to see if the compiler has inlined your code.
674 | ```
675 | // code/inline/inline.go
676 | package main
677 |
678 | import "fmt"
679 |
680 | func f() int {
681 | return 2
682 | }
683 |
684 | func main() {
685 | x := f()
686 | fmt.Println(x)
687 | }
688 | ```
689 |
690 | ```
691 | $ go build -gcflags="-m" inline.go
692 | # command-line-arguments
693 | ./inline.go:5:6: can inline f
694 | ./inline.go:9:6: can inline main
695 | ./inline.go:10:8: inlining call to f
696 | ./inline.go:11:13: inlining call to fmt.Println
697 | ./inline.go:11:13: x escapes to heap
698 | ./inline.go:11:13: io.Writer(os.Stdout) escapes to heap
699 | ./inline.go:11:13: main []interface {} literal does not escape
700 | :1: os.(*File).close .this does not escape
701 | ```
702 |
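While you cannot force inlining, you can prevent it for a specific function with the //go:noinline compiler directive, which is sometimes handy when a micro-benchmark is being skewed by inlining (a minimal sketch):

```
//go:noinline
func f() int {
	return 2
}
```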
703 |
704 | ## Parallelize CPU work
705 |
706 | ### Queueing theory
707 | Ref: [What happens when you add another teller? - by John D. Cook](https://www.johndcook.com/blog/2008/10/21/what-happens-when-you-add-a-new-teller/)
708 |
709 | "Suppose a small bank has only one teller. Customers take an average of 10 minutes to serve and they arrive at the rate of 5.8 per hour. What will the expected waiting time be? What happens if you add another teller?
710 |
711 | We assume customer arrivals and customer service times are random. With only one teller, customers will have to wait nearly five hours on average before they are served. But if you add a second teller, the average waiting time is not just cut in half; it goes down to about 3 minutes. The waiting time is reduced by a factor of 93x."
712 |
713 | When the work can be parallelized without too much synchronization, taking advantage of all available cores can speed up execution roughly linearly with the number of physical cores.
714 |
715 | ```code/parallelize/rand_strings_test.go```
716 |
717 | ```
718 | var letters = []rune("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ")
719 |
720 | func RandString(n int) string {
721 | b := make([]rune, n)
722 | for i := range b {
723 | b[i] = letters[rand.Intn(len(letters))]
724 | }
725 | time.Sleep(10 * time.Microsecond)
726 | return string(b)
727 | }
728 |
729 | func RandString_Sequential() {
730 | for i := 0; i < 1000; i++ {
731 | s = append(s, RandString(100))
732 | }
733 | }
734 |
735 | func RandString_Concurrent() {
736 | for i := 0; i < 1000; i++ {
737 | go func() {
738 | s = append(s, RandString(100))
739 | }()
740 | }
741 | }
742 |
743 | ```
744 |
745 | ```
746 | $ go test -bench=Sequential rand_strings_test.go
747 | Benchmark_Sequential-8   	     200	  26,936,125 ns/op
748 | ```
749 |
750 | ```
751 | $ go test -bench=Concurrent rand_strings_test.go
752 | Benchmark_Concurrent-8 50 9,422,900 ns/op
753 | ```
754 |
755 | Running the code concurrently has given you a 3x performance improvement.
756 |
757 | Now run it with the time.Sleep call commented out.
758 |
759 | ```
760 | $ go test -bench=Sequential rand_strings_test.go
761 | Benchmark_Sequential-8   	     500	   3,865,565 ns/op
762 | ok  	command-line-arguments	2.354s
763 | ```
764 |
765 | ```
766 | $ go test -bench=Concurrent rand_strings_test.go
767 | Benchmark_Concurrent-8 200 9,539,612 ns/op
768 | ok command-line-arguments 2.991s
769 | ```
770 |
771 | Now we see a 3x drop in performance!
772 |
773 | Consider tight loops. Tight loops do not allow the runtime scheduler to schedule goroutines efficiently.
774 |
775 | But consider contention. If concurrent lines of work are stuck waiting for common resources, you're going to have worse performance.
776 |
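As an aside, the concurrent sketch above appends to a shared slice from many goroutines without synchronization, which is a data race. A safer variant (a minimal sketch that reuses the RandString helper above and assumes the sync import) pre-allocates the result and has each goroutine write only its own index:

```
func RandString_Parallel() []string {
	out := make([]string, 1000)

	var wg sync.WaitGroup
	for i := 0; i < 1000; i++ {
		wg.Add(1)
		go func(i int) {
			defer wg.Done()
			out[i] = RandString(100) // each goroutine owns exactly one slot
		}(i)
	}
	wg.Wait()
	return out
}
```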
777 | ```Tip: Concurrency is good. But have 'mechanical sympathy'.```
778 |
779 |
780 | ## Bounds Check Elimination
781 |
782 | Let's compare what the compiled output of these two fairly similar programs are.
783 |
784 | ```
785 | // a.go
786 | 3 func a(a []int) {
787 | 4 n := 6
788 | 5 _ = a[n]
789 | 6 }
790 | ```
791 |
792 | ```
793 | // b.go
794 | 3 func b(b [5]int) {
795 | 4 n := len(b) - 1
796 | 5 _ = b[n]
797 | 6 }
798 | ```
799 |
800 | ```
801 | $ go tool compile -S a.go > a.co
802 | $ go tool compile -S b.go > b.co
803 | $ vimdiff a.co b.co
804 | ```
805 |
806 | ```
807 | "".a STEXT nosplit size=39 args=0x18 locals=0x8
808 | (a.go:3) TEXT "".a(SB), NOSPLIT|ABIInternal, $8-24
809 | (a.go:3) SUBQ $8, SP
810 | (a.go:3) MOVQ BP, (SP)
811 | (a.go:3) LEAQ (SP), BP
812 | (a.go:3) FUNCDATA $0, gclocals·1a65...
813 | (a.go:3) FUNCDATA $1, gclocals·69c1...
814 | (a.go:3) FUNCDATA $3, gclocals·33cd...
815 | (a.go:5) PCDATA $2, $0
816 | (a.go:5) PCDATA $0, $1
817 | (a.go:5) MOVQ "".a+24(SP), AX
818 | (a.go:5) CMPQ AX, $6
819 | (a.go:5) JLS 32
820 | (a.go:6) PCDATA $2, $-2
821 | (a.go:6) PCDATA $0, $-2
822 | (a.go:6) MOVQ (SP), BP
823 | (a.go:6) ADDQ $8, SP
824 | (a.go:6) RET
825 | (a.go:5) PCDATA $2, $0
826 | (a.go:5) PCDATA $0, $1
827 | (a.go:5) CALL runtime.panicindex(SB)
828 | (a.go:5) UNDEF
829 | 0x0000 48 83 ec 08 48 89 2c 24 48 8d 2c 24 48 8b 44 24 H...H.,$H.,$H.D$
830 | 0x0010 18 48 83 f8 06 76 09 48 8b 2c 24 48 83 c4 08 c3 .H...v.H.,$H....
831 | 0x0020 e8 00 00 00 00 0f 0b .......
832 | rel 33+4 t=8 runtime.panicindex+0
833 | ```
834 |
835 | ```
836 | // b.co
837 | "".b STEXT nosplit size=1 args=0x28 locals=0x0
838 | (b.go:3) TEXT "".b(SB), NOSPLIT|ABIInternal, $0-40
839 | (b.go:3) FUNCDATA $0, gclocals·33cd...
840 | (b.go:3) FUNCDATA $1, gclocals·33cd...
841 | (b.go:3) FUNCDATA $3, gclocals·33cd...
842 | (b.go:6) RET
843 | ```
844 |
845 | There seems to be way more happening in a.go than in b.go - about 20+ lines more, which seems surprising.
846 |
847 | A little too much though. That's probably because of optimizations by the compiler. Let's remove those with the -N option.
848 |
849 | ```
850 | $ go tool compile -S -N a.go > a.co
851 | $ go tool compile -S -N b.go > b.co
852 | $ vimdiff a.co b.co
853 | ```
854 |
855 | ```
856 | "".a STEXT nosplit size=49 args=0x18 locals=0x10
857 | (a.go:3) TEXT "".a(SB), NOSPLIT|ABIInternal, $16-24
858 | (a.go:3) SUBQ $16, SP
859 | (a.go:3) MOVQ BP, 8(SP)
860 | (a.go:3) LEAQ 8(SP), BP
861 | (a.go:3) FUNCDATA $0, gclocals·1a65...
862 | (a.go:3) FUNCDATA $1, gclocals·69c1...
863 | (a.go:3) FUNCDATA $3, gclocals·33cd...
864 | (a.go:4) PCDATA $2, $0
865 | (a.go:4) PCDATA $0, $0
866 | (a.go:4) MOVQ $6, "".n(SP)
867 | (a.go:5) PCDATA $0, $1
868 | (a.go:5) CMPQ "".a+32(SP), $6
869 | (a.go:5) JHI 32
870 | (a.go:5) JMP 42
871 | (a.go:6) PCDATA $2, $-2
872 | (a.go:6) PCDATA $0, $-2
873 | (a.go:6) MOVQ 8(SP), BP
874 | (a.go:6) ADDQ $16, SP
875 | (a.go:6) RET
876 | (a.go:5) PCDATA $2, $0
877 | (a.go:5) PCDATA $0, $1
878 | (a.go:5) CALL runtime.panicindex(SB)
879 | (a.go:5) UNDEF
880 | 0x0000 48 83 ...
881 | 0x0010 04 24 ...
882 | 0x0020 48 8b ...
883 | 0x0030 0b
884 | rel 43+4 t=8 runtime.panicindex+0
885 | ```
886 |
887 | ```
888 | "".b STEXT nosplit size=34 args=0x28 locals=0x10
889 | (b.go:3) TEXT "".b(SB), NOSPLIT|ABIInternal, $16-40
890 | (b.go:3) SUBQ $16, SP
891 | (b.go:3) MOVQ BP, 8(SP)
892 | (b.go:3) LEAQ 8(SP), BP
893 | (b.go:3) FUNCDATA $0, gclocals·33cd...
894 | (b.go:3) FUNCDATA $1, gclocals·33cd...
895 | (b.go:3) FUNCDATA $3, gclocals·33cd...
896 | (b.go:4) PCDATA $2, $0
897 | (b.go:4) PCDATA $0, $0
898 | (b.go:4) MOVQ $4, "".n(SP)
899 | (b.go:5) JMP 24
900 | (b.go:6) PCDATA $2, $-2
901 | (b.go:6) PCDATA $0, $-2
902 | (b.go:6) MOVQ 8(SP), BP
903 | (b.go:6) ADDQ $16, SP
904 | (b.go:6) RET
905 | 0x0000 48 83 ...
906 | 0x0010 04 24 ...
907 | 0x0020 10 c3
908 | ```
909 |
910 | Even without the optimizations, there are more instructions that the CPU has to run in the case of a.go {n:=6} more than b.go {n:=len(b)-1}.
911 |
912 | There are some interesting differences between the two. The {n:=6} version has a compare statement (CMPQ) and panic statements (runtime.panicindex) while the other version does not have them.
913 |
914 | Let's also compile both with another option and see if we get any clues there.
915 |
916 | ```
917 | $ go tool compile -d=ssa/check_bce/debug=1 a.go
918 | a.go:5:7: Found IsInBounds
919 |
920 | $ go tool compile -d=ssa/check_bce/debug=1 b.go
921 | ```
922 |
923 | So, the compile tool shows no output with this option for b.go while a.go says "Found IsInBounds" at line number 5 (\_ = a[n]).
924 |
925 | ### BCE Definition
926 | From Wikipedia: bounds-checking elimination is a compiler optimization useful in programming languages or runtimes that enforce bounds checking, the practice of checking every index into an array to verify that the index is within the defined valid range of indexes. Its goal is to detect which of these indexing operations do not need to be validated at runtime, and eliminating those checks.
927 |
928 | When arrays and slices are being accessed, Go provides safety by checking that the index is valid. This implies additional instructions. A language like C does not have this check; instead it is up to the programmer to add it if required, or to skip it at their own risk.
929 |
930 | Go provides the check but is able to eliminate in certain cases when it is able to prove that the index being accessed is within the allowed range.
931 |
932 | In the function ```func a(a []int) { n := 6; _ = a[n] }```, Go is not able to prove at compile time that the index 6 will be in the slice that is passed. However, in the function ```func b(b [5]int) { n := len(b) - 1; _ = b[n] }```, it is guaranteed that the index will be within the length of the array of size 5. Thus Go is able to optimize by eliminating the bounds check.
933 |
934 | Exercise: What if we passed a slice into b.go instead of an array? Is there still a bounds check? Why or why not?
935 | See c.go
936 |
937 | ```
938 | 3 func c(b []int) {
939 | 4 n := len(b) - 1
940 | 5 _ = b[n]
941 | 6 }
942 | ```
943 |
944 | ```
945 | $ go tool compile -d=ssa/check_bce/debug=1 c.go
946 | c.go:5:7: Found IsInBounds
947 | ```
948 |
949 | What is the bce output of the case below? will the compiler be able to eliminate the bounds check?
950 |
951 | ```
952 | // d.go
953 | func d(b []byte) {
954 | for i := 0; i < len(b); i++ {
955 | b[i] = 9
956 | }
957 | }
958 | ```
959 |
960 | ```
961 | $ go tool compile -d=ssa/check_bce/debug=1 d.go
962 | ```
963 |
964 | When it is definite that the index will not receive a value outside of its size (on either end), then bce can happen.
965 |
966 | ### Providing bce Hints
967 |
968 | *Example 1*
969 |
970 | ```
971 | // e.go
972 | 3 func e(b []byte, n int) {
973 | 4 for i := 0; i < n; i++ {
974 | 5 b[i] = 9
975 | 6 }
976 | 7 }
977 | ```
978 |
979 | ```
980 | $ go tool compile -d=ssa/check_bce/debug=1 e.go
981 | e.go:5:8: Found IsInBounds
982 | ```
983 |
984 | Given that this is running inside a loop, the bounds check will run on every iteration. Is there a way to reduce this? Perhaps with a check outside the loop, before it starts?
985 |
986 | ```
987 | // f.go
988 | 3 func f(b []byte, n int) {
989 | 4 _ = b[n-1]
990 | 5 for i := 0; i < n; i++ {
991 | 6 b[i] = 9
992 | 7 }
993 | 8 }
994 | ```
995 |
996 | ```
997 | $ go tool compile -d=ssa/check_bce/debug=1 f.go
998 | f.go:4:7: Found IsInBounds
999 | ```
1000 |
1001 | Having done the check once outside, we are able to eliminate the remaining checks in the loop.
1002 |
1003 |
1004 | How about this one? There are 4 bounds checks. Can we reduce them?
1005 |
1006 | *Example 2*
1007 |
1008 | ```
1009 | // g.go
1010 | func g1(b []byte, v uint32) {
1011 | b[0] = byte(v + 48) // Found IsInBounds
1012 | b[1] = byte(v + 49) // Found IsInBounds
1013 | b[2] = byte(v + 50) // Found IsInBounds
1014 | b[3] = byte(v + 51) // Found IsInBounds
1015 | }
1016 | ```
1017 |
1018 | ```
1019 | // g.go
1020 | func g2(b []byte, v uint32) {
1021 | b[3] = byte(v + 51) // Found IsInBounds
1022 | b[0] = byte(v + 48)
1023 | b[1] = byte(v + 49)
1024 | b[2] = byte(v + 50)
1025 | }
1026 | ```
1027 |
1028 | *Example 3*
1029 |
1030 | ```
1031 | // h.go
1032 | func h1(b []byte, n int) {
1033 | b[n+0] = byte(1) // Found IsInBounds
1034 | b[n+1] = byte(2) // Found IsInBounds
1035 | b[n+2] = byte(3) // Found IsInBounds
1036 | b[n+3] = byte(4) // Found IsInBounds
1037 | b[n+4] = byte(5) // Found IsInBounds
1038 | b[n+5] = byte(6) // Found IsInBounds
1039 | }
1040 | ```
1041 |
1042 | ```
1043 | func h2(b []byte, n int) {
1044 | b = b[n : n+6] // Found IsSliceInBounds
1045 | b[0] = byte(1)
1046 | b[1] = byte(2)
1047 | b[2] = byte(3)
1048 | b[3] = byte(4)
1049 | b[4] = byte(5)
1050 | b[5] = byte(6)
1051 | }
1052 | ```
1053 |
1054 | *Example 4*
1055 |
1056 | ```
1057 | func i1(a, b, c []byte) {
1058 | for i := range a {
1059 | a[i] = b[i] + c[i] // 5:11 Found IsInBounds and 5:12 Found IsInBounds
1060 | }
1061 | }
1062 | ```
1063 |
1064 | ```
1065 | func i2(a, b, c []byte) {
1066 | _ = b[len(a)-1] // Found IsInBounds
1067 | _ = c[len(a)-1] // Found IsInBounds
1068 | for i := range a {
1069 | a[i] = b[i] + c[i]
1070 | }
1071 | }
1072 | ```
1073 |
1074 | ```Tip: bce helps shave off nanoseconds. Document your bce hints in the code.```
1075 |
1076 |
1077 | ## sync.Pools
1078 | Pool's purpose is to cache allocated but unused items for later reuse, relieving pressure on the garbage collector. That is, it makes it easy to build efficient, thread-safe free lists. However, it is not suitable for all free lists.
1079 |
1080 | A Pool is a set of temporary objects that may be individually saved and retrieved.
1081 |
1082 | Any item stored in the Pool may be removed automatically at any time without notification. If the Pool holds the only reference when this happens, the item might be deallocated.
1083 |
1084 | A Pool is safe for use by multiple goroutines simultaneously.
1085 |
1086 | An appropriate use of a Pool is to manage a group of temporary items silently shared among and potentially reused by concurrent independent clients of a package. Pool provides a way to amortize allocation overhead across many clients.
1087 |
1088 | An example of good use of a Pool is in the fmt package, which maintains a dynamically-sized store of temporary output buffers. The store scales under load (when many goroutines are actively printing) and shrinks when quiescent.
1089 |
1090 | ```
1091 | // 1_test.go
1092 | package main
1093 |
1094 | import (
1095 | "bytes"
1096 | "testing"
1097 | )
1098 |
1099 | func Benchmark_f1(b *testing.B) {
1100 | for i := 0; i < b.N; i++ {
1101 | f1()
1102 | }
1103 | }
1104 |
1105 | func f1() {
1106 | 	s := &bytes.Buffer{}
1107 | 	s.Write([]byte("dirty"))
1108 | return
1109 | }
1110 |
1111 | ```
1112 |
1113 | ```
1114 | $ go test -bench=f1 -benchmem
1115 | Benchmark_f1-8 30000000 43.5 ns/op 64 B/op 1 allocs/op
1116 | ```
1117 |
1118 | ```
1119 | // 2_test.go
1120 | package main
1121 |
1122 | import (
1123 | "bytes"
1124 | "sync"
1125 | "testing"
1126 | )
1127 |
1128 | var pool2 = sync.Pool{
1129 | New: func() interface{} {
1130 | return &bytes.Buffer{}
1131 | },
1132 | }
1133 |
1134 | func Benchmark_f2(b *testing.B) {
1135 | for i := 0; i < b.N; i++ {
1136 | f2()
1137 | }
1138 | }
1139 |
1140 | func f2() {
1141 | // When getting from a Pool, you need to cast
1142 | s := pool2.Get().(*bytes.Buffer)
1143 | // We write to the object
1144 | s.Write([]byte("dirty"))
1145 | // Then put it back
1146 | pool2.Put(s)
1147 |
1148 | return
1149 | }
1150 |
1151 | ```
1152 |
1153 | ```
1154 | $ go test -bench=f2 -benchmem
1155 | Benchmark_f2-8 50000000 38.2 ns/op 14 B/op 0 allocs/op
1156 | ```
1157 |
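One caveat the example skips: an object fetched from the pool still holds whatever was written by its previous user, so for a bytes.Buffer it is usual to call Reset before reuse — a minimal sketch (f3 and its contents are illustrative, not part of the repo's code):

```
func f3() {
	s := pool2.Get().(*bytes.Buffer)
	s.Reset() // drop any leftover data from a previous use
	s.Write([]byte("clean"))
	pool2.Put(s)
}
```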
1158 | ```Tip: Use sync.Pool to reduce your memory allocation pressure.```
1159 |
1160 | ### Exercise: sync.Pool
1161 | A type of data (book) needs to be written out as JSON. An ISBN number is added to a new book ({title, author}) and the book is then written out. Use sync.Pool to reduce allocations prior to writing.
1162 | See book1_test.go and book2_test.go
1163 |
1164 | ## sync.Once for Lazy Initialization
1165 |
1166 | When programs have costly resources being loaded, it helps to do that only once.
1167 |
1168 | In version 1 of our code, we have a template that needs to be parsed. This example template is currently being read from memory, but there are usually many templates and they are read from the file system which can be very slow.
1169 |
1170 | ```code/sync-once```
1171 |
1172 | In the first, naive example, we load the template each time it is required. This is useful in that the template is loaded only when it is needed.
1173 |
1174 | ```
1175 | // 1.go
1176 | var t *template.Template
1177 |
1178 | func f() {
1179 | t = template.Must(template.New("").Parse(s))
1180 | _ = t
1181 |
1182 | // do task with template
1183 | }
1184 |
1185 | func main() {
1186 | for i := 0; i < 10000; i++ {
1187 | f()
1188 | }
1189 | }
1190 |
1191 | ```
1192 |
1193 | The time taken for this is about 0.637 seconds. Can we improve on this?
1194 | ```
1195 | $ time go run 1.go
1196 |
1197 | real 0m0.637s
1198 | user 0m0.712s
1199 | sys 0m0.346s
1200 | ```
1201 |
1202 | In version 1, we are re-parsing the template each time, which is unnecessary. In the second version, we load the template only once at the beginning of the program.
1203 |
1204 | ```
1205 | // 2.go
1206 | func main() {
1207 | // costs time at load and maybe unused
1208 | t = template.Must(template.New("").Parse(s))
1209 | _ = t
1210 |
1211 | for i := 0; i < 10000; i++ {
1212 | f()
1213 | }
1214 | }
1215 | ```
1216 |
1217 | This works well, but doing all our initialization at the very beginning slows down the program's start. Often there are many templates, and not all of them are needed or used immediately. Eager loading is also undesirable when multiple copies run as Kubernetes pods and we expect new instances to become ready very quickly.
1218 |
1219 | ```
1220 | time go run 2.go
1221 |
1222 | real 0m0.365s
1223 | user 0m0.376s
1224 | sys 0m0.198s
1225 | ```
1226 |
1227 | In version 3 of our code, we use sync.Once to ensure that the initialization code runs once and only once, and only at the moment it is first needed, loading the template 'lazily'.
1228 |
1229 | sync.Once is goroutine-safe: if Do is called from several goroutines at once, only one of them runs the function and the others block until it has completed.
1230 |
1231 | ```
1232 | // 3.go
1233 | var t *template.Template
1234 | var o sync.Once
1235 |
1236 | func g() {
1237 | fmt.Println("within g()")
1238 | t = template.Must(template.New("").Parse(s))
1239 | _ = t
1240 | }
1241 |
1242 | func f() {
1243 | // only done once and when used
1244 | o.Do(g)
1245 |
1246 | // do task with template
1247 |
1248 | }
1249 |
1250 | func main() {
1251 | for i := 0; i < 10000; i++ {
1252 | f()
1253 | }
1254 | }
1255 | ```
1256 |
1257 | You can see that in our very simple program, the difference is not much. But in typical production code, such changes could have a considerable impact.
1258 |
1259 | ```
1260 | time go run 3.go
1261 | within g()
1262 |
1263 | real 0m0.380s
1264 | user 0m0.392s
1265 | sys 0m0.209s
1266 | ```
1267 |
1268 | ```Tip: Consider lazily loading your resources using sync.Once at time of first use.```
1269 |
1270 | ## Arrays and Slices
1271 |
1272 | Discussion: what are the key characteristics of an array?
1273 |
1274 | In Go, the length of an array is part of its type. An array of one length cannot be assigned to an array of a different length.
1275 |
1276 | ```code/slices```
1277 |
1278 | ```
1279 | var a [5]int
1280 | var b [6]int
1281 | b = a
1282 | ```
1283 |
1284 | ```
1285 | // compile error
1286 | cannot use a (type [5]int) as type [6]int in assignment
1287 | ```
1288 |
1289 | In Go, arrays are fixed in size. You cannot append to (or delete from) an array, though you can modify its elements in place.
1290 |
1291 | Slices, by contrast, can grow and shrink.
1292 | A slice variable can be reassigned a slice of a different length, and copying elements between slices of different lengths (with copy) transfers only as many elements as the shorter one holds.
1293 |
1294 | Slices can be made from arrays.
1295 |
1296 | ```
1297 | var a [5]int
1298 | s := a[0:3]
1299 | s = a[:3]
1300 | s = a[3:]
1301 | ```
1302 |
1303 | A slice always points to an underlying array. The slice header records a starting position within that array, a length (the count of contiguous elements it refers to), and a capacity.
1304 |
1305 | ```
1306 | a := [5]int{1, 2, 3, 4, 5}
1307 | s := a[0:3]
1308 | s[0] = 11
1309 | fmt.Println(a, s)
1310 | ```
1311 |
1312 | ```
1313 | [11 2 3 4 5]
1314 | [11 2 3]
1315 | ```
1316 |
1317 | So are the addresses of the array and slice the same?
1318 | ```
1319 | fmt.Printf("%p %p\n", &a, &s)
1320 | ```
1321 |
1322 | ```
1323 | 0xc0000181b0 0xc00000c060
1324 | ```
1325 |
1326 | The slice has its own data structure that points to the array.
1327 | So then are their element addresses the same?
1328 |
1329 | ```
1330 | fmt.Printf("%p %p\n", &a[0], &s[0])
1331 | ```
1332 |
1333 | ```
1334 | 0xc0000181b0 0xc0000181b0
1335 | ```
1336 |
1337 | Yes, they are. A slice has no storage of its own; it merely points to the array.
1338 |
1339 | But why would it be designed that way?
1340 |
1341 | ```
1342 | a := [5]int{1, 2, 3, 4, 5}
1343 | s := a[0:3]
1344 | fmt.Println(a, s)
1345 |
1346 | s = append(s, 9)
1347 | fmt.Println(a, s)
1348 |
1349 | s = append(s, 19)
1350 | fmt.Println(a, s)
1351 | ```
1352 |
1353 | ```
1354 | [1 2 3 4 5] [1 2 3]
1355 | [1 2 3 9 5] [1 2 3 9]
1356 | [1 2 3 9 19] [1 2 3 9 19]
1357 | ```
1358 |
1359 | What happens when we append beyond the slice's capacity?
1360 |
1361 | ```
1362 | s = append(s, 99)
1363 | fmt.Println(a, s)
1364 | ```
1365 |
1366 | ```
1367 | [1 2 3 9 19] [1 2 3 9 19 99]
1368 | ```
1369 |
1370 | Once the capacity is exceeded, the original array is left unchanged. append allocates a new, larger backing array elsewhere, copies the existing elements into it, and the slice now refers to that new array.
1371 |
1372 | Why is it designed this way?
1373 | Because memory allocation is costly. Go lets you pre-allocate memory to avoid the run-time cost of repeatedly allocating (and copying into) new backing arrays; repeated reallocation also adds significant GC pressure.
1374 |
1375 | You can pre-allocate the expected size (capacity) of the slice using make.
1376 |
1377 | ```
1378 | months := make([]int, 0, 12)
1379 | months = append(months, 1)
1380 | months = append(months, 7)
1381 | ```
1382 |
1383 | You can also limit the capacity of a slice taken from an array or another slice by providing a third index (a full slice expression); append reallocates once that capacity is reached.
1384 |
1385 | ```
1386 | s := a[0:3:5] // len 3, cap 5 (the capacity index cannot exceed len(a))
1387 | ```
1388 |
1389 | ```Tip: Pre-allocating slices to expected sizes can significantly increase performance.```
1390 |
1391 |
1392 | ## String Concatenation
1393 |
1394 | A string behaves like a read-only slice of bytes and is immutable. Concatenating strings with the + operator allocates a new string every time, causing constant reallocation and GC pressure.
1395 |
1396 | There are two options in the std lib: bytes.Buffer and strings.Builder. Which would you guess performs better?
1397 |
1398 | ```
1399 | for n := 0; n < b.N; n++ {
1400 | str += "x"
1401 | // vs
1402 | buffer.WriteString("x")
1403 | // vs
1404 | builder.WriteString("x")
1405 | }
1406 | ```
1407 |
1408 | ```
1409 | BenchmarkConcatString-8 10000000 128 ns/op
1410 | BenchmarkConcatBuffer-8 200000000 9.54 ns/op
1411 | BenchmarkConcatBuilder-8 1000000000 2.63 ns/op
1412 | ```
1413 |
1414 | In earlier benchmarks published on the net, bytes.Buffer and strings.Builder performed roughly the same, but strings.Builder appears to have been optimized further since then.
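For reference, a complete benchmark file along the lines of the fragment above might look like this sketch (the file and benchmark names are illustrative):

```
// concat_sketch_test.go (illustrative)
package main

import (
	"bytes"
	"strings"
	"testing"
)

func BenchmarkConcatString(b *testing.B) {
	var str string
	for n := 0; n < b.N; n++ {
		str += "x" // allocates a new string each iteration
	}
	_ = str
}

func BenchmarkConcatBuffer(b *testing.B) {
	var buffer bytes.Buffer
	for n := 0; n < b.N; n++ {
		buffer.WriteString("x")
	}
}

func BenchmarkConcatBuilder(b *testing.B) {
	var builder strings.Builder
	for n := 0; n < b.N; n++ {
		builder.WriteString("x")
	}
}
```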
1415 |
1416 | ```Tip: Use strings.Builder > bytes.Buffer > string concatenation.```
1417 |
1418 |
1419 | ## Map Keys: int vs string
1420 |
1421 | Which do you think would be faster?
1422 | Pretty obvious, I guess.
1423 |
1424 | ```code/map-access```
1425 |
1426 | ```
1427 | key := strconv.Itoa(rand.Intn(NumItems))
1428 | //vs
1429 | key := rand.Intn(NumItems)
1430 | ```
1431 |
1432 | ```
1433 | BenchmarkMapStringKeys-8 20000000 109 ns/op
1434 | BenchmarkMapIntKeys-8 20000000 53.5 ns/op
1435 | ```
1436 |
1437 | Will the time change if the string is longer?
1438 |
1439 | ```
1440 | key := strconv.Itoa(rand.Intn(NumItems))
1441 | key += ` is the key value that is being used. `
1442 | //vs
1443 | key := rand.Intn(NumItems)
1444 |
1445 | ```
1446 |
1447 | ```
1448 | BenchmarkMapStringKeys-8 10000000 120 ns/op
1449 | BenchmarkMapIntKeys-8 30000000 56.9 ns/op
1450 | ```
1451 |
1452 | Apparently it does.
1453 |
1454 |
1455 | ```
1456 | key := strconv.Itoa(rand.Intn(NumItems))
1457 | key += ` is the key value that is being used and a shakespeare sonnet. ` + sonnet106
1458 | //vs
1459 | key := rand.Intn(NumItems)
1460 | ```
1461 |
1462 | ```
1463 | BenchmarkMapStringKeys-8 10000000 246 ns/op
1464 | BenchmarkMapIntKeys-8 30000000 50.4 ns/op
1465 | ```
1466 |
1467 | I found that map access takes longer with longer string keys, since the key has to be hashed (and compared) in full.
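A complete benchmark along these lines might look like the following sketch; NumItems, the value type, and the setup are illustrative assumptions rather than the repository's actual code (see code/map-access for that):

```
// map_keys_sketch_test.go (illustrative)
package main

import (
	"math/rand"
	"strconv"
	"testing"
)

const NumItems = 1000000

var (
	stringMap = make(map[string]string, NumItems)
	intMap    = make(map[int]string, NumItems)
)

func init() {
	for i := 0; i < NumItems; i++ {
		stringMap[strconv.Itoa(i)] = "value"
		intMap[i] = "value"
	}
}

func BenchmarkMapStringKeys(b *testing.B) {
	for i := 0; i < b.N; i++ {
		key := strconv.Itoa(rand.Intn(NumItems))
		_ = stringMap[key]
	}
}

func BenchmarkMapIntKeys(b *testing.B) {
	for i := 0; i < b.N; i++ {
		key := rand.Intn(NumItems)
		_ = intMap[key]
	}
}
```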
1468 |
1469 | ```Tip: use int types instead of string types in maps. If strings have to be used, use shorter strings.```
1470 |
1471 |
1472 | ## JSON Unmarshaling
1473 |
1474 | JSON Unmarshaling uses reflection, which is not very efficient. It is convenient and straightforward though.
1475 |
1476 | easyjson is an external tool that generates marshaling/unmarshaling code for your types. You don't have to write or maintain the generated code yourself; you end up with more (generated) code at compile time, but run-time performance can be noticeably higher because reflection is avoided.
1477 |
1478 | ```
1479 | //main.go
1480 |
1481 | //easyjson:json
1482 | type JSONData struct {
1483 | Data []string
1484 | }
1485 |
1486 | func unmarshaljsonFn() {
1487 | var j JSONData
1488 | json.Unmarshal([]byte(`{"Data" : ["One", "Two", "Three"]} `), &j)
1489 | }
1490 |
1491 | func easyjsonFn() {
1492 | d := &JSONData{}
1493 | d.UnmarshalJSON([]byte(`{"Data" : ["One", "Two", "Three"]} `))
1494 | }
1495 | ```
1496 |
1497 | ```
1498 | $ go get -u github.com/mailru/easyjson/...
1499 |
1500 | $ easyjson -all main.go
1501 | // this generates a file called main_easyjson.go
1502 | ```
1503 |
1504 | ```
1505 | go test -bench=. -benchmem
1506 | Benchmark_unmarshaljson-8 2000000 981 ns/op 344 B/op 9 allocs/op
1507 | Benchmark_easyjson-8 5000000 350 ns/op 124 B/op 5 allocs/op
1508 | ```
1509 |
1510 | ```Tip: if data serialization/deserialization is common, see if you can avoid reflection and interfaces. Generate code at build time instead to reduce performance cost at run time.```
1511 |
1512 |
1513 | ## File I/O
1514 |
1515 | User buffered I/O, shortened to buffering or buffered I/O, refers to the technique of temporarily storing the results of an I/O operation in user-space before transmitting it to the kernel (in the case of writes) or before providing it to your process (in the case of reads). By so buffering the data, you can minimize the number of system calls and can block-align I/O operations, which may improve the performance of your application.
1516 |
1517 | For example, consider a process that writes one character at a time to a file. This is obviously inefficient: Each write operation corresponds to a write() system call, which means a trip into the kernel, a memory copy (of a single byte!), and a return to user-space, only to repeat the whole ordeal. Worse, filesystems and storage media work in terms of blocks; operations are fastest when aligned to integer multiples of those blocks. Misaligned operations, particularly very small ones, incur additional overhead.
1518 |
1519 | You want unbuffered output whenever you want to ensure that the output has been written before continuing. One example is standard error under a C runtime library - this is usually unbuffered by default. Since errors are (hopefully) infrequent, you want to know about them immediately. On the other hand, standard output is buffered simply because it's assumed there will be far more data going through it.
1520 |
1521 | In addition, it's not just system calls that are minimized but disk I/O as well. Let's say a program reads a file one byte at a time. With unbuffered input, you will go out to the (relatively very slow) disk for every byte even though it probably has to read in a whole block anyway (the disk hardware itself may have buffers, but you're still going out to the disk controller, which is slower than in-memory access). By buffering, the whole block is read into the buffer at once, and then the individual bytes are delivered to you from the (in-memory, incredibly fast) buffer area.
1522 |
1523 | Buffering can take many forms, such as in the following example:
1524 |
1525 | ```
1526 | +-------------------+-------------------+
1527 | | Process A | Process B |
1528 | +-------------------+-------------------+
1529 | | C runtime library | C runtime library | C RTL buffers
1530 | +-------------------+-------------------+
1531 | | OS caches | Operating system buffers
1532 | +---------------------------------------+
1533 | | Disk controller hardware cache | Disk hardware buffers
1534 | +---------------------------------------+
1535 | | Disk |
1536 | +---------------------------------------+
1537 | ```
1538 |
1539 | ```
1540 | f, _ := os.Create("/tmp/test.txt")
1541 | for i := 0; i < 100000; i++ {
1542 | 	f.WriteString("some text!\n")
1543 | }
1544 |
1545 | // vs
1546 |
1547 | f, _ := os.Create("/tmp/test.txt")
1548 | w := bufio.NewWriter(f)
1549 | for i := 0; i < 100000; i++ {
1550 | 	w.WriteString("some text!\n")
1551 | }
1552 | w.Flush() // flush buffered data to the file before it is closed
1553 | ```
1554 |
1555 | ```
1556 | BenchmarkWriteFile-8 2 882,154,299 ns/op
1557 | BenchmarkWriteFileBuffered-8 300 4,666,152 ns/op
1558 |
1559 | BenchmarkReadFile-8 3 337,684,006 ns/op
1560 | BenchmarkReadFileBuffered-8 200 6,820,032 ns/op
1561 | ```
1562 |
1563 | ```Tip: use buffered reads and writes.```
1564 |
1565 |
1566 | ## Regexp Compilation
1567 |
1568 | Regular expressions are costly. Where possible, avoid them. Where you must use them, compile them once up front and reuse the compiled Regexp.
1569 | Also consider combining this with sync.Once to compile lazily (see the sketch after the benchmark results below).
1570 |
1571 | ```
1572 | for i:=0; i< b.N; i++ {
1573 | regexp.MatchString(testRegexp, "jsmith@example.com")
1574 | }
1575 |
1576 | // vs
1577 |
1578 | r, _ := regexp.Compile(testRegexp)
1579 | for i:=0; i< b.N; i++ {
1580 | r.MatchString("jsmith@example.com")
1581 | }
1582 | ```
1583 |
1584 | ```
1585 | BenchmarkMatchString-8 200000 7195 ns/op
1586 | BenchmarkMatchStringCompiled-8 2000000 630 ns/op
1587 | ```
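Since the note above also suggests combining compilation with sync.Once, here is a minimal sketch of lazy, compile-once matching; the pattern and names are illustrative assumptions:

```
// regexp_lazy.go (illustrative)
package main

import (
	"fmt"
	"regexp"
	"sync"
)

var (
	emailRe     *regexp.Regexp
	emailReOnce sync.Once
)

// matchEmail compiles the pattern the first time it is needed,
// then reuses the compiled Regexp on every later call.
func matchEmail(s string) bool {
	emailReOnce.Do(func() {
		emailRe = regexp.MustCompile(`^[^@\s]+@[^@\s]+$`)
	})
	return emailRe.MatchString(s)
}

func main() {
	fmt.Println(matchEmail("jsmith@example.com"))
}
```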
1588 |
1589 | ```Tip: take pre-compiled options in regex, sql prepared statements, etc.```
1590 |
1591 | ## Defer
1592 |
1593 | Defer does additional work for you and therefore it is not as fast as straight-line code.
1594 |
1595 | ```code/defer```
1596 |
1597 | ```
1598 | func (t *T) CounterA() int64 {
1599 | t.mu.Lock()
1600 | defer t.mu.Unlock()
1601 | return t.n
1602 | }
1603 |
1604 | func (t *T) CounterB() (count int64) {
1605 | t.mu.Lock()
1606 | count = t.n
1607 | t.mu.Unlock()
1608 | return
1609 | }
1610 |
1611 | func (t *T) IncreaseA() {
1612 | t.mu.Lock()
1613 | defer t.mu.Unlock()
1614 | t.n++
1615 | }
1616 |
1617 | func (t *T) IncreaseB() {
1618 | t.mu.Lock()
1619 | t.n++ // this line will not panic for sure
1620 | t.mu.Unlock()
1621 | }
1622 | ```
1623 |
1624 | As of this writing (Go 1.12), with the official Go compiler, deferred function calls incur a small run-time overhead.
1625 |
1626 | ```
1627 | Benchmark_CounterA-8 30000000 52.9 ns/op
1628 | Benchmark_CounterB-8 100000000 18.9 ns/op
1629 | Benchmark_IncreaseA-8 30000000 51.9 ns/op
1630 | Benchmark_IncreaseB-8 100000000 19.3 ns/op
1631 | ```
1632 |
1633 | ```Tip: where performance is a consideration and code is unlikely to do panic/recover, see if defers can be replaced.```
1634 |
1635 | ## fmt vs strconv
1636 |
1637 | fmt functions take their parameters as interface{} values. It is generally faster to use functions that take concrete types, which need no reflection or type assertions.
1638 |
1639 | ```
1640 | // fmt/main_test.go
1641 | func fmtFn(i int) string {
1642 | return fmt.Sprintf("%d", i)
1643 | }
1644 |
1645 | func Benchmark_fmtFn(b *testing.B) {
1646 | for i := 0; i < b.N; i++ {
1647 | fmtFn(1234)
1648 | }
1649 | }
1650 | ```
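The strconv side of this comparison is not shown in the excerpt above; it presumably looks something like the following sketch (the file name and function name are assumptions):

```
// strconv_sketch_test.go (illustrative)
package main

import (
	"strconv"
	"testing"
)

func strconvFn(i int) string {
	return strconv.Itoa(i) // takes an int directly, no interface{} involved
}

func Benchmark_strconvFn(b *testing.B) {
	for i := 0; i < b.N; i++ {
		strconvFn(1234)
	}
}
```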
1651 |
1652 | ```
1653 | $ go test -bench=. -benchmem
1654 | Benchmark_fmtFn-8 20000000 100 ns/op 16 B/op 2 allocs/op
1655 | Benchmark_strconvFn-8 50000000 31.2 ns/op 4 B/op 1 allocs/op
1656 | ```
1657 |
1658 | fmt can also increase the number of allocations: passing a non-pointer value as an interface{} usually causes it to escape to the heap. [ref](https://stephen.sh/posts/quick-go-performance-improvements)
1659 |
1660 | ```Tip: consider using functions that take specific data types as opposed to an empty interface, e.g. strconv functions as opposed to fmt.Sprintf.```
1661 |
1662 | ## Explicitly Set Derived Values
1663 |
1664 | Certain parts of the standard library or external libraries may derive information that is not explicitly set. Using benchmarks, flame graphs, and similar tools, figure out whether explicitly setting those values lets the library skip the work of deriving them.
1665 |
1666 | ```
1667 | // responsewriter/main_test.go
1668 | func withoutSetHeader(w http.ResponseWriter, r *http.Request) {
1669 | fmt.Fprintln(w, "hello, stranger")
1670 | }
1671 |
1672 | func withSetHeader(w http.ResponseWriter, r *http.Request) {
1673 | w.Header().Set("Content-Type", "text/plain")
1674 | fmt.Fprintln(w, "hello, stranger")
1675 | }
1676 | ```
1677 |
1678 | ```
1679 | $ go test -bench=.
1680 | goos: darwin
1681 | goarch: amd64
1682 | Benchmark_withoutSetHeader-8 1000000 1664 ns/op
1683 | Benchmark_withSetHeader-8 1000000 1183 ns/op
1684 | ```
1685 |
1686 | If the "Content-Type" header is not set, the first write (here Fprintln) to the ResponseWriter sniffs the written data to derive the content type. By setting the Content-Type explicitly, we skip that work and improve performance.
1687 |
1688 | ```Tip: Look at implementation to see if otherwise derived values can be set in advance.```
1689 |
1690 |
1691 | ## Go Performance Patterns
1692 | When application performance is a critical requirement, the use of built-in or third-party packages and methods should be considered carefully. The cases when a compiler can optimize code automatically are limited. The Go Performance Patterns are benchmark- and practice-based recommendations for choosing the most efficient package, method or implementation technique.
1693 |
1694 | Some points may not be applicable to a particular program; the actual performance optimization benefits depend almost entirely on the application logic and load.
1695 |
1696 | ### Make multiple I/O operations asynchronous
1697 | Network and file I/O (e.g. a database query) is the most common bottleneck in I/O-bound applications. Making independent I/O operations asynchronous, i.e. running in parallel, can improve downstream latency. Use sync.WaitGroup to synchronize multiple operations.
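A minimal sketch of fanning out two independent operations with sync.WaitGroup; fetchA and fetchB are stand-ins for real I/O calls such as a database query and an HTTP request:

```
// waitgroup_sketch.go (illustrative)
package main

import (
	"fmt"
	"sync"
	"time"
)

// fetchA and fetchB stand in for independent I/O operations.
func fetchA() string { time.Sleep(50 * time.Millisecond); return "A" }
func fetchB() string { time.Sleep(50 * time.Millisecond); return "B" }

func main() {
	var (
		wg   sync.WaitGroup
		a, b string
	)

	wg.Add(2)
	go func() { defer wg.Done(); a = fetchA() }()
	go func() { defer wg.Done(); b = fetchB() }()
	wg.Wait() // both goroutines have finished; a and b are safe to read

	fmt.Println(a, b) // total latency ~50ms instead of ~100ms
}
```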
1698 |
1699 | ### Avoid memory allocation in hot code
1700 | Object creation not only requires additional CPU cycles, but will also keep the garbage collector busy. It is a good practice to reuse objects whenever possible, especially in program hot spots. You can use sync.Pool for convenience. See also: Object Creation Benchmark
1701 |
1702 | ### Favor lock-free algorithms
1703 | Synchronization often leads to contention and race conditions. Avoiding mutexes whenever possible will have a positive impact on efficiency as well as latency. Lock-free alternatives to some common data structures are available (e.g. Circular buffers).
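As a small illustration of avoiding a mutex on a hot path, a shared counter can be maintained with sync/atomic; this is a sketch, not a drop-in replacement for every mutex use:

```
// atomic_counter.go (illustrative)
package main

import (
	"fmt"
	"sync"
	"sync/atomic"
)

func main() {
	var (
		n  int64
		wg sync.WaitGroup
	)

	for i := 0; i < 8; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			for j := 0; j < 1000; j++ {
				atomic.AddInt64(&n, 1) // no mutex, no lock contention
			}
		}()
	}
	wg.Wait()

	fmt.Println(atomic.LoadInt64(&n)) // 8000
}
```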
1704 |
1705 | ### Use read-only locks
1706 | The use of full locks for read-heavy synchronized variables will unnecessarily make reading goroutines wait. Use read-only locks to avoid it.
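A small sketch of a read-mostly value guarded by sync.RWMutex; the config type and its methods are hypothetical:

```
// rwmutex_sketch.go (illustrative)
package main

import (
	"fmt"
	"sync"
)

type config struct {
	mu    sync.RWMutex
	value string
}

// Get takes only a read lock, so many readers can proceed concurrently.
func (c *config) Get() string {
	c.mu.RLock()
	defer c.mu.RUnlock()
	return c.value
}

// Set takes the full lock; writers exclude readers and other writers.
func (c *config) Set(v string) {
	c.mu.Lock()
	c.value = v
	c.mu.Unlock()
}

func main() {
	c := &config{}
	c.Set("v1")
	fmt.Println(c.Get())
}
```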
1707 |
1708 | ### Use buffered I/O
1709 | Disks operate in blocks of data. Accessing disk for every byte is inefficient; reading and writing bigger chunks of data greatly improves the speed.
1710 |
1711 | ### Use bytes.Buffer or strings.Builder instead of the += operator
1712 | A new string is allocated on every assignment, which is inefficient and should be avoided. See also: String Concatenation Benchmark.
1713 |
1714 | ### Use compiled regular expressions for repeated matching
1715 | It is inefficient to compile the same regular expression before every matching. While obvious, it is often overlooked. See also: Regexp Benchmark.
1716 |
1717 | ### Preallocate slices
1718 | Go manages dynamically growing slices intelligently: when capacity is exhausted it allocates a larger backing array (roughly doubling for small slices, growing more conservatively for large ones) and copies the existing elements over. To avoid that copying and the garbage it creates, preallocate the slice to its expected size whenever possible. See also: Slice Appending Benchmark.
1719 |
1720 | ### Use Protocol Buffers or MessagePack instead of JSON and Gob
1721 | JSON and Gob use reflection, which is relatively slow due to the amount of work it does. Gob serialization and deserialization is comparably fast, though, and may be preferred because it does not require code generation.
1722 |
1723 | ### Use int keys instead of string keys for maps
1724 | If the program relies heavily on maps, using int keys might be meaningful, if applicable. See also: Map Access Benchmark.
1725 |
1726 | ### Use methods that allow you to pass byte slices
1727 | When using packages, look to use methods that allow you to pass a byte slice: these methods usually give you more control over allocation.
1728 |
1729 | time.Format vs. time.AppendFormat is a good example. time.Format returns a string. Under the hood, this allocates a new byte slice and calls time.AppendFormat on it. time.AppendFormat takes a byte buffer, writes the formatted representation of the time, and returns the extended byte slice. This is common in other packages in the standard library: see strconv.AppendFloat, or bytes.NewBuffer.
1730 |
1731 | Why does this give you increased performance? Well, you can now pass byte slices that you've obtained from your sync.Pool, instead of allocating a new buffer every time. Or you can increase the initial buffer size to a value that you know is more suited to your program, to reduce slice re-copying.
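A minimal sketch of the time.Format vs. time.AppendFormat difference described above:

```
// appendformat_sketch.go (illustrative)
package main

import (
	"fmt"
	"time"
)

func main() {
	t := time.Now()

	// Format allocates and returns a new string on every call.
	s := t.Format(time.RFC3339)

	// AppendFormat writes into a caller-supplied buffer, which can be
	// preallocated or taken from a sync.Pool and reused.
	buf := make([]byte, 0, 64)
	buf = t.AppendFormat(buf, time.RFC3339)

	fmt.Println(s, string(buf))
}
```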
1732 |
1733 | ### Avoid using structures containing pointers as map keys for large maps
1734 |
1735 | During a garbage collection, the runtime scans objects containing pointers, and chases them. If you have a very large map[string]int, the GC has to check every string within the map, every GC, as strings contain pointers. [ref](https://stephen.sh/posts/quick-go-performance-improvements)
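As an illustration of the point above: the Go runtime can skip scanning a map whose key and value types contain no pointers, while string keys force a scan because each string header points at its backing data. A sketch (the sizes are arbitrary):

```
// pointer_free_map.go (illustrative)
package main

func main() {
	// The GC must scan the keys of this map on every cycle, because
	// each string header contains a pointer to its backing data.
	m1 := make(map[string]int, 1<<20)

	// Keys and values here contain no pointers, so the GC can skip
	// scanning this map's buckets entirely.
	m2 := make(map[int64]int64, 1<<20)

	_, _ = m1, m2
}
```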
1736 |
1737 | # References
1738 | * [Daniel Marti's talk - Optimizing Go Code without a Blindfold](https://www.dotconferences.com/2019/03/daniel-marti-optimizing-go-code-without-a-blindfold)
1739 | * [dave cheney high performance workshop](https://dave.cheney.net/high-performance-go-workshop/dotgo-paris.html)
1740 | * [github - dave cheney high performance workshop](https://github.com/davecheney/high-performance-go-workshop)
1741 | * [don't lock around io](https://commandercoriander.net/blog/2018/04/10/dont-lock-around-io/)
1742 | * [advent 2017 - go execution tracer](https://blog.gopheracademy.com/advent-2017/go-execution-tracer/)
1743 | * [execution tracer design doc](https://docs.google.com/document/u/1/d/1FP5apqzBgr7ahCCgFO-yoVhk4YZrNIDNf9RybngBc14/pub)
1744 | * https://www.alexedwards.net/blog/an-overview-of-go-tooling
1745 | * [configuring sqldb for better performance](https://www.alexedwards.net/blog/configuring-sqldb)
1746 | * [rate limit http requests](https://www.alexedwards.net/blog/how-to-rate-limit-http-requests)
1747 | * https://www.alexedwards.net/blog/understanding-mutexes
1748 | * https://stackimpact.com/docs/go-performance-tuning/
1749 | * https://stackimpact.com/blog/practical-golang-benchmarks/
1750 | * https://www.ardanlabs.com/blog/2017/06/design-philosophy-on-data-and-semantics.html
1751 | * https://github.com/ardanlabs/gotraining
1752 | * http://www.doxsey.net/blog/go-and-assembly
1753 | * https://medium.com/observability/debugging-latency-in-go-1-11-9f97a7910d68
1754 | * https://rakyll.org/profiler-labels/
1755 | * https://stackoverflow.com/questions/45027236/what-differentiates-exception-frames-from-other-data-on-the-return-stack
1756 | * https://www.infoq.com/presentations/self-heal-scalable-system
1757 | * https://dave.cheney.net/paste/clear-is-better-than-clever.pdf
1758 | * https://golang.org/pkg/sync/#Pool, https://dev.to/hsatac/syncpool-34pd
1759 | * http://dominik.honnef.co/go-tip/2014-01-10/#syncpool
1760 | * https://www.quora.com/In-C-what-does-buffering-I-O-or-buffered-I-O-mean
1761 | * https://stackoverflow.com/questions/1450551/buffered-vs-unbuffered-io
1762 | * http://www.agardner.me/golang/garbage/collection/gc/escape/analysis/2015/10/18/go-escape-analysis.html
1763 | * [Performance Optimization Sins - Aliaksandar Valialkin](https://docs.google.com/presentation/d/e/2PACX-1vTxoBN41dYFB8aV8c0SDET3B2htsAavXPAwR-CMyfT2LfARR2KjOt8EPIU1zn8ceSuxrL8BmkOqqL_c/pub?start=false&loop=false&delayms=3000&slide=id.g524654fd95_0_117)
1764 | * https://blog.gopheracademy.com/advent-2018/postmortem-debugging-delve/
1765 | * https://github.com/golang/go/wiki/DesignDocuments
1766 | * [Go execution modes](https://docs.google.com/document/d/1nr-TQHw_er6GOQRsF6T43GGhFDelrAP0NqSS_00RgZQ/edit)
1768 | * https://rakyll.org/pprof-ui/
1769 | * https://medium.com/@blanchon.vincent/go-should-i-use-a-pointer-instead-of-a-copy-of-my-struct-44b43b104963
1770 | * [Performance tuning Go in GCP](https://www.youtube.com/watch?v=b0o-xeEoug0)
1771 | * https://medium.com/observability/want-to-debug-latency-7aa48ecbe8f7
1772 | * https://medium.com/dm03514-tech-blog/sre-debugging-simple-memory-leaks-in-go-e0a9e6d63d4d
1773 | * https://www.ardanlabs.com/blog/2013/07/understanding-type-in-go.html
1774 | * https://www.geeksforgeeks.org/structure-member-alignment-padding-and-data-packing/
1775 | * https://developers.redhat.com/blog/2016/06/01/how-to-avoid-wasting-megabytes-of-memory-a-few-bytes-at-a-time/
1776 | * https://go101.org/article/memory-layout.html
1777 | * https://dave.cheney.net/2015/10/09/padding-is-hard
1778 | * http://www.catb.org/esr/structure-packing/
1779 | * [Escape Analysis in Go](https://scvalex.net/posts/29/)
1780 | * https://www.ardanlabs.com/blog/2018/01/escape-analysis-flaws.html
1781 | * https://www.ardanlabs.com/blog/2017/05/language-mechanics-on-stacks-and-pointers.html
1782 | * https://segment.com/blog/allocation-efficiency-in-high-performance-go-services/
1783 | * https://godoc.org/golang.org/x/perf/cmd/benchstat
1784 | * https://www.dotconferences.com/2019/03/daniel-marti-optimizing-go-code-without-a-blindfold
1785 | * https://www.youtube.com/watch?v=jiXnzkAzy30
1786 | * [go cpu mem profiling benchmarks gist](https://gist.github.com/arsham/bbc93990d8e5c9b54128a3d88901ab90)
1787 | * https://hashrocket.com/blog/posts/go-performance-observations
1788 | * https://www.ardanlabs.com/blog/2017/05/language-mechanics-on-escape-analysis.html
1789 | * https://dave.cheney.net/2014/06/07/five-things-that-make-go-fast
1790 | * https://stackoverflow.com/questions/2113751/sizeof-struct-in-go
1791 | * https://stackoverflow.com/questions/31496804/how-to-get-the-size-of-struct-and-its-contents-in-bytes-in-golang?rq=1
1792 | * https://github.com/campoy/go-tooling-workshop/tree/master/3-dynamic-analysis
1793 | * https://blog.usejournal.com/why-you-should-like-sync-pool-2c7960c023ba
1794 | * [work stealing scheduler](https://rakyll.org/scheduler/)
1795 | * https://morsmachine.dk/go-scheduler
1796 | * https://www.ardanlabs.com/blog/2018/08/scheduling-in-go-part1.html
1797 | * https://www.ardanlabs.com/blog/2018/08/scheduling-in-go-part2.html
1798 | * https://www.ardanlabs.com/blog/2018/12/scheduling-in-go-part3.html
1799 | * https://www.welcometothejungle.co/fr/articles/languages-software-go-elixir
1800 | * https://eng.uber.com/optimizing-m3/
1801 | * https://medium.com/@fzambia/bisecting-go-performance-degradation-4d4a7ee83a63
1802 | * https://golang.org/doc/diagnostics.html
1803 | * http://jesseszwedko.com/gsp-go-debugging/#slide1
1804 | * https://fntlnz.wtf/post/gopostmortem/
1805 | * https://dave.cheney.net/2013/10/15/how-does-the-go-build-command-work
1806 | * https://medium.freecodecamp.org/how-i-investigated-memory-leaks-in-go-using-pprof-on-a-large-codebase-4bec4325e192
1807 | * https://medium.com/@cep21/using-go-1-10-new-trace-features-to-debug-an-integration-test-1dc39e4e812d
1808 | * https://medium.com/golangspec/goroutine-leak-400063aef468
1809 | * https://medium.com/@val_deleplace/go-code-refactoring-the-23x-performance-hunt-156746b522f7
1810 | * https://medium.com/@teivah/good-code-vs-bad-code-in-golang-84cb3c5da49d
1811 | * https://matoski.com/article/golang-profiling-flamegraphs/
1812 | * https://dzone.com/articles/so-you-wanna-go-fast
1813 | * https://www.slideshare.net/BadooDev/profiling-and-optimizing-go-programs
1814 | * https://about.sourcegraph.com/go/an-introduction-to-go-tool-trace-rhys-hiltner
1815 | * https://speakerdeck.com/rhysh/an-introduction-to-go-tool-trace
1816 | * https://stackimpact.com/blog/go-profiler-internals/
1817 | * https://syslog.ravelin.com/go-and-memory-layout-6ef30c730d51
1818 | * https://github.com/golang/go/wiki/Performance
1819 | * https://blog.golang.org/ismmkeynote
1820 | * https://making.pusher.com/golangs-real-time-gc-in-theory-and-practice/
1821 | * https://pusher.com/sessions/meetup/the-realtime-guild/golangs-realtime-garbage-collector
1822 | * https://blog.cloudflare.com/go-dont-collect-my-garbage/
1823 | * https://syslog.ravelin.com/further-dangers-of-large-heaps-in-go-7a267b57d487
1824 | * https://www.akshaydeo.com/blog/2017/12/23/How-did-I-improve-latency-by-700-percent-using-syncPool/
1825 | * [Go 1.5 GOMAXPROCS default document](https://docs.google.com/document/d/1At2Ls5_fhJQ59kDK2DFVhFu3g5mATSXqqV5QrxinasI/edit)
1826 | * https://dave.cheney.net/2015/11/29/a-whirlwind-tour-of-gos-runtime-environment-variables
1827 | * [https://engineers.sg/video/understanding-allocations-the-stack-and-the-heap-gophercon-sg-2019--3371](https://www.youtube.com/watch?v=ZMZpH4yT7M0)
1828 | * [Getting to Go's Garbage Collector](https://blog.golang.org/ismmkeynote)
1829 | * [Go GC progress in tweets](https://talks.golang.org/2017/state-of-go.slide#34)
1830 | * https://go101.org/article/concurrent-atomic-operation.html
1831 | * https://www.integralist.co.uk/posts/profiling-go/
1832 | * https://medium.com/golangspec/sync-rwmutex-ca6c6c3208a0
1833 | * https://rakyll.org/mutexprofile/
1834 | * https://jvns.ca/blog/2017/09/24/profiling-go-with-pprof/
1835 | * https://blog.gopheracademy.com/advent-2018/avoid-gc-overhead-large-heaps/
1836 | * [Journey of go's Garbage collector](https://blog.golang.org/ismmkeynote)
1837 | * [Memory Layout and Type Alignment Guarantees](https://go101.org/article/memory-layout.html)
1838 | * https://dougrichardson.org/2016/01/23/go-memory-allocations.html
1841 | * https://hackernoon.com/dancing-with-go-s-mutexes-92407ae927bf
1842 | * [GoLand - Profiling Go Applications and Tests](https://blog.jetbrains.com/go/2019/04/03/profiling-go-applications-and-tests/)
1843 | * https://povilasv.me/go-memory-management/
1844 | * [gperftools - docs for various profilers](https://github.com/gperftools/gperftools)
1845 | * https://software.intel.com/en-us/blogs/2014/05/10/debugging-performance-issues-in-go-programs
1846 | * [when too much concurrency slows you down](https://medium.com/@_orcaman/when-too-much-concurrency-slows-you-down-golang-9c144ca305a)
1847 | * [defer more](https://go101.org/article/defer-more.html)
1848 | * https://go101.org/article/bounds-check-elimination.html
1849 | * [Agniva's slides from Golang Bangalore meetup](https://drive.google.com/file/d/1nm7QoZe047lfnLXmdKC0s8Ub7A8LzF56/view)
1850 | * [JSON unmarshal vs decode benchmark](https://github.com/kpango/go-json-bench)
1851 | * https://www.darkcoding.net/software/go-the-price-of-interface/
1852 | * [Russ Cox - Go Data Structures: Interfaces](https://research.swtch.com/interfaces)
1853 | * https://github.com/golang/go/wiki/CompilerOptimizations
1855 | * [Carmen Andoh - The Why of Go](https://www.youtube.com/watch?v=bmZNaUcwBt4)
1856 |
--------------------------------------------------------------------------------