├── .gitignore ├── README.md ├── examples ├── daisy │ └── main.go ├── mandelbrot-buffered │ ├── exercise │ │ ├── mandelbrot.go │ │ └── trace.out │ ├── mandelbrot.go │ └── trace.out ├── mandelbrot-pkg-profile │ ├── cpu.pprof │ ├── cpu.svg │ └── mandelbrot.go ├── mandelbrot-runtime-pprof │ ├── cpu.pprof │ └── mandelbrot.go ├── mandelbrot-trace │ ├── mandelbrot.go │ ├── px.out │ ├── row.out │ ├── seq.out │ ├── workers1.out │ └── workers4.out ├── mandelbrot │ ├── mandelbrot.go │ └── mandelbrot.png ├── mandelweb │ ├── mandelweb.go │ ├── trace-1qps.out │ └── trace-5qps.out ├── ping-pong │ └── main.go └── sieve │ ├── main.go │ └── trace.out ├── understanding-the-execution-tracer.slide └── vendor └── github.com └── pkg └── profile ├── .travis.yml ├── AUTHORS ├── LICENSE ├── README.md ├── cpu.pprof ├── example_test.go ├── mutex.go ├── mutex17.go ├── profile.go ├── profile_test.go ├── trace.go ├── trace16.go └── trace_test.go /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | *.sw? 3 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Usage 2 | 3 | This slide deck and supporting material is part of the [_Understanding the Execution Tracer_](http://dave.cheney.net/training) workshop by Dave Cheney. 4 | 5 | # Installation 6 | 7 | 1. Clone this code into a directory 8 | ``` 9 | git clone https://github.com/davecheney/understanding-the-execution-tracer 10 | ``` 11 | 12 | 2. Install the Go present tool 13 | ``` 14 | go get -u -v golang.org/x/tools/cmd/present 15 | ``` 16 | 17 | 3. 
Run the present tool 18 | ``` 19 | cd understanding-the-execution-tracer && present 20 | ``` 21 | 22 | The slides will be available at [http://127.0.0.1:3999/understanding-the-execution-tracer.slide](http://127.0.0.1:3999/understanding-the-execution-tracer.slide#1) 23 | 24 | # Online 25 | You can view the current version of the slides at [https://go-talks.appspot.com/github.com/davecheney/understanding-the-execution-tracer/understanding-the-execution-tracer.slide#1](https://go-talks.appspot.com/github.com/davecheney/understanding-the-execution-tracer/understanding-the-execution-tracer.slide#1) 26 | 27 | # License and Materials 28 | 29 | This presentation is licensed under the [Creative Commons Attribution-ShareAlike 4.0 International](https://creativecommons.org/licenses/by-sa/4.0/) licence. 30 | 31 | The code samples included in this presentation are copyright their respective authors. 32 | 33 | You are encouraged to remix, transform, or build upon the material, provided you give appropriate credit and distribute your contributions under the same license. 34 | -------------------------------------------------------------------------------- /examples/daisy/main.go: -------------------------------------------------------------------------------- 1 | // Copyright 2017 Google Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License.
14 | 15 | package main 16 | 17 | import ( 18 | "fmt" 19 | "os" 20 | 21 | "github.com/pkg/profile" 22 | ) 23 | 24 | func main() { 25 | defer profile.Start(profile.TraceProfile).Stop() 26 | 27 | const n = 500 28 | 29 | leftmost := make(chan int) 30 | right := leftmost 31 | left := leftmost 32 | 33 | for i := 0; i < n; i++ { 34 | right = make(chan int) 35 | go pass(left, right) 36 | left = right 37 | } 38 | 39 | go sendFirst(right) 40 | fmt.Fprintln(os.Stderr, <-leftmost) 41 | } 42 | 43 | func pass(left, right chan int) { 44 | v := 1 + <-right 45 | left <- v 46 | } 47 | 48 | func sendFirst(ch chan int) { ch <- 0 } 49 | -------------------------------------------------------------------------------- /examples/mandelbrot-buffered/exercise/mandelbrot.go: -------------------------------------------------------------------------------- 1 | // mandelbrot example code adapted from Francesc Campoy's mandelbrot package. 2 | // https://github.com/campoy/mandelbrot 3 | package main 4 | 5 | import ( 6 | "flag" 7 | "image" 8 | "image/color" 9 | "image/png" 10 | "log" 11 | "os" 12 | "sync" 13 | ) 14 | 15 | // START OMIT 16 | 17 | import "github.com/pkg/profile" 18 | 19 | func main() { 20 | defer profile.Start(profile.TraceProfile, profile.ProfilePath(".")).Stop() 21 | // END OMIT 22 | 23 | var ( 24 | height = flag.Int("h", 1024, "height of the output image in pixels") 25 | width = flag.Int("w", 1024, "width of the output image in pixels") 26 | mode = flag.String("mode", "seq", "mode: seq, px, row, workers") 27 | workers = flag.Int("workers", 1, "number of workers to use") 28 | ) 29 | flag.Parse() 30 | 31 | const output = "mandelbrot.png" 32 | 33 | // open a new file 34 | f, err := os.Create(output) 35 | if err != nil { 36 | log.Fatal(err) 37 | } 38 | 39 | // create the image 40 | c := make([][]color.RGBA, *height) 41 | for i := range c { 42 | c[i] = make([]color.RGBA, *width) 43 | } 44 | 45 | img := &img{ 46 | h: *height, 47 | w: *width, 48 | m: c, 49 | } 50 | 51 | switch *mode { 
52 | case "seq": 53 | seqFillImg(img) 54 | case "px": 55 | oneToOneFillImg(img) 56 | case "row": 57 | onePerRowFillImg(img) 58 | case "workers": 59 | nWorkersFillImg(img, *workers) 60 | default: 61 | panic("unknown mode") 62 | } 63 | 64 | // and encoding it 65 | if err := png.Encode(f, img); err != nil { 66 | log.Fatal(err) 67 | } 68 | } 69 | 70 | type img struct { 71 | h, w int 72 | m [][]color.RGBA 73 | } 74 | 75 | func (m *img) At(x, y int) color.Color { return m.m[x][y] } 76 | func (m *img) ColorModel() color.Model { return color.RGBAModel } 77 | func (m *img) Bounds() image.Rectangle { return image.Rect(0, 0, m.h, m.w) } 78 | 79 | func seqFillImg(m *img) { 80 | for i, row := range m.m { 81 | for j := range row { 82 | fillPixel(m, i, j) 83 | } 84 | } 85 | } 86 | 87 | func oneToOneFillImg(m *img) { 88 | var wg sync.WaitGroup 89 | wg.Add(m.h * m.w) 90 | for i, row := range m.m { 91 | for j := range row { 92 | go func(i, j int) { 93 | fillPixel(m, i, j) 94 | wg.Done() 95 | }(i, j) 96 | } 97 | } 98 | wg.Wait() 99 | } 100 | 101 | func onePerRowFillImg(m *img) { 102 | var wg sync.WaitGroup 103 | wg.Add(m.h) 104 | for i := range m.m { 105 | go func(i int) { 106 | for j := range m.m[i] { 107 | fillPixel(m, i, j) 108 | } 109 | wg.Done() 110 | }(i) 111 | } 112 | wg.Wait() 113 | } 114 | 115 | func nWorkersFillImg(m *img, workers int) { 116 | c := make(chan int, m.h) 117 | var wg sync.WaitGroup 118 | wg.Add(workers) 119 | for i := 0; i < workers; i++ { 120 | go func() { 121 | defer wg.Done() 122 | for i := range c { 123 | for j := range m.m[i] { 124 | fillPixel(m, i, j) 125 | } 126 | } 127 | }() 128 | } 129 | 130 | for i := range m.m { 131 | c <- i 132 | } 133 | close(c) 134 | wg.Wait() 135 | } 136 | 137 | func fillPixel(m *img, x, y int) { 138 | const n = 1000 139 | const Limit = 2.0 140 | Zr, Zi, Tr, Ti := 0.0, 0.0, 0.0, 0.0 141 | Cr := (2*float64(x)/float64(n) - 1.5) 142 | Ci := (2*float64(y)/float64(n) - 1.0) 143 | 144 | for i := 0; i < n && (Tr+Ti <= Limit*Limit); i++ 
{ 145 | Zi = 2*Zr*Zi + Ci 146 | Zr = Tr - Ti + Cr 147 | Tr = Zr * Zr 148 | Ti = Zi * Zi 149 | } 150 | paint(&m.m[x][y], Tr, Ti) 151 | } 152 | 153 | func paint(c *color.RGBA, x, y float64) { 154 | n := byte(x * y * 2) 155 | c.R, c.G, c.B, c.A = n, n, n, 255 156 | } 157 | -------------------------------------------------------------------------------- /examples/mandelbrot-buffered/exercise/trace.out: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/davecheney/understanding-the-execution-tracer/941abff7b736f5a1b5d9549dedf385666d69627f/examples/mandelbrot-buffered/exercise/trace.out -------------------------------------------------------------------------------- /examples/mandelbrot-buffered/mandelbrot.go: -------------------------------------------------------------------------------- 1 | // mandelbrot example code adapted from Francesc Campoy's mandelbrot package. 2 | // https://github.com/campoy/mandelbrot 3 | package main 4 | 5 | import ( 6 | "flag" 7 | "image" 8 | "image/color" 9 | "image/png" 10 | "log" 11 | "os" 12 | "sync" 13 | ) 14 | 15 | // START OMIT 16 | 17 | import "github.com/pkg/profile" 18 | 19 | func main() { 20 | defer profile.Start(profile.TraceProfile, profile.ProfilePath(".")).Stop() 21 | // END OMIT 22 | 23 | var ( 24 | height = flag.Int("h", 1024, "height of the output image in pixels") 25 | width = flag.Int("w", 1024, "width of the output image in pixels") 26 | mode = flag.String("mode", "seq", "mode: seq, px, row, workers") 27 | workers = flag.Int("workers", 1, "number of workers to use") 28 | ) 29 | flag.Parse() 30 | 31 | const output = "mandelbrot.png" 32 | 33 | // open a new file 34 | f, err := os.Create(output) 35 | if err != nil { 36 | log.Fatal(err) 37 | } 38 | 39 | // create the image 40 | c := make([][]color.RGBA, *height) 41 | for i := range c { 42 | c[i] = make([]color.RGBA, *width) 43 | } 44 | 45 | img := &img{ 46 | h: *height, 47 | w: *width, 48 | m: c, 49 | } 50 | 51 
| switch *mode { 52 | case "seq": 53 | seqFillImg(img) 54 | case "px": 55 | oneToOneFillImg(img) 56 | case "row": 57 | onePerRowFillImg(img) 58 | case "workers": 59 | nWorkersFillImg(img, *workers) 60 | default: 61 | panic("unknown mode") 62 | } 63 | 64 | // and encoding it 65 | if err := png.Encode(f, img); err != nil { 66 | log.Fatal(err) 67 | } 68 | } 69 | 70 | type img struct { 71 | h, w int 72 | m [][]color.RGBA 73 | } 74 | 75 | func (m *img) At(x, y int) color.Color { return m.m[x][y] } 76 | func (m *img) ColorModel() color.Model { return color.RGBAModel } 77 | func (m *img) Bounds() image.Rectangle { return image.Rect(0, 0, m.h, m.w) } 78 | 79 | func seqFillImg(m *img) { 80 | for i, row := range m.m { 81 | for j := range row { 82 | fillPixel(m, i, j) 83 | } 84 | } 85 | } 86 | 87 | func oneToOneFillImg(m *img) { 88 | var wg sync.WaitGroup 89 | wg.Add(m.h * m.w) 90 | for i, row := range m.m { 91 | for j := range row { 92 | go func(i, j int) { 93 | fillPixel(m, i, j) 94 | wg.Done() 95 | }(i, j) 96 | } 97 | } 98 | wg.Wait() 99 | } 100 | 101 | func onePerRowFillImg(m *img) { 102 | var wg sync.WaitGroup 103 | wg.Add(m.h) 104 | for i := range m.m { 105 | go func(i int) { 106 | for j := range m.m[i] { 107 | fillPixel(m, i, j) 108 | } 109 | wg.Done() 110 | }(i) 111 | } 112 | wg.Wait() 113 | } 114 | 115 | // BUFSTART OMIT 116 | func nWorkersFillImg(m *img, workers int) { 117 | c := make(chan struct{ i, j int }, m.h*m.w) 118 | var wg sync.WaitGroup 119 | wg.Add(workers) 120 | for i := 0; i < workers; i++ { 121 | go func() { 122 | defer wg.Done() 123 | for t := range c { 124 | fillPixel(m, t.i, t.j) 125 | } 126 | }() 127 | } 128 | // BUFEND OMIT 129 | 130 | for i, row := range m.m { 131 | for j := range row { 132 | c <- struct{ i, j int }{i, j} 133 | } 134 | } 135 | close(c) 136 | wg.Wait() 137 | } 138 | 139 | func fillPixel(m *img, x, y int) { 140 | const n = 1000 141 | const Limit = 2.0 142 | Zr, Zi, Tr, Ti := 0.0, 0.0, 0.0, 0.0 143 | Cr := (2*float64(x)/float64(n) - 
1.5) 144 | Ci := (2*float64(y)/float64(n) - 1.0) 145 | 146 | for i := 0; i < n && (Tr+Ti <= Limit*Limit); i++ { 147 | Zi = 2*Zr*Zi + Ci 148 | Zr = Tr - Ti + Cr 149 | Tr = Zr * Zr 150 | Ti = Zi * Zi 151 | } 152 | paint(&m.m[x][y], Tr, Ti) 153 | } 154 | 155 | func paint(c *color.RGBA, x, y float64) { 156 | n := byte(x * y * 2) 157 | c.R, c.G, c.B, c.A = n, n, n, 255 158 | } 159 | -------------------------------------------------------------------------------- /examples/mandelbrot-buffered/trace.out: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/davecheney/understanding-the-execution-tracer/941abff7b736f5a1b5d9549dedf385666d69627f/examples/mandelbrot-buffered/trace.out -------------------------------------------------------------------------------- /examples/mandelbrot-pkg-profile/cpu.pprof: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/davecheney/understanding-the-execution-tracer/941abff7b736f5a1b5d9549dedf385666d69627f/examples/mandelbrot-pkg-profile/cpu.pprof -------------------------------------------------------------------------------- /examples/mandelbrot-pkg-profile/cpu.svg: -------------------------------------------------------------------------------- 1 | 2 | 4 | 6 | 7 | 8 | 292 | unnamed 293 | 294 | 295 | cluster_L 296 | 297 | 298 | 299 | 300 | Type: cpu 301 | 302 | Type: cpu 303 | Time: Sep 17, 2017 at 12:22pm (AEST) 304 | Duration: 1.81s, Total samples = 1.53s (84.33%) 305 | Showing nodes accounting for 1.53s, 100% of 1.53s total 306 | 307 | 308 | 309 | N1 310 | 311 | 312 | main 313 | paint 314 | mandelbrot.go 315 | 1s (65.36%) 316 | 317 | 318 | 319 | 320 | 321 | N2 322 | 323 | 324 | runtime 325 | main 326 | proc.go 327 | 0 of 1.53s (100%) 328 | 329 | 330 | 331 | 332 | 333 | N4 334 | 335 | 336 | main 337 | main 338 | mandelbrot.go 339 | 0 of 1.53s (100%) 340 | 341 | 342 | 343 | 344 | 345 | N2->N4 346 | 347 | 348 | 349 
| 350 | 351 | 352 | 1.53s 353 | 354 | 355 | 356 | 357 | 358 | N3 359 | 360 | 361 | main 362 | fillPixel 363 | mandelbrot.go 364 | 0.27s (17.65%) 365 | of 1.27s (83.01%) 366 | 367 | 368 | 369 | 370 | 371 | N3->N1 372 | 373 | 374 | 375 | 376 | 377 | 378 | 1s 379 | (inline) 380 | 381 | 382 | 383 | 384 | 385 | N35 386 | 387 | 388 | image/png 389 | Encode 390 | writer.go 391 | 0 of 0.26s (16.99%) 392 | 393 | 394 | 395 | 396 | 397 | N4->N35 398 | 399 | 400 | 401 | 402 | 403 | 404 | 0.26s 405 | 406 | 407 | 408 | 409 | 410 | N38 411 | 412 | 413 | main 414 | seqFillImg 415 | mandelbrot.go 416 | 0 of 1.27s (83.01%) 417 | 418 | 419 | 420 | 421 | 422 | N4->N38 423 | 424 | 425 | 426 | 427 | 428 | 429 | 1.27s 430 | 431 | 432 | 433 | 434 | 435 | N5 436 | 437 | 438 | runtime 439 | mallocgc 440 | malloc.go 441 | 0.13s (8.50%) 442 | of 0.16s (10.46%) 443 | 444 | 445 | 446 | 447 | 448 | N41 449 | 450 | 451 | runtime 452 | (*mcache) 453 | nextFree 454 | malloc.go 455 | 0 of 0.03s (1.96%) 456 | 457 | 458 | 459 | 460 | 461 | N5->N41 462 | 463 | 464 | 465 | 466 | 467 | 468 | 0.03s 469 | 470 | 471 | 472 | 473 | 474 | N6 475 | 476 | 477 | image/png 478 | (*encoder) 479 | writeImage 480 | writer.go 481 | 0 of 0.19s (12.42%) 482 | 483 | 484 | 485 | 486 | 487 | N8 488 | 489 | 490 | main 491 | (*img) 492 | At 493 | mandelbrot.go 494 | 0 of 0.18s (11.76%) 495 | 496 | 497 | 498 | 499 | 500 | N6->N8 501 | 502 | 503 | 504 | 505 | 506 | 507 | 0.11s 508 | 509 | 510 | 511 | 512 | 513 | N18 514 | 515 | 516 | image/png 517 | filter 518 | writer.go 519 | 0.01s (0.65%) 520 | 521 | 522 | 523 | 524 | 525 | N6->N18 526 | 527 | 528 | 529 | 530 | 531 | 532 | 0.01s 533 | 534 | 535 | 536 | 537 | 538 | N31 539 | 540 | 541 | compress/zlib 542 | (*Writer) 543 | Write 544 | writer.go 545 | 0 of 0.07s (4.58%) 546 | 547 | 548 | 549 | 550 | 551 | N6->N31 552 | 553 | 554 | 555 | 556 | 557 | 558 | 0.07s 559 | 560 | 561 | 562 | 563 | 564 | N7 565 | 566 | 567 | image/png 568 | (*Encoder) 569 | Encode 570 | writer.go 571 | 
0 of 0.26s (16.99%) 572 | 573 | 574 | 575 | 576 | 577 | N34 578 | 579 | 580 | image/png 581 | (*encoder) 582 | writeIDATs 583 | writer.go 584 | 0 of 0.19s (12.42%) 585 | 586 | 587 | 588 | 589 | 590 | N7->N34 591 | 592 | 593 | 594 | 595 | 596 | 597 | 0.19s 598 | 599 | 600 | 601 | 602 | 603 | N36 604 | 605 | 606 | image/png 607 | opaque 608 | writer.go 609 | 0 of 0.07s (4.58%) 610 | 611 | 612 | 613 | 614 | 615 | N7->N36 616 | 617 | 618 | 619 | 620 | 621 | 622 | 0.07s 623 | 624 | 625 | 626 | 627 | 628 | N10 629 | 630 | 631 | runtime 632 | convT2Inoptr 633 | iface.go 634 | 0 of 0.18s (11.76%) 635 | 636 | 637 | 638 | 639 | 640 | N8->N10 641 | 642 | 643 | 644 | 645 | 646 | 647 | 0.18s 648 | 649 | 650 | 651 | 652 | 653 | N9 654 | 655 | 656 | syscall 657 | Syscall 658 | asm_darwin_amd64.s 659 | 0.05s (3.27%) 660 | 661 | 662 | 663 | 664 | 665 | N10->N5 666 | 667 | 668 | 669 | 670 | 671 | 672 | 0.16s 673 | 674 | 675 | 676 | 677 | 678 | N13 679 | 680 | 681 | runtime 682 | memmove 683 | memmove_amd64.s 684 | 0.02s (1.31%) 685 | 686 | 687 | 688 | 689 | 690 | N10->N13 691 | 692 | 693 | 694 | 695 | 696 | 697 | 0.02s 698 | 699 | 700 | 701 | 702 | 703 | N11 704 | 705 | 706 | compress/flate 707 | (*compressor) 708 | deflate 709 | deflate.go 710 | 0.01s (0.65%) 711 | of 0.07s (4.58%) 712 | 713 | 714 | 715 | 716 | 717 | N24 718 | 719 | 720 | compress/flate 721 | (*compressor) 722 | findMatch 723 | deflate.go 724 | 0 of 0.01s (0.65%) 725 | 726 | 727 | 728 | 729 | 730 | N11->N24 731 | 732 | 733 | 734 | 735 | 736 | 737 | 0.01s 738 | 739 | 740 | 741 | 742 | 743 | N26 744 | 745 | 746 | compress/flate 747 | (*compressor) 748 | writeBlock 749 | deflate.go 750 | 0 of 0.05s (3.27%) 751 | 752 | 753 | 754 | 755 | 756 | N11->N26 757 | 758 | 759 | 760 | 761 | 762 | 763 | 0.05s 764 | 765 | 766 | 767 | 768 | 769 | N12 770 | 771 | 772 | runtime 773 | mmap 774 | sys_darwin_amd64.s 775 | 0.02s (1.31%) 776 | 777 | 778 | 779 | 780 | 781 | N14 782 | 783 | 784 | compress/flate 785 | (*huffmanBitWriter) 786 
| write 787 | huffman_bit_writer.go 788 | 0 of 0.05s (3.27%) 789 | 790 | 791 | 792 | 793 | 794 | N27 795 | 796 | 797 | compress/flate 798 | (*dictWriter) 799 | Write 800 | deflate.go 801 | 0 of 0.05s (3.27%) 802 | 803 | 804 | 805 | 806 | 807 | N14->N27 808 | 809 | 810 | 811 | 812 | 813 | 814 | 0.05s 815 | 816 | 817 | 818 | 819 | 820 | N15 821 | 822 | 823 | compress/flate 824 | (*huffmanBitWriter) 825 | writeTokens 826 | huffman_bit_writer.go 827 | 0 of 0.05s (3.27%) 828 | 829 | 830 | 831 | 832 | 833 | N28 834 | 835 | 836 | compress/flate 837 | (*huffmanBitWriter) 838 | writeBits 839 | huffman_bit_writer.go 840 | 0 of 0.01s (0.65%) 841 | 842 | 843 | 844 | 845 | 846 | N15->N28 847 | 848 | 849 | 850 | 851 | 852 | 853 | 0.01s 854 | 855 | 856 | 857 | 858 | 859 | N30 860 | 861 | 862 | compress/flate 863 | (*huffmanBitWriter) 864 | writeCode 865 | huffman_bit_writer.go 866 | 0 of 0.04s (2.61%) 867 | 868 | 869 | 870 | 871 | 872 | N15->N30 873 | 874 | 875 | 876 | 877 | 878 | 879 | 0.04s 880 | 881 | 882 | 883 | 884 | 885 | N16 886 | 887 | 888 | runtime 889 | systemstack 890 | asm_amd64.s 891 | 0 of 0.03s (1.96%) 892 | 893 | 894 | 895 | 896 | 897 | N42 898 | 899 | 900 | runtime 901 | (*mcache) 902 | nextFree 903 | func1 904 | malloc.go 905 | 0 of 0.02s (1.31%) 906 | 907 | 908 | 909 | 910 | 911 | N16->N42 912 | 913 | 914 | 915 | 916 | 917 | 918 | 0.02s 919 | 920 | 921 | 922 | 923 | 924 | N46 925 | 926 | 927 | runtime 928 | (*mheap) 929 | alloc 930 | func1 931 | mheap.go 932 | 0 of 0.01s (0.65%) 933 | 934 | 935 | 936 | 937 | 938 | N16->N46 939 | 940 | 941 | 942 | 943 | 944 | 945 | 0.01s 946 | 947 | 948 | 949 | 950 | 951 | N17 952 | 953 | 954 | compress/flate 955 | matchLen 956 | deflate.go 957 | 0.01s (0.65%) 958 | 959 | 960 | 961 | 962 | 963 | N19 964 | 965 | 966 | runtime 967 | (*mcentral) 968 | grow 969 | mcentral.go 970 | 0 of 0.02s (1.31%) 971 | 972 | 973 | 974 | 975 | 976 | N45 977 | 978 | 979 | runtime 980 | (*mheap) 981 | alloc 982 | mheap.go 983 | 0 of 0.01s (0.65%) 
984 | 985 | 986 | 987 | 988 | 989 | N19->N45 990 | 991 | 992 | 993 | 994 | 995 | 996 | 0.01s 997 | 998 | 999 | 1000 | 1001 | 1002 | N51 1003 | 1004 | 1005 | runtime 1006 | heapBits 1007 | initSpan 1008 | mbitmap.go 1009 | 0 of 0.01s (0.65%) 1010 | 1011 | 1012 | 1013 | 1014 | 1015 | N19->N51 1016 | 1017 | 1018 | 1019 | 1020 | 1021 | 1022 | 0.01s 1023 | 1024 | 1025 | 1026 | 1027 | 1028 | N20 1029 | 1030 | 1031 | runtime 1032 | memclrNoHeapPointers 1033 | memclr_amd64.s 1034 | 0.01s (0.65%) 1035 | 1036 | 1037 | 1038 | 1039 | 1040 | N21 1041 | 1042 | 1043 | bufio 1044 | (*Writer) 1045 | Flush 1046 | bufio.go 1047 | 0 of 0.05s (3.27%) 1048 | 1049 | 1050 | 1051 | 1052 | 1053 | N32 1054 | 1055 | 1056 | image/png 1057 | (*encoder) 1058 | Write 1059 | writer.go 1060 | 0 of 0.05s (3.27%) 1061 | 1062 | 1063 | 1064 | 1065 | 1066 | N21->N32 1067 | 1068 | 1069 | 1070 | 1071 | 1072 | 1073 | 0.05s 1074 | 1075 | 1076 | 1077 | 1078 | 1079 | N22 1080 | 1081 | 1082 | bufio 1083 | (*Writer) 1084 | Write 1085 | bufio.go 1086 | 0 of 0.05s (3.27%) 1087 | 1088 | 1089 | 1090 | 1091 | 1092 | N22->N21 1093 | 1094 | 1095 | 1096 | 1097 | 1098 | 1099 | 0.05s 1100 | 1101 | 1102 | 1103 | 1104 | 1105 | N23 1106 | 1107 | 1108 | compress/flate 1109 | (*Writer) 1110 | Write 1111 | deflate.go 1112 | 0 of 0.07s (4.58%) 1113 | 1114 | 1115 | 1116 | 1117 | 1118 | N25 1119 | 1120 | 1121 | compress/flate 1122 | (*compressor) 1123 | write 1124 | deflate.go 1125 | 0 of 0.07s (4.58%) 1126 | 1127 | 1128 | 1129 | 1130 | 1131 | N23->N25 1132 | 1133 | 1134 | 1135 | 1136 | 1137 | 1138 | 0.07s 1139 | 1140 | 1141 | 1142 | 1143 | 1144 | N24->N17 1145 | 1146 | 1147 | 1148 | 1149 | 1150 | 1151 | 0.01s 1152 | 1153 | 1154 | 1155 | 1156 | 1157 | N25->N11 1158 | 1159 | 1160 | 1161 | 1162 | 1163 | 1164 | 0.07s 1165 | 1166 | 1167 | 1168 | 1169 | 1170 | N29 1171 | 1172 | 1173 | compress/flate 1174 | (*huffmanBitWriter) 1175 | writeBlock 1176 | huffman_bit_writer.go 1177 | 0 of 0.05s (3.27%) 1178 | 1179 | 1180 | 1181 | 1182 | 
1183 | N26->N29 1184 | 1185 | 1186 | 1187 | 1188 | 1189 | 1190 | 0.05s 1191 | 1192 | 1193 | 1194 | 1195 | 1196 | N27->N22 1197 | 1198 | 1199 | 1200 | 1201 | 1202 | 1203 | 0.05s 1204 | 1205 | 1206 | 1207 | 1208 | 1209 | N28->N14 1210 | 1211 | 1212 | 1213 | 1214 | 1215 | 1216 | 0.01s 1217 | 1218 | 1219 | 1220 | 1221 | 1222 | N29->N15 1223 | 1224 | 1225 | 1226 | 1227 | 1228 | 1229 | 0.05s 1230 | 1231 | 1232 | 1233 | 1234 | 1235 | N30->N14 1236 | 1237 | 1238 | 1239 | 1240 | 1241 | 1242 | 0.04s 1243 | 1244 | 1245 | 1246 | 1247 | 1248 | N31->N23 1249 | 1250 | 1251 | 1252 | 1253 | 1254 | 1255 | 0.07s 1256 | 1257 | 1258 | 1259 | 1260 | 1261 | N33 1262 | 1263 | 1264 | image/png 1265 | (*encoder) 1266 | writeChunk 1267 | writer.go 1268 | 0 of 0.05s (3.27%) 1269 | 1270 | 1271 | 1272 | 1273 | 1274 | N32->N33 1275 | 1276 | 1277 | 1278 | 1279 | 1280 | 1281 | 0.05s 1282 | 1283 | 1284 | 1285 | 1286 | 1287 | N39 1288 | 1289 | 1290 | os 1291 | (*File) 1292 | Write 1293 | file.go 1294 | 0 of 0.05s (3.27%) 1295 | 1296 | 1297 | 1298 | 1299 | 1300 | N33->N39 1301 | 1302 | 1303 | 1304 | 1305 | 1306 | 1307 | 0.05s 1308 | 1309 | 1310 | 1311 | 1312 | 1313 | N34->N6 1314 | 1315 | 1316 | 1317 | 1318 | 1319 | 1320 | 0.19s 1321 | 1322 | 1323 | 1324 | 1325 | 1326 | N35->N7 1327 | 1328 | 1329 | 1330 | 1331 | 1332 | 1333 | 0.26s 1334 | 1335 | 1336 | 1337 | 1338 | 1339 | N36->N8 1340 | 1341 | 1342 | 1343 | 1344 | 1345 | 1346 | 0.07s 1347 | 1348 | 1349 | 1350 | 1351 | 1352 | N37 1353 | 1354 | 1355 | internal/poll 1356 | (*FD) 1357 | Write 1358 | fd_unix.go 1359 | 0 of 0.05s (3.27%) 1360 | 1361 | 1362 | 1363 | 1364 | 1365 | N56 1366 | 1367 | 1368 | syscall 1369 | Write 1370 | syscall_unix.go 1371 | 0 of 0.05s (3.27%) 1372 | 1373 | 1374 | 1375 | 1376 | 1377 | N37->N56 1378 | 1379 | 1380 | 1381 | 1382 | 1383 | 1384 | 0.05s 1385 | 1386 | 1387 | 1388 | 1389 | 1390 | N38->N3 1391 | 1392 | 1393 | 1394 | 1395 | 1396 | 1397 | 1.27s 1398 | 1399 | 1400 | 1401 | 1402 | 1403 | N40 1404 | 1405 | 1406 | os 1407 | 
(*File) 1408 | write 1409 | file_unix.go 1410 | 0 of 0.05s (3.27%) 1411 | 1412 | 1413 | 1414 | 1415 | 1416 | N39->N40 1417 | 1418 | 1419 | 1420 | 1421 | 1422 | 1423 | 0.05s 1424 | 1425 | 1426 | 1427 | 1428 | 1429 | N40->N37 1430 | 1431 | 1432 | 1433 | 1434 | 1435 | 1436 | 0.05s 1437 | 1438 | 1439 | 1440 | 1441 | 1442 | N41->N16 1443 | 1444 | 1445 | 1446 | 1447 | 1448 | 1449 | 0.03s 1450 | 1451 | 1452 | 1453 | 1454 | 1455 | N43 1456 | 1457 | 1458 | runtime 1459 | (*mcache) 1460 | refill 1461 | mcache.go 1462 | 0 of 0.02s (1.31%) 1463 | 1464 | 1465 | 1466 | 1467 | 1468 | N42->N43 1469 | 1470 | 1471 | 1472 | 1473 | 1474 | 1475 | 0.02s 1476 | 1477 | 1478 | 1479 | 1480 | 1481 | N44 1482 | 1483 | 1484 | runtime 1485 | (*mcentral) 1486 | cacheSpan 1487 | mcentral.go 1488 | 0 of 0.02s (1.31%) 1489 | 1490 | 1491 | 1492 | 1493 | 1494 | N43->N44 1495 | 1496 | 1497 | 1498 | 1499 | 1500 | 1501 | 0.02s 1502 | 1503 | 1504 | 1505 | 1506 | 1507 | N44->N19 1508 | 1509 | 1510 | 1511 | 1512 | 1513 | 1514 | 0.02s 1515 | 1516 | 1517 | 1518 | 1519 | 1520 | N45->N20 1521 | 1522 | 1523 | 1524 | 1525 | 1526 | 1527 | 0.01s 1528 | 1529 | 1530 | 1531 | 1532 | 1533 | N48 1534 | 1535 | 1536 | runtime 1537 | (*mheap) 1538 | alloc_m 1539 | mheap.go 1540 | 0 of 0.01s (0.65%) 1541 | 1542 | 1543 | 1544 | 1545 | 1546 | N46->N48 1547 | 1548 | 1549 | 1550 | 1551 | 1552 | 1553 | 0.01s 1554 | 1555 | 1556 | 1557 | 1558 | 1559 | N47 1560 | 1561 | 1562 | runtime 1563 | (*mheap) 1564 | allocSpanLocked 1565 | mheap.go 1566 | 0 of 0.01s (0.65%) 1567 | 1568 | 1569 | 1570 | 1571 | 1572 | N49 1573 | 1574 | 1575 | runtime 1576 | (*mheap) 1577 | grow 1578 | mheap.go 1579 | 0 of 0.01s (0.65%) 1580 | 1581 | 1582 | 1583 | 1584 | 1585 | N47->N49 1586 | 1587 | 1588 | 1589 | 1590 | 1591 | 1592 | 0.01s 1593 | 1594 | 1595 | 1596 | 1597 | 1598 | N48->N47 1599 | 1600 | 1601 | 1602 | 1603 | 1604 | 1605 | 0.01s 1606 | 1607 | 1608 | 1609 | 1610 | 1611 | N50 1612 | 1613 | 1614 | runtime 1615 | (*mheap) 1616 | sysAlloc 1617 | 
malloc.go 1618 | 0 of 0.01s (0.65%) 1619 | 1620 | 1621 | 1622 | 1623 | 1624 | N49->N50 1625 | 1626 | 1627 | 1628 | 1629 | 1630 | 1631 | 0.01s 1632 | 1633 | 1634 | 1635 | 1636 | 1637 | N55 1638 | 1639 | 1640 | runtime 1641 | sysMap 1642 | mem_darwin.go 1643 | 0 of 0.01s (0.65%) 1644 | 1645 | 1646 | 1647 | 1648 | 1649 | N50->N55 1650 | 1651 | 1652 | 1653 | 1654 | 1655 | 1656 | 0.01s 1657 | 1658 | 1659 | 1660 | 1661 | 1662 | N53 1663 | 1664 | 1665 | runtime 1666 | newMarkBits 1667 | mheap.go 1668 | 0 of 0.01s (0.65%) 1669 | 1670 | 1671 | 1672 | 1673 | 1674 | N51->N53 1675 | 1676 | 1677 | 1678 | 1679 | 1680 | 1681 | 0.01s 1682 | 1683 | 1684 | 1685 | 1686 | 1687 | N52 1688 | 1689 | 1690 | runtime 1691 | newArenaMayUnlock 1692 | mheap.go 1693 | 0 of 0.01s (0.65%) 1694 | 1695 | 1696 | 1697 | 1698 | 1699 | N54 1700 | 1701 | 1702 | runtime 1703 | sysAlloc 1704 | mem_darwin.go 1705 | 0 of 0.01s (0.65%) 1706 | 1707 | 1708 | 1709 | 1710 | 1711 | N52->N54 1712 | 1713 | 1714 | 1715 | 1716 | 1717 | 1718 | 0.01s 1719 | 1720 | 1721 | 1722 | 1723 | 1724 | N53->N52 1725 | 1726 | 1727 | 1728 | 1729 | 1730 | 1731 | 0.01s 1732 | 1733 | 1734 | 1735 | 1736 | 1737 | N54->N12 1738 | 1739 | 1740 | 1741 | 1742 | 1743 | 1744 | 0.01s 1745 | 1746 | 1747 | 1748 | 1749 | 1750 | N55->N12 1751 | 1752 | 1753 | 1754 | 1755 | 1756 | 1757 | 0.01s 1758 | 1759 | 1760 | 1761 | 1762 | 1763 | N57 1764 | 1765 | 1766 | syscall 1767 | write 1768 | zsyscall_darwin_amd64.go 1769 | 0 of 0.05s (3.27%) 1770 | 1771 | 1772 | 1773 | 1774 | 1775 | N56->N57 1776 | 1777 | 1778 | 1779 | 1780 | 1781 | 1782 | 0.05s 1783 | 1784 | 1785 | 1786 | 1787 | 1788 | N57->N9 1789 | 1790 | 1791 | 1792 | 1793 | 1794 | 1795 | 0.05s 1796 | 1797 | 1798 | 1799 | 1800 | 1801 | -------------------------------------------------------------------------------- /examples/mandelbrot-pkg-profile/mandelbrot.go: -------------------------------------------------------------------------------- 1 | // mandelbrot example code adapted from Francesc 
Campoy's mandelbrot package. 2 | // https://github.com/campoy/mandelbrot 3 | package main 4 | 5 | import ( 6 | "flag" 7 | "image" 8 | "image/color" 9 | "image/png" 10 | "log" 11 | "os" 12 | "sync" 13 | ) 14 | 15 | // START OMIT 16 | 17 | import "github.com/pkg/profile" 18 | 19 | func main() { 20 | defer profile.Start(profile.CPUProfile, profile.ProfilePath(".")).Stop() 21 | // END OMIT 22 | 23 | var ( 24 | height = flag.Int("h", 1024, "height of the output image in pixels") 25 | width = flag.Int("w", 1024, "width of the output image in pixels") 26 | mode = flag.String("mode", "seq", "mode: seq, px, row, workers") 27 | workers = flag.Int("workers", 1, "number of workers to use") 28 | ) 29 | flag.Parse() 30 | 31 | const output = "mandelbrot.png" 32 | 33 | // open a new file 34 | f, err := os.Create(output) 35 | if err != nil { 36 | log.Fatal(err) 37 | } 38 | 39 | // create the image 40 | c := make([][]color.RGBA, *height) 41 | for i := range c { 42 | c[i] = make([]color.RGBA, *width) 43 | } 44 | 45 | img := &img{ 46 | h: *height, 47 | w: *width, 48 | m: c, 49 | } 50 | 51 | switch *mode { 52 | case "seq": 53 | seqFillImg(img) 54 | case "px": 55 | oneToOneFillImg(img) 56 | case "row": 57 | onePerRowFillImg(img) 58 | case "workers": 59 | nWorkersFillImg(img, *workers) 60 | default: 61 | panic("unknown mode") 62 | } 63 | 64 | // and encoding it 65 | if err := png.Encode(f, img); err != nil { 66 | log.Fatal(err) 67 | } 68 | } 69 | 70 | type img struct { 71 | h, w int 72 | m [][]color.RGBA 73 | } 74 | 75 | func (m *img) At(x, y int) color.Color { return m.m[x][y] } 76 | func (m *img) ColorModel() color.Model { return color.RGBAModel } 77 | func (m *img) Bounds() image.Rectangle { return image.Rect(0, 0, m.h, m.w) } 78 | 79 | // SEQSTART OMIT 80 | func seqFillImg(m *img) { 81 | for i, row := range m.m { 82 | for j := range row { 83 | fillPixel(m, i, j) 84 | } 85 | } 86 | } 87 | 88 | // SEQEND OMIT 89 | 90 | func oneToOneFillImg(m *img) { 91 | var wg sync.WaitGroup 92 | 
wg.Add(m.h * m.w) 93 | for i, row := range m.m { 94 | for j := range row { 95 | go func(i, j int) { 96 | fillPixel(m, i, j) 97 | wg.Done() 98 | }(i, j) 99 | } 100 | } 101 | wg.Wait() 102 | } 103 | 104 | func onePerRowFillImg(m *img) { 105 | var wg sync.WaitGroup 106 | wg.Add(m.h) 107 | for i := range m.m { 108 | go func(i int) { 109 | for j := range m.m[i] { 110 | fillPixel(m, i, j) 111 | } 112 | wg.Done() 113 | }(i) 114 | } 115 | wg.Wait() 116 | } 117 | 118 | func nWorkersFillImg(m *img, workers int) { 119 | c := make(chan struct{ i, j int }) 120 | for i := 0; i < workers; i++ { 121 | go func() { 122 | for t := range c { 123 | fillPixel(m, t.i, t.j) 124 | } 125 | }() 126 | } 127 | 128 | for i, row := range m.m { 129 | for j := range row { 130 | c <- struct{ i, j int }{i, j} 131 | } 132 | } 133 | close(c) 134 | } 135 | 136 | func fillPixel(m *img, x, y int) { 137 | const n = 1000 138 | const Limit = 2.0 139 | Zr, Zi, Tr, Ti := 0.0, 0.0, 0.0, 0.0 140 | Cr := (2*float64(x)/float64(n) - 1.5) 141 | Ci := (2*float64(y)/float64(n) - 1.0) 142 | 143 | for i := 0; i < n && (Tr+Ti <= Limit*Limit); i++ { 144 | Zi = 2*Zr*Zi + Ci 145 | Zr = Tr - Ti + Cr 146 | Tr = Zr * Zr 147 | Ti = Zi * Zi 148 | } 149 | paint(&m.m[x][y], Tr, Ti) 150 | } 151 | 152 | func paint(c *color.RGBA, x, y float64) { 153 | n := byte(x * y * 2) 154 | c.R, c.G, c.B, c.A = n, n, n, 255 155 | } 156 | -------------------------------------------------------------------------------- /examples/mandelbrot-runtime-pprof/cpu.pprof: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/davecheney/understanding-the-execution-tracer/941abff7b736f5a1b5d9549dedf385666d69627f/examples/mandelbrot-runtime-pprof/cpu.pprof -------------------------------------------------------------------------------- /examples/mandelbrot-runtime-pprof/mandelbrot.go: -------------------------------------------------------------------------------- 1 | // mandelbrot example code 
adapted from Francesc Campoy's mandelbrot package. 2 | // https://github.com/campoy/mandelbrot 3 | package main 4 | 5 | import ( 6 | "flag" 7 | "image" 8 | "image/color" 9 | "image/png" 10 | "log" 11 | "os" 12 | "sync" 13 | ) 14 | 15 | // START OMIT 16 | 17 | import "runtime/pprof" 18 | 19 | func main() { 20 | pprof.StartCPUProfile(os.Stdout) 21 | defer pprof.StopCPUProfile() 22 | // END OMIT 23 | 24 | var ( 25 | height = flag.Int("h", 1024, "height of the output image in pixels") 26 | width = flag.Int("w", 1024, "width of the output image in pixels") 27 | mode = flag.String("mode", "seq", "mode: seq, px, row, workers") 28 | workers = flag.Int("workers", 1, "number of workers to use") 29 | ) 30 | flag.Parse() 31 | 32 | const output = "mandelbrot.png" 33 | 34 | // open a new file 35 | f, err := os.Create(output) 36 | if err != nil { 37 | log.Fatal(err) 38 | } 39 | 40 | // create the image 41 | c := make([][]color.RGBA, *height) 42 | for i := range c { 43 | c[i] = make([]color.RGBA, *width) 44 | } 45 | 46 | img := &img{ 47 | h: *height, 48 | w: *width, 49 | m: c, 50 | } 51 | 52 | switch *mode { 53 | case "seq": 54 | seqFillImg(img) 55 | case "px": 56 | oneToOneFillImg(img) 57 | case "row": 58 | onePerRowFillImg(img) 59 | case "workers": 60 | nWorkersFillImg(img, *workers) 61 | default: 62 | panic("unknown mode") 63 | } 64 | 65 | // and encoding it 66 | if err := png.Encode(f, img); err != nil { 67 | log.Fatal(err) 68 | } 69 | } 70 | 71 | type img struct { 72 | h, w int 73 | m [][]color.RGBA 74 | } 75 | 76 | func (m *img) At(x, y int) color.Color { return m.m[x][y] } 77 | func (m *img) ColorModel() color.Model { return color.RGBAModel } 78 | func (m *img) Bounds() image.Rectangle { return image.Rect(0, 0, m.h, m.w) } 79 | 80 | func seqFillImg(m *img) { 81 | for i, row := range m.m { 82 | for j := range row { 83 | fillPixel(m, i, j) 84 | } 85 | } 86 | } 87 | 88 | func oneToOneFillImg(m *img) { 89 | var wg sync.WaitGroup 90 | wg.Add(m.h * m.w) 91 | for i, row := range 
m.m { 92 | for j := range row { 93 | go func(i, j int) { 94 | fillPixel(m, i, j) 95 | wg.Done() 96 | }(i, j) 97 | } 98 | } 99 | wg.Wait() 100 | } 101 | 102 | func onePerRowFillImg(m *img) { 103 | var wg sync.WaitGroup 104 | wg.Add(m.h) 105 | for i := range m.m { 106 | go func(i int) { 107 | for j := range m.m[i] { 108 | fillPixel(m, i, j) 109 | } 110 | wg.Done() 111 | }(i) 112 | } 113 | wg.Wait() 114 | } 115 | 116 | func nWorkersFillImg(m *img, workers int) { 117 | c := make(chan struct{ i, j int }) 118 | for i := 0; i < workers; i++ { 119 | go func() { 120 | for t := range c { 121 | fillPixel(m, t.i, t.j) 122 | } 123 | }() 124 | } 125 | 126 | for i, row := range m.m { 127 | for j := range row { 128 | c <- struct{ i, j int }{i, j} 129 | } 130 | } 131 | close(c) 132 | } 133 | 134 | func fillPixel(m *img, x, y int) { 135 | const n = 1000 136 | const Limit = 2.0 137 | Zr, Zi, Tr, Ti := 0.0, 0.0, 0.0, 0.0 138 | Cr := (2*float64(x)/float64(n) - 1.5) 139 | Ci := (2*float64(y)/float64(n) - 1.0) 140 | 141 | for i := 0; i < n && (Tr+Ti <= Limit*Limit); i++ { 142 | Zi = 2*Zr*Zi + Ci 143 | Zr = Tr - Ti + Cr 144 | Tr = Zr * Zr 145 | Ti = Zi * Zi 146 | } 147 | paint(&m.m[x][y], Tr, Ti) 148 | } 149 | 150 | func paint(c *color.RGBA, x, y float64) { 151 | n := byte(x * y * 2) 152 | c.R, c.G, c.B, c.A = n, n, n, 255 153 | } 154 | -------------------------------------------------------------------------------- /examples/mandelbrot-trace/mandelbrot.go: -------------------------------------------------------------------------------- 1 | // mandelbrot example code adapted from Francesc Campoy's mandelbrot package. 
2 | // https://github.com/campoy/mandelbrot 3 | package main 4 | 5 | import ( 6 | "flag" 7 | "image" 8 | "image/color" 9 | "image/png" 10 | "log" 11 | "os" 12 | "sync" 13 | ) 14 | 15 | // START OMIT 16 | 17 | import "github.com/pkg/profile" 18 | 19 | func main() { 20 | defer profile.Start(profile.TraceProfile, profile.ProfilePath(".")).Stop() 21 | // END OMIT 22 | 23 | var ( 24 | height = flag.Int("h", 1024, "height of the output image in pixels") 25 | width = flag.Int("w", 1024, "width of the output image in pixels") 26 | mode = flag.String("mode", "seq", "mode: seq, px, row, workers") 27 | workers = flag.Int("workers", 1, "number of workers to use") 28 | ) 29 | flag.Parse() 30 | 31 | const output = "mandelbrot.png" 32 | 33 | // open a new file 34 | f, err := os.Create(output) 35 | if err != nil { 36 | log.Fatal(err) 37 | } 38 | 39 | // create the image 40 | c := make([][]color.RGBA, *height) 41 | for i := range c { 42 | c[i] = make([]color.RGBA, *width) 43 | } 44 | 45 | img := &img{ 46 | h: *height, 47 | w: *width, 48 | m: c, 49 | } 50 | 51 | switch *mode { 52 | case "seq": 53 | seqFillImg(img) 54 | case "px": 55 | oneToOneFillImg(img) 56 | case "row": 57 | onePerRowFillImg(img) 58 | case "workers": 59 | nWorkersFillImg(img, *workers) 60 | default: 61 | panic("unknown mode") 62 | } 63 | 64 | // and encoding it 65 | if err := png.Encode(f, img); err != nil { 66 | log.Fatal(err) 67 | } 68 | } 69 | 70 | type img struct { 71 | h, w int 72 | m [][]color.RGBA 73 | } 74 | 75 | func (m *img) At(x, y int) color.Color { return m.m[x][y] } 76 | func (m *img) ColorModel() color.Model { return color.RGBAModel } 77 | func (m *img) Bounds() image.Rectangle { return image.Rect(0, 0, m.h, m.w) } 78 | 79 | // SEQSTART OMIT 80 | func seqFillImg(m *img) { 81 | for i, row := range m.m { 82 | for j := range row { 83 | fillPixel(m, i, j) 84 | } 85 | } 86 | } 87 | 88 | // SEQEND OMIT 89 | 90 | func oneToOneFillImg(m *img) { 91 | var wg sync.WaitGroup 92 | wg.Add(m.h * m.w) 93 | for i, 
row := range m.m { 94 | for j := range row { 95 | go func(i, j int) { 96 | fillPixel(m, i, j) 97 | wg.Done() 98 | }(i, j) 99 | } 100 | } 101 | wg.Wait() 102 | } 103 | 104 | func onePerRowFillImg(m *img) { 105 | var wg sync.WaitGroup 106 | wg.Add(m.h) 107 | for i := range m.m { 108 | go func(i int) { 109 | for j := range m.m[i] { 110 | fillPixel(m, i, j) 111 | } 112 | wg.Done() 113 | }(i) 114 | } 115 | wg.Wait() 116 | } 117 | 118 | func nWorkersFillImg(m *img, workers int) { 119 | c := make(chan struct{ i, j int }) 120 | for i := 0; i < workers; i++ { 121 | go func() { 122 | for t := range c { 123 | fillPixel(m, t.i, t.j) 124 | } 125 | }() 126 | } 127 | 128 | for i, row := range m.m { 129 | for j := range row { 130 | c <- struct{ i, j int }{i, j} 131 | } 132 | } 133 | close(c) 134 | } 135 | 136 | func fillPixel(m *img, x, y int) { 137 | const n = 1000 138 | const Limit = 2.0 139 | Zr, Zi, Tr, Ti := 0.0, 0.0, 0.0, 0.0 140 | Cr := (2*float64(x)/float64(n) - 1.5) 141 | Ci := (2*float64(y)/float64(n) - 1.0) 142 | 143 | for i := 0; i < n && (Tr+Ti <= Limit*Limit); i++ { 144 | Zi = 2*Zr*Zi + Ci 145 | Zr = Tr - Ti + Cr 146 | Tr = Zr * Zr 147 | Ti = Zi * Zi 148 | } 149 | paint(&m.m[x][y], Tr, Ti) 150 | } 151 | 152 | func paint(c *color.RGBA, x, y float64) { 153 | n := byte(x * y * 2) 154 | c.R, c.G, c.B, c.A = n, n, n, 255 155 | } 156 | -------------------------------------------------------------------------------- /examples/mandelbrot-trace/px.out: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/davecheney/understanding-the-execution-tracer/941abff7b736f5a1b5d9549dedf385666d69627f/examples/mandelbrot-trace/px.out -------------------------------------------------------------------------------- /examples/mandelbrot-trace/row.out: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/davecheney/understanding-the-execution-tracer/941abff7b736f5a1b5d9549dedf385666d69627f/examples/mandelbrot-trace/row.out -------------------------------------------------------------------------------- /examples/mandelbrot-trace/seq.out: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/davecheney/understanding-the-execution-tracer/941abff7b736f5a1b5d9549dedf385666d69627f/examples/mandelbrot-trace/seq.out -------------------------------------------------------------------------------- /examples/mandelbrot-trace/workers1.out: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/davecheney/understanding-the-execution-tracer/941abff7b736f5a1b5d9549dedf385666d69627f/examples/mandelbrot-trace/workers1.out -------------------------------------------------------------------------------- /examples/mandelbrot-trace/workers4.out: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/davecheney/understanding-the-execution-tracer/941abff7b736f5a1b5d9549dedf385666d69627f/examples/mandelbrot-trace/workers4.out -------------------------------------------------------------------------------- /examples/mandelbrot/mandelbrot.go: -------------------------------------------------------------------------------- 1 | // mandelbrot example code adapted from Francesc Campoy's mandelbrot package. 
2 | // https://github.com/campoy/mandelbrot 3 | package main 4 | 5 | import ( 6 | "flag" 7 | "image" 8 | "image/color" 9 | "image/png" 10 | "log" 11 | "os" 12 | "sync" 13 | ) 14 | 15 | func main() { 16 | var ( 17 | height = flag.Int("h", 1024, "height of the output image in pixels") 18 | width = flag.Int("w", 1024, "width of the output image in pixels") 19 | mode = flag.String("mode", "seq", "mode: seq, px, row, workers") 20 | workers = flag.Int("workers", 1, "number of workers to use") 21 | ) 22 | flag.Parse() 23 | 24 | const output = "mandelbrot.png" 25 | 26 | // open a new file 27 | f, err := os.Create(output) 28 | if err != nil { 29 | log.Fatal(err) 30 | } 31 | 32 | // create the image 33 | c := make([][]color.RGBA, *height) 34 | for i := range c { 35 | c[i] = make([]color.RGBA, *width) 36 | } 37 | 38 | img := &img{ 39 | h: *height, 40 | w: *width, 41 | m: c, 42 | } 43 | 44 | switch *mode { 45 | case "seq": 46 | seqFillImg(img) 47 | case "px": 48 | oneToOneFillImg(img) 49 | case "row": 50 | onePerRowFillImg(img) 51 | case "workers": 52 | nWorkersFillImg(img, *workers) 53 | default: 54 | panic("unknown mode") 55 | } 56 | 57 | // and encoding it 58 | if err := png.Encode(f, img); err != nil { 59 | log.Fatal(err) 60 | } 61 | } 62 | 63 | type img struct { 64 | h, w int 65 | m [][]color.RGBA 66 | } 67 | 68 | func (m *img) At(x, y int) color.Color { return m.m[x][y] } 69 | func (m *img) ColorModel() color.Model { return color.RGBAModel } 70 | func (m *img) Bounds() image.Rectangle { return image.Rect(0, 0, m.h, m.w) } 71 | 72 | func seqFillImg(m *img) { 73 | for i, row := range m.m { 74 | for j := range row { 75 | fillPixel(m, i, j) 76 | } 77 | } 78 | } 79 | 80 | func oneToOneFillImg(m *img) { 81 | var wg sync.WaitGroup 82 | wg.Add(m.h * m.w) 83 | for i, row := range m.m { 84 | for j := range row { 85 | go func(i, j int) { 86 | fillPixel(m, i, j) 87 | wg.Done() 88 | }(i, j) 89 | } 90 | } 91 | wg.Wait() 92 | } 93 | 94 | func onePerRowFillImg(m *img) { 95 | var wg 
sync.WaitGroup 96 | wg.Add(m.h) 97 | for i := range m.m { 98 | go func(i int) { 99 | for j := range m.m[i] { 100 | fillPixel(m, i, j) 101 | } 102 | wg.Done() 103 | }(i) 104 | } 105 | wg.Wait() 106 | } 107 | 108 | func nWorkersFillImg(m *img, workers int) { 109 | c := make(chan struct{ i, j int }) 110 | for i := 0; i < workers; i++ { 111 | go func() { 112 | for t := range c { 113 | fillPixel(m, t.i, t.j) 114 | } 115 | }() 116 | } 117 | 118 | for i, row := range m.m { 119 | for j := range row { 120 | c <- struct{ i, j int }{i, j} 121 | } 122 | } 123 | close(c) 124 | } 125 | 126 | func fillPixel(m *img, x, y int) { 127 | const n = 1000 128 | const Limit = 2.0 129 | Zr, Zi, Tr, Ti := 0.0, 0.0, 0.0, 0.0 130 | Cr := (2*float64(x)/float64(n) - 1.5) 131 | Ci := (2*float64(y)/float64(n) - 1.0) 132 | 133 | for i := 0; i < n && (Tr+Ti <= Limit*Limit); i++ { 134 | Zi = 2*Zr*Zi + Ci 135 | Zr = Tr - Ti + Cr 136 | Tr = Zr * Zr 137 | Ti = Zi * Zi 138 | } 139 | paint(&m.m[x][y], Tr, Ti) 140 | } 141 | 142 | func paint(c *color.RGBA, x, y float64) { 143 | n := byte(x * y * 2) 144 | c.R, c.G, c.B, c.A = n, n, n, 255 145 | } 146 | -------------------------------------------------------------------------------- /examples/mandelbrot/mandelbrot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/davecheney/understanding-the-execution-tracer/941abff7b736f5a1b5d9549dedf385666d69627f/examples/mandelbrot/mandelbrot.png -------------------------------------------------------------------------------- /examples/mandelweb/mandelweb.go: -------------------------------------------------------------------------------- 1 | // mandelbrot example code adapted from Francesc Campoy's mandelbrot package. 
2 | // https://github.com/campoy/mandelbrot 3 | package main 4 | 5 | import ( 6 | "image" 7 | "image/color" 8 | "image/png" 9 | "log" 10 | "runtime" 11 | "sync" 12 | "time" 13 | 14 | "net/http" 15 | _ "net/http/pprof" 16 | ) 17 | 18 | func main() { 19 | http.HandleFunc("/mandelbrot", mandelbrot) 20 | http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) { 21 | http.Redirect(w, r, "/mandelbrot", http.StatusPermanentRedirect) 22 | }) 23 | log.Println("listening on http://127.0.0.1:8080/") 24 | http.ListenAndServe(":8080", logRequest(http.DefaultServeMux)) 25 | } 26 | 27 | func logRequest(h http.Handler) http.Handler { 28 | return http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) { 29 | start := time.Now() 30 | h.ServeHTTP(w, req) 31 | log.Println(req.RemoteAddr, req.RequestURI, time.Since(start)) 32 | }) 33 | } 34 | 35 | func mandelbrot(w http.ResponseWriter, req *http.Request) { 36 | const height, width = 512, 512 37 | c := make([][]color.RGBA, height) 38 | for i := range c { 39 | c[i] = make([]color.RGBA, width) 40 | } 41 | img := &img{h: height, w: width, m: c} 42 | 43 | fillImage(img, runtime.NumCPU()) 44 | png.Encode(w, img) 45 | } 46 | 47 | type img struct { 48 | h, w int 49 | m [][]color.RGBA 50 | } 51 | 52 | func (m *img) At(x, y int) color.Color { return m.m[x][y] } 53 | func (m *img) ColorModel() color.Model { return color.RGBAModel } 54 | func (m *img) Bounds() image.Rectangle { return image.Rect(0, 0, m.h, m.w) } 55 | 56 | func fillImage(m *img, workers int) { 57 | c := make(chan int, m.h) 58 | var wg sync.WaitGroup 59 | wg.Add(workers) 60 | for i := 0; i < workers; i++ { 61 | go func() { 62 | defer wg.Done() 63 | for i := range c { 64 | for j := range m.m[i] { 65 | fillPixel(m, i, j) 66 | } 67 | } 68 | }() 69 | } 70 | 71 | for i := range m.m { 72 | c <- i 73 | } 74 | close(c) 75 | wg.Wait() 76 | } 77 | 78 | func fillPixel(m *img, x, y int) { 79 | const n = 1000 80 | const Limit = 2.0 81 | const Zoom = 4 82 | Zr, Zi, Tr, Ti := 
0.0, 0.0, 0.0, 0.0 83 | Cr := (Zoom*float64(x)/float64(n) - 1.5) 84 | Ci := (Zoom*float64(y)/float64(n) - 1.0) 85 | 86 | for i := 0; i < n && (Tr+Ti <= Limit*Limit); i++ { 87 | Zi = 2*Zr*Zi + Ci 88 | Zr = Tr - Ti + Cr 89 | Tr = Zr * Zr 90 | Ti = Zi * Zi 91 | } 92 | paint(&m.m[x][y], Tr, Ti) 93 | } 94 | 95 | func paint(c *color.RGBA, x, y float64) { 96 | n := byte(x * y * 2) 97 | c.R, c.G, c.B, c.A = n, n, n, 255 98 | } 99 | -------------------------------------------------------------------------------- /examples/mandelweb/trace-1qps.out: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/davecheney/understanding-the-execution-tracer/941abff7b736f5a1b5d9549dedf385666d69627f/examples/mandelweb/trace-1qps.out -------------------------------------------------------------------------------- /examples/mandelweb/trace-5qps.out: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/davecheney/understanding-the-execution-tracer/941abff7b736f5a1b5d9549dedf385666d69627f/examples/mandelweb/trace-5qps.out -------------------------------------------------------------------------------- /examples/ping-pong/main.go: -------------------------------------------------------------------------------- 1 | // Copyright 2017 Google Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package main 16 | 17 | import ( 18 | "fmt" 19 | "time" 20 | 21 | "github.com/pkg/profile" 22 | ) 23 | 24 | type Ball struct{ hits int } 25 | 26 | func main() { 27 | defer profile.Start(profile.TraceProfile).Stop() 28 | table := make(chan *Ball) 29 | go player("ping", table) 30 | go player("pong", table) 31 | 32 | table <- new(Ball) // game on; toss the ball 33 | time.Sleep(1 * time.Second) 34 | <-table // game over; grab the ball 35 | } 36 | 37 | func player(name string, table chan *Ball) { 38 | for { 39 | ball := <-table 40 | ball.hits++ 41 | fmt.Println(name, ball.hits) 42 | time.Sleep(100 * time.Millisecond) 43 | table <- ball 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /examples/sieve/main.go: -------------------------------------------------------------------------------- 1 | // Adapted from https://golang.org/doc/play/sieve.go 2 | // Copywrite the Go authors, 2009 3 | 4 | // A concurrent prime sieve 5 | package main 6 | 7 | import ( 8 | "fmt" 9 | 10 | "github.com/pkg/profile" 11 | ) 12 | 13 | // Send the sequence 2, 3, 4, ... to channel 'ch'. 14 | func Generate(ch chan<- int) { 15 | for i := 2; ; i++ { 16 | ch <- i // Send 'i' to channel 'ch'. 17 | } 18 | } 19 | 20 | // Copy the values from channel 'in' to channel 'out', 21 | // removing those divisible by 'prime'. 22 | func Filter(in <-chan int, out chan<- int, prime int) { 23 | for { 24 | i := <-in // Receive value from 'in'. 25 | if i%prime != 0 { 26 | out <- i // Send 'i' to 'out'. 27 | } 28 | } 29 | } 30 | 31 | // The prime sieve: Daisy-chain Filter processes. 32 | func main() { 33 | defer profile.Start(profile.TraceProfile, profile.ProfilePath(".")).Stop() 34 | ch := make(chan int) // Create a new channel. 35 | go Generate(ch) // Launch Generate goroutine. 
36 | for i := 0; i < 10; i++ { 37 | prime := <-ch 38 | fmt.Println(prime) 39 | ch1 := make(chan int) 40 | go Filter(ch, ch1, prime) 41 | ch = ch1 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /examples/sieve/trace.out: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/davecheney/understanding-the-execution-tracer/941abff7b736f5a1b5d9549dedf385666d69627f/examples/sieve/trace.out -------------------------------------------------------------------------------- /understanding-the-execution-tracer.slide: -------------------------------------------------------------------------------- 1 | Understanding the Go execution tracer 2 | Novosibirsk 3 | 24 Sep 2017 4 | 5 | Dave Cheney 6 | dave@cheney.net 7 | http://dave.cheney.net/ 8 | @davecheney 9 | 10 | * License and Materials 11 | 12 | This presentation is a collaboration between [[https://twitter.com/davecheney][David Cheney]] and [[https://twitter.com/francesc][Francesc Campoy]]. 13 | 14 | This presentation is licensed under the [[https://creativecommons.org/licenses/by-sa/4.0/][Creative Commons Attribution-ShareAlike 4.0 International]] licence. 15 | 16 | The materials for this presentation are available on GitHub: 17 | 18 | .link https://github.com/davecheney/understanding-the-execution-tracer 19 | 20 | The code samples are licensed by, and copywrite of, their respective authors. 21 | 22 | You are encouraged to remix, transform, or build upon the material, providing you give appropriate credit and distribute your contributions under the same license. 23 | 24 | If you have suggestions or corrections to this presentation, please raise [[https://github.com/davecheney/understanding-the-execution-tracer/issues][an issue on the GitHub project]]. 25 | 26 | * Introduction 27 | 28 | This is a presentation about the Go execution tracer. 
29 | 30 | The execution tracer was developed by [[https://github.com/dvyukov][Dmitry Vyukov]] for Go 1.5 and remained under-documented, and under-utilised, until last year. 31 | 32 | Unlike sample based profiling, the execution tracer is integrated into the Go runtime, so it doesn't just know what a Go program is doing at a particular point in time, but _why_. 33 | 34 | * Agenda 35 | 36 | This workshop is aimed at developers who are looking to improve the performance, or diagnose performance issues, in their Go applications. 37 | 38 | - What is execution tracing, why do we need it? 39 | - Execution tracer basics. 40 | - Diagnosis with the execution tracer. 41 | - Conclusion. 42 | 43 | After each section we'll have time for questions. 44 | 45 | * One more thing ... 46 | 47 | This is a workshop, not a lecture. It's 💯 to ask questions. 48 | 49 | If you don't understand something, or think what you're hearing is incorrect, please speak up. 50 | 51 | * What is the execution tracer, why do we need it? 52 | 53 | * What is the execution tracer, why do we need it? 54 | 55 | I think it's easiest to explain what the execution tracer does, and why it's important by looking at a piece of code where the profiler, `go`tool`pprof` performs poorly. 56 | 57 | * mandelbrot.go (example) 58 | 59 | The `examples/mandelbrot` directory contains a simple mandelbrot generator. This code is derived from [[https://github.com/campoy/mandelbrot][Francesc Campoy's mandelbrot package]]. 60 | 61 | cd examples/mandelbrot 62 | go build && ./mandelbrot 63 | 64 | If we build it, then run it, it generates something like this 65 | 66 | .image examples/mandelbrot/mandelbrot.png _ 320 67 | 68 | * How long does it take? 69 | 70 | So, how long does this program take to generate a 1024 x 1024 pixel image? 71 | 72 | The simplest way I know how to do this is to use something like `time(1)`.
73 | 74 | % time ./mandelbrot 75 | real 0m1.654s 76 | user 0m1.630s 77 | sys 0m0.015s 78 | 79 | _Note:_ Don't use `time`go`run`mandelbrot.go` or you'll time how long it takes to _compile_ the program as well as run it. 80 | 81 | * What is the program doing? 82 | 83 | So, in this example the program took 1.6 seconds to generate the mandelbrot and write it to a png. 84 | 85 | Is that good? Could we make it faster? 86 | 87 | One way to answer that question would be to use Go's built-in pprof support to profile the program. 88 | 89 | Let's try that. 90 | 91 | * Generating the profile 92 | 93 | To generate a profile we need to either 94 | 95 | 1. Use the `runtime/pprof` package directly. 96 | 2. Use a wrapper like `github.com/pkg/profile` to automate this. 97 | 98 | * Generating a profile with runtime/pprof 99 | 100 | To show you that there's no magic, let's modify the program to write a CPU profile to `os.Stdout`. 101 | 102 | .code examples/mandelbrot-runtime-pprof/mandelbrot.go /START OMIT/,/END OMIT/ 103 | 104 | By adding this code to the top of the `main` function, this program will write a profile to `os.Stdout`. 105 | 106 | cd examples/mandelbrot-runtime-pprof 107 | go run mandelbrot.go > cpu.pprof 108 | 109 | _Note_: We can use `go`run` in this case because the cpu profile will only include the execution of `mandelbrot.go`, not its compilation. 110 | 111 | * Generating a profile with github.com/pkg/profile 112 | 113 | The previous slide showed a super cheap way to generate a profile, but it has a few problems. 114 | 115 | - If you forget to redirect the output to a file then you'll blow up that terminal session. 😞 (hint: `reset(1)` is your friend) 116 | - If you write anything else to `os.Stdout`, for example, `fmt.Println` you'll corrupt the trace. 117 | 118 | The recommended way to use `runtime/pprof` is to [[https://godoc.org/runtime/pprof#hdr-Profiling_a_Go_program][write the trace to a file]].
But, then you have to make sure the trace is stopped, and the file is closed before your program stops, including if someone `^C`'s it. 119 | 120 | * Generating a profile with github.com/pkg/profile (cont.) 121 | 122 | So, a few years ago I wrote a [[https://godoc.org/github.com/pkg/profile][package]] to take care of it. 123 | 124 | .code examples/mandelbrot-pkg-profile/mandelbrot.go /START OMIT/,/END OMIT/ 125 | 126 | If we run this version, we get a profile written to the current working directory 127 | 128 | go run mandelbrot.go 129 | 2017/09/17 12:22:06 profile: cpu profiling enabled, cpu.pprof 130 | 2017/09/17 12:22:08 profile: cpu profiling disabled, cpu.pprof 131 | 132 | _Note_: Using `pkg/profile` is not mandatory, but it takes care of a lot of the boilerplate around collecting and recording traces, so we'll use it for the rest of this workshop. 133 | 134 | * Analysing the profile 135 | 136 | Now we have a profile, we can use `go`tool`pprof` to analyse it. 137 | 138 | % go tool pprof cpu.pprof 139 | Type: cpu 140 | Time: Sep 17, 2017 at 12:22pm (AEST) 141 | Duration: 1.81s, Total samples = 1.53s (84.33%) 142 | Entering interactive mode (type "help" for commands, "o" for options) 143 | (pprof) 144 | 145 | In this run we see that the program ran for 1.81 seconds (profiling adds a small overhead). We can also see that pprof only captured data for 1.53 seconds, as pprof is sample based, relying on the operating system's `SIGPROF` timer. 146 | 147 | _Note_: Since Go 1.9 the `pprof` trace contains all the information you need to analyse the trace. You no longer need to also have the matching binary which produced the trace. 🎉 148 | 149 | * Analysing the profile (cont.)
150 | 151 | We can use the `top` pprof function to sort functions recorded by the trace 152 | 153 | (pprof) top 154 | Showing nodes accounting for 1.53s, 100% of 1.53s total 155 | Showing top 10 nodes out of 57 156 | flat flat% sum% cum cum% 157 | 1s 65.36% 65.36% 1s 65.36% main.paint /Users/dfc/devel/understanding-the-execution-tracer/examples/mandelbrot-pkg-profile/mandelbrot.go (inline) 158 | 0.27s 17.65% 83.01% 1.27s 83.01% main.fillPixel /Users/dfc/devel/understanding-the-execution-tracer/examples/mandelbrot-pkg-profile/mandelbrot.go 159 | 0.13s 8.50% 91.50% 0.16s 10.46% runtime.mallocgc /Users/dfc/go/src/runtime/malloc.go 160 | 0.05s 3.27% 94.77% 0.05s 3.27% syscall.Syscall /Users/dfc/go/src/syscall/asm_darwin_amd64.s 161 | 0.02s 1.31% 96.08% 0.02s 1.31% runtime.memmove /Users/dfc/go/src/runtime/memmove_amd64.s 162 | 0.02s 1.31% 97.39% 0.02s 1.31% runtime.mmap /Users/dfc/go/src/runtime/sys_darwin_amd64.s 163 | 0.01s 0.65% 98.04% 0.07s 4.58% compress/flate.(*compressor).deflate /Users/dfc/go/src/compress/flate/deflate.go 164 | 0.01s 0.65% 98.69% 0.01s 0.65% compress/flate.matchLen /Users/dfc/go/src/compress/flate/deflate.go 165 | 0.01s 0.65% 99.35% 0.01s 0.65% image/png.filter /Users/dfc/go/src/image/png/writer.go 166 | 0.01s 0.65% 100% 0.01s 0.65% runtime.memclrNoHeapPointers /Users/dfc/go/src/runtime/memclr_amd64.s 167 | 168 | We see that the [[examples/mandelbrot-pkg-profile/mandelbrot.go][`main.paint`]] function was on the CPU the most when pprof captured the stack. 169 | 170 | * Analysing the profile (cont.) 171 | 172 | Finding `main.paint` on the stack isn't a surprise, this is what the program does; it paints pixels. But what is causing `paint` to spend so much time? We can check that with the _cumulative_ flag to `top`.
173 | 174 | (pprof) top --cum 175 | Showing nodes accounting for 1270ms, 83.01% of 1530ms total 176 | Showing top 10 nodes out of 57 177 | flat flat% sum% cum cum% 178 | 0 0% 0% 1530ms 100% main.main /Users/dfc/devel/understanding-the-execution-tracer/examples/mandelbrot-pkg-profile/mandelbrot.go 179 | 0 0% 0% 1530ms 100% runtime.main /Users/dfc/go/src/runtime/proc.go 180 | 270ms 17.65% 17.65% 1270ms 83.01% main.fillPixel /Users/dfc/devel/understanding-the-execution-tracer/examples/mandelbrot-pkg-profile/mandelbrot.go 181 | 0 0% 17.65% 1270ms 83.01% main.seqFillImg /Users/dfc/devel/understanding-the-execution-tracer/examples/mandelbrot-pkg-profile/mandelbrot.go 182 | 1000ms 65.36% 83.01% 1000ms 65.36% main.paint /Users/dfc/devel/understanding-the-execution-tracer/examples/mandelbrot-pkg-profile/mandelbrot.go (inline) 183 | 0 0% 83.01% 260ms 16.99% image/png.(*Encoder).Encode /Users/dfc/go/src/image/png/writer.go 184 | 0 0% 83.01% 260ms 16.99% image/png.Encode /Users/dfc/go/src/image/png/writer.go 185 | 0 0% 83.01% 190ms 12.42% image/png.(*encoder).writeIDATs /Users/dfc/go/src/image/png/writer.go 186 | 0 0% 83.01% 190ms 12.42% image/png.(*encoder).writeImage /Users/dfc/go/src/image/png/writer.go 187 | 0 0% 83.01% 180ms 11.76% main.(*img).At /Users/dfc/devel/understanding-the-execution-tracer/examples/mandelbrot-pkg-profile/mandelbrot.go 188 | 189 | This is sort of suggesting that [[examples/mandelbrot-pkg-profile/mandelbrot.go][`main.fillPixel`]] is actually doing most of the work. 190 | 191 | _Note_: You can also visualise the profile with the `web` command, which looks [[examples/mandelbrot-pkg-profile/cpu.svg][like this]]. 192 | 193 | * Tracing vs Profiling 194 | 195 | Hopefully this example shows the limitations of profiling. Profiling told us what it saw, `fillPixel` was doing all the work, and there didn't look like there was much that could be done about that.
196 | 197 | So now it's a good time to introduce the execution tracer which gives a different view of the same program. 198 | 199 | * Using the execution tracer 200 | 201 | Using the tracer is as simple as asking for a `profile.TraceProfile`, nothing else changes. 202 | 203 | .code examples/mandelbrot-trace/mandelbrot.go /START OMIT/,/END OMIT/ 204 | 205 | When we run the program, we get a `trace.out` file in the current working directory. 206 | 207 | % go build mandelbrot.go 208 | % % time ./mandelbrot 209 | 2017/09/17 13:19:10 profile: trace enabled, trace.out 210 | 2017/09/17 13:19:12 profile: trace disabled, trace.out 211 | 212 | real 0m1.740s 213 | user 0m1.707s 214 | sys 0m0.020s 215 | 216 | * Using the execution tracer (cont.) 217 | 218 | Just like pprof, there is a tool in the `go` command to analyse the trace. 219 | 220 | % go tool trace trace.out 221 | 2017/09/17 12:41:39 Parsing trace... 222 | 2017/09/17 12:41:40 Serializing trace... 223 | 2017/09/17 12:41:40 Splitting trace... 224 | 2017/09/17 12:41:40 Opening browser. Trace viewer is listening on http://127.0.0.1:57842 225 | 226 | * Analysing the trace 227 | 228 | We can see from the trace that the program is only using one cpu. 229 | 230 | .code examples/mandelbrot-trace/mandelbrot.go /SEQSTART OMIT/,/SEQEND OMIT/ 231 | 232 | This isn't a surprise, by default `mandelbrot.go` calls `fillPixel` for each pixel in each row in sequence. 233 | 234 | Once the image is painted, see the execution switches to writing the `.png` file. This generates garbage on the heap, and so the 235 | 236 | The trace profile offers timing resolution down to the _microsecond_ level. This is something you just can't get with external profiling. 237 | 238 | * go tool trace 239 | 240 | Before we go on there are some things we should talk about the usage of the trace tool. 241 | 242 | - The tool uses the javascript debugging support built into Chrome. 
Trace profiles can only be viewed in Chrome, they won't work in Firefox, Safari, IE/Edge. Sorry. 243 | - Because this is a Google product, it supports keyboard shortcuts; use `WASD` to navigate, use `?` to get a list. 244 | - Viewing traces can take a *lot* of memory. Seriously, 4Gb won't cut it, 8Gb is probably the minimum, more is definitely better. 245 | - If you've installed Go from an OS distribution like Fedora, the support files for the trace viewer may not be part of the main `golang` deb/rpm, they might be in some `-extra` package. 246 | 247 | * Using more than one CPU 248 | 249 | We saw from the previous trace that the program is running sequentially and not taking advantage of the other CPUs on this machine. 250 | 251 | Mandelbrot generation is known as _embarrassingly_parallel_. Each pixel is independent of any other, they could all be computed in parallel. So, let's try that. 252 | 253 | % go build mandelbrot.go 254 | % time ./mandelbrot -mode px 255 | 2017/09/17 13:19:48 profile: trace enabled, trace.out 256 | 2017/09/17 13:19:50 profile: trace disabled, trace.out 257 | 258 | real 0m1.764s 259 | user 0m4.031s 260 | sys 0m0.865s 261 | 262 | So the runtime was basically the same. There was more user time, which makes sense, we were using all the CPUs, but the real (wall clock) time was about the same. 263 | 264 | Let's look at the trace. 265 | 266 | * Using more than one CPU (cont.) 267 | 268 | As you can see this trace generated _much_ more data. 269 | 270 | - It looks like lots of work is being done, but if you zoom right in, there are gaps. This is believed to be the scheduler. 271 | - While we're using all four cores, because each `fillPixel` is a relatively small amount of work, we're spending a lot of time in scheduling overhead. 272 | 273 | * Batching up work 274 | 275 | Using one goroutine per pixel was too fine grained. There wasn't enough work to justify the cost of the goroutine. 276 | 277 | Instead, let's try processing one row per goroutine.
278 | 279 | % go build mandelbrot.go 280 | % time ./mandelbrot -mode row 281 | 2017/09/17 13:41:55 profile: trace enabled, trace.out 282 | 2017/09/17 13:41:55 profile: trace disabled, trace.out 283 | 284 | real 0m0.764s 285 | user 0m1.907s 286 | sys 0m0.025s 287 | 288 | This looks like a good improvement, we more than halved the runtime of the program. Let's look at the trace 289 | 290 | * Batching up work (cont.) 291 | 292 | As you can see the trace is now smaller and easier to work with. We get to see the whole trace in span, which is a nice bonus. 293 | 294 | - At the start of the program we see the number of goroutines ramp up to around 1,000. This is an improvement over the 1 << 20 that we saw in the previous trace. 295 | - Zooming in we see `onePerRowFillImg` runs for longer, and as the goroutine _producing_ work is done early, the scheduler efficiently works through the remaining runnable goroutines. 296 | 297 | * Using workers 298 | 299 | `mandelbrot.go` supports one other mode, let's try it. 300 | 301 | % go build mandelbrot.go 302 | % time ./mandelbrot -mode workers 303 | 2017/09/17 13:49:46 profile: trace enabled, trace.out 304 | 2017/09/17 13:49:50 profile: trace disabled, trace.out 305 | 306 | real 0m4.207s 307 | user 0m4.459s 308 | sys 0m1.284s 309 | 310 | So, the runtime was much worse than any previous. Let's look at the trace and see if we can figure out what happened. 311 | 312 | * Using workers (cont.) 313 | 314 | Looking at the trace you can see that with only one worker process the producer and consumer tend to alternate because there is only one producer and one consumer. Let's increase the number of workers 315 | 316 | % go build mandelbrot.go 317 | % time ./mandelbrot -mode workers -workers 4 318 | 2017/09/17 13:52:51 profile: trace enabled, trace.out 319 | 2017/09/17 13:52:57 profile: trace disabled, trace.out 320 | 321 | real 0m5.528s 322 | user 0m7.307s 323 | sys 0m4.311s 324 | 325 | So that made it worse! More real time, more CPU time.
Let's look at the trace to see what happened. 326 | 327 | * Using workers (cont.) 328 | 329 | That trace was a mess. There were more workers available, but they seemed to spend all their time fighting over the work to do. 330 | 331 | This is because the channel is unbuffered. An unbuffered channel cannot send until there is someone ready to receive. 332 | 333 | - The producer cannot send work until there is a worker ready to receive it. 334 | - Workers cannot receive work until there is someone ready to send, so they compete with each other when they are waiting. 335 | - The sender is not privileged, it cannot take priority over a worker that is already running. 336 | 337 | What we see here is a lot of latency introduced by the unbuffered channel. There are lots of stops and starts inside the scheduler, and potentially locks and mutexes while waiting for work, this is why we see the `sys` time higher. 338 | 339 | * Using buffered channels 340 | 341 | .code examples/mandelbrot-buffered/mandelbrot.go /BUFSTART OMIT/,/BUFEND OMIT/ 342 | 343 | % go build mandelbrot.go 344 | % time ./mandelbrot -mode workers -workers 4 345 | 2017/09/17 14:23:56 profile: trace enabled, trace.out 346 | 2017/09/17 14:23:57 profile: trace disabled, trace.out 347 | 348 | real 0m0.905s 349 | user 0m2.150s 350 | sys 0m0.121s 351 | 352 | Which is pretty close to the per row mode above. 353 | 354 | * Using buffered channels (cont.) 355 | 356 | Using a buffered channel the trace showed us that: 357 | 358 | - The producer doesn't have to wait for a worker to arrive, it can fill up the channel quickly 359 | 360 | Using this method we got nearly the same speed using a channel to hand off work per pixel as we did previously scheduling one goroutine per row. 361 | 362 | _Exercise_: modify `nWorkersFillImg` to work per row. Time the result and analyse the trace.
363 | 364 | * Break 365 | 366 | * Mandelbrot microservice 367 | 368 | It's 2017, generating Mandelbrots is pointless unless you can offer them on the internet as a web service. 369 | 370 | Thus, I present to you, _Mandelweb_ 371 | 372 | % go run examples/mandelweb/mandelweb.go 373 | 2017/09/17 15:29:21 listening on http://127.0.0.1:8080/ 374 | 375 | .link http://127.0.0.1:8080/mandelbrot 376 | 377 | * Tracing running applications 378 | 379 | In the previous example we ran the trace over the whole program. 380 | 381 | As you saw, traces can be very large, even for small amounts of time, so collecting trace data continually would generate far too much data. Also, tracing can have an impact on the speed of your program, especially if there is a lot of activity. 382 | 383 | What we want is a way to collect a short trace from a running program. 384 | 385 | Fortunately, the `net/http/pprof` package has just such a facility. 386 | 387 | * Collecting traces via http 388 | 389 | Hopefully everyone knows about the `net/http/pprof` package. 390 | 391 | When imported 392 | 393 | import _ "net/http/pprof" 394 | 395 | It will register tracing and profiling routes with `http.DefaultServeMux`. Since Go 1.5 this includes the trace profiler. 396 | 397 | We can grab a five second trace from mandelweb with `curl` (or `wget`) 398 | 399 | curl -o trace.out http://127.0.0.1:8080/debug/pprof/trace?seconds=5 400 | 401 | * Generating some load 402 | 403 | The previous example was interesting, but an idle webserver has, by definition, no performance issues. We need to generate some load. For this I'm using [[https://github.com/rakyll/hey][`hey` by JBD]]. 404 | 405 | go get -u github.com/rakyll/hey 406 | 407 | Let's start with one request per second.
408 | 409 | hey -c 1 -n 1000 -q 1 http://127.0.0.1:8080/mandelbrot 410 | 411 | And with that running, in another window collect the trace 412 | 413 | % curl -o trace.out http://127.0.0.1:8080/debug/pprof/trace?seconds=5 414 | % Total % Received % Xferd Average Speed Time Time Time Current 415 | Dload Upload Total Spent Left Speed 416 | 100 66169 0 66169 0 0 13233 0 --:--:-- 0:00:05 --:--:-- 17390 417 | % go tool trace trace.out 418 | 2017/09/17 16:09:30 Parsing trace... 419 | 2017/09/17 16:09:30 Serializing trace... 420 | 2017/09/17 16:09:30 Splitting trace... 421 | 2017/09/17 16:09:30 Opening browser. Trace viewer is listening on http://127.0.0.1:60301 422 | 423 | * Simulating overload 424 | 425 | Let's increase the rate to 5 requests per second. 426 | 427 | hey -c 5 -n 1000 -q 5 http://127.0.0.1:8080/mandelbrot 428 | 429 | And with that running, in another window collect the trace 430 | 431 | % curl -o trace.out http://127.0.0.1:8080/debug/pprof/trace?seconds=5 432 | % Total % Received % Xferd Average Speed Time Time Time Current 433 | Dload Upload Total Spent Left Speed 434 | 100 66169 0 66169 0 0 13233 0 --:--:-- 0:00:05 --:--:-- 17390 435 | % go tool trace trace.out 436 | 2017/09/17 16:09:30 Parsing trace... 437 | 2017/09/17 16:09:30 Serializing trace... 438 | 2017/09/17 16:09:30 Splitting trace... 439 | 2017/09/17 16:09:30 Opening browser. Trace viewer is listening on http://127.0.0.1:60301 440 | 441 | * Extra credit 442 | 443 | * Sieve of Eratosthenes 444 | 445 | The concurrent [[examples/sieve/main.go][prime sieve]] is one of the first Go programs written. 446 | 447 | Ivan Daniluk [[http://divan.github.io/posts/go_concurrency_visualize/][wrote a great post on visualising]] it. 448 | 449 | Let's take a look at its operation using the execution tracer. 450 | 451 | * Conclusion 452 | 453 | That's it! Now you know everything about the execution tracer. 
454 | 455 | * More resources 456 | 457 | - Rhys Hiltner, [[https://www.youtube.com/watch?v=mmqDlbWk_XA][Go's execution tracer]] (dotGo 2016) 458 | - Rhys Hiltner, [[https://www.youtube.com/watch?v=V74JnrGTwKA][An Introduction to "go tool trace"]] (GopherCon 2017) 459 | - Dave Cheney, [[https://www.youtube.com/watch?v=2h_NFBFrciI][Seven ways to profile Go programs]] (GolangUK 2016) 460 | - Dave Cheney, [[https://dave.cheney.net/training#high-performance-go][High performance Go workshop]] 461 | - Ivan Daniluk, [[https://www.youtube.com/watch?v=KyuFeiG3Y60][Visualizing Concurrency in Go]] (GopherCon 2016) 462 | - Kavya Joshi, [[https://www.youtube.com/watch?v=KBZlN0izeiY][Understanding Channels]] (GopherCon 2017) 463 | 464 | -------------------------------------------------------------------------------- /vendor/github.com/pkg/profile/.travis.yml: -------------------------------------------------------------------------------- 1 | language: go 2 | go_import_path: github.com/pkg/profile 3 | go: 4 | - 1.4.3 5 | - 1.5.2 6 | - 1.6.3 7 | - tip 8 | 9 | script: 10 | - go test github.com/pkg/profile 11 | - go test -race github.com/pkg/profile 12 | -------------------------------------------------------------------------------- /vendor/github.com/pkg/profile/AUTHORS: -------------------------------------------------------------------------------- 1 | Dave Cheney 2 | -------------------------------------------------------------------------------- /vendor/github.com/pkg/profile/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2013 Dave Cheney. All rights reserved. 2 | 3 | Redistribution and use in source and binary forms, with or without 4 | modification, are permitted provided that the following conditions are 5 | met: 6 | 7 | * Redistributions of source code must retain the above copyright 8 | notice, this list of conditions and the following disclaimer. 
9 | * Redistributions in binary form must reproduce the above 10 | copyright notice, this list of conditions and the following disclaimer 11 | in the documentation and/or other materials provided with the 12 | distribution. 13 | 14 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 15 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 16 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 17 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 18 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 19 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 20 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 21 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 22 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 24 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 | -------------------------------------------------------------------------------- /vendor/github.com/pkg/profile/README.md: -------------------------------------------------------------------------------- 1 | profile 2 | ======= 3 | 4 | Simple profiling support package for Go 5 | 6 | [![Build Status](https://travis-ci.org/pkg/profile.svg?branch=master)](https://travis-ci.org/pkg/profile) [![GoDoc](http://godoc.org/github.com/pkg/profile?status.svg)](http://godoc.org/github.com/pkg/profile) 7 | 8 | 9 | installation 10 | ------------ 11 | 12 | go get github.com/pkg/profile 13 | 14 | usage 15 | ----- 16 | 17 | Enabling profiling in your application is as simple as one line at the top of your main function 18 | 19 | ```go 20 | import "github.com/pkg/profile" 21 | 22 | func main() { 23 | defer profile.Start().Stop() 24 | ... 
25 | } 26 | ``` 27 | 28 | options 29 | ------- 30 | 31 | What to profile is controlled by config value passed to profile.Start. 32 | By default CPU profiling is enabled. 33 | 34 | ```go 35 | import "github.com/pkg/profile" 36 | 37 | func main() { 38 | // p.Stop() must be called before the program exits to 39 | // ensure profiling information is written to disk. 40 | p := profile.Start(profile.MemProfile, profile.ProfilePath("."), profile.NoShutdownHook) 41 | ... 42 | } 43 | ``` 44 | 45 | Several convenience package level values are provided for cpu, memory, and block (contention) profiling. 46 | 47 | For more complex options, consult the [documentation](http://godoc.org/github.com/pkg/profile). 48 | 49 | contributing 50 | ------------ 51 | 52 | We welcome pull requests, bug fixes and issue reports. 53 | 54 | Before proposing a change, please discuss it first by raising an issue. 55 | -------------------------------------------------------------------------------- /vendor/github.com/pkg/profile/cpu.pprof: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/davecheney/understanding-the-execution-tracer/941abff7b736f5a1b5d9549dedf385666d69627f/vendor/github.com/pkg/profile/cpu.pprof -------------------------------------------------------------------------------- /vendor/github.com/pkg/profile/example_test.go: -------------------------------------------------------------------------------- 1 | package profile_test 2 | 3 | import ( 4 | "flag" 5 | "os" 6 | 7 | "github.com/pkg/profile" 8 | ) 9 | 10 | func ExampleStart() { 11 | // start a simple CPU profile and register 12 | // a defer to Stop (flush) the profiling data. 13 | defer profile.Start().Stop() 14 | } 15 | 16 | func ExampleCPUProfile() { 17 | // CPU profiling is the default profiling mode, but you can specify it 18 | // explicitly for completeness. 
19 | defer profile.Start(profile.CPUProfile).Stop() 20 | } 21 | 22 | func ExampleMemProfile() { 23 | // use memory profiling, rather than the default cpu profiling. 24 | defer profile.Start(profile.MemProfile).Stop() 25 | } 26 | 27 | func ExampleMemProfileRate() { 28 | // use memory profiling with custom rate. 29 | defer profile.Start(profile.MemProfileRate(2048)).Stop() 30 | } 31 | 32 | func ExampleProfilePath() { 33 | // set the location that the profile will be written to 34 | defer profile.Start(profile.ProfilePath(os.Getenv("HOME"))).Stop() 35 | } 36 | 37 | func ExampleNoShutdownHook() { 38 | // disable the automatic shutdown hook. 39 | defer profile.Start(profile.NoShutdownHook).Stop() 40 | } 41 | 42 | func ExampleStart_withFlags() { 43 | // use the flags package to selectively enable profiling. 44 | mode := flag.String("profile.mode", "", "enable profiling mode, one of [cpu, mem, mutex, block]") 45 | flag.Parse() 46 | switch *mode { 47 | case "cpu": 48 | defer profile.Start(profile.CPUProfile).Stop() 49 | case "mem": 50 | defer profile.Start(profile.MemProfile).Stop() 51 | case "mutex": 52 | defer profile.Start(profile.MutexProfile).Stop() 53 | case "block": 54 | defer profile.Start(profile.BlockProfile).Stop() 55 | default: 56 | // do nothing 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /vendor/github.com/pkg/profile/mutex.go: -------------------------------------------------------------------------------- 1 | // +build go1.8 2 | 3 | package profile 4 | 5 | import "runtime" 6 | 7 | func enableMutexProfile() { 8 | runtime.SetMutexProfileFraction(1) 9 | } 10 | 11 | func disableMutexProfile() { 12 | runtime.SetMutexProfileFraction(0) 13 | } 14 | -------------------------------------------------------------------------------- /vendor/github.com/pkg/profile/mutex17.go: -------------------------------------------------------------------------------- 1 | // +build !go1.8 2 | 3 | package profile 4 | 5 | // mock 
mutex support for Go 1.7 and earlier. 6 | 7 | func enableMutexProfile() {} 8 | 9 | func disableMutexProfile() {} 10 | -------------------------------------------------------------------------------- /vendor/github.com/pkg/profile/profile.go: -------------------------------------------------------------------------------- 1 | // Package profile provides a simple way to manage runtime/pprof 2 | // profiling of your Go application. 3 | package profile 4 | 5 | import ( 6 | "io/ioutil" 7 | "log" 8 | "os" 9 | "os/signal" 10 | "path/filepath" 11 | "runtime" 12 | "runtime/pprof" 13 | "sync/atomic" 14 | ) 15 | 16 | const ( 17 | cpuMode = iota 18 | memMode 19 | mutexMode 20 | blockMode 21 | traceMode 22 | ) 23 | 24 | // Profile represents an active profiling session. 25 | type Profile struct { 26 | // quiet suppresses informational messages during profiling. 27 | quiet bool 28 | 29 | // noShutdownHook controls whether the profiling package should 30 | // hook SIGINT to write profiles cleanly. 31 | noShutdownHook bool 32 | 33 | // mode holds the type of profiling that will be made 34 | mode int 35 | 36 | // path holds the base path where various profiling files are written. 37 | // If blank, the base path will be generated by ioutil.TempDir. 38 | path string 39 | 40 | // memProfileRate holds the rate for the memory profile. 41 | memProfileRate int 42 | 43 | // closer holds a cleanup function that run after each profile 44 | closer func() 45 | 46 | // stopped records if a call to profile.Stop has been made 47 | stopped uint32 48 | } 49 | 50 | // NoShutdownHook controls whether the profiling package should 51 | // hook SIGINT to write profiles cleanly. 52 | // Programs with more sophisticated signal handling should set 53 | // this to true and ensure the Stop() function returned from Start() 54 | // is called during shutdown. 55 | func NoShutdownHook(p *Profile) { p.noShutdownHook = true } 56 | 57 | // Quiet suppresses informational messages during profiling. 
58 | func Quiet(p *Profile) { p.quiet = true } 59 | 60 | // CPUProfile enables cpu profiling. 61 | // It disables any previous profiling settings. 62 | func CPUProfile(p *Profile) { p.mode = cpuMode } 63 | 64 | // DefaultMemProfileRate is the default memory profiling rate. 65 | // See also http://golang.org/pkg/runtime/#pkg-variables 66 | const DefaultMemProfileRate = 4096 67 | 68 | // MemProfile enables memory profiling. 69 | // It disables any previous profiling settings. 70 | func MemProfile(p *Profile) { 71 | p.memProfileRate = DefaultMemProfileRate 72 | p.mode = memMode 73 | } 74 | 75 | // MemProfileRate enables memory profiling at the preferred rate. 76 | // It disables any previous profiling settings. 77 | func MemProfileRate(rate int) func(*Profile) { 78 | return func(p *Profile) { 79 | p.memProfileRate = rate 80 | p.mode = memMode 81 | } 82 | } 83 | 84 | // MutexProfile enables mutex profiling. 85 | // It disables any previous profiling settings. 86 | // 87 | // Mutex profiling is a no-op before go1.8. 88 | func MutexProfile(p *Profile) { p.mode = mutexMode } 89 | 90 | // BlockProfile enables block (contention) profiling. 91 | // It disables any previous profiling settings. 92 | func BlockProfile(p *Profile) { p.mode = blockMode } 93 | 94 | // Trace profile controls if execution tracing will be enabled. It disables any previous profiling settings. 95 | func TraceProfile(p *Profile) { p.mode = traceMode } 96 | 97 | // ProfilePath controls the base path where various profiling 98 | // files are written. If blank, the base path will be generated 99 | // by ioutil.TempDir. 100 | func ProfilePath(path string) func(*Profile) { 101 | return func(p *Profile) { 102 | p.path = path 103 | } 104 | } 105 | 106 | // Stop stops the profile and flushes any unwritten data. 
107 | func (p *Profile) Stop() { 108 | if !atomic.CompareAndSwapUint32(&p.stopped, 0, 1) { 109 | // someone has already called close 110 | return 111 | } 112 | p.closer() 113 | atomic.StoreUint32(&started, 0) 114 | } 115 | 116 | // started is non zero if a profile is running. 117 | var started uint32 118 | 119 | // Start starts a new profiling session. 120 | // The caller should call the Stop method on the value returned 121 | // to cleanly stop profiling. 122 | func Start(options ...func(*Profile)) interface { 123 | Stop() 124 | } { 125 | if !atomic.CompareAndSwapUint32(&started, 0, 1) { 126 | log.Fatal("profile: Start() already called") 127 | } 128 | 129 | var prof Profile 130 | for _, option := range options { 131 | option(&prof) 132 | } 133 | 134 | path, err := func() (string, error) { 135 | if p := prof.path; p != "" { 136 | return p, os.MkdirAll(p, 0777) 137 | } 138 | return ioutil.TempDir("", "profile") 139 | }() 140 | 141 | if err != nil { 142 | log.Fatalf("profile: could not create initial output directory: %v", err) 143 | } 144 | 145 | logf := func(format string, args ...interface{}) { 146 | if !prof.quiet { 147 | log.Printf(format, args...) 
148 | } 149 | } 150 | 151 | switch prof.mode { 152 | case cpuMode: 153 | fn := filepath.Join(path, "cpu.pprof") 154 | f, err := os.Create(fn) 155 | if err != nil { 156 | log.Fatalf("profile: could not create cpu profile %q: %v", fn, err) 157 | } 158 | logf("profile: cpu profiling enabled, %s", fn) 159 | pprof.StartCPUProfile(f) 160 | prof.closer = func() { 161 | pprof.StopCPUProfile() 162 | f.Close() 163 | logf("profile: cpu profiling disabled, %s", fn) 164 | } 165 | 166 | case memMode: 167 | fn := filepath.Join(path, "mem.pprof") 168 | f, err := os.Create(fn) 169 | if err != nil { 170 | log.Fatalf("profile: could not create memory profile %q: %v", fn, err) 171 | } 172 | old := runtime.MemProfileRate 173 | runtime.MemProfileRate = prof.memProfileRate 174 | logf("profile: memory profiling enabled (rate %d), %s", runtime.MemProfileRate, fn) 175 | prof.closer = func() { 176 | pprof.Lookup("heap").WriteTo(f, 0) 177 | f.Close() 178 | runtime.MemProfileRate = old 179 | logf("profile: memory profiling disabled, %s", fn) 180 | } 181 | 182 | case mutexMode: 183 | fn := filepath.Join(path, "mutex.pprof") 184 | f, err := os.Create(fn) 185 | if err != nil { 186 | log.Fatalf("profile: could not create mutex profile %q: %v", fn, err) 187 | } 188 | enableMutexProfile() 189 | logf("profile: mutex profiling enabled, %s", fn) 190 | prof.closer = func() { 191 | if mp := pprof.Lookup("mutex"); mp != nil { 192 | mp.WriteTo(f, 0) 193 | } 194 | f.Close() 195 | disableMutexProfile() 196 | logf("profile: mutex profiling disabled, %s", fn) 197 | } 198 | 199 | case blockMode: 200 | fn := filepath.Join(path, "block.pprof") 201 | f, err := os.Create(fn) 202 | if err != nil { 203 | log.Fatalf("profile: could not create block profile %q: %v", fn, err) 204 | } 205 | runtime.SetBlockProfileRate(1) 206 | logf("profile: block profiling enabled, %s", fn) 207 | prof.closer = func() { 208 | pprof.Lookup("block").WriteTo(f, 0) 209 | f.Close() 210 | runtime.SetBlockProfileRate(0) 211 | logf("profile: 
block profiling disabled, %s", fn) 212 | } 213 | 214 | case traceMode: 215 | fn := filepath.Join(path, "trace.out") 216 | f, err := os.Create(fn) 217 | if err != nil { 218 | log.Fatalf("profile: could not create trace output file %q: %v", fn, err) 219 | } 220 | if err := startTrace(f); err != nil { 221 | log.Fatalf("profile: could not start trace: %v", err) 222 | } 223 | logf("profile: trace enabled, %s", fn) 224 | prof.closer = func() { 225 | stopTrace() 226 | logf("profile: trace disabled, %s", fn) 227 | } 228 | } 229 | 230 | if !prof.noShutdownHook { 231 | go func() { 232 | c := make(chan os.Signal, 1) 233 | signal.Notify(c, os.Interrupt) 234 | <-c 235 | 236 | log.Println("profile: caught interrupt, stopping profiles") 237 | prof.Stop() 238 | 239 | os.Exit(0) 240 | }() 241 | } 242 | 243 | return &prof 244 | } 245 | -------------------------------------------------------------------------------- /vendor/github.com/pkg/profile/profile_test.go: -------------------------------------------------------------------------------- 1 | package profile 2 | 3 | import ( 4 | "bufio" 5 | "bytes" 6 | "io" 7 | "io/ioutil" 8 | "os" 9 | "os/exec" 10 | "path/filepath" 11 | "strings" 12 | "testing" 13 | ) 14 | 15 | type checkFn func(t *testing.T, stdout, stderr []byte, err error) 16 | 17 | func TestProfile(t *testing.T) { 18 | f, err := ioutil.TempFile("", "profile_test") 19 | if err != nil { 20 | t.Fatal(err) 21 | } 22 | defer os.Remove(f.Name()) 23 | 24 | var profileTests = []struct { 25 | name string 26 | code string 27 | checks []checkFn 28 | }{{ 29 | name: "default profile (cpu)", 30 | code: ` 31 | package main 32 | 33 | import "github.com/pkg/profile" 34 | 35 | func main() { 36 | defer profile.Start().Stop() 37 | } 38 | `, 39 | checks: []checkFn{ 40 | NoStdout, 41 | Stderr("profile: cpu profiling enabled"), 42 | NoErr, 43 | }, 44 | }, { 45 | name: "memory profile", 46 | code: ` 47 | package main 48 | 49 | import "github.com/pkg/profile" 50 | 51 | func main() { 52 | defer 
profile.Start(profile.MemProfile).Stop() 53 | } 54 | `, 55 | checks: []checkFn{ 56 | NoStdout, 57 | Stderr("profile: memory profiling enabled"), 58 | NoErr, 59 | }, 60 | }, { 61 | name: "memory profile (rate 2048)", 62 | code: ` 63 | package main 64 | 65 | import "github.com/pkg/profile" 66 | 67 | func main() { 68 | defer profile.Start(profile.MemProfileRate(2048)).Stop() 69 | } 70 | `, 71 | checks: []checkFn{ 72 | NoStdout, 73 | Stderr("profile: memory profiling enabled (rate 2048)"), 74 | NoErr, 75 | }, 76 | }, { 77 | name: "double start", 78 | code: ` 79 | package main 80 | 81 | import "github.com/pkg/profile" 82 | 83 | func main() { 84 | profile.Start() 85 | profile.Start() 86 | } 87 | `, 88 | checks: []checkFn{ 89 | NoStdout, 90 | Stderr("cpu profiling enabled", "profile: Start() already called"), 91 | Err, 92 | }, 93 | }, { 94 | name: "block profile", 95 | code: ` 96 | package main 97 | 98 | import "github.com/pkg/profile" 99 | 100 | func main() { 101 | defer profile.Start(profile.BlockProfile).Stop() 102 | } 103 | `, 104 | checks: []checkFn{ 105 | NoStdout, 106 | Stderr("profile: block profiling enabled"), 107 | NoErr, 108 | }, 109 | }, { 110 | name: "mutex profile", 111 | code: ` 112 | package main 113 | 114 | import "github.com/pkg/profile" 115 | 116 | func main() { 117 | defer profile.Start(profile.MutexProfile).Stop() 118 | } 119 | `, 120 | checks: []checkFn{ 121 | NoStdout, 122 | Stderr("profile: mutex profiling enabled"), 123 | NoErr, 124 | }, 125 | }, { 126 | name: "profile path", 127 | code: ` 128 | package main 129 | 130 | import "github.com/pkg/profile" 131 | 132 | func main() { 133 | defer profile.Start(profile.ProfilePath(".")).Stop() 134 | } 135 | `, 136 | checks: []checkFn{ 137 | NoStdout, 138 | Stderr("profile: cpu profiling enabled, cpu.pprof"), 139 | NoErr, 140 | }, 141 | }, { 142 | name: "profile path error", 143 | code: ` 144 | package main 145 | 146 | import "github.com/pkg/profile" 147 | 148 | func main() { 149 | defer 
profile.Start(profile.ProfilePath("` + f.Name() + `")).Stop() 150 | } 151 | `, 152 | checks: []checkFn{ 153 | NoStdout, 154 | Stderr("could not create initial output"), 155 | Err, 156 | }, 157 | }, { 158 | name: "multiple profile sessions", 159 | code: ` 160 | package main 161 | 162 | import "github.com/pkg/profile" 163 | 164 | func main() { 165 | profile.Start(profile.CPUProfile).Stop() 166 | profile.Start(profile.MemProfile).Stop() 167 | profile.Start(profile.BlockProfile).Stop() 168 | profile.Start(profile.CPUProfile).Stop() 169 | profile.Start(profile.MutexProfile).Stop() 170 | } 171 | `, 172 | checks: []checkFn{ 173 | NoStdout, 174 | Stderr("profile: cpu profiling enabled", 175 | "profile: cpu profiling disabled", 176 | "profile: memory profiling enabled", 177 | "profile: memory profiling disabled", 178 | "profile: block profiling enabled", 179 | "profile: block profiling disabled", 180 | "profile: cpu profiling enabled", 181 | "profile: cpu profiling disabled", 182 | "profile: mutex profiling enabled", 183 | "profile: mutex profiling disabled"), 184 | NoErr, 185 | }, 186 | }, { 187 | name: "profile quiet", 188 | code: ` 189 | package main 190 | 191 | import "github.com/pkg/profile" 192 | 193 | func main() { 194 | defer profile.Start(profile.Quiet).Stop() 195 | } 196 | `, 197 | checks: []checkFn{NoStdout, NoStderr, NoErr}, 198 | }} 199 | for _, tt := range profileTests { 200 | t.Log(tt.name) 201 | stdout, stderr, err := runTest(t, tt.code) 202 | for _, f := range tt.checks { 203 | f(t, stdout, stderr, err) 204 | } 205 | } 206 | } 207 | 208 | // NoStdout checks that stdout was blank. 
209 | func NoStdout(t *testing.T, stdout, _ []byte, _ error) { 210 | if len := len(stdout); len > 0 { 211 | t.Errorf("stdout: wanted 0 bytes, got %d", len) 212 | } 213 | } 214 | 215 | // Stderr verifies that the given lines match the output from stderr 216 | func Stderr(lines ...string) checkFn { 217 | return func(t *testing.T, _, stderr []byte, _ error) { 218 | r := bytes.NewReader(stderr) 219 | if !validateOutput(r, lines) { 220 | t.Errorf("stderr: wanted '%s', got '%s'", lines, stderr) 221 | } 222 | } 223 | } 224 | 225 | // NoStderr checks that stderr was blank. 226 | func NoStderr(t *testing.T, _, stderr []byte, _ error) { 227 | if len := len(stderr); len > 0 { 228 | t.Errorf("stderr: wanted 0 bytes, got %d", len) 229 | } 230 | } 231 | 232 | // Err checks that there was an error returned 233 | func Err(t *testing.T, _, _ []byte, err error) { 234 | if err == nil { 235 | t.Errorf("expected error") 236 | } 237 | } 238 | 239 | // NoErr checks that err was nil 240 | func NoErr(t *testing.T, _, _ []byte, err error) { 241 | if err != nil { 242 | t.Errorf("error: expected nil, got %v", err) 243 | } 244 | } 245 | 246 | // validatedOutput validates the given slice of lines against data from the given reader. 
247 | func validateOutput(r io.Reader, want []string) bool { 248 | s := bufio.NewScanner(r) 249 | for _, line := range want { 250 | if !s.Scan() || !strings.Contains(s.Text(), line) { 251 | return false 252 | } 253 | } 254 | return true 255 | } 256 | 257 | var validateOutputTests = []struct { 258 | input string 259 | lines []string 260 | want bool 261 | }{{ 262 | input: "", 263 | want: true, 264 | }, { 265 | input: `profile: yes 266 | `, 267 | want: true, 268 | }, { 269 | input: `profile: yes 270 | `, 271 | lines: []string{"profile: yes"}, 272 | want: true, 273 | }, { 274 | input: `profile: yes 275 | profile: no 276 | `, 277 | lines: []string{"profile: yes"}, 278 | want: true, 279 | }, { 280 | input: `profile: yes 281 | profile: no 282 | `, 283 | lines: []string{"profile: yes", "profile: no"}, 284 | want: true, 285 | }, { 286 | input: `profile: yes 287 | profile: no 288 | `, 289 | lines: []string{"profile: no"}, 290 | want: false, 291 | }} 292 | 293 | func TestValidateOutput(t *testing.T) { 294 | for _, tt := range validateOutputTests { 295 | r := strings.NewReader(tt.input) 296 | got := validateOutput(r, tt.lines) 297 | if tt.want != got { 298 | t.Errorf("validateOutput(%q, %q), want %v, got %v", tt.input, tt.lines, tt.want, got) 299 | } 300 | } 301 | } 302 | 303 | // runTest executes the go program supplied and returns the contents of stdout, 304 | // stderr, and an error which may contain status information about the result 305 | // of the program. 
306 | func runTest(t *testing.T, code string) ([]byte, []byte, error) { 307 | chk := func(err error) { 308 | if err != nil { 309 | t.Fatal(err) 310 | } 311 | } 312 | gopath, err := ioutil.TempDir("", "profile-gopath") 313 | chk(err) 314 | defer os.RemoveAll(gopath) 315 | 316 | srcdir := filepath.Join(gopath, "src") 317 | err = os.Mkdir(srcdir, 0755) 318 | chk(err) 319 | src := filepath.Join(srcdir, "main.go") 320 | err = ioutil.WriteFile(src, []byte(code), 0644) 321 | chk(err) 322 | 323 | cmd := exec.Command("go", "run", src) 324 | 325 | var stdout, stderr bytes.Buffer 326 | cmd.Stdout = &stdout 327 | cmd.Stderr = &stderr 328 | err = cmd.Run() 329 | return stdout.Bytes(), stderr.Bytes(), err 330 | } 331 | -------------------------------------------------------------------------------- /vendor/github.com/pkg/profile/trace.go: -------------------------------------------------------------------------------- 1 | // +build go1.7 2 | 3 | package profile 4 | 5 | import "runtime/trace" 6 | 7 | var startTrace = trace.Start 8 | var stopTrace = trace.Stop 9 | -------------------------------------------------------------------------------- /vendor/github.com/pkg/profile/trace16.go: -------------------------------------------------------------------------------- 1 | // +build !go1.7 2 | 3 | package profile 4 | 5 | import "io" 6 | 7 | // mock trace support for Go 1.6 and earlier. 8 | 9 | func startTrace(w io.Writer) error { return nil } 10 | func stopTrace() {} 11 | -------------------------------------------------------------------------------- /vendor/github.com/pkg/profile/trace_test.go: -------------------------------------------------------------------------------- 1 | package profile_test 2 | 3 | import "github.com/pkg/profile" 4 | 5 | func ExampleTraceProfile() { 6 | // use execution tracing, rather than the default cpu profiling. 7 | defer profile.Start(profile.TraceProfile).Stop() 8 | } 9 | --------------------------------------------------------------------------------