├── .github ├── dependabot.yml ├── release.yml └── workflows │ ├── tagpr-release.yml │ └── test.yml ├── .gitignore ├── .goreleaser.yml ├── .tagpr ├── CHANGELOG.md ├── LICENSE ├── Makefile ├── README.md ├── cmd └── tracer │ └── main.go ├── events.go ├── export_test.go ├── go.mod ├── go.sum ├── lambda.go ├── lambda ├── .gitignore ├── Makefile ├── example.tf └── function.jsonnet ├── lambda_test.go ├── tracer.go └── tracer_test.go /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: gomod 4 | directory: "/" 5 | schedule: 6 | interval: monthly 7 | groups: 8 | aws-sdk-go-v2: 9 | patterns: 10 | - "github.com/aws/aws-sdk-go-v2*" 11 | - package-ecosystem: github-actions 12 | directory: "/" 13 | schedule: 14 | interval: monthly 15 | -------------------------------------------------------------------------------- /.github/release.yml: -------------------------------------------------------------------------------- 1 | changelog: 2 | exclude: 3 | labels: 4 | - tagpr 5 | -------------------------------------------------------------------------------- /.github/workflows/tagpr-release.yml: -------------------------------------------------------------------------------- 1 | name: tagpr and release 2 | on: 3 | push: 4 | branches: ["v1"] 5 | workflow_dispatch: 6 | inputs: 7 | tag: 8 | description: "release tag" 9 | required: false 10 | type: string 11 | 12 | permissions: 13 | pull-requests: write 14 | packages: write 15 | contents: write 16 | actions: write 17 | issues: write 18 | 19 | jobs: 20 | deploy: 21 | runs-on: ubuntu-latest 22 | steps: 23 | - uses: actions/checkout@v4 24 | with: 25 | ref: ${{ inputs.tag || github.ref }} 26 | - uses: Songmu/tagpr@v1 27 | id: tagpr 28 | env: 29 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 30 | if: ${{ github.event_name != 'workflow_dispatch' }} # skip on workflow_dispatch 31 | # after tagpr adds a release tag, or workflow_dispatch, release it 32 | - name: Set up Go 33 | uses: actions/setup-go@v5 34 | with: 35 | go-version: "1.24" 36 | if: ${{ steps.tagpr.outputs.tag != '' || github.event_name == 'workflow_dispatch' }} 37 | - name: Run GoReleaser 38 | uses: goreleaser/goreleaser-action@v6 39 | with: 40 | version: '~> v2' 41 | args: release 42 | env: 43 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 44 | if: ${{ steps.tagpr.outputs.tag != '' || github.event_name == 'workflow_dispatch' }} 45 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: Go 2 | on: [push, pull_request] 3 | jobs: 4 | test: 5 | strategy: 6 | matrix: 7 | go: 8 | - "1.23" 9 | - "1.24" 10 | name: Build 11 | runs-on: ubuntu-latest 12 | steps: 13 | - name: Set up Go 14 | uses: actions/setup-go@v5 15 | with: 16 | go-version: ${{ matrix.go }} 17 | id: go 18 | 19 | - name: Check out code into the Go module directory 20 | uses: actions/checkout@v4 21 | 22 | - name: Build & Test 23 | run: | 24 | go test -race -v ./... 25 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Binaries for programs and plugins 2 | *.exe 3 | *.exe~ 4 | *.dll 5 | *.so 6 | *.dylib 7 | 8 | # Test binary, build with `go test -c` 9 | *.test 10 | 11 | # Output of the go coverage tool, specifically when used with LiteIDE 12 | *.out 13 | 14 | *~ 15 | .envrc 16 | cmd/tracer/tracer 17 | dist/ 18 | /tracer 19 | -------------------------------------------------------------------------------- /.goreleaser.yml: -------------------------------------------------------------------------------- 1 | # This is an example goreleaser.yaml file with some sane defaults. 2 | # Make sure to check the documentation at http://goreleaser.com 3 | before: 4 | hooks: 5 | - go mod download 6 | builds: 7 | - env: 8 | - CGO_ENABLED=0 9 | main: ./cmd/tracer/main.go 10 | ldflags: 11 | - "-s -w" 12 | - "-X main.Version=v{{ .Version }}" 13 | goos: 14 | - darwin 15 | - linux 16 | goarch: 17 | - amd64 18 | - arm64 19 | archives: 20 | checksum: 21 | name_template: "checksums.txt" 22 | snapshot: 23 | name_template: "{{ .Tag }}-next" 24 | changelog: 25 | sort: asc 26 | filters: 27 | exclude: 28 | - "^docs:" 29 | - "^test:" 30 | -------------------------------------------------------------------------------- /.tagpr: -------------------------------------------------------------------------------- 1 | # config file for the tagpr in git config format 2 | # The tagpr generates the initial configuration, which you can rewrite to suit your environment. 3 | # CONFIGURATIONS: 4 | # tagpr.releaseBranch 5 | # Generally, it is "main." It is the branch for releases. The tagpr tracks this branch, 6 | # creates or updates a pull request as a release candidate, or tags when they are merged. 7 | # 8 | # tagpr.versionFile 9 | # Versioning file containing the semantic version needed to be updated at release. 10 | # It will be synchronized with the "git tag". 11 | # Often this is a meta-information file such as gemspec, setup.cfg, package.json, etc. 12 | # Sometimes the source code file, such as version.go or Bar.pm, is used. 13 | # If you do not want to use versioning files but only git tags, specify the "-" string here. 14 | # You can specify multiple version files by comma separated strings. 15 | # 16 | # tagpr.vPrefix 17 | # Flag whether or not v-prefix is added to semver when git tagging. (e.g. v1.2.3 if true) 18 | # This is only a tagging convention, not how it is described in the version file. 19 | # 20 | # tagpr.changelog (Optional) 21 | # Flag whether or not changelog is added or changed during the release. 22 | # 23 | # tagpr.command (Optional) 24 | # Command to change files just before release. 25 | # 26 | # tagpr.template (Optional) 27 | # Pull request template file in go template format 28 | # 29 | # tagpr.templateText (Optional) 30 | # Pull request template text in go template format 31 | # 32 | # tagpr.release (Optional) 33 | # GitHub Release creation behavior after tagging [true, draft, false] 34 | # If this value is not set, the release is to be created. 35 | # 36 | # tagpr.majorLabels (Optional) 37 | # Label of major update targets. Default is [major] 38 | # 39 | # tagpr.minorLabels (Optional) 40 | # Label of minor update targets. Default is [minor] 41 | # 42 | # tagpr.commitPrefix (Optional) 43 | # Prefix of commit message. Default is "[tagpr]" 44 | # 45 | [tagpr] 46 | vPrefix = true 47 | releaseBranch = v1 48 | versionFile = - 49 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | ## [v1.1.1](https://github.com/fujiwara/tracer/compare/v1.1.0...v1.1.1) - 2025-03-21 4 | - Add Dependabot configuration, replace release workflow with tagpr based release by @fujiwara in https://github.com/fujiwara/tracer/pull/15 5 | - Bump the aws-sdk-go-v2 group with 5 updates by @dependabot in https://github.com/fujiwara/tracer/pull/17 6 | - Bump github.com/aws/aws-lambda-go from 1.28.0 to 1.47.0 by @dependabot in https://github.com/fujiwara/tracer/pull/18 7 | 8 | ## [v1.1.0](https://github.com/fujiwara/tracer/compare/v1.0.2...v1.1.0) - 2024-08-18 9 | - refactoring by @fujiwara in https://github.com/fujiwara/tracer/pull/13 10 | - add -json flag by @fujiwara in https://github.com/fujiwara/tracer/pull/14 11 | 12 | ## [v1.0.2](https://github.com/fujiwara/tracer/compare/v1.0.1...v1.0.2) - 2023-07-25 13 | - fetch logs after pull stopped + duration by @fujiwara in https://github.com/fujiwara/tracer/pull/12 14 | 15 | ## [v1.0.1](https://github.com/fujiwara/tracer/compare/v1.0.0...v1.0.1) - 2023-07-03 16 | - add SetOutput to specify io.Writer by @fujiwara in https://github.com/fujiwara/tracer/pull/11 17 | 18 | ## [v1.0.0](https://github.com/fujiwara/tracer/compare/v0.1.3...v1.0.0) - 2022-08-31 19 | - remove ctx from struct. by @fujiwara in https://github.com/fujiwara/tracer/pull/8 20 | - Switch to aws-sdk-go-v2 by @fujiwara in https://github.com/fujiwara/tracer/pull/9 21 | - Remove `time.Time` fields from the type of input payload of lambda handler function. by @massat in https://github.com/fujiwara/tracer/pull/10 22 | 23 | ## [v0.1.3](https://github.com/fujiwara/tracer/compare/v0.1.2...v0.1.3) - 2022-03-23 24 | - When executing from a lambda function, the SNS subject is too long when using ARNs by @mashiike in https://github.com/fujiwara/tracer/pull/7 25 | 26 | ## [v0.1.2](https://github.com/fujiwara/tracer/compare/v0.1.1...v0.1.2) - 2022-03-01 27 | - show all status of containers and a task. by @fujiwara in https://github.com/fujiwara/tracer/pull/6 28 | 29 | ## [v0.1.1](https://github.com/fujiwara/tracer/compare/v0.1.0...v0.1.1) - 2022-03-01 30 | - Create the flag to display the version by @ebi-yade in https://github.com/fujiwara/tracer/pull/3 31 | - add -sns option by @fujiwara in https://github.com/fujiwara/tracer/pull/4 32 | - show all status of containers by @fujiwara in https://github.com/fujiwara/tracer/pull/5 33 | 34 | ## [v0.1.0](https://github.com/fujiwara/tracer/compare/v0.0.4...v0.1.0) - 2022-02-04 35 | - show all clusters when no cli args. by @fujiwara in https://github.com/fujiwara/tracer/pull/1 36 | - Run as Lambda function. by @fujiwara in https://github.com/fujiwara/tracer/pull/2 37 | 38 | ## [v0.0.4](https://github.com/fujiwara/tracer/compare/v0.0.3...v0.0.4) - 2021-12-03 39 | 40 | ## [v0.0.3](https://github.com/fujiwara/tracer/compare/v0.0.2...v0.0.3) - 2021-11-30 41 | 42 | ## [v0.0.2](https://github.com/fujiwara/tracer/compare/v0.0.1...v0.0.2) - 2021-11-30 43 | 44 | ## [v0.0.1](https://github.com/fujiwara/tracer/commits/v0.0.1) - 2021-11-29 45 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 FUJIWARA Shunichiro 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | tracer: *.go *.go cmd/tracer/* 2 | go build -o $@ cmd/tracer/main.go 3 | 4 | install: 5 | go install github.com/fujiwara/tracer/cmd/tracer 6 | 7 | test: 8 | go test -v ./... 9 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # tracer 2 | 3 | tracer is a tracing tool for Amazon ECS tasks. 4 | 5 | tracer shows events and logs of the tasks order by timestamp. 6 | 7 | ## Install 8 | 9 | ```console 10 | $ brew install fujiwara/tap/tracer 11 | ``` 12 | 13 | [Binary Releases](https://github.com/fujiwara/tracer/releases) 14 | 15 | ## Usage 16 | 17 | ### as a CLI 18 | 19 | ``` 20 | Usage of tracer: 21 | tracer [options] [cluster] [task-id] 22 | 23 | -duration duration 24 | fetch logs duration from created / before stopping (default 1m0s) 25 | -json 26 | output as JSON lines 27 | -sns string 28 | SNS topic ARN 29 | -stdout 30 | output to stdout (default true) 31 | -version 32 | show the version 33 | ``` 34 | 35 | Environment variable `AWS_REGION` is required. 36 | 37 | - `tracer` (no arguments) shows list of clusters. 38 | - `tracer {cluster}` shows all tasks in the clusters. 39 | - `tracer {cluster} {task-id}` shows a tracing logs of the task. 40 | 41 | 42 | ### as a Lambda function 43 | 44 | `tracer` also runs on AWS Lambda functions invoked by EventBridge's "ECS Task State Change" events. 45 | 46 | 1. Put a `tracer` binary into a lambda function's archive(zip) as `bootstrap` named. 47 | 1. Set to call the lambda function by EvnetBridge rule as below. 48 | ```json 49 | { 50 | "source": ["aws.ecs"], 51 | "detail-type": ["ECS Task State Change"] 52 | } 53 | ``` 54 | 1. The tracer lambda function will put trace logs when ECS tasks STOPPED. 55 | 56 | See also [lambda directory](lambda/). 57 | 58 | ### IAM permissions 59 | 60 | tracer requires IAM permissions as below. 61 | 62 | - `ecs:Describe*` 63 | - `ecs:List*` 64 | - `logs:GetLog*` 65 | 66 | See also [example.tf](lambda/example.tf). 67 | 68 | 69 | ## Example 70 | 71 | Run a task successfully and shutdown. 72 | 73 | ```console 74 | $ tracer default 834a5628bef14f2dbb81c7bc0b272160 75 | 2021-12-03T11:06:21.633+09:00 TASK Created 76 | 2021-12-03T11:06:21.664+09:00 SERVICE (service nginx-local) has started 1 tasks: (task 834a5628bef14f2dbb81c7bc0b272160). 77 | 2021-12-03T11:06:22.342+09:00 SERVICE (service nginx-local) was unable to place a task. Reason: Capacity is unavailable at this time. Please try again later or in a different availability zone. For more information, see the Troubleshooting section of the Amazon ECS Developer Guide. 78 | 2021-12-03T11:06:24.906+09:00 TASK Connected 79 | 2021-12-03T11:06:39.602+09:00 TASK Pull started 80 | 2021-12-03T11:06:46.366+09:00 TASK Pull stopped 81 | 2021-12-03T11:06:46.746+09:00 CONTAINER:nginx /docker-entrypoint.sh: /docker-entrypoint.d/ is not empty, will attempt to perform configuration 82 | 2021-12-03T11:06:46.746+09:00 CONTAINER:nginx /docker-entrypoint.sh: Looking for shell scripts in /docker-entrypoint.d/ 83 | 2021-12-03T11:06:46.746+09:00 CONTAINER:nginx /docker-entrypoint.sh: Launching /docker-entrypoint.d/10-listen-on-ipv6-by-default.sh 84 | 2021-12-03T11:06:46.758+09:00 CONTAINER:nginx 10-listen-on-ipv6-by-default.sh: info: Getting the checksum of /etc/nginx/conf.d/default.conf 85 | 2021-12-03T11:06:46.762+09:00 CONTAINER:nginx 10-listen-on-ipv6-by-default.sh: info: Enabled listen on IPv6 in /etc/nginx/conf.d/default.conf 86 | 2021-12-03T11:06:46.762+09:00 CONTAINER:nginx /docker-entrypoint.sh: Launching /docker-entrypoint.d/20-envsubst-on-templates.sh 87 | 2021-12-03T11:06:46.768+09:00 TASK Started 88 | 2021-12-03T11:06:46.820+09:00 CONTAINER:nginx /docker-entrypoint.sh: Launching /docker-entrypoint.d/30-tune-worker-processes.sh 89 | 2021-12-03T11:06:46.832+09:00 CONTAINER:nginx 2021/12/03 02:06:46 [notice] 1#1: using the "epoll" event method 90 | 2021-12-03T11:06:46.832+09:00 CONTAINER:nginx 2021/12/03 02:06:46 [notice] 1#1: nginx/1.21.4 91 | 2021-12-03T11:06:46.832+09:00 CONTAINER:nginx 2021/12/03 02:06:46 [notice] 1#1: built by gcc 10.2.1 20210110 (Debian 10.2.1-6) 92 | 2021-12-03T11:06:46.832+09:00 CONTAINER:nginx 2021/12/03 02:06:46 [notice] 1#1: OS: Linux 4.14.248-189.473.amzn2.aarch64 93 | 2021-12-03T11:06:46.832+09:00 CONTAINER:nginx 2021/12/03 02:06:46 [notice] 1#1: getrlimit(RLIMIT_NOFILE): 1024:4096 94 | 2021-12-03T11:06:46.832+09:00 CONTAINER:nginx /docker-entrypoint.sh: Configuration complete; ready for start up 95 | 2021-12-03T11:06:46.832+09:00 CONTAINER:nginx 2021/12/03 02:06:46 [notice] 1#1: start worker processes 96 | 2021-12-03T11:06:46.837+09:00 CONTAINER:nginx 2021/12/03 02:06:46 [notice] 1#1: start worker process 37 97 | 2021-12-03T11:06:46.837+09:00 CONTAINER:nginx 2021/12/03 02:06:46 [notice] 1#1: start worker process 38 98 | 2021-12-03T11:21:36.818+09:00 CONTAINER:nginx 10.3.1.18 - - [03/Dec/2021:02:21:36 +0000] "GET / HTTP/1.1" 200 615 "-" "ELB-HealthChecker/2.0" "-" 99 | 2021-12-03T11:21:36.836+09:00 CONTAINER:nginx 10.3.3.10 - - [03/Dec/2021:02:21:36 +0000] "GET / HTTP/1.1" 200 615 "-" "ELB-HealthChecker/2.0" "-" 100 | 2021-12-03T11:21:46.819+09:00 CONTAINER:nginx 10.3.1.18 - - [03/Dec/2021:02:21:46 +0000] "GET / HTTP/1.1" 200 615 "-" "ELB-HealthChecker/2.0" "-" 101 | 2021-12-03T11:21:46.837+09:00 CONTAINER:nginx 10.3.3.10 - - [03/Dec/2021:02:21:46 +0000] "GET / HTTP/1.1" 200 615 "-" "ELB-HealthChecker/2.0" "-" 102 | 2021-12-03T11:21:56.820+09:00 CONTAINER:nginx 10.3.1.18 - - [03/Dec/2021:02:21:56 +0000] "GET / HTTP/1.1" 200 615 "-" "ELB-HealthChecker/2.0" "-" 103 | 2021-12-03T11:21:56.839+09:00 CONTAINER:nginx 10.3.3.10 - - [03/Dec/2021:02:21:56 +0000] "GET / HTTP/1.1" 200 615 "-" "ELB-HealthChecker/2.0" "-" 104 | 2021-12-03T11:22:06.821+09:00 CONTAINER:nginx 10.3.1.18 - - [03/Dec/2021:02:22:06 +0000] "GET / HTTP/1.1" 200 615 "-" "ELB-HealthChecker/2.0" "-" 105 | 2021-12-03T11:22:06.840+09:00 CONTAINER:nginx 10.3.3.10 - - [03/Dec/2021:02:22:06 +0000] "GET / HTTP/1.1" 200 615 "-" "ELB-HealthChecker/2.0" "-" 106 | 2021-12-03T11:22:12.681+09:00 CONTAINER:nginx 10.3.1.18 - - [03/Dec/2021:02:22:12 +0000] "GET / HTTP/1.1" 200 615 "-" "Mozilla/5.0 (compatible; Nimbostratus-Bot/v1.3.2; http://cloudsystemnetworks.com)" "209.17.96.194" 107 | 2021-12-03T11:22:16.821+09:00 CONTAINER:nginx 10.3.1.18 - - [03/Dec/2021:02:22:16 +0000] "GET / HTTP/1.1" 200 615 "-" "ELB-HealthChecker/2.0" "-" 108 | 2021-12-03T11:22:16.841+09:00 CONTAINER:nginx 10.3.3.10 - - [03/Dec/2021:02:22:16 +0000] "GET / HTTP/1.1" 200 615 "-" "ELB-HealthChecker/2.0" "-" 109 | 2021-12-03T11:22:19.833+09:00 SERVICE (service nginx-local) deregistered 1 targets in (target-group arn:aws:elasticloadbalancing:ap-northeast-1:314472643515:targetgroup/alpha/6a301850702273d9) 110 | 2021-12-03T11:22:19.834+09:00 SERVICE (service nginx-local) has begun draining connections on 1 tasks. 111 | 2021-12-03T11:22:26.822+09:00 CONTAINER:nginx 10.3.1.18 - - [03/Dec/2021:02:22:26 +0000] "GET / HTTP/1.1" 200 615 "-" "ELB-HealthChecker/2.0" "-" 112 | 2021-12-03T11:22:26.842+09:00 CONTAINER:nginx 10.3.3.10 - - [03/Dec/2021:02:22:26 +0000] "GET / HTTP/1.1" 200 615 "-" "ELB-HealthChecker/2.0" "-" 113 | 2021-12-03T11:22:28.910+09:00 TASK Stopping 114 | 2021-12-03T11:22:28.910+09:00 TASK StoppedReason:Scaling activity initiated by (deployment ecs-svc/8709920613704280865) 115 | 2021-12-03T11:22:28.910+09:00 TASK StoppedCode:ServiceSchedulerInitiated 116 | 2021-12-03T11:22:28.938+09:00 SERVICE (service nginx-local) has stopped 1 running tasks: (task 834a5628bef14f2dbb81c7bc0b272160). 117 | 2021-12-03T11:22:29.244+09:00 CONTAINER:nginx 2021/12/03 02:22:29 [notice] 1#1: signal 15 (SIGTERM) received, exiting 118 | 2021-12-03T11:22:29.245+09:00 CONTAINER:nginx 2021/12/03 02:22:29 [notice] 37#37: exiting 119 | 2021-12-03T11:22:29.245+09:00 CONTAINER:nginx 2021/12/03 02:22:29 [notice] 37#37: exit 120 | 2021-12-03T11:22:29.245+09:00 CONTAINER:nginx 2021/12/03 02:22:29 [notice] 38#38: exiting 121 | 2021-12-03T11:22:29.245+09:00 CONTAINER:nginx 2021/12/03 02:22:29 [notice] 38#38: exit 122 | 2021-12-03T11:22:29.294+09:00 CONTAINER:nginx 2021/12/03 02:22:29 [notice] 1#1: signal 14 (SIGALRM) received 123 | 2021-12-03T11:22:29.328+09:00 CONTAINER:nginx 2021/12/03 02:22:29 [notice] 1#1: signal 17 (SIGCHLD) received from 37 124 | 2021-12-03T11:22:29.328+09:00 CONTAINER:nginx 2021/12/03 02:22:29 [notice] 1#1: worker process 37 exited with code 0 125 | 2021-12-03T11:22:29.328+09:00 CONTAINER:nginx 2021/12/03 02:22:29 [notice] 1#1: signal 29 (SIGIO) received 126 | 2021-12-03T11:22:29.329+09:00 CONTAINER:nginx 2021/12/03 02:22:29 [notice] 1#1: signal 17 (SIGCHLD) received from 38 127 | 2021-12-03T11:22:29.329+09:00 CONTAINER:nginx 2021/12/03 02:22:29 [notice] 1#1: worker process 38 exited with code 0 128 | 2021-12-03T11:22:29.329+09:00 CONTAINER:nginx 2021/12/03 02:22:29 [notice] 1#1: exit 129 | 2021-12-03T11:22:38.224+09:00 SERVICE (service nginx-local) has reached a steady state. 130 | 2021-12-03T11:22:40.527+09:00 TASK Execution stopped 131 | 2021-12-03T11:23:04.873+09:00 TASK Stopped 132 | 2021-12-03T11:23:04.873+09:00 CONTAINER:nginx STOPPED (exit code: 0) 133 | ``` 134 | 135 | Failed to run task. (typo container image URL) 136 | 137 | ```console 138 | $ tracer default 9f654c76cde14c7c85cf54dce087658a 139 | 2021-11-27T02:29:15.055+09:00 TASK Created 140 | 2021-11-27T02:29:33.527+09:00 TASK Execution stopped 141 | 2021-11-27T02:29:43.569+09:00 TASK Stopping 142 | 2021-11-27T02:29:43.569+09:00 TASK StoppedReason:CannotPullContainerError: inspect image has been retried 1 time(s): failed to resolve ref "docker.io/library/ngin:latest": pull access denied, repository does not exist or may require authorization: server message: insufficient_scope: authorization failed 143 | 2021-11-27T02:29:43.569+09:00 TASK StoppedCode:TaskFailedToStart 144 | 2021-11-27T02:29:57.070+09:00 TASK Stopped 145 | ``` 146 | 147 | ## LICENSE 148 | 149 | MIT 150 | 151 | ## Author 152 | 153 | fujiwara 154 | -------------------------------------------------------------------------------- /cmd/tracer/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "flag" 6 | "fmt" 7 | "log/slog" 8 | "os" 9 | "strings" 10 | "time" 11 | 12 | "github.com/aws/aws-lambda-go/lambda" 13 | "github.com/aws/aws-sdk-go-v2/config" 14 | "github.com/fujiwara/tracer" 15 | ) 16 | 17 | var Version = "current" 18 | 19 | func init() { 20 | flag.Usage = func() { 21 | w := flag.CommandLine.Output() 22 | fmt.Fprintf(w, "Usage of %s:\n", os.Args[0]) 23 | fmt.Fprintln(w, "tracer [options] [cluster] [task-id]") 24 | fmt.Fprintln(w, "") 25 | flag.PrintDefaults() 26 | } 27 | } 28 | 29 | func main() { 30 | ctx := context.Background() 31 | cfg, err := config.LoadDefaultConfig(ctx, 32 | config.WithRegion(os.Getenv("AWS_REGION")), 33 | ) 34 | if err != nil { 35 | panic(err) 36 | } 37 | t, err := tracer.NewWithConfig(cfg) 38 | if err != nil { 39 | panic(err) 40 | } 41 | var showVersion bool 42 | opt := tracer.RunOption{} 43 | flag.DurationVar(&opt.Duration, "duration", time.Minute, "fetch logs duration from created / before stopping") 44 | flag.BoolVar(&showVersion, "version", false, "show the version") 45 | flag.BoolVar(&opt.Stdout, "stdout", true, "output to stdout") 46 | flag.StringVar(&opt.SNSTopicArn, "sns", "", "SNS topic ARN") 47 | flag.BoolVar(&opt.JSON, "json", false, "output as JSON lines") 48 | flag.VisitAll(envToFlag) 49 | flag.Parse() 50 | 51 | if opt.JSON { 52 | slog.SetDefault(slog.New(slog.NewJSONHandler(os.Stderr, nil))) 53 | } 54 | 55 | if showVersion { 56 | fmt.Println("tracer", Version) 57 | return 58 | } 59 | 60 | if onLambda() { 61 | lambda.Start(t.LambdaHandlerFunc(&opt)) 62 | return 63 | } 64 | 65 | args := make([]string, 2) 66 | copy(args, flag.Args()) 67 | 68 | if err := t.Run(ctx, args[0], args[1], &opt); err != nil { 69 | slog.Error(err.Error()) 70 | os.Exit(1) 71 | } 72 | } 73 | 74 | func onLambda() bool { 75 | return strings.HasPrefix(os.Getenv("AWS_EXECUTE_ENV"), "AWS_Lambda") || 76 | os.Getenv("AWS_LAMBDA_RUNTIME_API") != "" 77 | } 78 | 79 | func envToFlag(f *flag.Flag) { 80 | name := strings.ToUpper(strings.Replace(f.Name, "-", "_", -1)) 81 | if s, ok := os.LookupEnv("TRACER_" + name); ok { 82 | f.Value.Set(s) 83 | } 84 | } 85 | -------------------------------------------------------------------------------- /events.go: -------------------------------------------------------------------------------- 1 | package tracer 2 | 3 | import ( 4 | "bytes" 5 | "encoding/json" 6 | ) 7 | 8 | type ECSTaskEvent struct { 9 | Detail ECSTaskEventDetail `json:"detail"` 10 | } 11 | 12 | type ECSTaskEventDetail struct { 13 | DesiredStatus string `json:"desiredStatus"` 14 | LastStatus string `json:"lastStatus"` 15 | StopCode string `json:"stopCode"` 16 | StoppedReason string `json:"stoppedReason"` 17 | TaskArn string `json:"taskArn"` 18 | ClusterArn string `json:"clusterArn"` 19 | } 20 | 21 | func (e *ECSTaskEvent) String() string { 22 | b := bytes.Buffer{} 23 | json.NewEncoder(&b).Encode(e) 24 | return b.String() 25 | } 26 | -------------------------------------------------------------------------------- /export_test.go: -------------------------------------------------------------------------------- 1 | package tracer 2 | 3 | var ( 4 | ExtractTaskID = extractTaskID 5 | ExtractClusterName = extractClusterName 6 | ) 7 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/fujiwara/tracer 2 | 3 | go 1.22 4 | 5 | require ( 6 | github.com/aws/aws-lambda-go v1.47.0 7 | github.com/aws/aws-sdk-go-v2 v1.36.3 8 | github.com/aws/aws-sdk-go-v2/config v1.29.12 9 | github.com/aws/aws-sdk-go-v2/service/cloudwatchlogs v1.47.1 10 | github.com/aws/aws-sdk-go-v2/service/ecs v1.54.3 11 | github.com/aws/aws-sdk-go-v2/service/sns v1.34.2 12 | ) 13 | 14 | require ( 15 | github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.6.10 // indirect 16 | github.com/aws/aws-sdk-go-v2/credentials v1.17.65 // indirect 17 | github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.16.30 // indirect 18 | github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.34 // indirect 19 | github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.34 // indirect 20 | github.com/aws/aws-sdk-go-v2/internal/ini v1.8.3 // indirect 21 | github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.12.3 // indirect 22 | github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.12.15 // indirect 23 | github.com/aws/aws-sdk-go-v2/service/sso v1.25.2 // indirect 24 | github.com/aws/aws-sdk-go-v2/service/ssooidc v1.30.0 // indirect 25 | github.com/aws/aws-sdk-go-v2/service/sts v1.33.17 // indirect 26 | github.com/aws/smithy-go v1.22.2 // indirect 27 | ) 28 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/aws/aws-lambda-go v1.47.0 h1:0H8s0vumYx/YKs4sE7YM0ktwL2eWse+kfopsRI1sXVI= 2 | github.com/aws/aws-lambda-go v1.47.0/go.mod h1:dpMpZgvWx5vuQJfBt0zqBha60q7Dd7RfgJv23DymV8A= 3 | github.com/aws/aws-sdk-go-v2 v1.36.3 h1:mJoei2CxPutQVxaATCzDUjcZEjVRdpsiiXi2o38yqWM= 4 | github.com/aws/aws-sdk-go-v2 v1.36.3/go.mod h1:LLXuLpgzEbD766Z5ECcRmi8AzSwfZItDtmABVkRLGzg= 5 | github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.6.10 h1:zAybnyUQXIZ5mok5Jqwlf58/TFE7uvd3IAsa1aF9cXs= 6 | github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.6.10/go.mod h1:qqvMj6gHLR/EXWZw4ZbqlPbQUyenf4h82UQUlKc+l14= 7 | github.com/aws/aws-sdk-go-v2/config v1.29.12 h1:Y/2a+jLPrPbHpFkpAAYkVEtJmxORlXoo5k2g1fa2sUo= 8 | github.com/aws/aws-sdk-go-v2/config v1.29.12/go.mod h1:xse1YTjmORlb/6fhkWi8qJh3cvZi4JoVNhc+NbJt4kI= 9 | github.com/aws/aws-sdk-go-v2/credentials v1.17.65 h1:q+nV2yYegofO/SUXruT+pn4KxkxmaQ++1B/QedcKBFM= 10 | github.com/aws/aws-sdk-go-v2/credentials v1.17.65/go.mod h1:4zyjAuGOdikpNYiSGpsGz8hLGmUzlY8pc8r9QQ/RXYQ= 11 | github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.16.30 h1:x793wxmUWVDhshP8WW2mlnXuFrO4cOd3HLBroh1paFw= 12 | github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.16.30/go.mod h1:Jpne2tDnYiFascUEs2AWHJL9Yp7A5ZVy3TNyxaAjD6M= 13 | github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.34 h1:ZK5jHhnrioRkUNOc+hOgQKlUL5JeC3S6JgLxtQ+Rm0Q= 14 | github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.34/go.mod h1:p4VfIceZokChbA9FzMbRGz5OV+lekcVtHlPKEO0gSZY= 15 | github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.34 h1:SZwFm17ZUNNg5Np0ioo/gq8Mn6u9w19Mri8DnJ15Jf0= 16 | github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.34/go.mod h1:dFZsC0BLo346mvKQLWmoJxT+Sjp+qcVR1tRVHQGOH9Q= 17 | github.com/aws/aws-sdk-go-v2/internal/ini v1.8.3 h1:bIqFDwgGXXN1Kpp99pDOdKMTTb5d2KyU5X/BZxjOkRo= 18 | github.com/aws/aws-sdk-go-v2/internal/ini v1.8.3/go.mod h1:H5O/EsxDWyU+LP/V8i5sm8cxoZgc2fdNR9bxlOFrQTo= 19 | github.com/aws/aws-sdk-go-v2/service/cloudwatchlogs v1.47.1 h1:IKznEkCo7L8VHkQ3tC1e50F1eudenoQ7BTHJhMOswtE= 20 | github.com/aws/aws-sdk-go-v2/service/cloudwatchlogs v1.47.1/go.mod h1:uo14VBn5cNk/BPGTPz3kyLBxgpgOObgO8lmz+H7Z4Ck= 21 | github.com/aws/aws-sdk-go-v2/service/ecs v1.54.3 h1:YDT9RUCa87ffMffcHOWyGAoGrvS8j7f60lfRDiBZNIk= 22 | github.com/aws/aws-sdk-go-v2/service/ecs v1.54.3/go.mod h1:wAtdeFanDuF9Re/ge4DRDaYe3Wy1OGrU7jG042UcuI4= 23 | github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.12.3 h1:eAh2A4b5IzM/lum78bZ590jy36+d/aFLgKF/4Vd1xPE= 24 | github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.12.3/go.mod h1:0yKJC/kb8sAnmlYa6Zs3QVYqaC8ug2AbnNChv5Ox3uA= 25 | github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.12.15 h1:dM9/92u2F1JbDaGooxTq18wmmFzbJRfXfVfy96/1CXM= 26 | github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.12.15/go.mod h1:SwFBy2vjtA0vZbjjaFtfN045boopadnoVPhu4Fv66vY= 27 | github.com/aws/aws-sdk-go-v2/service/sns v1.34.2 h1:PajtbJ/5bEo6iUAIGMYnK8ljqg2F1h4mMCGh1acjN30= 28 | github.com/aws/aws-sdk-go-v2/service/sns v1.34.2/go.mod h1:PJtxxMdj747j8DeZENRTTYAz/lx/pADn/U0k7YNNiUY= 29 | github.com/aws/aws-sdk-go-v2/service/sso v1.25.2 h1:pdgODsAhGo4dvzC3JAG5Ce0PX8kWXrTZGx+jxADD+5E= 30 | github.com/aws/aws-sdk-go-v2/service/sso v1.25.2/go.mod h1:qs4a9T5EMLl/Cajiw2TcbNt2UNo/Hqlyp+GiuG4CFDI= 31 | github.com/aws/aws-sdk-go-v2/service/ssooidc v1.30.0 h1:90uX0veLKcdHVfvxhkWUQSCi5VabtwMLFutYiRke4oo= 32 | github.com/aws/aws-sdk-go-v2/service/ssooidc v1.30.0/go.mod h1:MlYRNmYu/fGPoxBQVvBYr9nyr948aY/WLUvwBMBJubs= 33 | github.com/aws/aws-sdk-go-v2/service/sts v1.33.17 h1:PZV5W8yk4OtH1JAuhV2PXwwO9v5G5Aoj+eMCn4T+1Kc= 34 | github.com/aws/aws-sdk-go-v2/service/sts v1.33.17/go.mod h1:cQnB8CUnxbMU82JvlqjKR2HBOm3fe9pWorWBza6MBJ4= 35 | github.com/aws/smithy-go v1.22.2 h1:6D9hW43xKFrRx/tXXfAlIZc4JI+yQe6snnWcQyxSyLQ= 36 | github.com/aws/smithy-go v1.22.2/go.mod h1:irrKGvNn1InZwb2d7fkIRNucdfwR8R+Ts3wxYa/cJHg= 37 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 38 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 39 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 40 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 41 | github.com/stretchr/testify v1.7.2 h1:4jaiDzPyXQvSd7D0EjG45355tLlV3VOECpq10pLC+8s= 42 | github.com/stretchr/testify v1.7.2/go.mod h1:R6va5+xMeoiuVRoj+gSkQ7d3FALtqAAGI1FQKckRals= 43 | gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= 44 | gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 45 | -------------------------------------------------------------------------------- /lambda.go: -------------------------------------------------------------------------------- 1 | package tracer 2 | 3 | import ( 4 | "context" 5 | "log/slog" 6 | "strings" 7 | 8 | "github.com/aws/aws-sdk-go-v2/aws/arn" 9 | ) 10 | 11 | func (t *Tracer) LambdaHandlerFunc(opt *RunOption) func(ctx context.Context, event *ECSTaskEvent) error { 12 | return func(ctx context.Context, event *ECSTaskEvent) error { 13 | slog.Info("event", "payload", event.String()) 14 | lastStatus := event.Detail.LastStatus 15 | if lastStatus != "STOPPED" { 16 | return nil 17 | } 18 | cluster := extractClusterName(event.Detail.ClusterArn) 19 | return t.Run(ctx, cluster, extractTaskID(cluster, event.Detail.TaskArn), opt) 20 | } 21 | } 22 | 23 | func extractClusterName(clusterArn string) string { 24 | parsed, err := arn.Parse(clusterArn) 25 | if err != nil { 26 | return clusterArn 27 | } 28 | prefix := "cluster/" 29 | if parsed.Service == "ecs" && strings.HasPrefix(parsed.Resource, prefix) { 30 | return strings.TrimPrefix(parsed.Resource, prefix) 31 | } 32 | return clusterArn 33 | } 34 | 35 | func extractTaskID(cluster, taskArn string) string { 36 | parsed, err := arn.Parse(taskArn) 37 | if err != nil { 38 | return taskArn 39 | } 40 | prefix := "task/" + cluster + "/" 41 | if parsed.Service == "ecs" && strings.HasPrefix(parsed.Resource, prefix) { 42 | return strings.TrimPrefix(parsed.Resource, prefix) 43 | } 44 | return taskArn 45 | } 46 | -------------------------------------------------------------------------------- /lambda/.gitignore: -------------------------------------------------------------------------------- 1 | .envrc 2 | bootstrap 3 | -------------------------------------------------------------------------------- /lambda/Makefile: -------------------------------------------------------------------------------- 1 | bootstrap: 2 | GOOS=linux GOARCH=amd64 go build -o bootstrap ../cmd/tracer/main.go 3 | 4 | clean: 5 | rm -f bootstrap 6 | 7 | deploy: bootstrap 8 | lambroll deploy --function function.jsonnet 9 | -------------------------------------------------------------------------------- /lambda/example.tf: -------------------------------------------------------------------------------- 1 | resource "aws_iam_role" "tracer" { 2 | name = "tracer" 3 | 4 | assume_role_policy = jsonencode({ 5 | Version = "2012-10-17" 6 | Statement = [ 7 | { 8 | Action = "sts:AssumeRole" 9 | Effect = "Allow" 10 | Sid = "" 11 | Principal = { 12 | Service = "lambda.amazonaws.com" 13 | } 14 | } 15 | ] 16 | }) 17 | } 18 | 19 | resource "aws_iam_policy" "tracer" { 20 | name = "tracer" 21 | path = "/" 22 | policy = data.aws_iam_policy_document.tracer.json 23 | } 24 | 25 | resource "aws_iam_role_policy_attachment" "tracer" { 26 | role = aws_iam_role.tracer.name 27 | policy_arn = aws_iam_policy.tracer.arn 28 | } 29 | 30 | data "aws_iam_policy_document" "tracer" { 31 | statement { 32 | actions = [ 33 | "ecs:Describe*", 34 | "ecs:List*", 35 | ] 36 | resources = ["*"] 37 | } 38 | statement { 39 | actions = [ 40 | "logs:GetLog*", 41 | "logs:CreateLogGroup", 42 | "logs:CreateLogStream", 43 | "logs:PutLogEvents", 44 | ] 45 | resources = ["*"] 46 | } 47 | } 48 | 49 | resource "aws_cloudwatch_event_rule" "tracer" { 50 | name = "tracer" 51 | is_enabled = true 52 | event_pattern = jsonencode({ 53 | "detail-type" = [ 54 | "ECS Task State Change" 55 | ] 56 | "source" = [ 57 | "aws.ecs" 58 | ] 59 | }) 60 | } 61 | 62 | resource "aws_cloudwatch_event_target" "tracer-lambda" { 63 | rule = aws_cloudwatch_event_rule.tracer.name 64 | arn = data.aws_lambda_function.tracer.arn 65 | } 66 | 67 | data "aws_lambda_function" "tracer" { 68 | function_name = "tracer" 69 | } 70 | -------------------------------------------------------------------------------- /lambda/function.jsonnet: -------------------------------------------------------------------------------- 1 | { 2 | FunctionName: 'tracer', 3 | MemorySize: 128, 4 | Handler: 'index.handler', 5 | // Role: 'arn:aws:iam::{account_id}:role/{role_name}', 6 | Role: 'arn:aws:iam::314472643515:role/tracer', 7 | Runtime: 'provided.al2', 8 | } 9 | -------------------------------------------------------------------------------- /lambda_test.go: -------------------------------------------------------------------------------- 1 | package tracer_test 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/fujiwara/tracer" 7 | ) 8 | 9 | func TestExtractClusterName(t *testing.T) { 10 | cases := []struct { 11 | input string 12 | expected string 13 | }{ 14 | { 15 | input: "arn:aws:ecs:ap-northeast-1:012345678901:cluster/main", 16 | expected: "main", 17 | }, 18 | { 19 | input: "main", 20 | expected: "main", 21 | }, 22 | } 23 | 24 | for _, c := range cases { 25 | t.Run(c.input, func(t *testing.T) { 26 | actual := tracer.ExtractClusterName(c.input) 27 | if c.expected != actual { 28 | t.Errorf("expected: %s, actual: %s", c.expected, actual) 29 | } 30 | }) 31 | } 32 | } 33 | 34 | func TestExtractTaskID(t *testing.T) { 35 | cases := []struct { 36 | input string 37 | cluster string 38 | expected string 39 | }{ 40 | { 41 | input: "0123456789abcdef0123456789abcdef", 42 | cluster: "main", 43 | expected: "0123456789abcdef0123456789abcdef", 44 | }, 45 | { 46 | input: "arn:aws:ecs:ap-northeast-1:012345678901:task/main/0123456789abcdef0123456789abcdef", 47 | cluster: "main", 48 | expected: "0123456789abcdef0123456789abcdef", 49 | }, 50 | } 51 | 52 | for _, c := range cases { 53 | t.Run(c.input, func(t *testing.T) { 54 | actual := tracer.ExtractTaskID(c.cluster, c.input) 55 | if c.expected != actual { 56 | t.Errorf("expected: %s, actual: %s", c.expected, actual) 57 | } 58 | }) 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /tracer.go: -------------------------------------------------------------------------------- 1 | package tracer 2 | 3 | import ( 4 | "bytes" 5 | "context" 6 | "encoding/json" 7 | "fmt" 8 | "io" 9 | "log/slog" 10 | "os" 11 | "sort" 12 | "strings" 13 | "sync" 14 | "time" 15 | 16 | "github.com/aws/aws-sdk-go-v2/aws" 17 | "github.com/aws/aws-sdk-go-v2/config" 18 | "github.com/aws/aws-sdk-go-v2/service/cloudwatchlogs" 19 | "github.com/aws/aws-sdk-go-v2/service/ecs" 20 | ecsTypes "github.com/aws/aws-sdk-go-v2/service/ecs/types" 21 | "github.com/aws/aws-sdk-go-v2/service/sns" 22 | ) 23 | 24 | const ( 25 | snsMaxPayloadSize = 256 * 1024 26 | ) 27 | 28 | var TimeFormat = "2006-01-02T15:04:05.000Z07:00" 29 | 30 | var epochBase = time.Unix(0, 0) 31 | 32 | var MaxFetchLogs = 100 33 | 34 | type Tracer struct { 35 | ecs *ecs.Client 36 | logs *cloudwatchlogs.Client 37 | sns *sns.Client 38 | timeline *Timeline 39 | buf *bytes.Buffer 40 | w io.Writer 41 | 42 | now time.Time 43 | headBegin time.Time 44 | headEnd time.Time 45 | tailBegin time.Time 46 | tailEnd time.Time 47 | 48 | option *RunOption 49 | } 50 | 51 | func (t *Tracer) AddEvent(ts time.Time, source, message string) { 52 | t.timeline.Add(newEvent(ts, source, message)) 53 | } 54 | 55 | func NewTimeline() *Timeline { 56 | return &Timeline{ 57 | seen: make(map[string]bool), 58 | } 59 | } 60 | 61 | type Timeline struct { 62 | events []*TimelineEvent 63 | seen map[string]bool 64 | mu sync.Mutex 65 | } 66 | 67 | func (tl *Timeline) Add(event *TimelineEvent) { 68 | if event.Timestamp.IsZero() { // ignore zero time event 69 | return 70 | } 71 | tl.mu.Lock() 72 | defer tl.mu.Unlock() 73 | tl.events = append(tl.events, event) 74 | } 75 | 76 | func (tl *Timeline) Print(w io.Writer, json bool) (int, error) { 77 | tl.mu.Lock() 78 | defer tl.mu.Unlock() 79 | 80 | tls := make([]*TimelineEvent, 0, len(tl.events)) 81 | tls = append(tls, tl.events...) 82 | sort.SliceStable(tls, func(i, j int) bool { 83 | return tls[i].Timestamp.Before(tls[j].Timestamp) 84 | }) 85 | n := 0 86 | toString := func(e *TimelineEvent) string { 87 | if json { 88 | return e.JSON() 89 | } 90 | return e.String() 91 | } 92 | for _, e := range tls { 93 | s := toString(e) 94 | if tl.seen[s] { 95 | continue 96 | } 97 | l, err := fmt.Fprint(w, s) 98 | if err != nil { 99 | return n, err 100 | } 101 | n += l 102 | tl.seen[s] = true 103 | } 104 | return n, nil 105 | } 106 | 107 | type TimelineEvent struct { 108 | Timestamp time.Time 109 | Source string 110 | Message string 111 | } 112 | 113 | func (e *TimelineEvent) String() string { 114 | ts := e.Timestamp.In(time.Local) 115 | return fmt.Sprintf("%s\t%s\t%s\n", ts.Format(TimeFormat), e.Source, e.Message) 116 | } 117 | 118 | func (e *TimelineEvent) JSON() string { 119 | ts := e.Timestamp.In(time.Local) 120 | b, _ := json.Marshal(struct { 121 | Time string `json:"time"` 122 | Source string `json:"src"` 123 | Message string `json:"msg"` 124 | }{ 125 | Time: ts.Format(TimeFormat), 126 | Source: e.Source, 127 | Message: e.Message, 128 | }) 129 | return string(b) + "\n" 130 | } 131 | 132 | func New(ctx context.Context) (*Tracer, error) { 133 | region := os.Getenv("AWS_REGION") 134 | awscfg, err := config.LoadDefaultConfig(ctx, config.WithRegion(region)) 135 | if err != nil { 136 | return nil, err 137 | } 138 | return NewWithConfig(awscfg) 139 | } 140 | 141 | func NewWithConfig(config aws.Config) (*Tracer, error) { 142 | return &Tracer{ 143 | ecs: ecs.NewFromConfig(config), 144 | logs: cloudwatchlogs.NewFromConfig(config), 145 | sns: sns.NewFromConfig(config), 146 | timeline: NewTimeline(), 147 | buf: new(bytes.Buffer), 148 | w: os.Stdout, 149 | }, nil 150 | } 151 | 152 | func newEvent(ts time.Time, src, msg string) *TimelineEvent { 153 | return &TimelineEvent{ 154 | Timestamp: ts, 155 | Source: src, 156 | Message: msg, 157 | } 158 | } 159 | 160 | type RunOption struct { 161 | Stdout bool 162 | SNSTopicArn string 163 | Duration time.Duration 164 | JSON bool 165 | } 166 | 167 | func (t *Tracer) SetOutput(w io.Writer) { 168 | t.w = w 169 | } 170 | 171 | func (t *Tracer) Run(ctx context.Context, cluster string, taskID string, opt *RunOption) error { 172 | t.now = time.Now() 173 | t.option = opt 174 | 175 | defer func() { t.report(ctx, cluster, taskID) }() 176 | 177 | if cluster == "" { 178 | return t.listClusters(ctx, opt) 179 | } 180 | 181 | if taskID == "" { 182 | return t.listAllTasks(ctx, cluster) 183 | } 184 | 185 | task, err := t.traceTask(ctx, cluster, taskID) 186 | if err != nil { 187 | return err 188 | } 189 | 190 | defer func() { 191 | if _, err := t.timeline.Print(t.buf, opt.JSON); err != nil { 192 | slog.Error("failed to print timeline", "error", err) 193 | } 194 | }() 195 | if err := t.traceLogs(ctx, task); err != nil { 196 | return err 197 | } 198 | 199 | return nil 200 | } 201 | 202 | func (t *Tracer) report(ctx context.Context, cluster, taskID string) { 203 | opt := t.option 204 | if opt.Stdout { 205 | sub := &subject{cluster, taskID} 206 | if opt.JSON { 207 | fmt.Fprintln(t.w, sub.JSON()) 208 | } else { 209 | fmt.Fprintln(t.w, sub.String()) 210 | } 211 | if _, err := t.WriteTo(t.w); err != nil { 212 | slog.Error("failed to write to output", "error", err) 213 | } 214 | } 215 | if opt.SNSTopicArn != "" { 216 | if err := t.Publish(ctx, opt.SNSTopicArn, cluster, taskID); err != nil { 217 | slog.Error("failed to publish to SNS", "error", err) 218 | } 219 | } 220 | } 221 | 222 | func (t *Tracer) WriteTo(w io.Writer) (int64, error) { 223 | n, err := io.WriteString(w, t.buf.String()) 224 | return int64(n), err 225 | } 226 | 227 | type subject struct { 228 | Cluster string `json:"cluster"` 229 | TaskID string `json:"task_id"` 230 | } 231 | 232 | func (s *subject) JSON() string { 233 | b, _ := json.Marshal(s) 234 | return string(b) 235 | } 236 | 237 | func (s *subject) String() string { 238 | str := "Tracer:" 239 | if s.TaskID != "" { 240 | str += " " + s.TaskID 241 | } else if s.Cluster != "" { 242 | str += " tasks" 243 | } 244 | if s.Cluster != "" { 245 | str += " on " + s.Cluster 246 | } else { 247 | str += " clusters" 248 | } 249 | return str 250 | } 251 | 252 | const ( 253 | snsSubjectLimitLength = 100 254 | ellipsisString = "..." 255 | ) 256 | 257 | func (t *Tracer) Publish(ctx context.Context, topicArn, cluster, taskID string) error { 258 | msg := t.buf.String() 259 | if len(msg) >= snsMaxPayloadSize { 260 | msg = msg[:snsMaxPayloadSize] 261 | } 262 | 263 | s := (&subject{cluster, taskID}).String() 264 | if len(s) > snsSubjectLimitLength { 265 | s = s[0:snsSubjectLimitLength-len(ellipsisString)] + ellipsisString 266 | } 267 | _, err := t.sns.Publish(ctx, &sns.PublishInput{ 268 | Message: &msg, 269 | Subject: aws.String(s), 270 | TopicArn: &topicArn, 271 | }) 272 | return err 273 | } 274 | 275 | func (t *Tracer) traceTask(ctx context.Context, cluster string, taskID string) (*ecsTypes.Task, error) { 276 | res, err := t.ecs.DescribeTasks(ctx, &ecs.DescribeTasksInput{ 277 | Cluster: &cluster, 278 | Tasks: []string{taskID}, 279 | }) 280 | if err != nil { 281 | return nil, fmt.Errorf("failed to describe tasks: %w", err) 282 | } 283 | if len(res.Tasks) == 0 { 284 | return nil, fmt.Errorf("no tasks found. cluster: %s, task_id: %s", cluster, taskID) 285 | } 286 | task := res.Tasks[0] 287 | 288 | t.setBoundaries(&task) 289 | 290 | taskGroup := strings.SplitN(aws.ToString(task.Group), ":", 2) 291 | if len(taskGroup) == 2 && taskGroup[0] == "service" { 292 | t.fetchServiceEvents(ctx, cluster, taskGroup[1]) 293 | } 294 | 295 | t.AddEvent(aws.ToTime(task.CreatedAt), "TASK", "Created") 296 | t.AddEvent(aws.ToTime(task.ConnectivityAt), "TASK", "Connected") 297 | t.AddEvent(aws.ToTime(task.StartedAt), "TASK", "Started") 298 | t.AddEvent(aws.ToTime(task.PullStartedAt), "TASK", "Pull started") 299 | t.AddEvent(aws.ToTime(task.PullStoppedAt), "TASK", "Pull stopped") 300 | t.AddEvent(aws.ToTime(task.StoppedAt), "TASK", "Stopped") 301 | t.AddEvent(aws.ToTime(task.StoppingAt), "TASK", "Stopping") 302 | if task.StoppedReason != nil { 303 | t.AddEvent(aws.ToTime(task.StoppingAt), "TASK", "StoppedReason:"+aws.ToString(task.StoppedReason)) 304 | } 305 | t.AddEvent(aws.ToTime(task.StoppingAt), "TASK", "StoppedCode:"+string(task.StopCode)) 306 | t.AddEvent(aws.ToTime(task.ExecutionStoppedAt), "TASK", "Execution stopped") 307 | 308 | for _, c := range task.Containers { 309 | containerName := *c.Name 310 | msg := fmt.Sprintf("LastStatus:%s HealthStatus:%s", *c.LastStatus, c.HealthStatus) 311 | if c.ExitCode != nil { 312 | msg += fmt.Sprintf(" (exit code: %d)", *c.ExitCode) 313 | } 314 | if c.Reason != nil { 315 | msg += fmt.Sprintf(" (reason: %s)", *c.Reason) 316 | } 317 | t.AddEvent(t.now, "CONTAINER:"+containerName, msg) 318 | } 319 | 320 | t.AddEvent(t.now, "TASK", "LastStatus:"+aws.ToString(task.LastStatus)) 321 | 322 | return &task, nil 323 | } 324 | 325 | func (t *Tracer) traceLogs(ctx context.Context, task *ecsTypes.Task) error { 326 | res, err := t.ecs.DescribeTaskDefinition(ctx, &ecs.DescribeTaskDefinitionInput{ 327 | TaskDefinition: task.TaskDefinitionArn, 328 | }) 329 | if err != nil { 330 | return fmt.Errorf("failed to describe task definition: %w", err) 331 | } 332 | var wg sync.WaitGroup 333 | for _, c := range res.TaskDefinition.ContainerDefinitions { 334 | containerName := *c.Name 335 | if c.LogConfiguration == nil { 336 | continue 337 | } 338 | if c.LogConfiguration.LogDriver != ecsTypes.LogDriverAwslogs { 339 | continue 340 | } 341 | opt := c.LogConfiguration.Options 342 | logGroup := opt["awslogs-group"] 343 | logStream := opt["awslogs-stream-prefix"] + "/" + *c.Name + "/" + taskID(task) 344 | wg.Add(1) 345 | go func() { 346 | defer wg.Done() 347 | // head of logs 348 | t.fetchLogs(ctx, containerName, logGroup, logStream, &t.headBegin, &t.headEnd) 349 | 350 | // tail of logs 351 | t.fetchLogs(ctx, containerName, logGroup, logStream, &t.tailBegin, nil) 352 | }() 353 | } 354 | wg.Wait() 355 | return nil 356 | } 357 | 358 | func taskID(task *ecsTypes.Task) string { 359 | an := aws.ToString(task.TaskArn) 360 | return an[strings.LastIndex(an, "/")+1:] 361 | } 362 | 363 | func (t *Tracer) fetchServiceEvents(ctx context.Context, cluster, service string) error { 364 | res, err := t.ecs.DescribeServices(ctx, &ecs.DescribeServicesInput{ 365 | Cluster: &cluster, 366 | Services: []string{service}, 367 | }) 368 | if err != nil { 369 | return fmt.Errorf("failed to describe services: %w", err) 370 | } 371 | if len(res.Services) == 0 { 372 | return fmt.Errorf("no services found: %w", err) 373 | } 374 | for _, e := range res.Services[0].Events { 375 | ts := aws.ToTime(e.CreatedAt) 376 | if ts.After(t.headBegin) && ts.Before(t.headEnd) || ts.After(t.tailBegin) && ts.Before(t.tailEnd) { 377 | t.AddEvent(ts, "SERVICE", aws.ToString(e.Message)) 378 | } 379 | } 380 | return nil 381 | } 382 | 383 | func (t *Tracer) fetchLogs(ctx context.Context, containerName, group, stream string, from, to *time.Time) error { 384 | var nextToken *string 385 | in := &cloudwatchlogs.GetLogEventsInput{ 386 | LogGroupName: aws.String(group), 387 | LogStreamName: aws.String(stream), 388 | Limit: aws.Int32(100), 389 | } 390 | if from != nil { 391 | in.StartTime = aws.Int64(timeToInt64msec(*from)) 392 | } else { 393 | in.StartFromHead = aws.Bool(true) 394 | } 395 | if to != nil { 396 | in.EndTime = aws.Int64(timeToInt64msec(*to)) 397 | } 398 | 399 | fetched := 0 400 | for { 401 | if nextToken != nil { 402 | in.NextToken = nextToken 403 | in.StartFromHead = nil 404 | } 405 | res, err := t.logs.GetLogEvents(ctx, in) 406 | if err != nil { 407 | return err 408 | } 409 | fetched++ 410 | for _, e := range res.Events { 411 | ts := msecToTime(aws.ToInt64(e.Timestamp)) 412 | t.AddEvent(ts, "CONTAINER:"+containerName, aws.ToString(e.Message)) 413 | } 414 | if aws.ToString(nextToken) == aws.ToString(res.NextForwardToken) { 415 | break 416 | } 417 | if fetched >= MaxFetchLogs { 418 | break 419 | } 420 | nextToken = res.NextForwardToken 421 | } 422 | return nil 423 | } 424 | 425 | func (t *Tracer) listAllTasks(ctx context.Context, cluster string) error { 426 | for _, s := range []ecsTypes.DesiredStatus{ 427 | ecsTypes.DesiredStatusRunning, 428 | ecsTypes.DesiredStatusPending, 429 | ecsTypes.DesiredStatusStopped, 430 | } { 431 | err := t.listTasks(ctx, cluster, s) 432 | if err != nil { 433 | return err 434 | } 435 | } 436 | return nil 437 | } 438 | 439 | func (t *Tracer) listClusters(ctx context.Context, opt *RunOption) error { 440 | res, err := t.ecs.ListClusters(ctx, &ecs.ListClustersInput{}) 441 | if err != nil { 442 | return err 443 | } 444 | clusters := make([]string, 0, len(res.ClusterArns)) 445 | for _, c := range res.ClusterArns { 446 | clusters = append(clusters, arnToName(c)) 447 | } 448 | sort.Strings(clusters) 449 | if opt.JSON { 450 | err := json.NewEncoder(t.buf).Encode( 451 | struct { 452 | Clusters []string `json:"clusters"` 453 | }{clusters}, 454 | ) 455 | if err != nil { 456 | return fmt.Errorf("failed to encode JSON: %w", err) 457 | } 458 | return nil 459 | } 460 | for _, c := range clusters { 461 | t.buf.WriteString(c) 462 | t.buf.WriteByte('\n') 463 | } 464 | return nil 465 | } 466 | 467 | func (t *Tracer) listTasks(ctx context.Context, cluster string, status ecsTypes.DesiredStatus) error { 468 | var nextToken *string 469 | for { 470 | listRes, err := t.ecs.ListTasks(ctx, &ecs.ListTasksInput{ 471 | Cluster: &cluster, 472 | DesiredStatus: status, 473 | NextToken: nextToken, 474 | }) 475 | if err != nil { 476 | return fmt.Errorf("failed to list tasks: %w", err) 477 | } 478 | if len(listRes.TaskArns) == 0 { 479 | break 480 | } 481 | res, err := t.ecs.DescribeTasks(ctx, &ecs.DescribeTasksInput{ 482 | Cluster: &cluster, 483 | Tasks: listRes.TaskArns, 484 | }) 485 | if err != nil { 486 | return fmt.Errorf("failed to describe tasks: %w", err) 487 | } 488 | for _, ts := range res.Tasks { 489 | task := newTask(&ts) 490 | if t.option.JSON { 491 | t.buf.WriteString(task.JSON()) 492 | } else { 493 | t.buf.WriteString(task.String()) 494 | } 495 | } 496 | if nextToken = listRes.NextToken; nextToken == nil { 497 | break 498 | } 499 | } 500 | return nil 501 | } 502 | 503 | func (t *Tracer) setBoundaries(task *ecsTypes.Task) { 504 | d := t.option.Duration 505 | 506 | t.headBegin = task.CreatedAt.Add(-d) 507 | if task.StartedAt != nil { 508 | t.headEnd = task.StartedAt.Add(d) 509 | } else { 510 | t.headEnd = task.CreatedAt.Add(d) 511 | } 512 | // logs are not output before pull stopped 513 | if task.PullStoppedAt != nil { 514 | t.headEnd = task.PullStoppedAt.Add(d) 515 | } 516 | 517 | if task.StoppingAt != nil { 518 | t.tailBegin = task.StoppingAt.Add(-d) 519 | } else { 520 | t.tailBegin = t.now.Add(-d) 521 | } 522 | if task.StoppedAt != nil { 523 | t.tailEnd = task.StoppedAt.Add(d) 524 | } else { 525 | t.tailEnd = t.now 526 | } 527 | } 528 | 529 | func msecToTime(i int64) time.Time { 530 | return epochBase.Add(time.Duration(i) * time.Millisecond) 531 | } 532 | 533 | func timeToInt64msec(t time.Time) int64 { 534 | return int64(t.Sub(epochBase) / time.Millisecond) 535 | } 536 | 537 | func arnToName(arn string) string { 538 | return arn[strings.LastIndex(arn, "/")+1:] 539 | } 540 | 541 | type task struct { 542 | Arn string `json:"arn"` 543 | TaskDefinition string `json:"task_definition"` 544 | LastStatus string `json:"last_status"` 545 | DesiredStatus string `json:"desired_status"` 546 | CreatedAt string `json:"created_at"` 547 | Group string `json:"group"` 548 | LaunchType string `json:"launch_type"` 549 | } 550 | 551 | func newTask(t *ecsTypes.Task) *task { 552 | return &task{ 553 | Arn: arnToName(*t.TaskArn), 554 | TaskDefinition: arnToName(*t.TaskDefinitionArn), 555 | LastStatus: aws.ToString(t.LastStatus), 556 | DesiredStatus: aws.ToString(t.DesiredStatus), 557 | CreatedAt: t.CreatedAt.In(time.Local).Format(time.RFC3339), 558 | Group: aws.ToString(t.Group), 559 | LaunchType: string(t.LaunchType), 560 | } 561 | } 562 | 563 | func (t *task) String() string { 564 | return strings.Join([]string{ 565 | t.Arn, 566 | t.TaskDefinition, 567 | t.LastStatus, 568 | t.DesiredStatus, 569 | t.CreatedAt, 570 | t.Group, 571 | t.LaunchType, 572 | }, "\t") + "\n" 573 | } 574 | 575 | func (t *task) JSON() string { 576 | b, _ := json.Marshal(t) 577 | return string(b) + "\n" 578 | } 579 | -------------------------------------------------------------------------------- /tracer_test.go: -------------------------------------------------------------------------------- 1 | package tracer_test 2 | 3 | import ( 4 | "strings" 5 | "testing" 6 | "time" 7 | 8 | "github.com/aws/aws-sdk-go-v2/aws" 9 | "github.com/fujiwara/tracer" 10 | ) 11 | 12 | var ( 13 | testEvents = []tracer.TimelineEvent{ 14 | { 15 | Timestamp: time.Date(2021, 1, 2, 3, 4, 5, 123_999_000, time.UTC), 16 | Message: "test message 1", 17 | Source: "test_source 1", 18 | }, 19 | { 20 | Timestamp: time.Date(2021, 1, 2, 3, 4, 5, 123_999_999, time.UTC), 21 | Message: "test message 5", 22 | Source: "test_source 5", 23 | }, 24 | { 25 | Timestamp: time.Date(2021, 1, 2, 3, 4, 6, 123_999_000, time.UTC), 26 | Message: "test message 2", 27 | Source: "test_source 2", 28 | }, 29 | { 30 | // same timestamp to test sort stable 31 | Timestamp: time.Date(2021, 1, 2, 3, 4, 5, 123_999_000, time.UTC), 32 | Message: "test message 3", 33 | Source: "test_source 3", 34 | }, 35 | { 36 | // duplicate event 37 | Timestamp: time.Date(2021, 1, 2, 3, 4, 5, 123_999_000, time.UTC), 38 | Message: "test message 3", 39 | Source: "test_source 3", 40 | }, 41 | { 42 | Timestamp: aws.ToTime(nil), 43 | Message: "test message ignored", 44 | Source: "test_source ignored", 45 | }, 46 | } 47 | expectedOutput = `2021-01-02T03:04:05.123Z test_source 1 test message 1 48 | 2021-01-02T03:04:05.123Z test_source 3 test message 3 49 | 2021-01-02T03:04:05.123Z test_source 5 test message 5 50 | 2021-01-02T03:04:06.123Z test_source 2 test message 2 51 | ` 52 | expectedJSONOutput = `{"time":"2021-01-02T03:04:05.123Z","src":"test_source 1","msg":"test message 1"} 53 | {"time":"2021-01-02T03:04:05.123Z","src":"test_source 3","msg":"test message 3"} 54 | {"time":"2021-01-02T03:04:05.123Z","src":"test_source 5","msg":"test message 5"} 55 | {"time":"2021-01-02T03:04:06.123Z","src":"test_source 2","msg":"test message 2"} 56 | ` 57 | ) 58 | 59 | func TestTimelineEvent(t *testing.T) { 60 | t.Setenv("TZ", "UTC") 61 | now := time.Date(2021, 1, 2, 3, 4, 5, 123_999_000, time.UTC) 62 | ev := tracer.TimelineEvent{ 63 | Timestamp: now, 64 | Message: "test message", 65 | Source: "test_source", 66 | } 67 | if ev.String() != "2021-01-02T03:04:05.123Z\ttest_source\ttest message\n" { 68 | t.Errorf("unexpected string: %s", ev.String()) 69 | } 70 | } 71 | 72 | func TestTimeline(t *testing.T) { 73 | t.Setenv("TZ", "UTC") 74 | tl := tracer.NewTimeline() 75 | for _, ev := range testEvents { 76 | ev := ev 77 | tl.Add(&ev) 78 | } 79 | t.Run("Print(plaintext)", func(t *testing.T) { 80 | b := new(strings.Builder) 81 | n, err := tl.Print(b, false) 82 | if err != nil { 83 | t.Errorf("unexpected error: %v", err) 84 | } 85 | if n != len(expectedOutput) { 86 | t.Errorf("unexpected length: %d", n) 87 | } 88 | if b.String() != expectedOutput { 89 | t.Errorf("unexpected output: %s", b.String()) 90 | } 91 | }) 92 | t.Run("Print(json)", func(t *testing.T) { 93 | b := new(strings.Builder) 94 | n, err := tl.Print(b, true) 95 | if err != nil { 96 | t.Errorf("unexpected error: %v", err) 97 | } 98 | if n != len(expectedJSONOutput) { 99 | t.Errorf("unexpected length: %d", n) 100 | } 101 | if b.String() != expectedJSONOutput { 102 | t.Errorf("unexpected output: %s", b.String()) 103 | } 104 | }) 105 | } 106 | --------------------------------------------------------------------------------