├── assets └── podtrace-logo.png ├── embedded_bpf.go ├── test ├── pool-test │ ├── go.mod │ ├── go.sum │ └── pool-test-app.go ├── prometheus.yml ├── docker-compose.yml ├── README.md ├── quick-test.sh ├── cleanup-test-pods.sh ├── test-debug.sh ├── test-pods.yaml ├── setup-test-pods.sh ├── run-tests.sh ├── test-pods-full.yaml └── integration_test.go ├── internal ├── kubernetes │ ├── export_test.go │ ├── resolver.go │ ├── errors.go │ ├── errors_test.go │ ├── service_resolver.go │ └── events_correlator.go ├── ebpf │ ├── tracer_wrapper.go │ ├── tracer │ │ ├── interface.go │ │ ├── errors.go │ │ └── errors_test.go │ ├── loader │ │ ├── errors.go │ │ ├── loader.go │ │ ├── loader_test.go │ │ └── errors_test.go │ ├── probes │ │ └── errors.go │ ├── cache │ │ ├── pathcache.go │ │ ├── cache.go │ │ └── lru.go │ ├── tracer_wrapper_test.go │ └── parser │ │ └── parser.go ├── cri │ ├── jsonfind_test.go │ ├── jsonfind.go │ └── resolver_test.go ├── diagnose │ ├── analyzer │ │ ├── common.go │ │ ├── cpu.go │ │ ├── filesystem.go │ │ ├── dns.go │ │ ├── tls.go │ │ ├── network.go │ │ ├── pool.go │ │ └── tls_test.go │ ├── sampling.go │ ├── errors.go │ ├── profiling │ │ └── cpu_profiling_test.go │ ├── detector │ │ └── issues.go │ ├── formatter │ │ ├── formatter.go │ │ └── formatter_test.go │ ├── tracker │ │ ├── pod_communication_test.go │ │ ├── process.go │ │ ├── connection.go │ │ └── trace_tracker_test.go │ └── stacktrace │ │ └── stacktrace.go ├── alerting │ ├── rate_limiter.go │ ├── rate_limiter_test.go │ ├── deduplicator.go │ ├── sender.go │ ├── deduplicator_test.go │ ├── logger_hook.go │ ├── manager_test.go │ ├── webhook.go │ ├── splunk.go │ ├── alert.go │ └── manager.go ├── logger │ ├── logger_test.go │ └── logger.go └── tracing │ ├── extractor │ ├── http_test.go │ └── http.go │ ├── exporter │ ├── splunk.go │ └── otlp.go │ └── context │ └── context.go ├── bpf ├── podtrace.bpf.c ├── filesystem.h ├── events.h ├── common.h ├── memory.c ├── resources.c └── maps.h ├── .gitignore ├── scripts ├── build-and-setup.sh └── setup-capabilities.sh ├── cmd └── podtrace │ ├── interrupt_test.go │ ├── mocks.go │ ├── export_test.go │ ├── main_test.go │ └── diagnose_env.go ├── .github ├── ISSUE_TEMPLATE │ ├── epic.yaml │ ├── feature_request.yaml │ ├── adopters.yaml │ └── bug_report.yaml └── workflows │ ├── bash-checks.yml │ ├── security.yml │ └── ebpf-build.yml ├── doc └── README.md └── go.mod /assets/podtrace-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gma1k/podtrace/HEAD/assets/podtrace-logo.png -------------------------------------------------------------------------------- /embedded_bpf.go: -------------------------------------------------------------------------------- 1 | package podtrace 2 | 3 | import _ "embed" 4 | 5 | var EmbeddedPodtraceBPFObj []byte 6 | -------------------------------------------------------------------------------- /test/pool-test/go.mod: -------------------------------------------------------------------------------- 1 | module pool-test 2 | 3 | go 1.21 4 | 5 | require github.com/mattn/go-sqlite3 v1.14.32 6 | -------------------------------------------------------------------------------- /test/prometheus.yml: -------------------------------------------------------------------------------- 1 | global: 2 | scrape_interval: 5s 3 | 4 | scrape_configs: 5 | - job_name: 'podtrace' 6 | static_configs: 7 | - targets: ['172.17.0.1:3000'] 8 | -------------------------------------------------------------------------------- 
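Note: `embedded_bpf.go` above declares `EmbeddedPodtraceBPFObj` with a blank `embed` import but no visible `//go:embed` directive, so in this snapshot the slice stays empty and the loader (see `internal/ebpf/loader/loader.go` below) treats it as an optional fallback. A minimal sketch of how the declaration could be wired up once `bpf/podtrace.bpf.o` has been built — this is an assumption, not something the snapshot confirms:

```go
package podtrace

import _ "embed"

// Assumption: bpf/podtrace.bpf.o must exist at compile time for go:embed,
// which is likely why this snapshot ships the directive-less variant.
//go:embed bpf/podtrace.bpf.o
var EmbeddedPodtraceBPFObj []byte
```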
/test/pool-test/go.sum: -------------------------------------------------------------------------------- 1 | github.com/mattn/go-sqlite3 v1.14.32 h1:JD12Ag3oLy1zQA+BNn74xRgaBbdhbNIDYvQUEuuErjs= 2 | github.com/mattn/go-sqlite3 v1.14.32/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y= 3 | -------------------------------------------------------------------------------- /internal/kubernetes/export_test.go: -------------------------------------------------------------------------------- 1 | package kubernetes 2 | 3 | import "k8s.io/client-go/kubernetes" 4 | 5 | func NewPodResolverForTesting(clientset kubernetes.Interface) *PodResolver { 6 | return &PodResolver{ 7 | clientset: clientset, 8 | } 9 | } 10 | 11 | -------------------------------------------------------------------------------- /internal/ebpf/tracer_wrapper.go: -------------------------------------------------------------------------------- 1 | package ebpf 2 | 3 | import ( 4 | "github.com/podtrace/podtrace/internal/ebpf/tracer" 5 | ) 6 | 7 | type TracerInterface = tracer.TracerInterface 8 | 9 | func NewTracer() (TracerInterface, error) { 10 | return tracer.NewTracer() 11 | } 12 | 13 | func WaitForInterrupt() { 14 | tracer.WaitForInterrupt() 15 | } 16 | 17 | -------------------------------------------------------------------------------- /internal/ebpf/tracer/interface.go: -------------------------------------------------------------------------------- 1 | package tracer 2 | 3 | import ( 4 | "context" 5 | 6 | "github.com/podtrace/podtrace/internal/events" 7 | ) 8 | 9 | type TracerInterface interface { 10 | AttachToCgroup(cgroupPath string) error 11 | SetContainerID(containerID string) error 12 | Start(ctx context.Context, eventChan chan<- *events.Event) error 13 | Stop() error 14 | } 15 | 16 | -------------------------------------------------------------------------------- /internal/kubernetes/resolver.go: -------------------------------------------------------------------------------- 1 | package kubernetes 2 | 3 | import ( 4 | "context" 5 | 6 | "k8s.io/client-go/kubernetes" 7 | ) 8 | 9 | type PodResolverInterface interface { 10 | ResolvePod(ctx context.Context, podName, namespace, containerName string) (*PodInfo, error) 11 | } 12 | 13 | type ClientsetProvider interface { 14 | GetClientset() kubernetes.Interface 15 | } 16 | 17 | -------------------------------------------------------------------------------- /bpf/podtrace.bpf.c: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: GPL-2.0 2 | 3 | #include "common.h" 4 | #include "maps.h" 5 | #include "events.h" 6 | #include "helpers.h" 7 | 8 | #include "network.c" 9 | #include "filesystem.c" 10 | #include "cpu.c" 11 | #include "memory.c" 12 | #include "syscalls.c" 13 | #include "resources.c" 14 | #include "database.c" 15 | 16 | char LICENSE[] SEC("license") = "GPL"; 17 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Binaries 2 | bin/ 3 | *.o 4 | *.so 5 | 6 | # Go 7 | *.exe 8 | *.test 9 | *.prof 10 | vendor/ 11 | 12 | # eBPF 13 | bpf/*.o 14 | vmlinux.h 15 | 16 | # IDE 17 | .idea/ 18 | .vscode/ 19 | *.swp 20 | *.swo 21 | 22 | # OS 23 | .DS_Store 24 | Thumbs.db 25 | 26 | # Build artifacts 27 | *.a 28 | 29 | # Test coverage 30 | coverage.out 31 | *.coverprofile 32 | *.out 33 | test_output.log 34 | -------------------------------------------------------------------------------- 
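The tracer pieces above compose in a predictable order: resolve the pod's cgroup, attach, scope to a container, then stream events. A minimal caller sketch using only the `TracerInterface` methods defined in `interface.go` — the function name and channel capacity are illustrative assumptions, mirroring what `cmd/podtrace` presumably does inside the module:

```go
package main

import (
	"context"

	"github.com/podtrace/podtrace/internal/ebpf"
	"github.com/podtrace/podtrace/internal/events"
)

// traceCgroup is a hypothetical driver showing the call order the
// interface implies: attach, scope to a container, then start streaming.
func traceCgroup(ctx context.Context, cgroupPath, containerID string) error {
	tr, err := ebpf.NewTracer()
	if err != nil {
		return err
	}
	defer func() { _ = tr.Stop() }() // detach probes on exit

	if err := tr.AttachToCgroup(cgroupPath); err != nil {
		return err
	}
	if err := tr.SetContainerID(containerID); err != nil {
		return err
	}

	eventChan := make(chan *events.Event, 1024) // capacity is an arbitrary choice
	return tr.Start(ctx, eventChan)
}
```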
/bpf/filesystem.h: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: GPL-2.0 2 | 3 | #ifndef PODTRACE_FILESYSTEM_H 4 | #define PODTRACE_FILESYSTEM_H 5 | 6 | #include "common.h" 7 | #include "maps.h" 8 | 9 | static inline int get_path_str_from_file(struct file *file, char *out_buf, u32 buf_size) 10 | { 11 | if (file == NULL || out_buf == NULL || buf_size < 2) 12 | return 0; 13 | 14 | struct path path; 15 | bpf_core_read(&path, sizeof(path), &file->f_path); 16 | 17 | struct dentry *dentry; 18 | bpf_core_read(&dentry, sizeof(dentry), &path.dentry); 19 | if (dentry == NULL) 20 | return 0; 21 | 22 | out_buf[0] = '\0'; 23 | return 0; 24 | } 25 | 26 | #endif 27 | 28 | -------------------------------------------------------------------------------- /internal/ebpf/loader/errors.go: -------------------------------------------------------------------------------- 1 | package loader 2 | 3 | import "fmt" 4 | 5 | type ErrorCode int 6 | 7 | const ( 8 | ErrCodeLoadFailed ErrorCode = iota + 1 9 | ) 10 | 11 | type LoaderError struct { 12 | Code ErrorCode 13 | Message string 14 | Err error 15 | } 16 | 17 | func (e *LoaderError) Error() string { 18 | if e.Err != nil { 19 | return fmt.Sprintf("%s: %v", e.Message, e.Err) 20 | } 21 | return e.Message 22 | } 23 | 24 | func (e *LoaderError) Unwrap() error { 25 | return e.Err 26 | } 27 | 28 | func NewLoadError(path string, err error) *LoaderError { 29 | return &LoaderError{ 30 | Code: ErrCodeLoadFailed, 31 | Message: fmt.Sprintf("failed to load eBPF program from %s", path), 32 | Err: err, 33 | } 34 | } 35 | 36 | -------------------------------------------------------------------------------- /internal/ebpf/probes/errors.go: -------------------------------------------------------------------------------- 1 | package probes 2 | 3 | import "fmt" 4 | 5 | type ErrorCode int 6 | 7 | const ( 8 | ErrCodeProbeAttachFailed ErrorCode = iota + 1 9 | ) 10 | 11 | type ProbeError struct { 12 | Code ErrorCode 13 | Message string 14 | Err error 15 | } 16 | 17 | func (e *ProbeError) Error() string { 18 | if e.Err != nil { 19 | return fmt.Sprintf("%s: %v", e.Message, e.Err) 20 | } 21 | return e.Message 22 | } 23 | 24 | func (e *ProbeError) Unwrap() error { 25 | return e.Err 26 | } 27 | 28 | func NewProbeAttachError(probeName string, err error) *ProbeError { 29 | return &ProbeError{ 30 | Code: ErrCodeProbeAttachFailed, 31 | Message: fmt.Sprintf("failed to attach probe %s", probeName), 32 | Err: err, 33 | } 34 | } 35 | 36 | -------------------------------------------------------------------------------- /test/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3.8' 2 | 3 | # WARNING: This docker-compose file contains hardcoded test credentials (admin/admin) 4 | # DO NOT USE IN PRODUCTION - These credentials are for testing purposes only 5 | # In production, use environment variables or secrets management for credentials 6 | 7 | services: 8 | prometheus: 9 | image: prom/prometheus:latest 10 | container_name: prometheus 11 | volumes: 12 | - ./prometheus.yml:/etc/prometheus/prometheus.yml 13 | ports: 14 | - "9090:9090" 15 | 16 | grafana: 17 | image: grafana/grafana:latest 18 | container_name: grafana 19 | environment: 20 | - GF_SECURITY_ADMIN_USER=admin 21 | - GF_SECURITY_ADMIN_PASSWORD=admin 22 | ports: 23 | - "3001:3000" 24 | -------------------------------------------------------------------------------- /internal/cri/jsonfind_test.go: 
-------------------------------------------------------------------------------- 1 | package cri 2 | 3 | import "testing" 4 | 5 | func TestFindJSONHelpers(t *testing.T) { 6 | obj := map[string]any{ 7 | "pid": float64(123), 8 | "runtimeSpec": map[string]any{ 9 | "linux": map[string]any{ 10 | "cgroupsPath": "/kubepods.slice/test.scope", 11 | }, 12 | }, 13 | } 14 | 15 | if pid, ok := findJSONInt(obj, []string{"pid"}); !ok || pid != 123 { 16 | t.Fatalf("expected pid=123, got pid=%d ok=%v", pid, ok) 17 | } 18 | 19 | if cg, ok := findJSONString(obj, []string{"runtimeSpec.linux.cgroupsPath"}); !ok || cg != "/kubepods.slice/test.scope" { 20 | t.Fatalf("expected cgroupsPath, got %q ok=%v", cg, ok) 21 | } 22 | 23 | if _, ok := findJSONString(obj, []string{"missing.path"}); ok { 24 | t.Fatalf("expected missing path to return ok=false") 25 | } 26 | } 27 | 28 | 29 | 30 | -------------------------------------------------------------------------------- /internal/diagnose/analyzer/common.go: -------------------------------------------------------------------------------- 1 | package analyzer 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/podtrace/podtrace/internal/config" 7 | ) 8 | 9 | func Percentile(sorted []float64, p float64) float64 { 10 | if len(sorted) == 0 { 11 | return 0 12 | } 13 | index := int(float64(len(sorted)-1) * p / 100) 14 | return sorted[index] 15 | } 16 | 17 | func FormatBytes(bytes uint64) string { 18 | if bytes < config.KB { 19 | return fmt.Sprintf("%d B", bytes) 20 | } else if bytes < config.MB { 21 | return fmt.Sprintf("%.2f KB", float64(bytes)/float64(config.KB)) 22 | } else if bytes < config.GB { 23 | return fmt.Sprintf("%.2f MB", float64(bytes)/float64(config.MB)) 24 | } else { 25 | return fmt.Sprintf("%.2f GB", float64(bytes)/float64(config.GB)) 26 | } 27 | } 28 | 29 | type TargetCount struct { 30 | Target string 31 | Count int 32 | } 33 | -------------------------------------------------------------------------------- /internal/diagnose/analyzer/cpu.go: -------------------------------------------------------------------------------- 1 | package analyzer 2 | 3 | import ( 4 | "sort" 5 | 6 | "github.com/podtrace/podtrace/internal/config" 7 | "github.com/podtrace/podtrace/internal/events" 8 | ) 9 | 10 | func AnalyzeCPU(events []*events.Event) (avgBlock, maxBlock float64, p50, p95, p99 float64) { 11 | var totalBlock float64 12 | var blocks []float64 13 | maxBlock = 0 14 | 15 | for _, e := range events { 16 | blockMs := float64(e.LatencyNS) / float64(config.NSPerMS) 17 | blocks = append(blocks, blockMs) 18 | totalBlock += blockMs 19 | if blockMs > maxBlock { 20 | maxBlock = blockMs 21 | } 22 | } 23 | 24 | if len(events) > 0 { 25 | avgBlock = totalBlock / float64(len(events)) 26 | sort.Float64s(blocks) 27 | p50 = Percentile(blocks, 50) 28 | p95 = Percentile(blocks, 95) 29 | p99 = Percentile(blocks, 99) 30 | } 31 | return 32 | } 33 | -------------------------------------------------------------------------------- /internal/ebpf/loader/loader.go: -------------------------------------------------------------------------------- 1 | package loader 2 | 3 | import ( 4 | "bytes" 5 | 6 | "github.com/cilium/ebpf" 7 | 8 | podtrace "github.com/podtrace/podtrace" 9 | "github.com/podtrace/podtrace/internal/config" 10 | ) 11 | 12 | func LoadPodtrace() (*ebpf.CollectionSpec, error) { 13 | spec, err := ebpf.LoadCollectionSpec(config.BPFObjectPath) 14 | if err != nil { 15 | spec, err = ebpf.LoadCollectionSpec("../" + config.BPFObjectPath) 16 | if err != nil { 17 | if config.BPFObjectPath == 
"bpf/podtrace.bpf.o" && len(podtrace.EmbeddedPodtraceBPFObj) > 0 { 18 | if embeddedSpec, embeddedErr := ebpf.LoadCollectionSpecFromReader(bytes.NewReader(podtrace.EmbeddedPodtraceBPFObj)); embeddedErr == nil { 19 | return embeddedSpec, nil 20 | } 21 | } 22 | return nil, NewLoadError(config.BPFObjectPath, err) 23 | } 24 | } 25 | 26 | return spec, nil 27 | } 28 | -------------------------------------------------------------------------------- /scripts/build-and-setup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Build podtrace and automatically set capabilities 3 | 4 | set -e 5 | 6 | ROOT_DIR="$(cd "$(dirname "$0")/.." && pwd)" 7 | 8 | build_podtrace() { 9 | echo "Building podtrace..." 10 | cd "${ROOT_DIR}" 11 | 12 | make clean 13 | make build 14 | 15 | if [[ ! -f "./bin/podtrace" ]]; then 16 | echo "Error: Build failed - bin/podtrace not found" 17 | exit 1 18 | fi 19 | } 20 | 21 | set_capabilities() { 22 | echo "" 23 | echo "Setting capabilities..." 24 | if sudo ./scripts/setup-capabilities.sh; then 25 | echo "" 26 | echo "Build and setup complete!" 27 | echo "" 28 | echo "You can now run podtrace:" 29 | echo " ./bin/podtrace -n " 30 | else 31 | echo "" 32 | echo "Build succeeded but failed to set capabilities." 33 | echo "Run manually: sudo ./scripts/setup-capabilities.sh" 34 | exit 1 35 | fi 36 | } 37 | 38 | main() { 39 | build_podtrace 40 | set_capabilities 41 | } 42 | 43 | main "$@" 44 | -------------------------------------------------------------------------------- /internal/alerting/rate_limiter.go: -------------------------------------------------------------------------------- 1 | package alerting 2 | 3 | import ( 4 | "sync" 5 | "time" 6 | ) 7 | 8 | type RateLimiter struct { 9 | limit int 10 | window time.Duration 11 | counts []time.Time 12 | mu sync.Mutex 13 | } 14 | 15 | func NewRateLimiter(limitPerMinute int) *RateLimiter { 16 | return &RateLimiter{ 17 | limit: limitPerMinute, 18 | window: time.Minute, 19 | counts: make([]time.Time, 0, limitPerMinute), 20 | } 21 | } 22 | 23 | func (rl *RateLimiter) Allow() bool { 24 | rl.mu.Lock() 25 | defer rl.mu.Unlock() 26 | now := time.Now() 27 | cutoff := now.Add(-rl.window) 28 | validCounts := make([]time.Time, 0, rl.limit) 29 | for _, t := range rl.counts { 30 | if t.After(cutoff) { 31 | validCounts = append(validCounts, t) 32 | } 33 | } 34 | if len(validCounts) >= rl.limit { 35 | return false 36 | } 37 | validCounts = append(validCounts, now) 38 | rl.counts = validCounts 39 | return true 40 | } 41 | 42 | func (rl *RateLimiter) Reset() { 43 | rl.mu.Lock() 44 | defer rl.mu.Unlock() 45 | rl.counts = make([]time.Time, 0, rl.limit) 46 | } 47 | 48 | 49 | -------------------------------------------------------------------------------- /test/README.md: -------------------------------------------------------------------------------- 1 | # podtrace Test Environment 2 | 3 | This directory contains test pods and scripts to test the podtrace CLI. 4 | 5 | ## Quick Start 6 | 7 | ### 1. Setup Test Pods 8 | 9 | ```bash 10 | cd /path/to/podtrace/test 11 | ./setup-test-pods.sh 12 | ``` 13 | 14 | This will: 15 | - Create a `podtrace-test` namespace 16 | - Deploy 3 test pods: 17 | - `nginx-test` 18 | - `busybox-test` 19 | - `alpine-test` 20 | 21 | ### 2. Test podtrace 22 | 23 | ```bash 24 | sudo ./bin/podtrace -n podtrace-test nginx-test 25 | ``` 26 | 27 | ### 3. 
Cleanup 28 | 29 | ```bash 30 | ./cleanup-test-pods.sh 31 | ``` 32 | 33 | ## Automated Test Runner 34 | 35 | Run all tests automatically: 36 | 37 | ```bash 38 | ./test/run-tests.sh 39 | ``` 40 | 41 | ## Files 42 | 43 | - `test-pods.yaml` - Kubernetes manifests for test pods 44 | - `setup-test-pods.sh` - Script to create test environment 45 | - `cleanup-test-pods.sh` - Script to clean up test environment 46 | - `run-tests.sh` - Automated test runner 47 | - `quick-test.sh` - Quick test script 48 | - `test-debug.sh` - Debug test script 49 | - `test-cpu-usage.sh` - CPU usage test script 50 | -------------------------------------------------------------------------------- /bpf/events.h: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: GPL-2.0 2 | 3 | #ifndef PODTRACE_EVENTS_H 4 | #define PODTRACE_EVENTS_H 5 | 6 | #include "common.h" 7 | 8 | enum event_type { 9 | EVENT_DNS, 10 | EVENT_CONNECT, 11 | EVENT_TCP_SEND, 12 | EVENT_TCP_RECV, 13 | EVENT_WRITE, 14 | EVENT_READ, 15 | EVENT_FSYNC, 16 | EVENT_SCHED_SWITCH, 17 | EVENT_TCP_STATE, 18 | EVENT_PAGE_FAULT, 19 | EVENT_OOM_KILL, 20 | EVENT_UDP_SEND, 21 | EVENT_UDP_RECV, 22 | EVENT_HTTP_REQ, 23 | EVENT_HTTP_RESP, 24 | EVENT_LOCK_CONTENTION, 25 | EVENT_TCP_RETRANS, 26 | EVENT_NET_DEV_ERROR, 27 | EVENT_DB_QUERY, 28 | EVENT_EXEC, 29 | EVENT_FORK, 30 | EVENT_OPEN, 31 | EVENT_CLOSE, 32 | EVENT_TLS_HANDSHAKE, 33 | EVENT_TLS_ERROR, 34 | EVENT_RESOURCE_LIMIT, 35 | EVENT_POOL_ACQUIRE, 36 | EVENT_POOL_RELEASE, 37 | EVENT_POOL_EXHAUSTED, 38 | }; 39 | 40 | struct event { 41 | u64 timestamp; 42 | u32 pid; 43 | u32 type; 44 | u64 latency_ns; 45 | s32 error; 46 | u64 bytes; 47 | u32 tcp_state; 48 | u64 stack_key; 49 | u64 cgroup_id; 50 | char comm[COMM_LEN]; 51 | char target[MAX_STRING_LEN]; 52 | char details[MAX_STRING_LEN]; 53 | }; 54 | 55 | #endif 56 | -------------------------------------------------------------------------------- /internal/cri/jsonfind.go: -------------------------------------------------------------------------------- 1 | package cri 2 | 3 | import ( 4 | "strings" 5 | ) 6 | 7 | func findJSONString(obj any, keyPaths []string) (string, bool) { 8 | for _, kp := range keyPaths { 9 | if v, ok := findJSONValue(obj, strings.Split(kp, ".")); ok { 10 | if s, ok := v.(string); ok && s != "" { 11 | return s, true 12 | } 13 | } 14 | } 15 | return "", false 16 | } 17 | 18 | func findJSONInt(obj any, keys []string) (int64, bool) { 19 | for _, k := range keys { 20 | if v, ok := findJSONValue(obj, strings.Split(k, ".")); ok { 21 | switch t := v.(type) { 22 | case float64: 23 | return int64(t), true 24 | case int64: 25 | return t, true 26 | case int: 27 | return int64(t), true 28 | } 29 | } 30 | } 31 | return 0, false 32 | } 33 | 34 | func findJSONValue(obj any, path []string) (any, bool) { 35 | if len(path) == 0 { 36 | return obj, true 37 | } 38 | cur := obj 39 | for _, p := range path { 40 | m, ok := cur.(map[string]any) 41 | if !ok { 42 | return nil, false 43 | } 44 | next, ok := m[p] 45 | if !ok { 46 | return nil, false 47 | } 48 | cur = next 49 | } 50 | return cur, true 51 | } 52 | -------------------------------------------------------------------------------- /test/quick-test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | NAMESPACE="podtrace-test" 5 | POD_NAME="${1:-nginx-cpu-test}" 6 | DURATION="${2:-20s}" 7 | 8 | log() { 9 | echo "$@" 10 | } 11 | 12 | check_pod_exists() { 13 | local pod="$1" 14 | local ns="$2" 15 | 
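# Fail fast when the target pod is missing; the listing below helps the caller pick a valid pod name.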
16 | if ! kubectl get pod "${pod}" -n "${ns}" &>/dev/null; then 17 | log "Error: Pod ${pod} not found in namespace ${ns}" 18 | log "Available pods:" 19 | kubectl get pods -n "${ns}" || log "Namespace ${ns} not found" 20 | exit 1 21 | fi 22 | } 23 | 24 | check_podtrace_binary() { 25 | if [[ ! -f "./bin/podtrace" ]]; then 26 | log "Error: ./bin/podtrace not found. Run 'make build' first." 27 | exit 1 28 | fi 29 | } 30 | 31 | run_diagnose() { 32 | local pod="$1" 33 | local ns="$2" 34 | local duration="$3" 35 | 36 | log "Running diagnose mode..." 37 | ./bin/podtrace -n "${ns}" "${pod}" --diagnose "${duration}" 38 | } 39 | 40 | main() { 41 | log "=== Testing podtrace on ${POD_NAME} for ${DURATION} ===" 42 | log "" 43 | 44 | check_pod_exists "${POD_NAME}" "${NAMESPACE}" 45 | check_podtrace_binary 46 | run_diagnose "${POD_NAME}" "${NAMESPACE}" "${DURATION}" 47 | } 48 | 49 | main 50 | -------------------------------------------------------------------------------- /cmd/podtrace/interrupt_test.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "os" 5 | "testing" 6 | "time" 7 | ) 8 | 9 | func TestInterruptChan(t *testing.T) { 10 | ch := interruptChan() 11 | if ch == nil { 12 | t.Fatal("interruptChan returned nil channel") 13 | } 14 | 15 | go func() { 16 | time.Sleep(50 * time.Millisecond) 17 | proc, _ := os.FindProcess(os.Getpid()) 18 | _ = proc.Signal(os.Interrupt) 19 | }() 20 | 21 | select { 22 | case sig := <-ch: 23 | if sig != os.Interrupt { 24 | t.Errorf("Expected os.Interrupt, got %v", sig) 25 | } 26 | case <-time.After(1 * time.Second): 27 | t.Error("interruptChan did not receive signal in time") 28 | } 29 | } 30 | 31 | func TestInterruptChan_PanicRecovery(t *testing.T) { 32 | ch := interruptChan() 33 | if ch == nil { 34 | t.Fatal("interruptChan returned nil channel") 35 | } 36 | 37 | go func() { 38 | time.Sleep(10 * time.Millisecond) 39 | proc, _ := os.FindProcess(os.Getpid()) 40 | _ = proc.Signal(os.Interrupt) 41 | }() 42 | 43 | select { 44 | case <-ch: 45 | case <-time.After(1 * time.Second): 46 | t.Error("interruptChan did not receive signal in time") 47 | } 48 | } 49 | 50 | -------------------------------------------------------------------------------- /test/cleanup-test-pods.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Cleanup script for podtrace test pods 3 | 4 | set -e 5 | 6 | NAMESPACE="podtrace-test" 7 | SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" 8 | 9 | GREEN='\033[0;32m' 10 | # YELLOW='\033[1;33m' 11 | RED='\033[0;31m' 12 | NC='\033[0m' 13 | 14 | print_header() { 15 | echo "=== Cleaning up podtrace test environment ===" 16 | echo "" 17 | } 18 | 19 | check_kubectl() { 20 | if ! command -v kubectl &>/dev/null; then 21 | echo -e "${RED}Error: kubectl is not installed${NC}" 22 | exit 1 23 | fi 24 | } 25 | 26 | delete_resources() { 27 | echo "Deleting test pods and namespace..." 28 | kubectl delete -f "${SCRIPT_DIR}/test-pods.yaml" --ignore-not-found=true 29 | } 30 | 31 | wait_for_namespace_deletion() { 32 | echo "" 33 | echo "Waiting for namespace to be deleted..." 
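# kubectl wait exits non-zero when the namespace is already gone or the timeout elapses; the trailing '|| true' keeps cleanup best-effort.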
34 | kubectl wait --for=delete namespace/"${NAMESPACE}" --timeout=60s 2>/dev/null || true 35 | } 36 | 37 | print_success() { 38 | echo "" 39 | echo -e "${GREEN}✓ Cleanup completed${NC}" 40 | } 41 | 42 | main() { 43 | print_header 44 | check_kubectl 45 | delete_resources 46 | wait_for_namespace_deletion 47 | print_success 48 | } 49 | 50 | main "$@" 51 | -------------------------------------------------------------------------------- /internal/alerting/rate_limiter_test.go: -------------------------------------------------------------------------------- 1 | package alerting 2 | 3 | import ( 4 | "testing" 5 | "time" 6 | ) 7 | 8 | func TestRateLimiter_Allow(t *testing.T) { 9 | rl := NewRateLimiter(5) 10 | for i := 0; i < 5; i++ { 11 | if !rl.Allow() { 12 | t.Errorf("Request %d should be allowed", i+1) 13 | } 14 | } 15 | if rl.Allow() { 16 | t.Error("Request 6 should not be allowed (rate limit exceeded)") 17 | } 18 | } 19 | 20 | func TestRateLimiter_Allow_TimeWindow(t *testing.T) { 21 | rl := NewRateLimiter(2) 22 | if !rl.Allow() { 23 | t.Error("First request should be allowed") 24 | } 25 | if !rl.Allow() { 26 | t.Error("Second request should be allowed") 27 | } 28 | if rl.Allow() { 29 | t.Error("Third request should not be allowed") 30 | } 31 | time.Sleep(61 * time.Second) 32 | if !rl.Allow() { 33 | t.Error("Request after window should be allowed") 34 | } 35 | } 36 | 37 | func TestRateLimiter_Reset(t *testing.T) { 38 | rl := NewRateLimiter(2) 39 | rl.Allow() 40 | rl.Allow() 41 | if rl.Allow() { 42 | t.Error("Request should not be allowed after limit") 43 | } 44 | rl.Reset() 45 | if !rl.Allow() { 46 | t.Error("Request should be allowed after reset") 47 | } 48 | } 49 | 50 | 51 | -------------------------------------------------------------------------------- /internal/alerting/deduplicator.go: -------------------------------------------------------------------------------- 1 | package alerting 2 | 3 | import ( 4 | "sync" 5 | "time" 6 | ) 7 | 8 | type AlertDeduplicator struct { 9 | seenAlerts map[string]time.Time 10 | window time.Duration 11 | mu sync.RWMutex 12 | } 13 | 14 | func NewAlertDeduplicator(window time.Duration) *AlertDeduplicator { 15 | return &AlertDeduplicator{ 16 | seenAlerts: make(map[string]time.Time), 17 | window: window, 18 | } 19 | } 20 | 21 | func (d *AlertDeduplicator) ShouldSend(alert *Alert) bool { 22 | if alert == nil { 23 | return false 24 | } 25 | key := alert.Key() 26 | d.mu.Lock() 27 | defer d.mu.Unlock() 28 | if lastSent, exists := d.seenAlerts[key]; exists { 29 | if time.Since(lastSent) < d.window { 30 | return false 31 | } 32 | } 33 | d.seenAlerts[key] = time.Now() 34 | return true 35 | } 36 | 37 | func (d *AlertDeduplicator) Cleanup(olderThan time.Duration) { 38 | d.mu.Lock() 39 | defer d.mu.Unlock() 40 | now := time.Now() 41 | for key, timestamp := range d.seenAlerts { 42 | if now.Sub(timestamp) > olderThan { 43 | delete(d.seenAlerts, key) 44 | } 45 | } 46 | } 47 | 48 | func (d *AlertDeduplicator) Reset() { 49 | d.mu.Lock() 50 | defer d.mu.Unlock() 51 | d.seenAlerts = make(map[string]time.Time) 52 | } 53 | 54 | 55 | -------------------------------------------------------------------------------- /internal/diagnose/analyzer/filesystem.go: -------------------------------------------------------------------------------- 1 | package analyzer 2 | 3 | import ( 4 | "sort" 5 | 6 | "github.com/podtrace/podtrace/internal/config" 7 | "github.com/podtrace/podtrace/internal/events" 8 | ) 9 | 10 | func AnalyzeFS(events []*events.Event, fsSlowThreshold float64) (avgLatency, 
maxLatency float64, slowOps int, p50, p95, p99 float64, totalBytes, avgBytes uint64) { 11 | var totalLatency float64 12 | var latencies []float64 13 | maxLatency = 0 14 | slowOps = 0 15 | totalBytes = 0 16 | 17 | for _, e := range events { 18 | latencyMs := float64(e.LatencyNS) / float64(config.NSPerMS) 19 | latencies = append(latencies, latencyMs) 20 | totalLatency += latencyMs 21 | if latencyMs > maxLatency { 22 | maxLatency = latencyMs 23 | } 24 | if latencyMs > fsSlowThreshold { 25 | slowOps++ 26 | } 27 | if e.Bytes > 0 && e.Bytes < uint64(config.MaxBytesForBandwidth) { 28 | totalBytes += e.Bytes 29 | } 30 | } 31 | 32 | if len(events) > 0 { 33 | avgLatency = totalLatency / float64(len(events)) 34 | sort.Float64s(latencies) 35 | p50 = Percentile(latencies, 50) 36 | p95 = Percentile(latencies, 95) 37 | p99 = Percentile(latencies, 99) 38 | if totalBytes > 0 { 39 | avgBytes = totalBytes / uint64(len(events)) 40 | } 41 | } 42 | return 43 | } 44 | -------------------------------------------------------------------------------- /internal/ebpf/loader/loader_test.go: -------------------------------------------------------------------------------- 1 | package loader 2 | 3 | import ( 4 | "os" 5 | "testing" 6 | 7 | "github.com/podtrace/podtrace/internal/config" 8 | ) 9 | 10 | func TestLoadPodtrace_ExplicitPathIsStrict(t *testing.T) { 11 | originalPath := config.BPFObjectPath 12 | defer func() { config.BPFObjectPath = originalPath }() 13 | 14 | config.BPFObjectPath = "/nonexistent/path/to/bpf.o" 15 | spec, err := LoadPodtrace() 16 | if err == nil { 17 | t.Fatalf("expected error for explicit non-existent path, got nil") 18 | } 19 | if spec != nil { 20 | t.Fatalf("expected nil spec on error, got non-nil") 21 | } 22 | } 23 | 24 | func TestLoadPodtrace_DefaultPathFallsBackToEmbedded(t *testing.T) { 25 | originalPath := config.BPFObjectPath 26 | defer func() { config.BPFObjectPath = originalPath }() 27 | 28 | oldWD, err := os.Getwd() 29 | if err != nil { 30 | t.Fatalf("Getwd: %v", err) 31 | } 32 | defer func() { _ = os.Chdir(oldWD) }() 33 | 34 | emptyWD := t.TempDir() 35 | if err := os.Chdir(emptyWD); err != nil { 36 | t.Fatalf("Chdir: %v", err) 37 | } 38 | 39 | config.BPFObjectPath = "bpf/podtrace.bpf.o" 40 | spec, err := LoadPodtrace() 41 | if err != nil { 42 | t.Skipf("BPF object not available in test environment: %v", err) 43 | } 44 | if spec == nil { 45 | t.Fatalf("expected non-nil spec from embedded fallback") 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /internal/diagnose/analyzer/dns.go: -------------------------------------------------------------------------------- 1 | package analyzer 2 | 3 | import ( 4 | "sort" 5 | 6 | "github.com/podtrace/podtrace/internal/config" 7 | "github.com/podtrace/podtrace/internal/events" 8 | ) 9 | 10 | func AnalyzeDNS(events []*events.Event) (avgLatency, maxLatency float64, errors int, p50, p95, p99 float64, topTargets []TargetCount) { 11 | var totalLatency float64 12 | var latencies []float64 13 | maxLatency = 0 14 | errors = 0 15 | targetMap := make(map[string]int) 16 | 17 | for _, e := range events { 18 | latencyMs := float64(e.LatencyNS) / float64(config.NSPerMS) 19 | latencies = append(latencies, latencyMs) 20 | totalLatency += latencyMs 21 | if latencyMs > maxLatency { 22 | maxLatency = latencyMs 23 | } 24 | if e.Error != 0 { 25 | errors++ 26 | } 27 | if e.Target != "" && e.Target != "?" 
{ 28 | targetMap[e.Target]++ 29 | } 30 | } 31 | 32 | if len(events) > 0 { 33 | avgLatency = totalLatency / float64(len(events)) 34 | sort.Float64s(latencies) 35 | p50 = Percentile(latencies, 50) 36 | p95 = Percentile(latencies, 95) 37 | p99 = Percentile(latencies, 99) 38 | } 39 | 40 | for target, count := range targetMap { 41 | topTargets = append(topTargets, TargetCount{target, count}) 42 | } 43 | sort.Slice(topTargets, func(i, j int) bool { 44 | return topTargets[i].Count > topTargets[j].Count 45 | }) 46 | 47 | return 48 | } 49 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/epic.yaml: -------------------------------------------------------------------------------- 1 | name: Epic 2 | description: For tracking a large feature, including how to demo it. 3 | title: "epic: " 4 | labels: 5 | - epic 6 | body: 7 | - type: textarea 8 | id: objective 9 | attributes: 10 | label: Demo Objective 11 | description: Please describe the objective of your demo. 12 | placeholder: | 13 | - [ ] User should be able to ... 14 | - [ ] ... 15 | validations: 16 | required: true 17 | 18 | - type: textarea 19 | id: steps 20 | attributes: 21 | label: Demo Steps 22 | description: Please describe the steps for the demo. 23 | placeholder: | 24 | 1. Admin does X 25 | 1. User does Y 26 | 1. Everyone is happy :) 27 | 28 | - type: checkboxes 29 | id: action-items 30 | attributes: 31 | label: Action Items 32 | description: Please check the following 33 | options: 34 | - label: Scope of the current demo is necessary to fit in the prototype boundaries 35 | required: true 36 | - label: Contribute to the final demo script and recording 37 | 38 | - type: textarea 39 | id: stories 40 | attributes: 41 | label: Stories 42 | placeholder: | 43 | - [ ] (Example) **stretch-goal:** Add Widgets to `podtrace` CLI 44 | - Out-of-scope (prototype x): Send Widgets to space 45 | validations: 46 | required: false 47 | -------------------------------------------------------------------------------- /internal/ebpf/tracer/errors.go: -------------------------------------------------------------------------------- 1 | package tracer 2 | 3 | import "fmt" 4 | 5 | type ErrorCode int 6 | 7 | const ( 8 | ErrCodeCollectionFailed ErrorCode = iota + 1 9 | ErrCodeRingBufferFailed 10 | ErrCodeMapLookupFailed 11 | ErrCodeInvalidEvent 12 | ) 13 | 14 | type TracerError struct { 15 | Code ErrorCode 16 | Message string 17 | Err error 18 | } 19 | 20 | func (e *TracerError) Error() string { 21 | if e.Err != nil { 22 | return fmt.Sprintf("%s: %v", e.Message, e.Err) 23 | } 24 | return e.Message 25 | } 26 | 27 | func (e *TracerError) Unwrap() error { 28 | return e.Err 29 | } 30 | 31 | func NewCollectionError(err error) *TracerError { 32 | return &TracerError{ 33 | Code: ErrCodeCollectionFailed, 34 | Message: "failed to create eBPF collection", 35 | Err: err, 36 | } 37 | } 38 | 39 | func NewRingBufferError(err error) *TracerError { 40 | return &TracerError{ 41 | Code: ErrCodeRingBufferFailed, 42 | Message: "failed to create ring buffer reader", 43 | Err: err, 44 | } 45 | } 46 | 47 | func NewMapLookupError(mapName string, err error) *TracerError { 48 | return &TracerError{ 49 | Code: ErrCodeMapLookupFailed, 50 | Message: fmt.Sprintf("failed to lookup map %s", mapName), 51 | Err: err, 52 | } 53 | } 54 | 55 | func NewInvalidEventError(reason string) *TracerError { 56 | return &TracerError{ 57 | Code: ErrCodeInvalidEvent, 58 | Message: fmt.Sprintf("invalid event: %s", reason), 59 | } 60 | } 61 | 62 | 
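// Usage sketch (not part of the original file): callers can unwrap a
// *TracerError with errors.As and branch on its code, e.g.:
//
//	var terr *TracerError
//	if errors.As(err, &terr) && terr.Code == ErrCodeRingBufferFailed {
//		// retry ring buffer setup or surface a clearer message
//	}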
-------------------------------------------------------------------------------- /doc/README.md: -------------------------------------------------------------------------------- 1 | # Podtrace Documentation 2 | 3 | Welcome to the `Podtrace` documentation. This directory contains comprehensive guides for using, understanding, and developing with `Podtrace`. 4 | 5 | ## Documentation Index 6 | 7 | - **[Architecture](architecture.md)** - System architecture, components, and data flow 8 | - **[Installation](installation.md)** - Installation guide, prerequisites, and troubleshooting 9 | - **[Usage Guide](usage.md)** - Usage examples, command-line options, and tips 10 | - **[eBPF Internals](ebpf-internals.md)** - Deep dive into eBPF programs and tracing mechanisms 11 | - **[Metrics](metrics.md)** - Prometheus metrics, Grafana integration, and query examples 12 | - **[Development](development.md)** - Development guide, code structure, testing, and contributing 13 | - **[Distributed Tracing Guide](distributed-tracing.md)** - Complete distributed tracing user guide 14 | - **[Tracing Exporters Setup](tracing-exporters.md)** - Detailed exporter configuration (OTLP, Jaeger, Splunk) 15 | 16 | ## Quick Start 17 | 18 | 1. **New to Podtrace?** Start with [Installation](installation.md) and [Usage Guide](usage.md) 19 | 2. **Want to understand how it works?** Read [Architecture](architecture.md) 20 | 3. **Need to integrate metrics?** Check [Metrics](metrics.md) 21 | 4. **Setting up distributed tracing?** See [Distributed Tracing Guide](distributed-tracing.md) and [Tracing Exporters Setup](tracing-exporters.md) 22 | 5. **Contributing?** See [Development](development.md) -------------------------------------------------------------------------------- /internal/ebpf/cache/pathcache.go: -------------------------------------------------------------------------------- 1 | package cache 2 | 3 | import ( 4 | "sync" 5 | "time" 6 | 7 | "github.com/podtrace/podtrace/internal/config" 8 | ) 9 | 10 | type pathCacheEntry struct { 11 | path string 12 | timestamp time.Time 13 | } 14 | 15 | type PathCache struct { 16 | mu sync.RWMutex 17 | cache map[string]*pathCacheEntry 18 | ttl time.Duration 19 | } 20 | 21 | func NewPathCache() *PathCache { 22 | ttl := time.Duration(config.CacheTTLSeconds) * time.Second 23 | return &PathCache{ 24 | cache: make(map[string]*pathCacheEntry), 25 | ttl: ttl, 26 | } 27 | } 28 | 29 | func (pc *PathCache) Get(key string) (string, bool) { 30 | pc.mu.RLock() 31 | defer pc.mu.RUnlock() 32 | entry, ok := pc.cache[key] 33 | if !ok { 34 | return "", false 35 | } 36 | if time.Since(entry.timestamp) > pc.ttl { 37 | return "", false 38 | } 39 | return entry.path, true 40 | } 41 | 42 | func (pc *PathCache) Set(key, path string) { 43 | if path == "" { 44 | return 45 | } 46 | pc.mu.Lock() 47 | defer pc.mu.Unlock() 48 | pc.cache[key] = &pathCacheEntry{ 49 | path: path, 50 | timestamp: time.Now(), 51 | } 52 | } 53 | 54 | func (pc *PathCache) Clear() { 55 | pc.mu.Lock() 56 | defer pc.mu.Unlock() 57 | pc.cache = make(map[string]*pathCacheEntry) 58 | } 59 | 60 | func (pc *PathCache) CleanupExpired() { 61 | pc.mu.Lock() 62 | defer pc.mu.Unlock() 63 | now := time.Now() 64 | for key, entry := range pc.cache { 65 | if now.Sub(entry.timestamp) > pc.ttl { 66 | delete(pc.cache, key) 67 | } 68 | } 69 | } 70 | 71 | -------------------------------------------------------------------------------- /internal/ebpf/tracer_wrapper_test.go: -------------------------------------------------------------------------------- 1 | 
package ebpf 2 | 3 | import ( 4 | "context" 5 | "testing" 6 | "time" 7 | 8 | "github.com/podtrace/podtrace/internal/events" 9 | ) 10 | 11 | func TestNewTracer(t *testing.T) { 12 | tracer, err := NewTracer() 13 | if err == nil && tracer == nil { 14 | t.Log("NewTracer returned nil tracer without error (expected for non-existent BPF object)") 15 | } 16 | if err != nil { 17 | t.Logf("NewTracer returned error as expected: %v", err) 18 | } 19 | } 20 | 21 | func TestWaitForInterrupt(t *testing.T) { 22 | done := make(chan bool, 1) 23 | go func() { 24 | defer func() { 25 | if r := recover(); r != nil { 26 | done <- true 27 | } 28 | }() 29 | WaitForInterrupt() 30 | done <- true 31 | }() 32 | 33 | select { 34 | case <-done: 35 | t.Log("WaitForInterrupt completed") 36 | case <-time.After(100 * time.Millisecond): 37 | t.Log("WaitForInterrupt is waiting for signal (expected behavior)") 38 | } 39 | } 40 | 41 | func TestTracerInterface(t *testing.T) { 42 | var _ TracerInterface = (*mockTracerForInterface)(nil) 43 | } 44 | 45 | type mockTracerForInterface struct{} 46 | 47 | func (m *mockTracerForInterface) AttachToCgroup(cgroupPath string) error { 48 | return nil 49 | } 50 | 51 | func (m *mockTracerForInterface) SetContainerID(containerID string) error { 52 | return nil 53 | } 54 | 55 | func (m *mockTracerForInterface) Start(ctx context.Context, eventChan chan<- *events.Event) error { 56 | return nil 57 | } 58 | 59 | func (m *mockTracerForInterface) Stop() error { 60 | return nil 61 | } 62 | 63 | -------------------------------------------------------------------------------- /internal/diagnose/analyzer/tls.go: -------------------------------------------------------------------------------- 1 | package analyzer 2 | 3 | import ( 4 | "sort" 5 | 6 | "github.com/podtrace/podtrace/internal/config" 7 | "github.com/podtrace/podtrace/internal/events" 8 | ) 9 | 10 | func AnalyzeTLS(events []*events.Event) ( 11 | avgLatency, maxLatency float64, 12 | errors int, 13 | p50, p95, p99 float64, 14 | errorBreakdown map[int32]int, 15 | topTargets []TargetCount, 16 | ) { 17 | var totalLatency float64 18 | var latencies []float64 19 | maxLatency = 0 20 | errors = 0 21 | errorBreakdown = make(map[int32]int) 22 | targetMap := make(map[string]int) 23 | 24 | for _, e := range events { 25 | latencyMs := float64(e.LatencyNS) / float64(config.NSPerMS) 26 | latencies = append(latencies, latencyMs) 27 | totalLatency += latencyMs 28 | 29 | if latencyMs > maxLatency { 30 | maxLatency = latencyMs 31 | } 32 | 33 | if e.Error != 0 { 34 | errors++ 35 | errorBreakdown[e.Error]++ 36 | } 37 | 38 | if e.Target != "" && e.Target != "?" 
&& e.Target != "unknown" && e.Target != "file" { 39 | targetMap[e.Target]++ 40 | } 41 | } 42 | 43 | if len(events) > 0 { 44 | avgLatency = totalLatency / float64(len(events)) 45 | sort.Float64s(latencies) 46 | p50 = Percentile(latencies, 50) 47 | p95 = Percentile(latencies, 95) 48 | p99 = Percentile(latencies, 99) 49 | } 50 | 51 | for target, count := range targetMap { 52 | topTargets = append(topTargets, TargetCount{target, count}) 53 | } 54 | sort.Slice(topTargets, func(i, j int) bool { 55 | return topTargets[i].Count > topTargets[j].Count 56 | }) 57 | 58 | return 59 | } 60 | 61 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.yaml: -------------------------------------------------------------------------------- 1 | name: Feature Request 2 | description: Suggest an idea for this project 3 | title: "feature: " 4 | labels: 5 | - kind/feature 6 | body: 7 | - type: textarea 8 | id: problem 9 | attributes: 10 | label: Feature Description 11 | description: Is your feature request related to a problem? A clear and concise description of what the problem is. 12 | placeholder: I'm always frustrated when [...] 13 | validations: 14 | required: true 15 | 16 | - type: textarea 17 | id: solution 18 | attributes: 19 | label: Proposed Solution 20 | description: A clear and concise description of what you want to happen. 21 | placeholder: We can do [...] 22 | validations: 23 | required: true 24 | 25 | - type: textarea 26 | id: alternatives 27 | attributes: 28 | label: Alternative Solutions 29 | description: A clear and concise description of any alternative solutions or features that you've considered. 30 | placeholder: I think another approach would be [...] 31 | validations: 32 | required: false 33 | 34 | - type: checkboxes 35 | id: contribute 36 | attributes: 37 | label: Want to contribute? 38 | options: 39 | - label: I would like to work on this issue. 40 | required: false 41 | 42 | - type: textarea 43 | id: additional 44 | attributes: 45 | label: Additional Context 46 | description: Add any other context or screenshots about the feature request here. 47 | validations: 48 | required: false 49 | -------------------------------------------------------------------------------- /test/test-debug.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | NAMESPACE="${1:-podtrace-test}" 5 | POD_NAME="${2:-nginx-cpu-test}" 6 | DURATION="${3:-20s}" 7 | 8 | log() { 9 | echo "$@" 10 | } 11 | 12 | check_pod_exists() { 13 | local ns="$1" 14 | local pod="$2" 15 | 16 | if ! kubectl get pod "${pod}" -n "${ns}" &>/dev/null; then 17 | log "Error: Pod ${pod} not found" 18 | exit 1 19 | fi 20 | } 21 | 22 | show_pod_info() { 23 | local ns="$1" 24 | local pod="$2" 25 | 26 | log "Pod Info:" 27 | kubectl get pod "${pod}" -n "${ns}" -o wide 28 | log "" 29 | } 30 | 31 | show_recent_logs() { 32 | local ns="$1" 33 | local pod="$2" 34 | 35 | log "Recent Pod Logs:" 36 | kubectl logs "${pod}" -n "${ns}" --tail=10 || log "No logs available" 37 | log "" 38 | } 39 | 40 | show_pod_activity() { 41 | local ns="$1" 42 | local pod="$2" 43 | 44 | log "Checking pod activity..." 45 | kubectl exec "${pod}" -n "${ns}" -- ps aux 2>/dev/null || log "Cannot exec into pod" 46 | log "" 47 | } 48 | 49 | run_podtrace() { 50 | local ns="$1" 51 | local pod="$2" 52 | local duration="$3" 53 | 54 | log "Running podtrace (check stderr for eBPF attachment info)..." 
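# '2>&1' on the podtrace invocation below folds stderr (where the eBPF attachment info is printed) into stdout so it appears inline.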
55 | log "---" 56 | ./bin/podtrace -n "${ns}" "${pod}" --diagnose "${duration}" 2>&1 57 | log "---" 58 | } 59 | 60 | main() { 61 | log "=== Debug Test: ${POD_NAME} for ${DURATION} ===" 62 | log "" 63 | 64 | check_pod_exists "${NAMESPACE}" "${POD_NAME}" 65 | show_pod_info "${NAMESPACE}" "${POD_NAME}" 66 | show_recent_logs "${NAMESPACE}" "${POD_NAME}" 67 | show_pod_activity "${NAMESPACE}" "${POD_NAME}" 68 | run_podtrace "${NAMESPACE}" "${POD_NAME}" "${DURATION}" 69 | } 70 | 71 | main 72 | -------------------------------------------------------------------------------- /internal/alerting/sender.go: -------------------------------------------------------------------------------- 1 | package alerting 2 | 3 | import ( 4 | "context" 5 | "errors" 6 | "fmt" 7 | "time" 8 | ) 9 | 10 | type Sender interface { 11 | Send(ctx context.Context, alert *Alert) error 12 | Name() string 13 | } 14 | 15 | type RetrySender struct { 16 | sender Sender 17 | maxRetries int 18 | backoffBase time.Duration 19 | } 20 | 21 | func NewRetrySender(sender Sender, maxRetries int, backoffBase time.Duration) *RetrySender { 22 | return &RetrySender{ 23 | sender: sender, 24 | maxRetries: maxRetries, 25 | backoffBase: backoffBase, 26 | } 27 | } 28 | 29 | func (rs *RetrySender) Send(ctx context.Context, alert *Alert) error { 30 | if alert == nil { 31 | return fmt.Errorf("alert is nil") 32 | } 33 | if err := alert.Validate(); err != nil { 34 | return fmt.Errorf("invalid alert: %w", err) 35 | } 36 | alert.Sanitize() 37 | var lastErr error 38 | for attempt := 0; attempt <= rs.maxRetries; attempt++ { 39 | if attempt > 0 { 40 | backoff := rs.backoffBase * time.Duration(1< 30*time.Second { 42 | backoff = 30 * time.Second 43 | } 44 | select { 45 | case <-ctx.Done(): 46 | return ctx.Err() 47 | case <-time.After(backoff): 48 | } 49 | } 50 | err := rs.sender.Send(ctx, alert) 51 | if err == nil { 52 | return nil 53 | } 54 | lastErr = err 55 | if errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) { 56 | return err 57 | } 58 | } 59 | return fmt.Errorf("failed after %d attempts: %w", rs.maxRetries+1, lastErr) 60 | } 61 | 62 | func (rs *RetrySender) Name() string { 63 | return rs.sender.Name() 64 | } 65 | 66 | 67 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/adopters.yaml: -------------------------------------------------------------------------------- 1 | name: Register as adopter 2 | description: If your organization is using podtrace, we would be delighted to add you to our list of adopters. Please report how you use podtrace and we will take care of adding it to our adopters list. 3 | title: "adopter: COMPANY_NAME" 4 | labels: 5 | - kind/documentation 6 | body: 7 | - type: input 8 | id: contact 9 | attributes: 10 | label: Contact Details 11 | description: How can we get in touch with you if we need more info? 12 | placeholder: eg. email@example.com 13 | validations: 14 | required: false 15 | 16 | - type: input 17 | id: org 18 | attributes: 19 | label: Organization 20 | description: Your organization's name. 21 | validations: 22 | required: true 23 | 24 | - type: textarea 25 | id: description 26 | attributes: 27 | label: Description 28 | description: What are you using podtrace for at your organization? Are you using it for a specific product or project? 29 | validations: 30 | required: true 31 | 32 | - type: dropdown 33 | id: maturity 34 | attributes: 35 | label: Maturity Stage 36 | description: What stage are you at in your adoption of podtrace? 
37 | multiple: false 38 | options: 39 | - Production 40 | - Pre-production 41 | - Development 42 | - Conceptual 43 | - I don't know 44 | validations: 45 | required: false 46 | 47 | - type: input 48 | id: url 49 | attributes: 50 | label: Info Link 51 | description: If you have public documentation for the product or project, feel free to share it here. 52 | validations: 53 | required: false 54 | -------------------------------------------------------------------------------- /internal/alerting/deduplicator_test.go: -------------------------------------------------------------------------------- 1 | package alerting 2 | 3 | import ( 4 | "testing" 5 | "time" 6 | ) 7 | 8 | func TestAlertDeduplicator_ShouldSend(t *testing.T) { 9 | dedup := NewAlertDeduplicator(5 * time.Minute) 10 | alert := &Alert{ 11 | Severity: SeverityWarning, 12 | Title: "Test Alert", 13 | Message: "Test message", 14 | Timestamp: time.Now(), 15 | Source: "test", 16 | } 17 | if !dedup.ShouldSend(alert) { 18 | t.Error("First alert should be sent") 19 | } 20 | if dedup.ShouldSend(alert) { 21 | t.Error("Duplicate alert should not be sent") 22 | } 23 | } 24 | 25 | func TestAlertDeduplicator_ShouldSend_Nil(t *testing.T) { 26 | dedup := NewAlertDeduplicator(5 * time.Minute) 27 | if dedup.ShouldSend(nil) { 28 | t.Error("Nil alert should not be sent") 29 | } 30 | } 31 | 32 | func TestAlertDeduplicator_Cleanup(t *testing.T) { 33 | dedup := NewAlertDeduplicator(1 * time.Second) 34 | alert := &Alert{ 35 | Severity: SeverityWarning, 36 | Title: "Test Alert", 37 | Message: "Test message", 38 | Timestamp: time.Now(), 39 | Source: "test", 40 | } 41 | dedup.ShouldSend(alert) 42 | time.Sleep(2 * time.Second) 43 | dedup.Cleanup(1 * time.Second) 44 | if !dedup.ShouldSend(alert) { 45 | t.Error("Alert should be sendable after cleanup") 46 | } 47 | } 48 | 49 | func TestAlertDeduplicator_Reset(t *testing.T) { 50 | dedup := NewAlertDeduplicator(5 * time.Minute) 51 | alert := &Alert{ 52 | Severity: SeverityWarning, 53 | Title: "Test Alert", 54 | Message: "Test message", 55 | Timestamp: time.Now(), 56 | Source: "test", 57 | } 58 | dedup.ShouldSend(alert) 59 | dedup.Reset() 60 | if !dedup.ShouldSend(alert) { 61 | t.Error("Alert should be sendable after reset") 62 | } 63 | } 64 | 65 | 66 | -------------------------------------------------------------------------------- /bpf/common.h: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: GPL-2.0 2 | 3 | #ifndef PODTRACE_COMMON_H 4 | #define PODTRACE_COMMON_H 5 | 6 | #include "vmlinux.h" 7 | #include <bpf/bpf_helpers.h> 8 | #include <bpf/bpf_core_read.h> 9 | #include <bpf/bpf_tracing.h> 10 | 11 | #ifndef PODTRACE_VMLINUX_FROM_BTF 12 | struct pt_regs { 13 | long unsigned int r15; 14 | long unsigned int r14; 15 | long unsigned int r13; 16 | long unsigned int r12; 17 | long unsigned int bp; 18 | long unsigned int bx; 19 | long unsigned int r11; 20 | long unsigned int r10; 21 | long unsigned int r9; 22 | long unsigned int r8; 23 | long unsigned int ax; 24 | long unsigned int cx; 25 | long unsigned int dx; 26 | long unsigned int si; 27 | long unsigned int di; 28 | long unsigned int orig_ax; 29 | long unsigned int ip; 30 | long unsigned int cs; 31 | long unsigned int flags; 32 | long unsigned int sp; 33 | long unsigned int ss; 34 | }; 35 | 36 | struct sockaddr_in { 37 | u16 sin_family; 38 | u16 sin_port; 39 | struct { 40 | u32 s_addr; 41 | } sin_addr; 42 | u8 sin_zero[8]; 43 | }; 44 | #endif 45 | 46 | #define MAX_STRING_LEN 128 47 | #define MAX_STACK_DEPTH 64 48 | 49 | #define NS_PER_MS 1000000ULL 50 | #define
PAGE_SIZE 4096 51 | #define MAX_BYTES_THRESHOLD (10ULL * 1024ULL * 1024ULL) 52 | #define MIN_LATENCY_NS (1ULL * NS_PER_MS) 53 | 54 | #define AF_INET 2 55 | #define AF_INET6 10 56 | #define EAGAIN 11 57 | #define HEX_ADDR_LEN 16 58 | #define COMM_LEN 16 59 | 60 | #ifndef BPF_MAP_TYPE_RINGBUF 61 | #define BPF_MAP_TYPE_RINGBUF 27 62 | #endif 63 | #ifndef BPF_MAP_TYPE_HASH 64 | #define BPF_MAP_TYPE_HASH 1 65 | #endif 66 | #ifndef BPF_MAP_TYPE_ARRAY 67 | #define BPF_MAP_TYPE_ARRAY 2 68 | #endif 69 | #ifndef BPF_ANY 70 | #define BPF_ANY 0 71 | #endif 72 | #ifndef BPF_F_USER_STACK 73 | #define BPF_F_USER_STACK 8 74 | #endif 75 | 76 | #endif 77 | -------------------------------------------------------------------------------- /internal/cri/resolver_test.go: -------------------------------------------------------------------------------- 1 | package cri 2 | 3 | import ( 4 | "testing" 5 | ) 6 | 7 | func TestDefaultCandidateEndpoints_PodmanDisabled(t *testing.T) { 8 | t.Setenv("PODTRACE_CRI_ALLOW_PODMAN", "") 9 | eps := DefaultCandidateEndpoints() 10 | for _, ep := range eps { 11 | if ep == "unix:///run/podman/podman.sock" || ep == "unix:///var/run/podman/podman.sock" { 12 | t.Fatalf("expected podman endpoints to be excluded by default, got %v", eps) 13 | } 14 | } 15 | } 16 | 17 | func TestDefaultCandidateEndpoints_PodmanEnabled(t *testing.T) { 18 | t.Setenv("PODTRACE_CRI_ALLOW_PODMAN", "1") 19 | eps := DefaultCandidateEndpoints() 20 | foundRun := false 21 | foundVar := false 22 | for _, ep := range eps { 23 | if ep == "unix:///run/podman/podman.sock" { 24 | foundRun = true 25 | } 26 | if ep == "unix:///var/run/podman/podman.sock" { 27 | foundVar = true 28 | } 29 | } 30 | if !foundRun || !foundVar { 31 | t.Fatalf("expected podman endpoints when enabled, got %v", eps) 32 | } 33 | } 34 | 35 | func TestNormalizeUnixTarget(t *testing.T) { 36 | cases := []struct { 37 | in string 38 | want string 39 | }{ 40 | {"unix:///run/containerd/containerd.sock", "unix:///run/containerd/containerd.sock"}, 41 | {"/run/containerd/containerd.sock", "unix:///run/containerd/containerd.sock"}, 42 | {"something", "something"}, 43 | } 44 | for _, tc := range cases { 45 | if got := normalizeUnixTarget(tc.in); got != tc.want { 46 | t.Fatalf("normalizeUnixTarget(%q)=%q, want %q", tc.in, got, tc.want) 47 | } 48 | } 49 | } 50 | 51 | func TestExtractLooseCgroupsPath(t *testing.T) { 52 | s := `{"cgroupsPath":"\\/kubepods.slice\\/kubepods-burstable.slice\\/cri-containerd-abcdef.scope"}` 53 | got := extractLooseCgroupsPath(s) 54 | if got != "/kubepods.slice/kubepods-burstable.slice/cri-containerd-abcdef.scope" { 55 | t.Fatalf("unexpected cgroups path: %q", got) 56 | } 57 | } 58 | 59 | 60 | 61 | -------------------------------------------------------------------------------- /test/test-pods.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: v1 3 | kind: Namespace 4 | metadata: 5 | name: podtrace-test 6 | --- 7 | apiVersion: v1 8 | kind: Pod 9 | metadata: 10 | name: nginx-test 11 | namespace: podtrace-test 12 | labels: 13 | app: nginx 14 | test: podtrace 15 | spec: 16 | containers: 17 | - name: nginx 18 | image: nginx:alpine 19 | ports: 20 | - containerPort: 80 21 | - containerPort: 3000 22 | command: ["/bin/sh", "-c"] 23 | args: 24 | - | 25 | nginx 26 | while true; do 27 | wget -q -O /dev/null http://localhost/ || true 28 | nslookup google.com || true 29 | sleep 5 30 | done 31 | --- 32 | apiVersion: v1 33 | kind: Pod 34 | metadata: 35 | name: busybox-test 36 | namespace: 
podtrace-test 37 | labels: 38 | app: busybox 39 | test: podtrace 40 | spec: 41 | containers: 42 | - name: busybox 43 | image: busybox:latest 44 | command: ["/bin/sh", "-c"] 45 | args: 46 | - | 47 | apk add --no-cache curl bind-tools 2>/dev/null || true 48 | while true; do 49 | nslookup kubernetes.default.svc.cluster.local || true 50 | nslookup google.com || true 51 | wget -q -O /dev/null http://www.google.com || true 52 | echo "test $(date)" >> /tmp/test.log 53 | sync 54 | sleep 3 55 | done 56 | --- 57 | apiVersion: v1 58 | kind: Pod 59 | metadata: 60 | name: alpine-test 61 | namespace: podtrace-test 62 | labels: 63 | app: alpine 64 | test: podtrace 65 | spec: 66 | containers: 67 | - name: alpine 68 | image: alpine:latest 69 | command: ["/bin/sh", "-c"] 70 | args: 71 | - | 72 | apk add --no-cache curl bind-tools 73 | while true; do 74 | curl -s --max-time 5 http://www.google.com > /dev/null || true 75 | nslookup github.com || true 76 | echo "$(date) - test log entry" >> /var/log/app.log 77 | sync 78 | sleep 4 79 | done 80 | -------------------------------------------------------------------------------- /cmd/podtrace/mocks.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | 6 | "github.com/podtrace/podtrace/internal/ebpf" 7 | "github.com/podtrace/podtrace/internal/events" 8 | "github.com/podtrace/podtrace/internal/kubernetes" 9 | ) 10 | 11 | type mockPodResolver struct { 12 | resolvePodFunc func(ctx context.Context, podName, namespace, containerName string) (*kubernetes.PodInfo, error) 13 | } 14 | 15 | func (m *mockPodResolver) ResolvePod(ctx context.Context, podName, namespace, containerName string) (*kubernetes.PodInfo, error) { 16 | if m.resolvePodFunc != nil { 17 | return m.resolvePodFunc(ctx, podName, namespace, containerName) 18 | } 19 | return &kubernetes.PodInfo{ 20 | PodName: podName, 21 | Namespace: namespace, 22 | ContainerID: "test-container-id", 23 | CgroupPath: "/sys/fs/cgroup/test", 24 | ContainerName: containerName, 25 | }, nil 26 | } 27 | 28 | type mockTracer struct { 29 | attachToCgroupFunc func(cgroupPath string) error 30 | setContainerIDFunc func(containerID string) error 31 | startFunc func(ctx context.Context, eventChan chan<- *events.Event) error 32 | stopFunc func() error 33 | } 34 | 35 | func (m *mockTracer) AttachToCgroup(cgroupPath string) error { 36 | if m.attachToCgroupFunc != nil { 37 | return m.attachToCgroupFunc(cgroupPath) 38 | } 39 | return nil 40 | } 41 | 42 | func (m *mockTracer) SetContainerID(containerID string) error { 43 | if m.setContainerIDFunc != nil { 44 | return m.setContainerIDFunc(containerID) 45 | } 46 | return nil 47 | } 48 | 49 | func (m *mockTracer) Start(ctx context.Context, eventChan chan<- *events.Event) error { 50 | if m.startFunc != nil { 51 | return m.startFunc(ctx, eventChan) 52 | } 53 | return nil 54 | } 55 | 56 | func (m *mockTracer) Stop() error { 57 | if m.stopFunc != nil { 58 | return m.stopFunc() 59 | } 60 | return nil 61 | } 62 | 63 | var _ ebpf.TracerInterface = (*mockTracer)(nil) 64 | var _ kubernetes.PodResolverInterface = (*mockPodResolver)(nil) -------------------------------------------------------------------------------- /internal/alerting/logger_hook.go: -------------------------------------------------------------------------------- 1 | package alerting 2 | 3 | import ( 4 | "sync" 5 | "time" 6 | 7 | "go.uber.org/zap" 8 | "go.uber.org/zap/zapcore" 9 | ) 10 | 11 | var ( 12 | globalManager *Manager 13 | managerMu sync.RWMutex 14 | ) 15 
| 16 | func SetGlobalManager(manager *Manager) { 17 | managerMu.Lock() 18 | defer managerMu.Unlock() 19 | globalManager = manager 20 | } 21 | 22 | func GetGlobalManager() *Manager { 23 | managerMu.RLock() 24 | defer managerMu.RUnlock() 25 | return globalManager 26 | } 27 | 28 | func CreateAlertFromLog(level zapcore.Level, msg string, fields []zap.Field, podName, namespace string) *Alert { 29 | managerMu.RLock() 30 | manager := globalManager 31 | managerMu.RUnlock() 32 | if manager == nil || !manager.IsEnabled() { 33 | return nil 34 | } 35 | var severity AlertSeverity 36 | switch level { 37 | case zapcore.FatalLevel: 38 | severity = SeverityFatal 39 | case zapcore.ErrorLevel: 40 | severity = SeverityError 41 | case zapcore.WarnLevel: 42 | severity = SeverityWarning 43 | default: 44 | return nil 45 | } 46 | context := make(map[string]interface{}) 47 | errorCode := "" 48 | for _, field := range fields { 49 | switch field.Type { 50 | case zapcore.StringType: 51 | context[field.Key] = field.String 52 | case zapcore.Int64Type, zapcore.Int32Type: 53 | context[field.Key] = field.Integer 54 | case zapcore.ErrorType: 55 | if field.Interface != nil { 56 | context[field.Key] = field.Interface.(error).Error() 57 | } 58 | } 59 | if field.Key == "error_code" || field.Key == "code" { 60 | if field.Type == zapcore.StringType { 61 | errorCode = field.String 62 | } 63 | } 64 | } 65 | alert := &Alert{ 66 | Severity: severity, 67 | Title: "Podtrace " + level.String() + " Error", 68 | Message: msg, 69 | Timestamp: time.Now(), 70 | Source: "logger", 71 | PodName: podName, 72 | Namespace: namespace, 73 | Context: context, 74 | ErrorCode: errorCode, 75 | } 76 | return alert 77 | } 78 | 79 | -------------------------------------------------------------------------------- /internal/ebpf/cache/cache.go: -------------------------------------------------------------------------------- 1 | package cache 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | "strings" 7 | "time" 8 | 9 | "github.com/podtrace/podtrace/internal/config" 10 | "github.com/podtrace/podtrace/internal/metricsexporter" 11 | "github.com/podtrace/podtrace/internal/validation" 12 | ) 13 | 14 | var ( 15 | globalCache *LRUCache 16 | ) 17 | 18 | func init() { 19 | ttl := time.Duration(config.CacheTTLSeconds) * time.Second 20 | globalCache = NewLRUCache(config.CacheMaxSize, ttl) 21 | } 22 | 23 | func ResetGlobalCache() { 24 | if globalCache != nil { 25 | globalCache.Close() 26 | } 27 | ttl := time.Duration(config.CacheTTLSeconds) * time.Second 28 | globalCache = NewLRUCache(config.CacheMaxSize, ttl) 29 | } 30 | 31 | func GetProcessNameQuick(pid uint32) string { 32 | if !validation.ValidatePID(pid) { 33 | return "" 34 | } 35 | 36 | if name, ok := globalCache.Get(pid); ok { 37 | return name 38 | } 39 | 40 | metricsexporter.RecordProcessCacheMiss() 41 | 42 | name := "" 43 | 44 | cmdlinePath := fmt.Sprintf("%s/%d/cmdline", config.ProcBasePath, pid) 45 | if cmdline, err := os.ReadFile(cmdlinePath); err == nil { 46 | parts := strings.Split(string(cmdline), "\x00") 47 | if len(parts) > 0 && parts[0] != "" { 48 | name = parts[0] 49 | if idx := strings.LastIndex(name, "/"); idx >= 0 { 50 | name = name[idx+1:] 51 | } 52 | } 53 | } 54 | 55 | if name == "" { 56 | statPath := fmt.Sprintf("%s/%d/stat", config.ProcBasePath, pid) 57 | if data, err := os.ReadFile(statPath); err == nil { 58 | statStr := string(data) 59 | start := strings.Index(statStr, "(") 60 | end := strings.LastIndex(statStr, ")") 61 | if start >= 0 && end > start { 62 | name = statStr[start+1 : end] 63 | } 64 | } 65 
| } 66 | 67 | if name == "" { 68 | commPath := fmt.Sprintf("%s/%d/comm", config.ProcBasePath, pid) 69 | if data, err := os.ReadFile(commPath); err == nil { 70 | name = strings.TrimSpace(string(data)) 71 | } 72 | } 73 | 74 | sanitized := validation.SanitizeProcessName(name) 75 | globalCache.Set(pid, sanitized) 76 | return sanitized 77 | } 78 | 79 | -------------------------------------------------------------------------------- /bpf/memory.c: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: GPL-2.0 2 | 3 | #include "common.h" 4 | #include "maps.h" 5 | #include "events.h" 6 | #include "helpers.h" 7 | 8 | SEC("tp/exceptions/page_fault_user") 9 | int tracepoint_page_fault_user(void *ctx) { 10 | // NOTE: Tracepoint argument layouts can differ across kernels/distros when using raw tracepoints. 11 | // For stability, avoid relying on tracepoint "common_pid" field offsets here and use the 12 | // current task PID from bpf_get_current_pid_tgid(). 13 | u32 pid = bpf_get_current_pid_tgid() >> 32; 14 | 15 | struct event *e = get_event_buf(); 16 | if (!e) { 17 | return 0; 18 | } 19 | e->timestamp = bpf_ktime_get_ns(); 20 | e->pid = pid; 21 | e->type = EVENT_PAGE_FAULT; 22 | e->latency_ns = 0; 23 | // Best-effort: omit error_code (layout is not stable without BTF-typed tracepoints). 24 | e->error = 0; 25 | e->bytes = 0; 26 | e->tcp_state = 0; 27 | e->target[0] = '\0'; 28 | 29 | capture_user_stack(ctx, e->pid, 0, e); 30 | bpf_ringbuf_output(&events, e, sizeof(*e), 0); 31 | return 0; 32 | } 33 | 34 | SEC("tp/oom/oom_kill_process") 35 | int tracepoint_oom_kill_process(void *ctx) { 36 | struct { 37 | unsigned short common_type; 38 | unsigned char common_flags; 39 | unsigned char common_preempt_count; 40 | int common_pid; 41 | char comm[16]; 42 | u32 pid; 43 | u32 tid; 44 | u64 totalpages; 45 | u64 points; 46 | u64 victim_points; 47 | const char *constraint; 48 | u32 constraint_kind; 49 | u32 gfp_mask; 50 | int order; 51 | } args_local; 52 | 53 | bpf_probe_read_kernel(&args_local, sizeof(args_local), ctx); 54 | 55 | struct event *e = get_event_buf(); 56 | if (!e) { 57 | return 0; 58 | } 59 | e->timestamp = bpf_ktime_get_ns(); 60 | e->pid = args_local.pid; 61 | e->type = EVENT_OOM_KILL; 62 | e->latency_ns = 0; 63 | e->error = 0; 64 | e->bytes = args_local.totalpages * PAGE_SIZE; 65 | e->tcp_state = 0; 66 | 67 | bpf_probe_read_kernel_str(e->target, sizeof(e->target), args_local.comm); 68 | 69 | capture_user_stack(ctx, e->pid, 0, e); 70 | bpf_ringbuf_output(&events, e, sizeof(*e), 0); 71 | return 0; 72 | } 73 | -------------------------------------------------------------------------------- /.github/workflows/bash-checks.yml: -------------------------------------------------------------------------------- 1 | name: Bash Script Checks 2 | 3 | on: 4 | push: 5 | branches: ["**"] 6 | paths: 7 | - "**/*.sh" 8 | - "scripts/**" 9 | - "build.sh" 10 | - "test/**/*.sh" 11 | pull_request: 12 | branches: ["**"] 13 | paths: 14 | - "**/*.sh" 15 | - "scripts/**" 16 | - "build.sh" 17 | - "test/**/*.sh" 18 | 19 | jobs: 20 | bash-lint: 21 | runs-on: ubuntu-latest 22 | 23 | steps: 24 | - name: Checkout 25 | uses: actions/checkout@v4 26 | 27 | - name: Install tools 28 | run: | 29 | sudo apt-get update 30 | sudo apt-get install -y \ 31 | shellcheck \ 32 | devscripts \ 33 | shfmt \ 34 | python3-pip 35 | pip3 install bashate || echo "bashate installation failed, will skip bashate checks" 36 | 37 | - name: Syntax Check 38 | run: | 39 | echo "Running bash syntax check..." 
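# bash -n parses each script without executing it, so plain syntax errors fail fast before the heavier linters in the later steps run.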
40 | for file in $(git ls-files '*.sh'); do
41 | echo "→ Checking syntax: $file"
42 | bash -n "$file"
43 | done
44 |
45 | - name: Run ShellCheck
46 | run: |
47 | echo "Running strict ShellCheck..."
48 | shellcheck --enable=all -x $(git ls-files '*.sh')
49 |
50 | - name: Check for bashisms
51 | run: |
52 | echo "Checking for bashisms..."
53 | checkbashisms $(git ls-files '*.sh')
54 |
55 | - name: Run bashate
56 | run: |
57 | echo "Running bashate..."
58 | if command -v bashate >/dev/null 2>&1; then
59 | bashate $(git ls-files '*.sh') || echo "bashate check completed with warnings"
60 | else
61 | echo "bashate not available, skipping..."
62 | fi
63 |
64 | - name: Check formatting (shfmt)
65 | run: |
66 | echo "Checking formatting with shfmt..."
67 | shfmt -d . || {
68 | echo "Formatting issues found. To fix, run: shfmt -w ."
69 | echo "Then commit the reformatted files and re-run this workflow."
70 | exit 1
71 | }
72 |
--------------------------------------------------------------------------------
/internal/diagnose/sampling.go:
--------------------------------------------------------------------------------
1 | package diagnose
2 |
3 | import (
4 | "github.com/podtrace/podtrace/internal/config"
5 | "github.com/podtrace/podtrace/internal/events"
6 | )
7 |
8 | var eventTypeSamplingRates = map[events.EventType]int{
9 | events.EventOOMKill: 1,
10 | events.EventPageFault: 1,
11 | events.EventNetDevError: 1,
12 | events.EventTCPRetrans: 5,
13 | events.EventDNS: 10,
14 | events.EventConnect: 20,
15 | events.EventHTTPReq: 30,
16 | events.EventHTTPResp: 30,
17 | events.EventTCPSend: 50,
18 | events.EventTCPRecv: 50,
19 | events.EventUDPSend: 50,
20 | events.EventUDPRecv: 50,
21 | events.EventWrite: 100,
22 | events.EventRead: 100,
23 | events.EventFsync: 100,
24 | events.EventSchedSwitch: 200,
25 | events.EventLockContention: 50,
26 | events.EventDBQuery: 20,
27 | events.EventExec: 10,
28 | events.EventFork: 10,
29 | events.EventOpen: 100,
30 | events.EventClose: 200,
31 | events.EventTCPState: 100,
32 | }
33 |
34 | func getEventPriority(event *events.Event) int {
35 | if event == nil {
36 | return config.PriorityLow
37 | }
38 |
39 | if event.Error != 0 {
40 | return config.PriorityCritical
41 | }
42 |
43 | switch event.Type {
44 | case events.EventOOMKill, events.EventPageFault, events.EventNetDevError:
45 | return config.PriorityCritical
46 | case events.EventTCPRetrans, events.EventLockContention:
47 | return config.PriorityHigh
48 | case events.EventDNS, events.EventConnect, events.EventHTTPReq, events.EventHTTPResp:
49 | return config.PriorityNormal
50 | default:
51 | return config.PriorityLow
52 | }
53 | }
54 |
55 | func shouldSampleEvent(event *events.Event, eventCount int) bool {
56 | if event == nil {
57 | return false
58 | }
59 |
60 | priority := getEventPriority(event)
61 | if priority == config.PriorityCritical {
62 | return true
63 | }
64 |
65 | samplingRate, ok := eventTypeSamplingRates[event.Type]
66 | if !ok {
67 | samplingRate = config.EventSamplingRate
68 | }
69 |
70 | return eventCount%samplingRate == 0
71 | }
72 |
73 |
--------------------------------------------------------------------------------
/scripts/setup-capabilities.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | set -euo pipefail
3 |
4 | SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
5 | PROJECT_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)"
&& pwd)" 6 | BINARY="${PROJECT_ROOT}/bin/podtrace" 7 | 8 | REQUIRED_CAPS="cap_bpf,cap_sys_admin,cap_sys_resource,cap_net_admin" 9 | 10 | check_binary_exists() { 11 | if [[ ! -f "${BINARY}" ]]; then 12 | echo "Error: ${BINARY} not found" >&2 13 | echo "Build it first with: make build" >&2 14 | exit 1 15 | fi 16 | 17 | if [[ ! -x "${BINARY}" ]]; then 18 | echo "Error: ${BINARY} is not executable" >&2 19 | exit 1 20 | fi 21 | } 22 | 23 | check_root() { 24 | if [[ ${EUID} -ne 0 ]]; then 25 | echo "Error: This script must be run as root (use sudo)" >&2 26 | exit 1 27 | fi 28 | } 29 | 30 | set_capabilities() { 31 | echo "Setting capabilities on ${BINARY}..." 32 | echo "Capabilities: ${REQUIRED_CAPS}" 33 | echo "" 34 | 35 | if ! command -v setcap &>/dev/null; then 36 | echo "Error: setcap command not found. Install libcap2-bin package." >&2 37 | exit 1 38 | fi 39 | 40 | if ! setcap "${REQUIRED_CAPS}+ep" "${BINARY}"; then 41 | echo "Error: Failed to set capabilities" >&2 42 | exit 1 43 | fi 44 | } 45 | 46 | verify_capabilities() { 47 | if ! command -v getcap &>/dev/null; then 48 | echo "Warning: getcap command not found, cannot verify capabilities" >&2 49 | return 0 50 | fi 51 | 52 | local current_caps 53 | current_caps=$(getcap "${BINARY}" 2>/dev/null || echo "") 54 | 55 | if [[ -z "${current_caps}" ]]; then 56 | echo "Error: Failed to verify capabilities were set" >&2 57 | exit 1 58 | fi 59 | 60 | echo "Verified capabilities:" 61 | echo " ${current_caps}" 62 | echo "" 63 | } 64 | 65 | print_success_message() { 66 | echo "✓ Capabilities set successfully!" 67 | echo "" 68 | echo "You can now run podtrace without sudo:" 69 | echo " ${BINARY} -n " 70 | echo "" 71 | echo "To verify capabilities:" 72 | echo " getcap ${BINARY}" 73 | echo "" 74 | echo "To remove capabilities:" 75 | echo " sudo setcap -r ${BINARY}" 76 | } 77 | 78 | main() { 79 | check_binary_exists 80 | check_root 81 | set_capabilities 82 | verify_capabilities 83 | print_success_message 84 | } 85 | 86 | main "$@" 87 | -------------------------------------------------------------------------------- /internal/kubernetes/errors.go: -------------------------------------------------------------------------------- 1 | package kubernetes 2 | 3 | import "fmt" 4 | 5 | type ErrorCode int 6 | 7 | const ( 8 | ErrCodeKubeconfigFailed ErrorCode = iota + 1 9 | ErrCodeClientsetFailed 10 | ErrCodePodNotFound 11 | ErrCodeNoContainers 12 | ErrCodeContainerNotFound 13 | ErrCodeInvalidContainerID 14 | ErrCodeCgroupNotFound 15 | ) 16 | 17 | type KubernetesError struct { 18 | Code ErrorCode 19 | Message string 20 | Err error 21 | } 22 | 23 | func (e *KubernetesError) Error() string { 24 | if e.Err != nil { 25 | return fmt.Sprintf("%s: %v", e.Message, e.Err) 26 | } 27 | return e.Message 28 | } 29 | 30 | func (e *KubernetesError) Unwrap() error { 31 | return e.Err 32 | } 33 | 34 | func NewKubeconfigError(err error) *KubernetesError { 35 | return &KubernetesError{ 36 | Code: ErrCodeKubeconfigFailed, 37 | Message: "failed to get kubeconfig", 38 | Err: err, 39 | } 40 | } 41 | 42 | func NewClientsetError(err error) *KubernetesError { 43 | return &KubernetesError{ 44 | Code: ErrCodeClientsetFailed, 45 | Message: "failed to create Kubernetes clientset", 46 | Err: err, 47 | } 48 | } 49 | 50 | func NewPodNotFoundError(podName, namespace string, err error) *KubernetesError { 51 | return &KubernetesError{ 52 | Code: ErrCodePodNotFound, 53 | Message: fmt.Sprintf("failed to get pod %s in namespace %s", podName, namespace), 54 | Err: err, 55 | } 56 | } 57 | 58 | func 
NewNoContainersError() *KubernetesError { 59 | return &KubernetesError{ 60 | Code: ErrCodeNoContainers, 61 | Message: "pod has no containers", 62 | } 63 | } 64 | 65 | func NewContainerNotFoundError(containerName string) *KubernetesError { 66 | return &KubernetesError{ 67 | Code: ErrCodeContainerNotFound, 68 | Message: fmt.Sprintf("container %s not found in pod", containerName), 69 | } 70 | } 71 | 72 | func NewInvalidContainerIDError(reason string) *KubernetesError { 73 | return &KubernetesError{ 74 | Code: ErrCodeInvalidContainerID, 75 | Message: fmt.Sprintf("invalid container ID: %s", reason), 76 | } 77 | } 78 | 79 | func NewCgroupNotFoundError(containerID string) *KubernetesError { 80 | return &KubernetesError{ 81 | Code: ErrCodeCgroupNotFound, 82 | Message: fmt.Sprintf("cgroup path not found for container %s", containerID), 83 | } 84 | } 85 | 86 | -------------------------------------------------------------------------------- /internal/alerting/manager_test.go: -------------------------------------------------------------------------------- 1 | package alerting 2 | 3 | import ( 4 | "context" 5 | "os" 6 | "testing" 7 | "time" 8 | 9 | "github.com/podtrace/podtrace/internal/config" 10 | ) 11 | 12 | func TestNewManager_Disabled(t *testing.T) { 13 | _ = os.Setenv("PODTRACE_ALERTING_ENABLED", "false") 14 | defer func() { 15 | _ = os.Unsetenv("PODTRACE_ALERTING_ENABLED") 16 | }() 17 | config.AlertingEnabled = false 18 | manager, err := NewManager() 19 | if err != nil { 20 | t.Fatalf("NewManager() error = %v", err) 21 | } 22 | if manager.IsEnabled() { 23 | t.Error("Manager should be disabled") 24 | } 25 | } 26 | 27 | func TestManager_SendAlert_Disabled(t *testing.T) { 28 | manager := &Manager{enabled: false} 29 | alert := &Alert{ 30 | Severity: SeverityWarning, 31 | Title: "Test", 32 | Message: "Test", 33 | Timestamp: time.Now(), 34 | Source: "test", 35 | } 36 | manager.SendAlert(alert) 37 | } 38 | 39 | func TestManager_SendAlert_Nil(t *testing.T) { 40 | manager := &Manager{enabled: true} 41 | manager.SendAlert(nil) 42 | } 43 | 44 | func TestManager_Shutdown(t *testing.T) { 45 | manager := &Manager{ 46 | enabled: true, 47 | stopCh: make(chan struct{}), 48 | } 49 | manager.cleanupTicker = time.NewTicker(1 * time.Hour) 50 | manager.wg.Add(1) 51 | go func() { 52 | defer manager.wg.Done() 53 | <-manager.stopCh 54 | }() 55 | ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second) 56 | defer cancel() 57 | err := manager.Shutdown(ctx) 58 | if err != nil { 59 | t.Errorf("Shutdown() error = %v", err) 60 | } 61 | } 62 | 63 | func TestManager_AddSender(t *testing.T) { 64 | manager := &Manager{ 65 | enabled: true, 66 | senders: make([]Sender, 0), 67 | } 68 | mockSender := &testMockSender{name: "test"} 69 | manager.AddSender(mockSender) 70 | if len(manager.senders) != 1 { 71 | t.Errorf("Expected 1 sender, got %d", len(manager.senders)) 72 | } 73 | manager.AddSender(nil) 74 | if len(manager.senders) != 1 { 75 | t.Errorf("Expected 1 sender after adding nil, got %d", len(manager.senders)) 76 | } 77 | } 78 | 79 | type testMockSender struct { 80 | sendFunc func(ctx context.Context, alert *Alert) error 81 | name string 82 | } 83 | 84 | func (m *testMockSender) Send(ctx context.Context, alert *Alert) error { 85 | if m.sendFunc != nil { 86 | return m.sendFunc(ctx, alert) 87 | } 88 | return nil 89 | } 90 | 91 | func (m *testMockSender) Name() string { 92 | return m.name 93 | } 94 | -------------------------------------------------------------------------------- /test/setup-test-pods.sh: 
-------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Setup script for podtrace test pods 3 | 4 | set -e 5 | 6 | NAMESPACE="podtrace-test" 7 | SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" 8 | 9 | GREEN='\033[0;32m' 10 | RED='\033[0;31m' 11 | NC='\033[0m' 12 | 13 | print_header() { 14 | echo "=== Setting up podtrace test environment ===" 15 | echo "" 16 | } 17 | 18 | check_kubectl_installed() { 19 | if ! command -v kubectl &>/dev/null; then 20 | echo -e "${RED}Error: kubectl is not installed${NC}" 21 | exit 1 22 | fi 23 | } 24 | 25 | check_cluster_access() { 26 | if ! kubectl cluster-info &>/dev/null; then 27 | echo -e "${RED}Error: Cannot connect to Kubernetes cluster${NC}" 28 | echo "Please verify: kubectl cluster-info" 29 | exit 1 30 | fi 31 | 32 | echo -e "${GREEN}✓ Kubernetes cluster accessible${NC}" 33 | echo "" 34 | } 35 | 36 | apply_test_resources() { 37 | echo "Creating test namespace and pods..." 38 | kubectl apply -f "${SCRIPT_DIR}/test-pods.yaml" 39 | echo "" 40 | } 41 | 42 | wait_for_pods_ready() { 43 | echo "Waiting for pods to be ready..." 44 | kubectl wait --for=condition=Ready pod/nginx-test -n "${NAMESPACE}" --timeout=120s || true 45 | kubectl wait --for=condition=Ready pod/busybox-test -n "${NAMESPACE}" --timeout=120s || true 46 | kubectl wait --for=condition=Ready pod/alpine-test -n "${NAMESPACE}" --timeout=120s || true 47 | echo "" 48 | } 49 | 50 | print_pod_status() { 51 | echo "=== Test Pods Status ===" 52 | kubectl get pods -n "${NAMESPACE}" 53 | echo "" 54 | } 55 | 56 | print_instructions() { 57 | echo -e "${GREEN}=== Test pods are ready! ===${NC}" 58 | echo "" 59 | echo "You can now test podtrace with:" 60 | echo "" 61 | echo " # Test with nginx pod" 62 | echo " sudo ./bin/podtrace -n ${NAMESPACE} nginx-test" 63 | echo "" 64 | echo " # Test with busybox pod" 65 | echo " sudo ./bin/podtrace -n ${NAMESPACE} busybox-test" 66 | echo "" 67 | echo " # Test with alpine pod" 68 | echo " sudo ./bin/podtrace -n ${NAMESPACE} alpine-test" 69 | echo "" 70 | echo " # Test diagnose mode" 71 | echo " sudo ./bin/podtrace -n ${NAMESPACE} nginx-test --diagnose 10s" 72 | echo "" 73 | echo "To clean up, run:" 74 | echo " ./test/cleanup-test-pods.sh" 75 | echo "" 76 | } 77 | 78 | main() { 79 | print_header 80 | check_kubectl_installed 81 | check_cluster_access 82 | apply_test_resources 83 | wait_for_pods_ready 84 | print_pod_status 85 | print_instructions 86 | } 87 | 88 | main "$@" 89 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.yaml: -------------------------------------------------------------------------------- 1 | name: Bug Report 2 | description: Create a report to help us improve 3 | title: "bug: " 4 | labels: 5 | - kind/bug 6 | body: 7 | - type: textarea 8 | id: description 9 | attributes: 10 | label: Describe the bug 11 | description: Please provide a clear and concise description of the bug. 12 | placeholder: | 13 | Add logs and screenshots if any. 14 | validations: 15 | required: true 16 | 17 | - type: textarea 18 | id: reproducing 19 | attributes: 20 | label: Steps To Reproduce 21 | description: Steps to reproduce the behavior. 22 | placeholder: | 23 | 1. Run 'podtrace -n ...' 24 | 2. Execute '...' 25 | 3. Observe the error 26 | validations: 27 | required: true 28 | 29 | - type: textarea 30 | id: expected 31 | attributes: 32 | label: Expected Behaviour 33 | description: A clear and concise description of what you expected to happen. 
34 | validations: 35 | required: true 36 | 37 | - type: dropdown 38 | id: component 39 | attributes: 40 | label: Component 41 | description: Which component is affected? 42 | options: 43 | - eBPF Programs 44 | - Event Collection 45 | - Kubernetes Integration 46 | - Event Processing 47 | - Diagnostics 48 | - Metrics Export 49 | - CLI 50 | - Other 51 | validations: 52 | required: true 53 | 54 | - type: input 55 | id: kubernetes_version 56 | attributes: 57 | label: Kubernetes version 58 | description: Output of `kubectl version` (server version) 59 | placeholder: e.g., v1.28.0 60 | validations: 61 | required: false 62 | 63 | - type: input 64 | id: podtrace_version 65 | attributes: 66 | label: podtrace version 67 | description: Version of podtrace binary 68 | placeholder: e.g., v0.1.0 69 | validations: 70 | required: false 71 | 72 | - type: input 73 | id: kernel_version 74 | attributes: 75 | label: Kernel version 76 | description: Output of `uname -r` on the node where podtrace runs 77 | placeholder: e.g., 5.15.0 78 | validations: 79 | required: false 80 | 81 | - type: textarea 82 | id: additional 83 | attributes: 84 | label: Additional Context 85 | description: Add any other context about the problem here (logs, cluster setup, pod configuration, etc.). 86 | -------------------------------------------------------------------------------- /internal/logger/logger_test.go: -------------------------------------------------------------------------------- 1 | package logger 2 | 3 | import ( 4 | "os" 5 | "testing" 6 | 7 | "go.uber.org/zap" 8 | "go.uber.org/zap/zapcore" 9 | ) 10 | 11 | func TestLogger(t *testing.T) { 12 | log := Logger() 13 | if log == nil { 14 | t.Error("Logger() should not return nil") 15 | } 16 | } 17 | 18 | func TestSetLevel(t *testing.T) { 19 | originalLevel := atomicLevel.Level() 20 | defer SetLevel(originalLevel.String()) 21 | 22 | tests := []struct { 23 | name string 24 | levelStr string 25 | expected zapcore.Level 26 | }{ 27 | {"debug", "debug", zapcore.DebugLevel}, 28 | {"info", "info", zapcore.InfoLevel}, 29 | {"warn", "warn", zapcore.WarnLevel}, 30 | {"error", "error", zapcore.ErrorLevel}, 31 | {"fatal", "fatal", zapcore.FatalLevel}, 32 | {"invalid", "invalid", zapcore.InfoLevel}, 33 | {"empty", "", zapcore.InfoLevel}, 34 | } 35 | 36 | for _, tt := range tests { 37 | t.Run(tt.name, func(t *testing.T) { 38 | SetLevel(tt.levelStr) 39 | if atomicLevel.Level() != tt.expected { 40 | t.Errorf("Expected level %v, got %v", tt.expected, atomicLevel.Level()) 41 | } 42 | }) 43 | } 44 | } 45 | 46 | func TestLogFunctions(t *testing.T) { 47 | SetLevel("debug") 48 | 49 | Debug("test debug message", zap.String("key", "value")) 50 | Info("test info message", zap.String("key", "value")) 51 | Warn("test warn message", zap.String("key", "value")) 52 | Error("test error message", zap.String("key", "value")) 53 | } 54 | 55 | 56 | func TestSync(t *testing.T) { 57 | Sync() 58 | } 59 | 60 | func TestParseLogLevel(t *testing.T) { 61 | key := "PODTRACE_LOG_LEVEL" 62 | originalValue := os.Getenv(key) 63 | defer func() { 64 | if originalValue != "" { 65 | _ = os.Setenv(key, originalValue) 66 | } else { 67 | _ = os.Unsetenv(key) 68 | } 69 | }() 70 | 71 | tests := []struct { 72 | name string 73 | levelStr string 74 | expected zapcore.Level 75 | }{ 76 | {"debug", "debug", zapcore.DebugLevel}, 77 | {"info", "info", zapcore.InfoLevel}, 78 | {"warn", "warn", zapcore.WarnLevel}, 79 | {"error", "error", zapcore.ErrorLevel}, 80 | {"fatal", "fatal", zapcore.FatalLevel}, 81 | {"invalid", "invalid", 
zapcore.InfoLevel}, 82 | {"uppercase", "DEBUG", zapcore.InfoLevel}, 83 | } 84 | 85 | for _, tt := range tests { 86 | t.Run(tt.name, func(t *testing.T) { 87 | result := parseLogLevel(tt.levelStr) 88 | if result != tt.expected { 89 | t.Errorf("Expected %v, got %v", tt.expected, result) 90 | } 91 | }) 92 | } 93 | } 94 | 95 | 96 | -------------------------------------------------------------------------------- /internal/diagnose/errors.go: -------------------------------------------------------------------------------- 1 | package diagnose 2 | 3 | import "fmt" 4 | 5 | type ErrorCode int 6 | 7 | const ( 8 | ErrCodeEventLimitReached ErrorCode = iota + 1 9 | ErrCodeContextCancelled 10 | ErrCodeTimeout 11 | ErrCodeInvalidOperation 12 | ErrCodeReportGenerationFailed 13 | ErrCodeStackResolveFailed 14 | ErrCodeAddr2lineFailed 15 | ErrCodeNoEvents 16 | ) 17 | 18 | type DiagnoseError struct { 19 | Code ErrorCode 20 | Message string 21 | Err error 22 | } 23 | 24 | func (e *DiagnoseError) Error() string { 25 | if e.Err != nil { 26 | return fmt.Sprintf("%s: %v", e.Message, e.Err) 27 | } 28 | return e.Message 29 | } 30 | 31 | func (e *DiagnoseError) Unwrap() error { 32 | return e.Err 33 | } 34 | 35 | func NewEventLimitError(dropped int) *DiagnoseError { 36 | return &DiagnoseError{ 37 | Code: ErrCodeEventLimitReached, 38 | Message: fmt.Sprintf("event limit reached, %d events dropped", dropped), 39 | } 40 | } 41 | 42 | func NewContextCancelledError(err error) *DiagnoseError { 43 | return &DiagnoseError{ 44 | Code: ErrCodeContextCancelled, 45 | Message: "operation cancelled", 46 | Err: err, 47 | } 48 | } 49 | 50 | func NewTimeoutError(operation string) *DiagnoseError { 51 | return &DiagnoseError{ 52 | Code: ErrCodeTimeout, 53 | Message: fmt.Sprintf("operation timed out: %s", operation), 54 | } 55 | } 56 | 57 | func NewInvalidOperationError(operation string) *DiagnoseError { 58 | return &DiagnoseError{ 59 | Code: ErrCodeInvalidOperation, 60 | Message: fmt.Sprintf("invalid operation: %s", operation), 61 | } 62 | } 63 | 64 | func NewReportGenerationError(err error) *DiagnoseError { 65 | return &DiagnoseError{ 66 | Code: ErrCodeReportGenerationFailed, 67 | Message: "failed to generate report", 68 | Err: err, 69 | } 70 | } 71 | 72 | func NewStackResolveError(pid uint32, addr uint64, err error) *DiagnoseError { 73 | return &DiagnoseError{ 74 | Code: ErrCodeStackResolveFailed, 75 | Message: fmt.Sprintf("failed to resolve stack trace for PID %d at address 0x%x", pid, addr), 76 | Err: err, 77 | } 78 | } 79 | 80 | func NewAddr2lineError(exePath string, addr uint64, err error) *DiagnoseError { 81 | return &DiagnoseError{ 82 | Code: ErrCodeAddr2lineFailed, 83 | Message: fmt.Sprintf("addr2line failed for %s at 0x%x", exePath, addr), 84 | Err: err, 85 | } 86 | } 87 | 88 | func NewNoEventsError() *DiagnoseError { 89 | return &DiagnoseError{ 90 | Code: ErrCodeNoEvents, 91 | Message: "no events collected during the diagnostic period", 92 | } 93 | } 94 | -------------------------------------------------------------------------------- /internal/kubernetes/errors_test.go: -------------------------------------------------------------------------------- 1 | package kubernetes 2 | 3 | import ( 4 | "errors" 5 | "testing" 6 | ) 7 | 8 | func TestNewKubeconfigError(t *testing.T) { 9 | originalErr := errors.New("kubeconfig error") 10 | err := NewKubeconfigError(originalErr) 11 | 12 | if err == nil { 13 | t.Fatal("NewKubeconfigError returned nil") 14 | } 15 | if err.Code != ErrCodeKubeconfigFailed { 16 | t.Errorf("Expected error code %d, 
got %d", ErrCodeKubeconfigFailed, err.Code) 17 | } 18 | if err.Message != "failed to get kubeconfig" { 19 | t.Errorf("Expected message 'failed to get kubeconfig', got %q", err.Message) 20 | } 21 | if err.Unwrap() != originalErr { 22 | t.Errorf("Expected unwrapped error to be original error") 23 | } 24 | } 25 | 26 | func TestNewClientsetError(t *testing.T) { 27 | originalErr := errors.New("clientset error") 28 | err := NewClientsetError(originalErr) 29 | 30 | if err == nil { 31 | t.Fatal("NewClientsetError returned nil") 32 | } 33 | if err.Code != ErrCodeClientsetFailed { 34 | t.Errorf("Expected error code %d, got %d", ErrCodeClientsetFailed, err.Code) 35 | } 36 | if err.Message != "failed to create Kubernetes clientset" { 37 | t.Errorf("Expected message 'failed to create Kubernetes clientset', got %q", err.Message) 38 | } 39 | if err.Unwrap() != originalErr { 40 | t.Errorf("Expected unwrapped error to be original error") 41 | } 42 | } 43 | 44 | func TestKubernetesError_Error_WithErr(t *testing.T) { 45 | originalErr := errors.New("underlying error") 46 | kerr := &KubernetesError{ 47 | Code: ErrCodePodNotFound, 48 | Message: "test message", 49 | Err: originalErr, 50 | } 51 | 52 | errStr := kerr.Error() 53 | if errStr == "" { 54 | t.Error("Error() should return non-empty string") 55 | } 56 | if !contains(errStr, "test message") { 57 | t.Errorf("Expected error string to contain 'test message', got %q", errStr) 58 | } 59 | } 60 | 61 | func TestKubernetesError_Error_WithoutErr(t *testing.T) { 62 | kerr := &KubernetesError{ 63 | Code: ErrCodePodNotFound, 64 | Message: "test message", 65 | Err: nil, 66 | } 67 | 68 | errStr := kerr.Error() 69 | if errStr != "test message" { 70 | t.Errorf("Expected error string 'test message', got %q", errStr) 71 | } 72 | } 73 | 74 | func contains(s, substr string) bool { 75 | return len(s) >= len(substr) && (s == substr || 76 | (len(s) > len(substr) && containsMiddle(s, substr))) 77 | } 78 | 79 | func containsMiddle(s, substr string) bool { 80 | for i := 0; i <= len(s)-len(substr); i++ { 81 | if s[i:i+len(substr)] == substr { 82 | return true 83 | } 84 | } 85 | return false 86 | } 87 | 88 | -------------------------------------------------------------------------------- /.github/workflows/security.yml: -------------------------------------------------------------------------------- 1 | name: Security - CodeQL 2 | 3 | on: 4 | push: 5 | branches: ["**"] 6 | pull_request: 7 | branches: ["main"] 8 | schedule: 9 | - cron: "0 3 * * 0" 10 | 11 | jobs: 12 | analyze: 13 | permissions: 14 | contents: read 15 | security-events: write 16 | actions: read 17 | 18 | runs-on: ubuntu-latest 19 | 20 | strategy: 21 | fail-fast: false 22 | matrix: 23 | language: ["go", "cpp"] 24 | 25 | steps: 26 | - name: Checkout 27 | uses: actions/checkout@v4 28 | 29 | - name: Set up Go 30 | if: matrix.language == 'go' 31 | uses: actions/setup-go@v4 32 | with: 33 | go-version-file: go.mod 34 | cache: true 35 | 36 | - name: Initialize CodeQL 37 | uses: github/codeql-action/init@v4 38 | with: 39 | languages: ${{ matrix.language }} 40 | 41 | - name: Install build dependencies 42 | run: | 43 | sudo apt-get update 44 | sudo apt-get install -y clang llvm libbpf-dev linux-headers-$(uname -r) linux-tools-$(uname -r) || sudo apt-get install -y clang llvm libbpf-dev linux-headers-$(uname -r) linux-tools-generic 45 | 46 | - name: Generate vmlinux.h from BTF 47 | if: matrix.language == 'cpp' 48 | run: | 49 | echo "Generating vmlinux.h from BTF..." 
50 | if [ -f /sys/kernel/btf/vmlinux ]; then 51 | bpftool btf dump file /sys/kernel/btf/vmlinux format c > bpf/vmlinux.h 52 | echo "vmlinux.h generated successfully" 53 | else 54 | echo "Warning: /sys/kernel/btf/vmlinux not found, using placeholder vmlinux.h" 55 | fi 56 | 57 | - name: Build Go 58 | if: matrix.language == 'go' 59 | run: | 60 | GOTOOLCHAIN=auto go mod download 61 | GOTOOLCHAIN=auto go build ./... 62 | 63 | - name: Build eBPF C 64 | if: matrix.language == 'cpp' 65 | run: | 66 | echo "Compiling podtrace.bpf.c..." 67 | if [ -f /sys/kernel/btf/vmlinux ]; then 68 | clang -O2 -g \ 69 | -target bpf \ 70 | -D__TARGET_ARCH_x86 \ 71 | -DPODTRACE_VMLINUX_FROM_BTF \ 72 | -I./bpf \ 73 | -c bpf/podtrace.bpf.c \ 74 | -o bpf/podtrace.bpf.o 75 | else 76 | clang -O2 -g \ 77 | -target bpf \ 78 | -D__TARGET_ARCH_x86 \ 79 | -I./bpf \ 80 | -c bpf/podtrace.bpf.c \ 81 | -o bpf/podtrace.bpf.o 82 | fi 83 | 84 | - name: Analyze 85 | uses: github/codeql-action/analyze@v4 86 | -------------------------------------------------------------------------------- /bpf/resources.c: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: GPL-2.0 2 | 3 | #include "common.h" 4 | #include "maps.h" 5 | #include "events.h" 6 | #include "helpers.h" 7 | 8 | #define RESOURCE_CPU 0 9 | #define RESOURCE_MEMORY 1 10 | #define RESOURCE_IO 2 11 | 12 | static inline u32 calculate_utilization(u64 usage, u64 limit) { 13 | if (limit == 0 || limit == ~0ULL) { 14 | return 0; 15 | } 16 | if (usage > limit) { 17 | return 100; 18 | } 19 | u64 percent = (usage * 100) / limit; 20 | return (u32)(percent > 100 ? 100 : percent); 21 | } 22 | 23 | static inline u32 check_alert_threshold(u32 utilization) { 24 | if (utilization >= 95) { 25 | return 3; 26 | } else if (utilization >= 90) { 27 | return 2; 28 | } else if (utilization >= 80) { 29 | return 1; 30 | } 31 | return 0; 32 | } 33 | 34 | static inline void emit_resource_alert(u64 cgroup_id, u32 resource_type, u32 utilization, u64 limit, u64 usage) { 35 | struct event *e = get_event_buf(); 36 | if (!e) { 37 | return; 38 | } 39 | 40 | e->timestamp = bpf_ktime_get_ns(); 41 | e->pid = 0; 42 | e->type = EVENT_RESOURCE_LIMIT; 43 | e->latency_ns = 0; 44 | e->error = (s32)utilization; 45 | e->bytes = usage; 46 | e->tcp_state = resource_type; 47 | e->target[0] = '\0'; 48 | 49 | char *details = e->details; 50 | u32 idx = 0; 51 | u32 max_idx = MAX_STRING_LEN - 1; 52 | 53 | const char *resource_names[] = {"CPU", "MEM", "IO"}; 54 | if (resource_type < 3) { 55 | const char *name = resource_names[resource_type]; 56 | for (int i = 0; name[i] != '\0' && idx < max_idx; i++) { 57 | details[idx++] = name[i]; 58 | } 59 | } 60 | 61 | if (idx < max_idx) details[idx++] = ':'; 62 | 63 | if (utilization >= 100 && idx < max_idx - 2) { 64 | details[idx++] = '1'; 65 | details[idx++] = '0'; 66 | details[idx++] = '0'; 67 | } else if (utilization >= 10 && idx < max_idx - 1) { 68 | details[idx++] = '0' + (utilization / 10); 69 | details[idx++] = '0' + (utilization % 10); 70 | } else if (idx < max_idx) { 71 | details[idx++] = '0' + utilization; 72 | } 73 | 74 | if (idx < max_idx) details[idx++] = '%'; 75 | details[idx < MAX_STRING_LEN ? 
idx : max_idx] = '\0'; 76 | 77 | bpf_ringbuf_output(&events, e, sizeof(*e), 0); 78 | 79 | u32 alert_level = check_alert_threshold(utilization); 80 | if (alert_level > 0) { 81 | bpf_map_update_elem(&cgroup_alerts, &cgroup_id, &alert_level, BPF_ANY); 82 | } else { 83 | bpf_map_delete_elem(&cgroup_alerts, &cgroup_id); 84 | } 85 | } -------------------------------------------------------------------------------- /internal/diagnose/analyzer/network.go: -------------------------------------------------------------------------------- 1 | package analyzer 2 | 3 | import ( 4 | "sort" 5 | 6 | "github.com/podtrace/podtrace/internal/config" 7 | "github.com/podtrace/podtrace/internal/events" 8 | ) 9 | 10 | func AnalyzeTCP(events []*events.Event, rttSpikeThreshold float64) (avgRTT, maxRTT float64, spikes int, p50, p95, p99 float64, errors int, totalBytes, avgBytes, peakBytes uint64) { 11 | var totalRTT float64 12 | var rtts []float64 13 | maxRTT = 0 14 | spikes = 0 15 | errors = 0 16 | totalBytes = 0 17 | peakBytes = 0 18 | 19 | for _, e := range events { 20 | rttMs := float64(e.LatencyNS) / float64(config.NSPerMS) 21 | rtts = append(rtts, rttMs) 22 | totalRTT += rttMs 23 | if rttMs > maxRTT { 24 | maxRTT = rttMs 25 | } 26 | if rttMs > rttSpikeThreshold { 27 | spikes++ 28 | } 29 | if e.Error < 0 && e.Error != -config.EAGAIN { 30 | errors++ 31 | } 32 | if e.Bytes > 0 && e.Bytes < uint64(config.MaxBytesForBandwidth) { 33 | totalBytes += e.Bytes 34 | if e.Bytes > peakBytes { 35 | peakBytes = e.Bytes 36 | } 37 | } 38 | } 39 | 40 | if len(events) > 0 { 41 | avgRTT = totalRTT / float64(len(events)) 42 | sort.Float64s(rtts) 43 | p50 = Percentile(rtts, 50) 44 | p95 = Percentile(rtts, 95) 45 | p99 = Percentile(rtts, 99) 46 | if totalBytes > 0 { 47 | avgBytes = totalBytes / uint64(len(events)) 48 | } 49 | } 50 | return 51 | } 52 | 53 | func AnalyzeConnections(events []*events.Event) (avgLatency, maxLatency float64, errors int, p50, p95, p99 float64, topTargets []TargetCount, errorBreakdown map[int32]int) { 54 | var totalLatency float64 55 | var latencies []float64 56 | maxLatency = 0 57 | errors = 0 58 | targetMap := make(map[string]int) 59 | errorBreakdown = make(map[int32]int) 60 | 61 | for _, e := range events { 62 | latencyMs := float64(e.LatencyNS) / float64(config.NSPerMS) 63 | latencies = append(latencies, latencyMs) 64 | totalLatency += latencyMs 65 | if latencyMs > maxLatency { 66 | maxLatency = latencyMs 67 | } 68 | if e.Error != 0 { 69 | errors++ 70 | errorBreakdown[e.Error]++ 71 | } 72 | if e.Target != "" && e.Target != "?" && e.Target != "unknown" && e.Target != "file" { 73 | targetMap[e.Target]++ 74 | } 75 | } 76 | 77 | if len(events) > 0 { 78 | avgLatency = totalLatency / float64(len(events)) 79 | sort.Float64s(latencies) 80 | p50 = Percentile(latencies, 50) 81 | p95 = Percentile(latencies, 95) 82 | p99 = Percentile(latencies, 99) 83 | } 84 | 85 | for target, count := range targetMap { 86 | topTargets = append(topTargets, TargetCount{target, count}) 87 | } 88 | sort.Slice(topTargets, func(i, j int) bool { 89 | return topTargets[i].Count > topTargets[j].Count 90 | }) 91 | 92 | return 93 | } 94 | -------------------------------------------------------------------------------- /test/run-tests.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Automated test runner for podtrace 3 | 4 | set -e 5 | 6 | NAMESPACE="podtrace-test" 7 | SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" 8 | PROJECT_ROOT="$(cd "${SCRIPT_DIR}/.." 
&& pwd)" 9 | 10 | GREEN='\033[0;32m' 11 | YELLOW='\033[1;33m' 12 | RED='\033[0;31m' 13 | BLUE='\033[0;34m' 14 | NC='\033[0m' 15 | 16 | print_header() { 17 | echo -e "${BLUE}=== podtrace Test Runner ===${NC}" 18 | echo "" 19 | } 20 | 21 | check_dependencies() { 22 | if ! command -v kubectl &>/dev/null; then 23 | echo -e "${RED}Error: kubectl is not installed${NC}" 24 | exit 1 25 | fi 26 | 27 | if ! kubectl cluster-info &>/dev/null; then 28 | echo -e "${RED}Error: Cannot connect to Kubernetes cluster${NC}" 29 | exit 1 30 | fi 31 | 32 | if [[ ! -f "${PROJECT_ROOT}/bin/podtrace" ]]; then 33 | echo -e "${RED}Error: podtrace binary not found. Run 'make build' first.${NC}" 34 | exit 1 35 | fi 36 | } 37 | 38 | setup_test_environment() { 39 | echo -e "${YELLOW}[1/4] Setting up test pods...${NC}" 40 | "${SCRIPT_DIR}/setup-test-pods.sh" >/dev/null 2>&1 41 | } 42 | 43 | wait_for_pods() { 44 | echo -e "${YELLOW}[2/4] Waiting for pods to be active...${NC}" 45 | sleep 10 46 | echo "" 47 | } 48 | 49 | run_test() { 50 | local test_name="$1" 51 | local pod_name="$2" 52 | local duration="$3" 53 | 54 | echo -e "${BLUE}${test_name}${NC}" 55 | echo "Running: sudo ${PROJECT_ROOT}/bin/podtrace -n ${NAMESPACE} ${pod_name} --diagnose ${duration}" 56 | echo "" 57 | 58 | local test_output 59 | local test_exit_code 60 | set +e 61 | test_output=$(sudo "${PROJECT_ROOT}/bin/podtrace" -n "${NAMESPACE}" "${pod_name}" --diagnose "${duration}" 2>&1 | head -30 || true) 62 | test_exit_code=${PIPESTATUS[0]} 63 | set -e 64 | echo "${test_output}" 65 | if [[ ${test_exit_code} -eq 0 ]]; then 66 | echo -e "${GREEN}✓ ${test_name} passed${NC}" 67 | else 68 | echo -e "${RED}✗ ${test_name} failed${NC}" 69 | fi 70 | 71 | echo "" 72 | } 73 | 74 | cleanup_test_environment() { 75 | echo -e "${YELLOW}[4/4] Cleaning up...${NC}" 76 | "${SCRIPT_DIR}/cleanup-test-pods.sh" >/dev/null 2>&1 77 | echo "" 78 | } 79 | 80 | print_footer() { 81 | echo -e "${GREEN}=== Tests completed ===${NC}" 82 | } 83 | 84 | main() { 85 | print_header 86 | check_dependencies 87 | setup_test_environment 88 | wait_for_pods 89 | 90 | echo -e "${YELLOW}[3/4] Running tests...${NC}" 91 | echo "" 92 | 93 | run_test "Test 1: Basic tracing (nginx-test)" "nginx-test" "5s" 94 | run_test "Test 2: Diagnose mode (busybox-test)" "busybox-test" "10s" 95 | 96 | cleanup_test_environment 97 | print_footer 98 | } 99 | 100 | main "$@" 101 | -------------------------------------------------------------------------------- /internal/diagnose/profiling/cpu_profiling_test.go: -------------------------------------------------------------------------------- 1 | package profiling 2 | 3 | import ( 4 | "os" 5 | "strings" 6 | "testing" 7 | "time" 8 | 9 | "github.com/podtrace/podtrace/internal/events" 10 | ) 11 | 12 | func TestGenerateCPUUsageReport(t *testing.T) { 13 | duration := 10 * time.Second 14 | 15 | var testEvents []*events.Event 16 | report := GenerateCPUUsageReport(testEvents, duration) 17 | if report == "" { 18 | t.Error("GenerateCPUUsageReport should return a report even with no events") 19 | } 20 | if !strings.Contains(report, "CPU Usage by Process") { 21 | t.Error("Report should contain 'CPU Usage by Process'") 22 | } 23 | 24 | testEvents = []*events.Event{ 25 | { 26 | PID: 1, 27 | ProcessName: "init", 28 | Type: events.EventDNS, 29 | Timestamp: uint64(time.Now().UnixNano()), 30 | }, 31 | { 32 | PID: 1, 33 | ProcessName: "init", 34 | Type: events.EventConnect, 35 | Timestamp: uint64(time.Now().UnixNano()), 36 | }, 37 | } 38 | 39 | selfPID := uint32(os.Getpid()) 40 | if selfPID > 1 { 41 
| testEvents = append(testEvents, &events.Event{ 42 | PID: selfPID, 43 | ProcessName: "test-process", 44 | Type: events.EventTCPSend, 45 | Timestamp: uint64(time.Now().UnixNano()), 46 | }) 47 | } 48 | 49 | report = GenerateCPUUsageReport(testEvents, duration) 50 | if !strings.Contains(report, "CPU Usage by Process") { 51 | t.Error("Report should contain 'CPU Usage by Process'") 52 | } 53 | if strings.Contains(report, "Pod Processes") || strings.Contains(report, "System/Kernel Processes") { 54 | } else { 55 | if !strings.Contains(report, "No CPU events") && !strings.Contains(report, "Total CPU usage") { 56 | t.Error("Report should contain either process information or indicate no events") 57 | } 58 | } 59 | } 60 | 61 | 62 | func TestCPUUsageReportWithKernelThreads(t *testing.T) { 63 | duration := 10 * time.Second 64 | 65 | testEvents := []*events.Event{ 66 | { 67 | PID: 1, 68 | ProcessName: "init", 69 | Type: events.EventDNS, 70 | Timestamp: uint64(time.Now().UnixNano()), 71 | }, 72 | { 73 | PID: 2, 74 | ProcessName: "kthreadd", 75 | Type: events.EventSchedSwitch, 76 | Timestamp: uint64(time.Now().UnixNano()), 77 | }, 78 | } 79 | 80 | report := GenerateCPUUsageReport(testEvents, duration) 81 | if !strings.Contains(report, "CPU Usage by Process") { 82 | t.Error("Report should contain 'CPU Usage by Process'") 83 | } 84 | if !strings.Contains(report, "Pod Processes") && !strings.Contains(report, "System/Kernel Processes") { 85 | if !strings.Contains(report, "Total CPU usage") { 86 | t.Error("Report should contain process information or total CPU usage") 87 | } 88 | } 89 | _ = report 90 | } 91 | 92 | -------------------------------------------------------------------------------- /.github/workflows/ebpf-build.yml: -------------------------------------------------------------------------------- 1 | name: eBPF Build 2 | 3 | on: 4 | push: 5 | branches: ["**"] 6 | paths: 7 | - "bpf/**" 8 | - "**/*.c" 9 | - "go.mod" 10 | - "go.sum" 11 | - "cmd/**" 12 | - "internal/**" 13 | - "Makefile" 14 | pull_request: 15 | branches: ["**"] 16 | paths: 17 | - "bpf/**" 18 | - "**/*.c" 19 | - "go.mod" 20 | - "go.sum" 21 | - "cmd/**" 22 | - "internal/**" 23 | - "Makefile" 24 | 25 | jobs: 26 | build-ebpf: 27 | runs-on: ubuntu-latest 28 | 29 | steps: 30 | - name: Checkout 31 | uses: actions/checkout@v4 32 | 33 | - name: Set up Go 34 | uses: actions/setup-go@v4 35 | with: 36 | go-version-file: go.mod 37 | cache: true 38 | 39 | - name: Install build dependencies 40 | run: | 41 | sudo apt-get update 42 | sudo apt-get install -y clang llvm libbpf-dev linux-headers-$(uname -r) linux-tools-$(uname -r) || sudo apt-get install -y clang llvm libbpf-dev linux-headers-$(uname -r) linux-tools-generic 43 | 44 | - name: Generate vmlinux.h from BTF 45 | run: | 46 | echo "Generating vmlinux.h from BTF..." 47 | if [ -f /sys/kernel/btf/vmlinux ]; then 48 | bpftool btf dump file /sys/kernel/btf/vmlinux format c > bpf/vmlinux.h 49 | echo "vmlinux.h generated successfully" 50 | else 51 | echo "Warning: /sys/kernel/btf/vmlinux not found, using placeholder vmlinux.h" 52 | fi 53 | 54 | - name: Install Go dependencies 55 | run: | 56 | echo "Installing Go dependencies..." 57 | GOTOOLCHAIN=auto go mod download 58 | GOTOOLCHAIN=auto go mod verify 59 | 60 | - name: Compile eBPF program 61 | run: | 62 | echo "Compiling podtrace.bpf.c..." 
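# Two compile paths: when the runner kernel exposes BTF, build against the freshly generated vmlinux.h (the PODTRACE_VMLINUX_FROM_BTF define presumably tells the sources to include it); otherwise fall back to the in-tree headers alone.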
63 | if [ -f /sys/kernel/btf/vmlinux ]; then 64 | clang -O2 -g \ 65 | -target bpf \ 66 | -D__TARGET_ARCH_x86 \ 67 | -DPODTRACE_VMLINUX_FROM_BTF \ 68 | -I./bpf \ 69 | -c bpf/podtrace.bpf.c \ 70 | -o bpf/podtrace.bpf.o 71 | else 72 | clang -O2 -g \ 73 | -target bpf \ 74 | -D__TARGET_ARCH_x86 \ 75 | -I./bpf \ 76 | -c bpf/podtrace.bpf.c \ 77 | -o bpf/podtrace.bpf.o 78 | fi 79 | 80 | - name: Build Go binary 81 | run: | 82 | echo "Building Go binary..." 83 | GOTOOLCHAIN=auto go build -o bin/podtrace ./cmd/podtrace 84 | 85 | - name: List outputs 86 | run: | 87 | echo "eBPF object:" 88 | ls -l bpf/ || true 89 | echo "Go binary:" 90 | ls -l bin/ || true 91 | -------------------------------------------------------------------------------- /internal/diagnose/detector/issues.go: -------------------------------------------------------------------------------- 1 | package detector 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/podtrace/podtrace/internal/config" 7 | "github.com/podtrace/podtrace/internal/events" 8 | ) 9 | 10 | func DetectIssues(allEvents []*events.Event, errorRateThreshold, rttSpikeThreshold float64) []string { 11 | var issues []string 12 | 13 | var connectEvents []*events.Event 14 | for _, e := range allEvents { 15 | if e.Type == events.EventConnect { 16 | connectEvents = append(connectEvents, e) 17 | } 18 | } 19 | 20 | if len(connectEvents) > 0 { 21 | errors := 0 22 | for _, e := range connectEvents { 23 | if e.Error != 0 { 24 | errors++ 25 | } 26 | } 27 | errorRate := float64(errors) / float64(len(connectEvents)) * 100 28 | if errorRate > errorRateThreshold { 29 | issues = append(issues, fmt.Sprintf("High connection failure rate: %.1f%% (%d/%d) (threshold: %.1f%%)", errorRate, errors, len(connectEvents), errorRateThreshold)) 30 | } 31 | } 32 | 33 | var tcpEvents []*events.Event 34 | for _, e := range allEvents { 35 | if e.Type == events.EventTCPSend || e.Type == events.EventTCPRecv { 36 | tcpEvents = append(tcpEvents, e) 37 | } 38 | } 39 | 40 | if len(tcpEvents) > 0 { 41 | spikes := 0 42 | for _, e := range tcpEvents { 43 | if float64(e.LatencyNS)/float64(config.NSPerMS) > rttSpikeThreshold { 44 | spikes++ 45 | } 46 | } 47 | spikeRate := float64(spikes) / float64(len(tcpEvents)) * 100 48 | if spikeRate > config.SpikeRateThreshold { 49 | issues = append(issues, fmt.Sprintf("High TCP RTT spike rate: %.1f%% (%d/%d) (threshold: %.1fms)", spikeRate, spikes, len(tcpEvents), rttSpikeThreshold)) 50 | } 51 | } 52 | 53 | var resourceAlerts = make(map[string]int) 54 | for _, e := range allEvents { 55 | if e.Type == events.EventResourceLimit { 56 | utilization := uint32(e.Error) 57 | resourceType := e.TCPState 58 | 59 | var resourceName string 60 | switch resourceType { 61 | case 0: 62 | resourceName = "CPU" 63 | case 1: 64 | resourceName = "Memory" 65 | case 2: 66 | resourceName = "I/O" 67 | default: 68 | resourceName = "Resource" 69 | } 70 | 71 | key := resourceName 72 | if current, ok := resourceAlerts[key]; !ok || utilization > uint32(current) { 73 | resourceAlerts[key] = int(utilization) 74 | } 75 | } 76 | } 77 | 78 | for resourceName, maxUtil := range resourceAlerts { 79 | var severity string 80 | if maxUtil >= 95 { 81 | severity = "EMERGENCY" 82 | } else if maxUtil >= 90 { 83 | severity = "CRITICAL" 84 | } else if maxUtil >= 80 { 85 | severity = "WARNING" 86 | } 87 | 88 | if severity != "" { 89 | issues = append(issues, fmt.Sprintf("Resource limit %s: %s - %d%% utilization (threshold: 80%% warning, 90%% critical, 95%% emergency)", 90 | severity, resourceName, maxUtil)) 91 | } 92 | } 93 | 
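// Note: EVENT_RESOURCE_LIMIT reuses generic event fields; the BPF side
// (emit_resource_alert in bpf/resources.c) packs the utilization percentage
// into Error and the resource kind (0=CPU, 1=Memory, 2=I/O) into TCPState,
// which is why the loop above decodes those two fields.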
94 | return issues 95 | } 96 | -------------------------------------------------------------------------------- /internal/diagnose/analyzer/pool.go: -------------------------------------------------------------------------------- 1 | package analyzer 2 | 3 | import ( 4 | "sort" 5 | "time" 6 | 7 | "github.com/podtrace/podtrace/internal/config" 8 | "github.com/podtrace/podtrace/internal/events" 9 | ) 10 | 11 | type PoolStats struct { 12 | TotalAcquires int 13 | TotalReleases int 14 | ExhaustedCount int 15 | AvgWaitTime time.Duration 16 | MaxWaitTime time.Duration 17 | ReuseRate float64 18 | PeakConnections int 19 | AvgConnections float64 20 | P50WaitTime float64 21 | P95WaitTime float64 22 | P99WaitTime float64 23 | } 24 | 25 | func AnalyzePool(acquireEvents, releaseEvents, exhaustedEvents []*events.Event) PoolStats { 26 | stats := PoolStats{ 27 | TotalAcquires: len(acquireEvents), 28 | TotalReleases: len(releaseEvents), 29 | ExhaustedCount: len(exhaustedEvents), 30 | } 31 | 32 | if stats.TotalAcquires > 0 { 33 | stats.ReuseRate = float64(stats.TotalReleases) / float64(stats.TotalAcquires) 34 | } 35 | 36 | var waitTimes []float64 37 | var totalWaitTime time.Duration 38 | maxWaitTime := time.Duration(0) 39 | 40 | for _, e := range exhaustedEvents { 41 | waitTime := e.Latency() 42 | waitTimes = append(waitTimes, float64(waitTime.Nanoseconds())/float64(config.NSPerMS)) 43 | totalWaitTime += waitTime 44 | if waitTime > maxWaitTime { 45 | maxWaitTime = waitTime 46 | } 47 | } 48 | 49 | if stats.ExhaustedCount > 0 { 50 | stats.AvgWaitTime = totalWaitTime / time.Duration(stats.ExhaustedCount) 51 | stats.MaxWaitTime = maxWaitTime 52 | 53 | if len(waitTimes) > 0 { 54 | sort.Float64s(waitTimes) 55 | stats.P50WaitTime = Percentile(waitTimes, 50) 56 | stats.P95WaitTime = Percentile(waitTimes, 95) 57 | stats.P99WaitTime = Percentile(waitTimes, 99) 58 | } 59 | } 60 | 61 | poolTracker := make(map[string]struct { 62 | current int 63 | peak int 64 | }) 65 | 66 | for _, e := range acquireEvents { 67 | poolID := e.Target 68 | if poolID == "" { 69 | poolID = "default" 70 | } 71 | pool := poolTracker[poolID] 72 | pool.current++ 73 | if pool.current > pool.peak { 74 | pool.peak = pool.current 75 | } 76 | poolTracker[poolID] = pool 77 | } 78 | 79 | for _, e := range releaseEvents { 80 | poolID := e.Target 81 | if poolID == "" { 82 | poolID = "default" 83 | } 84 | pool := poolTracker[poolID] 85 | if pool.current > 0 { 86 | pool.current-- 87 | } 88 | poolTracker[poolID] = pool 89 | } 90 | 91 | totalPeak := 0 92 | totalCurrent := 0 93 | for _, pool := range poolTracker { 94 | if pool.peak > totalPeak { 95 | totalPeak = pool.peak 96 | } 97 | totalCurrent += pool.current 98 | } 99 | 100 | stats.PeakConnections = totalPeak 101 | if len(poolTracker) > 0 { 102 | stats.AvgConnections = float64(totalCurrent) / float64(len(poolTracker)) 103 | } 104 | 105 | return stats 106 | } 107 | -------------------------------------------------------------------------------- /internal/logger/logger.go: -------------------------------------------------------------------------------- 1 | package logger 2 | 3 | import ( 4 | "os" 5 | 6 | "go.uber.org/zap" 7 | "go.uber.org/zap/zapcore" 8 | 9 | "github.com/podtrace/podtrace/internal/alerting" 10 | "github.com/podtrace/podtrace/internal/config" 11 | ) 12 | 13 | var ( 14 | log *zap.Logger 15 | atomicLevel zap.AtomicLevel 16 | ) 17 | 18 | func init() { 19 | level := getLogLevel() 20 | atomicLevel = zap.NewAtomicLevelAt(level) 21 | encoderConfig := zap.NewProductionEncoderConfig() 22 | 
encoderConfig.EncodeTime = zapcore.ISO8601TimeEncoder
23 | encoderConfig.EncodeLevel = zapcore.LowercaseLevelEncoder
24 |
25 | core := zapcore.NewCore(
26 | zapcore.NewJSONEncoder(encoderConfig),
27 | zapcore.AddSync(os.Stderr),
28 | atomicLevel,
29 | )
30 |
31 | log = zap.New(core, zap.AddCaller(), zap.AddStacktrace(zapcore.ErrorLevel))
32 | }
33 |
34 | func getLogLevel() zapcore.Level {
35 | levelStr := os.Getenv("PODTRACE_LOG_LEVEL")
36 | if levelStr == "" {
37 | levelStr = config.DefaultLogLevel
38 | }
39 | return parseLogLevel(levelStr)
40 | }
41 |
42 | func Debug(msg string, fields ...zap.Field) {
43 | log.Debug(msg, fields...)
44 | }
45 |
46 | func Info(msg string, fields ...zap.Field) {
47 | log.Info(msg, fields...)
48 | }
49 |
50 | func Warn(msg string, fields ...zap.Field) {
51 | log.Warn(msg, fields...)
52 | manager := alerting.GetGlobalManager()
53 | if manager != nil {
54 | if alert := alerting.CreateAlertFromLog(zapcore.WarnLevel, msg, fields, "", ""); alert != nil {
55 | manager.SendAlert(alert)
56 | }
57 | }
58 | }
59 |
60 | func Error(msg string, fields ...zap.Field) {
61 | log.Error(msg, fields...)
62 | manager := alerting.GetGlobalManager()
63 | if manager != nil {
64 | if alert := alerting.CreateAlertFromLog(zapcore.ErrorLevel, msg, fields, "", ""); alert != nil {
65 | manager.SendAlert(alert)
66 | }
67 | }
68 | }
69 |
70 | func Fatal(msg string, fields ...zap.Field) {
71 | manager := alerting.GetGlobalManager()
72 | if manager != nil {
73 | if alert := alerting.CreateAlertFromLog(zapcore.FatalLevel, msg, fields, "", ""); alert != nil {
74 | manager.SendAlert(alert)
75 | }
76 | }
77 | log.Fatal(msg, fields...) // last: zap's Fatal calls os.Exit, so code after it never runs
78 | }
79 |
80 | func Logger() *zap.Logger {
81 | return log
82 | }
83 |
84 | func Sync() {
85 | _ = log.Sync()
86 | }
87 |
88 | func SetLevel(levelStr string) {
89 | level := parseLogLevel(levelStr)
90 | atomicLevel.SetLevel(level)
91 | }
92 |
93 | func parseLogLevel(levelStr string) zapcore.Level {
94 | switch levelStr {
95 | case "debug":
96 | return zapcore.DebugLevel
97 | case "info":
98 | return zapcore.InfoLevel
99 | case "warn":
100 | return zapcore.WarnLevel
101 | case "error":
102 | return zapcore.ErrorLevel
103 | case "fatal":
104 | return zapcore.FatalLevel
105 | default:
106 | return zapcore.InfoLevel
107 | }
108 | }
109 |
--------------------------------------------------------------------------------
/internal/diagnose/formatter/formatter.go:
--------------------------------------------------------------------------------
1 | package formatter
2 |
3 | import (
4 | "fmt"
5 | "sort"
6 |
7 | "github.com/podtrace/podtrace/internal/config"
8 | "github.com/podtrace/podtrace/internal/diagnose/analyzer"
9 | )
10 |
11 | func SectionHeader(title string) string {
12 | return fmt.Sprintf("%s Statistics:\n", title)
13 | }
14 |
15 | func TotalWithRate(label string, count int, rate float64) string {
16 | return fmt.Sprintf(" Total %s: %d (%.1f/sec)\n", label, count, rate)
17 | }
18 |
19 | func LatencyMetrics(avgLatency, maxLatency float64) string {
20 | return fmt.Sprintf(" Average latency: %.2fms\n Max latency: %.2fms\n", avgLatency, maxLatency)
21 | }
22 |
23 | func Percentiles(p50, p95, p99 float64) string {
24 | return fmt.Sprintf(" Percentiles: P50=%.2fms, P95=%.2fms, P99=%.2fms\n", p50, p95, p99)
25 | }
26 |
27 | func ErrorRate(errors, total int) string {
28 | if total == 0 {
29 | return fmt.Sprintf(" Errors: %d (0.0%%)\n", errors)
30 | }
31 | return fmt.Sprintf(" Errors: %d (%.1f%%)\n", errors, float64(errors)*float64(config.Percent100)/float64(total))
32 |
} 33 | 34 | func TopTargets(targets []analyzer.TargetCount, limit int, headerLabel, countLabel string) string { 35 | if len(targets) == 0 { 36 | return "" 37 | } 38 | var result string 39 | result += fmt.Sprintf(" Top %s:\n", headerLabel) 40 | for i, target := range targets { 41 | if i >= limit { 42 | break 43 | } 44 | result += fmt.Sprintf(" - %s (%d %s)\n", target.Target, target.Count, countLabel) 45 | } 46 | return result 47 | } 48 | 49 | func BytesSection(totalBytes, avgBytes uint64, throughput uint64) string { 50 | if totalBytes == 0 { 51 | return "" 52 | } 53 | var result string 54 | result += fmt.Sprintf(" Total bytes transferred: %s\n", analyzer.FormatBytes(totalBytes)) 55 | result += fmt.Sprintf(" Average bytes per operation: %s\n", analyzer.FormatBytes(avgBytes)) 56 | if throughput > 0 { 57 | result += fmt.Sprintf(" Average throughput: %s/sec\n", analyzer.FormatBytes(throughput)) 58 | } 59 | return result 60 | } 61 | 62 | func Rate(count int, duration float64) string { 63 | if duration > 0 { 64 | return fmt.Sprintf(" (%.1f/sec)", float64(count)/duration) 65 | } 66 | return "" 67 | } 68 | 69 | func TopItems(items map[string]int, limit int, headerLabel, itemLabel string) string { 70 | if len(items) == 0 { 71 | return "" 72 | } 73 | type itemCount struct { 74 | name string 75 | count int 76 | } 77 | var itemCounts []itemCount 78 | for name, count := range items { 79 | itemCounts = append(itemCounts, itemCount{name: name, count: count}) 80 | } 81 | sort.Slice(itemCounts, func(i, j int) bool { 82 | return itemCounts[i].count > itemCounts[j].count 83 | }) 84 | var result string 85 | result += fmt.Sprintf(" Top %s:\n", headerLabel) 86 | for i, ic := range itemCounts { 87 | if i >= limit { 88 | break 89 | } 90 | result += fmt.Sprintf(" - %s (%d %s)\n", ic.name, ic.count, itemLabel) 91 | } 92 | return result 93 | } 94 | 95 | -------------------------------------------------------------------------------- /cmd/podtrace/export_test.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bytes" 5 | "io" 6 | "os" 7 | "strings" 8 | "testing" 9 | 10 | "github.com/podtrace/podtrace/internal/diagnose" 11 | "github.com/podtrace/podtrace/internal/events" 12 | ) 13 | 14 | func TestExportReport_JSON(t *testing.T) { 15 | d := diagnose.NewDiagnostician() 16 | d.AddEvent(&events.Event{Type: events.EventDNS, LatencyNS: 5000000, Target: "example.com"}) 17 | d.Finish() 18 | 19 | originalStdout := os.Stdout 20 | r, w, _ := os.Pipe() 21 | os.Stdout = w 22 | 23 | err := exportReport("test report", "json", d) 24 | _ = w.Close() 25 | os.Stdout = originalStdout 26 | 27 | if err == nil { 28 | var buf bytes.Buffer 29 | _, _ = io.Copy(&buf, r) 30 | t.Logf("JSON export test completed, output length: %d", buf.Len()) 31 | } 32 | } 33 | 34 | func TestExportReport_CSV(t *testing.T) { 35 | d := diagnose.NewDiagnostician() 36 | d.AddEvent(&events.Event{Type: events.EventDNS, LatencyNS: 5000000, Target: "example.com"}) 37 | d.Finish() 38 | 39 | var buf bytes.Buffer 40 | originalStdout := os.Stdout 41 | r, w, _ := os.Pipe() 42 | os.Stdout = w 43 | 44 | err := exportReport("test report", "csv", d) 45 | _ = w.Close() 46 | os.Stdout = originalStdout 47 | 48 | if err == nil { 49 | _, _ = io.Copy(&buf, r) 50 | t.Logf("CSV export test completed, output length: %d", buf.Len()) 51 | } 52 | } 53 | 54 | func TestExportReport_InvalidFormat(t *testing.T) { 55 | d := diagnose.NewDiagnostician() 56 | err := exportReport("test report", "invalid", d) 57 | 58 | if err == 
nil { 59 | t.Error("Expected error for invalid format") 60 | } 61 | 62 | if err != nil && !strings.Contains(err.Error(), "unsupported") { 63 | t.Errorf("Expected error message to contain 'unsupported', got: %v", err) 64 | } 65 | } 66 | 67 | func TestExportReport_FormatVariations(t *testing.T) { 68 | d := diagnose.NewDiagnostician() 69 | d.AddEvent(&events.Event{Type: events.EventDNS, LatencyNS: 5000000, Target: "example.com"}) 70 | d.Finish() 71 | 72 | tests := []struct { 73 | name string 74 | format string 75 | expectError bool 76 | }{ 77 | {"uppercase JSON", "JSON", false}, 78 | {"uppercase CSV", "CSV", false}, 79 | {"mixed case", "Json", false}, 80 | {"with spaces", " json ", false}, 81 | {"invalid format", "xml", true}, 82 | } 83 | 84 | for _, tt := range tests { 85 | t.Run(tt.name, func(t *testing.T) { 86 | originalStdout := os.Stdout 87 | r, w, _ := os.Pipe() 88 | os.Stdout = w 89 | 90 | err := exportReport("test report", tt.format, d) 91 | _ = w.Close() 92 | os.Stdout = originalStdout 93 | 94 | if tt.expectError && err == nil { 95 | t.Error("Expected error but got none") 96 | } 97 | if !tt.expectError && err != nil { 98 | t.Errorf("Unexpected error: %v", err) 99 | } 100 | if !tt.expectError { 101 | _, _ = io.Copy(io.Discard, r) 102 | } 103 | }) 104 | } 105 | } 106 | -------------------------------------------------------------------------------- /test/test-pods-full.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Namespace 3 | metadata: 4 | name: podtrace-test 5 | --- 6 | apiVersion: v1 7 | kind: Pod 8 | metadata: 9 | name: nginx-cpu-test 10 | namespace: podtrace-test 11 | labels: 12 | app: nginx-cpu 13 | spec: 14 | containers: 15 | - name: nginx 16 | image: nginx:latest 17 | resources: 18 | requests: 19 | cpu: "100m" 20 | memory: "128Mi" 21 | limits: 22 | cpu: "500m" 23 | memory: "256Mi" 24 | command: 25 | - sh 26 | - -c 27 | - | 28 | nginx # nginx daemonizes here; the loop below keeps the container alive and generates CPU load 29 | while true; do 30 | for i in $(seq 1 1000); do # {1..1000} is a bashism; the image's /bin/sh is POSIX and needs seq 31 | echo "CPU test $i" > /dev/null 32 | done 33 | sleep 0.1 34 | done 35 | --- 36 | apiVersion: v1 37 | kind: Pod 38 | metadata: 39 | name: network-test 40 | namespace: podtrace-test 41 | labels: 42 | app: network-test 43 | spec: 44 | containers: 45 | - name: network 46 | image: curlimages/curl:latest 47 | command: 48 | - sh 49 | - -c 50 | - | 51 | while true; do 52 | nslookup google.com || true 53 | curl -s -o /dev/null http://httpbin.org/get || true 54 | sleep 1 55 | done 56 | --- 57 | apiVersion: v1 58 | kind: Pod 59 | metadata: 60 | name: io-test 61 | namespace: podtrace-test 62 | labels: 63 | app: io-test 64 | spec: 65 | containers: 66 | - name: io 67 | image: busybox:latest 68 | command: 69 | - sh 70 | - -c 71 | - | 72 | while true; do 73 | echo "I/O test $(date)" >> /tmp/io-test.log 74 | sync 75 | sleep 0.5 76 | done 77 | --- 78 | apiVersion: v1 79 | kind: Pod 80 | metadata: 81 | name: memory-test 82 | namespace: podtrace-test 83 | labels: 84 | app: memory-test 85 | spec: 86 | containers: 87 | - name: memory 88 | image: python:3.9-slim 89 | command: 90 | - python3 91 | - -c 92 | - | 93 | import time 94 | data = [] 95 | while True: 96 | data.append(bytearray(1024 * 1024)) 97 | time.sleep(0.1) 98 | if len(data) > 10: 99 | data.pop(0) 100 | --- 101 | apiVersion: v1 102 | kind: Pod 103 | metadata: 104 | name: multithread-test 105 | namespace: podtrace-test 106 | labels: 107 | app: multithread-test 108 | spec: 109 | containers: 110 | - name: multithread 111 | image: python:3.9-slim 112 | command: 113 | - python3 114 | - -c 115 |
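# The block scalar below is handed to "python3 -c" as one inline script: four daemon threads burn CPU in short bursts while the main thread sleeps to keep the pod alive.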
- | 116 | import threading 117 | import time 118 | 119 | def cpu_worker(worker_id): 120 | while True: 121 | sum(range(10000)) 122 | time.sleep(0.01) 123 | 124 | threads = [] 125 | for i in range(4): 126 | t = threading.Thread(target=cpu_worker, args=(i,)) 127 | t.daemon = True 128 | t.start() 129 | threads.append(t) 130 | 131 | while True: 132 | time.sleep(1) 133 | -------------------------------------------------------------------------------- /internal/kubernetes/service_resolver.go: -------------------------------------------------------------------------------- 1 | package kubernetes 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "sync" 7 | "time" 8 | 9 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 10 | "k8s.io/client-go/kubernetes" 11 | ) 12 | 13 | type ServiceInfo struct { 14 | Name string 15 | Namespace string 16 | Port int 17 | } 18 | 19 | type ServiceResolver struct { 20 | clientset kubernetes.Interface 21 | endpointCache *sync.Map 22 | cacheTTL time.Duration 23 | informerCache *InformerCache 24 | } 25 | 26 | type endpointCacheEntry struct { 27 | serviceInfo ServiceInfo 28 | expiresAt time.Time 29 | } 30 | 31 | func NewServiceResolver(clientset kubernetes.Interface) *ServiceResolver { 32 | return NewServiceResolverWithCache(clientset, nil) 33 | } 34 | 35 | func NewServiceResolverWithCache(clientset kubernetes.Interface, ic *InformerCache) *ServiceResolver { 36 | ttl := time.Duration(getIntEnvOrDefault("PODTRACE_K8S_CACHE_TTL", 300)) * time.Second 37 | return &ServiceResolver{ 38 | clientset: clientset, 39 | endpointCache: &sync.Map{}, 40 | cacheTTL: ttl, 41 | informerCache: ic, 42 | } 43 | } 44 | 45 | func (sr *ServiceResolver) ResolveService(ctx context.Context, ip string, port int) *ServiceInfo { 46 | if ip == "" || port == 0 || sr.clientset == nil { 47 | if sr.informerCache != nil && ip != "" { 48 | return sr.informerCache.GetServiceByEndpoint(ip, port) 49 | } 50 | return nil 51 | } 52 | 53 | if sr.informerCache != nil { 54 | if svc := sr.informerCache.GetServiceByEndpoint(ip, port); svc != nil { 55 | return svc 56 | } 57 | } 58 | 59 | cacheKey := fmt.Sprintf("%s:%d", ip, port) 60 | if cached, ok := sr.endpointCache.Load(cacheKey); ok { 61 | entry := cached.(*endpointCacheEntry) 62 | if time.Now().Before(entry.expiresAt) { 63 | return &entry.serviceInfo 64 | } 65 | sr.endpointCache.Delete(cacheKey) 66 | } 67 | 68 | serviceInfo := sr.fetchServiceByEndpoint(ctx, ip, port) 69 | if serviceInfo != nil { 70 | sr.endpointCache.Store(cacheKey, &endpointCacheEntry{ 71 | serviceInfo: *serviceInfo, 72 | expiresAt: time.Now().Add(sr.cacheTTL), 73 | }) 74 | } 75 | 76 | return serviceInfo 77 | } 78 | 79 | func (sr *ServiceResolver) fetchServiceByEndpoint(ctx context.Context, ip string, port int) *ServiceInfo { 80 | endpointsList, err := sr.clientset.CoreV1().Endpoints(metav1.NamespaceAll).List(ctx, metav1.ListOptions{}) 81 | if err != nil { 82 | return nil 83 | } 84 | 85 | for _, endpoint := range endpointsList.Items { 86 | for _, subset := range endpoint.Subsets { 87 | for _, addr := range subset.Addresses { 88 | if addr.IP == ip { 89 | for _, epPort := range subset.Ports { 90 | if int(epPort.Port) == port { 91 | return &ServiceInfo{ 92 | Name: endpoint.Name, 93 | Namespace: endpoint.Namespace, 94 | Port: port, 95 | } 96 | } 97 | } 98 | } 99 | } 100 | } 101 | } 102 | 103 | return nil 104 | } 105 | -------------------------------------------------------------------------------- /internal/alerting/webhook.go: -------------------------------------------------------------------------------- 1 | 
package alerting 2 | 3 | import ( 4 | "bytes" 5 | "context" 6 | "encoding/json" 7 | "fmt" 8 | "io" 9 | "net/http" 10 | "net/url" 11 | "strings" 12 | "time" 13 | 14 | "github.com/podtrace/podtrace/internal/config" 15 | ) 16 | 17 | type WebhookSender struct { 18 | url string 19 | client *http.Client 20 | timeout time.Duration 21 | } 22 | 23 | func NewWebhookSender(webhookURL string, timeout time.Duration) (*WebhookSender, error) { 24 | if webhookURL == "" { 25 | return nil, fmt.Errorf("webhook URL is required") 26 | } 27 | parsedURL, err := url.Parse(webhookURL) 28 | if err != nil { 29 | return nil, fmt.Errorf("invalid webhook URL: %w", err) 30 | } 31 | if parsedURL.Scheme != "http" && parsedURL.Scheme != "https" { 32 | return nil, fmt.Errorf("webhook URL must use http or https scheme") 33 | } 34 | host := strings.ToLower(parsedURL.Hostname()) 35 | if host != "localhost" && host != "127.0.0.1" && host != "::1" { 36 | if parsedURL.Scheme == "http" { 37 | return nil, fmt.Errorf("non-localhost URLs must use https") 38 | } 39 | } 40 | return &WebhookSender{ 41 | url: webhookURL, 42 | client: &http.Client{Timeout: timeout}, 43 | timeout: timeout, 44 | }, nil 45 | } 46 | 47 | func (w *WebhookSender) Send(ctx context.Context, alert *Alert) error { 48 | if alert == nil { 49 | return fmt.Errorf("alert is nil") 50 | } 51 | payload := map[string]interface{}{ 52 | "severity": string(alert.Severity), 53 | "title": alert.Title, 54 | "message": alert.Message, 55 | "timestamp": alert.Timestamp.Format(time.RFC3339), 56 | "source": alert.Source, 57 | "pod": alert.PodName, 58 | "namespace": alert.Namespace, 59 | "context": alert.Context, 60 | } 61 | if alert.ErrorCode != "" { 62 | payload["error_code"] = alert.ErrorCode 63 | } 64 | if len(alert.Recommendations) > 0 { 65 | payload["recommendations"] = alert.Recommendations 66 | } 67 | jsonData, err := json.Marshal(payload) 68 | if err != nil { 69 | return fmt.Errorf("failed to marshal alert: %w", err) 70 | } 71 | if int64(len(jsonData)) > config.AlertMaxPayloadSize { 72 | return fmt.Errorf("payload size %d exceeds maximum %d", len(jsonData), config.AlertMaxPayloadSize) 73 | } 74 | req, err := http.NewRequestWithContext(ctx, "POST", w.url, bytes.NewReader(jsonData)) 75 | if err != nil { 76 | return fmt.Errorf("failed to create request: %w", err) 77 | } 78 | req.Header.Set("Content-Type", "application/json") 79 | req.Header.Set("User-Agent", config.GetUserAgent()) 80 | resp, err := w.client.Do(req) 81 | if err != nil { 82 | return fmt.Errorf("failed to send request: %w", err) 83 | } 84 | defer func() { 85 | _, _ = io.Copy(io.Discard, resp.Body) 86 | _ = resp.Body.Close() 87 | }() 88 | if resp.StatusCode < 200 || resp.StatusCode >= 300 { 89 | bodyBytes, _ := io.ReadAll(io.LimitReader(resp.Body, 512)) 90 | return fmt.Errorf("unexpected status code %d: %s", resp.StatusCode, string(bodyBytes)) 91 | } 92 | return nil 93 | } 94 | 95 | func (w *WebhookSender) Name() string { 96 | return "webhook" 97 | } 98 | -------------------------------------------------------------------------------- /cmd/podtrace/main_test.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "os" 5 | "testing" 6 | "time" 7 | ) 8 | 9 | func TestMain_CommandExecution(t *testing.T) { 10 | if testing.Short() { 11 | t.Skip("Skipping main function test in short mode") 12 | } 13 | 14 | origArgs := os.Args 15 | defer func() { os.Args = origArgs }() 16 | 17 | os.Args = []string{"podtrace", "--help"} 18 | 19 | oldExit := exitFunc 20 | 
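// exitFunc is assumed to be a package-level func(int) seam (likely defined in mocks.go); swapping it out lets the test observe exit requests without terminating the test binary.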
exited := false 21 | exitFunc = func(code int) { 22 | exited = true 23 | } 24 | defer func() { exitFunc = oldExit }() 25 | 26 | done := make(chan bool, 1) 27 | go func() { 28 | main() 29 | done <- true 30 | }() 31 | 32 | select { 33 | case <-done: 34 | if !exited { 35 | t.Log("main function executed (help command)") 36 | } 37 | case <-time.After(1 * time.Second): 38 | t.Log("main function test completed") 39 | } 40 | } 41 | 42 | func TestMain_InvalidArgs(t *testing.T) { 43 | if testing.Short() { 44 | t.Skip("Skipping main function test in short mode") 45 | } 46 | 47 | origArgs := os.Args 48 | defer func() { os.Args = origArgs }() 49 | 50 | os.Args = []string{"podtrace"} 51 | 52 | oldExit := exitFunc 53 | exited := false 54 | exitFunc = func(code int) { 55 | exited = true 56 | } 57 | defer func() { exitFunc = oldExit }() 58 | 59 | done := make(chan bool, 1) 60 | go func() { 61 | main() 62 | done <- true 63 | }() 64 | 65 | select { 66 | case <-done: 67 | if !exited { 68 | t.Log("main function executed (invalid args)") 69 | } 70 | case <-time.After(1 * time.Second): 71 | t.Log("main function test completed") 72 | } 73 | } 74 | 75 | func TestMain_LogLevel(t *testing.T) { 76 | origLogLevel := logLevel 77 | origArgs := os.Args 78 | defer func() { 79 | logLevel = origLogLevel 80 | os.Args = origArgs 81 | }() 82 | 83 | os.Args = []string{"podtrace", "--log-level", "debug", "--help"} 84 | 85 | oldExit := exitFunc 86 | exitFunc = func(code int) { 87 | } 88 | defer func() { exitFunc = oldExit }() 89 | 90 | done := make(chan bool, 1) 91 | go func() { 92 | main() 93 | done <- true 94 | }() 95 | 96 | select { 97 | case <-done: 98 | t.Log("main function executed with log level") 99 | case <-time.After(1 * time.Second): 100 | t.Log("main function test completed") 101 | } 102 | } 103 | 104 | func TestMain_CommandError(t *testing.T) { 105 | if testing.Short() { 106 | t.Skip("Skipping main function test in short mode") 107 | } 108 | 109 | origArgs := os.Args 110 | defer func() { os.Args = origArgs }() 111 | 112 | os.Args = []string{"podtrace", "test-pod", "--invalid-flag"} 113 | 114 | oldExit := exitFunc 115 | exited := false 116 | exitFunc = func(code int) { 117 | exited = true 118 | } 119 | defer func() { exitFunc = oldExit }() 120 | 121 | done := make(chan bool, 1) 122 | go func() { 123 | main() 124 | done <- true 125 | }() 126 | 127 | select { 128 | case <-done: 129 | if !exited { 130 | t.Log("main function executed (command error)") 131 | } 132 | case <-time.After(1 * time.Second): 133 | t.Log("main function test completed") 134 | } 135 | } 136 | -------------------------------------------------------------------------------- /internal/ebpf/loader/errors_test.go: -------------------------------------------------------------------------------- 1 | package loader 2 | 3 | import ( 4 | "errors" 5 | "testing" 6 | ) 7 | 8 | func TestLoaderError_Error_WithErr(t *testing.T) { 9 | originalErr := errors.New("underlying error") 10 | loaderErr := &LoaderError{ 11 | Code: ErrCodeLoadFailed, 12 | Message: "test message", 13 | Err: originalErr, 14 | } 15 | 16 | errStr := loaderErr.Error() 17 | if errStr == "" { 18 | t.Error("Error() should return non-empty string") 19 | } 20 | if !contains(errStr, "test message") { 21 | t.Errorf("Expected error string to contain 'test message', got %q", errStr) 22 | } 23 | if !contains(errStr, "underlying error") { 24 | t.Errorf("Expected error string to contain 'underlying error', got %q", errStr) 25 | } 26 | } 27 | 28 | func TestLoaderError_Error_WithoutErr(t *testing.T) { 29 | 
loaderErr := &LoaderError{ 30 | Code: ErrCodeLoadFailed, 31 | Message: "test message", 32 | Err: nil, 33 | } 34 | 35 | errStr := loaderErr.Error() 36 | if errStr != "test message" { 37 | t.Errorf("Expected error string 'test message', got %q", errStr) 38 | } 39 | } 40 | 41 | func TestLoaderError_Unwrap(t *testing.T) { 42 | originalErr := errors.New("underlying error") 43 | loaderErr := &LoaderError{ 44 | Code: ErrCodeLoadFailed, 45 | Message: "test message", 46 | Err: originalErr, 47 | } 48 | 49 | unwrapped := loaderErr.Unwrap() 50 | if unwrapped != originalErr { 51 | t.Errorf("Expected unwrapped error to be original error, got %v", unwrapped) 52 | } 53 | } 54 | 55 | func TestLoaderError_Unwrap_Nil(t *testing.T) { 56 | loaderErr := &LoaderError{ 57 | Code: ErrCodeLoadFailed, 58 | Message: "test message", 59 | Err: nil, 60 | } 61 | 62 | unwrapped := loaderErr.Unwrap() 63 | if unwrapped != nil { 64 | t.Errorf("Expected unwrapped error to be nil, got %v", unwrapped) 65 | } 66 | } 67 | 68 | func TestNewLoadError(t *testing.T) { 69 | originalErr := errors.New("file not found") 70 | loaderErr := NewLoadError("/path/to/bpf.o", originalErr) 71 | 72 | if loaderErr == nil { 73 | t.Fatal("NewLoadError returned nil") 74 | } 75 | if loaderErr.Code != ErrCodeLoadFailed { 76 | t.Errorf("Expected error code %d, got %d", ErrCodeLoadFailed, loaderErr.Code) 77 | } 78 | if !contains(loaderErr.Message, "failed to load eBPF program") { 79 | t.Errorf("Expected message to contain 'failed to load eBPF program', got %q", loaderErr.Message) 80 | } 81 | if !contains(loaderErr.Message, "/path/to/bpf.o") { 82 | t.Errorf("Expected message to contain '/path/to/bpf.o', got %q", loaderErr.Message) 83 | } 84 | if loaderErr.Unwrap() != originalErr { 85 | t.Errorf("Expected unwrapped error to be original error") 86 | } 87 | } 88 | 89 | func contains(s, substr string) bool { 90 | return len(s) >= len(substr) && (s == substr || 91 | (len(s) > len(substr) && containsMiddle(s, substr))) 92 | } 93 | 94 | func containsMiddle(s, substr string) bool { 95 | for i := 0; i <= len(s)-len(substr); i++ { 96 | if s[i:i+len(substr)] == substr { 97 | return true 98 | } 99 | } 100 | return false 101 | } 102 | 103 | -------------------------------------------------------------------------------- /internal/tracing/extractor/http_test.go: -------------------------------------------------------------------------------- 1 | package extractor 2 | 3 | import ( 4 | "net/http" 5 | "testing" 6 | ) 7 | 8 | func TestHTTPExtractor_ExtractFromHeaders(t *testing.T) { 9 | extractor := NewHTTPExtractor() 10 | 11 | tests := []struct { 12 | name string 13 | headers map[string]string 14 | wantNil bool 15 | }{ 16 | {"W3C traceparent", map[string]string{"traceparent": "00-4bf92f3577b34da6a3ce929d0e0e4736-00f067aa0ba902b7-01"}, false}, 17 | {"B3 headers", map[string]string{"x-b3-traceid": "abc", "x-b3-spanid": "def"}, false}, 18 | {"Splunk", map[string]string{"x-splunk-requestid": "req123"}, false}, 19 | {"empty", map[string]string{}, true}, 20 | {"no trace headers", map[string]string{"content-type": "application/json"}, true}, 21 | } 22 | 23 | for _, tt := range tests { 24 | t.Run(tt.name, func(t *testing.T) { 25 | tc := extractor.ExtractFromHeaders(tt.headers) 26 | if (tc == nil) != tt.wantNil { 27 | t.Errorf("ExtractFromHeaders() = %v, want nil = %v", tc, tt.wantNil) 28 | } 29 | }) 30 | } 31 | } 32 | 33 | func TestHTTPExtractor_ExtractFromHTTPRequest(t *testing.T) { 34 | extractor := NewHTTPExtractor() 35 | 36 | req, _ := http.NewRequest("GET", 
"http://example.com", nil) 37 | req.Header.Set("traceparent", "00-4bf92f3577b34da6a3ce929d0e0e4736-00f067aa0ba902b7-01") 38 | 39 | tc := extractor.ExtractFromHTTPRequest(req) 40 | if tc == nil { 41 | t.Fatal("ExtractFromHTTPRequest() returned nil") 42 | } 43 | if tc.TraceID == "" { 44 | t.Error("TraceID should be extracted") 45 | } 46 | } 47 | 48 | func TestHTTPExtractor_ExtractFromHTTPResponse(t *testing.T) { 49 | extractor := NewHTTPExtractor() 50 | 51 | resp := &http.Response{ 52 | Header: make(http.Header), 53 | } 54 | resp.Header.Set("traceparent", "00-4bf92f3577b34da6a3ce929d0e0e4736-00f067aa0ba902b7-01") 55 | 56 | tc := extractor.ExtractFromHTTPResponse(resp) 57 | if tc == nil { 58 | t.Fatal("ExtractFromHTTPResponse() returned nil") 59 | } 60 | if tc.TraceID == "" { 61 | t.Error("TraceID should be extracted") 62 | } 63 | } 64 | 65 | func TestHTTPExtractor_ExtractFromRawHeaders(t *testing.T) { 66 | extractor := NewHTTPExtractor() 67 | 68 | rawHeaders := "traceparent: 00-4bf92f3577b34da6a3ce929d0e0e4736-00f067aa0ba902b7-01\r\ncontent-type: application/json" 69 | 70 | tc := extractor.ExtractFromRawHeaders(rawHeaders) 71 | if tc == nil { 72 | t.Fatal("ExtractFromRawHeaders() returned nil") 73 | } 74 | if tc.TraceID == "" { 75 | t.Error("TraceID should be extracted") 76 | } 77 | } 78 | 79 | func TestHTTPExtractor_ExtractFromRawHeaders_Empty(t *testing.T) { 80 | extractor := NewHTTPExtractor() 81 | tc := extractor.ExtractFromRawHeaders("") 82 | if tc != nil { 83 | t.Error("ExtractFromRawHeaders(\"\") should return nil") 84 | } 85 | } 86 | 87 | func TestParseRawHeaders(t *testing.T) { 88 | raw := "header1: value1\r\nheader2: value2\r\n\r\n" 89 | headers := parseRawHeaders(raw) 90 | 91 | if len(headers) != 2 { 92 | t.Errorf("Expected 2 headers, got %d", len(headers)) 93 | } 94 | if headers["header1"] != "value1" { 95 | t.Errorf("header1 = %s, want value1", headers["header1"]) 96 | } 97 | if headers["header2"] != "value2" { 98 | t.Errorf("header2 = %s, want value2", headers["header2"]) 99 | } 100 | } 101 | -------------------------------------------------------------------------------- /internal/ebpf/cache/lru.go: -------------------------------------------------------------------------------- 1 | package cache 2 | 3 | import ( 4 | "container/list" 5 | "sync" 6 | "time" 7 | 8 | "github.com/podtrace/podtrace/internal/config" 9 | "github.com/podtrace/podtrace/internal/metricsexporter" 10 | "github.com/podtrace/podtrace/internal/validation" 11 | ) 12 | 13 | type cacheEntry struct { 14 | pid uint32 15 | name string 16 | expiresAt time.Time 17 | element *list.Element 18 | } 19 | 20 | type LRUCache struct { 21 | cache map[uint32]*list.Element 22 | list *list.List 23 | maxSize int 24 | ttl time.Duration 25 | mutex sync.RWMutex 26 | stopCleanup chan struct{} 27 | } 28 | 29 | func NewLRUCache(maxSize int, ttl time.Duration) *LRUCache { 30 | c := &LRUCache{ 31 | cache: make(map[uint32]*list.Element), 32 | list: list.New(), 33 | maxSize: maxSize, 34 | ttl: ttl, 35 | stopCleanup: make(chan struct{}), 36 | } 37 | go c.cleanupExpired() 38 | return c 39 | } 40 | 41 | func (c *LRUCache) Get(pid uint32) (string, bool) { 42 | if !validation.ValidatePID(pid) { 43 | return "", false 44 | } 45 | 46 | c.mutex.Lock() 47 | defer c.mutex.Unlock() 48 | 49 | elem, ok := c.cache[pid] 50 | if !ok { 51 | return "", false 52 | } 53 | 54 | entry := elem.Value.(*cacheEntry) 55 | if time.Now().After(entry.expiresAt) { 56 | delete(c.cache, pid) 57 | c.list.Remove(elem) 58 | return "", false 59 | } 60 | 61 | 
c.list.MoveToFront(elem) 62 | metricsexporter.RecordProcessCacheHit() 63 | return entry.name, true 64 | } 65 | 66 | func (c *LRUCache) Set(pid uint32, name string) { 67 | if !validation.ValidatePID(pid) { 68 | return 69 | } 70 | 71 | c.mutex.Lock() 72 | defer c.mutex.Unlock() 73 | 74 | if elem, ok := c.cache[pid]; ok { 75 | entry := elem.Value.(*cacheEntry) 76 | entry.name = name 77 | entry.expiresAt = time.Now().Add(c.ttl) 78 | c.list.MoveToFront(elem) 79 | return 80 | } 81 | 82 | if len(c.cache) >= c.maxSize { 83 | c.evict() 84 | } 85 | 86 | entry := &cacheEntry{ 87 | pid: pid, 88 | name: name, 89 | expiresAt: time.Now().Add(c.ttl), 90 | } 91 | elem := c.list.PushFront(entry) 92 | entry.element = elem 93 | c.cache[pid] = elem 94 | } 95 | 96 | func (c *LRUCache) evict() { 97 | evictTarget := int(float64(c.maxSize) * config.CacheEvictionThreshold) 98 | for len(c.cache) >= evictTarget { 99 | back := c.list.Back() 100 | if back == nil { 101 | break 102 | } 103 | entry := back.Value.(*cacheEntry) 104 | delete(c.cache, entry.pid) 105 | c.list.Remove(back) 106 | } 107 | } 108 | 109 | func (c *LRUCache) cleanupExpired() { 110 | ticker := time.NewTicker(c.ttl / 2) 111 | defer ticker.Stop() 112 | 113 | for { 114 | select { 115 | case <-ticker.C: 116 | c.mutex.Lock() 117 | now := time.Now() 118 | var toRemove []*list.Element 119 | for _, elem := range c.cache { 120 | entry := elem.Value.(*cacheEntry) 121 | if now.After(entry.expiresAt) { 122 | toRemove = append(toRemove, elem) 123 | } 124 | } 125 | for _, elem := range toRemove { 126 | entry := elem.Value.(*cacheEntry) 127 | delete(c.cache, entry.pid) 128 | c.list.Remove(elem) 129 | } 130 | c.mutex.Unlock() 131 | case <-c.stopCleanup: 132 | return 133 | } 134 | } 135 | } 136 | 137 | func (c *LRUCache) Close() { 138 | close(c.stopCleanup) 139 | } 140 | 141 | -------------------------------------------------------------------------------- /internal/diagnose/analyzer/tls_test.go: -------------------------------------------------------------------------------- 1 | package analyzer 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/podtrace/podtrace/internal/events" 7 | ) 8 | 9 | func TestAnalyzeTLS(t *testing.T) { 10 | tests := []struct { 11 | name string 12 | events []*events.Event 13 | wantAvgLatency float64 14 | wantMaxLatency float64 15 | wantErrors int 16 | wantP50 float64 17 | wantP95 float64 18 | wantP99 float64 19 | }{ 20 | { 21 | name: "empty events", 22 | events: []*events.Event{}, 23 | wantAvgLatency: 0, 24 | wantMaxLatency: 0, 25 | wantErrors: 0, 26 | wantP50: 0, 27 | wantP95: 0, 28 | wantP99: 0, 29 | }, 30 | { 31 | name: "single successful handshake", 32 | events: []*events.Event{ 33 | { 34 | Type: events.EventTLSHandshake, 35 | LatencyNS: 100000000, 36 | Error: 0, 37 | }, 38 | }, 39 | wantAvgLatency: 100.0, 40 | wantMaxLatency: 100.0, 41 | wantErrors: 0, 42 | wantP50: 100.0, 43 | wantP95: 100.0, 44 | wantP99: 100.0, 45 | }, 46 | { 47 | name: "multiple handshakes with errors", 48 | events: []*events.Event{ 49 | { 50 | Type: events.EventTLSHandshake, 51 | LatencyNS: 50000000, 52 | Error: 0, 53 | Target: "example.com:443", 54 | }, 55 | { 56 | Type: events.EventTLSHandshake, 57 | LatencyNS: 200000000, 58 | Error: -1, 59 | Target: "bad.example.com:443", 60 | }, 61 | { 62 | Type: events.EventTLSHandshake, 63 | LatencyNS: 150000000, 64 | Error: 0, 65 | Target: "example.com:443", 66 | }, 67 | }, 68 | wantAvgLatency: 133.33, 69 | wantMaxLatency: 200.0, 70 | wantErrors: 1, 71 | wantP50: 150.0, 72 | wantP95: 150.0, 73 | wantP99: 150.0, 74 | }, 75 | 
} 76 | 77 | for _, tt := range tests { 78 | t.Run(tt.name, func(t *testing.T) { 79 | avgLatency, maxLatency, errors, p50, p95, p99, errorBreakdown, topTargets := AnalyzeTLS(tt.events) 80 | 81 | if errors != tt.wantErrors { 82 | t.Errorf("AnalyzeTLS() errors = %v, want %v", errors, tt.wantErrors) 83 | } 84 | 85 | if len(tt.events) == 0 { 86 | return 87 | } 88 | 89 | if avgLatency < tt.wantAvgLatency-1 || avgLatency > tt.wantAvgLatency+1 { 90 | t.Errorf("AnalyzeTLS() avgLatency = %v, want %v", avgLatency, tt.wantAvgLatency) 91 | } 92 | 93 | if maxLatency != tt.wantMaxLatency { 94 | t.Errorf("AnalyzeTLS() maxLatency = %v, want %v", maxLatency, tt.wantMaxLatency) 95 | } 96 | 97 | if p50 < tt.wantP50-1 || p50 > tt.wantP50+1 { 98 | t.Errorf("AnalyzeTLS() p50 = %v, want %v", p50, tt.wantP50) 99 | } 100 | 101 | if p95 < tt.wantP95-1 || p95 > tt.wantP95+1 { 102 | t.Errorf("AnalyzeTLS() p95 = %v, want %v", p95, tt.wantP95) 103 | } 104 | 105 | if p99 < tt.wantP99-1 || p99 > tt.wantP99+1 { 106 | t.Errorf("AnalyzeTLS() p99 = %v, want %v", p99, tt.wantP99) 107 | } 108 | 109 | if tt.wantErrors > 0 && len(errorBreakdown) == 0 { 110 | t.Errorf("AnalyzeTLS() expected error breakdown but got none") 111 | } 112 | 113 | if len(tt.events) > 1 && len(topTargets) == 0 { 114 | t.Errorf("AnalyzeTLS() expected top targets but got none") 115 | } 116 | }) 117 | } 118 | } 119 | 120 | -------------------------------------------------------------------------------- /internal/diagnose/tracker/pod_communication_test.go: -------------------------------------------------------------------------------- 1 | package tracker 2 | 3 | import ( 4 | "testing" 5 | "time" 6 | 7 | "github.com/podtrace/podtrace/internal/events" 8 | ) 9 | 10 | func TestPodCommunicationTracker_ProcessEvent(t *testing.T) { 11 | tracker := NewPodCommunicationTracker("source-pod", "default") 12 | 13 | event := &events.Event{ 14 | Type: events.EventConnect, 15 | Target: "10.244.1.5:8080", 16 | Timestamp: uint64(time.Now().UnixNano()), 17 | Error: 0, 18 | } 19 | 20 | k8sContext := map[string]interface{}{ 21 | "target_pod": "target-pod", 22 | "target_service": "test-service", 23 | "target_namespace": "default", 24 | } 25 | 26 | tracker.ProcessEvent(event, k8sContext) 27 | 28 | summaries := tracker.GetSummary() 29 | if len(summaries) == 0 { 30 | t.Fatal("expected at least one communication summary") 31 | } 32 | 33 | if summaries[0].Target != "test-service" { 34 | t.Errorf("expected target 'test-service', got %q", summaries[0].Target) 35 | } 36 | 37 | if summaries[0].Namespace != "default" { 38 | t.Errorf("expected namespace 'default', got %q", summaries[0].Namespace) 39 | } 40 | } 41 | 42 | func TestPodCommunicationTracker_ProcessEvent_NoContext(t *testing.T) { 43 | tracker := NewPodCommunicationTracker("source-pod", "default") 44 | 45 | event := &events.Event{ 46 | Type: events.EventConnect, 47 | Target: "10.244.1.5:8080", 48 | Timestamp: uint64(time.Now().UnixNano()), 49 | } 50 | 51 | tracker.ProcessEvent(event, nil) 52 | 53 | summaries := tracker.GetSummary() 54 | if len(summaries) != 0 { 55 | t.Errorf("expected no summaries without context, got %d", len(summaries)) 56 | } 57 | } 58 | 59 | func TestPodCommunicationTracker_ProcessEvent_NonNetwork(t *testing.T) { 60 | tracker := NewPodCommunicationTracker("source-pod", "default") 61 | 62 | event := &events.Event{ 63 | Type: events.EventRead, 64 | Target: "file.txt", 65 | Timestamp: uint64(time.Now().UnixNano()), 66 | } 67 | 68 | k8sContext := map[string]interface{}{ 69 | "target_pod": "target-pod", 70 | } 71 | 
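// EventRead is filesystem activity, not a network event, so the tracker is expected to drop it even though Kubernetes context is attached.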
72 | tracker.ProcessEvent(event, k8sContext) 73 | 74 | summaries := tracker.GetSummary() 75 | if len(summaries) != 0 { 76 | t.Errorf("expected no summaries for non-network events, got %d", len(summaries)) 77 | } 78 | } 79 | 80 | func TestGeneratePodCommunicationReport(t *testing.T) { 81 | summaries := []PodCommunicationSummary{ 82 | { 83 | Target: "service-1", 84 | Namespace: "default", 85 | ConnectionCount: 10, 86 | TotalBytes: 1024, 87 | AvgLatency: time.Millisecond * 10, 88 | ErrorCount: 0, 89 | LastSeen: time.Now(), 90 | }, 91 | } 92 | 93 | report := GeneratePodCommunicationReport(summaries) 94 | if report == "" { 95 | t.Fatal("expected non-empty report") 96 | } 97 | 98 | if !contains(report, "service-1") { 99 | t.Error("report should contain service name") 100 | } 101 | } 102 | 103 | func contains(s, substr string) bool { 104 | return len(s) >= len(substr) && (s == substr || len(s) > len(substr) && (s[:len(substr)] == substr || s[len(s)-len(substr):] == substr || containsMiddle(s, substr))) 105 | } 106 | 107 | func containsMiddle(s, substr string) bool { 108 | for i := 0; i <= len(s)-len(substr); i++ { 109 | if s[i:i+len(substr)] == substr { 110 | return true 111 | } 112 | } 113 | return false 114 | } 115 | 116 | -------------------------------------------------------------------------------- /internal/diagnose/tracker/process.go: -------------------------------------------------------------------------------- 1 | package tracker 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | "sort" 7 | "strings" 8 | "github.com/podtrace/podtrace/internal/config" 9 | "github.com/podtrace/podtrace/internal/events" 10 | "github.com/podtrace/podtrace/internal/validation" 11 | ) 12 | 13 | type PidInfo struct { 14 | Pid uint32 15 | Name string 16 | Count int 17 | Percentage float64 18 | } 19 | 20 | func AnalyzeProcessActivity(events []*events.Event) []PidInfo { 21 | pidMap := make(map[uint32]int) 22 | totalEvents := len(events) 23 | 24 | for _, e := range events { 25 | pidMap[e.PID]++ 26 | } 27 | 28 | var pidInfos []PidInfo 29 | for pid, count := range pidMap { 30 | percentage := float64(count) / float64(totalEvents) * 100 31 | name := "" 32 | for _, e := range events { 33 | if e.PID == pid && e.ProcessName != "" { 34 | name = e.ProcessName 35 | break 36 | } 37 | } 38 | if name == "" { 39 | name = getProcessName(pid) 40 | } 41 | if name == "" { 42 | name = "unknown" 43 | } 44 | pidInfos = append(pidInfos, PidInfo{ 45 | Pid: pid, 46 | Name: name, 47 | Count: count, 48 | Percentage: percentage, 49 | }) 50 | } 51 | 52 | sort.Slice(pidInfos, func(i, j int) bool { 53 | return pidInfos[i].Count > pidInfos[j].Count 54 | }) 55 | 56 | return pidInfos 57 | } 58 | 59 | func getProcessName(pid uint32) string { 60 | name := getProcessNameFromProc(pid) 61 | return validation.SanitizeProcessName(name) 62 | } 63 | 64 | func getProcessNameFromProc(pid uint32) string { 65 | if !validation.ValidatePID(pid) { 66 | return "" 67 | } 68 | 69 | name := "" 70 | 71 | statPath := fmt.Sprintf("%s/%d/stat", config.ProcBasePath, pid) 72 | if data, err := os.ReadFile(statPath); err == nil { 73 | statStr := string(data) 74 | start := strings.Index(statStr, "(") 75 | end := strings.LastIndex(statStr, ")") 76 | if start >= 0 && end > start { 77 | name = statStr[start+1 : end] 78 | } 79 | } 80 | 81 | if name == "" { 82 | commPath := fmt.Sprintf("%s/%d/comm", config.ProcBasePath, pid) 83 | if data, err := os.ReadFile(commPath); err == nil { 84 | name = strings.TrimSpace(string(data)) 85 | } 86 | } 87 | 88 | if name == "" { 89 | cmdlinePath := 
fmt.Sprintf("%s/%d/cmdline", config.ProcBasePath, pid) 90 | if cmdline, err := os.ReadFile(cmdlinePath); err == nil { 91 | parts := strings.Split(string(cmdline), "\x00") 92 | if len(parts) > 0 && parts[0] != "" { 93 | name = parts[0] 94 | if idx := strings.LastIndex(name, "/"); idx >= 0 { 95 | name = name[idx+1:] 96 | } 97 | } 98 | } 99 | } 100 | 101 | if name == "" { 102 | exePath := fmt.Sprintf("%s/%d/exe", config.ProcBasePath, pid) 103 | if link, err := os.Readlink(exePath); err == nil { 104 | if idx := strings.LastIndex(link, "/"); idx >= 0 { 105 | name = link[idx+1:] 106 | } else { 107 | name = link 108 | } 109 | } 110 | } 111 | 112 | if name == "" { 113 | statusPath := fmt.Sprintf("%s/%d/status", config.ProcBasePath, pid) 114 | if data, err := os.ReadFile(statusPath); err == nil { 115 | lines := strings.Split(string(data), "\n") 116 | for _, line := range lines { 117 | if strings.HasPrefix(line, "Name:") { 118 | parts := strings.Fields(line) 119 | if len(parts) >= 2 { 120 | name = parts[1] 121 | break 122 | } 123 | } 124 | } 125 | } 126 | } 127 | 128 | return name 129 | } 130 | -------------------------------------------------------------------------------- /internal/kubernetes/events_correlator.go: -------------------------------------------------------------------------------- 1 | package kubernetes 2 | 3 | import ( 4 | "context" 5 | "sync" 6 | "time" 7 | 8 | corev1 "k8s.io/api/core/v1" 9 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 10 | "k8s.io/apimachinery/pkg/watch" 11 | "k8s.io/client-go/kubernetes" 12 | ) 13 | 14 | type K8sEvent struct { 15 | Type string 16 | Reason string 17 | Message string 18 | Timestamp time.Time 19 | Count int32 20 | } 21 | 22 | type EventsCorrelator struct { 23 | clientset kubernetes.Interface 24 | podName string 25 | namespace string 26 | events []*K8sEvent 27 | mu sync.RWMutex 28 | eventWatcher watch.Interface 29 | stopCh chan struct{} 30 | } 31 | 32 | func NewEventsCorrelator(clientset kubernetes.Interface, podName, namespace string) *EventsCorrelator { 33 | return &EventsCorrelator{ 34 | clientset: clientset, 35 | podName: podName, 36 | namespace: namespace, 37 | events: make([]*K8sEvent, 0), 38 | stopCh: make(chan struct{}), 39 | } 40 | } 41 | 42 | func (ec *EventsCorrelator) Start(ctx context.Context) error { 43 | if ec.clientset == nil { 44 | return nil 45 | } 46 | 47 | watcher, err := ec.clientset.CoreV1().Events(ec.namespace).Watch(ctx, metav1.ListOptions{ 48 | FieldSelector: "involvedObject.name=" + ec.podName, 49 | }) 50 | if err != nil { 51 | return err 52 | } 53 | 54 | ec.eventWatcher = watcher 55 | 56 | go ec.watchEvents(ctx) 57 | return nil 58 | } 59 | 60 | func (ec *EventsCorrelator) watchEvents(ctx context.Context) { 61 | defer func() { 62 | if ec.eventWatcher != nil { 63 | ec.eventWatcher.Stop() 64 | } 65 | }() 66 | 67 | for { 68 | select { 69 | case <-ctx.Done(): 70 | return 71 | case <-ec.stopCh: 72 | return 73 | case event, ok := <-ec.eventWatcher.ResultChan(): 74 | if !ok { 75 | return 76 | } 77 | 78 | if k8sEvent, ok := event.Object.(*corev1.Event); ok { 79 | ec.addEvent(k8sEvent) 80 | } 81 | } 82 | } 83 | } 84 | 85 | func (ec *EventsCorrelator) addEvent(event *corev1.Event) { 86 | if event.InvolvedObject.Name != ec.podName { 87 | return 88 | } 89 | 90 | ec.mu.Lock() 91 | defer ec.mu.Unlock() 92 | 93 | k8sEvent := &K8sEvent{ 94 | Type: event.Type, 95 | Reason: event.Reason, 96 | Message: event.Message, 97 | Timestamp: event.FirstTimestamp.Time, 98 | Count: event.Count, 99 | } 100 | 101 | ec.events = append(ec.events, k8sEvent) 
102 | 103 | maxEvents := 100 104 | if len(ec.events) > maxEvents { 105 | ec.events = ec.events[len(ec.events)-maxEvents:] 106 | } 107 | } 108 | 109 | func (ec *EventsCorrelator) GetEvents() []*K8sEvent { 110 | ec.mu.RLock() 111 | defer ec.mu.RUnlock() 112 | 113 | result := make([]*K8sEvent, len(ec.events)) 114 | copy(result, ec.events) 115 | return result 116 | } 117 | 118 | func (ec *EventsCorrelator) Stop() { 119 | close(ec.stopCh) 120 | if ec.eventWatcher != nil { 121 | ec.eventWatcher.Stop() 122 | } 123 | } 124 | 125 | func (ec *EventsCorrelator) CorrelateWithAppEvents(appEventTime time.Time, window time.Duration) []*K8sEvent { 126 | ec.mu.RLock() 127 | defer ec.mu.RUnlock() 128 | 129 | var correlated []*K8sEvent 130 | windowStart := appEventTime.Add(-window) 131 | windowEnd := appEventTime.Add(window) 132 | 133 | for _, k8sEvent := range ec.events { 134 | if k8sEvent.Timestamp.After(windowStart) && k8sEvent.Timestamp.Before(windowEnd) { 135 | correlated = append(correlated, k8sEvent) 136 | } 137 | } 138 | 139 | return correlated 140 | } 141 | 142 | -------------------------------------------------------------------------------- /internal/tracing/exporter/splunk.go: -------------------------------------------------------------------------------- 1 | package exporter 2 | 3 | import ( 4 | "bytes" 5 | "context" 6 | "encoding/json" 7 | "net/http" 8 | "time" 9 | 10 | "github.com/podtrace/podtrace/internal/config" 11 | "github.com/podtrace/podtrace/internal/diagnose/tracker" 12 | ) 13 | 14 | type SplunkExporter struct { 15 | endpoint string 16 | token string 17 | client *http.Client 18 | enabled bool 19 | sampleRate float64 20 | } 21 | 22 | type SplunkEvent struct { 23 | Time int64 `json:"time"` 24 | Host string `json:"host,omitempty"` 25 | Source string `json:"source,omitempty"` 26 | Sourcetype string `json:"sourcetype,omitempty"` 27 | Event map[string]interface{} `json:"event"` 28 | } 29 | 30 | func NewSplunkExporter(endpoint, token string, sampleRate float64) (*SplunkExporter, error) { 31 | if endpoint == "" { 32 | endpoint = config.DefaultSplunkEndpoint 33 | } 34 | 35 | return &SplunkExporter{ 36 | endpoint: endpoint, 37 | token: token, 38 | client: &http.Client{Timeout: config.TracingExporterTimeout}, 39 | enabled: true, 40 | sampleRate: sampleRate, 41 | }, nil 42 | } 43 | 44 | func (e *SplunkExporter) ExportTraces(traces []*tracker.Trace) error { 45 | if !e.enabled || len(traces) == 0 { 46 | return nil 47 | } 48 | 49 | for _, t := range traces { 50 | if !e.shouldSample(t) { 51 | continue 52 | } 53 | 54 | if err := e.exportTrace(t); err != nil { 55 | continue 56 | } 57 | } 58 | 59 | return nil 60 | } 61 | 62 | func (e *SplunkExporter) shouldSample(_ *tracker.Trace) bool { 63 | if e.sampleRate >= 1.0 { 64 | return true 65 | } 66 | if e.sampleRate <= 0.0 { 67 | return false 68 | } 69 | return time.Now().UnixNano()%int64(1.0/e.sampleRate) == 0 70 | } 71 | 72 | func (e *SplunkExporter) exportTrace(t *tracker.Trace) error { 73 | if len(t.Spans) == 0 { 74 | return nil 75 | } 76 | 77 | events := make([]SplunkEvent, 0) 78 | 79 | for _, span := range t.Spans { 80 | span.UpdateDuration() 81 | 82 | eventData := map[string]interface{}{ 83 | "trace_id": span.TraceID, 84 | "span_id": span.SpanID, 85 | "parent_span_id": span.ParentSpanID, 86 | "operation": span.Operation, 87 | "service": span.Service, 88 | "start_time": span.StartTime.Unix(), 89 | "duration_ms": span.Duration.Milliseconds(), 90 | "span_count": len(span.Events), 91 | } 92 | 93 | for k, v := range span.Attributes { 94 | eventData[k] = v 
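// Span attributes are flattened into the top-level event map; an attribute whose key collides with a fixed field (e.g. "trace_id") silently overwrites it.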
95 | } 96 | 97 | if span.Error { 98 | eventData["error"] = true 99 | } 100 | 101 | event := SplunkEvent{ 102 | Time: span.StartTime.Unix(), 103 | Sourcetype: "Podtrace:trace", 104 | Event: eventData, 105 | } 106 | 107 | events = append(events, event) 108 | } 109 | 110 | for _, event := range events { 111 | payload, err := json.Marshal(event) 112 | if err != nil { 113 | continue 114 | } 115 | 116 | req, err := http.NewRequestWithContext(context.Background(), "POST", e.endpoint, bytes.NewReader(payload)) 117 | if err != nil { 118 | continue 119 | } 120 | 121 | req.Header.Set("Content-Type", "application/json") 122 | if e.token != "" { 123 | req.Header.Set("Authorization", "Splunk "+e.token) 124 | } 125 | 126 | resp, err := e.client.Do(req) 127 | if err != nil { 128 | continue 129 | } 130 | _ = resp.Body.Close() 131 | } 132 | 133 | return nil 134 | } 135 | 136 | func (e *SplunkExporter) Shutdown(ctx context.Context) error { 137 | return nil 138 | } 139 | -------------------------------------------------------------------------------- /test/integration_test.go: -------------------------------------------------------------------------------- 1 | //go:build integration 2 | // +build integration 3 | 4 | package test 5 | 6 | import ( 7 | "strings" 8 | "testing" 9 | "time" 10 | 11 | "github.com/podtrace/podtrace/internal/diagnose" 12 | "github.com/podtrace/podtrace/internal/events" 13 | ) 14 | 15 | func TestDiagnostician_RealWorldScenario(t *testing.T) { 16 | if testing.Short() { 17 | t.Skip("Skipping integration test in short mode") 18 | } 19 | 20 | d := diagnose.NewDiagnostician() 21 | 22 | eventTypes := []events.EventType{ 23 | events.EventDNS, 24 | events.EventConnect, 25 | events.EventTCPSend, 26 | events.EventTCPRecv, 27 | events.EventRead, 28 | events.EventWrite, 29 | } 30 | 31 | for i := 0; i < 100; i++ { 32 | eventType := eventTypes[i%len(eventTypes)] 33 | event := &events.Event{ 34 | Type: eventType, 35 | PID: uint32(1000 + i%10), 36 | LatencyNS: uint64((i + 1) * 1000000), // 1ms to 100ms 37 | Target: "example.com", 38 | Error: 0, 39 | } 40 | 41 | if i%10 == 0 { 42 | event.Error = 111 43 | } 44 | 45 | d.AddEvent(event) 46 | } 47 | 48 | d.Finish() 49 | 50 | report := d.GenerateReport() 51 | if report == "" { 52 | t.Error("Report should not be empty") 53 | } 54 | 55 | sections := []string{ 56 | "Summary", 57 | "DNS Statistics", 58 | "TCP Statistics", 59 | "Connection Statistics", 60 | } 61 | 62 | for _, section := range sections { 63 | if !contains(report, section) { 64 | t.Errorf("Report should contain section '%s'", section) 65 | } 66 | } 67 | } 68 | 69 | func TestDiagnostician_ExportFormats(t *testing.T) { 70 | if testing.Short() { 71 | t.Skip("Skipping integration test in short mode") 72 | } 73 | 74 | d := diagnose.NewDiagnostician() 75 | 76 | for i := 0; i < 50; i++ { 77 | d.AddEvent(&events.Event{ 78 | Type: events.EventDNS, 79 | LatencyNS: uint64(i * 1000000), 80 | Target: "example.com", 81 | }) 82 | } 83 | 84 | d.Finish() 85 | 86 | jsonData := d.ExportJSON() 87 | if jsonData.Summary == nil { 88 | t.Error("JSON export should include summary") 89 | } 90 | 91 | var csvBuf []byte 92 | writer := &testWriter{data: &csvBuf} 93 | err := d.ExportCSV(writer) 94 | if err != nil { 95 | t.Errorf("CSV export should not fail: %v", err) 96 | } 97 | if len(csvBuf) == 0 { 98 | t.Error("CSV export should produce output") 99 | } 100 | } 101 | 102 | func TestDiagnostician_Performance(t *testing.T) { 103 | if testing.Short() { 104 | t.Skip("Skipping integration test in short mode") 105 | } 106 | 107 | d 
:= diagnose.NewDiagnostician() 108 | 109 | start := time.Now() 110 | for i := 0; i < 10000; i++ { 111 | d.AddEvent(&events.Event{ 112 | Type: events.EventDNS, 113 | LatencyNS: uint64(i * 1000000), 114 | Target: "example.com", 115 | }) 116 | } 117 | addDuration := time.Since(start) 118 | 119 | if addDuration > 1*time.Second { 120 | t.Errorf("Adding 10000 events took too long: %v", addDuration) 121 | } 122 | 123 | d.Finish() 124 | 125 | start = time.Now() 126 | _ = d.GenerateReport() 127 | reportDuration := time.Since(start) 128 | 129 | if reportDuration > 5*time.Second { 130 | t.Errorf("Generating report took too long: %v", reportDuration) 131 | } 132 | } 133 | 134 | func contains(s, substr string) bool { 135 | return len(s) >= len(substr) && strings.Contains(s, substr) 136 | } 137 | 138 | type testWriter struct { 139 | data *[]byte 140 | } 141 | 142 | func (w *testWriter) Write(p []byte) (n int, err error) { 143 | *w.data = append(*w.data, p...) 144 | return len(p), nil 145 | } 146 | -------------------------------------------------------------------------------- /internal/alerting/splunk.go: -------------------------------------------------------------------------------- 1 | package alerting 2 | 3 | import ( 4 | "bytes" 5 | "context" 6 | "encoding/json" 7 | "fmt" 8 | "io" 9 | "net/http" 10 | "net/url" 11 | "time" 12 | 13 | "github.com/podtrace/podtrace/internal/config" 14 | ) 15 | 16 | type SplunkAlertSender struct { 17 | endpoint string 18 | token string 19 | client *http.Client 20 | timeout time.Duration 21 | } 22 | 23 | type SplunkAlertEvent struct { 24 | Time int64 `json:"time"` 25 | Host string `json:"host,omitempty"` 26 | Source string `json:"source,omitempty"` 27 | Sourcetype string `json:"sourcetype,omitempty"` 28 | Event map[string]interface{} `json:"event"` 29 | } 30 | 31 | func NewSplunkAlertSender(endpoint, token string, timeout time.Duration) (*SplunkAlertSender, error) { 32 | if endpoint == "" { 33 | return nil, fmt.Errorf("splunk endpoint is required") 34 | } 35 | parsedURL, err := url.Parse(endpoint) 36 | if err != nil { 37 | return nil, fmt.Errorf("invalid splunk endpoint: %w", err) 38 | } 39 | if parsedURL.Scheme != "http" && parsedURL.Scheme != "https" { 40 | return nil, fmt.Errorf("splunk endpoint must use http or https scheme") 41 | } 42 | if token == "" { 43 | return nil, fmt.Errorf("splunk token is required") 44 | } 45 | return &SplunkAlertSender{ 46 | endpoint: endpoint, 47 | token: token, 48 | client: &http.Client{Timeout: timeout}, 49 | timeout: timeout, 50 | }, nil 51 | } 52 | 53 | func (s *SplunkAlertSender) Send(ctx context.Context, alert *Alert) error { 54 | if alert == nil { 55 | return fmt.Errorf("alert is nil") 56 | } 57 | eventData := map[string]interface{}{ 58 | "severity": string(alert.Severity), 59 | "title": alert.Title, 60 | "message": alert.Message, 61 | "source": alert.Source, 62 | "pod": alert.PodName, 63 | "namespace": alert.Namespace, 64 | } 65 | if alert.ErrorCode != "" { 66 | eventData["error_code"] = alert.ErrorCode 67 | } 68 | if len(alert.Recommendations) > 0 { 69 | eventData["recommendations"] = alert.Recommendations 70 | } 71 | if len(alert.Context) > 0 { 72 | for k, v := range alert.Context { 73 | if len(k) <= 64 { 74 | eventData[k] = v 75 | } 76 | } 77 | } 78 | event := SplunkAlertEvent{ 79 | Time: alert.Timestamp.Unix(), 80 | Sourcetype: "Podtrace:alert", 81 | Event: eventData, 82 | } 83 | jsonData, err := json.Marshal(event) 84 | if err != nil { 85 | return fmt.Errorf("failed to marshal Splunk event: %w", err) 86 | } 87 | if 
int64(len(jsonData)) > config.AlertMaxPayloadSize { 88 | return fmt.Errorf("payload size %d exceeds maximum %d", len(jsonData), config.AlertMaxPayloadSize) 89 | } 90 | req, err := http.NewRequestWithContext(ctx, "POST", s.endpoint, bytes.NewReader(jsonData)) 91 | if err != nil { 92 | return fmt.Errorf("failed to create request: %w", err) 93 | } 94 | req.Header.Set("Content-Type", "application/json") 95 | req.Header.Set("Authorization", "Splunk "+s.token) 96 | req.Header.Set("User-Agent", config.GetUserAgent()) 97 | resp, err := s.client.Do(req) 98 | if err != nil { 99 | return fmt.Errorf("failed to send request: %w", err) 100 | } 101 | defer func() { 102 | _, _ = io.Copy(io.Discard, resp.Body) 103 | _ = resp.Body.Close() 104 | }() 105 | if resp.StatusCode < 200 || resp.StatusCode >= 300 { 106 | bodyBytes, _ := io.ReadAll(io.LimitReader(resp.Body, 512)) 107 | return fmt.Errorf("unexpected status code %d: %s", resp.StatusCode, string(bodyBytes)) 108 | } 109 | return nil 110 | } 111 | 112 | func (s *SplunkAlertSender) Name() string { 113 | return "splunk" 114 | } 115 | -------------------------------------------------------------------------------- /internal/alerting/alert.go: -------------------------------------------------------------------------------- 1 | package alerting 2 | 3 | import ( 4 | "crypto/sha256" 5 | "encoding/hex" 6 | "fmt" 7 | "time" 8 | 9 | "github.com/podtrace/podtrace/internal/config" 10 | ) 11 | 12 | type AlertSeverity string 13 | 14 | const ( 15 | SeverityFatal AlertSeverity = "fatal" 16 | SeverityCritical AlertSeverity = "critical" 17 | SeverityWarning AlertSeverity = "warning" 18 | SeverityError AlertSeverity = "error" 19 | ) 20 | 21 | type Alert struct { 22 | Severity AlertSeverity 23 | Title string 24 | Message string 25 | Timestamp time.Time 26 | Source string 27 | PodName string 28 | Namespace string 29 | Context map[string]interface{} 30 | ErrorCode string 31 | Recommendations []string 32 | } 33 | 34 | func (a *Alert) Key() string { 35 | if a == nil { 36 | return "" 37 | } 38 | h := sha256.New() 39 | h.Write([]byte(a.Severity)) 40 | h.Write([]byte(a.Source)) 41 | h.Write([]byte(a.PodName)) 42 | h.Write([]byte(a.Namespace)) 43 | h.Write([]byte(a.Title)) 44 | return hex.EncodeToString(h.Sum(nil))[:16] 45 | } 46 | 47 | func (a *Alert) Validate() error { 48 | if a == nil { 49 | return fmt.Errorf("alert is nil") 50 | } 51 | if a.Severity == "" { 52 | return fmt.Errorf("alert severity is required") 53 | } 54 | if a.Title == "" { 55 | return fmt.Errorf("alert title is required") 56 | } 57 | if a.Message == "" { 58 | return fmt.Errorf("alert message is required") 59 | } 60 | if a.Timestamp.IsZero() { 61 | return fmt.Errorf("alert timestamp is required") 62 | } 63 | if a.Source == "" { 64 | return fmt.Errorf("alert source is required") 65 | } 66 | return nil 67 | } 68 | 69 | func (a *Alert) Sanitize() { 70 | if a == nil { 71 | return 72 | } 73 | if len(a.Title) > 256 { 74 | a.Title = a.Title[:253] + "..." 75 | } 76 | if len(a.Message) > 1024 { 77 | a.Message = a.Message[:1021] + "..." 78 | } 79 | if len(a.PodName) > 256 { 80 | a.PodName = a.PodName[:253] + "..." 81 | } 82 | if len(a.Namespace) > 256 { 83 | a.Namespace = a.Namespace[:253] + "..." 84 | } 85 | if len(a.Source) > 128 { 86 | a.Source = a.Source[:125] + "..." 87 | } 88 | if len(a.ErrorCode) > 64 { 89 | a.ErrorCode = a.ErrorCode[:61] + "..." 
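// Each truncation keeps three characters for the "..." marker, so the result lands exactly on the field limit (for example 61+3 = 64 for ErrorCode here).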
90 | } 91 | if len(a.Recommendations) > 10 { 92 | a.Recommendations = a.Recommendations[:10] 93 | } 94 | for i, rec := range a.Recommendations { 95 | if len(rec) > 512 { 96 | a.Recommendations[i] = rec[:509] + "..." 97 | } 98 | } 99 | } 100 | 101 | func MapResourceAlertLevel(level uint32) AlertSeverity { 102 | switch level { 103 | case 3: 104 | return SeverityFatal 105 | case 2: 106 | return SeverityCritical 107 | case 1: 108 | return SeverityWarning 109 | default: 110 | return SeverityError 111 | } 112 | } 113 | 114 | func ParseSeverity(severity string) AlertSeverity { 115 | switch severity { 116 | case "fatal": 117 | return SeverityFatal 118 | case "critical": 119 | return SeverityCritical 120 | case "warning": 121 | return SeverityWarning 122 | case "error": 123 | return SeverityError 124 | default: 125 | return SeverityError 126 | } 127 | } 128 | 129 | func SeverityLevel(severity AlertSeverity) int { 130 | switch severity { 131 | case SeverityFatal: 132 | return 4 133 | case SeverityCritical: 134 | return 3 135 | case SeverityWarning: 136 | return 2 137 | case SeverityError: 138 | return 1 139 | default: 140 | return 0 141 | } 142 | } 143 | 144 | func ShouldSendAlert(severity AlertSeverity) bool { 145 | if !config.AlertingEnabled { 146 | return false 147 | } 148 | minSeverity := ParseSeverity(config.GetAlertMinSeverity()) 149 | return SeverityLevel(severity) >= SeverityLevel(minSeverity) 150 | } 151 | 152 | -------------------------------------------------------------------------------- /bpf/maps.h: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: GPL-2.0 2 | 3 | #ifndef PODTRACE_MAPS_H 4 | #define PODTRACE_MAPS_H 5 | 6 | #include "common.h" 7 | #include "events.h" 8 | 9 | struct stack_trace_t { 10 | u64 ips[MAX_STACK_DEPTH]; 11 | u32 nr; 12 | }; 13 | 14 | struct { 15 | __uint(type, BPF_MAP_TYPE_RINGBUF); 16 | __uint(max_entries, 2 * 1024 * 1024); 17 | } events SEC(".maps"); 18 | 19 | struct { 20 | __uint(type, BPF_MAP_TYPE_HASH); 21 | __uint(max_entries, 1024); 22 | __type(key, u64); 23 | __type(value, u64); 24 | } start_times SEC(".maps"); 25 | 26 | struct { 27 | __uint(type, BPF_MAP_TYPE_HASH); 28 | __uint(max_entries, 1024); 29 | __type(key, u64); 30 | __type(value, char[MAX_STRING_LEN]); 31 | } dns_targets SEC(".maps"); 32 | 33 | struct { 34 | __uint(type, BPF_MAP_TYPE_HASH); 35 | __uint(max_entries, 1024); 36 | __type(key, u64); 37 | __type(value, char[MAX_STRING_LEN]); 38 | } socket_conns SEC(".maps"); 39 | 40 | struct { 41 | __uint(type, BPF_MAP_TYPE_HASH); 42 | __uint(max_entries, 1024); 43 | __type(key, u64); 44 | __type(value, u64); 45 | } tcp_sockets SEC(".maps"); 46 | 47 | struct { 48 | __uint(type, BPF_MAP_TYPE_HASH); 49 | __uint(max_entries, 2048); 50 | __type(key, u64); 51 | __type(value, struct stack_trace_t); 52 | } stack_traces SEC(".maps"); 53 | 54 | struct { 55 | __uint(type, BPF_MAP_TYPE_HASH); 56 | __uint(max_entries, 1024); 57 | __type(key, u64); 58 | __type(value, char[MAX_STRING_LEN]); 59 | } lock_targets SEC(".maps"); 60 | 61 | struct { 62 | __uint(type, BPF_MAP_TYPE_HASH); 63 | __uint(max_entries, 1024); 64 | __type(key, u64); 65 | __type(value, char[MAX_STRING_LEN]); 66 | } db_queries SEC(".maps"); 67 | 68 | struct { 69 | __uint(type, BPF_MAP_TYPE_HASH); 70 | __uint(max_entries, 1024); 71 | __type(key, u64); 72 | __type(value, char[MAX_STRING_LEN]); 73 | } syscall_paths SEC(".maps"); 74 | 75 | struct { 76 | __uint(type, BPF_MAP_TYPE_HASH); 77 | __uint(max_entries, 1024); 78 | __type(key, u64); 
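/* Assumed semantics: keyed by the pid_tgid of the handshaking thread; the u64 value declared below holds the handshake start timestamp in ns, used to compute TLS handshake latency. */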
79 | __type(value, u64); 80 | } tls_handshakes SEC(".maps"); 81 | 82 | struct resource_limit { 83 | u64 limit_bytes; 84 | u64 usage_bytes; 85 | u64 last_update_ns; 86 | u32 resource_type; 87 | }; 88 | 89 | struct { 90 | __uint(type, BPF_MAP_TYPE_HASH); 91 | __uint(max_entries, 1024); 92 | __type(key, u64); 93 | __type(value, struct resource_limit); 94 | } cgroup_limits SEC(".maps"); 95 | 96 | struct { 97 | __uint(type, BPF_MAP_TYPE_HASH); 98 | __uint(max_entries, 1024); 99 | __type(key, u64); 100 | __type(value, u32); 101 | } cgroup_alerts SEC(".maps"); 102 | 103 | struct { 104 | __uint(type, BPF_MAP_TYPE_ARRAY); 105 | __uint(max_entries, 1); 106 | __type(key, u32); 107 | __type(value, u64); 108 | } target_cgroup_id SEC(".maps"); 109 | 110 | struct pool_state { 111 | u64 last_use_ns; 112 | u32 connection_id; 113 | u32 in_use; 114 | }; 115 | 116 | struct { 117 | __uint(type, BPF_MAP_TYPE_HASH); 118 | __uint(max_entries, 1024); 119 | __type(key, u64); 120 | __type(value, struct pool_state); 121 | } pool_states SEC(".maps"); 122 | 123 | struct { 124 | __uint(type, BPF_MAP_TYPE_HASH); 125 | __uint(max_entries, 1024); 126 | __type(key, u64); 127 | __type(value, u64); 128 | } pool_acquire_times SEC(".maps"); 129 | 130 | struct { 131 | __uint(type, BPF_MAP_TYPE_HASH); 132 | __uint(max_entries, 1024); 133 | __type(key, u64); 134 | __type(value, u32); 135 | } pool_db_types SEC(".maps"); 136 | 137 | struct { 138 | __uint(type, BPF_MAP_TYPE_ARRAY); 139 | __uint(max_entries, 1); 140 | __type(key, u32); 141 | __type(value, struct event); 142 | } event_buf SEC(".maps"); 143 | 144 | struct { 145 | __uint(type, BPF_MAP_TYPE_ARRAY); 146 | __uint(max_entries, 1); 147 | __type(key, u32); 148 | __type(value, struct stack_trace_t); 149 | } stack_buf SEC(".maps"); 150 | 151 | #endif 152 | -------------------------------------------------------------------------------- /internal/alerting/manager.go: -------------------------------------------------------------------------------- 1 | package alerting 2 | 3 | import ( 4 | "context" 5 | "sync" 6 | "time" 7 | 8 | "github.com/podtrace/podtrace/internal/config" 9 | ) 10 | 11 | type Manager struct { 12 | senders []Sender 13 | deduplicator *AlertDeduplicator 14 | rateLimiter *RateLimiter 15 | enabled bool 16 | mu sync.RWMutex 17 | cleanupTicker *time.Ticker 18 | stopCh chan struct{} 19 | wg sync.WaitGroup 20 | } 21 | 22 | func NewManager() (*Manager, error) { 23 | if !config.AlertingEnabled { 24 | return &Manager{enabled: false}, nil 25 | } 26 | manager := &Manager{ 27 | senders: make([]Sender, 0), 28 | deduplicator: NewAlertDeduplicator(config.AlertDeduplicationWindow), 29 | rateLimiter: NewRateLimiter(config.AlertRateLimitPerMinute), 30 | enabled: true, 31 | stopCh: make(chan struct{}), 32 | } 33 | if config.AlertWebhookURL != "" { 34 | webhookSender, err := NewWebhookSender(config.AlertWebhookURL, config.AlertHTTPTimeout) 35 | if err != nil { 36 | } else { 37 | retrySender := NewRetrySender(webhookSender, config.AlertMaxRetries, config.DefaultAlertRetryBackoffBase) 38 | manager.senders = append(manager.senders, retrySender) 39 | } 40 | } 41 | if config.AlertSlackWebhookURL != "" { 42 | slackSender, err := NewSlackSender(config.AlertSlackWebhookURL, config.AlertSlackChannel, config.AlertHTTPTimeout) 43 | if err != nil { 44 | } else { 45 | retrySender := NewRetrySender(slackSender, config.AlertMaxRetries, config.DefaultAlertRetryBackoffBase) 46 | manager.senders = append(manager.senders, retrySender) 47 | } 48 | } 49 | if config.AlertSplunkEnabled { 50 | splunkEndpoint 
:= config.GetSplunkEndpoint() 51 | splunkToken := config.GetSplunkToken() 52 | if splunkEndpoint != "" && splunkToken != "" { 53 | splunkSender, err := NewSplunkAlertSender(splunkEndpoint, splunkToken, config.AlertHTTPTimeout) 54 | if err != nil { 55 | } else { 56 | retrySender := NewRetrySender(splunkSender, config.AlertMaxRetries, config.DefaultAlertRetryBackoffBase) 57 | manager.senders = append(manager.senders, retrySender) 58 | } 59 | } 60 | } 61 | if len(manager.senders) == 0 { 62 | return &Manager{enabled: false}, nil 63 | } 64 | manager.cleanupTicker = time.NewTicker(1 * time.Hour) 65 | manager.wg.Add(1) 66 | go manager.cleanupLoop() 67 | return manager, nil 68 | } 69 | 70 | func (m *Manager) SendAlert(alert *Alert) { 71 | if !m.enabled || alert == nil { 72 | return 73 | } 74 | if !ShouldSendAlert(alert.Severity) { 75 | return 76 | } 77 | if !m.rateLimiter.Allow() { 78 | return 79 | } 80 | if !m.deduplicator.ShouldSend(alert) { 81 | return 82 | } 83 | m.mu.RLock() 84 | senders := make([]Sender, len(m.senders)) 85 | copy(senders, m.senders) 86 | m.mu.RUnlock() 87 | for _, sender := range senders { 88 | go func(s Sender) { 89 | ctx, cancel := context.WithTimeout(context.Background(), config.AlertHTTPTimeout*2) 90 | defer cancel() 91 | _ = s.Send(ctx, alert) 92 | }(sender) 93 | } 94 | } 95 | 96 | func (m *Manager) cleanupLoop() { 97 | defer m.wg.Done() 98 | for { 99 | select { 100 | case <-m.stopCh: 101 | return 102 | case <-m.cleanupTicker.C: 103 | m.deduplicator.Cleanup(config.AlertDeduplicationWindow * 2) 104 | } 105 | } 106 | } 107 | 108 | func (m *Manager) Shutdown(ctx context.Context) error { 109 | if !m.enabled { 110 | return nil 111 | } 112 | close(m.stopCh) 113 | if m.cleanupTicker != nil { 114 | m.cleanupTicker.Stop() 115 | } 116 | done := make(chan struct{}) 117 | go func() { 118 | m.wg.Wait() 119 | close(done) 120 | }() 121 | select { 122 | case <-done: 123 | return nil 124 | case <-ctx.Done(): 125 | return ctx.Err() 126 | } 127 | } 128 | 129 | func (m *Manager) AddSender(sender Sender) { 130 | if sender == nil { 131 | return 132 | } 133 | m.mu.Lock() 134 | defer m.mu.Unlock() 135 | m.senders = append(m.senders, sender) 136 | } 137 | 138 | func (m *Manager) IsEnabled() bool { 139 | return m.enabled 140 | } 141 | 142 | -------------------------------------------------------------------------------- /cmd/podtrace/diagnose_env.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bytes" 5 | "encoding/json" 6 | "fmt" 7 | "os" 8 | "runtime" 9 | "time" 10 | 11 | "github.com/spf13/cobra" 12 | "golang.org/x/sys/unix" 13 | 14 | podtrace "github.com/podtrace/podtrace" 15 | "github.com/podtrace/podtrace/internal/config" 16 | "github.com/podtrace/podtrace/internal/cri" 17 | "github.com/podtrace/podtrace/internal/ebpf/loader" 18 | ) 19 | 20 | type envReport struct { 21 | Time string `json:"time"` 22 | GoVersion string `json:"goVersion"` 23 | GOOS string `json:"goos"` 24 | GOARCH string `json:"goarch"` 25 | KernelRelease string `json:"kernelRelease"` 26 | CgroupBase string `json:"cgroupBase"` 27 | ProcBase string `json:"procBase"` 28 | CgroupV2 bool `json:"cgroupV2"` 29 | BTFVmlinux bool `json:"btfVmlinuxPresent"` 30 | BTFFile string `json:"btfFile"` 31 | CRIEndpointEnv string `json:"criEndpointEnv"` 32 | CRICandidates []string `json:"criCandidates"` 33 | CRIDetected string `json:"criDetected"` 34 | BPFObjectPath string `json:"bpfObjectPath"` 35 | BPFEmbedded bool `json:"bpfEmbeddedAvailable"` 36 | BPFPrograms 
[]string `json:"bpfPrograms"` 37 | BPFMaps []string `json:"bpfMaps"` 38 | HasCgroupIDMap bool `json:"hasTargetCgroupIdMap"` 39 | Warnings []string `json:"warnings"` 40 | } 41 | 42 | func newDiagnoseEnvCmd() *cobra.Command { 43 | cmd := &cobra.Command{ 44 | Use: "diagnose-env", 45 | Short: "Print environment diagnostics for Podtrace (kernel/BTF/cgroups/CRI/BPF)", 46 | RunE: func(cmd *cobra.Command, args []string) error { 47 | rep := collectEnvReport() 48 | out, _ := json.MarshalIndent(rep, "", " ") 49 | fmt.Println(string(out)) 50 | return nil 51 | }, 52 | } 53 | return cmd 54 | } 55 | 56 | func collectEnvReport() envReport { 57 | rep := envReport{ 58 | Time: time.Now().Format(time.RFC3339), 59 | GoVersion: runtime.Version(), 60 | GOOS: runtime.GOOS, 61 | GOARCH: runtime.GOARCH, 62 | CgroupBase: config.CgroupBasePath, 63 | ProcBase: config.ProcBasePath, 64 | BTFFile: config.BTFFilePath, 65 | CRIEndpointEnv: os.Getenv("PODTRACE_CRI_ENDPOINT"), 66 | CRICandidates: cri.DefaultCandidateEndpoints(), 67 | BPFObjectPath: config.BPFObjectPath, 68 | BPFEmbedded: len(podtrace.EmbeddedPodtraceBPFObj) > 0, 69 | } 70 | 71 | var u unix.Utsname 72 | if err := unix.Uname(&u); err == nil { 73 | rep.KernelRelease = bytesToString(u.Release[:]) 74 | } 75 | 76 | if _, err := os.Stat("/sys/fs/cgroup/cgroup.controllers"); err == nil { 77 | rep.CgroupV2 = true 78 | } 79 | if _, err := os.Stat("/sys/kernel/btf/vmlinux"); err == nil { 80 | rep.BTFVmlinux = true 81 | } 82 | 83 | if r, err := cri.NewResolver(); err == nil { 84 | rep.CRIDetected = r.Endpoint() 85 | _ = r.Close() 86 | } 87 | 88 | if spec, err := loader.LoadPodtrace(); err == nil && spec != nil { 89 | for name := range spec.Programs { 90 | rep.BPFPrograms = append(rep.BPFPrograms, name) 91 | } 92 | for name := range spec.Maps { 93 | rep.BPFMaps = append(rep.BPFMaps, name) 94 | if name == "target_cgroup_id" { 95 | rep.HasCgroupIDMap = true 96 | } 97 | } 98 | } else if err != nil { 99 | rep.Warnings = append(rep.Warnings, fmt.Sprintf("failed to load BPF spec: %v", err)) 100 | } 101 | 102 | if !rep.BTFVmlinux && rep.BTFFile == "" { 103 | rep.Warnings = append(rep.Warnings, "kernel BTF (/sys/kernel/btf/vmlinux) not found and PODTRACE_BTF_FILE not set; CO-RE relocations may fail") 104 | } 105 | if rep.CgroupV2 && !rep.HasCgroupIDMap { 106 | rep.Warnings = append(rep.Warnings, "cgroup v2 detected but BPF map target_cgroup_id missing; kernel-side cgroup filtering will be unavailable") 107 | } 108 | 109 | return rep 110 | } 111 | 112 | func bytesToString(bts []byte) string { 113 | var b bytes.Buffer 114 | for _, c := range bts { 115 | if c == 0 { 116 | break 117 | } 118 | b.WriteByte(c) 119 | } 120 | return b.String() 121 | } 122 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/podtrace/podtrace 2 | 3 | go 1.24.0 4 | 5 | require ( 6 | github.com/cilium/ebpf v0.20.0 7 | github.com/spf13/cobra v1.10.2 8 | go.opentelemetry.io/otel v1.38.0 9 | go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.38.0 10 | go.opentelemetry.io/otel/sdk v1.38.0 11 | go.opentelemetry.io/otel/trace v1.38.0 12 | go.uber.org/zap v1.27.1 13 | golang.org/x/sys v0.38.0 14 | google.golang.org/grpc v1.75.0 15 | k8s.io/api v0.34.2 16 | k8s.io/apimachinery v0.34.2 17 | k8s.io/client-go v0.34.2 18 | k8s.io/cri-api v0.34.2 19 | ) 20 | 21 | require ( 22 | github.com/beorn7/perks v1.0.1 // indirect 23 | github.com/cenkalti/backoff/v5 v5.0.3 // 
indirect 24 | github.com/cespare/xxhash/v2 v2.3.0 // indirect 25 | github.com/go-logr/stdr v1.2.2 // indirect 26 | github.com/google/go-cmp v0.7.0 // indirect 27 | github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.2 // indirect 28 | github.com/pmezard/go-difflib v1.0.0 // indirect 29 | github.com/prometheus/client_model v0.6.2 // indirect 30 | github.com/prometheus/common v0.67.4 // indirect 31 | github.com/prometheus/procfs v0.19.2 // indirect 32 | go.opentelemetry.io/auto/sdk v1.1.0 // indirect 33 | go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.38.0 // indirect 34 | go.opentelemetry.io/otel/metric v1.38.0 // indirect 35 | go.opentelemetry.io/proto/otlp v1.7.1 // indirect 36 | go.uber.org/multierr v1.10.0 // indirect 37 | google.golang.org/genproto/googleapis/api v0.0.0-20250825161204-c5933d9347a5 // indirect 38 | google.golang.org/genproto/googleapis/rpc v0.0.0-20250825161204-c5933d9347a5 // indirect 39 | ) 40 | 41 | require ( 42 | github.com/davecgh/go-spew v1.1.1 // indirect 43 | github.com/emicklei/go-restful/v3 v3.13.0 // indirect 44 | github.com/fxamacker/cbor/v2 v2.9.0 // indirect 45 | github.com/go-logr/logr v1.4.3 // indirect 46 | github.com/go-openapi/jsonpointer v0.22.3 // indirect 47 | github.com/go-openapi/jsonreference v0.21.3 // indirect 48 | github.com/go-openapi/swag v0.25.3 // indirect 49 | github.com/go-openapi/swag/cmdutils v0.25.3 // indirect 50 | github.com/go-openapi/swag/conv v0.25.3 // indirect 51 | github.com/go-openapi/swag/fileutils v0.25.3 // indirect 52 | github.com/go-openapi/swag/jsonname v0.25.3 // indirect 53 | github.com/go-openapi/swag/jsonutils v0.25.3 // indirect 54 | github.com/go-openapi/swag/loading v0.25.3 // indirect 55 | github.com/go-openapi/swag/mangling v0.25.3 // indirect 56 | github.com/go-openapi/swag/netutils v0.25.3 // indirect 57 | github.com/go-openapi/swag/stringutils v0.25.3 // indirect 58 | github.com/go-openapi/swag/typeutils v0.25.3 // indirect 59 | github.com/go-openapi/swag/yamlutils v0.25.3 // indirect 60 | github.com/gogo/protobuf v1.3.2 // indirect 61 | github.com/google/gnostic-models v0.7.1 // indirect 62 | github.com/google/uuid v1.6.0 // indirect 63 | github.com/inconshreveable/mousetrap v1.1.0 // indirect 64 | github.com/json-iterator/go v1.1.12 // indirect 65 | github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect 66 | github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee // indirect 67 | github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect 68 | github.com/prometheus/client_golang v1.23.2 69 | github.com/spf13/pflag v1.0.10 // indirect 70 | github.com/x448/float16 v0.8.4 // indirect 71 | go.yaml.in/yaml/v2 v2.4.3 // indirect 72 | go.yaml.in/yaml/v3 v3.0.4 // indirect 73 | golang.org/x/net v0.47.0 // indirect 74 | golang.org/x/oauth2 v0.33.0 // indirect 75 | golang.org/x/term v0.37.0 // indirect 76 | golang.org/x/text v0.31.0 // indirect 77 | golang.org/x/time v0.14.0 78 | google.golang.org/protobuf v1.36.10 // indirect 79 | gopkg.in/evanphx/json-patch.v4 v4.13.0 // indirect 80 | gopkg.in/inf.v0 v0.9.1 // indirect 81 | k8s.io/klog/v2 v2.130.1 // indirect 82 | k8s.io/kube-openapi v0.0.0-20251121143641-b6aabc6c6745 // indirect 83 | k8s.io/utils v0.0.0-20251002143259-bc988d571ff4 // indirect 84 | sigs.k8s.io/json v0.0.0-20250730193827-2d320260d730 // indirect 85 | sigs.k8s.io/randfill v1.0.0 // indirect 86 | sigs.k8s.io/structured-merge-diff/v6 v6.3.1 // indirect 87 | sigs.k8s.io/yaml v1.6.0 // indirect 88 | ) 89 | 
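
A minimal consumer sketch, not a file in this repository, showing how raw ring-buffer records might be decoded with the parser defined in internal/ebpf/parser/parser.go below. The record channel is hypothetical, and because the package lives under internal/, such a consumer would have to sit inside this module:

package main

import (
	"fmt"

	"github.com/podtrace/podtrace/internal/ebpf/parser"
)

// consume drains raw ring-buffer records, decodes each into a pooled
// events.Event, and returns the event to the pool once it is done with it.
func consume(records <-chan []byte) {
	for raw := range records {
		ev := parser.ParseEvent(raw) // nil for short or undecodable records
		if ev == nil {
			continue
		}
		fmt.Printf("pid=%d type=%d target=%s\n", ev.PID, ev.Type, ev.Target)
		parser.PutEvent(ev) // recycle the event; do not retain ev after this call
	}
}

func main() {
	records := make(chan []byte) // hypothetical source; a real caller would feed ring-buffer payloads here
	close(records)
	consume(records)
}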
--------------------------------------------------------------------------------
/internal/ebpf/parser/parser.go:
--------------------------------------------------------------------------------
1 | package parser
2 | 
3 | import (
4 | "bytes"
5 | "encoding/binary"
6 | "sync"
7 | "unsafe"
8 | 
9 | "github.com/podtrace/podtrace/internal/events"
10 | )
11 | 
12 | var (
13 | binaryRead = binary.Read
14 | eventPool = sync.Pool{
15 | New: func() interface{} {
16 | return &events.Event{}
17 | },
18 | }
19 | )
20 | 
21 | type rawEvent struct {
22 | Timestamp uint64
23 | PID uint32
24 | Type uint32
25 | LatencyNS uint64
26 | Error int32
27 | _ uint32
28 | Bytes uint64
29 | TCPState uint32
30 | _ uint32
31 | StackKey uint64
32 | Target [128]byte
33 | Details [128]byte
34 | }
35 | 
36 | func ParseEvent(data []byte) *events.Event {
37 | type rawEventV1 = rawEvent
38 | type rawEventV2 struct {
39 | Timestamp uint64
40 | PID uint32
41 | Type uint32
42 | LatencyNS uint64
43 | Error int32
44 | _ uint32
45 | Bytes uint64
46 | TCPState uint32
47 | _ uint32
48 | StackKey uint64
49 | CgroupID uint64
50 | Target [128]byte
51 | Details [128]byte
52 | }
53 | type rawEventV3 struct {
54 | Timestamp uint64
55 | PID uint32
56 | Type uint32
57 | LatencyNS uint64
58 | Error int32
59 | _ uint32
60 | Bytes uint64
61 | TCPState uint32
62 | _ uint32
63 | StackKey uint64
64 | CgroupID uint64
65 | Comm [16]byte
66 | Target [128]byte
67 | Details [128]byte
68 | }
69 | 
70 | expectedV3 := int(unsafe.Sizeof(rawEventV3{}))
71 | expectedV2 := int(unsafe.Sizeof(rawEventV2{}))
72 | expectedV1 := int(unsafe.Sizeof(rawEventV1{}))
73 | if len(data) < expectedV1 {
74 | return nil
75 | }
76 | 
77 | event := eventPool.Get().(*events.Event)
78 | event.ProcessName = ""
79 | event.Stack = nil
80 | event.CgroupID = 0
81 | 
82 | if len(data) >= expectedV3 {
83 | var e rawEventV3
84 | if err := binaryRead(bytes.NewReader(data[:expectedV3]), binary.LittleEndian, &e); err != nil {
85 | PutEvent(event); return nil // recycle the pooled event on decode failure instead of dropping it
86 | }
87 | 
88 | event.Timestamp = e.Timestamp
89 | event.PID = e.PID
90 | event.Type = events.EventType(e.Type)
91 | event.LatencyNS = e.LatencyNS
92 | event.Error = e.Error
93 | event.Bytes = e.Bytes
94 | event.TCPState = e.TCPState
95 | event.StackKey = e.StackKey
96 | event.CgroupID = e.CgroupID
97 | event.ProcessName = string(bytes.TrimRight(e.Comm[:], "\x00"))
98 | event.Target = string(bytes.TrimRight(e.Target[:], "\x00"))
99 | event.Details = string(bytes.TrimRight(e.Details[:], "\x00"))
100 | 
101 | return event
102 | }
103 | 
104 | if len(data) >= expectedV2 {
105 | var e rawEventV2
106 | if err := binaryRead(bytes.NewReader(data[:expectedV2]), binary.LittleEndian, &e); err != nil {
107 | PutEvent(event); return nil // recycle the pooled event on decode failure instead of dropping it
108 | }
109 | 
110 | event.Timestamp = e.Timestamp
111 | event.PID = e.PID
112 | event.Type = events.EventType(e.Type)
113 | event.LatencyNS = e.LatencyNS
114 | event.Error = e.Error
115 | event.Bytes = e.Bytes
116 | event.TCPState = e.TCPState
117 | event.StackKey = e.StackKey
118 | event.CgroupID = e.CgroupID
119 | event.Target = string(bytes.TrimRight(e.Target[:], "\x00"))
120 | event.Details = string(bytes.TrimRight(e.Details[:], "\x00"))
121 | 
122 | return event
123 | }
124 | 
125 | var e rawEventV1
126 | if err := binaryRead(bytes.NewReader(data[:expectedV1]), binary.LittleEndian, &e); err != nil {
127 | PutEvent(event); return nil // recycle the pooled event on decode failure instead of dropping it
128 | }
129 | 
130 | event.Timestamp = e.Timestamp
131 | event.PID = e.PID
132 | event.Type = events.EventType(e.Type)
133 | event.LatencyNS = e.LatencyNS
134 | event.Error = e.Error
135 | event.Bytes = e.Bytes
136 | event.TCPState = 
e.TCPState 137 | event.StackKey = e.StackKey 138 | event.Target = string(bytes.TrimRight(e.Target[:], "\x00")) 139 | event.Details = string(bytes.TrimRight(e.Details[:], "\x00")) 140 | 141 | return event 142 | } 143 | 144 | func PutEvent(event *events.Event) { 145 | if event == nil { 146 | return 147 | } 148 | event.Stack = nil 149 | event.ProcessName = "" 150 | event.Target = "" 151 | event.Details = "" 152 | eventPool.Put(event) 153 | } 154 | -------------------------------------------------------------------------------- /internal/diagnose/stacktrace/stacktrace.go: -------------------------------------------------------------------------------- 1 | package stacktrace 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "os" 7 | "os/exec" 8 | "path/filepath" 9 | "sort" 10 | "strings" 11 | 12 | "github.com/podtrace/podtrace/internal/config" 13 | "github.com/podtrace/podtrace/internal/events" 14 | ) 15 | 16 | type Diagnostician interface { 17 | GetEvents() []*events.Event 18 | } 19 | 20 | type stackSummary struct { 21 | Key string 22 | Count int 23 | Sample *events.Event 24 | FirstFrame string 25 | } 26 | 27 | type stackResolver struct { 28 | cache map[string]string 29 | } 30 | 31 | func (r *stackResolver) resolve(ctx context.Context, pid uint32, addr uint64) string { 32 | select { 33 | case <-ctx.Done(): 34 | return "" 35 | default: 36 | } 37 | 38 | if addr == 0 { 39 | return "" 40 | } 41 | if r.cache == nil { 42 | r.cache = make(map[string]string) 43 | } 44 | exePath, err := os.Readlink(fmt.Sprintf("/proc/%d/exe", pid)) 45 | if err != nil || exePath == "" { 46 | return fmt.Sprintf("0x%x", addr) 47 | } 48 | key := exePath + "|" + fmt.Sprintf("%x", addr) 49 | if v, ok := r.cache[key]; ok { 50 | return v 51 | } 52 | timeoutCtx, cancel := context.WithTimeout(ctx, config.DefaultAddr2lineTimeout) 53 | defer cancel() 54 | cmd := exec.CommandContext(timeoutCtx, "addr2line", "-e", exePath, fmt.Sprintf("%#x", addr)) 55 | out, err := cmd.Output() 56 | if err != nil { 57 | v := fmt.Sprintf("%s@0x%x", filepath.Base(exePath), addr) 58 | r.cache[key] = v 59 | return v 60 | } 61 | line := strings.TrimSpace(string(out)) 62 | if line == "" || line == "??:0" || line == "??:?" 
{
63 | line = fmt.Sprintf("%s@0x%x", filepath.Base(exePath), addr)
64 | } else {
65 | line = filepath.Base(exePath) + ":" + line
66 | }
67 | r.cache[key] = line
68 | return line
69 | }
70 | 
71 | func GenerateStackTraceSectionWithContext(d Diagnostician, ctx context.Context) string {
72 | allEvents := d.GetEvents()
73 | if len(allEvents) == 0 {
74 | return ""
75 | }
76 | 
77 | resolver := &stackResolver{cache: make(map[string]string)}
78 | stackMap := make(map[string]*stackSummary)
79 | processed := 0
80 | 
81 | for _, e := range allEvents {
82 | if processed >= config.MaxEventsForStacks {
83 | break
84 | }
85 | if e == nil {
86 | continue
87 | }
88 | if len(e.Stack) == 0 {
89 | continue
90 | }
91 | if e.LatencyNS < uint64(config.MinLatencyForStackNS) && e.Type != events.EventLockContention && e.Type != events.EventDBQuery {
92 | continue
93 | }
94 | processed++
95 | top := e.Stack[0]
96 | frame := resolver.resolve(ctx, e.PID, top)
97 | if frame == "" {
98 | continue
99 | }
100 | key := fmt.Sprintf("%s|%d", frame, e.Type)
101 | if entry, ok := stackMap[key]; ok {
102 | entry.Count++
103 | } else {
104 | stackMap[key] = &stackSummary{
105 | Key: key,
106 | Count: 1,
107 | Sample: e,
108 | FirstFrame: frame,
109 | }
110 | }
111 | }
112 | 
113 | if len(stackMap) == 0 {
114 | return ""
115 | }
116 | 
117 | var summaries []*stackSummary
118 | for _, v := range stackMap {
119 | summaries = append(summaries, v)
120 | }
121 | sort.Slice(summaries, func(i, j int) bool {
122 | return summaries[i].Count > summaries[j].Count
123 | })
124 | 
125 | var report string
126 | report += "Stack Traces for Slow Operations:\n"
127 | limit := config.MaxStackTracesLimit
128 | if len(summaries) < limit {
129 | limit = len(summaries)
130 | }
131 | for i := 0; i < limit; i++ {
132 | s := summaries[i]
133 | e := s.Sample
134 | if e == nil {
135 | continue
136 | }
137 | report += fmt.Sprintf(" Hot stack %d: %d events, type=%s, target=%s, sample latency=%.2fms\n", i+1, s.Count, e.TypeString(), e.Target, float64(e.LatencyNS)/float64(config.NSPerMS)) // labeled as sample latency: e is the single sampled event, not an average over the group
138 | maxFrames := config.MaxStackFramesLimit
139 | if len(e.Stack) < maxFrames {
140 | maxFrames = len(e.Stack)
141 | }
142 | for j := 0; j < maxFrames; j++ {
143 | addr := e.Stack[j]
144 | frame := resolver.resolve(ctx, e.PID, addr)
145 | report += fmt.Sprintf(" #%d %s\n", j, frame)
146 | }
147 | }
148 | report += "\n"
149 | return report
150 | }
151 | 
152 | 
--------------------------------------------------------------------------------
/internal/tracing/extractor/http.go:
--------------------------------------------------------------------------------
1 | package extractor
2 | 
3 | import (
4 | "net/http"
5 | "strings"
6 | 
7 | "github.com/podtrace/podtrace/internal/tracing/context"
8 | )
9 | 
10 | const (
11 | MaxHeaderNameLength = 256
12 | MaxHeaderValueLength = 4096
13 | MaxHeaderCount = 100
14 | )
15 | 
16 | type HTTPExtractor struct {
17 | extractW3C bool
18 | extractB3 bool
19 | extractSplunk bool
20 | }
21 | 
22 | func NewHTTPExtractor() *HTTPExtractor {
23 | return &HTTPExtractor{
24 | extractW3C: true,
25 | extractB3: true,
26 | extractSplunk: true,
27 | }
28 | }
29 | 
30 | func (e *HTTPExtractor) ExtractFromHeaders(headers map[string]string) *context.TraceContext {
31 | if headers == nil {
32 | return nil
33 | }
34 | 
35 | if len(headers) > MaxHeaderCount {
36 | return nil
37 | }
38 | 
39 | normalized := make(map[string]string, len(headers))
40 | for k, v := range headers {
41 | if len(k) > MaxHeaderNameLength || len(v) > MaxHeaderValueLength {
42 | continue
43 | }
44 | if
strings.ContainsAny(k, "\r\n") || strings.ContainsAny(v, "\r\n") { 45 | continue 46 | } 47 | normalized[strings.ToLower(k)] = v 48 | } 49 | 50 | if e.extractW3C { 51 | if traceParent, ok := normalized["traceparent"]; ok { 52 | if tc, err := context.ParseW3CTraceParent(traceParent); err == nil { 53 | if tracestate, ok := normalized["tracestate"]; ok { 54 | tc.State = tracestate 55 | } 56 | return tc 57 | } 58 | } 59 | } 60 | 61 | if e.extractB3 { 62 | b3Headers := make(map[string]string) 63 | for k, v := range normalized { 64 | if strings.HasPrefix(k, "x-b3-") { 65 | b3Headers[k] = v 66 | } 67 | } 68 | if tc := context.ParseB3TraceContext(b3Headers); tc != nil { 69 | return tc 70 | } 71 | } 72 | 73 | if e.extractSplunk { 74 | if requestID, ok := normalized["x-splunk-requestid"]; ok { 75 | tc := context.NewTraceContext() 76 | tc.State = requestID 77 | return tc 78 | } 79 | } 80 | 81 | return nil 82 | } 83 | 84 | func (e *HTTPExtractor) ExtractFromHTTPRequest(req *http.Request) *context.TraceContext { 85 | if req == nil || req.Header == nil { 86 | return nil 87 | } 88 | 89 | if len(req.Header) > MaxHeaderCount { 90 | return nil 91 | } 92 | 93 | headers := make(map[string]string) 94 | for k, v := range req.Header { 95 | if len(v) > 0 { 96 | headerValue := v[0] 97 | if len(k) > MaxHeaderNameLength || len(headerValue) > MaxHeaderValueLength { 98 | continue 99 | } 100 | headers[k] = headerValue 101 | } 102 | } 103 | 104 | return e.ExtractFromHeaders(headers) 105 | } 106 | 107 | func (e *HTTPExtractor) ExtractFromHTTPResponse(resp *http.Response) *context.TraceContext { 108 | if resp == nil || resp.Header == nil { 109 | return nil 110 | } 111 | 112 | if len(resp.Header) > MaxHeaderCount { 113 | return nil 114 | } 115 | 116 | headers := make(map[string]string) 117 | for k, v := range resp.Header { 118 | if len(v) > 0 { 119 | headerValue := v[0] 120 | if len(k) > MaxHeaderNameLength || len(headerValue) > MaxHeaderValueLength { 121 | continue 122 | } 123 | headers[k] = headerValue 124 | } 125 | } 126 | 127 | return e.ExtractFromHeaders(headers) 128 | } 129 | 130 | func (e *HTTPExtractor) ExtractFromRawHeaders(rawHeaders string) *context.TraceContext { 131 | if rawHeaders == "" { 132 | return nil 133 | } 134 | 135 | headers := parseRawHeaders(rawHeaders) 136 | return e.ExtractFromHeaders(headers) 137 | } 138 | 139 | func parseRawHeaders(raw string) map[string]string { 140 | headers := make(map[string]string) 141 | lines := strings.Split(raw, "\r\n") 142 | 143 | for _, line := range lines { 144 | if line == "" { 145 | continue 146 | } 147 | if len(headers) >= MaxHeaderCount { 148 | break 149 | } 150 | idx := strings.Index(line, ":") 151 | if idx <= 0 { 152 | continue 153 | } 154 | key := strings.TrimSpace(line[:idx]) 155 | value := strings.TrimSpace(line[idx+1:]) 156 | if key != "" && value != "" { 157 | if len(key) > MaxHeaderNameLength || len(value) > MaxHeaderValueLength { 158 | continue 159 | } 160 | headers[key] = value 161 | } 162 | } 163 | 164 | return headers 165 | } 166 | -------------------------------------------------------------------------------- /internal/diagnose/tracker/connection.go: -------------------------------------------------------------------------------- 1 | package tracker 2 | 3 | import ( 4 | "fmt" 5 | "sort" 6 | "time" 7 | 8 | "github.com/podtrace/podtrace/internal/config" 9 | "github.com/podtrace/podtrace/internal/events" 10 | ) 11 | 12 | type ConnectionInfo struct { 13 | Target string 14 | ConnectTime time.Time 15 | SendCount int 16 | RecvCount int 17 | TotalLatency 
time.Duration
18 | LastActivity time.Time
19 | }
20 | 
21 | type ConnectionTracker struct {
22 | connections map[string]*ConnectionInfo
23 | }
24 | 
25 | func NewConnectionTracker() *ConnectionTracker {
26 | return &ConnectionTracker{
27 | connections: make(map[string]*ConnectionInfo),
28 | }
29 | }
30 | 
31 | func (ct *ConnectionTracker) ProcessEvent(event *events.Event) {
32 | if event == nil {
33 | return
34 | }
35 | 
36 | switch event.Type {
37 | case events.EventConnect:
38 | if event.Error == 0 && event.Target != "" {
39 | conn := &ConnectionInfo{
40 | Target: event.Target,
41 | ConnectTime: event.TimestampTime(),
42 | LastActivity: event.TimestampTime(),
43 | }
44 | ct.connections[event.Target] = conn
45 | }
46 | 
47 | case events.EventTCPSend, events.EventTCPRecv:
48 | if event.Target != "" {
49 | if conn, exists := ct.connections[event.Target]; exists {
50 | if event.Type == events.EventTCPSend {
51 | conn.SendCount++
52 | } else {
53 | conn.RecvCount++
54 | }
55 | conn.TotalLatency += event.Latency()
56 | conn.LastActivity = event.TimestampTime()
57 | } else {
58 | conn := &ConnectionInfo{
59 | Target: event.Target,
60 | ConnectTime: event.TimestampTime(),
61 | LastActivity: event.TimestampTime(),
62 | }
63 | if event.Type == events.EventTCPSend {
64 | conn.SendCount = 1
65 | } else {
66 | conn.RecvCount = 1
67 | }
68 | conn.TotalLatency = event.Latency()
69 | ct.connections[event.Target] = conn
70 | }
71 | }
72 | }
73 | }
74 | 
75 | func (ct *ConnectionTracker) GetConnectionSummary() []ConnectionSummary {
76 | var summaries []ConnectionSummary
77 | for target, conn := range ct.connections {
78 | avgLatency := time.Duration(0)
79 | totalOps := conn.SendCount + conn.RecvCount
80 | if totalOps > 0 {
81 | avgLatency = conn.TotalLatency / time.Duration(totalOps)
82 | }
83 | summaries = append(summaries, ConnectionSummary{
84 | Target: target,
85 | ConnectTime: conn.ConnectTime,
86 | SendCount: conn.SendCount,
87 | RecvCount: conn.RecvCount,
88 | TotalOps: totalOps,
89 | AvgLatency: avgLatency,
90 | LastActivity: conn.LastActivity,
91 | })
92 | }
93 | sort.Slice(summaries, func(i, j int) bool {
94 | return summaries[i].TotalOps > summaries[j].TotalOps
95 | })
96 | return summaries
97 | }
98 | 
99 | type ConnectionSummary struct {
100 | Target string
101 | ConnectTime time.Time
102 | SendCount int
103 | RecvCount int
104 | TotalOps int
105 | AvgLatency time.Duration
106 | LastActivity time.Time
107 | }
108 | 
109 | func GenerateConnectionCorrelation(evts []*events.Event) string { // parameter renamed from "events" so it no longer shadows the events package
110 | tracker := NewConnectionTracker()
111 | for _, event := range evts {
112 | tracker.ProcessEvent(event)
113 | }
114 | 
115 | summaries := tracker.GetConnectionSummary()
116 | if len(summaries) == 0 {
117 | return ""
118 | }
119 | 
120 | report := "Connection Correlation:\n"
121 | report += fmt.Sprintf(" Active connections: %d\n", len(summaries))
122 | report += " Top connections by activity:\n"
123 | for i, summary := range summaries {
124 | if i >= config.MaxConnectionTargets {
125 | break
126 | }
127 | report += fmt.Sprintf(" - %s:\n", summary.Target)
128 | report += fmt.Sprintf(" Connect: %s\n", summary.ConnectTime.Format("15:04:05"))
129 | report += fmt.Sprintf(" Operations: %d send, %d recv (total: %d)\n", summary.SendCount, summary.RecvCount, summary.TotalOps)
130 | report += fmt.Sprintf(" Avg latency: %.2fms\n", float64(summary.AvgLatency.Nanoseconds())/float64(config.NSPerMS))
131 | report += fmt.Sprintf(" Last activity: %s\n", summary.LastActivity.Format("15:04:05.000"))
132 | }
133 | report += "\n"
134 | return report 135 | } 136 | -------------------------------------------------------------------------------- /internal/tracing/context/context.go: -------------------------------------------------------------------------------- 1 | package context 2 | 3 | import ( 4 | "crypto/rand" 5 | "encoding/hex" 6 | "fmt" 7 | "strings" 8 | ) 9 | 10 | type TraceContext struct { 11 | TraceID string 12 | SpanID string 13 | ParentSpanID string 14 | Flags uint8 15 | State string 16 | } 17 | 18 | func NewTraceContext() *TraceContext { 19 | return &TraceContext{ 20 | TraceID: generateTraceID(), 21 | SpanID: generateSpanID(), 22 | Flags: 0x01, 23 | } 24 | } 25 | 26 | func (tc *TraceContext) IsValid() bool { 27 | return tc.TraceID != "" && tc.SpanID != "" 28 | } 29 | 30 | func (tc *TraceContext) IsSampled() bool { 31 | return (tc.Flags & 0x01) == 0x01 32 | } 33 | 34 | func (tc *TraceContext) SetSampled(sampled bool) { 35 | if sampled { 36 | tc.Flags |= 0x01 37 | } else { 38 | tc.Flags &= 0xFE 39 | } 40 | } 41 | 42 | func (tc *TraceContext) CreateChild() *TraceContext { 43 | child := &TraceContext{ 44 | TraceID: tc.TraceID, 45 | ParentSpanID: tc.SpanID, 46 | SpanID: generateSpanID(), 47 | Flags: tc.Flags, 48 | State: tc.State, 49 | } 50 | return child 51 | } 52 | 53 | func generateTraceID() string { 54 | b := make([]byte, 16) 55 | if _, err := rand.Read(b); err != nil { 56 | return "" 57 | } 58 | return hex.EncodeToString(b) 59 | } 60 | 61 | func generateSpanID() string { 62 | b := make([]byte, 8) 63 | if _, err := rand.Read(b); err != nil { 64 | return "" 65 | } 66 | return hex.EncodeToString(b) 67 | } 68 | 69 | func ParseW3CTraceParent(traceParent string) (*TraceContext, error) { 70 | if traceParent == "" { 71 | return nil, fmt.Errorf("empty traceparent") 72 | } 73 | 74 | parts := strings.Split(traceParent, "-") 75 | if len(parts) != 4 { 76 | return nil, fmt.Errorf("invalid traceparent format") 77 | } 78 | 79 | if parts[0] != "00" { 80 | return nil, fmt.Errorf("unsupported version: %s", parts[0]) 81 | } 82 | 83 | traceID := parts[1] 84 | parentID := parts[2] 85 | flags := parts[3] 86 | 87 | if len(traceID) != 32 { 88 | return nil, fmt.Errorf("invalid trace ID length: %d", len(traceID)) 89 | } 90 | if len(parentID) != 16 { 91 | return nil, fmt.Errorf("invalid parent ID length: %d", len(parentID)) 92 | } 93 | if len(flags) != 2 { 94 | return nil, fmt.Errorf("invalid flags length: %d", len(flags)) 95 | } 96 | 97 | var flagsByte uint8 98 | if _, err := fmt.Sscanf(flags, "%02x", &flagsByte); err != nil { 99 | return nil, fmt.Errorf("invalid flags: %w", err) 100 | } 101 | 102 | return &TraceContext{ 103 | TraceID: traceID, 104 | ParentSpanID: parentID, 105 | SpanID: generateSpanID(), 106 | Flags: flagsByte, 107 | }, nil 108 | } 109 | 110 | func ParseB3TraceContext(headers map[string]string) *TraceContext { 111 | var traceID, spanID, parentSpanID, sampled, flags string 112 | 113 | for k, v := range headers { 114 | lowerK := strings.ToLower(k) 115 | switch lowerK { 116 | case "x-b3-traceid": 117 | traceID = v 118 | case "x-b3-spanid": 119 | spanID = v 120 | case "x-b3-parentspanid": 121 | parentSpanID = v 122 | case "x-b3-sampled": 123 | sampled = v 124 | case "x-b3-flags": 125 | flags = v 126 | } 127 | } 128 | 129 | if traceID == "" || spanID == "" { 130 | return nil 131 | } 132 | 133 | tc := &TraceContext{ 134 | TraceID: traceID, 135 | SpanID: spanID, 136 | ParentSpanID: parentSpanID, 137 | } 138 | 139 | if sampled == "1" || sampled == "true" || flags == "1" { 140 | tc.Flags = 0x01 141 | } 142 | 143 | return 
tc 144 | } 145 | 146 | func (tc *TraceContext) ToW3CTraceParent() string { 147 | if !tc.IsValid() { 148 | return "" 149 | } 150 | flags := fmt.Sprintf("%02x", tc.Flags) 151 | return fmt.Sprintf("00-%s-%s-%s", tc.TraceID, tc.SpanID, flags) 152 | } 153 | 154 | func (tc *TraceContext) ToB3Headers() map[string]string { 155 | if !tc.IsValid() { 156 | return nil 157 | } 158 | headers := map[string]string{ 159 | "X-B3-TraceId": tc.TraceID, 160 | "X-B3-SpanId": tc.SpanID, 161 | } 162 | if tc.ParentSpanID != "" { 163 | headers["X-B3-ParentSpanID"] = tc.ParentSpanID 164 | } 165 | if tc.IsSampled() { 166 | headers["X-B3-Sampled"] = "1" 167 | } 168 | return headers 169 | } 170 | -------------------------------------------------------------------------------- /internal/diagnose/tracker/trace_tracker_test.go: -------------------------------------------------------------------------------- 1 | package tracker 2 | 3 | import ( 4 | "testing" 5 | "time" 6 | 7 | "github.com/podtrace/podtrace/internal/events" 8 | ) 9 | 10 | func TestNewTraceTracker(t *testing.T) { 11 | tt := NewTraceTracker() 12 | if tt == nil { 13 | t.Fatal("NewTraceTracker returned nil") 14 | } 15 | if tt.GetTraceCount() != 0 { 16 | t.Error("New tracker should have 0 traces") 17 | } 18 | } 19 | 20 | func TestTraceTracker_ProcessEvent(t *testing.T) { 21 | tt := NewTraceTracker() 22 | 23 | event := &events.Event{ 24 | TraceID: "trace123", 25 | SpanID: "span123", 26 | ParentSpanID: "", 27 | Type: events.EventHTTPReq, 28 | Timestamp: uint64(time.Now().UnixNano()), 29 | ProcessName: "test-process", 30 | Target: "http://example.com", 31 | } 32 | 33 | tt.ProcessEvent(event, nil) 34 | 35 | if tt.GetTraceCount() != 1 { 36 | t.Errorf("Expected 1 trace, got %d", tt.GetTraceCount()) 37 | } 38 | 39 | trace := tt.GetTrace("trace123") 40 | if trace == nil { 41 | t.Fatal("Trace not found") 42 | } 43 | if len(trace.Spans) != 1 { 44 | t.Errorf("Expected 1 span, got %d", len(trace.Spans)) 45 | } 46 | } 47 | 48 | func TestTraceTracker_ProcessEvent_WithK8sContext(t *testing.T) { 49 | tt := NewTraceTracker() 50 | 51 | event := &events.Event{ 52 | TraceID: "trace123", 53 | SpanID: "span123", 54 | Type: events.EventHTTPReq, 55 | Timestamp: uint64(time.Now().UnixNano()), 56 | ProcessName: "test-process", 57 | } 58 | 59 | k8sCtx := map[string]interface{}{ 60 | "target_service": "test-service", 61 | "target_namespace": "default", 62 | "target_pod": "test-pod", 63 | "target_labels": map[string]string{"app": "test"}, 64 | } 65 | 66 | tt.ProcessEvent(event, k8sCtx) 67 | 68 | trace := tt.GetTrace("trace123") 69 | if trace == nil { 70 | t.Fatal("Trace not found") 71 | } 72 | 73 | if len(trace.Services) == 0 { 74 | t.Error("Services should be populated") 75 | } 76 | } 77 | 78 | func TestTraceTracker_ProcessEvent_NoTraceID(t *testing.T) { 79 | tt := NewTraceTracker() 80 | 81 | event := &events.Event{ 82 | TraceID: "", 83 | SpanID: "span123", 84 | Type: events.EventHTTPReq, 85 | } 86 | 87 | tt.ProcessEvent(event, nil) 88 | 89 | if tt.GetTraceCount() != 0 { 90 | t.Error("Event without TraceID should not create trace") 91 | } 92 | } 93 | 94 | func TestTraceTracker_GetAllTraces(t *testing.T) { 95 | tt := NewTraceTracker() 96 | 97 | event1 := &events.Event{ 98 | TraceID: "trace1", 99 | SpanID: "span1", 100 | Timestamp: uint64(time.Now().UnixNano()), 101 | Type: events.EventHTTPReq, 102 | } 103 | 104 | event2 := &events.Event{ 105 | TraceID: "trace2", 106 | SpanID: "span2", 107 | Timestamp: uint64(time.Now().UnixNano()), 108 | Type: events.EventHTTPReq, 109 | } 110 | 111 | 
tt.ProcessEvent(event1, nil) 112 | tt.ProcessEvent(event2, nil) 113 | 114 | traces := tt.GetAllTraces() 115 | if len(traces) != 2 { 116 | t.Errorf("Expected 2 traces, got %d", len(traces)) 117 | } 118 | } 119 | 120 | func TestTraceTracker_CleanupOldTraces(t *testing.T) { 121 | tt := NewTraceTracker() 122 | 123 | oldTime := time.Now().Add(-15 * time.Minute) 124 | event := &events.Event{ 125 | TraceID: "old-trace", 126 | SpanID: "span1", 127 | Timestamp: uint64(oldTime.UnixNano()), 128 | Type: events.EventHTTPReq, 129 | } 130 | 131 | tt.ProcessEvent(event, nil) 132 | 133 | tt.CleanupOldTraces(10 * time.Minute) 134 | 135 | if tt.GetTraceCount() != 0 { 136 | t.Error("Old traces should be cleaned up") 137 | } 138 | } 139 | 140 | func TestSpan_UpdateDuration(t *testing.T) { 141 | span := &Span{ 142 | TraceID: "trace1", 143 | SpanID: "span1", 144 | StartTime: time.Now(), 145 | Events: []*events.Event{ 146 | { 147 | Timestamp: uint64(time.Now().UnixNano()), 148 | Type: events.EventHTTPReq, 149 | }, 150 | { 151 | Timestamp: uint64(time.Now().Add(100 * time.Millisecond).UnixNano()), 152 | Type: events.EventHTTPResp, 153 | }, 154 | }, 155 | } 156 | 157 | span.UpdateDuration() 158 | 159 | if span.Duration == 0 { 160 | t.Error("Duration should be updated") 161 | } 162 | } 163 | -------------------------------------------------------------------------------- /internal/ebpf/tracer/errors_test.go: -------------------------------------------------------------------------------- 1 | package tracer 2 | 3 | import ( 4 | "errors" 5 | "testing" 6 | ) 7 | 8 | func TestTracerError_Error(t *testing.T) { 9 | tests := []struct { 10 | name string 11 | err *TracerError 12 | wantMsg string 13 | }{ 14 | { 15 | name: "error with wrapped error", 16 | err: &TracerError{ 17 | Code: ErrCodeCollectionFailed, 18 | Message: "test error", 19 | Err: errors.New("wrapped error"), 20 | }, 21 | wantMsg: "test error: wrapped error", 22 | }, 23 | { 24 | name: "error without wrapped error", 25 | err: &TracerError{ 26 | Code: ErrCodeRingBufferFailed, 27 | Message: "test error", 28 | Err: nil, 29 | }, 30 | wantMsg: "test error", 31 | }, 32 | } 33 | 34 | for _, tt := range tests { 35 | t.Run(tt.name, func(t *testing.T) { 36 | got := tt.err.Error() 37 | if got != tt.wantMsg { 38 | t.Errorf("Error() = %v, want %v", got, tt.wantMsg) 39 | } 40 | }) 41 | } 42 | } 43 | 44 | func TestTracerError_Unwrap(t *testing.T) { 45 | wrappedErr := errors.New("wrapped error") 46 | err := &TracerError{ 47 | Code: ErrCodeCollectionFailed, 48 | Message: "test error", 49 | Err: wrappedErr, 50 | } 51 | 52 | unwrapped := err.Unwrap() 53 | if unwrapped != wrappedErr { 54 | t.Errorf("Unwrap() = %v, want %v", unwrapped, wrappedErr) 55 | } 56 | 57 | errNoWrap := &TracerError{ 58 | Code: ErrCodeRingBufferFailed, 59 | Message: "test error", 60 | Err: nil, 61 | } 62 | 63 | unwrapped = errNoWrap.Unwrap() 64 | if unwrapped != nil { 65 | t.Errorf("Unwrap() = %v, want nil", unwrapped) 66 | } 67 | } 68 | 69 | func TestNewCollectionError(t *testing.T) { 70 | wrappedErr := errors.New("collection failed") 71 | err := NewCollectionError(wrappedErr) 72 | 73 | if err == nil { 74 | t.Fatal("Expected non-nil error") 75 | } 76 | 77 | if err.Code != ErrCodeCollectionFailed { 78 | t.Errorf("Expected Code %d, got %d", ErrCodeCollectionFailed, err.Code) 79 | } 80 | 81 | if err.Message != "failed to create eBPF collection" { 82 | t.Errorf("Expected Message 'failed to create eBPF collection', got %q", err.Message) 83 | } 84 | 85 | if err.Err != wrappedErr { 86 | t.Errorf("Expected wrapped 
error %v, got %v", wrappedErr, err.Err) 87 | } 88 | } 89 | 90 | func TestNewRingBufferError(t *testing.T) { 91 | wrappedErr := errors.New("ring buffer failed") 92 | err := NewRingBufferError(wrappedErr) 93 | 94 | if err == nil { 95 | t.Fatal("Expected non-nil error") 96 | } 97 | 98 | if err.Code != ErrCodeRingBufferFailed { 99 | t.Errorf("Expected Code %d, got %d", ErrCodeRingBufferFailed, err.Code) 100 | } 101 | 102 | if err.Message != "failed to create ring buffer reader" { 103 | t.Errorf("Expected Message 'failed to create ring buffer reader', got %q", err.Message) 104 | } 105 | 106 | if err.Err != wrappedErr { 107 | t.Errorf("Expected wrapped error %v, got %v", wrappedErr, err.Err) 108 | } 109 | } 110 | 111 | func TestNewMapLookupError(t *testing.T) { 112 | mapName := "test_map" 113 | wrappedErr := errors.New("lookup failed") 114 | err := NewMapLookupError(mapName, wrappedErr) 115 | 116 | if err == nil { 117 | t.Fatal("Expected non-nil error") 118 | } 119 | 120 | if err.Code != ErrCodeMapLookupFailed { 121 | t.Errorf("Expected Code %d, got %d", ErrCodeMapLookupFailed, err.Code) 122 | } 123 | 124 | expectedMsg := "failed to lookup map test_map" 125 | if err.Message != expectedMsg { 126 | t.Errorf("Expected Message %q, got %q", expectedMsg, err.Message) 127 | } 128 | 129 | if err.Err != wrappedErr { 130 | t.Errorf("Expected wrapped error %v, got %v", wrappedErr, err.Err) 131 | } 132 | } 133 | 134 | func TestNewInvalidEventError(t *testing.T) { 135 | reason := "invalid format" 136 | err := NewInvalidEventError(reason) 137 | 138 | if err == nil { 139 | t.Fatal("Expected non-nil error") 140 | } 141 | 142 | if err.Code != ErrCodeInvalidEvent { 143 | t.Errorf("Expected Code %d, got %d", ErrCodeInvalidEvent, err.Code) 144 | } 145 | 146 | expectedMsg := "invalid event: invalid format" 147 | if err.Message != expectedMsg { 148 | t.Errorf("Expected Message %q, got %q", expectedMsg, err.Message) 149 | } 150 | 151 | if err.Err != nil { 152 | t.Errorf("Expected nil wrapped error, got %v", err.Err) 153 | } 154 | } 155 | 156 | -------------------------------------------------------------------------------- /test/pool-test/pool-test-app.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "database/sql" 6 | "fmt" 7 | "log" 8 | "sync" 9 | "time" 10 | 11 | _ "github.com/mattn/go-sqlite3" 12 | ) 13 | 14 | func main() { 15 | db, err := sql.Open("sqlite3", ":memory:") 16 | if err != nil { 17 | log.Fatalf("Failed to open database: %v", err) 18 | } 19 | defer db.Close() 20 | 21 | maxOpenConns := 5 22 | maxIdleConns := 2 23 | db.SetMaxOpenConns(maxOpenConns) 24 | db.SetMaxIdleConns(maxIdleConns) 25 | db.SetConnMaxLifetime(time.Hour) 26 | 27 | _, err = db.Exec("CREATE TABLE IF NOT EXISTS test (id INTEGER PRIMARY KEY, data TEXT)") 28 | if err != nil { 29 | log.Fatalf("Failed to create table: %v", err) 30 | } 31 | 32 | fmt.Println("=== Connection Pool Test App Started ===") 33 | fmt.Printf("MaxOpenConns: %d, MaxIdleConns: %d\n", maxOpenConns, maxIdleConns) 34 | fmt.Println("") 35 | 36 | var wg sync.WaitGroup 37 | acquireCount := int64(0) 38 | releaseCount := int64(0) 39 | var mu sync.Mutex 40 | 41 | phase1 := func() { 42 | fmt.Println("Phase 1: Normal operations (100 inserts)") 43 | for i := 0; i < 100; i++ { 44 | _, err := db.Exec("INSERT INTO test (data) VALUES (?)", fmt.Sprintf("data-%d", i)) 45 | if err != nil { 46 | log.Printf("Failed to execute query: %v", err) 47 | continue 48 | } 49 | 50 | mu.Lock() 51 | acquireCount++ 52 
| releaseCount++ 53 | mu.Unlock() 54 | 55 | if i%10 == 0 { 56 | fmt.Printf(" Insert %d completed\n", i) 57 | } 58 | 59 | time.Sleep(50 * time.Millisecond) 60 | } 61 | fmt.Println("Phase 1 completed") 62 | fmt.Println("") 63 | } 64 | 65 | phase2 := func() { 66 | fmt.Println("Phase 2: Concurrent queries (20 queries)") 67 | for i := 0; i < 20; i++ { 68 | var count int 69 | err := db.QueryRow("SELECT COUNT(*) FROM test").Scan(&count) 70 | if err != nil { 71 | log.Printf("Query failed: %v", err) 72 | } else { 73 | mu.Lock() 74 | acquireCount++ 75 | releaseCount++ 76 | mu.Unlock() 77 | } 78 | time.Sleep(100 * time.Millisecond) 79 | } 80 | fmt.Println("Phase 2 completed") 81 | fmt.Println("") 82 | } 83 | 84 | phase3 := func() { 85 | fmt.Println("Phase 3: Pool exhaustion test (10 concurrent connections)") 86 | ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) 87 | defer cancel() 88 | 89 | for i := 0; i < 10; i++ { 90 | wg.Add(1) 91 | go func(id int) { 92 | defer wg.Done() 93 | conn, err := db.Conn(ctx) 94 | if err != nil { 95 | fmt.Printf(" Connection %d: Failed to acquire (pool exhausted): %v\n", id, err) 96 | return 97 | } 98 | defer conn.Close() 99 | 100 | mu.Lock() 101 | acquireCount++ 102 | mu.Unlock() 103 | 104 | fmt.Printf(" Connection %d: Acquired\n", id) 105 | time.Sleep(2 * time.Second) 106 | 107 | mu.Lock() 108 | releaseCount++ 109 | mu.Unlock() 110 | 111 | fmt.Printf(" Connection %d: Released\n", id) 112 | }(i) 113 | } 114 | wg.Wait() 115 | fmt.Println("Phase 3 completed") 116 | fmt.Println("") 117 | } 118 | 119 | phase4 := func() { 120 | fmt.Println("Phase 4: Continuous operations (running indefinitely)") 121 | ticker := time.NewTicker(500 * time.Millisecond) 122 | defer ticker.Stop() 123 | 124 | for range ticker.C { 125 | var count int 126 | err := db.QueryRow("SELECT COUNT(*) FROM test").Scan(&count) 127 | if err != nil { 128 | log.Printf("Query failed: %v", err) 129 | continue 130 | } 131 | 132 | mu.Lock() 133 | acquireCount++ 134 | releaseCount++ 135 | currentAcq := acquireCount 136 | currentRel := releaseCount 137 | mu.Unlock() 138 | 139 | if currentAcq%10 == 0 { 140 | fmt.Printf("Running: %d acquires, %d releases\n", currentAcq, currentRel) 141 | } 142 | } 143 | } 144 | 145 | phase1() 146 | phase2() 147 | phase3() 148 | 149 | mu.Lock() 150 | totalAcq := acquireCount 151 | totalRel := releaseCount 152 | mu.Unlock() 153 | 154 | fmt.Printf("=== Summary ===\n") 155 | fmt.Printf("Total acquires: %d\n", totalAcq) 156 | fmt.Printf("Total releases: %d\n", totalRel) 157 | fmt.Printf("Reuse rate: %.2f%%\n", float64(totalRel)/float64(totalAcq)*100) 158 | fmt.Println("") 159 | fmt.Println("Starting continuous operations...") 160 | fmt.Println("") 161 | 162 | phase4() 163 | } 164 | -------------------------------------------------------------------------------- /internal/tracing/exporter/otlp.go: -------------------------------------------------------------------------------- 1 | package exporter 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "time" 7 | 8 | "go.opentelemetry.io/otel" 9 | "go.opentelemetry.io/otel/attribute" 10 | "go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp" 11 | "go.opentelemetry.io/otel/sdk/resource" 12 | sdktrace "go.opentelemetry.io/otel/sdk/trace" 13 | semconv "go.opentelemetry.io/otel/semconv/v1.24.0" 14 | "go.opentelemetry.io/otel/trace" 15 | 16 | "github.com/podtrace/podtrace/internal/config" 17 | "github.com/podtrace/podtrace/internal/diagnose/tracker" 18 | ) 19 | 20 | type OTLPExporter struct { 21 | exporter 
sdktrace.SpanExporter 22 | tracer trace.Tracer 23 | tp *sdktrace.TracerProvider 24 | endpoint string 25 | enabled bool 26 | sampleRate float64 27 | } 28 | 29 | func NewOTLPExporter(endpoint string, sampleRate float64) (*OTLPExporter, error) { 30 | if endpoint == "" { 31 | endpoint = config.DefaultOTLPEndpoint 32 | } 33 | 34 | ctx := context.Background() 35 | otlpExporter, err := otlptracehttp.New(ctx, 36 | otlptracehttp.WithEndpoint(endpoint), 37 | otlptracehttp.WithInsecure(), 38 | ) 39 | if err != nil { 40 | return nil, fmt.Errorf("failed to create OTLP exporter: %w", err) 41 | } 42 | 43 | res, err := resource.New(ctx, 44 | resource.WithAttributes( 45 | semconv.ServiceNameKey.String("Podtrace"), 46 | ), 47 | ) 48 | if err != nil { 49 | return nil, fmt.Errorf("failed to create resource: %w", err) 50 | } 51 | 52 | tp := sdktrace.NewTracerProvider( 53 | sdktrace.WithBatcher(otlpExporter), 54 | sdktrace.WithResource(res), 55 | ) 56 | 57 | otel.SetTracerProvider(tp) 58 | 59 | return &OTLPExporter{ 60 | exporter: otlpExporter, 61 | tp: tp, 62 | tracer: tp.Tracer("Podtrace"), 63 | endpoint: endpoint, 64 | enabled: true, 65 | sampleRate: sampleRate, 66 | }, nil 67 | } 68 | 69 | func (e *OTLPExporter) ExportTraces(traces []*tracker.Trace) error { 70 | if !e.enabled || len(traces) == 0 { 71 | return nil 72 | } 73 | 74 | ctx := context.Background() 75 | for _, t := range traces { 76 | if !e.shouldSample(t) { 77 | continue 78 | } 79 | 80 | for _, span := range t.Spans { 81 | if err := e.exportSpan(ctx, span, t); err != nil { 82 | continue 83 | } 84 | } 85 | } 86 | 87 | return nil 88 | } 89 | 90 | func (e *OTLPExporter) shouldSample(_ *tracker.Trace) bool { 91 | if e.sampleRate >= 1.0 { 92 | return true 93 | } 94 | if e.sampleRate <= 0.0 { 95 | return false 96 | } 97 | return time.Now().UnixNano()%int64(1.0/e.sampleRate) == 0 98 | } 99 | 100 | func (e *OTLPExporter) exportSpan(ctx context.Context, span *tracker.Span, _ *tracker.Trace) error { 101 | span.UpdateDuration() 102 | 103 | traceID, err := trace.TraceIDFromHex(span.TraceID) 104 | if err != nil { 105 | return fmt.Errorf("invalid trace ID: %w", err) 106 | } 107 | 108 | spanID, err := trace.SpanIDFromHex(span.SpanID) 109 | if err != nil { 110 | return fmt.Errorf("invalid span ID: %w", err) 111 | } 112 | 113 | spanContext := trace.NewSpanContext(trace.SpanContextConfig{ 114 | TraceID: traceID, 115 | SpanID: spanID, 116 | Remote: false, 117 | TraceFlags: trace.FlagsSampled, 118 | }) 119 | 120 | ctx = trace.ContextWithSpanContext(ctx, spanContext) 121 | 122 | _, otelSpan := e.tracer.Start(ctx, span.Operation, 123 | trace.WithTimestamp(span.StartTime), 124 | ) 125 | 126 | attrs := make([]attribute.KeyValue, 0, len(span.Attributes)) 127 | for k, v := range span.Attributes { 128 | attrs = append(attrs, attribute.String(k, v)) 129 | } 130 | if span.ParentSpanID != "" { 131 | attrs = append(attrs, attribute.String("parent_span_id", span.ParentSpanID)) 132 | } 133 | otelSpan.SetAttributes(attrs...) 
134 | 135 | if span.Error { 136 | otelSpan.RecordError(fmt.Errorf("span error")) 137 | } 138 | 139 | for _, event := range span.Events { 140 | otelSpan.AddEvent(event.TypeString(), 141 | trace.WithTimestamp(event.TimestampTime()), 142 | trace.WithAttributes( 143 | attribute.String("target", event.Target), 144 | attribute.Int64("latency_ns", int64(event.LatencyNS)), 145 | ), 146 | ) 147 | } 148 | 149 | otelSpan.End(trace.WithTimestamp(span.StartTime.Add(span.Duration))) 150 | 151 | return nil 152 | } 153 | 154 | func (e *OTLPExporter) Shutdown(ctx context.Context) error { 155 | if e.tp != nil { 156 | return e.tp.Shutdown(ctx) 157 | } 158 | return nil 159 | } 160 | -------------------------------------------------------------------------------- /internal/diagnose/formatter/formatter_test.go: -------------------------------------------------------------------------------- 1 | package formatter 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/podtrace/podtrace/internal/diagnose/analyzer" 7 | ) 8 | 9 | func TestSectionHeader(t *testing.T) { 10 | result := SectionHeader("DNS") 11 | if result != "DNS Statistics:\n" { 12 | t.Errorf("Expected 'DNS Statistics:\\n', got %q", result) 13 | } 14 | } 15 | 16 | func TestTotalWithRate(t *testing.T) { 17 | result := TotalWithRate("lookups", 100, 10.5) 18 | expected := " Total lookups: 100 (10.5/sec)\n" 19 | if result != expected { 20 | t.Errorf("Expected %q, got %q", expected, result) 21 | } 22 | } 23 | 24 | func TestLatencyMetrics(t *testing.T) { 25 | result := LatencyMetrics(5.5, 10.2) 26 | if !contains(result, "5.50") || !contains(result, "10.20") { 27 | t.Errorf("Expected latency metrics, got %q", result) 28 | } 29 | } 30 | 31 | func TestPercentiles(t *testing.T) { 32 | result := Percentiles(1.0, 2.0, 3.0) 33 | if !contains(result, "P50=1.00") || !contains(result, "P95=2.00") || !contains(result, "P99=3.00") { 34 | t.Errorf("Expected percentiles, got %q", result) 35 | } 36 | } 37 | 38 | func TestErrorRate_ZeroTotal(t *testing.T) { 39 | result := ErrorRate(5, 0) 40 | if !contains(result, "0.0%") { 41 | t.Errorf("Expected 0.0%% for zero total, got %q", result) 42 | } 43 | } 44 | 45 | func TestErrorRate_WithErrors(t *testing.T) { 46 | result := ErrorRate(5, 100) 47 | if !contains(result, "5.0%") { 48 | t.Errorf("Expected 5.0%% error rate, got %q", result) 49 | } 50 | } 51 | 52 | func TestTopTargets_Empty(t *testing.T) { 53 | result := TopTargets([]analyzer.TargetCount{}, 5, "targets", "counts") 54 | if result != "" { 55 | t.Errorf("Expected empty string for empty targets, got %q", result) 56 | } 57 | } 58 | 59 | func TestTopTargets_WithLimit(t *testing.T) { 60 | targets := []analyzer.TargetCount{ 61 | {Target: "target1", Count: 10}, 62 | {Target: "target2", Count: 20}, 63 | {Target: "target3", Count: 30}, 64 | {Target: "target4", Count: 40}, 65 | {Target: "target5", Count: 50}, 66 | {Target: "target6", Count: 60}, 67 | } 68 | result := TopTargets(targets, 3, "targets", "counts") 69 | if countOccurrences(result, "-") > 3 { 70 | t.Errorf("Expected at most 3 targets, got more") 71 | } 72 | } 73 | 74 | func TestBytesSection_Empty(t *testing.T) { 75 | result := BytesSection(0, 0, 0) 76 | if result != "" { 77 | t.Errorf("Expected empty string for zero bytes, got %q", result) 78 | } 79 | } 80 | 81 | func TestBytesSection_WithBytes(t *testing.T) { 82 | result := BytesSection(1024, 512, 256) 83 | if result == "" { 84 | t.Error("Expected non-empty bytes section") 85 | } 86 | if !contains(result, "KB") && !contains(result, "B") { 87 | t.Errorf("Expected bytes 
section with formatted bytes, got %q", result)
88 | }
89 | }
90 | 
91 | func TestRate_ZeroDuration(t *testing.T) {
92 | result := Rate(100, 0)
93 | if result != "" {
94 | t.Errorf("Expected empty string for zero duration, got %q", result)
95 | }
96 | }
97 | 
98 | func TestRate_WithDuration(t *testing.T) {
99 | result := Rate(100, 10.0)
100 | if !contains(result, "10.0") {
101 | t.Errorf("Expected rate string, got %q", result)
102 | }
103 | }
104 | 
105 | func TestTopItems_Empty(t *testing.T) {
106 | result := TopItems(map[string]int{}, 5, "items", "counts")
107 | if result != "" {
108 | t.Errorf("Expected empty string for empty items, got %q", result)
109 | }
110 | }
111 | 
112 | func TestTopItems_WithLimit(t *testing.T) {
113 | items := map[string]int{
114 | "item1": 10,
115 | "item2": 20,
116 | "item3": 30,
117 | "item4": 40,
118 | "item5": 50,
119 | "item6": 60,
120 | }
121 | result := TopItems(items, 3, "items", "counts")
122 | if countOccurrences(result, "-") > 3 {
123 | t.Errorf("Expected at most 3 items, got more")
124 | }
125 | }
126 | 
127 | // contains reports whether substr occurs anywhere in s; the single scan also
128 | // covers the prefix, suffix, and equality cases that were previously special-cased.
129 | func contains(s, substr string) bool {
130 | for i := 0; i+len(substr) <= len(s); i++ {
131 | if s[i:i+len(substr)] == substr {
132 | return true
133 | }
134 | }
135 | return false
136 | }
137 | 
138 | func countOccurrences(s, substr string) int {
139 | count := 0
140 | for i := 0; i <= len(s)-len(substr); i++ {
141 | if s[i:i+len(substr)] == substr {
142 | count++
143 | }
144 | }
145 | return count
146 | }
147 | 
148 | 
--------------------------------------------------------------------------------
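
A minimal usage sketch, not a file in this repository, composing a report section from the formatter helpers exercised by the tests above. Argument types are inferred from those test calls, and since the package lives under internal/, such a caller would have to sit inside this module:

package main

import (
	"fmt"

	"github.com/podtrace/podtrace/internal/diagnose/formatter"
)

func main() {
	// Build a report section the way the tests exercise the helpers:
	// a header line, a total-with-rate line, percentiles, and an error rate.
	report := formatter.SectionHeader("DNS") // yields "DNS Statistics:\n"
	report += formatter.TotalWithRate("lookups", 100, 10.5)
	report += formatter.Percentiles(1.0, 2.0, 3.0)
	report += formatter.ErrorRate(5, 100) // 5 errors out of 100 operations
	fmt.Print(report)
}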