├── README.md ├── go.mod ├── main.go ├── semgrep.go ├── semgrep_test.go ├── semgrepx.gif └── testdata ├── bad.go ├── good.go └── semgrep.json /README.md: -------------------------------------------------------------------------------- 1 | # SEMGREPX 2 | 3 | > A tool for rewriting semgrep matches using external tools 4 | 5 | ### Installation: 6 | 7 | If you have a Go toolchain installed, you can install the `semgrepx` binary like this: 8 | 9 | ```sh 10 | go install github.com/icholy/semgrepx@latest 11 | ``` 12 | 13 | It will be placed in your `GOBIN` directory, which defaults to `~/go/bin`. 14 | Depending on your install method, this may or may not already be in your `PATH`. 15 | 16 | 17 | ### CLI: 18 | 19 | ``` 20 | Usage: semgrepx [flags] [args...] 21 | flags: 22 | -dir string 23 | directory to run in (default ".") 24 | -file string 25 | semgrep json file 26 | -lines 27 | expand matches to full lines 28 | -retry int 29 | number of retries (< 0 is unlimited) 30 | -trim 31 | trim whitespace 32 | ``` 33 | 34 | ### How it works: 35 | 36 | The provided command is executed for every semgrep match. 37 | The matched code is sent to the command's stdin. 38 | The matched code is replaced by the command's stdout. 39 | 40 | ### Example: 41 | 42 | ```sh 43 | # create a file of matches 44 | semgrep -l go --pattern 'log.$A(...)' --json > matches.json 45 | 46 | # rewrite all the matches using the llm tool 47 | semgrepx llm 'update this go to use log.Printf' < matches.json 48 | ``` 49 | 50 | * This example uses the [llm](https://llm.datasette.io/en/stable/) tool. 
51 | 52 | ### Demo: 53 | 54 | ![](./semgrepx.gif) 55 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/icholy/semgrepx 2 | 3 | go 1.22.0 4 | -------------------------------------------------------------------------------- /main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bufio" 5 | "bytes" 6 | "flag" 7 | "fmt" 8 | "log" 9 | "os" 10 | "os/exec" 11 | "strconv" 12 | "strings" 13 | ) 14 | 15 | func main() { 16 | // parse flags 17 | var dir, file string 18 | var trim, lines bool 19 | var retry int 20 | flag.Usage = func() { 21 | fmt.Fprintln(os.Stderr, "Usage: semgrepx [flags] [args...]") 22 | fmt.Println("flags:") 23 | flag.PrintDefaults() 24 | } 25 | flag.StringVar(&file, "file", "", "semgrep json file") 26 | flag.StringVar(&dir, "dir", ".", "directory to run in") 27 | flag.BoolVar(&trim, "trim", false, "trim whitespace") 28 | flag.BoolVar(&lines, "lines", false, "expand matches to full lines") 29 | flag.IntVar(&retry, "retry", 0, "number of retries (< 0 is unlimited)") 30 | flag.Parse() 31 | if flag.NArg() == 0 { 32 | flag.Usage() 33 | os.Exit(1) 34 | } 35 | // read semgrep json 36 | var output *Output 37 | if file != "" { 38 | var err error 39 | output, err = ReadOutputFile(file) 40 | if err != nil { 41 | log.Fatalf("failed to open semgrep json file: %v", err) 42 | } 43 | } else { 44 | var err error 45 | output, err = ReadOutput(os.Stdin) 46 | if err != nil { 47 | log.Fatalf("failed to read semgrep json: %v", err) 48 | } 49 | } 50 | err := RewriteAll(dir, output.Results, func(r Result, data []byte) (Result, []byte, error) { 51 | if lines { 52 | r = ExtendLines(r, data) 53 | } 54 | match := data[r.Start.Offset:r.End.Offset] 55 | fmt.Printf("--- before: %s\n%s\n", 56 | r.Path, 57 | FormatLines(match, r.Start.Line, 5), 58 | ) 59 | var stdout 
bytes.Buffer 60 | for retries := 0; true; retries++ { 61 | stdout.Reset() 62 | cmd := exec.Command(flag.Arg(0), flag.Args()[1:]...) 63 | cmd.Stdin = bytes.NewReader(match) 64 | cmd.Stdout = &stdout 65 | cmd.Stderr = os.Stderr 66 | err := cmd.Run() 67 | if err == nil { 68 | break 69 | } 70 | if retry < 0 || retries <= retry { 71 | log.Printf("retrying: %v\n", err) 72 | continue 73 | } 74 | return r, nil, err 75 | } 76 | rewritten := stdout.Bytes() 77 | if trim { 78 | rewritten = bytes.TrimSpace(rewritten) 79 | } 80 | fmt.Printf("--- after: %s\n%s\n", 81 | r.Path, 82 | FormatLines(rewritten, r.Start.Line, 5), 83 | ) 84 | return r, rewritten, nil 85 | }) 86 | if err != nil { 87 | log.Fatalf("failed to rewrite: %v", err) 88 | } 89 | } 90 | 91 | // ExtendLines returns r with the Start and End extended to include 92 | // the full line content 93 | func ExtendLines(r Result, data []byte) Result { 94 | if len(data) == 0 { 95 | return r 96 | } 97 | isNL := func(b byte) bool { return b == '\n' || b == '\r' } 98 | if !isNL(data[r.Start.Offset]) { 99 | for r.Start.Offset > 0 && !isNL(data[r.Start.Offset-1]) { 100 | r.Start.Offset-- 101 | r.Start.Col-- 102 | } 103 | } 104 | if !isNL(data[r.End.Offset]) { 105 | for r.End.Offset < len(data) && !isNL(data[r.End.Offset]) { 106 | r.End.Offset++ 107 | r.End.Col++ 108 | } 109 | } 110 | return r 111 | } 112 | 113 | func FormatLines(data []byte, lineno, indent int) string { 114 | var b strings.Builder 115 | scanner := bufio.NewScanner(bytes.NewReader(data)) 116 | for scanner.Scan() { 117 | num := strconv.Itoa(lineno) 118 | b.WriteString(strings.Repeat(" ", indent-len(num))) 119 | b.WriteString(num) 120 | b.WriteString("| ") 121 | b.Write(scanner.Bytes()) 122 | b.WriteByte('\n') 123 | lineno++ 124 | } 125 | if scanner.Err() != nil { 126 | panic("unreachable") 127 | } 128 | return b.String() 129 | } 130 | -------------------------------------------------------------------------------- /semgrep.go: 
-------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "encoding/json" 5 | "errors" 6 | "io" 7 | "os" 8 | "path/filepath" 9 | "slices" 10 | ) 11 | 12 | type Pos struct { 13 | Col int `json:"col"` 14 | Line int `json:"line"` 15 | Offset int `json:"offset"` 16 | } 17 | 18 | type Result struct { 19 | CheckID string `json:"check_id"` 20 | End Pos `json:"end"` 21 | Extra Extra `json:"extra"` 22 | Path string `json:"path"` 23 | Start Pos `json:"start"` 24 | } 25 | 26 | type Extra struct { 27 | EngineKind string `json:"engine_kind"` 28 | Fingerprint string `json:"fingerprint"` 29 | IsIgnored bool `json:"is_ignored"` 30 | Lines string `json:"lines"` 31 | Message string `json:"message"` 32 | Metadata map[string]any `json:"metadata"` 33 | Severity string `json:"severity"` 34 | } 35 | 36 | type Paths struct { 37 | Comment string `json:"_comment"` 38 | Scanned []string `json:"scanned"` 39 | } 40 | 41 | type Output struct { 42 | Errors []any `json:"errors"` 43 | Paths Paths `json:"paths"` 44 | Results []Result `json:"results"` 45 | Version string `json:"version"` 46 | } 47 | 48 | func ReadOutput(r io.Reader) (*Output, error) { 49 | var output Output 50 | dec := json.NewDecoder(r) 51 | if err := dec.Decode(&output); err != nil { 52 | return nil, err 53 | } 54 | return &output, nil 55 | } 56 | 57 | func ReadOutputFile(filename string) (*Output, error) { 58 | f, err := os.Open(filename) 59 | if err != nil { 60 | return nil, err 61 | } 62 | defer f.Close() 63 | return ReadOutput(f) 64 | } 65 | 66 | type RewriteFn = func(r Result, data []byte) (Result, []byte, error) 67 | 68 | var ErrSkip = errors.New("skip") 69 | 70 | func Rewrite(data []byte, results []Result, rewrite RewriteFn) ([]byte, error) { 71 | slices.SortFunc(results, func(a, b Result) int { 72 | return b.Start.Offset - a.Start.Offset 73 | }) 74 | for _, r := range results { 75 | r, rewritten, err := rewrite(r, data) 76 | if err == ErrSkip { 77 | continue 78 | 
} 79 | if err != nil { 80 | return nil, err 81 | } 82 | data = slices.Replace(data, r.Start.Offset, r.End.Offset, rewritten...) 83 | } 84 | return data, nil 85 | } 86 | 87 | func RewriteAll(dir string, results []Result, rewrite RewriteFn) error { 88 | files := map[string][]Result{} 89 | for _, r := range results { 90 | files[r.Path] = append(files[r.Path], r) 91 | } 92 | for file, rr := range files { 93 | path := filepath.Join(dir, file) 94 | data, err := os.ReadFile(path) 95 | if err != nil { 96 | return err 97 | } 98 | data, err = Rewrite(data, rr, rewrite) 99 | if err != nil { 100 | return err 101 | } 102 | if err := os.WriteFile(path, data, os.ModePerm); err != nil { 103 | return err 104 | } 105 | } 106 | return nil 107 | } 108 | -------------------------------------------------------------------------------- /semgrep_test.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bytes" 5 | "os" 6 | "path/filepath" 7 | "testing" 8 | ) 9 | 10 | func TestRewrite(t *testing.T) { 11 | good, err := os.ReadFile(filepath.FromSlash("./testdata/good.go")) 12 | if err != nil { 13 | t.Fatalf("failed to read good file: %v", err) 14 | } 15 | bad, err := os.ReadFile(filepath.FromSlash("./testdata/bad.go")) 16 | if err != nil { 17 | t.Fatalf("failed to read bad file: %v", err) 18 | } 19 | output, err := ReadOutputFile(filepath.FromSlash("./testdata/semgrep.json")) 20 | if err != nil { 21 | t.Fatalf("failed to read semgrep json: %v", err) 22 | } 23 | rewritten, err := Rewrite( 24 | bad, 25 | output.Results, 26 | func(r Result, data []byte) (Result, []byte, error) { 27 | return r, []byte("Good()"), nil 28 | }, 29 | ) 30 | if err != nil { 31 | t.Fatalf("failed to rewrite: %v", err) 32 | } 33 | if !bytes.Equal(rewritten, good) { 34 | t.Fatalf("rewritten file does not match good file") 35 | } 36 | } 37 | 38 | func TestExtendLines(t *testing.T) { 39 | tests := []struct { 40 | result Result 41 | data, want []byte 42 | 
}{ 43 | { 44 | data: []byte(""), 45 | want: []byte(""), 46 | result: Result{ 47 | Start: Pos{ 48 | Line: 1, 49 | Col: 0, 50 | Offset: 0, 51 | }, 52 | End: Pos{ 53 | Line: 1, 54 | Col: 0, 55 | Offset: 0, 56 | }, 57 | }, 58 | }, 59 | { 60 | data: []byte(" a "), 61 | want: []byte(" a "), 62 | result: Result{ 63 | Start: Pos{ 64 | Line: 1, 65 | Offset: 1, 66 | Col: 1, 67 | }, 68 | End: Pos{ 69 | Line: 1, 70 | Col: 1, 71 | Offset: 1, 72 | }, 73 | }, 74 | }, 75 | { 76 | data: []byte("foo();\nbar();\nbaz++"), 77 | want: []byte("bar();"), 78 | result: Result{ 79 | Start: Pos{ 80 | Line: 2, 81 | Col: 9, 82 | Offset: 9, 83 | }, 84 | End: Pos{ 85 | Line: 1, 86 | Col: 9, 87 | Offset: 9, 88 | }, 89 | }, 90 | }, 91 | } 92 | for _, tt := range tests { 93 | t.Run("", func(t *testing.T) { 94 | r := ExtendLines(tt.result, tt.data) 95 | got := tt.data[r.Start.Offset:r.End.Offset] 96 | if !bytes.Equal(got, tt.want) { 97 | t.Errorf("got %q, want %q", got, tt.want) 98 | } 99 | }) 100 | } 101 | } 102 | -------------------------------------------------------------------------------- /semgrepx.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/icholy/semgrepx/b1c04396a796563030c21437b61882d7fd83edc7/semgrepx.gif -------------------------------------------------------------------------------- /testdata/bad.go: -------------------------------------------------------------------------------- 1 | package bad 2 | 3 | func Bad() { 4 | panic("do not call me") 5 | } 6 | 7 | func Good() { 8 | println("hello") 9 | } 10 | 11 | func main() { 12 | Bad() 13 | Good() 14 | Bad() 15 | } 16 | -------------------------------------------------------------------------------- /testdata/good.go: -------------------------------------------------------------------------------- 1 | package bad 2 | 3 | func Bad() { 4 | panic("do not call me") 5 | } 6 | 7 | func Good() { 8 | println("hello") 9 | } 10 | 11 | func main() { 12 | Good() 13 | Good() 14 | 
Good() 15 | } 16 | -------------------------------------------------------------------------------- /testdata/semgrep.json: -------------------------------------------------------------------------------- 1 | {"errors": [], "interfile_languages_used": [], "paths": {"scanned": ["bad.go", "good.go"]}, "results": [{"check_id": "-", "end": {"col": 7, "line": 12, "offset": 109}, "extra": {"engine_kind": "OSS", "fingerprint": "56e61778516dcdf51b5f5133a8bf01247be8e7ef5708817fdd05f009bc500f15657dd6d718676951c28a0b4ff9921e68543a8f2823b5855d4c65e93e45a526ab_0", "is_ignored": false, "lines": "\tBad()", "message": "Bad()", "metadata": {}, "metavars": {}, "severity": "ERROR", "validation_state": "NO_VALIDATOR"}, "path": "bad.go", "start": {"col": 2, "line": 12, "offset": 104}}, {"check_id": "-", "end": {"col": 7, "line": 14, "offset": 124}, "extra": {"engine_kind": "OSS", "fingerprint": "56e61778516dcdf51b5f5133a8bf01247be8e7ef5708817fdd05f009bc500f15657dd6d718676951c28a0b4ff9921e68543a8f2823b5855d4c65e93e45a526ab_1", "is_ignored": false, "lines": "\tBad()", "message": "Bad()", "metadata": {}, "metavars": {}, "severity": "ERROR", "validation_state": "NO_VALIDATOR"}, "path": "bad.go", "start": {"col": 2, "line": 14, "offset": 119}}], "skipped_rules": [], "version": "1.60.0"} 2 | --------------------------------------------------------------------------------