├── .gitignore ├── README.md ├── internal ├── diff │ ├── testdata │ │ ├── same.txt │ │ ├── eof.txt │ │ ├── allnew.txt │ │ ├── allold.txt │ │ ├── eof1.txt │ │ ├── eof2.txt │ │ ├── start.txt │ │ ├── dups.txt │ │ ├── end.txt │ │ ├── basic.txt │ │ ├── triv.txt │ │ └── long.txt │ ├── diff_test.go │ └── diff.go ├── commentfix │ ├── testdata │ │ ├── nop.txt │ │ ├── autolink.txt │ │ ├── order.txt │ │ ├── replacetext.txt │ │ └── replaceurl.txt │ ├── fix_test.go │ └── fix.go ├── github │ ├── testing_test.go │ ├── edit_test.go │ ├── edit.go │ ├── data.go │ └── sync_test.go ├── llm │ ├── llm_test.go │ ├── embed_test.go │ ├── embed.go │ └── llm.go ├── storage │ ├── vectordb_test.go │ ├── db_test.go │ ├── mem_test.go │ ├── vtest.go │ ├── vectordb.go │ ├── test.go │ ├── db.go │ ├── mem.go │ └── timed │ │ └── timed_test.go ├── keycheck │ └── key_test.go ├── secret │ ├── secret_test.go │ └── secret.go ├── testutil │ └── testutil.go ├── pebble │ ├── pebble_test.go │ └── pebble.go ├── embeddocs │ ├── sync.go │ └── sync_test.go ├── githubdocs │ ├── sync.go │ └── sync_test.go ├── gemini │ ├── gemini_test.go │ └── gemini.go ├── docs │ ├── docs_test.go │ └── docs.go ├── testdata │ ├── omap.httprr │ └── markdown3.httprr ├── related │ ├── related_test.go │ └── related.go └── httprr │ ├── rr_test.go │ └── rr.go ├── LICENSE └── go.mod /.gitignore: -------------------------------------------------------------------------------- 1 | *.db 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Moved to . 
2 | -------------------------------------------------------------------------------- /internal/diff/testdata/same.txt: -------------------------------------------------------------------------------- 1 | -- old -- 2 | hello world 3 | -- new -- 4 | hello world 5 | -- diff -- 6 | -------------------------------------------------------------------------------- /internal/diff/testdata/eof.txt: -------------------------------------------------------------------------------- 1 | -- old -- 2 | a 3 | b 4 | c^D 5 | -- new -- 6 | a 7 | b 8 | c^D 9 | -- diff -- 10 | -------------------------------------------------------------------------------- /internal/diff/testdata/allnew.txt: -------------------------------------------------------------------------------- 1 | -- old -- 2 | -- new -- 3 | a 4 | b 5 | c 6 | -- diff -- 7 | diff old new 8 | --- old 9 | +++ new 10 | @@ -0,0 +1,3 @@ 11 | +a 12 | +b 13 | +c 14 | -------------------------------------------------------------------------------- /internal/diff/testdata/allold.txt: -------------------------------------------------------------------------------- 1 | -- old -- 2 | a 3 | b 4 | c 5 | -- new -- 6 | -- diff -- 7 | diff old new 8 | --- old 9 | +++ new 10 | @@ -1,3 +0,0 @@ 11 | -a 12 | -b 13 | -c 14 | -------------------------------------------------------------------------------- /internal/diff/testdata/eof1.txt: -------------------------------------------------------------------------------- 1 | -- old -- 2 | a 3 | b 4 | c 5 | -- new -- 6 | a 7 | b 8 | c^D 9 | -- diff -- 10 | diff old new 11 | --- old 12 | +++ new 13 | @@ -1,3 +1,3 @@ 14 | a 15 | b 16 | -c 17 | +c 18 | \ No newline at end of file 19 | -------------------------------------------------------------------------------- /internal/diff/testdata/eof2.txt: -------------------------------------------------------------------------------- 1 | -- old -- 2 | a 3 | b 4 | c^D 5 | -- new -- 6 | a 7 | b 8 | c 9 | -- diff -- 10 | diff old new 11 | --- old 12 | +++ new 13 | 
@@ -1,3 +1,3 @@ 14 | a 15 | b 16 | -c 17 | \ No newline at end of file 18 | +c 19 | -------------------------------------------------------------------------------- /internal/diff/testdata/start.txt: -------------------------------------------------------------------------------- 1 | -- old -- 2 | e 3 | pi 4 | 4 5 | 5 6 | 6 7 | 7 8 | 8 9 | 9 10 | 10 11 | -- new -- 12 | 1 13 | 2 14 | 3 15 | 4 16 | 5 17 | 6 18 | 7 19 | 8 20 | 9 21 | 10 22 | -- diff -- 23 | diff old new 24 | --- old 25 | +++ new 26 | @@ -1,5 +1,6 @@ 27 | -e 28 | -pi 29 | +1 30 | +2 31 | +3 32 | 4 33 | 5 34 | 6 35 | -------------------------------------------------------------------------------- /internal/diff/testdata/dups.txt: -------------------------------------------------------------------------------- 1 | -- old -- 2 | a 3 | 4 | b 5 | 6 | c 7 | 8 | d 9 | 10 | e 11 | 12 | f 13 | -- new -- 14 | a 15 | 16 | B 17 | 18 | C 19 | 20 | d 21 | 22 | e 23 | 24 | f 25 | -- diff -- 26 | diff old new 27 | --- old 28 | +++ new 29 | @@ -1,8 +1,8 @@ 30 | a 31 | $ 32 | -b 33 | - 34 | -c 35 | +B 36 | + 37 | +C 38 | $ 39 | d 40 | $ 41 | -------------------------------------------------------------------------------- /internal/diff/testdata/end.txt: -------------------------------------------------------------------------------- 1 | -- old -- 2 | 1 3 | 2 4 | 3 5 | 4 6 | 5 7 | 6 8 | 7 9 | eight 10 | nine 11 | ten 12 | eleven 13 | -- new -- 14 | 1 15 | 2 16 | 3 17 | 4 18 | 5 19 | 6 20 | 7 21 | 8 22 | 9 23 | 10 24 | -- diff -- 25 | diff old new 26 | --- old 27 | +++ new 28 | @@ -5,7 +5,6 @@ 29 | 5 30 | 6 31 | 7 32 | -eight 33 | -nine 34 | -ten 35 | -eleven 36 | +8 37 | +9 38 | +10 39 | -------------------------------------------------------------------------------- /internal/commentfix/testdata/nop.txt: -------------------------------------------------------------------------------- 1 | {{/* 2 | make sure this does not loop; 3 | it claims to have edited (and did edit) the text, 4 | so the result is non-empty, 5 | but 
no actual change is made. 6 | */}} 7 | {{.ReplaceText `cancelled` "canceled"}} 8 | {{.ReplaceText `canceled` "cancelled"}} 9 | -- 1.in -- 10 | The context is cancelled. 11 | -- 1.out -- 12 | The context is cancelled. 13 | -------------------------------------------------------------------------------- /internal/commentfix/testdata/autolink.txt: -------------------------------------------------------------------------------- 1 | {{.AutoLink `\bCL (\d+)\b` "https://go.dev/cl/$1"}} 2 | -- 1.in -- 3 | This is in CL 12345. 4 | -- 1.out -- 5 | This is in [CL 12345](https://go.dev/cl/12345). 6 | -- 2.in -- 7 | This is in **CL 12345**. 8 | -- 2.out -- 9 | This is in **[CL 12345](https://go.dev/cl/12345)**. 10 | -- 3.in -- 11 | This is in [the CL 12345 page](https://go.dev/cl/12345). 12 | -- 3.out -- 13 | -------------------------------------------------------------------------------- /internal/diff/testdata/basic.txt: -------------------------------------------------------------------------------- 1 | Example from Hunt and McIlroy, “An Algorithm for Differential File Comparison.” 2 | https://www.cs.dartmouth.edu/~doug/diff.pdf 3 | 4 | -- old -- 5 | a 6 | b 7 | c 8 | d 9 | e 10 | f 11 | g 12 | -- new -- 13 | w 14 | a 15 | b 16 | x 17 | y 18 | z 19 | e 20 | -- diff -- 21 | diff old new 22 | --- old 23 | +++ new 24 | @@ -1,7 +1,7 @@ 25 | +w 26 | a 27 | b 28 | -c 29 | -d 30 | +x 31 | +y 32 | +z 33 | e 34 | -f 35 | -g 36 | -------------------------------------------------------------------------------- /internal/commentfix/testdata/order.txt: -------------------------------------------------------------------------------- 1 | {{/* 2 | rules apply in order. 3 | make sure this does not loop; 4 | it claims to have edited (and did edit) the text, 5 | so the result is non-empty, 6 | but no actual change is made. 7 | */}} 8 | {{.ReplaceText `cancelled` "canceled"}} 9 | {{.ReplaceText `canceled` "cancelled"}} 10 | -- 1.in -- 11 | The context is cancelled. 
12 | -- 1.out -- 13 | The context is cancelled. 14 | -- 2.in -- 15 | The context is canceled. 16 | -- 2.out -- 17 | The context is cancelled. 18 | -------------------------------------------------------------------------------- /internal/github/testing_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2024 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package github 6 | 7 | import ( 8 | "testing" 9 | 10 | "rsc.io/gaby/internal/storage" 11 | "rsc.io/gaby/internal/testutil" 12 | ) 13 | 14 | func TestLoadTxtar(t *testing.T) { 15 | gh := New(testutil.Slogger(t), storage.MemDB(), nil, nil) 16 | testutil.Check(t, gh.Testing().LoadTxtar("../testdata/rsctmp.txt")) 17 | } 18 | -------------------------------------------------------------------------------- /internal/diff/testdata/triv.txt: -------------------------------------------------------------------------------- 1 | Another example from Hunt and McIlroy, 2 | “An Algorithm for Differential File Comparison.” 3 | https://www.cs.dartmouth.edu/~doug/diff.pdf 4 | 5 | Anchored diff gives up on finding anything, 6 | since there are no unique lines. 
7 | 8 | -- old -- 9 | a 10 | b 11 | c 12 | a 13 | b 14 | b 15 | a 16 | -- new -- 17 | c 18 | a 19 | b 20 | a 21 | b 22 | c 23 | -- diff -- 24 | diff old new 25 | --- old 26 | +++ new 27 | @@ -1,7 +1,6 @@ 28 | -a 29 | -b 30 | -c 31 | -a 32 | -b 33 | -b 34 | -a 35 | +c 36 | +a 37 | +b 38 | +a 39 | +b 40 | +c 41 | -------------------------------------------------------------------------------- /internal/diff/testdata/long.txt: -------------------------------------------------------------------------------- 1 | -- old -- 2 | 1 3 | 2 4 | 3 5 | 4 6 | 5 7 | 6 8 | 7 9 | 8 10 | 9 11 | 10 12 | 11 13 | 12 14 | 13 15 | 14 16 | 14½ 17 | 15 18 | 16 19 | 17 20 | 18 21 | 19 22 | 20 23 | -- new -- 24 | 1 25 | 2 26 | 3 27 | 4 28 | 5 29 | 6 30 | 8 31 | 9 32 | 10 33 | 11 34 | 12 35 | 13 36 | 14 37 | 17 38 | 18 39 | 19 40 | 20 41 | -- diff -- 42 | diff old new 43 | --- old 44 | +++ new 45 | @@ -4,7 +4,6 @@ 46 | 4 47 | 5 48 | 6 49 | -7 50 | 8 51 | 9 52 | 10 53 | @@ -12,9 +11,6 @@ 54 | 12 55 | 13 56 | 14 57 | -14½ 58 | -15 59 | -16 60 | 17 61 | 18 62 | 19 63 | -------------------------------------------------------------------------------- /internal/llm/llm_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2024 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 
4 | 5 | package llm 6 | 7 | import ( 8 | "slices" 9 | "testing" 10 | ) 11 | 12 | func TestVector(t *testing.T) { 13 | v1 := Vector{1, 2, 3, 4} 14 | v2 := Vector{-200, -3000, 0, -10000} 15 | dot := v1.Dot(v2) 16 | if dot != -46200 { 17 | t.Errorf("%v.Dot(%v) = %v, want -46200", v1, v2, dot) 18 | } 19 | 20 | enc := v1.Encode() 21 | var v3 Vector 22 | v3.Decode(enc) 23 | if !slices.Equal(v3, v1) { 24 | t.Errorf("Decode(Encode(%v)) = %v, want %v", v1, v3, v1) 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /internal/storage/vectordb_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2024 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package storage 6 | 7 | import "testing" 8 | 9 | func TestVectorResultCompare(t *testing.T) { 10 | type R = VectorResult 11 | var tests = []struct { 12 | x, y VectorResult 13 | cmp int 14 | }{ 15 | {R{"b", 0.5}, R{"c", 0.5}, -1}, 16 | {R{"b", 0.4}, R{"a", 0.5}, -1}, 17 | } 18 | 19 | try := func(x, y VectorResult, cmp int) { 20 | if c := x.cmp(y); c != cmp { 21 | t.Errorf("Compare(%v, %v) = %d, want %d", x, y, c, cmp) 22 | } 23 | } 24 | for _, tt := range tests { 25 | try(tt.x, tt.x, 0) 26 | try(tt.y, tt.y, 0) 27 | try(tt.x, tt.y, tt.cmp) 28 | try(tt.y, tt.x, -tt.cmp) 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /internal/commentfix/testdata/replacetext.txt: -------------------------------------------------------------------------------- 1 | {{.ReplaceText `cancelled` "canceled"}} 2 | -- 1.in -- 3 | The context is cancelled. 4 | -- 1.out -- 5 | The context is canceled. 
6 | -- 2.in -- 7 | fmt.Printf("cancelled\n") 8 | -- 2.out -- 9 | -- 3.in -- 10 | The context **is cancelled.** 11 | -- 3.out -- 12 | The context **is canceled.** 13 | -- 4.in -- 14 | The context *is cancelled.* 15 | -- 4.out -- 16 | The context *is canceled.* 17 | -- 4.in -- 18 | The context ~~is cancelled.~~ 19 | -- 4.out -- 20 | The context ~~is canceled.~~ 21 | -- 5.in -- 22 | # Contexts that are cancelled 23 | -- 5.out -- 24 | # Contexts that are canceled 25 | -- 6.in -- 26 | Here is a list of misspelled words: 27 | - cancelled 28 | -- 6.out -- 29 | Here is a list of misspelled words: 30 | - canceled 31 | -- 7.in -- 32 | > The context is cancelled. 33 | -- 7.out -- 34 | > The context is canceled. 35 | -------------------------------------------------------------------------------- /internal/commentfix/testdata/replaceurl.txt: -------------------------------------------------------------------------------- 1 | {{.ReplaceURL `https://golang.org/(.*)` "https://go.dev/$1#"}} 2 | {{.ReplaceURL `(?i)https://lowercase.com/(.*)` "https://lowercase.com/$1"}} 3 | -- 1.in -- 4 | Visit https://golang.org/doc for more docs. 5 | -- 1.out -- 6 | Visit [https://go.dev/doc#](https://go.dev/doc#) for more docs. 7 | -- 2.in -- 8 | Visit for more docs. 9 | -- 2.out -- 10 | Visit for more docs. 11 | -- 3.in -- 12 | Visit [this page](https://golang.org/doc) for more docs. 13 | -- 3.out -- 14 | Visit [this page](https://go.dev/doc#) for more docs. 15 | -- 4.in -- 16 | Visit [https://golang.org/doc](https://golang.org/doc) for more docs. 17 | -- 4.out -- 18 | Visit [https://go.dev/doc#](https://go.dev/doc#) for more docs. 19 | -- 5.in -- 20 | Visit for more docs. 21 | -- 5.out -- 22 | Visit for more docs. 23 | -- 6.in -- 24 | Visit for more docs. 
25 | -- 6.out -- 26 | -------------------------------------------------------------------------------- /internal/keycheck/key_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2024 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | // Test that API keys do not appear in any httprr logs in this repo. 6 | 7 | package keycheck 8 | 9 | import ( 10 | "bytes" 11 | "io/fs" 12 | "os" 13 | "path/filepath" 14 | "strings" 15 | "testing" 16 | 17 | "rsc.io/gaby/internal/testutil" 18 | ) 19 | 20 | var bads = []string{ 21 | "\nAuthorization:", 22 | "\nx-goog-api-key:", 23 | "\nX-Goog-Api-Key:", 24 | } 25 | 26 | func TestTestdata(t *testing.T) { 27 | check := testutil.Checker(t) 28 | err := filepath.WalkDir("../..", func(file string, d fs.DirEntry, err error) error { 29 | if strings.HasSuffix(file, ".httprr") { 30 | data, err := os.ReadFile(file) 31 | check(err) 32 | for _, bad := range bads { 33 | if bytes.Contains(data, []byte(bad)) { 34 | t.Errorf("%s contains %q", file, bad) 35 | } 36 | } 37 | } 38 | return nil 39 | }) 40 | check(err) 41 | } 42 | -------------------------------------------------------------------------------- /internal/llm/embed_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2024 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 
4 | 5 | package llm 6 | 7 | import ( 8 | "bytes" 9 | "fmt" 10 | "testing" 11 | ) 12 | 13 | func TestQuote(t *testing.T) { 14 | docs := []EmbedDoc{{Text: "abc"}, {Text: "alphabetical order"}} 15 | vecs, err := QuoteEmbedder().EmbedDocs(docs) 16 | if err != nil { 17 | t.Fatal(err) 18 | } 19 | if len(vecs) != len(docs) { 20 | t.Fatalf("len(docs) = %v, but len(vecs) = %d", len(docs), len(vecs)) 21 | } 22 | for i, v := range vecs { 23 | u := UnquoteVector(v) 24 | if u != docs[i].Text { 25 | var buf bytes.Buffer 26 | for i, f := range v { 27 | fmt.Fprintf(&buf, " %f", f) 28 | if f < 0 { 29 | if i < len(v)-1 { 30 | fmt.Fprintf(&buf, " ... %f", v[len(v)-1]) 31 | } 32 | break 33 | } 34 | } 35 | t.Logf("Embed(%q) = %v", docs[i].Text, buf.String()) 36 | t.Errorf("Unquote() = %q, want %q", u, docs[i].Text) 37 | } 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /internal/diff/diff_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2022 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 
4 | 5 | package diff 6 | 7 | import ( 8 | "bytes" 9 | "path/filepath" 10 | "testing" 11 | 12 | "golang.org/x/tools/txtar" 13 | ) 14 | 15 | func clean(text []byte) []byte { 16 | text = bytes.ReplaceAll(text, []byte("$\n"), []byte("\n")) 17 | text = bytes.TrimSuffix(text, []byte("^D\n")) 18 | return text 19 | } 20 | 21 | func Test(t *testing.T) { 22 | files, _ := filepath.Glob("testdata/*.txt") 23 | if len(files) == 0 { 24 | t.Fatalf("no testdata") 25 | } 26 | 27 | for _, file := range files { 28 | t.Run(filepath.Base(file), func(t *testing.T) { 29 | a, err := txtar.ParseFile(file) 30 | if err != nil { 31 | t.Fatal(err) 32 | } 33 | if len(a.Files) != 3 || a.Files[2].Name != "diff" { 34 | t.Fatalf("%s: want three files, third named \"diff\"", file) 35 | } 36 | diffs := Diff(a.Files[0].Name, clean(a.Files[0].Data), a.Files[1].Name, clean(a.Files[1].Data)) 37 | want := clean(a.Files[2].Data) 38 | if !bytes.Equal(diffs, want) { 39 | t.Fatalf("%s: have:\n%s\nwant:\n%s\n%s", file, 40 | diffs, want, Diff("have", diffs, "want", want)) 41 | } 42 | }) 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /internal/storage/db_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2024 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 
4 | 5 | package storage 6 | 7 | import ( 8 | "math" 9 | "testing" 10 | 11 | "rsc.io/ordered" 12 | ) 13 | 14 | func TestPanic(t *testing.T) { 15 | func() { 16 | defer func() { 17 | r := recover() 18 | if r.(string) != "msg key=val" { 19 | t.Errorf("panic value is not msg key=val:\n%s", r) 20 | } 21 | }() 22 | Panic("msg", "key", "val") 23 | t.Fatalf("did not panic") 24 | }() 25 | 26 | } 27 | 28 | func TestJSON(t *testing.T) { 29 | x := map[string]string{"a": "b"} 30 | js := JSON(x) 31 | want := `{"a":"b"}` 32 | if string(js) != want { 33 | t.Errorf("JSON(%v) = %#q, want %#q", x, js, want) 34 | } 35 | 36 | func() { 37 | defer func() { 38 | recover() 39 | }() 40 | JSON(math.NaN()) 41 | t.Errorf("JSON(NaN) did not panic") 42 | }() 43 | } 44 | 45 | var fmtTests = []struct { 46 | data []byte 47 | out string 48 | }{ 49 | {ordered.Encode(1, 2, 3), "(1, 2, 3)"}, 50 | {[]byte(`"hello"`), "`\"hello\"`"}, 51 | {[]byte("`hello`"), "\"`hello`\""}, 52 | } 53 | 54 | func TestFmt(t *testing.T) { 55 | for _, tt := range fmtTests { 56 | out := Fmt(tt.data) 57 | if out != tt.out { 58 | t.Errorf("Fmt(%q) = %q, want %q", tt.data, out, tt.out) 59 | } 60 | } 61 | } 62 | 63 | func TestMemLocker(t *testing.T) { 64 | m := new(MemLocker) 65 | 66 | testDBLock(t, m) 67 | } 68 | -------------------------------------------------------------------------------- /internal/secret/secret_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2024 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 
4 | 5 | package secret 6 | 7 | import ( 8 | "os" 9 | "path/filepath" 10 | "testing" 11 | ) 12 | 13 | func TestNetrc(t *testing.T) { 14 | file := filepath.Join(t.TempDir(), "netrc") 15 | if err := os.WriteFile(file, []byte(testNetrc), 0666); err != nil { 16 | t.Fatal(err) 17 | } 18 | t.Setenv("NETRC", file) 19 | 20 | db := Netrc() 21 | if secret, ok := db.Get("missing"); secret != "" || ok != false { 22 | t.Errorf("Get(missing) = %q, %v, want %q, %v", secret, ok, "", false) 23 | } 24 | 25 | if secret, ok := db.Get("example.com"); secret != "u2:p2" || ok != true { 26 | t.Errorf("Get(example.com) = %q, %v, want %q, %v", secret, ok, "u2:p2", true) 27 | } 28 | 29 | func() { 30 | defer func() { 31 | recover() 32 | }() 33 | db.Set("name", "value") 34 | t.Errorf("Set did not panic") 35 | }() 36 | } 37 | 38 | var testNetrc = ` 39 | machine example.com login u1 password p1 40 | machine missing login u password p and more 41 | machine example.com login u2 password p2 42 | ` 43 | 44 | func TestEmpty(t *testing.T) { 45 | db := Empty() 46 | if secret, ok := db.Get("missing"); secret != "" || ok != false { 47 | t.Errorf("Get(missing) = %q, %v, want %q, %v", secret, ok, "", false) 48 | } 49 | 50 | func() { 51 | defer func() { 52 | recover() 53 | }() 54 | db.Set("name", "value") 55 | t.Errorf("Set did not panic") 56 | }() 57 | } 58 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2009 The Go Authors. All rights reserved. 2 | 3 | Redistribution and use in source and binary forms, with or without 4 | modification, are permitted provided that the following conditions are 5 | met: 6 | 7 | * Redistributions of source code must retain the above copyright 8 | notice, this list of conditions and the following disclaimer. 
9 | * Redistributions in binary form must reproduce the above 10 | copyright notice, this list of conditions and the following disclaimer 11 | in the documentation and/or other materials provided with the 12 | distribution. 13 | * Neither the name of Google Inc. nor the names of its 14 | contributors may be used to endorse or promote products derived from 15 | this software without specific prior written permission. 16 | 17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 21 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | -------------------------------------------------------------------------------- /internal/testutil/testutil.go: -------------------------------------------------------------------------------- 1 | // Copyright 2024 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | // Package testutil implements various testing utilities. 6 | package testutil 7 | 8 | import ( 9 | "bytes" 10 | "io" 11 | "log/slog" 12 | "testing" 13 | ) 14 | 15 | // LogWriter returns an [io.Writer] that handles logs 16 | // each Write using t.Log. 
17 | func LogWriter(t *testing.T) io.Writer { 18 | return testWriter{t} 19 | } 20 | 21 | type testWriter struct{ t *testing.T } 22 | 23 | func (w testWriter) Write(b []byte) (int, error) { 24 | w.t.Logf("%s", b) 25 | return len(b), nil 26 | } 27 | 28 | // Slogger returns a [*slog.Logger] that writes each message 29 | // using t.Log. 30 | func Slogger(t *testing.T) *slog.Logger { 31 | return slog.New(slog.NewTextHandler(LogWriter(t), nil)) 32 | } 33 | 34 | // SlogBuffer returns a [*slog.Logger] that writes each message to out. 35 | func SlogBuffer() (lg *slog.Logger, out *bytes.Buffer) { 36 | var buf bytes.Buffer 37 | lg = slog.New(slog.NewTextHandler(&buf, nil)) 38 | return lg, &buf 39 | } 40 | 41 | // Check calls t.Fatal(err) if err is not nil. 42 | func Check(t *testing.T, err error) { 43 | if err != nil { 44 | t.Helper() 45 | t.Fatal(err) 46 | } 47 | } 48 | 49 | // Checker returns a check function that 50 | // calls t.Fatal if err is not nil. 51 | func Checker(t *testing.T) (check func(err error)) { 52 | return func(err error) { 53 | if err != nil { 54 | t.Helper() 55 | t.Fatal(err) 56 | } 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /internal/storage/mem_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2024 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 
4 | 5 | package storage 6 | 7 | import ( 8 | "testing" 9 | 10 | "rsc.io/gaby/internal/testutil" 11 | ) 12 | 13 | func TestMemDB(t *testing.T) { 14 | TestDB(t, MemDB()) 15 | } 16 | 17 | func TestMemVectorDB(t *testing.T) { 18 | db := MemDB() 19 | TestVectorDB(t, func() VectorDB { return MemVectorDB(db, testutil.Slogger(t), "") }) 20 | } 21 | 22 | type maybeDB struct { 23 | DB 24 | maybe bool 25 | } 26 | 27 | type maybeBatch struct { 28 | db *maybeDB 29 | Batch 30 | } 31 | 32 | func (db *maybeDB) Batch() Batch { 33 | return &maybeBatch{db: db, Batch: db.DB.Batch()} 34 | } 35 | 36 | func (b *maybeBatch) MaybeApply() bool { 37 | return b.db.maybe 38 | } 39 | 40 | // Test that when db.Batch.MaybeApply returns true, 41 | // the memvector Batch MaybeApply applies the memvector ops. 42 | func TestMemVectorBatchMaybeApply(t *testing.T) { 43 | db := &maybeDB{DB: MemDB()} 44 | vdb := MemVectorDB(db, testutil.Slogger(t), "") 45 | b := vdb.Batch() 46 | b.Set("apple3", embed("apple3")) 47 | if _, ok := vdb.Get("apple3"); ok { 48 | t.Errorf("Get(apple3) succeeded before batch apply") 49 | } 50 | b.MaybeApply() // should not apply because the db Batch does not apply 51 | if _, ok := vdb.Get("apple3"); ok { 52 | t.Errorf("Get(apple3) succeeded after MaybeApply that didn't apply") 53 | } 54 | db.maybe = true 55 | b.MaybeApply() // now should apply 56 | if _, ok := vdb.Get("apple3"); !ok { 57 | t.Errorf("Get(apple3) failed after MaybeApply that did apply") 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /internal/storage/vtest.go: -------------------------------------------------------------------------------- 1 | // Copyright 2024 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 
4 | 5 | package storage 6 | 7 | import ( 8 | "math" 9 | "reflect" 10 | "slices" 11 | "testing" 12 | 13 | "rsc.io/gaby/internal/llm" 14 | ) 15 | 16 | func TestVectorDB(t *testing.T, newdb func() VectorDB) { 17 | vdb := newdb() 18 | 19 | vdb.Set("orange2", embed("orange2")) 20 | vdb.Set("orange1", embed("orange1")) 21 | b := vdb.Batch() 22 | b.Set("apple3", embed("apple3")) 23 | b.Set("apple4", embed("apple4")) 24 | b.Set("ignore", embed("bad")[:4]) 25 | b.Apply() 26 | 27 | v, ok := vdb.Get("apple3") 28 | if !ok || !slices.Equal(v, embed("apple3")) { 29 | // unreachable except bad vectordb 30 | t.Errorf("Get(apple3) = %v, %v, want %v, true", v, ok, embed("apple3")) 31 | } 32 | 33 | want := []VectorResult{ 34 | {"apple4", 0.9999961187341375}, 35 | {"apple3", 0.9999843342970269}, 36 | {"orange1", 0.38062230442542155}, 37 | {"orange2", 0.3785152783773009}, 38 | } 39 | have := vdb.Search(embed("apple5"), 5) 40 | if !reflect.DeepEqual(have, want) { 41 | // unreachable except bad vectordb 42 | t.Fatalf("Search(apple5, 5):\nhave %v\nwant %v", have, want) 43 | } 44 | 45 | vdb.Flush() 46 | 47 | vdb = newdb() 48 | have = vdb.Search(embed("apple5"), 3) 49 | want = want[:3] 50 | if !reflect.DeepEqual(have, want) { 51 | // unreachable except bad vectordb 52 | t.Errorf("Search(apple5, 3) in fresh database:\nhave %v\nwant %v", have, want) 53 | } 54 | 55 | } 56 | 57 | func embed(text string) llm.Vector { 58 | const vectorLen = 16 59 | v := make(llm.Vector, vectorLen) 60 | d := float32(0) 61 | for i := range len(text) { 62 | v[i] = float32(byte(text[i])) / 256 63 | d += v[i] * v[i] 64 | } 65 | if len(text) < len(v) { 66 | v[len(text)] = -1 67 | d += 1 68 | } 69 | d = float32(1 / math.Sqrt(float64(d))) 70 | for i, x := range v { 71 | v[i] = x * d 72 | } 73 | return v 74 | } 75 | -------------------------------------------------------------------------------- /internal/pebble/pebble_test.go: -------------------------------------------------------------------------------- 1 | // 
Copyright 2024 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package pebble 6 | 7 | import ( 8 | "encoding/binary" 9 | "fmt" 10 | "math/rand/v2" 11 | "testing" 12 | 13 | "rsc.io/gaby/internal/storage" 14 | "rsc.io/gaby/internal/testutil" 15 | ) 16 | 17 | type testWriter struct{ t *testing.T } 18 | 19 | func (w testWriter) Write(b []byte) (int, error) { 20 | w.t.Logf("%s", b) 21 | return len(b), nil 22 | } 23 | 24 | func TestDB(t *testing.T) { 25 | lg := testutil.Slogger(t) 26 | dir := t.TempDir() 27 | 28 | db, err := Open(lg, dir+"/db1") 29 | if err == nil { 30 | t.Fatal("Open nonexistent succeeded") 31 | } 32 | 33 | db, err = Create(lg, dir+"/db1") 34 | if err != nil { 35 | t.Fatal(err) 36 | } 37 | db.Close() 38 | 39 | db, err = Create(lg, dir+"/db1") 40 | if err == nil { 41 | t.Fatal("Create already-existing succeeded") 42 | } 43 | 44 | db, err = Open(lg, dir+"/db1") 45 | if err != nil { 46 | t.Fatal(err) 47 | } 48 | defer db.Close() 49 | 50 | storage.TestDB(t, db) 51 | 52 | if testing.Short() { 53 | return 54 | } 55 | 56 | // Test that MaybeApply handles very large batch. 
57 | b := db.Batch() 58 | val := make([]byte, 1e6) 59 | pcg := rand.NewPCG(1, 2) 60 | applied := 0 61 | for key := range 500 { 62 | for i := 0; i < len(val); i += 8 { 63 | binary.BigEndian.PutUint64(val[i:], pcg.Uint64()) 64 | } 65 | binary.BigEndian.PutUint64(val, uint64(key)) 66 | b.Set([]byte(fmt.Sprint(key)), val) 67 | if b.MaybeApply() { 68 | if applied++; applied == 2 { 69 | break 70 | } 71 | } 72 | } 73 | b.Apply() 74 | 75 | for key := range 200 { 76 | val, ok := db.Get([]byte(fmt.Sprint(key))) 77 | if !ok { 78 | t.Fatalf("after batch, missing key %d", key) 79 | } 80 | if x := binary.BigEndian.Uint64(val); x != uint64(key) { 81 | t.Fatalf("Get(%d) = value for %d, want %d", key, x, key) 82 | } 83 | } 84 | } 85 | -------------------------------------------------------------------------------- /internal/llm/embed.go: -------------------------------------------------------------------------------- 1 | // Copyright 2024 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package llm 6 | 7 | import "math" 8 | 9 | const quoteLen = 123 10 | 11 | // QuoteEmbedder returns an implementation 12 | // of Embedder that can be useful for testing but 13 | // is completely pointless for real use. 14 | // It encodes up to the first 122 bytes of each document 15 | // directly into the first 122 elements of a 123-element unit vector. 16 | func QuoteEmbedder() Embedder { 17 | return quoter{} 18 | } 19 | 20 | // quote quotes text into a vector. 21 | // The text ends at the first negative entry in the vector. 22 | // The final entry of the vector is hard-coded to -1 23 | // before normalization, so that the final entry of a 24 | // normalized vector lets us know scaling to reverse 25 | // to obtain the original bytes. 
26 | func quote(text string) Vector { 27 | v := make(Vector, quoteLen) 28 | var d float64 29 | for i := range len(text) { 30 | if i >= len(v)-1 { 31 | break 32 | } 33 | v[i] = float32(byte(text[i])) / 256 34 | d += float64(v[i]) * float64(v[i]) 35 | } 36 | if len(text)+1 < len(v) { 37 | v[len(text)] = -1 38 | d += 1 39 | } 40 | v[len(v)-1] = -1 41 | d += 1 42 | 43 | d = 1 / math.Sqrt(d) 44 | for i := range v { 45 | v[i] *= float32(d) 46 | } 47 | return v 48 | } 49 | 50 | // quoter is a quoting Embedder, returned by QuoteEmbedder 51 | type quoter struct{} 52 | 53 | // EmbedDocs implements Embedder by quoting. 54 | func (quoter) EmbedDocs(docs []EmbedDoc) ([]Vector, error) { 55 | var vecs []Vector 56 | for _, d := range docs { 57 | vecs = append(vecs, quote(d.Text)) 58 | } 59 | return vecs, nil 60 | } 61 | 62 | // UnquoteVector recovers the original text prefix 63 | // passed to a [QuoteEmbedder]'s EmbedDocs method. 64 | // Like QuoteEmbedder, UnquoteVector is only useful in tests. 65 | func UnquoteVector(v Vector) string { 66 | if len(v) != quoteLen { 67 | panic("UnquoteVector of non-quotation vector") 68 | } 69 | d := -1 / v[len(v)-1] 70 | var b []byte 71 | for _, f := range v { 72 | if f < 0 { 73 | break 74 | } 75 | b = append(b, byte(256*f*d+0.5)) 76 | } 77 | return string(b) 78 | } 79 | -------------------------------------------------------------------------------- /internal/embeddocs/sync.go: -------------------------------------------------------------------------------- 1 | // Copyright 2024 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | // Package embeddocs implements embedding text docs into a vector database. 
6 | package embeddocs 7 | 8 | import ( 9 | "log/slog" 10 | 11 | "rsc.io/gaby/internal/docs" 12 | "rsc.io/gaby/internal/llm" 13 | "rsc.io/gaby/internal/storage" 14 | "rsc.io/gaby/internal/storage/timed" 15 | ) 16 | 17 | // Sync reads new documents from dc, embeds them using embed, 18 | // and then writes the (docid, vector) pairs to vdb. 19 | // 20 | // Sync uses [docs.DocWatcher] with the the name “embeddocs” to 21 | // save its position across multiple calls. 22 | // 23 | // Sync logs status and unexpected problems to lg. 24 | func Sync(lg *slog.Logger, vdb storage.VectorDB, embed llm.Embedder, dc *docs.Corpus) { 25 | lg.Info("embeddocs sync") 26 | 27 | const batchSize = 100 28 | var ( 29 | batch []llm.EmbedDoc 30 | ids []string 31 | batchLast timed.DBTime 32 | ) 33 | w := dc.DocWatcher("embeddocs") 34 | 35 | flush := func() bool { 36 | vecs, err := embed.EmbedDocs(batch) 37 | if len(vecs) > len(ids) { 38 | lg.Error("embeddocs length mismatch", "batch", len(batch), "vecs", len(vecs), "ids", len(ids)) 39 | return false 40 | } 41 | for i, v := range vecs { 42 | vdb.Set(ids[i], v) 43 | } 44 | if err != nil { 45 | lg.Error("embeddocs EmbedDocs error", "err", err) 46 | return false 47 | } 48 | if len(vecs) != len(ids) { 49 | lg.Error("embeddocs length mismatch", "batch", len(batch), "vecs", len(vecs), "ids", len(ids)) 50 | return false 51 | } 52 | vdb.Flush() // todo vdb 53 | w.MarkOld(batchLast) 54 | w.Flush() 55 | batch = nil 56 | ids = nil 57 | return true 58 | } 59 | 60 | for d := range w.Recent() { 61 | lg.Debug("embeddocs sync start", "doc", d.ID) 62 | batch = append(batch, llm.EmbedDoc{Title: d.Title, Text: d.Text}) 63 | ids = append(ids, d.ID) 64 | batchLast = d.DBTime 65 | if len(batch) >= batchSize { 66 | if !flush() { 67 | break 68 | } 69 | } 70 | } 71 | if len(batch) > 0 { 72 | // More to flush, but flush uses w.MarkOld, 73 | // which has to be called during an iteration over w.Recent. 74 | // Start a new iteration just to call flush and then break out. 
75 | for _ = range w.Recent() { 76 | if !flush() { 77 | return 78 | } 79 | break 80 | } 81 | } 82 | } 83 | -------------------------------------------------------------------------------- /internal/githubdocs/sync.go: -------------------------------------------------------------------------------- 1 | // Copyright 2024 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | // Package githubdocs implements converting GitHub issues into text docs 6 | // for [rsc.io/gaby/internal/docs]. 7 | package githubdocs 8 | 9 | import ( 10 | "fmt" 11 | "log/slog" 12 | 13 | "rsc.io/gaby/internal/docs" 14 | "rsc.io/gaby/internal/github" 15 | ) 16 | 17 | // Sync writes to dc docs corresponding to each issue in gh that is 18 | // new since the last call to Sync. 19 | // 20 | // If an issue is edited on GitHub, it will appear new in gh and 21 | // the new text will be written to dc, replacing the old issue text. 22 | // Only the issue body (what looks like the top comment in the UI) 23 | // is saved as a document. 24 | // The document ID for each issue is its GitHub URL: "https://github.com///issues/". 25 | func Sync(lg *slog.Logger, dc *docs.Corpus, gh *github.Client) { 26 | w := gh.EventWatcher("githubdocs") 27 | for e := range w.Recent() { 28 | if e.API != "/issues" { 29 | continue 30 | } 31 | lg.Debug("githubdocs sync", "issue", e.Issue, "dbtime", e.DBTime) 32 | issue := e.Typed.(*github.Issue) 33 | title := cleanTitle(issue.Title) 34 | text := cleanBody(issue.Body) 35 | dc.Add(fmt.Sprintf("https://github.com/%s/issues/%d", e.Project, e.Issue), title, text) 36 | w.MarkOld(e.DBTime) 37 | } 38 | } 39 | 40 | // Restart causes the next call to Sync to behave as if 41 | // it has never sync'ed any issues before. 42 | // The result is that all issues will be reconverted to doc form 43 | // and re-added. 
// Docs that have not changed since the last addition to the corpus
// will appear unmodified; others will be marked new in the corpus.
func Restart(lg *slog.Logger, gh *github.Client) {
	// lg is accepted for symmetry with Sync; it is currently unused.
	gh.EventWatcher("githubdocs").Restart()
}

// cleanTitle should clean the title for indexing.
// For now we assume the LLM is good enough at Markdown not to bother.
func cleanTitle(title string) string {
	// TODO
	return title
}

// cleanBody should clean the body for indexing.
// For now we assume the LLM is good enough at Markdown not to bother.
// In the future we may want to make various changes like inlining
// the programs associated with playground URLs,
// and we may also want to remove any HTML tags from the Markdown.
func cleanBody(body string) string {
	// TODO
	return body
}
-------------------------------------------------------------------------------- /internal/secret/secret.go: --------------------------------------------------------------------------------
// Copyright 2024 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Package secret defines an interface to a database storing secrets, such as passwords and API keys.
//
// TODO(rsc): Consider adding a basic key: value text file format besides netrc.
package secret

import (
	"os"
	"path/filepath"
	"strings"
)

// A DB is a secret database, which is a persistent map from names to secret values.
type DB interface {
	// Get returns the secret with the given name,
	// reporting whether it was found.
	Get(name string) (secret string, ok bool)
	// Set adds or replaces the named secret.
	Set(name, secret string)
}

// Empty returns a read-only, empty secret database.
func Empty() DB {
	return ReadOnlyMap(nil)
}

// A Map is a read-write, in-memory [DB].
type Map map[string]string

// Get returns the named secret.
31 | func (m Map) Get(name string) (secret string, ok bool) { 32 | secret, ok = m[name] 33 | return 34 | } 35 | 36 | // Set adds a secret with the given name. 37 | func (m Map) Set(name, secret string) { 38 | m[name] = secret 39 | } 40 | 41 | // A ReadOnlyMap is a read-only [DB]. Calling [Set] panics. 42 | type ReadOnlyMap map[string]string 43 | 44 | // Get returns the named secret. 45 | func (m ReadOnlyMap) Get(name string) (secret string, ok bool) { 46 | secret, ok = m[name] 47 | return 48 | } 49 | 50 | // Set panics. 51 | func (m ReadOnlyMap) Set(name, secret string) { 52 | panic("read-only secrets") 53 | } 54 | 55 | // Netrc returns a read-only secret database initialized by the content of $HOME/.netrc, if it exists. 56 | // A line in .netrc of the form 57 | // 58 | // machine name login user password pass 59 | // 60 | // causes Get("name") to return "user:pass". 61 | // Lines later in .netrc take priority over lines earlier in .netrc. 62 | // 63 | // If the environment $NETRC is set and non-empty, the file it names is used 64 | // instead of $HOME/.netrc. 65 | func Netrc() ReadOnlyMap { 66 | file := filepath.Join(os.Getenv("HOME"), ".netrc") 67 | if env := os.Getenv("NETRC"); env != "" { 68 | file = env 69 | } 70 | return openNetrc(file) 71 | } 72 | 73 | func openNetrc(file string) ReadOnlyMap { 74 | m := make(ReadOnlyMap) 75 | if data, err := os.ReadFile(file); err == nil { 76 | for _, line := range strings.Split(string(data), "\n") { 77 | f := strings.Fields(line) 78 | if len(f) == 6 && f[0] == "machine" && f[2] == "login" && f[4] == "password" { 79 | m[f[1]] = f[3] + ":" + f[5] 80 | } 81 | } 82 | } 83 | return m 84 | } 85 | -------------------------------------------------------------------------------- /internal/llm/llm.go: -------------------------------------------------------------------------------- 1 | // Copyright 2024 The Go Authors. All rights reserved. 
2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | // Package llm defines interfaces implemented by LLMs (or LLM-related services). 6 | package llm 7 | 8 | import ( 9 | "encoding/binary" 10 | "math" 11 | ) 12 | 13 | // An Embedder computes vector embeddings of a list of documents. 14 | // 15 | // EmbedDocs accepts an arbitrary number of documents and returns 16 | // their embeddings. If the underlying implementation has a limit on 17 | // the batch size, it should make multiple requests in order to process 18 | // all the documents. If an error occurs after some, but not all, documents 19 | // have been processed, EmbedDocs can return an error along with a 20 | // shortened vector slice giving the vectors for a prefix of the document slice. 21 | // 22 | // See [QuoteEmbedder] for a semantically useless embedder that 23 | // can nonetheless be helpful when writing tests, 24 | // and see [rsc.io/gaby/internal/gemini] for a real implementation. 25 | type Embedder interface { 26 | EmbedDocs(docs []EmbedDoc) ([]Vector, error) 27 | } 28 | 29 | // An EmbedDoc is a single document to be embedded. 30 | type EmbedDoc struct { 31 | Title string // title of document 32 | Text string // text of document 33 | } 34 | 35 | // A Vector is an embedding vector, typically a high-dimensional unit vector. 36 | type Vector []float32 37 | 38 | // Dot returns the dot product of v and w. 39 | // 40 | // TODO(rsc): Using a float64 for the result is slightly higher 41 | // precision and may be worth doing in the intermediate calculation 42 | // but may not be worth the type conversions involved to return a float64. 43 | // Perhaps the return type should still be float32 even if the math is float64. 
44 | func (v Vector) Dot(w Vector) float64 { 45 | v = v[:min(len(v), len(w))] 46 | w = w[:len(v)] // make "i in range for v" imply "i in range for w" to remove bounds check in loop 47 | t := float64(0) 48 | for i := range v { 49 | t += float64(v[i]) * float64(w[i]) 50 | } 51 | return t 52 | } 53 | 54 | // Encode returns a byte encoding of the vector v, 55 | // suitable for storing in a database. 56 | func (v Vector) Encode() []byte { 57 | val := make([]byte, 4*len(v)) 58 | for i, f := range v { 59 | binary.BigEndian.PutUint32(val[4*i:], math.Float32bits(f)) 60 | } 61 | return val 62 | } 63 | 64 | // Decode decodes the byte encoding enc into the vector v. 65 | // Enc should be a multiple of 4 bytes; any trailing bytes are ignored. 66 | func (v *Vector) Decode(enc []byte) { 67 | if len(*v) < len(enc)/4 { 68 | *v = make(Vector, len(enc)/4) 69 | } 70 | *v = (*v)[:0] 71 | for ; len(enc) >= 4; enc = enc[4:] { 72 | *v = append(*v, math.Float32frombits(binary.BigEndian.Uint32(enc))) 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /internal/storage/vectordb.go: -------------------------------------------------------------------------------- 1 | // Copyright 2024 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package storage 6 | 7 | import ( 8 | "cmp" 9 | 10 | "rsc.io/gaby/internal/llm" 11 | ) 12 | 13 | // A VectorDB is a vector database that implements 14 | // nearest-neighbor search over embedding vectors 15 | // corresponding to documents. 16 | type VectorDB interface { 17 | // Set sets the vector associated with the given document ID to vec. 18 | Set(id string, vec llm.Vector) 19 | 20 | // TODO: Add Delete. 21 | 22 | // Get gets the vector associated with the given document ID. 23 | // If no such document exists, Get returns nil, false. 24 | // If a document exists, Get returns vec, true. 
	Get(id string) (llm.Vector, bool)

	// Batch returns a new [VectorBatch] that accumulates
	// vector database mutations to apply in an atomic operation.
	// It is more efficient than repeated calls to Set.
	Batch() VectorBatch

	// Search searches the database for the n vectors
	// most similar to vec, returning the document IDs
	// and similarity scores.
	Search(vec llm.Vector, n int) []VectorResult

	// Flush flushes storage to disk.
	Flush()
}

// A VectorBatch accumulates vector database mutations
// that are applied to a [VectorDB] in a single atomic operation.
// Applying bulk operations in a batch is also more efficient than
// making individual [VectorDB] method calls.
// The batched operations apply in the order they are made.
type VectorBatch interface {
	// Set sets the vector associated with the given document ID to vec.
	Set(id string, vec llm.Vector)

	// TODO: Add Delete.

	// MaybeApply calls Apply if the VectorBatch is getting close to full.
	// Every VectorBatch has a limit to how many operations can be batched,
	// so in a bulk operation where atomicity of the entire batch is not a concern,
	// calling MaybeApply gives the VectorBatch implementation
	// permission to flush the batch at specific “safe points”.
	// A typical limit for a batch is about 100MB worth of logged operations.
	//
	// MaybeApply reports whether it called Apply.
	MaybeApply() bool

	// Apply applies all the batched operations to the underlying VectorDB
	// as a single atomic unit.
	// When Apply returns, the VectorBatch is an empty batch ready for
	// more operations.
	Apply()
}

// A VectorResult is a single document returned by a VectorDB search.
type VectorResult struct {
	ID    string  // document ID
	Score float64 // similarity score in range [0, 1]; 1 is exact match
}

// cmp orders VectorResults by score, breaking ties by document ID,
// giving sorts a deterministic order.
func (x VectorResult) cmp(y VectorResult) int {
	if x.Score != y.Score {
		return cmp.Compare(x.Score, y.Score)
	}
	return cmp.Compare(x.ID, y.ID)
}
-------------------------------------------------------------------------------- /internal/gemini/gemini_test.go: --------------------------------------------------------------------------------
// Copyright 2024 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package gemini

import (
	"bytes"
	"fmt"
	"net/http"
	"os"
	"strings"
	"testing"

	"rsc.io/gaby/internal/httprr"
	"rsc.io/gaby/internal/llm"
	"rsc.io/gaby/internal/secret"
	"rsc.io/gaby/internal/testutil"
)

// docs is a small corpus in which each document has exactly one
// intended nearest neighbor, recorded in matches.
var docs = []llm.EmbedDoc{
	{Text: "for loops"},
	{Text: "for all time, always"},
	{Text: "break statements"},
	{Text: "breakdancing"},
	{Text: "forever could never be long enough for me"},
	{Text: "the macarena"},
}

// matches maps each document text to the text it should embed closest to.
var matches = map[string]string{
	"for loops":            "break statements",
	"for all time, always": "forever could never be long enough for me",
	"breakdancing":         "the macarena",
}

func init() {
	// Make matches symmetric: each pair matches in both directions.
	for k, v := range matches {
		matches[v] = k
	}
}

// newTestClient returns a Client whose HTTP traffic is replayed from
// (or recorded to) rrfile, with credentials taken from the netrc file.
func newTestClient(t *testing.T, rrfile string) *Client {
	check := testutil.Checker(t)
	lg := testutil.Slogger(t)

	rr, err := httprr.Open(rrfile, http.DefaultTransport)
	check(err)
	rr.Scrub(Scrub)
	sdb := secret.Netrc()

	c, err := NewClient(lg, sdb, rr.Client())
	check(err)

	return c
}

// TestEmbedBatch checks that each document in docs embeds closest to
// its designated partner in matches.
func TestEmbedBatch(t *testing.T) {
	check := testutil.Checker(t)
	c := newTestClient(t, "testdata/embedbatch.httprr")
	vecs, err := c.EmbedDocs(docs)
	check(err)
	if len(vecs) != len(docs) {
		t.Fatalf("len(vecs) = %d, but len(docs) = %d", len(vecs), len(docs))
	}

	// Precompute the full dot-product matrix so it can be printed
	// (once) if any match check below fails.
	var buf bytes.Buffer
	for i := range docs {
		for j := range docs {
			fmt.Fprintf(&buf, " %.4f", vecs[i].Dot(vecs[j]))
		}
		fmt.Fprintf(&buf, "\n")
	}

	for i, d := range docs {
		// Find the most similar other document.
		best := ""
		bestDot := 0.0
		for j := range docs {
			if dot := vecs[i].Dot(vecs[j]); i != j && dot > bestDot {
				best, bestDot = docs[j].Text, dot
			}
		}
		if best != matches[d.Text] {
			if buf.Len() > 0 {
				t.Errorf("dot matrix:\n%s", buf.String())
				buf.Reset()
			}
			t.Errorf("%q: best=%q, want %q", d.Text, best, matches[d.Text])
		}
	}
}

// TestBigBatch checks that EmbedDocs handles an input larger than a
// single request can carry.
func TestBigBatch(t *testing.T) {
	check := testutil.Checker(t)
	c := newTestClient(t, "testdata/bigbatch.httprr")
	var docs []llm.EmbedDoc
	data, err := os.ReadFile("/usr/local/plan9/lib/words")
	check(err)
	for _, w := range strings.Fields(string(data)) {
		docs = append(docs, llm.EmbedDoc{Text: w})
	}
	// 251 docs: presumably larger than the server-side batch limit,
	// forcing multiple requests — TODO confirm the limit.
	docs = docs[:251]
	vecs, err := c.EmbedDocs(docs)
	check(err)
	if len(vecs) != len(docs) {
		t.Fatalf("len(vecs) = %d, but len(docs) = %d", len(vecs), len(docs))
	}
}
-------------------------------------------------------------------------------- /internal/githubdocs/sync_test.go: --------------------------------------------------------------------------------
// Copyright 2024 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package githubdocs

import (
	"testing"

	"rsc.io/gaby/internal/docs"
	"rsc.io/gaby/internal/github"
	"rsc.io/gaby/internal/storage"
	"rsc.io/gaby/internal/testutil"
)

// TestMarkdown checks that Sync converts every issue in the rsc/markdown
// fixture into a doc, that a second Sync does not rewrite docs for
// already-synced issues, and that Restart forces a full re-sync.
func TestMarkdown(t *testing.T) {
	check := testutil.Checker(t)
	lg := testutil.Slogger(t)
	db := storage.MemDB()
	gh := github.New(lg, db, nil, nil)
	check(gh.Testing().LoadTxtar("../testdata/markdown.txt"))

	dc := docs.New(db)
	Sync(lg, dc, gh)

	// Issue URLs in lexical (string) order, matching Docs iteration order.
	var want = []string{
		"https://github.com/rsc/markdown/issues/1",
		"https://github.com/rsc/markdown/issues/10",
		"https://github.com/rsc/markdown/issues/11",
		"https://github.com/rsc/markdown/issues/12",
		"https://github.com/rsc/markdown/issues/13",
		"https://github.com/rsc/markdown/issues/14",
		"https://github.com/rsc/markdown/issues/15",
		"https://github.com/rsc/markdown/issues/16",
		"https://github.com/rsc/markdown/issues/17",
		"https://github.com/rsc/markdown/issues/18",
		"https://github.com/rsc/markdown/issues/19",
		"https://github.com/rsc/markdown/issues/2",
		"https://github.com/rsc/markdown/issues/3",
		"https://github.com/rsc/markdown/issues/4",
		"https://github.com/rsc/markdown/issues/5",
		"https://github.com/rsc/markdown/issues/6",
		"https://github.com/rsc/markdown/issues/7",
		"https://github.com/rsc/markdown/issues/8",
		"https://github.com/rsc/markdown/issues/9",
	}
	for d := range dc.Docs("") {
		if len(want) == 0 {
			t.Fatalf("unexpected extra doc: %s", d.ID)
		}
		if d.ID != want[0] {
			t.Fatalf("doc mismatch: have %s, want %s", d.ID, want[0])
		}
		want = want[1:]
		// Spot-check the full content of issue #1.
		if d.ID == md1 {
			if d.Title != md1Title {
				t.Errorf("#1 Title = %q, want %q", d.Title, md1Title)
			}
			if d.Text != md1Text {
				t.Errorf("#1 Text = %q, want %q", d.Text, md1Text)
			}
		}
	}
	if len(want) > 0 {
		t.Fatalf("missing docs: %v", want)
	}

	// Overwrite #1 in the corpus; a second Sync sees no new events
	// and must leave the overwritten doc alone.
	dc.Add("https://github.com/rsc/markdown/issues/1", "OLD TITLE", "OLD TEXT")
	Sync(lg, dc, gh)
	d, _ := dc.Get(md1)
	if d.Title != "OLD TITLE" || d.Text != "OLD TEXT" {
		t.Errorf("Sync rewrote #1: Title=%q Text=%q, want OLD TITLE, OLD TEXT", d.Title, d.Text)
	}

	// After Restart, Sync replays everything and must rewrite #1.
	Restart(lg, gh)
	Sync(lg, dc, gh)
	d, _ = dc.Get(md1)
	if d.Title == "OLD TITLE" || d.Text == "OLD TEXT" {
		t.Errorf("Restart+Sync did not rewrite #1: Title=%q Text=%q", d.Title, d.Text)
	}
}

// Expected ID and content of issue #1 in the fixture.
var (
	md1      = "https://github.com/rsc/markdown/issues/1"
	md1Title = "Support Github Emojis"
	md1Text  = "This is an issue for supporting github emojis, such as `:smile:` for \n😄 . There's a github page that gives a mapping of emojis to image \nfile names that we can parse the hex representation out of here: \nhttps://api.github.com/emojis.\n"
)
-------------------------------------------------------------------------------- /internal/docs/docs_test.go: --------------------------------------------------------------------------------
// Copyright 2024 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
4 | 5 | package docs 6 | 7 | import ( 8 | "slices" 9 | "strings" 10 | "testing" 11 | 12 | "rsc.io/gaby/internal/storage" 13 | ) 14 | 15 | func TestCorpus(t *testing.T) { 16 | db := storage.MemDB() 17 | 18 | corpus := New(db) 19 | corpus.Add("id1", "Title1", "text1") 20 | corpus.Add("id3", "Title3", "text3") 21 | corpus.Add("id2", "Title2", "text2") 22 | 23 | extra := make(map[string]string) 24 | var ids []string 25 | do := func(d *Doc) { 26 | t.Helper() 27 | if !strings.HasPrefix(d.ID, "id") { 28 | t.Fatalf("invalid prefix %q", d.ID) 29 | } 30 | n := d.ID[len("id"):] 31 | title := "Title" + n + extra[d.ID] 32 | text := "text" + n + extra[d.ID] 33 | if d.Title != title || d.Text != text { 34 | t.Fatalf("Doc id=%s has Title=%q, Text=%q, want %q, %q", d.ID, d.Title, d.Text, title, text) 35 | } 36 | ids = append(ids, d.ID) 37 | } 38 | 39 | // Basic iteration. 40 | for d := range corpus.Docs("") { 41 | do(d) 42 | } 43 | want := []string{"id1", "id2", "id3"} 44 | if !slices.Equal(ids, want) { 45 | t.Errorf("Docs() = %v, want %v", ids, want) 46 | } 47 | 48 | // Break during iteration. 49 | ids = nil 50 | for d := range corpus.Docs("") { 51 | do(d) 52 | if d.ID == "id2" { 53 | break 54 | } 55 | } 56 | want = []string{"id1", "id2"} 57 | if !slices.Equal(ids, want) { 58 | t.Errorf("Docs with break = %v, want %v", ids, want) 59 | } 60 | 61 | // DocsAfter iteration uses insert order. 62 | var last *Doc 63 | ids = nil 64 | for d := range corpus.DocsAfter(0, "") { 65 | do(d) 66 | last = d 67 | } 68 | want = []string{"id1", "id3", "id2"} 69 | if !slices.Equal(ids, want) { 70 | t.Errorf("Docs() = %v, want %v", ids, want) 71 | } 72 | 73 | // DocsAfter incremental iteration. 
74 | corpus.Add("id4", "Title4", "text4") 75 | extra["id2"] = "X" 76 | corpus.Add("id2", "Title2X", "text2X") // edits existing text 77 | corpus.Add("id3", "Title3", "text3") // no-op, ignored 78 | ids = nil 79 | for d := range corpus.DocsAfter(last.DBTime, "") { 80 | do(d) 81 | } 82 | want = []string{"id4", "id2"} 83 | if !slices.Equal(ids, want) { 84 | t.Errorf("DocsAfter(last.DBTime=%d) = %v, want %v", last.DBTime, ids, want) 85 | } 86 | 87 | // DocsAfter with break. 88 | ids = nil 89 | for d := range corpus.DocsAfter(last.DBTime, "") { 90 | do(d) 91 | break 92 | } 93 | want = []string{"id4"} 94 | if !slices.Equal(ids, want) { 95 | t.Errorf("DocsAfter(last.DBTime=%d) with break = %v, want %v", last.DBTime, ids, want) 96 | } 97 | 98 | // Docs with prefix. 99 | corpus.Add("id11", "Title11", "text11") 100 | ids = nil 101 | for d := range corpus.Docs("id1") { 102 | do(d) 103 | } 104 | want = []string{"id1", "id11"} 105 | if !slices.Equal(ids, want) { 106 | t.Errorf("Docs(id1) = %v, want %v", ids, want) 107 | } 108 | 109 | // DocsAfter with prefix. 
110 | ids = nil 111 | for d := range corpus.DocsAfter(0, "id1") { 112 | do(d) 113 | } 114 | want = []string{"id1", "id11"} 115 | if !slices.Equal(ids, want) { 116 | t.Errorf("DocsAfter(0, id1) = %v, want %v", ids, want) 117 | } 118 | } 119 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module rsc.io/gaby 2 | 3 | go 1.23rc1 4 | 5 | require ( 6 | cloud.google.com/go/firestore v1.15.0 7 | github.com/cockroachdb/pebble v1.1.0 8 | github.com/google/generative-ai-go v0.13.0 9 | golang.org/x/net v0.26.0 10 | golang.org/x/tools v0.22.0 11 | google.golang.org/api v0.178.0 12 | rsc.io/markdown v0.0.0-20240603215554-74725d8a840a 13 | rsc.io/omap v1.0.0 14 | rsc.io/ordered v1.1.0 15 | rsc.io/top v1.0.2 16 | ) 17 | 18 | require ( 19 | cloud.google.com/go v0.113.0 // indirect 20 | cloud.google.com/go/ai v0.5.0 // indirect 21 | cloud.google.com/go/auth v0.4.0 // indirect 22 | cloud.google.com/go/auth/oauth2adapt v0.2.2 // indirect 23 | cloud.google.com/go/compute/metadata v0.3.0 // indirect 24 | cloud.google.com/go/longrunning v0.5.7 // indirect 25 | github.com/DataDog/zstd v1.4.5 // indirect 26 | github.com/beorn7/perks v1.0.1 // indirect 27 | github.com/cespare/xxhash/v2 v2.2.0 // indirect 28 | github.com/cockroachdb/errors v1.11.1 // indirect 29 | github.com/cockroachdb/logtags v0.0.0-20230118201751-21c54148d20b // indirect 30 | github.com/cockroachdb/redact v1.1.5 // indirect 31 | github.com/cockroachdb/tokenbucket v0.0.0-20230807174530-cc333fc44b06 // indirect 32 | github.com/felixge/httpsnoop v1.0.4 // indirect 33 | github.com/getsentry/sentry-go v0.18.0 // indirect 34 | github.com/go-logr/logr v1.4.1 // indirect 35 | github.com/go-logr/stdr v1.2.2 // indirect 36 | github.com/gogo/protobuf v1.3.2 // indirect 37 | github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect 38 | github.com/golang/protobuf v1.5.4 // indirect 39 | 
github.com/golang/snappy v0.0.4 // indirect 40 | github.com/google/s2a-go v0.1.7 // indirect 41 | github.com/google/uuid v1.6.0 // indirect 42 | github.com/googleapis/enterprise-certificate-proxy v0.3.2 // indirect 43 | github.com/googleapis/gax-go/v2 v2.12.4 // indirect 44 | github.com/klauspost/compress v1.15.15 // indirect 45 | github.com/kr/pretty v0.3.1 // indirect 46 | github.com/kr/text v0.2.0 // indirect 47 | github.com/matttproud/golang_protobuf_extensions v1.0.2-0.20181231171920-c182affec369 // indirect 48 | github.com/pkg/errors v0.9.1 // indirect 49 | github.com/prometheus/client_golang v1.12.0 // indirect 50 | github.com/prometheus/client_model v0.2.1-0.20210607210712-147c58e9608a // indirect 51 | github.com/prometheus/common v0.32.1 // indirect 52 | github.com/prometheus/procfs v0.7.3 // indirect 53 | github.com/rogpeppe/go-internal v1.9.0 // indirect 54 | go.opencensus.io v0.24.0 // indirect 55 | go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.51.0 // indirect 56 | go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.51.0 // indirect 57 | go.opentelemetry.io/otel v1.26.0 // indirect 58 | go.opentelemetry.io/otel/metric v1.26.0 // indirect 59 | go.opentelemetry.io/otel/trace v1.26.0 // indirect 60 | golang.org/x/crypto v0.24.0 // indirect 61 | golang.org/x/exp v0.0.0-20230626212559-97b1e661b5df // indirect 62 | golang.org/x/oauth2 v0.20.0 // indirect 63 | golang.org/x/sync v0.7.0 // indirect 64 | golang.org/x/sys v0.21.0 // indirect 65 | golang.org/x/text v0.16.0 // indirect 66 | golang.org/x/time v0.5.0 // indirect 67 | google.golang.org/genproto v0.0.0-20240401170217-c3f982113cda // indirect 68 | google.golang.org/genproto/googleapis/api v0.0.0-20240506185236-b8a5c65736ae // indirect 69 | google.golang.org/genproto/googleapis/rpc v0.0.0-20240506185236-b8a5c65736ae // indirect 70 | google.golang.org/grpc v1.63.2 // indirect 71 | google.golang.org/protobuf v1.34.1 // indirect 72 | ) 73 | 
-------------------------------------------------------------------------------- /internal/storage/test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2024 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package storage 6 | 7 | import ( 8 | "fmt" 9 | "slices" 10 | "sync" 11 | "testing" 12 | 13 | "rsc.io/ordered" 14 | ) 15 | 16 | // TestDB runs basic tests on db. 17 | // It should be empty when TestDB is called. 18 | func TestDB(t *testing.T, db DB) { 19 | db.Set([]byte("key"), []byte("value")) 20 | if val, ok := db.Get([]byte("key")); string(val) != "value" || ok != true { 21 | // unreachable except for bad db 22 | t.Fatalf("Get(key) = %q, %v, want %q, true", val, ok, "value") 23 | } 24 | if val, ok := db.Get([]byte("missing")); val != nil || ok != false { 25 | // unreachable except for bad db 26 | t.Fatalf("Get(missing) = %v, %v, want nil, false", val, ok) 27 | } 28 | 29 | db.Delete([]byte("key")) 30 | if val, ok := db.Get([]byte("key")); val != nil || ok != false { 31 | // unreachable except for bad db 32 | t.Fatalf("Get(key) after delete = %v, %v, want nil, false", val, ok) 33 | } 34 | 35 | b := db.Batch() 36 | for i := range 10 { 37 | b.Set(ordered.Encode(i), []byte(fmt.Sprint(i))) 38 | b.MaybeApply() 39 | } 40 | b.Apply() 41 | 42 | collect := func(min, max, stop int) []int { 43 | t.Helper() 44 | var list []int 45 | for key, val := range db.Scan(ordered.Encode(min), ordered.Encode(max)) { 46 | var i int 47 | if err := ordered.Decode(key, &i); err != nil { 48 | // unreachable except for bad db 49 | t.Fatalf("db.Scan malformed key %v", Fmt(key)) 50 | } 51 | if sv, want := string(val()), fmt.Sprint(i); sv != want { 52 | // unreachable except for bad db 53 | t.Fatalf("db.Scan key %v val=%q, want %q", i, sv, want) 54 | } 55 | list = append(list, i) 56 | if i == stop { 57 | break 58 | } 59 | } 60 | return 
list 61 | } 62 | 63 | if scan, want := collect(3, 6, -1), []int{3, 4, 5, 6}; !slices.Equal(scan, want) { 64 | // unreachable except for bad db 65 | t.Fatalf("Scan(3, 6) = %v, want %v", scan, want) 66 | } 67 | 68 | if scan, want := collect(3, 6, 5), []int{3, 4, 5}; !slices.Equal(scan, want) { 69 | // unreachable except for bad db 70 | t.Fatalf("Scan(3, 6) with break at 5 = %v, want %v", scan, want) 71 | } 72 | 73 | db.DeleteRange(ordered.Encode(4), ordered.Encode(7)) 74 | if scan, want := collect(-1, 11, -1), []int{0, 1, 2, 3, 8, 9}; !slices.Equal(scan, want) { 75 | // unreachable except for bad db 76 | t.Fatalf("Scan(-1, 11) after Delete(4, 7) = %v, want %v", scan, want) 77 | } 78 | 79 | b = db.Batch() 80 | for i := range 5 { 81 | b.Delete(ordered.Encode(i)) 82 | b.Set(ordered.Encode(2*i), []byte(fmt.Sprint(2*i))) 83 | } 84 | b.DeleteRange(ordered.Encode(0), ordered.Encode(0)) 85 | b.Apply() 86 | if scan, want := collect(-1, 11, -1), []int{6, 8, 9}; !slices.Equal(scan, want) { 87 | // unreachable except for bad db 88 | t.Fatalf("Scan(-1, 11) after batch Delete+Set = %v, want %v", scan, want) 89 | } 90 | 91 | // Can't test much, but check that it doesn't crash. 
92 | db.Flush() 93 | 94 | testDBLock(t, db) 95 | } 96 | 97 | type locker interface { 98 | Lock(string) 99 | Unlock(string) 100 | } 101 | 102 | func testDBLock(t *testing.T, db locker) { 103 | var x int 104 | db.Lock("abc") 105 | var wg sync.WaitGroup 106 | wg.Add(1) 107 | go func() { 108 | db.Lock("abc") 109 | x = 2 // cause race if not synchronized 110 | db.Unlock("abc") 111 | wg.Done() 112 | }() 113 | x = 1 // cause race if not synchronized 114 | db.Unlock("abc") 115 | wg.Wait() 116 | _ = x 117 | 118 | func() { 119 | defer func() { 120 | recover() 121 | }() 122 | db.Unlock("def") 123 | t.Errorf("Unlock never-locked key did not panic") 124 | }() 125 | 126 | } 127 | -------------------------------------------------------------------------------- /internal/github/edit_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2024 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package github 6 | 7 | import ( 8 | "net/http" 9 | "slices" 10 | "testing" 11 | 12 | "rsc.io/gaby/internal/httprr" 13 | "rsc.io/gaby/internal/secret" 14 | "rsc.io/gaby/internal/storage" 15 | "rsc.io/gaby/internal/testutil" 16 | ) 17 | 18 | func TestMarkdownEditing(t *testing.T) { 19 | check := testutil.Checker(t) 20 | lg := testutil.Slogger(t) 21 | db := storage.MemDB() 22 | 23 | // Initial load. 
24 | rr, err := httprr.Open("../testdata/tmpedit.httprr", http.DefaultTransport) 25 | check(err) 26 | rr.Scrub(Scrub) 27 | sdb := secret.DB(secret.Map{"api.github.com": "user:pass"}) 28 | if rr.Recording() { 29 | sdb = secret.Netrc() 30 | } 31 | c := New(lg, db, sdb, rr.Client()) 32 | check(c.Add("rsc/tmp")) 33 | check(c.Sync()) 34 | 35 | var ei, ec *Event 36 | for e := range c.Events("rsc/tmp", 5, 5) { 37 | if ei == nil && e.API == "/issues" { 38 | ei = e 39 | } 40 | if ec == nil && e.API == "/issues/comments" { 41 | ec = e 42 | } 43 | } 44 | if ei == nil { 45 | t.Fatalf("did not find issue #5") 46 | } 47 | if ec == nil { 48 | t.Fatalf("did not find comment on issue #5") 49 | } 50 | 51 | issue := ei.Typed.(*Issue) 52 | issue1, err := c.DownloadIssue(issue.URL) 53 | check(err) 54 | if issue1.Title != issue.Title { 55 | t.Errorf("DownloadIssue: Title=%q, want %q", issue1.Title, issue.Title) 56 | } 57 | 58 | comment := ec.Typed.(*IssueComment) 59 | comment1, err := c.DownloadIssueComment(comment.URL) 60 | check(err) 61 | if comment1.Body != comment.Body { 62 | t.Errorf("DownloadIssueComment: Body=%q, want %q", comment1.Body, comment.Body) 63 | } 64 | 65 | c.testing = false // edit github directly (except for the httprr in the way) 66 | check(c.EditIssueComment(comment, &IssueCommentChanges{Body: rot13(comment.Body)})) 67 | check(c.PostIssueComment(issue, &IssueCommentChanges{Body: "testing. 
rot13 is the best."})) 68 | check(c.EditIssue(issue, &IssueChanges{Title: rot13(issue.Title)})) 69 | } 70 | 71 | func TestMarkdownDivertEdit(t *testing.T) { 72 | check := testutil.Checker(t) 73 | lg := testutil.Slogger(t) 74 | db := storage.MemDB() 75 | 76 | c := New(lg, db, nil, nil) 77 | check(c.Testing().LoadTxtar("../testdata/rsctmp.txt")) 78 | 79 | var ei, ec *Event 80 | for e := range c.Events("rsc/tmp", 5, 5) { 81 | if ei == nil && e.API == "/issues" { 82 | ei = e 83 | } 84 | if ec == nil && e.API == "/issues/comments" { 85 | ec = e 86 | } 87 | } 88 | if ei == nil { 89 | t.Fatalf("did not find issue #5") 90 | } 91 | if ec == nil { 92 | t.Fatalf("did not find comment on issue #5") 93 | } 94 | 95 | issue := ei.Typed.(*Issue) 96 | issue1, err := c.DownloadIssue(issue.URL) 97 | check(err) 98 | if issue1.Title != issue.Title { 99 | t.Errorf("DownloadIssue: Title=%q, want %q", issue1.Title, issue.Title) 100 | } 101 | 102 | comment := ec.Typed.(*IssueComment) 103 | comment1, err := c.DownloadIssueComment(comment.URL) 104 | check(err) 105 | if comment1.Body != comment.Body { 106 | t.Errorf("DownloadIssueComment: Body=%q, want %q", comment1.Body, comment.Body) 107 | } 108 | 109 | check(c.EditIssueComment(comment, &IssueCommentChanges{Body: rot13(comment.Body)})) 110 | check(c.PostIssueComment(issue, &IssueCommentChanges{Body: "testing. rot13 is the best."})) 111 | check(c.EditIssue(issue, &IssueChanges{Title: rot13(issue.Title), Labels: &[]string{"ebg13"}})) 112 | 113 | var edits []string 114 | for _, e := range c.Testing().Edits() { 115 | edits = append(edits, e.String()) 116 | } 117 | 118 | want := []string{ 119 | `EditIssueComment(rsc/tmp#5.10000000008, {"body":"Comment!\n"})`, 120 | `PostIssueComment(rsc/tmp#5, {"body":"testing. 
rot13 is the best."})`, 121 | `EditIssue(rsc/tmp#5, {"title":"another new issue","labels":["ebg13"]})`, 122 | } 123 | if !slices.Equal(edits, want) { 124 | t.Fatalf("Testing().Edits():\nhave %s\nwant %s", edits, want) 125 | } 126 | } 127 | 128 | func rot13(s string) string { 129 | b := []byte(s) 130 | for i, x := range b { 131 | if 'A' <= x && x <= 'M' || 'a' <= x && x <= 'm' { 132 | b[i] = x + 13 133 | } else if 'N' <= x && x <= 'Z' || 'n' <= x && x <= 'z' { 134 | b[i] = x - 13 135 | } 136 | } 137 | return string(b) 138 | } 139 | -------------------------------------------------------------------------------- /internal/testdata/omap.httprr: -------------------------------------------------------------------------------- 1 | httprr trace v1 2 | 172 1240 3 | GET https://api.github.com/repos/rsc/omap/issues?direction=asc&page=1&per_page=100&sort=updated&state=all HTTP/1.1 4 | Host: api.github.com 5 | User-Agent: Go-http-client/1.1 6 | 7 | HTTP/2.0 200 OK 8 | Content-Length: 2 9 | Accept-Ranges: bytes 10 | Access-Control-Allow-Origin: * 11 | Access-Control-Expose-Headers: ETag, Link, Location, Retry-After, X-GitHub-OTP, X-RateLimit-Limit, X-RateLimit-Remaining, X-RateLimit-Used, X-RateLimit-Resource, X-RateLimit-Reset, X-OAuth-Scopes, X-Accepted-OAuth-Scopes, X-Poll-Interval, X-GitHub-Media-Type, X-GitHub-SSO, X-GitHub-Request-Id, Deprecation, Sunset 12 | Cache-Control: public, max-age=60, s-maxage=60 13 | Content-Security-Policy: default-src 'none' 14 | Content-Type: application/json; charset=utf-8 15 | Date: Tue, 04 Jun 2024 16:16:40 GMT 16 | Etag: "4f53cda18c2baa0c0354bb5f9a3ecbe5ed12ab4d8e11ba873c2f11161202b945" 17 | Referrer-Policy: origin-when-cross-origin, strict-origin-when-cross-origin 18 | Server: GitHub.com 19 | Strict-Transport-Security: max-age=31536000; includeSubdomains; preload 20 | Vary: Accept, Accept-Encoding, Accept, X-Requested-With 21 | X-Content-Type-Options: nosniff 22 | X-Frame-Options: deny 23 | X-Github-Api-Version-Selected: 2022-11-28 24 | 
X-Github-Media-Type: github.v3; format=json 25 | X-Github-Request-Id: F9AB:A58D8:48273A5:7F79B5E:665F3DE8 26 | X-Ratelimit-Limit: 60 27 | X-Ratelimit-Remaining: 59 28 | X-Ratelimit-Reset: 1717521400 29 | X-Ratelimit-Resource: core 30 | X-Ratelimit-Used: 1 31 | X-Xss-Protection: 0 32 | 33 | []158 1240 34 | GET https://api.github.com/repos/rsc/omap/issues/comments?direction=asc&page=1&sort=updated HTTP/1.1 35 | Host: api.github.com 36 | User-Agent: Go-http-client/1.1 37 | 38 | HTTP/2.0 200 OK 39 | Content-Length: 2 40 | Accept-Ranges: bytes 41 | Access-Control-Allow-Origin: * 42 | Access-Control-Expose-Headers: ETag, Link, Location, Retry-After, X-GitHub-OTP, X-RateLimit-Limit, X-RateLimit-Remaining, X-RateLimit-Used, X-RateLimit-Resource, X-RateLimit-Reset, X-OAuth-Scopes, X-Accepted-OAuth-Scopes, X-Poll-Interval, X-GitHub-Media-Type, X-GitHub-SSO, X-GitHub-Request-Id, Deprecation, Sunset 43 | Cache-Control: public, max-age=60, s-maxage=60 44 | Content-Security-Policy: default-src 'none' 45 | Content-Type: application/json; charset=utf-8 46 | Date: Tue, 04 Jun 2024 16:16:40 GMT 47 | Etag: "4f53cda18c2baa0c0354bb5f9a3ecbe5ed12ab4d8e11ba873c2f11161202b945" 48 | Referrer-Policy: origin-when-cross-origin, strict-origin-when-cross-origin 49 | Server: GitHub.com 50 | Strict-Transport-Security: max-age=31536000; includeSubdomains; preload 51 | Vary: Accept, Accept-Encoding, Accept, X-Requested-With 52 | X-Content-Type-Options: nosniff 53 | X-Frame-Options: deny 54 | X-Github-Api-Version-Selected: 2022-11-28 55 | X-Github-Media-Type: github.v3; format=json 56 | X-Github-Request-Id: F9AB:A58D8:48273F3:7F79BDD:665F3DE8 57 | X-Ratelimit-Limit: 60 58 | X-Ratelimit-Remaining: 58 59 | X-Ratelimit-Reset: 1717521400 60 | X-Ratelimit-Resource: core 61 | X-Ratelimit-Used: 2 62 | X-Xss-Protection: 0 63 | 64 | []142 1240 65 | GET https://api.github.com/repos/rsc/omap/issues/events?page=1&per_page=100 HTTP/1.1 66 | Host: api.github.com 67 | User-Agent: Go-http-client/1.1 68 | 69 | 
HTTP/2.0 200 OK 70 | Content-Length: 2 71 | Accept-Ranges: bytes 72 | Access-Control-Allow-Origin: * 73 | Access-Control-Expose-Headers: ETag, Link, Location, Retry-After, X-GitHub-OTP, X-RateLimit-Limit, X-RateLimit-Remaining, X-RateLimit-Used, X-RateLimit-Resource, X-RateLimit-Reset, X-OAuth-Scopes, X-Accepted-OAuth-Scopes, X-Poll-Interval, X-GitHub-Media-Type, X-GitHub-SSO, X-GitHub-Request-Id, Deprecation, Sunset 74 | Cache-Control: public, max-age=60, s-maxage=60 75 | Content-Security-Policy: default-src 'none' 76 | Content-Type: application/json; charset=utf-8 77 | Date: Tue, 04 Jun 2024 16:16:40 GMT 78 | Etag: "4f53cda18c2baa0c0354bb5f9a3ecbe5ed12ab4d8e11ba873c2f11161202b945" 79 | Referrer-Policy: origin-when-cross-origin, strict-origin-when-cross-origin 80 | Server: GitHub.com 81 | Strict-Transport-Security: max-age=31536000; includeSubdomains; preload 82 | Vary: Accept, Accept-Encoding, Accept, X-Requested-With 83 | X-Content-Type-Options: nosniff 84 | X-Frame-Options: deny 85 | X-Github-Api-Version-Selected: 2022-11-28 86 | X-Github-Media-Type: github.v3; format=json 87 | X-Github-Request-Id: F9AB:A58D8:482744D:7F79C70:665F3DE8 88 | X-Ratelimit-Limit: 60 89 | X-Ratelimit-Remaining: 57 90 | X-Ratelimit-Reset: 1717521400 91 | X-Ratelimit-Resource: core 92 | X-Ratelimit-Used: 3 93 | X-Xss-Protection: 0 94 | 95 | [] -------------------------------------------------------------------------------- /internal/docs/docs.go: -------------------------------------------------------------------------------- 1 | // Copyright 2024 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | // Package docs implements a corpus of text documents identified by document IDs. 6 | // It allows retrieving the documents by ID as well as retrieving documents that are 7 | // new since a previous scan. 
package docs

import (
	"iter"
	"strings"

	"rsc.io/gaby/internal/storage"
	"rsc.io/gaby/internal/storage/timed"
	"rsc.io/ordered"
)

// This package stores the following key schemas in the database:
//
//	["docs.Doc", URL] => [DBTime, Title, Text]
//	["docs.DocByTime", DBTime, URL] => []
//
// DocByTime is an index of Docs by DBTime, which is the time when the
// record was added to the database. Code that processes new docs can
// record which DBTime it has most recently processed and then scan forward in
// the index to learn about new docs.

// A Corpus is the collection of documents stored in a database.
type Corpus struct {
	db storage.DB
}

// New returns a new Corpus representing the documents stored in db.
func New(db storage.DB) *Corpus {
	return &Corpus{db}
}

// A Doc is a single document in the Corpus.
type Doc struct {
	DBTime timed.DBTime // database time (from storage.Now) when Doc was written
	ID     string       // document identifier (such as a URL)
	Title  string       // title of document
	Text   string       // text of document
}

// decodeDoc decodes the document in the timed key-value pair.
// It calls c.db.Panic if the key-value pair is malformed.
func (c *Corpus) decodeDoc(t *timed.Entry) *Doc {
	d := new(Doc)
	d.DBTime = t.ModTime
	if err := ordered.Decode(t.Key, &d.ID); err != nil {
		// unreachable unless db corruption
		c.db.Panic("docs decode", "key", storage.Fmt(t.Key), "err", err)
	}
	if err := ordered.Decode(t.Val, &d.Title, &d.Text); err != nil {
		// unreachable unless db corruption
		c.db.Panic("docs decode", "key", storage.Fmt(t.Key), "val", storage.Fmt(t.Val), "err", err)
	}
	return d
}

// Get returns the document with the given id.
// It returns nil, false if no document is found.
// It returns d, true otherwise.
func (c *Corpus) Get(id string) (doc *Doc, ok bool) {
	t, ok := timed.Get(c.db, "docs.Doc", ordered.Encode(id))
	if !ok {
		return nil, false
	}
	return c.decodeDoc(t), true
}

// Add adds a document with the given id, title, and text.
// If the document already exists in the corpus with the same title and text,
// Add is a no-op.
// Otherwise, if the document already exists in the corpus, it is replaced.
func (c *Corpus) Add(id, title, text string) {
	old, ok := c.Get(id)
	if ok && old.Title == title && old.Text == text {
		// Unchanged: skip the write so the doc's DBTime (and hence the
		// DocByTime index) is not disturbed.
		return
	}
	b := c.db.Batch()
	timed.Set(c.db, b, "docs.Doc", ordered.Encode(id), ordered.Encode(title, text))
	b.Apply()
}

// Docs returns an iterator over all documents in the corpus
// with IDs starting with a given prefix.
// The documents are ordered by ID.
func (c *Corpus) Docs(prefix string) iter.Seq[*Doc] {
	return func(yield func(*Doc) bool) {
		// NOTE(review): prefix+"\xff" as the scan upper bound assumes IDs
		// never contain a 0xff byte (true for URLs) — confirm for other ID kinds.
		for t := range timed.Scan(c.db, "docs.Doc", ordered.Encode(prefix), ordered.Encode(prefix+"\xff")) {
			if !yield(c.decodeDoc(t)) {
				return
			}
		}
	}
}

// DocsAfter returns an iterator over all documents with DBTime
// greater than dbtime and with IDs starting with the prefix.
// The documents are ordered by DBTime.
func (c *Corpus) DocsAfter(dbtime timed.DBTime, prefix string) iter.Seq[*Doc] {
	// filter reports whether a raw "docs.Doc" key has an ID starting with
	// prefix, letting ScanAfter skip non-matching entries before decoding values.
	filter := func(key []byte) bool {
		if prefix == "" {
			return true
		}
		var id string
		if err := ordered.Decode(key, &id); err != nil {
			// unreachable unless db corruption
			c.db.Panic("docs scan decode", "key", storage.Fmt(key), "err", err)
		}
		return strings.HasPrefix(id, prefix)
	}
	return func(yield func(*Doc) bool) {
		for t := range timed.ScanAfter(c.db, "docs.Doc", dbtime, filter) {
			if !yield(c.decodeDoc(t)) {
				return
			}
		}
	}
}

// DocWatcher returns a new [storage.Watcher] with the given name.
// It picks up where any previous Watcher of the same name left off.
func (c *Corpus) DocWatcher(name string) *timed.Watcher[*Doc] {
	return timed.NewWatcher(c.db, name, "docs.Doc", c.decodeDoc)
}
--------------------------------------------------------------------------------
/internal/gemini/gemini.go:
--------------------------------------------------------------------------------
// Copyright 2024 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Package gemini implements access to Google's Gemini model.
//
// [Client] implements [llm.Embedder]. Use [NewClient] to connect.
package gemini

import (
	"bytes"
	"context"
	"encoding/json"
	"fmt"
	"log/slog"
	"maps"
	"net/http"
	"slices"
	"strings"
	_ "unsafe" // for linkname

	"github.com/google/generative-ai-go/genai"
	"google.golang.org/api/option"
	"rsc.io/gaby/internal/httprr"
	"rsc.io/gaby/internal/llm"
	"rsc.io/gaby/internal/secret"
)

// Scrub is a request scrubber for use with [rsc.io/httprr].
// It removes credential and version headers and canonicalizes JSON request
// bodies so that recorded traces compare deterministically.
func Scrub(req *http.Request) error {
	delete(req.Header, "x-goog-api-key")    // genai does not canonicalize
	req.Header.Del("X-Goog-Api-Key")        // in case it starts
	delete(req.Header, "x-goog-api-client") // contains version numbers
	req.Header.Del("X-Goog-Api-Client")

	if ctype := req.Header.Get("Content-Type"); ctype == "application/json" || strings.HasPrefix(ctype, "application/json;") {
		// Canonicalize JSON body.
		// google.golang.org/protobuf/internal/encoding.json
		// goes out of its way to randomize the JSON encodings
		// of protobuf messages by adding or not adding spaces
		// after commas. Derandomize by compacting the JSON.
		b := req.Body.(*httprr.Body)
		var buf bytes.Buffer
		if err := json.Compact(&buf, b.Data); err == nil {
			b.Data = buf.Bytes()
		}
	}
	return nil
}

// A Client represents a connection to Gemini.
type Client struct {
	slog  *slog.Logger
	genai *genai.Client
}

// NewClient returns a connection to Gemini, using the given logger and HTTP client.
// It expects to find a secret of the form "AIza..." or "user:AIza..." in sdb
// under the name "ai.google.dev".
func NewClient(lg *slog.Logger, sdb secret.DB, hc *http.Client) (*Client, error) {
	key, ok := sdb.Get("ai.google.dev")
	if !ok {
		return nil, fmt.Errorf("missing api key for ai.google.dev")
	}
	// If key is from .netrc, ignore user name.
	if _, pass, ok := strings.Cut(key, ":"); ok {
		key = pass
	}

	// Ideally this would use “option.WithAPIKey(key), option.WithHTTPClient(hc),”
	// but using option.WithHTTPClient bypasses the code that passes along the API key.
	// Instead we make our own derived http.Client that re-adds the key.
	// And then we still have to say option.WithAPIKey("ignored") because
	// otherwise NewClient complains that we haven't passed in a key.
	// (If we pass in the key, it ignores it, but if we don't pass it in,
	// it complains that we didn't give it a key.)
	ai, err := genai.NewClient(context.Background(),
		option.WithAPIKey("ignored"),
		option.WithHTTPClient(withKey(hc, key)))
	if err != nil {
		return nil, err
	}

	return &Client{slog: lg, genai: ai}, nil
}

// withKey returns a new http.Client that is the same as hc
// except that it adds "x-goog-api-key: key" to every request.
func withKey(hc *http.Client, key string) *http.Client {
	c := *hc
	t := c.Transport
	if t == nil {
		t = http.DefaultTransport
	}
	c.Transport = &transportWithKey{t, key}
	return &c
}

// transportWithKey is the same as rt
// except that it adds "x-goog-api-key: key" to every request.
type transportWithKey struct {
	rt  http.RoundTripper
	key string
}

// RoundTrip implements [http.RoundTripper].
// It shallow-copies the request and clones its header map so the
// caller's request is never mutated, then injects the API key header.
func (t *transportWithKey) RoundTrip(req *http.Request) (resp *http.Response, err error) {
	r := *req
	r.Header = maps.Clone(req.Header)
	r.Header["x-goog-api-key"] = []string{t.key}
	return t.rt.RoundTrip(&r)
}

const maxBatch = 100 // empirical limit

// EmbedDocs returns the vector embeddings for the docs,
// implementing [llm.Embedder].
func (c *Client) EmbedDocs(docs []llm.EmbedDoc) ([]llm.Vector, error) {
	model := c.genai.EmbeddingModel("text-embedding-004")
	var vecs []llm.Vector
	// The embedding API limits batch sizes, so send at most maxBatch docs per call.
	for docs := range slices.Chunk(docs, maxBatch) {
		b := model.NewBatch()
		for _, d := range docs {
			b.AddContentWithTitle(d.Title, genai.Text(d.Text))
		}
		resp, err := model.BatchEmbedContents(context.Background(), b)
		if err != nil {
			// Return the embeddings computed so far along with the error,
			// so callers can make partial progress.
			return vecs, err
		}
		for _, e := range resp.Embeddings {
			vecs = append(vecs, e.Values)
		}
	}
	return vecs, nil
}
--------------------------------------------------------------------------------
/internal/embeddocs/sync_test.go:
--------------------------------------------------------------------------------
// Copyright 2024 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package embeddocs

import (
	"fmt"
	"strings"
	"testing"

	"rsc.io/gaby/internal/docs"
	"rsc.io/gaby/internal/llm"
	"rsc.io/gaby/internal/storage"
	"rsc.io/gaby/internal/testutil"
)

// texts are the sample documents used by the sync tests.
var texts = []string{
	"for loops",
	"for all time, always",
	"break statements",
	"breakdancing",
	"forever could never be long enough for me",
	"the macarena",
}

// checker returns a function that fails the test on a non-nil error.
func checker(t *testing.T) func(error) {
	return func(err error) {
		if err != nil {
			t.Helper()
			t.Fatal(err)
		}
	}
}

func TestSync(t *testing.T) {
	lg := testutil.Slogger(t)
	db := storage.MemDB()
	vdb := storage.MemVectorDB(db, lg, "step1")
	dc := docs.New(db)
	for i, text := range texts {
		dc.Add(fmt.Sprintf("URL%d", i), "", text)
	}

	Sync(lg, vdb, llm.QuoteEmbedder(), dc)
	for i, text := range texts {
		vec, ok := vdb.Get(fmt.Sprintf("URL%d", i))
		if !ok {
			t.Errorf("URL%d missing from vdb", i)
			continue
		}
		vtext := llm.UnquoteVector(vec)
		if vtext != text {
			t.Errorf("URL%d decoded to %q, want %q", i, vtext, text)
		}
	}

	// Second sync into a fresh vector DB: only the newly added rot13 docs
	// should be embedded, because the watcher remembers prior progress.
	for i, text := range texts {
		dc.Add(fmt.Sprintf("rot13%d", i), "", rot13(text))
	}
	vdb2 := storage.MemVectorDB(db, lg, "step2")
	Sync(lg, vdb2, llm.QuoteEmbedder(), dc)
	for i, text := range texts {
		vec, ok := vdb2.Get(fmt.Sprintf("URL%d", i))
		if ok {
			t.Errorf("URL%d written during second sync: %q", i, llm.UnquoteVector(vec))
			continue
		}

		// NOTE(review): ok is not checked here; a missing rot13 key would
		// surface as a decode mismatch rather than a clear "missing" error.
		vec, ok = vdb2.Get(fmt.Sprintf("rot13%d", i))
		vtext := llm.UnquoteVector(vec)
		if vtext != rot13(text) {
			t.Errorf("rot13%d decoded to %q, want %q", i, vtext, rot13(text))
		}
	}
}

func TestBigSync(t *testing.T) {
	const N = 10000

	lg := testutil.Slogger(t)
	db := storage.MemDB()
	vdb := storage.MemVectorDB(db, lg, "vdb")
	dc := docs.New(db)
	for i := range N {
		dc.Add(fmt.Sprintf("URL%d", i), "", fmt.Sprintf("Text%d", i))
	}

	Sync(lg, vdb, llm.QuoteEmbedder(), dc)
	for i := range N {
		vec, ok := vdb.Get(fmt.Sprintf("URL%d", i))
		if !ok {
			t.Errorf("URL%d missing from vdb", i)
			continue
		}
		text := fmt.Sprintf("Text%d", i)
		vtext := llm.UnquoteVector(vec)
		if vtext != text {
			t.Errorf("URL%d decoded to %q, want %q", i, vtext, text)
		}
	}
}

// TestBadEmbedders checks that Sync logs the failure modes of misbehaving
// embedders (too many vectors, an error mid-stream, too few vectors) and
// still records whatever progress it safely can.
func TestBadEmbedders(t *testing.T) {
	const N = 150
	db := storage.MemDB()
	dc := docs.New(db)
	for i := range N {
		dc.Add(fmt.Sprintf("URL%03d", i), "", fmt.Sprintf("Text%d", i))
	}

	lg, out := testutil.SlogBuffer()
	db = storage.MemDB()
	vdb := storage.MemVectorDB(db, lg, "vdb")
	Sync(lg, vdb, tooManyEmbed{}, dc)
	if !strings.Contains(out.String(), "embeddocs length mismatch") {
		t.Errorf("tooManyEmbed did not report error:\n%s", out)
	}

	lg, out = testutil.SlogBuffer()
	db = storage.MemDB()
	vdb = storage.MemVectorDB(db, lg, "vdb")
	Sync(lg, vdb, embedErr{}, dc)
	if !strings.Contains(out.String(), "EMBED ERROR") {
		t.Errorf("embedErr did not report error:\n%s", out)
	}
	if _, ok := vdb.Get("URL001"); !ok {
		t.Errorf("Sync did not write URL001 after embedErr")
	}

	lg, out = testutil.SlogBuffer()
	db = storage.MemDB()
	vdb = storage.MemVectorDB(db, lg, "vdb")
	Sync(lg, vdb, embedHalf{}, dc)
	if !strings.Contains(out.String(), "length mismatch") {
		t.Errorf("embedHalf did not report error:\n%s", out)
	}
	if _, ok := vdb.Get("URL001"); !ok {
		t.Errorf("Sync did not write URL001 after embedHalf")
	}
}

// rot13 applies the ROT13 substitution to ASCII letters in s.
func rot13(s string) string {
	b := []byte(s)
	for i, x := range b {
		if 'A' <= x && x <= 'M' || 'a' <= x && x <= 'm' {
			b[i] = x + 13
		} else if 'N' <= x && x <= 'Z' || 'n' <= x && x <= 'z' {
			b[i] = x - 13
		}
	}
	return string(b)
}

// tooManyEmbed returns twice as many vectors as docs.
type tooManyEmbed struct{}

func (tooManyEmbed) EmbedDocs(docs []llm.EmbedDoc) ([]llm.Vector, error) {
	vec, _ := llm.QuoteEmbedder().EmbedDocs(docs)
	vec = append(vec, vec...)
	return vec, nil
}

// embedErr returns correct vectors along with an error.
type embedErr struct{}

func (embedErr) EmbedDocs(docs []llm.EmbedDoc) ([]llm.Vector, error) {
	vec, _ := llm.QuoteEmbedder().EmbedDocs(docs)
	return vec, fmt.Errorf("EMBED ERROR")
}

// embedHalf returns vectors for only half the docs.
type embedHalf struct{}

func (embedHalf) EmbedDocs(docs []llm.EmbedDoc) ([]llm.Vector, error) {
	vec, _ := llm.QuoteEmbedder().EmbedDocs(docs)
	vec = vec[:len(vec)/2]
	return vec, nil
}
--------------------------------------------------------------------------------
/internal/pebble/pebble.go:
--------------------------------------------------------------------------------
// Copyright 2024 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Package pebble implements a storage.DB using Pebble,
// a production-quality key-value database from CockroachDB.
package pebble

import (
	"bytes"
	"cmp"
	"iter"
	"log/slog"

	"github.com/cockroachdb/pebble"
	"rsc.io/gaby/internal/storage"
)

// Open opens an existing Pebble database in the named directory.
// The database must already exist.
func Open(lg *slog.Logger, dir string) (storage.DB, error) {
	return open(lg, dir, &pebble.Options{ErrorIfNotExists: true})
}

// Create creates a new Pebble database in the named directory.
// The database (and directory) must not already exist.
27 | func Create(lg *slog.Logger, dir string) (storage.DB, error) { 28 | return open(lg, dir, &pebble.Options{ErrorIfExists: true}) 29 | } 30 | 31 | func open(lg *slog.Logger, dir string, opts *pebble.Options) (storage.DB, error) { 32 | p, err := pebble.Open(dir, opts) 33 | if err != nil { 34 | lg.Error("pebble open", "dir", dir, "create", opts.ErrorIfExists, "err", err) 35 | return nil, err 36 | } 37 | return &db{p: p, slog: lg}, nil 38 | } 39 | 40 | type db struct { 41 | p *pebble.DB 42 | m storage.MemLocker 43 | slog *slog.Logger 44 | } 45 | 46 | type batch struct { 47 | db *db 48 | b *pebble.Batch 49 | } 50 | 51 | func (d *db) Lock(key string) { 52 | d.m.Lock(key) 53 | } 54 | 55 | func (d *db) Unlock(key string) { 56 | d.m.Unlock(key) 57 | } 58 | 59 | func (d *db) get(key []byte, yield func(val []byte)) { 60 | v, c, err := d.p.Get(key) 61 | if err == pebble.ErrNotFound { 62 | return 63 | } 64 | if err != nil { 65 | // unreachable except db error 66 | d.Panic("pebble get", "key", storage.Fmt(key), "err", err) 67 | } 68 | yield(v) 69 | c.Close() 70 | } 71 | 72 | func (d *db) Get(key []byte) (val []byte, ok bool) { 73 | d.get(key, func(v []byte) { 74 | val = bytes.Clone(v) 75 | ok = true 76 | }) 77 | return 78 | } 79 | 80 | var ( 81 | sync = &pebble.WriteOptions{Sync: true} 82 | noSync = &pebble.WriteOptions{Sync: false} 83 | ) 84 | 85 | func (d *db) Panic(msg string, args ...any) { 86 | d.slog.Error(msg, args...) 87 | storage.Panic(msg, args...) 
88 | } 89 | 90 | func (d *db) Set(key, val []byte) { 91 | if err := d.p.Set(key, val, noSync); err != nil { 92 | // unreachable except db error 93 | d.Panic("pebble set", "key", storage.Fmt(key), "val", storage.Fmt(val), "err", err) 94 | } 95 | } 96 | 97 | func (d *db) Delete(key []byte) { 98 | if err := d.p.Delete(key, noSync); err != nil { 99 | // unreachable except db error 100 | d.Panic("pebble delete", "key", storage.Fmt(key), "err", err) 101 | } 102 | } 103 | 104 | func (d *db) DeleteRange(start, end []byte) { 105 | err := cmp.Or( 106 | d.p.DeleteRange(start, end, noSync), 107 | d.p.Delete(end, noSync), 108 | ) 109 | if err != nil { 110 | // unreachable except db error 111 | d.Panic("pebble delete range", "start", storage.Fmt(start), "end", storage.Fmt(end), "err", err) 112 | } 113 | } 114 | 115 | func (d *db) Flush() { 116 | if err := d.p.Flush(); err != nil { 117 | // unreachable except db error 118 | d.Panic("pebble flush", "err", err) 119 | } 120 | } 121 | 122 | func (d *db) Close() { 123 | if err := d.p.Close(); err != nil { 124 | // unreachable except db error 125 | d.Panic("pebble close", "err", err) 126 | } 127 | } 128 | 129 | func (d *db) Scan(start, end []byte) iter.Seq2[[]byte, func() []byte] { 130 | start = bytes.Clone(start) 131 | end = bytes.Clone(end) 132 | return func(yield func(key []byte, val func() []byte) bool) { 133 | // Note: Pebble's UpperBound is non-inclusive (not included in the scan) 134 | // but we want to include the key end in the scan, 135 | // so do not use UpperBound; we check during the iteration instead. 
136 | iter, err := d.p.NewIter(&pebble.IterOptions{ 137 | LowerBound: start, 138 | }) 139 | if err != nil { 140 | // unreachable except db error 141 | d.Panic("pebble new iterator", "start", storage.Fmt(start), "err", err) 142 | } 143 | defer iter.Close() 144 | for iter.First(); iter.Valid(); iter.Next() { 145 | key := iter.Key() 146 | if bytes.Compare(key, end) > 0 { 147 | break 148 | } 149 | val := func() []byte { 150 | v, err := iter.ValueAndErr() 151 | if err != nil { 152 | // unreachable except db error 153 | d.Panic("pebble iterator value", "key", storage.Fmt(key), "err", err) 154 | } 155 | return v 156 | } 157 | if !yield(key, val) { 158 | return 159 | } 160 | } 161 | } 162 | } 163 | 164 | func (d *db) Batch() storage.Batch { 165 | return &batch{d, d.p.NewBatch()} 166 | } 167 | 168 | func (b *batch) Set(key, val []byte) { 169 | if err := b.b.Set(key, val, noSync); err != nil { 170 | // unreachable except db error 171 | b.db.Panic("pebble batch set", "key", storage.Fmt(key), "val", storage.Fmt(val), "err", err) 172 | } 173 | } 174 | 175 | func (b *batch) Delete(key []byte) { 176 | if err := b.b.Delete(key, noSync); err != nil { 177 | // unreachable except db error 178 | b.db.Panic("pebble batch delete", "key", storage.Fmt(key), "err", err) 179 | } 180 | } 181 | 182 | func (b *batch) DeleteRange(start, end []byte) { 183 | err := cmp.Or( 184 | b.b.DeleteRange(start, end, noSync), 185 | b.b.Delete(end, noSync), 186 | ) 187 | if err != nil { 188 | // unreachable except db error 189 | b.db.Panic("pebble batch delete range", "start", storage.Fmt(start), "end", storage.Fmt(end), "err", err) 190 | } 191 | } 192 | 193 | func (b *batch) MaybeApply() bool { 194 | if b.b.Len() > 100e6 { 195 | b.Apply() 196 | return true 197 | } 198 | return false 199 | } 200 | 201 | func (b *batch) Apply() { 202 | if err := b.db.p.Apply(b.b, noSync); err != nil { 203 | // unreachable except db error 204 | b.db.Panic("pebble batch apply", "err", err) 205 | } 206 | b.b.Reset() 207 | } 208 
--------------------------------------------------------------------------------
/internal/github/edit.go:
--------------------------------------------------------------------------------
// Copyright 2024 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package github

import (
	"bytes"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
	"slices"
	"strings"
	"testing"
)

// NOTE: It's possible that we should elevate TestingEdit to a general
// “deferred edits” facility for use in looking at potential changes.
// On the other hand, higher-level code usually needs to know
// whether it's making changes or not, so that it can record that
// the work has been done, so normally “deferred edits” should be
// as high in the stack as possible, and the GitHub client is not.

// PostIssueComment posts a new comment with the given body (written in Markdown) on issue.
// In testing mode the edit is diverted: it is recorded in c.testEdits
// instead of being sent to GitHub.
func (c *Client) PostIssueComment(issue *Issue, changes *IssueCommentChanges) error {
	if c.divertEdits() {
		c.testMu.Lock()
		defer c.testMu.Unlock()

		c.testEdits = append(c.testEdits, &TestingEdit{
			Project:             issue.Project(),
			Issue:               issue.Number,
			IssueCommentChanges: changes.clone(),
		})
		return nil
	}

	return c.post(issue.URL+"/comments", changes)
}

// DownloadIssue downloads the current issue JSON from the given URL
// and decodes it into an issue.
// Given an issue, c.DownloadIssue(issue.URL) fetches the very latest state for the issue.
func (c *Client) DownloadIssue(url string) (*Issue, error) {
	x := new(Issue)
	_, err := c.get(url, "", x)
	if err != nil {
		return nil, err
	}
	return x, nil
}

// DownloadIssueComment downloads the current comment JSON from the given URL
// and decodes it into an IssueComment.
// Given a comment, c.DownloadIssueComment(comment.URL) fetches the very latest state for the comment.
func (c *Client) DownloadIssueComment(url string) (*IssueComment, error) {
	x := new(IssueComment)
	_, err := c.get(url, "", x)
	if err != nil {
		return nil, err
	}
	return x, nil
}

// An IssueCommentChanges specifies changes to make to an issue comment.
type IssueCommentChanges struct {
	Body string `json:"body,omitempty"`
}

// clone returns a copy of ch, so that a diverted (testing) edit
// is unaffected by later mutation of the caller's value.
func (ch *IssueCommentChanges) clone() *IssueCommentChanges {
	x := *ch
	ch = &x
	return ch
}

// EditIssueComment changes the comment on GitHub to have the new body.
// It is typically a good idea to use c.DownloadIssueComment first and check
// that the live comment body matches the one obtained from the database,
// to minimize race windows.
func (c *Client) EditIssueComment(comment *IssueComment, changes *IssueCommentChanges) error {
	if c.divertEdits() {
		c.testMu.Lock()
		defer c.testMu.Unlock()

		c.testEdits = append(c.testEdits, &TestingEdit{
			Project:             comment.Project(),
			Issue:               comment.Issue(),
			Comment:             comment.CommentID(),
			IssueCommentChanges: changes.clone(),
		})
		return nil
	}

	return c.patch(comment.URL, changes)
}

// An IssueChanges specifies changes to make to an issue.
// Fields that are the empty string or a nil pointer are ignored.
//
// Note that Labels is the new set of all labels for the issue,
// not labels to add. If you are adding a single label,
// you need to include all the existing labels as well.
// Labels is a *[]string so that it can be set to new([]string)
// to clear the labels.
type IssueChanges struct {
	Title  string    `json:"title,omitempty"`
	Body   string    `json:"body,omitempty"`
	State  string    `json:"state,omitempty"`
	Labels *[]string `json:"labels,omitempty"`
}

// clone returns a deep copy of ch, including the Labels slice,
// so that a diverted (testing) edit is isolated from the caller.
func (ch *IssueChanges) clone() *IssueChanges {
	x := *ch
	ch = &x
	if ch.Labels != nil {
		x := slices.Clone(*ch.Labels)
		ch.Labels = &x
	}
	return ch
}

// EditIssue applies the changes to issue on GitHub.
// In testing mode the edit is diverted into c.testEdits instead.
func (c *Client) EditIssue(issue *Issue, changes *IssueChanges) error {
	if c.divertEdits() {
		c.testMu.Lock()
		defer c.testMu.Unlock()

		c.testEdits = append(c.testEdits, &TestingEdit{
			Project:      issue.Project(),
			Issue:        issue.Number,
			IssueChanges: changes.clone(),
		})
		return nil
	}

	return c.patch(issue.URL, changes)
}

// patch is like c.get but makes a PATCH request.
// Unlike c.get, it requires authentication.
func (c *Client) patch(url string, changes any) error {
	return c.json("PATCH", url, changes)
}

// post is like c.get but makes a POST request.
// Unlike c.get, it requires authentication.
func (c *Client) post(url string, body any) error {
	return c.json("POST", url, body)
}

// json is the general PATCH/POST implementation.
152 | func (c *Client) json(method, url string, body any) error { 153 | js, err := json.Marshal(body) 154 | if err != nil { 155 | return err 156 | } 157 | 158 | auth, ok := c.secret.Get("api.github.com") 159 | if !ok && !testing.Testing() { 160 | return fmt.Errorf("no secret for api.github.com") 161 | } 162 | user, pass, _ := strings.Cut(auth, ":") 163 | 164 | Redo: 165 | req, err := http.NewRequest(method, url, bytes.NewReader(js)) 166 | if err != nil { 167 | return err 168 | } 169 | req.Header.Set("Content-Type", "application/json; charset=utf-8") 170 | req.SetBasicAuth(user, pass) 171 | resp, err := c.http.Do(req) 172 | if err != nil { 173 | return err 174 | } 175 | data, err := io.ReadAll(resp.Body) 176 | resp.Body.Close() 177 | if err != nil { 178 | return fmt.Errorf("reading body: %v", err) 179 | } 180 | if c.rateLimit(resp) { 181 | goto Redo 182 | } 183 | if resp.StatusCode/10 != 20 { // allow 200, 201, maybe others 184 | return fmt.Errorf("%s\n%s", resp.Status, data) 185 | } 186 | return nil 187 | } 188 | -------------------------------------------------------------------------------- /internal/storage/db.go: -------------------------------------------------------------------------------- 1 | // Copyright 2024 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | // Package storage defines the storage abstractions needed for Gaby: 6 | // [DB], a basic key-value store, and [VectorDB], a vector database. 7 | // The storage needs are intentionally minimal (avoiding, for example, 8 | // a requirement on SQL), to admit as many implementations as possible. 9 | package storage 10 | 11 | import ( 12 | "bytes" 13 | "encoding/json" 14 | "fmt" 15 | "iter" 16 | "log/slog" 17 | "strconv" 18 | "strings" 19 | 20 | "rsc.io/ordered" 21 | ) 22 | 23 | // A DB is a key-value database. 24 | // 25 | // DB operations are assumed not to fail. 
26 | // They panic, intending to take down the program, 27 | // if there is an error accessing the database. 28 | // The assumption is that the program cannot possibly 29 | // continue without the database, since that's where all the state is stored. 30 | // Similarly, clients of DB conventionally panic if the database 31 | // returned corrupted data. 32 | // Code using multiple parallel database operations can recover 33 | // at the outermost calls. 34 | // Clients of DB 35 | type DB interface { 36 | // Lock acquires a lock on the given name, which need not exist in the database. 37 | // After a successful Lock(name), 38 | // any other call to Lock(name) from any other client of the database 39 | // (including in another process, for shared databases) 40 | // must block until Unlock(name) has been called. 41 | // In a shared database, a lock may also unlock 42 | // when the client disconnects or times out. 43 | Lock(name string) 44 | 45 | // Unlock releases the lock with the given name, 46 | // which the caller must have locked. 47 | Unlock(name string) 48 | 49 | // Set sets the value associated with key to val. 50 | Set(key, val []byte) 51 | 52 | // Get looks up the value associated with key. 53 | // If there is no entry for key in the database, Get returns nil, false. 54 | // Otherwise it returns val, true. 55 | Get(key []byte) (val []byte, ok bool) 56 | 57 | // Scan returns an iterator over all key-value pairs with start ≤ key ≤ end. 58 | // The second value in each iteration pair is a function returning the value, 59 | // not the value itself: 60 | // 61 | // for key, getVal := range db.Scan([]byte("aaa"), []byte("zzz")) { 62 | // val := getVal() 63 | // fmt.Printf("%q: %q\n", key, val) 64 | // } 65 | // 66 | // In iterations that only need the keys or only need the values for a subset of keys, 67 | // some DB implementations may avoid work when the value function is not called. 
68 | Scan(start, end []byte) iter.Seq2[[]byte, func() []byte] 69 | 70 | // Delete deletes any value associated with key. 71 | // Delete of an unset key is a no-op. 72 | Delete(key []byte) 73 | 74 | // DeleteRange deletes all key-value pairs with start ≤ key ≤ end. 75 | DeleteRange(start, end []byte) 76 | 77 | // Batch returns a new [Batch] that accumulates database mutations 78 | // to apply in an atomic operation. In addition to the atomicity, using a 79 | // Batch for bulk operations is more efficient than making each 80 | // change using repeated calls to DB's Set, Delete, and DeleteRange methods. 81 | Batch() Batch 82 | 83 | // Flush flushes DB changes to permanent storage. 84 | // Flush must be called before the process crashes or exits, 85 | // or else any changes since the previous Flush may be lost. 86 | Flush() 87 | 88 | // Close closes the database. 89 | // Like the other routines, it panics if an error happens, 90 | // so there is no error result. 91 | Close() 92 | 93 | // Panic logs the error message and args using the database's slog.Logger 94 | // and then panics with the text formatting of its arguments. 95 | // It is meant to be called when database corruption or other 96 | // database-related “can't happen” conditions been detected. 97 | Panic(msg string, args ...any) 98 | } 99 | 100 | // A Batch accumulates database mutations that are applied to a [DB] 101 | // as a single atomic operation. Applying bulk operations in a batch 102 | // is also more efficient than making individual [DB] method calls. 103 | // The batched operations apply in the order they are made. 104 | // For example, Set("a", "b") followed by Delete("a") is the same as 105 | // Delete("a"), while Delete("a") followed by Set("a", "b") is the same 106 | // as Set("a", "b"). 107 | type Batch interface { 108 | // Delete deletes any value associated with key. 109 | // Delete of an unset key is a no-op. 
110 | Delete(key []byte) 111 | 112 | // DeleteRange deletes all key-value pairs with start ≤ key ≤ end. 113 | DeleteRange(start, end []byte) 114 | 115 | // Set sets the value associated with key to val. 116 | Set(key, val []byte) 117 | 118 | // MaybeApply calls Apply if the batch is getting close to full. 119 | // Every Batch has a limit to how many operations can be batched, 120 | // so in a bulk operation where atomicity of the entire batch is not a concern, 121 | // calling MaybeApply gives the Batch implementation 122 | // permission to flush the batch at specific “safe points”. 123 | // A typical limit for a batch is about 100MB worth of logged operations. 124 | // MaybeApply reports whether it called Apply. 125 | MaybeApply() bool 126 | 127 | // Apply applies all the batched operations to the underlying DB 128 | // as a single atomic unit. 129 | // When Apply returns, the Batch is an empty batch ready for 130 | // more operations. 131 | Apply() 132 | } 133 | 134 | // Panic panics with the text formatting of its arguments. 135 | // It is meant to be called for database errors or corruption, 136 | // which have been defined to be impossible. 137 | // (See the [DB] documentation.) 138 | // 139 | // Panic is expected to be used by DB implementations. 140 | // DB clients should use the [DB.Panic] method instead. 141 | func Panic(msg string, args ...any) { 142 | var b bytes.Buffer 143 | slog.New(slog.NewTextHandler(&b, nil)).Error(msg, args...) 144 | s := b.String() 145 | if _, rest, ok := strings.Cut(s, " level=ERROR msg="); ok { 146 | s = rest 147 | } 148 | panic(strings.TrimSpace(s)) 149 | } 150 | 151 | // JSON converts x to JSON and returns the result. 152 | // It panics if there is any error converting x to JSON. 153 | // Since whether x can be converted to JSON depends 154 | // almost entirely on its type, a marshaling error indicates a 155 | // bug at the call site. 
156 | // 157 | // (The exception is certain malformed UTF-8 and floating-point 158 | // infinity and NaN. Code must be careful not to use JSON with those.) 159 | func JSON(x any) []byte { 160 | js, err := json.Marshal(x) 161 | if err != nil { 162 | panic(fmt.Sprintf("json.Marshal: %v", err)) 163 | } 164 | return js 165 | } 166 | 167 | // Fmt formats data for printing, 168 | // first trying [ordered.DecodeFmt] in case data is an [ordered encoding], 169 | // then trying a backquoted string if possible 170 | // (handling simple JSON data), 171 | // and finally resorting to [strconv.QuoteToASCII]. 172 | func Fmt(data []byte) string { 173 | if s, err := ordered.DecodeFmt(data); err == nil { 174 | return s 175 | } 176 | s := string(data) 177 | if strconv.CanBackquote(s) { 178 | return "`" + s + "`" 179 | } 180 | return strconv.QuoteToASCII(s) 181 | } 182 | -------------------------------------------------------------------------------- /internal/related/related_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2024 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 
package related

import (
	"fmt"
	"maps"
	"slices"
	"strings"
	"testing"
	"time"

	"rsc.io/gaby/internal/diff"
	"rsc.io/gaby/internal/docs"
	"rsc.io/gaby/internal/embeddocs"
	"rsc.io/gaby/internal/github"
	"rsc.io/gaby/internal/githubdocs"
	"rsc.io/gaby/internal/llm"
	"rsc.io/gaby/internal/storage"
	"rsc.io/gaby/internal/testutil"
)

// Test runs the poster end to end over in-memory GitHub and database
// fixtures, comparing posted "Related Issues" comments against the
// golden bodies post13 and post19 under several configurations:
// posts disabled, posts enabled, repeat runs, skip filters,
// an impossible minimum score, a future time limit, and zero max results.
func Test(t *testing.T) {
	lg := testutil.Slogger(t)
	db := storage.MemDB()
	gh := github.New(lg, db, nil, nil)
	gh.Testing().LoadTxtar("../testdata/markdown.txt")
	gh.Testing().LoadTxtar("../testdata/rsctmp.txt")

	dc := docs.New(db)
	githubdocs.Sync(lg, dc, gh)

	vdb := storage.MemVectorDB(db, lg, "vecs")
	embeddocs.Sync(lg, vdb, llm.QuoteEmbedder(), dc)

	// NOTE(review): a second MemVectorDB is opened over the same db and
	// namespace — presumably to exercise reading the vectors back from
	// storage rather than from memory; confirm that is the intent.
	vdb = storage.MemVectorDB(db, lg, "vecs")
	p := New(lg, db, gh, vdb, dc, "postname")
	p.EnableProject("rsc/markdown")
	p.SetTimeLimit(time.Time{})
	p.Run()
	// Posts have not been enabled, so Run must not produce any edits.
	checkEdits(t, gh.Testing().Edits(), nil)
	gh.Testing().ClearEdits()

	p.EnablePosts()
	p.Run()
	checkEdits(t, gh.Testing().Edits(), map[int64]string{13: post13, 19: post19})
	gh.Testing().ClearEdits()

	// A poster with a fresh name still posts nothing: the earlier posts
	// are remembered (presumably recorded in db, not per poster name —
	// verify against the implementation).
	p = New(lg, db, gh, vdb, dc, "postname2")
	p.EnableProject("rsc/markdown")
	p.SetTimeLimit(time.Time{})
	p.EnablePosts()
	p.Run()
	checkEdits(t, gh.Testing().Edits(), nil)
	gh.Testing().ClearEdits()

	// Each skip filter below should suppress the post on issue 19
	// while leaving the post on issue 13 intact.
	for i := range 4 {
		p := New(lg, db, gh, vdb, dc, "postnameloop."+fmt.Sprint(i))
		p.EnableProject("rsc/markdown")
		p.SetTimeLimit(time.Time{})
		switch i {
		case 0:
			p.SkipTitlePrefix("feature: ")
		case 1:
			p.SkipTitleSuffix("for heading")
		case 2:
			p.SkipBodyContains("For example, this heading")
		case 3:
			p.SkipBodyContains("For example, this heading")
			p.SkipBodyContains("ZZZ")
		}
		p.EnablePosts()
		p.deletePosted()
		p.Run()
		checkEdits(t, gh.Testing().Edits(), map[int64]string{13: post13})
		gh.Testing().ClearEdits()
	}

	p = New(lg, db, gh, vdb, dc, "postname3")
	p.EnableProject("rsc/markdown")
	p.SetMinScore(2.0) // impossible
	p.SetTimeLimit(time.Time{})
	p.EnablePosts()
	p.deletePosted()
	p.Run()
	checkEdits(t, gh.Testing().Edits(), nil)
	gh.Testing().ClearEdits()

	p = New(lg, db, gh, vdb, dc, "postname4")
	p.EnableProject("rsc/markdown")
	p.SetMinScore(2.0) // impossible
	p.SetTimeLimit(time.Date(2222, 1, 1, 1, 1, 1, 1, time.UTC))
	p.EnablePosts()
	p.deletePosted()
	p.Run()
	checkEdits(t, gh.Testing().Edits(), nil)
	gh.Testing().ClearEdits()

	p = New(lg, db, gh, vdb, dc, "postname5")
	p.EnableProject("rsc/markdown")
	p.SetMinScore(0)   // everything
	p.SetMaxResults(0) // except none
	p.SetTimeLimit(time.Time{})
	p.EnablePosts()
	p.deletePosted()
	p.Run()
	checkEdits(t, gh.Testing().Edits(), nil)
	gh.Testing().ClearEdits()

}

// checkEdits checks that edits contains exactly one posted comment for each
// issue listed in want (issue number → expected body, compared after
// trimming surrounding space) on rsc/markdown, and nothing else.
// It consumes entries from want and stops the test on the first failure.
func checkEdits(t *testing.T, edits []*github.TestingEdit, want map[int64]string) {
	t.Helper()
	for _, e := range edits {
		if e.Project != "rsc/markdown" {
			t.Errorf("posted to unexpected project: %v", e)
			continue
		}
		if e.Comment != 0 || e.IssueCommentChanges == nil {
			t.Errorf("non-post edit: %v", e)
			continue
		}
		w, ok := want[e.Issue]
		if !ok {
			t.Errorf("post to unexpected issue: %v", e)
			continue
		}
		delete(want, e.Issue)
		if strings.TrimSpace(e.IssueCommentChanges.Body) != strings.TrimSpace(w) {
			t.Errorf("rsc/markdown#%d: wrong post:\n%s", e.Issue,
				string(diff.Diff("want", []byte(w), "have", []byte(e.IssueCommentChanges.Body))))
		}
	}
	for _, issue := range slices.Sorted(maps.Keys(want)) {
		t.Errorf("did not see post on rsc/markdown#%d", issue)
	}
	if t.Failed() {
		t.FailNow()
	}
}

// post13 and post19 are the golden comment bodies for rsc/markdown#13 and
// rsc/markdown#19. QUOT stands in for a backtick (see unQUOT), since a raw
// string literal cannot contain one.
var post13 = unQUOT(`**Related Issues**

 - [goldmark and markdown diff with h1 inside p #6 (closed)](https://github.com/rsc/markdown/issues/6)
 - [Support escaped \QUOT|\QUOT in table cells #9 (closed)](https://github.com/rsc/markdown/issues/9)
 - [markdown: fix markdown printing for inline code #12 (closed)](https://github.com/rsc/markdown/issues/12)
 - [markdown: emit Info in CodeBlock markdown #18 (closed)](https://github.com/rsc/markdown/issues/18)
 - [feature: synthesize lowercase anchors for heading #19](https://github.com/rsc/markdown/issues/19)
 - [Replace newlines with spaces in alt text #4 (closed)](https://github.com/rsc/markdown/issues/4)
 - [allow capital X in task list items #2 (closed)](https://github.com/rsc/markdown/issues/2)
 - [build(deps): bump golang.org/x/text from 0.3.6 to 0.3.8 in /rmplay #10](https://github.com/rsc/tmp/issues/10)
 - [Render reference links in Markdown #14 (closed)](https://github.com/rsc/markdown/issues/14)
 - [Render reference links in Markdown #15 (closed)](https://github.com/rsc/markdown/issues/15)

(Emoji vote if this was helpful or unhelpful; more detailed feedback welcome in [this discussion](https://github.com/golang/go/discussions/67901).)
`)

var post19 = unQUOT(`**Related Issues**

 - [allow capital X in task list items #2 (closed)](https://github.com/rsc/markdown/issues/2)
 - [Support escaped \QUOT|\QUOT in table cells #9 (closed)](https://github.com/rsc/markdown/issues/9)
 - [goldmark and markdown diff with h1 inside p #6 (closed)](https://github.com/rsc/markdown/issues/6)
 - [Render reference links in Markdown #14 (closed)](https://github.com/rsc/markdown/issues/14)
 - [Render reference links in Markdown #15 (closed)](https://github.com/rsc/markdown/issues/15)
 - [Empty column heading not recognized in table #7 (closed)](https://github.com/rsc/markdown/issues/7)
 - [Correctly render reference links in Markdown #13](https://github.com/rsc/markdown/issues/13)
 - [markdown: fix markdown printing for inline code #12 (closed)](https://github.com/rsc/markdown/issues/12)
 - [Replace newlines with spaces in alt text #4 (closed)](https://github.com/rsc/markdown/issues/4)
 - [build(deps): bump golang.org/x/text from 0.3.6 to 0.3.8 in /rmplay #10](https://github.com/rsc/tmp/issues/10)

(Emoji vote if this was helpful or unhelpful; more detailed feedback welcome in [this discussion](https://github.com/golang/go/discussions/67901).)
`)

// unQUOT replaces each QUOT with a backtick, letting the golden text above
// be written inside a raw string literal.
func unQUOT(s string) string { return strings.ReplaceAll(s, "QUOT", "`") }
-------------------------------------------------------------------------------- /internal/commentfix/fix_test.go: --------------------------------------------------------------------------------

// Copyright 2024 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package commentfix

import (
	"bytes"
	"io"
	"path/filepath"
	"strings"
	"testing"
	"text/template"
	"time"

	"golang.org/x/tools/txtar"
	"rsc.io/gaby/internal/diff"
	"rsc.io/gaby/internal/github"
	"rsc.io/gaby/internal/storage"
	"rsc.io/gaby/internal/testutil"
)

// TestTestdata runs the golden tests in testdata/*.txt.
// Each txtar file's comment is a template executed against a Fixer to
// configure it; the files then come in .in/.out pairs, where Fix applied
// to the .in body must produce exactly the .out body.
func TestTestdata(t *testing.T) {
	files, err := filepath.Glob("testdata/*.txt")
	testutil.Check(t, err)
	for _, file := range files {
		t.Run(filepath.Base(file), func(t *testing.T) {
			a, err := txtar.ParseFile(file)
			testutil.Check(t, err)
			var f Fixer
			tmpl, err := new(template.Template).Parse(string(a.Comment))
			testutil.Check(t, err)
			testutil.Check(t, tmpl.Execute(io.Discard, &f))
			for i := 0; i+2 <= len(a.Files); {
				in := a.Files[i]
				out := a.Files[i+1]
				i += 2
				name := strings.TrimSuffix(in.Name, ".in")
				if name != strings.TrimSuffix(out.Name, ".out") {
					t.Fatalf("mismatched file pair: %s and %s", in.Name, out.Name)
				}
				t.Run(name, func(t *testing.T) {
					newBody, fixed := f.Fix(string(in.Data))
					// Fix must report fixed exactly when it returns a new body.
					if fixed != (newBody != "") {
						t.Fatalf("Fix() = %q, %v (len(newBody)=%d but fixed=%v)", newBody, fixed, len(newBody), fixed)
					}
					if newBody != string(out.Data) {
						t.Fatalf("Fix: incorrect output:\n%s", string(diff.Diff("want", []byte(out.Data), "have", []byte(newBody))))
					}
				})
			}
		})
	}
}

// TestPanics checks that EnableEdits, EnableProject, and Run all panic
// when called on a zero Fixer.
func TestPanics(t *testing.T) {
	callRecover := func() { recover() }

	func() {
		defer callRecover()
		var f Fixer
		f.EnableEdits()
		t.Errorf("EnableEdits on zero Fixer did not panic")
	}()

	func() {
		defer callRecover()
		var f Fixer
		f.EnableProject("abc/xyz")
		t.Errorf("EnableProject on zero Fixer did not panic")
	}()

	func() {
		defer callRecover()
		var f Fixer
		f.Run()
		t.Errorf("Run on zero Fixer did not panic")
	}()
}

// TestErrors checks that AutoLink, ReplaceText, and ReplaceURL reject an
// invalid regular expression with an error.
func TestErrors(t *testing.T) {
	var f Fixer
	if err := f.AutoLink(`\`, ""); err == nil {
		t.Fatalf("AutoLink succeeded on bad regexp")
	}
	if err := f.ReplaceText(`\`, ""); err == nil {
		t.Fatalf("ReplaceText succeeded on bad regexp")
	}
	if err := f.ReplaceURL(`\`, ""); err == nil {
		t.Fatalf("ReplaceText succeeded on bad regexp")
	}
}

// TestGitHub runs the Fixer against an in-memory GitHub fixture (one issue,
// one pull request, and two comments on the issue), checking the logged
// behavior under different cutoffs, edit enablement, and project settings.
func TestGitHub(t *testing.T) {
	// testGH builds a fresh in-memory GitHub client populated with the fixture.
	testGH := func() *github.Client {
		db := storage.MemDB()
		gh := github.New(testutil.Slogger(t), db, nil, nil)
		gh.Testing().AddIssue("rsc/tmp", &github.Issue{
			Number:    18,
			Title:     "spellchecking",
			Body:      "Contexts are cancelled.",
			CreatedAt: "2024-06-17T20:16:49-04:00",
			UpdatedAt: "2024-06-17T20:16:49-04:00",
		})
		gh.Testing().AddIssue("rsc/tmp", &github.Issue{
			Number:      19,
			Title:       "spellchecking",
			Body:        "Contexts are cancelled.",
			CreatedAt:   "2024-06-17T20:16:49-04:00",
			UpdatedAt:   "2024-06-17T20:16:49-04:00",
			PullRequest: new(struct{}),
		})

		gh.Testing().AddIssueComment("rsc/tmp", 18, &github.IssueComment{
			Body:      "No really, contexts are cancelled.",
			CreatedAt: "2024-06-17T20:16:49-04:00",
			UpdatedAt: "2024-06-17T20:16:49-04:00",
		})

		gh.Testing().AddIssueComment("rsc/tmp", 18, &github.IssueComment{
			Body:      "Completely unrelated.",
			CreatedAt: "2024-06-17T20:16:49-04:00",
			UpdatedAt: "2024-06-17T20:16:49-04:00",
		})

		return gh
	}

	// Check for comment with too-new cutoff and edits disabled.
	// Finds nothing but also no-op.
	gh := testGH()
	lg, buf := testutil.SlogBuffer()
	f := New(lg, gh, "fixer1")
	f.SetStderr(testutil.LogWriter(t))
	f.EnableProject("rsc/tmp")
	f.SetTimeLimit(time.Date(2222, 1, 1, 1, 1, 1, 1, time.UTC))
	f.ReplaceText("cancelled", "canceled")
	f.Run()
	// t.Logf("output:\n%s", buf)
	if bytes.Contains(buf.Bytes(), []byte("commentfix rewrite")) {
		t.Fatalf("logs mention rewrite of old comment:\n%s", buf.Bytes())
	}

	// Check again with old enough cutoff.
	// Finds comment but does not edit, does not advance cursor.
	f = New(lg, gh, "fixer1")
	f.SetStderr(testutil.LogWriter(t))
	f.EnableProject("rsc/tmp")
	f.SetTimeLimit(time.Time{})
	f.ReplaceText("cancelled", "canceled")
	f.Run()
	// t.Logf("output:\n%s", buf)
	if !bytes.Contains(buf.Bytes(), []byte("commentfix rewrite")) {
		t.Fatalf("logs do not mention rewrite of comment:\n%s", buf.Bytes())
	}
	if bytes.Contains(buf.Bytes(), []byte("editing github")) {
		t.Fatalf("logs incorrectly mention editing github:\n%s", buf.Bytes())
	}

	// Run with too-new cutoff and edits enabled, should make issue not seen again.
	buf.Truncate(0)
	f.SetTimeLimit(time.Date(2222, 1, 1, 1, 1, 1, 1, time.UTC))
	f.EnableEdits()
	f.Run()
	// t.Logf("output:\n%s", buf)
	if bytes.Contains(buf.Bytes(), []byte("commentfix rewrite")) {
		t.Fatalf("logs incorrectly mention rewrite of comment:\n%s", buf.Bytes())
	}

	f.SetTimeLimit(time.Time{})
	f.Run()
	// t.Logf("output:\n%s", buf)
	if bytes.Contains(buf.Bytes(), []byte("commentfix rewrite")) {
		t.Fatalf("logs incorrectly mention rewrite of comment:\n%s", buf.Bytes())
	}

	// Write comment (now using fixer2 to avoid 'marked as old' in fixer1).
	lg, buf = testutil.SlogBuffer()
	f = New(lg, gh, "fixer2")
	f.SetStderr(testutil.LogWriter(t))
	f.EnableProject("rsc/tmp")
	f.ReplaceText("cancelled", "canceled")
	f.SetTimeLimit(time.Time{})
	f.EnableEdits()
	f.Run()
	// t.Logf("output:\n%s", buf)
	if !bytes.Contains(buf.Bytes(), []byte("commentfix rewrite")) {
		t.Fatalf("logs do not mention rewrite of comment:\n%s", buf.Bytes())
	}
	if !bytes.Contains(buf.Bytes(), []byte("editing github")) {
		t.Fatalf("logs do not mention editing github:\n%s", buf.Bytes())
	}
	if !bytes.Contains(buf.Bytes(), []byte(`editing github" url=https://api.github.com/repos/rsc/tmp/issues/18`)) {
		t.Fatalf("logs do not mention editing issue body:\n%s", buf.Bytes())
	}
	if bytes.Contains(buf.Bytes(), []byte(`editing github" url=https://api.github.com/repos/rsc/tmp/issues/19`)) {
		t.Fatalf("logs incorrectly mention editing pull request body:\n%s", buf.Bytes())
	}
	if !bytes.Contains(buf.Bytes(), []byte(`editing github" url=https://api.github.com/repos/rsc/tmp/issues/comments/10000000001`)) {
		t.Fatalf("logs do not mention editing issue comment:\n%s", buf.Bytes())
	}
	if bytes.Contains(buf.Bytes(), []byte("ERROR")) {
		t.Fatalf("editing failed:\n%s", buf.Bytes())
	}

	// Try again; comment should now be marked old in watcher.
	lg, buf = testutil.SlogBuffer()
	f = New(lg, gh, "fixer2")
	f.SetStderr(testutil.LogWriter(t))
	f.EnableProject("rsc/tmp")
	f.ReplaceText("cancelled", "canceled")
	f.EnableEdits()
	f.SetTimeLimit(time.Time{})
	f.Run()
	// t.Logf("output:\n%s", buf)
	if bytes.Contains(buf.Bytes(), []byte("commentfix rewrite")) {
		t.Fatalf("logs incorrectly mention rewrite of comment:\n%s", buf.Bytes())
	}

	// Check that not enabling the project doesn't edit comments.
	lg, buf = testutil.SlogBuffer()
	f = New(lg, gh, "fixer3")
	f.SetStderr(testutil.LogWriter(t))
	f.EnableProject("xyz/tmp")
	f.ReplaceText("cancelled", "canceled")
	f.EnableEdits()
	f.SetTimeLimit(time.Time{})
	f.Run()
	// t.Logf("output:\n%s", buf)
	if bytes.Contains(buf.Bytes(), []byte("commentfix rewrite")) {
		t.Fatalf("logs incorrectly mention rewrite of comment:\n%s", buf.Bytes())
	}
}
-------------------------------------------------------------------------------- /internal/diff/diff.go: --------------------------------------------------------------------------------

// Copyright 2022 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package diff

import (
	"bytes"
	"fmt"
	"sort"
	"strings"
)

// A pair is a pair of values tracked for both the x and y side of a diff.
// It is typically a pair of line indexes.
type pair struct{ x, y int }

// Diff returns an anchored diff of the two texts old and new
// in the “unified diff” format. If old and new are identical,
// Diff returns a nil slice (no output).
//
// Unix diff implementations typically look for a diff with
// the smallest number of lines inserted and removed,
// which can in the worst case take time quadratic in the
// number of lines in the texts. As a result, many implementations
// either can be made to run for a long time or cut off the search
// after a predetermined amount of work.
//
// In contrast, this implementation looks for a diff with the
// smallest number of “unique” lines inserted and removed,
// where unique means a line that appears just once in both old and new.
// We call this an “anchored diff” because the unique lines anchor
// the chosen matching regions.
An anchored diff is usually clearer 34 | // than a standard diff, because the algorithm does not try to 35 | // reuse unrelated blank lines or closing braces. 36 | // The algorithm also guarantees to run in O(n log n) time 37 | // instead of the standard O(n²) time. 38 | // 39 | // Some systems call this approach a “patience diff,” named for 40 | // the “patience sorting” algorithm, itself named for a solitaire card game. 41 | // We avoid that name for two reasons. First, the name has been used 42 | // for a few different variants of the algorithm, so it is imprecise. 43 | // Second, the name is frequently interpreted as meaning that you have 44 | // to wait longer (to be patient) for the diff, meaning that it is a slower algorithm, 45 | // when in fact the algorithm is faster than the standard one. 46 | func Diff(oldName string, old []byte, newName string, new []byte) []byte { 47 | if bytes.Equal(old, new) { 48 | return nil 49 | } 50 | x := lines(old) 51 | y := lines(new) 52 | 53 | // Print diff header. 54 | var out bytes.Buffer 55 | fmt.Fprintf(&out, "diff %s %s\n", oldName, newName) 56 | fmt.Fprintf(&out, "--- %s\n", oldName) 57 | fmt.Fprintf(&out, "+++ %s\n", newName) 58 | 59 | // Loop over matches to consider, 60 | // expanding each match to include surrounding lines, 61 | // and then printing diff chunks. 62 | // To avoid setup/teardown cases outside the loop, 63 | // tgs returns a leading {0,0} and trailing {len(x), len(y)} pair 64 | // in the sequence of matches. 65 | var ( 66 | done pair // printed up to x[:done.x] and y[:done.y] 67 | chunk pair // start lines of current chunk 68 | count pair // number of lines from each side in current chunk 69 | ctext []string // lines for current chunk 70 | ) 71 | for _, m := range tgs(x, y) { 72 | if m.x < done.x { 73 | // Already handled scanning forward from earlier match. 74 | continue 75 | } 76 | 77 | // Expand matching lines as far as possible, 78 | // establishing that x[start.x:end.x] == y[start.y:end.y]. 
79 | // Note that on the first (or last) iteration we may (or definitely do) 80 | // have an empty match: start.x==end.x and start.y==end.y. 81 | start := m 82 | for start.x > done.x && start.y > done.y && x[start.x-1] == y[start.y-1] { 83 | start.x-- 84 | start.y-- 85 | } 86 | end := m 87 | for end.x < len(x) && end.y < len(y) && x[end.x] == y[end.y] { 88 | end.x++ 89 | end.y++ 90 | } 91 | 92 | // Emit the mismatched lines before start into this chunk. 93 | // (No effect on first sentinel iteration, when start = {0,0}.) 94 | for _, s := range x[done.x:start.x] { 95 | ctext = append(ctext, "-"+s) 96 | count.x++ 97 | } 98 | for _, s := range y[done.y:start.y] { 99 | ctext = append(ctext, "+"+s) 100 | count.y++ 101 | } 102 | 103 | // If we're not at EOF and have too few common lines, 104 | // the chunk includes all the common lines and continues. 105 | const C = 3 // number of context lines 106 | if (end.x < len(x) || end.y < len(y)) && 107 | (end.x-start.x < C || (len(ctext) > 0 && end.x-start.x < 2*C)) { 108 | for _, s := range x[start.x:end.x] { 109 | ctext = append(ctext, " "+s) 110 | count.x++ 111 | count.y++ 112 | } 113 | done = end 114 | continue 115 | } 116 | 117 | // End chunk with common lines for context. 118 | if len(ctext) > 0 { 119 | n := end.x - start.x 120 | if n > C { 121 | n = C 122 | } 123 | for _, s := range x[start.x : start.x+n] { 124 | ctext = append(ctext, " "+s) 125 | count.x++ 126 | count.y++ 127 | } 128 | done = pair{start.x + n, start.y + n} 129 | 130 | // Format and emit chunk. 131 | // Convert line numbers to 1-indexed. 132 | // Special case: empty file shows up as 0,0 not 1,0. 133 | if count.x > 0 { 134 | chunk.x++ 135 | } 136 | if count.y > 0 { 137 | chunk.y++ 138 | } 139 | fmt.Fprintf(&out, "@@ -%d,%d +%d,%d @@\n", chunk.x, count.x, chunk.y, count.y) 140 | for _, s := range ctext { 141 | out.WriteString(s) 142 | } 143 | count.x = 0 144 | count.y = 0 145 | ctext = ctext[:0] 146 | } 147 | 148 | // If we reached EOF, we're done. 
149 | if end.x >= len(x) && end.y >= len(y) { 150 | break 151 | } 152 | 153 | // Otherwise start a new chunk. 154 | chunk = pair{end.x - C, end.y - C} 155 | for _, s := range x[chunk.x:end.x] { 156 | ctext = append(ctext, " "+s) 157 | count.x++ 158 | count.y++ 159 | } 160 | done = end 161 | } 162 | 163 | return out.Bytes() 164 | } 165 | 166 | // lines returns the lines in the file x, including newlines. 167 | // If the file does not end in a newline, one is supplied 168 | // along with a warning about the missing newline. 169 | func lines(x []byte) []string { 170 | l := strings.SplitAfter(string(x), "\n") 171 | if l[len(l)-1] == "" { 172 | l = l[:len(l)-1] 173 | } else { 174 | // Treat last line as having a message about the missing newline attached, 175 | // using the same text as BSD/GNU diff (including the leading backslash). 176 | l[len(l)-1] += "\n\\ No newline at end of file\n" 177 | } 178 | return l 179 | } 180 | 181 | // tgs returns the pairs of indexes of the longest common subsequence 182 | // of unique lines in x and y, where a unique line is one that appears 183 | // once in x and once in y. 184 | // 185 | // The longest common subsequence algorithm is as described in 186 | // Thomas G. Szymanski, “A Special Case of the Maximal Common 187 | // Subsequence Problem,” Princeton TR #170 (January 1975), 188 | // available at https://research.swtch.com/tgs170.pdf. 189 | func tgs(x, y []string) []pair { 190 | // Count the number of times each string appears in a and b. 191 | // We only care about 0, 1, many, counted as 0, -1, -2 192 | // for the x side and 0, -4, -8 for the y side. 193 | // Using negative numbers now lets us distinguish positive line numbers later. 194 | m := make(map[string]int) 195 | for _, s := range x { 196 | if c := m[s]; c > -2 { 197 | m[s] = c - 1 198 | } 199 | } 200 | for _, s := range y { 201 | if c := m[s]; c > -8 { 202 | m[s] = c - 4 203 | } 204 | } 205 | 206 | // Now unique strings can be identified by m[s] = -1+-4. 
207 | // 208 | // Gather the indexes of those strings in x and y, building: 209 | // xi[i] = increasing indexes of unique strings in x. 210 | // yi[i] = increasing indexes of unique strings in y. 211 | // inv[i] = index j such that x[xi[i]] = y[yi[j]]. 212 | var xi, yi, inv []int 213 | for i, s := range y { 214 | if m[s] == -1+-4 { 215 | m[s] = len(yi) 216 | yi = append(yi, i) 217 | } 218 | } 219 | for i, s := range x { 220 | if j, ok := m[s]; ok && j >= 0 { 221 | xi = append(xi, i) 222 | inv = append(inv, j) 223 | } 224 | } 225 | 226 | // Apply Algorithm A from Szymanski's paper. 227 | // In those terms, A = J = inv and B = [0, n). 228 | // We add sentinel pairs {0,0}, and {len(x),len(y)} 229 | // to the returned sequence, to help the processing loop. 230 | J := inv 231 | n := len(xi) 232 | T := make([]int, n) 233 | L := make([]int, n) 234 | for i := range T { 235 | T[i] = n + 1 236 | } 237 | for i := 0; i < n; i++ { 238 | k := sort.Search(n, func(k int) bool { 239 | return T[k] >= J[i] 240 | }) 241 | T[k] = J[i] 242 | L[i] = k + 1 243 | } 244 | k := 0 245 | for _, v := range L { 246 | if k < v { 247 | k = v 248 | } 249 | } 250 | seq := make([]pair, 2+k) 251 | seq[1+k] = pair{len(x), len(y)} // sentinel at end 252 | lastj := n 253 | for i := n - 1; i >= 0; i-- { 254 | if L[i] == k && J[i] < lastj { 255 | seq[k] = pair{xi[i], yi[J[i]]} 256 | k-- 257 | } 258 | } 259 | seq[0] = pair{0, 0} // sentinel at start 260 | return seq 261 | } 262 | -------------------------------------------------------------------------------- /internal/storage/mem.go: -------------------------------------------------------------------------------- 1 | // Copyright 2024 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 

package storage

import (
	"bytes"
	"fmt"
	"iter"
	"log/slog"
	"slices"
	"sync"

	"rsc.io/gaby/internal/llm"
	"rsc.io/omap"
	"rsc.io/ordered"
	"rsc.io/top"
)

// A MemLocker is a single-process implementation
// of the database Lock and Unlock methods,
// suitable if there is only one process accessing the
// database at a time.
type MemLocker struct {
	mu    sync.Mutex // guards locks map
	locks map[string]*sync.Mutex
}

// Lock locks the mutex with the given name,
// creating it on first use.
func (l *MemLocker) Lock(name string) {
	l.mu.Lock()
	if l.locks == nil {
		l.locks = make(map[string]*sync.Mutex)
	}
	mu := l.locks[name]
	if mu == nil {
		mu = new(sync.Mutex)
		l.locks[name] = mu
	}
	l.mu.Unlock()

	// Acquire the named mutex after releasing l.mu, so that
	// blocking on one name does not block operations on other names.
	mu.Lock()
}

// Unlock unlocks the mutex with the given name.
// It panics if the named mutex has never been locked.
func (l *MemLocker) Unlock(name string) {
	l.mu.Lock()
	mu := l.locks[name]
	l.mu.Unlock()
	if mu == nil {
		panic("Unlock of never locked key")
	}
	mu.Unlock()
}

// MemDB returns an in-memory DB implementation.
func MemDB() DB {
	return new(memDB)
}

// A memDB is an in-memory DB implementation.
type memDB struct {
	MemLocker
	mu   sync.RWMutex // guards data
	data omap.Map[string, []byte]
}

func (*memDB) Close() {}

func (*memDB) Panic(msg string, args ...any) {
	Panic(msg, args...)
}

// Get returns the value associated with the key.
func (db *memDB) Get(key []byte) (val []byte, ok bool) {
	db.mu.RLock()
	v, ok := db.data.Get(string(key))
	db.mu.RUnlock()
	if ok {
		// Clone so the caller cannot mutate the stored value.
		v = bytes.Clone(v)
	}
	return v, ok
}

// Scan returns an iterator over all key-value pairs
// in the range start ≤ key ≤ end.
88 | func (db *memDB) Scan(start, end []byte) iter.Seq2[[]byte, func() []byte] { 89 | lo := string(start) 90 | hi := string(end) 91 | return func(yield func(key []byte, val func() []byte) bool) { 92 | db.mu.RLock() 93 | locked := true 94 | defer func() { 95 | if locked { 96 | db.mu.RUnlock() 97 | } 98 | }() 99 | for k, v := range db.data.Scan(lo, hi) { 100 | key := []byte(k) 101 | val := func() []byte { return bytes.Clone(v) } 102 | db.mu.RUnlock() 103 | locked = false 104 | if !yield(key, val) { 105 | return 106 | } 107 | db.mu.RLock() 108 | locked = true 109 | } 110 | } 111 | } 112 | 113 | // Delete deletes any entry with the given key. 114 | func (db *memDB) Delete(key []byte) { 115 | db.mu.Lock() 116 | defer db.mu.Unlock() 117 | 118 | db.data.Delete(string(key)) 119 | } 120 | 121 | // DeleteRange deletes all entries with start ≤ key ≤ end. 122 | func (db *memDB) DeleteRange(start, end []byte) { 123 | db.mu.Lock() 124 | defer db.mu.Unlock() 125 | 126 | db.data.DeleteRange(string(start), string(end)) 127 | } 128 | 129 | // Set sets the value associated with key to val. 130 | func (db *memDB) Set(key, val []byte) { 131 | db.mu.Lock() 132 | defer db.mu.Unlock() 133 | 134 | db.data.Set(string(key), bytes.Clone(val)) 135 | } 136 | 137 | // Batch returns a new batch. 138 | func (db *memDB) Batch() Batch { 139 | return &memBatch{db: db} 140 | } 141 | 142 | // Flush flushes everything to persistent storage. 143 | // Since this is an in-memory database, the memory is as persistent as it gets. 144 | func (db *memDB) Flush() { 145 | } 146 | 147 | // A memBatch is a Batch for a memDB. 
148 | type memBatch struct { 149 | db *memDB // underlying database 150 | ops []func() // operations to apply 151 | } 152 | 153 | func (b *memBatch) Set(key, val []byte) { 154 | k := string(key) 155 | v := bytes.Clone(val) 156 | b.ops = append(b.ops, func() { b.db.data.Set(k, v) }) 157 | } 158 | 159 | func (b *memBatch) Delete(key []byte) { 160 | k := string(key) 161 | b.ops = append(b.ops, func() { b.db.data.Delete(k) }) 162 | } 163 | 164 | func (b *memBatch) DeleteRange(start, end []byte) { 165 | s := string(start) 166 | e := string(end) 167 | b.ops = append(b.ops, func() { b.db.data.DeleteRange(s, e) }) 168 | } 169 | 170 | func (b *memBatch) MaybeApply() bool { 171 | return false 172 | } 173 | 174 | func (b *memBatch) Apply() { 175 | b.db.mu.Lock() 176 | defer b.db.mu.Unlock() 177 | 178 | for _, op := range b.ops { 179 | op() 180 | } 181 | } 182 | 183 | // A memVectorDB is a VectorDB implementing in-memory search 184 | // but storing its vectors in an underlying DB. 185 | type memVectorDB struct { 186 | storage DB 187 | slog *slog.Logger 188 | namespace string 189 | 190 | mu sync.RWMutex 191 | cache map[string][]float32 // in-memory cache of all vectors, indexed by id 192 | } 193 | 194 | // MemVectorDB returns a VectorDB that stores its vectors in db 195 | // but uses a cached, in-memory copy to implement Search using 196 | // a brute-force scan. 197 | // 198 | // The namespace is incorporated into the keys used in the underlying db, 199 | // to allow multiple vector databases to be stored in a single [DB]. 200 | // 201 | // When MemVectorDB is called, it reads all previously stored vectors 202 | // from db; after that, changes must be made using the MemVectorDB 203 | // Set method. 204 | // 205 | // A MemVectorDB requires approximately 3kB of memory per stored vector. 206 | // 207 | // The db keys used by a MemVectorDB have the form 208 | // 209 | // ordered.Encode("llm.Vector", namespace, id) 210 | // 211 | // where id is the document ID passed to Set. 
212 | func MemVectorDB(db DB, lg *slog.Logger, namespace string) VectorDB { 213 | // NOTE: The worst case score error in a dot product over 768 entries 214 | // caused by quantization error of e is approximately 54e, 215 | // so quantizing to int16s would only introduce a maximum score 216 | // error of 0.00165, which would not change results significantly. 217 | // So we could cut the memory per stored vector in half by 218 | // quantizing to int16. 219 | 220 | vdb := &memVectorDB{ 221 | storage: db, 222 | slog: lg, 223 | namespace: namespace, 224 | cache: make(map[string][]float32), 225 | } 226 | 227 | // Load all the previously-stored vectors. 228 | vdb.cache = make(map[string][]float32) 229 | for key, getVal := range vdb.storage.Scan( 230 | ordered.Encode("llm.Vector", namespace), 231 | ordered.Encode("llm.Vector", namespace, ordered.Inf)) { 232 | 233 | var id string 234 | if err := ordered.Decode(key, nil, nil, &id); err != nil { 235 | // unreachable except data corruption 236 | panic(fmt.Errorf("MemVectorDB decode key=%v: %v", Fmt(key), err)) 237 | } 238 | val := getVal() 239 | if len(val)%4 != 0 { 240 | // unreachable except data corruption 241 | panic(fmt.Errorf("MemVectorDB decode key=%v bad len(val)=%d", Fmt(key), len(val))) 242 | } 243 | var vec llm.Vector 244 | vec.Decode(val) 245 | vdb.cache[id] = vec 246 | } 247 | 248 | vdb.slog.Info("loaded vectordb", "n", len(vdb.cache), "namespace", namespace) 249 | return vdb 250 | } 251 | 252 | func (db *memVectorDB) Set(id string, vec llm.Vector) { 253 | db.storage.Set(ordered.Encode("llm.Vector", db.namespace, id), vec.Encode()) 254 | 255 | db.mu.Lock() 256 | db.cache[id] = slices.Clone(vec) 257 | db.mu.Unlock() 258 | } 259 | 260 | func (db *memVectorDB) Get(name string) (llm.Vector, bool) { 261 | db.mu.RLock() 262 | vec, ok := db.cache[name] 263 | db.mu.RUnlock() 264 | return vec, ok 265 | } 266 | 267 | func (db *memVectorDB) Search(target llm.Vector, n int) []VectorResult { 268 | db.mu.RLock() 269 | defer 
db.mu.RUnlock() 270 | best := top.New(n, VectorResult.cmp) 271 | for name, vec := range db.cache { 272 | if len(vec) != len(target) { 273 | continue 274 | } 275 | best.Add(VectorResult{name, target.Dot(vec)}) 276 | } 277 | return best.Take() 278 | } 279 | 280 | func (db *memVectorDB) Flush() { 281 | db.storage.Flush() 282 | } 283 | 284 | // memVectorBatch implements VectorBatch for a memVectorDB. 285 | type memVectorBatch struct { 286 | db *memVectorDB // underlying memVectorDB 287 | sb Batch // batch for underlying DB 288 | w map[string]llm.Vector // vectors to write 289 | } 290 | 291 | func (db *memVectorDB) Batch() VectorBatch { 292 | return &memVectorBatch{db, db.storage.Batch(), make(map[string]llm.Vector)} 293 | } 294 | 295 | func (b *memVectorBatch) Set(name string, vec llm.Vector) { 296 | b.sb.Set(ordered.Encode("llm.Vector", b.db.namespace, name), vec.Encode()) 297 | 298 | b.w[name] = slices.Clone(vec) 299 | } 300 | 301 | func (b *memVectorBatch) MaybeApply() bool { 302 | if !b.sb.MaybeApply() { 303 | return false 304 | } 305 | b.Apply() 306 | return true 307 | } 308 | 309 | func (b *memVectorBatch) Apply() { 310 | b.sb.Apply() 311 | 312 | b.db.mu.Lock() 313 | defer b.db.mu.Unlock() 314 | 315 | for name, vec := range b.w { 316 | b.db.cache[name] = vec 317 | } 318 | clear(b.w) 319 | } 320 | -------------------------------------------------------------------------------- /internal/github/data.go: -------------------------------------------------------------------------------- 1 | // Copyright 2024 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 

package github

import (
	"encoding/json"
	"fmt"
	"iter"
	"math"
	"strconv"
	"strings"

	"rsc.io/gaby/internal/storage"
	"rsc.io/gaby/internal/storage/timed"
	"rsc.io/ordered"
)

// LookupIssueURL looks up an issue by URL,
// only consulting the database (not actual GitHub).
func (c *Client) LookupIssueURL(url string) (*Issue, error) {
	// bad reports that url is not a well-formed GitHub issue URL.
	bad := func() (*Issue, error) {
		return nil, fmt.Errorf("not a github URL: %q", url)
	}
	proj, ok := strings.CutPrefix(url, "https://github.com/")
	if !ok {
		return bad()
	}
	i := strings.LastIndex(proj, "/issues/")
	if i < 0 {
		return bad()
	}
	proj, num := proj[:i], proj[i+len("/issues/"):]
	n, err := strconv.ParseInt(num, 10, 64)
	if err != nil || n <= 0 {
		return bad()
	}

	// Scan the single-issue range [n, n], looking for the
	// issue creation event ("/issues").
	for e := range c.Events(proj, n, n) {
		if e.API == "/issues" {
			return e.Typed.(*Issue), nil
		}
	}
	return nil, fmt.Errorf("%s#%d not in database", proj, n)
}

// An Event is a single GitHub issue event stored in the database.
type Event struct {
	DBTime  timed.DBTime // when event was last written
	Project string       // project ("golang/go")
	Issue   int64        // issue number
	API     string       // API endpoint for event: "/issues", "/issues/comments", or "/issues/events"
	ID      int64        // ID of event; each API has a different ID space. (Project, Issue, API, ID) is assumed unique
	JSON    []byte       // JSON for the event data
	Typed   any          // Typed unmarshaling of the event data, of type *Issue, *IssueComment, or *IssueEvent
}

// Events returns an iterator over issue events for the given project,
// limited to issues in the range issueMin ≤ issue ≤ issueMax.
// If issueMax < 0, there is no upper limit.
62 | // The events are iterated over in (Project, Issue, API, ID) order, 63 | // so "/issues" events come first, then "/issues/comments", then "/issues/events". 64 | // Within a specific API, the events are ordered by increasing ID, 65 | // which corresponds to increasing event time on GitHub. 66 | func (c *Client) Events(project string, issueMin, issueMax int64) iter.Seq[*Event] { 67 | return func(yield func(*Event) bool) { 68 | start := o(project, issueMin) 69 | if issueMax < 0 { 70 | issueMax = math.MaxInt64 71 | } 72 | end := o(project, issueMax, ordered.Inf) 73 | for t := range timed.Scan(c.db, "githubdl.Event", start, end) { 74 | if !yield(c.decodeEvent(t)) { 75 | return 76 | } 77 | } 78 | } 79 | } 80 | 81 | // EventsAfter returns an iterator over events in the given project after DBTime t, 82 | // which should be e.DBTime from the most recent processed event. 83 | // The events are iterated over in DBTime order, so the DBTime of the last 84 | // successfully processed event can be used in a future call to EventsAfter. 85 | // If project is the empty string, then events from all projects are returned. 86 | func (c *Client) EventsAfter(t timed.DBTime, project string) iter.Seq[*Event] { 87 | filter := func(key []byte) bool { 88 | if project == "" { 89 | return true 90 | } 91 | var p string 92 | if _, err := ordered.DecodePrefix(key, &p); err != nil { 93 | c.db.Panic("github EventsAfter decode", "key", storage.Fmt(key), "err", err) 94 | } 95 | return p == project 96 | } 97 | 98 | return func(yield func(*Event) bool) { 99 | for e := range timed.ScanAfter(c.db, "githubdl.Event", t, filter) { 100 | if !yield(c.decodeEvent(e)) { 101 | return 102 | } 103 | } 104 | } 105 | } 106 | 107 | // decodeEvent decodes the key, val pair into an Event. 108 | // It calls c.db.Panic for malformed data. 
func (c *Client) decodeEvent(t *timed.Entry) *Event {
	var e Event
	e.DBTime = t.ModTime
	// The key encodes (Project, Issue, API, ID).
	if err := ordered.Decode(t.Key, &e.Project, &e.Issue, &e.API, &e.ID); err != nil {
		c.db.Panic("github event decode", "key", storage.Fmt(t.Key), "err", err)
	}

	// The value is a single ordered-encoded raw JSON blob.
	var js ordered.Raw
	if err := ordered.Decode(t.Val, &js); err != nil {
		c.db.Panic("github event val decode", "key", storage.Fmt(t.Key), "val", storage.Fmt(t.Val), "err", err)
	}
	e.JSON = js
	// Pick the typed form based on which API endpoint produced the event.
	switch e.API {
	default:
		c.db.Panic("github event invalid API", "api", e.API)
	case "/issues":
		e.Typed = new(Issue)
	case "/issues/comments":
		e.Typed = new(IssueComment)
	case "/issues/events":
		e.Typed = new(IssueEvent)
	}
	if err := json.Unmarshal(js, e.Typed); err != nil {
		c.db.Panic("github event json", "js", string(js), "err", err)
	}
	return &e
}

// EventWatcher returns a new [timed.Watcher] with the given name.
// It picks up where any previous Watcher of the same name left off.
func (c *Client) EventWatcher(name string) *timed.Watcher[*Event] {
	return timed.NewWatcher(c.db, name, "githubdl.Event", c.decodeEvent)
}

// IssueEvent is the GitHub JSON structure for an issue metadata event.
type IssueEvent struct {
	// NOTE: Issue field is not present when downloading for a specific issue,
	// only in the master feed for the whole repo. So do not add it here.
147 | ID int64 148 | URL string 149 | Actor User `json:"actor"` 150 | Event string `json:"event"` 151 | Labels []Label `json:"labels"` 152 | LockReason string `json:"lock_reason"` 153 | CreatedAt string `json:"created_at"` 154 | CommitID string `json:"commit_id"` 155 | Assigner User `json:"assigner"` 156 | Assignees []User `json:"assignees"` 157 | Milestone Milestone `json:"milestone"` 158 | Rename Rename `json:"rename"` 159 | } 160 | 161 | // A User represents a user or organization account in GitHub JSON. 162 | type User struct { 163 | Login string 164 | } 165 | 166 | // A Label represents a project issue tracker label in GitHub JSON. 167 | type Label struct { 168 | Name string 169 | } 170 | 171 | // A Milestone represents a project issue milestone in GitHub JSON. 172 | type Milestone struct { 173 | Title string 174 | } 175 | 176 | // A Rename describes an issue title renaming in GitHub JSON. 177 | type Rename struct { 178 | From string 179 | To string 180 | } 181 | 182 | func urlToProject(u string) string { 183 | u, ok := strings.CutPrefix(u, "https://api.github.com/repos/") 184 | if !ok { 185 | return "" 186 | } 187 | i := strings.Index(u, "/") 188 | if i < 0 { 189 | return "" 190 | } 191 | j := strings.Index(u[i+1:], "/") 192 | if j < 0 { 193 | return "" 194 | } 195 | return u[:i+1+j] 196 | } 197 | 198 | func baseToInt64(u string) int64 { 199 | i, err := strconv.ParseInt(u[strings.LastIndex(u, "/")+1:], 10, 64) 200 | if i <= 0 || err != nil { 201 | return 0 202 | } 203 | return i 204 | } 205 | 206 | // IssueComment is the GitHub JSON structure for an issue comment event. 207 | type IssueComment struct { 208 | URL string `json:"url"` 209 | IssueURL string `json:"issue_url"` 210 | HTMLURL string `json:"html_url"` 211 | User User `json:"user"` 212 | CreatedAt string `json:"created_at"` 213 | UpdatedAt string `json:"updated_at"` 214 | Body string `json:"body"` 215 | } 216 | 217 | // Project returns the issue comment's GitHub project (for example, "golang/go"). 
218 | func (x *IssueComment) Project() string { 219 | return urlToProject(x.URL) 220 | } 221 | 222 | // Issue returns the issue comment's issue number. 223 | func (x *IssueComment) Issue() int64 { 224 | u, _, _ := strings.Cut(x.HTMLURL, "#") 225 | return baseToInt64(u) 226 | } 227 | 228 | // CommentID returns the issue comment's numeric ID. 229 | // The ID appears to be unique across all comments on GitHub, 230 | // but we only assume it is unique within a single issue. 231 | func (x *IssueComment) CommentID() int64 { 232 | return baseToInt64(x.URL) 233 | } 234 | 235 | // Issue is the GitHub JSON structure for an issue creation event. 236 | type Issue struct { 237 | URL string `json:"url"` 238 | HTMLURL string `json:"html_url"` 239 | Number int64 `json:"number"` 240 | User User `json:"user"` 241 | Title string `json:"title"` 242 | CreatedAt string `json:"created_at"` 243 | UpdatedAt string `json:"updated_at"` 244 | ClosedAt string `json:"closed_at"` 245 | Body string `json:"body"` 246 | Assignees []User `json:"assignees"` 247 | Milestone Milestone `json:"milestone"` 248 | State string `json:"state"` 249 | PullRequest *struct{} `json:"pull_request"` 250 | Locked bool 251 | ActiveLockReason string `json:"active_lock_reason"` 252 | Labels []Label `json:"labels"` 253 | } 254 | 255 | // Project returns the issue's GitHub project (for example, "golang/go"). 256 | func (x *Issue) Project() string { 257 | return urlToProject(x.URL) 258 | } 259 | -------------------------------------------------------------------------------- /internal/httprr/rr_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2024 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 
4 | 5 | package httprr 6 | 7 | import ( 8 | "errors" 9 | "io" 10 | "net/http" 11 | "net/http/httptest" 12 | "os" 13 | "strings" 14 | "testing" 15 | "testing/iotest" 16 | ) 17 | 18 | func handler(w http.ResponseWriter, r *http.Request) { 19 | if strings.HasSuffix(r.URL.Path, "/redirect") { 20 | http.Error(w, "redirect me!", 304) 21 | return 22 | } 23 | if r.Method == "GET" { 24 | if r.Header.Get("Secret") != "key" { 25 | http.Error(w, "missing secret", 666) 26 | return 27 | } 28 | } 29 | if r.Method == "POST" { 30 | data, err := io.ReadAll(r.Body) 31 | if err != nil { 32 | panic(err) 33 | } 34 | if !strings.Contains(string(data), "my Secret") { 35 | http.Error(w, "missing body secret", 667) 36 | return 37 | } 38 | } 39 | } 40 | 41 | func always555(w http.ResponseWriter, r *http.Request) { 42 | http.Error(w, "should not be making HTTP requests", 555) 43 | } 44 | 45 | func dropPort(r *http.Request) error { 46 | if r.URL.Port() != "" { 47 | r.URL.Host = r.URL.Host[:strings.LastIndex(r.URL.Host, ":")] 48 | r.Host = r.Host[:strings.LastIndex(r.Host, ":")] 49 | } 50 | return nil 51 | } 52 | 53 | func dropSecretHeader(r *http.Request) error { 54 | r.Header.Del("Secret") 55 | return nil 56 | } 57 | 58 | func hideSecretBody(r *http.Request) error { 59 | if r.Body != nil { 60 | body := r.Body.(*Body) 61 | body.Data = []byte("redacted") 62 | } 63 | return nil 64 | } 65 | 66 | func TestRecordReplay(t *testing.T) { 67 | dir := t.TempDir() 68 | file := dir + "/rr" 69 | 70 | // 4 passes: 71 | // 0: create 72 | // 1: open 73 | // 2: Open with -httprecord="r+" 74 | // 3: Open with -httprecord="" 75 | for pass := range 4 { 76 | start := open 77 | h := always555 78 | *record = "" 79 | switch pass { 80 | case 0: 81 | start = create 82 | h = handler 83 | case 2: 84 | start = Open 85 | *record = "r+" 86 | h = handler 87 | case 3: 88 | start = Open 89 | } 90 | rr, err := start(file, http.DefaultTransport) 91 | if err != nil { 92 | t.Fatal(err) 93 | } 94 | if rr.Recording() { 95 | 
t.Log("RECORDING") 96 | } else { 97 | t.Log("REPLAYING") 98 | } 99 | rr.Scrub(dropPort, dropSecretHeader) 100 | rr.Scrub(hideSecretBody) 101 | 102 | mustNewRequest := func(method, url string, body io.Reader) *http.Request { 103 | req, err := http.NewRequest(method, url, body) 104 | if err != nil { 105 | t.Helper() 106 | t.Fatal(err) 107 | } 108 | return req 109 | } 110 | 111 | mustDo := func(req *http.Request, status int) { 112 | resp, err := rr.Client().Do(req) 113 | if err != nil { 114 | t.Helper() 115 | t.Fatal(err) 116 | } 117 | body, _ := io.ReadAll(resp.Body) 118 | resp.Body.Close() 119 | if resp.StatusCode != status { 120 | t.Helper() 121 | t.Fatalf("%v: %s\n%s", req.URL, resp.Status, body) 122 | } 123 | } 124 | 125 | srv := httptest.NewServer(http.HandlerFunc(h)) 126 | defer srv.Close() 127 | 128 | req := mustNewRequest("GET", srv.URL+"/myrequest", nil) 129 | req.Header.Set("Secret", "key") 130 | mustDo(req, 200) 131 | 132 | req = mustNewRequest("POST", srv.URL+"/myrequest", strings.NewReader("my Secret")) 133 | mustDo(req, 200) 134 | 135 | req = mustNewRequest("GET", srv.URL+"/redirect", nil) 136 | mustDo(req, 304) 137 | 138 | if !rr.Recording() { 139 | req = mustNewRequest("GET", srv.URL+"/uncached", nil) 140 | resp, err := rr.Client().Do(req) 141 | if err == nil { 142 | body, _ := io.ReadAll(resp.Body) 143 | t.Fatalf("%v: %s\n%s", req.URL, resp.Status, body) 144 | } 145 | } 146 | 147 | if err := rr.Close(); err != nil { 148 | t.Fatal(err) 149 | } 150 | } 151 | 152 | data, err := os.ReadFile(file) 153 | if err != nil { 154 | t.Fatal(err) 155 | } 156 | if strings.Contains(string(data), "Secret") { 157 | t.Fatalf("rr file contains Secret:\n%s", data) 158 | } 159 | } 160 | 161 | var badResponseTrace = []byte("httprr trace v1\n" + 162 | "92 75\n" + 163 | "GET http://127.0.0.1/myrequest HTTP/1.1\r\n" + 164 | "Host: 127.0.0.1\r\n" + 165 | "User-Agent: Go-http-client/1.1\r\n" + 166 | "\r\n" + 167 | "HZZP/1.1 200 OK\r\n" + 168 | "Date: Wed, 12 Jun 2024 13:55:02 
GMT\r\n" + 169 | "Content-Length: 0\r\n" + 170 | "\r\n") 171 | 172 | func TestErrors(t *testing.T) { 173 | // -httprecord regexp parsing 174 | *record = "+" 175 | if _, err := Open(os.DevNull, nil); err == nil || !strings.Contains(err.Error(), "invalid -httprecord flag") { 176 | t.Errorf("did not diagnose bad -httprecord: err = %v", err) 177 | } 178 | *record = "" 179 | 180 | // invalid httprr trace 181 | if _, err := Open(os.DevNull, nil); err == nil || !strings.Contains(err.Error(), "not an httprr trace") { 182 | t.Errorf("did not diagnose invalid httprr trace: err = %v", err) 183 | } 184 | 185 | // corrupt httprr trace 186 | dir := t.TempDir() 187 | os.WriteFile(dir+"/rr", []byte("httprr trace v1\ngarbage\n"), 0666) 188 | if _, err := Open(dir+"/rr", nil); err == nil || !strings.Contains(err.Error(), "corrupt httprr trace") { 189 | t.Errorf("did not diagnose invalid httprr trace: err = %v", err) 190 | } 191 | 192 | // os.Create error creating trace 193 | if _, err := create("invalid\x00file", nil); err == nil { 194 | t.Errorf("did not report failure from os.Create: err = %v", err) 195 | } 196 | 197 | // os.ReadAll error reading trace 198 | if _, err := open("nonexistent", nil); err == nil { 199 | t.Errorf("did not report failure from os.ReadFile: err = %v", err) 200 | } 201 | 202 | // error reading body 203 | rr, err := create(os.DevNull, nil) 204 | if err != nil { 205 | t.Fatal(err) 206 | } 207 | if _, err := rr.Client().Post("http://127.0.0.1/nonexist", "x/error", iotest.ErrReader(errors.New("MY ERROR"))); err == nil || !strings.Contains(err.Error(), "MY ERROR") { 208 | t.Errorf("did not report failure from io.ReadAll(body): err = %v", err) 209 | } 210 | 211 | // error during scrub 212 | rr.Scrub(func(*http.Request) error { return errors.New("SCRUB ERROR") }) 213 | if _, err := rr.Client().Get("http://127.0.0.1/nonexist"); err == nil || !strings.Contains(err.Error(), "SCRUB ERROR") { 214 | t.Errorf("did not report failure from scrub: err = %v", err) 215 | } 
216 | rr.Close() 217 | 218 | // error during rkey.WriteProxy 219 | rr, err = create(os.DevNull, nil) 220 | if err != nil { 221 | t.Fatal(err) 222 | } 223 | rr.Scrub(func(req *http.Request) error { 224 | req.URL = nil 225 | req.Host = "" 226 | return nil 227 | }) 228 | if _, err := rr.Client().Get("http://127.0.0.1/nonexist"); err == nil || !strings.Contains(err.Error(), "no Host or URL set") { 229 | t.Errorf("did not report failure from rkey.WriteProxy: err = %v", err) 230 | } 231 | rr.Close() 232 | 233 | // error during resp.Write 234 | rr, err = create(os.DevNull, badRespTransport{}) 235 | if err != nil { 236 | t.Fatal(err) 237 | } 238 | if _, err := rr.Client().Get("http://127.0.0.1/nonexist"); err == nil || !strings.Contains(err.Error(), "TRANSPORT ERROR") { 239 | t.Errorf("did not report failure from resp.Write: err = %v", err) 240 | } 241 | rr.Close() 242 | 243 | // error during Write logging request 244 | srv := httptest.NewServer(http.HandlerFunc(always555)) 245 | defer srv.Close() 246 | rr, err = create(os.DevNull, http.DefaultTransport) 247 | if err != nil { 248 | t.Fatal(err) 249 | } 250 | rr.Scrub(dropPort) 251 | rr.record.Close() // cause write error 252 | if _, err := rr.Client().Get(srv.URL + "/redirect"); err == nil || !strings.Contains(err.Error(), "file already closed") { 253 | t.Errorf("did not report failure from record write: err = %v", err) 254 | } 255 | rr.broken = errors.New("BROKEN ERROR") 256 | if _, err := rr.Client().Get(srv.URL + "/redirect"); err == nil || !strings.Contains(err.Error(), "BROKEN ERROR") { 257 | t.Errorf("did not report previous write failure: err = %v", err) 258 | } 259 | if err := rr.Close(); err == nil || !strings.Contains(err.Error(), "BROKEN ERROR") { 260 | t.Errorf("did not report write failure during close: err = %v", err) 261 | } 262 | 263 | // error during RoundTrip 264 | rr, err = create(os.DevNull, errTransport{errors.New("TRANSPORT ERROR")}) 265 | if err != nil { 266 | t.Fatal(err) 267 | } 268 | if _, err := 
rr.Client().Get(srv.URL); err == nil || !strings.Contains(err.Error(), "TRANSPORT ERROR") { 269 | t.Errorf("did not report failure from transport: err = %v", err) 270 | } 271 | 272 | // error during http.ReadResponse: trace is structurally okay but has malformed response inside 273 | if err := os.WriteFile(dir+"/rr", badResponseTrace, 0666); err != nil { 274 | t.Fatal(err) 275 | } 276 | rr, err = Open(dir+"/rr", nil) 277 | if err != nil { 278 | t.Fatal(err) 279 | } 280 | if _, err := rr.Client().Get("http://127.0.0.1/myrequest"); err == nil || !strings.Contains(err.Error(), "corrupt httprr trace:") { 281 | t.Errorf("did not diagnose invalid httprr trace: err = %v", err) 282 | } 283 | } 284 | 285 | type errTransport struct{ err error } 286 | 287 | func (e errTransport) RoundTrip(req *http.Request) (*http.Response, error) { 288 | return nil, e.err 289 | } 290 | 291 | type badRespTransport struct{} 292 | 293 | func (badRespTransport) RoundTrip(req *http.Request) (*http.Response, error) { 294 | resp := new(http.Response) 295 | resp.Body = io.NopCloser(iotest.ErrReader(errors.New("TRANSPORT ERROR"))) 296 | return resp, nil 297 | } 298 | -------------------------------------------------------------------------------- /internal/httprr/rr.go: -------------------------------------------------------------------------------- 1 | // Copyright 2024 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | // Package httprr implements HTTP record and replay, mainly for use in tests. 6 | // 7 | // [Open] creates a new [RecordReplay]. Whether it is recording or replaying 8 | // is controlled by the -httprecord flag, which is defined by this package 9 | // only in test programs (built by “go test”). 10 | // See the [Open] documentation for more details. 
11 | package httprr 12 | 13 | import ( 14 | "bufio" 15 | "bytes" 16 | "cmp" 17 | "context" 18 | "flag" 19 | "fmt" 20 | "io" 21 | "net/http" 22 | "os" 23 | "regexp" 24 | "strconv" 25 | "strings" 26 | "sync" 27 | "testing" 28 | ) 29 | 30 | var record = new(string) 31 | 32 | func init() { 33 | if testing.Testing() { 34 | record = flag.String("httprecord", "", "re-record traces for files matching `regexp`") 35 | } 36 | } 37 | 38 | // A RecordReplay is an [http.RoundTripper] that can operate in two modes: record and replay. 39 | // 40 | // In record mode, the RecordReplay invokes another RoundTripper 41 | // and logs the (request, response) pairs to a file. 42 | // 43 | // In replay mode, the RecordReplay responds to requests by finding 44 | // an identical request in the log and sending the logged response. 45 | type RecordReplay struct { 46 | file string 47 | real http.RoundTripper 48 | 49 | mu sync.Mutex 50 | broken error 51 | record *os.File 52 | replay map[string]string 53 | scrub []func(*http.Request) error 54 | } 55 | 56 | // Scrub adds new scrubbing functions to rr. 57 | // 58 | // Before using a request as a lookup key or saving it in the record/replay log, 59 | // the RecordReplay calls each scrub function, in the order they were registered, 60 | // to canonicalize non-deterministic parts of the request and remove secrets. 61 | // Scrubbing only applies to a copy of the request used in the record/replay log; 62 | // the unmodified original request is sent to the actual server in recording mode. 63 | // A scrub function can assume that if req.Body is not nil, then it has type [*Body]. 64 | // 65 | // Calling Scrub adds to the list of registered scrubbing functions; 66 | // it does not replace those registered by earlier calls. 67 | func (rr *RecordReplay) Scrub(scrubs ...func(req *http.Request) error) { 68 | rr.scrub = append(rr.scrub, scrubs...) 69 | } 70 | 71 | // Recording reports whether the rr is in recording mode. 
func (rr *RecordReplay) Recording() bool {
	return rr.record != nil
}

// Open opens a new record/replay log in the named file and
// returns a [RecordReplay] backed by that file.
//
// By default Open expects the file to exist and contain a
// previously-recorded log of (request, response) pairs,
// which [RecordReplay.RoundTrip] consults to prepare its responses.
//
// If the command-line flag -httprecord is set to a non-empty
// regular expression that matches file, then Open creates
// the file as a new log. In that mode, [RecordReplay.RoundTrip]
// makes actual HTTP requests using rt but then logs the requests and
// responses to the file for replaying in a future run.
func Open(file string, rt http.RoundTripper) (*RecordReplay, error) {
	if *record != "" {
		re, err := regexp.Compile(*record)
		if err != nil {
			return nil, fmt.Errorf("invalid -httprecord flag: %v", err)
		}
		if re.MatchString(file) {
			return create(file, rt)
		}
	}
	return open(file, rt)
}

// create creates a new record-mode RecordReplay in the file.
// TODO maybe export
func create(file string, rt http.RoundTripper) (*RecordReplay, error) {
	f, err := os.Create(file)
	if err != nil {
		return nil, err
	}
	// Write the trace header first so open can recognize the format.
	if _, err := fmt.Fprintf(f, "httprr trace v1\n"); err != nil {
		// unreachable unless write error immediately after os.Create
		f.Close()
		return nil, err
	}
	rr := &RecordReplay{
		file:   file,
		real:   rt,
		record: f,
	}
	return rr, nil
}

// open opens a replay-mode RecordReplay using the data in the file.
122 | func open(file string, rt http.RoundTripper) (*RecordReplay, error) { 123 | // Note: To handle larger traces without storing entirely in memory, 124 | // could instead read the file incrementally, storing a map[hash]offsets 125 | // and then reread the relevant part of the file during RoundTrip. 126 | 127 | bdata, err := os.ReadFile(file) 128 | if err != nil { 129 | return nil, err 130 | } 131 | data := string(bdata) 132 | line, data, ok := strings.Cut(data, "\n") 133 | if !ok || line != "httprr trace v1" { 134 | return nil, fmt.Errorf("read %s: not an httprr trace", file) 135 | } 136 | replay := make(map[string]string) 137 | for data != "" { 138 | line, data, ok = strings.Cut(data, "\n") 139 | f1, f2, _ := strings.Cut(line, " ") 140 | n1, err1 := strconv.Atoi(f1) 141 | n2, err2 := strconv.Atoi(f2) 142 | if !ok || err1 != nil || err2 != nil || n1 > len(data) || n2 > len(data[n1:]) { 143 | return nil, fmt.Errorf("read %s: corrupt httprr trace", file) 144 | } 145 | var req, resp string 146 | req, resp, data = data[:n1], data[n1:n1+n2], data[n1+n2:] 147 | replay[req] = resp 148 | } 149 | 150 | rr := &RecordReplay{ 151 | file: file, 152 | real: rt, 153 | replay: replay, 154 | } 155 | return rr, nil 156 | } 157 | 158 | // Client returns an http.Client using rr as its transport. 159 | // It is a shorthand for: 160 | // 161 | // return &http.Client{Transport: rr} 162 | // 163 | // For more complicated uses, use rr or the [RecordReplay.RoundTrip] method directly. 164 | func (rr *RecordReplay) Client() *http.Client { 165 | return &http.Client{Transport: rr} 166 | } 167 | 168 | // A Body is an io.ReadCloser used as an HTTP request body. 169 | // In a Scrubber, if req.Body != nil, then req.Body is guaranteed 170 | // to have type *Body, making it easy to access the body to change it. 171 | type Body struct { 172 | Data []byte 173 | ReadOffset int 174 | } 175 | 176 | // Read reads from the body, implementing io.Reader. 
177 | func (b *Body) Read(p []byte) (int, error) { 178 | n := copy(p, b.Data[b.ReadOffset:]) 179 | if n == 0 { 180 | return 0, io.EOF 181 | } 182 | b.ReadOffset += n 183 | return n, nil 184 | } 185 | 186 | // Close is a no-op, implementing io.Closer. 187 | func (b *Body) Close() error { 188 | return nil 189 | } 190 | 191 | // RoundTrip implements [http.RoundTripper]. 192 | // 193 | // If rr has been opened in record mode, RoundTrip passes the requests on to 194 | // the RoundTripper specified in the call to [Open] and then logs the 195 | // (request, response) pair to the underlying file. 196 | // 197 | // If rr has been opened in replay mode, RoundTrip looks up the request in the log 198 | // and then responds with the previously logged response. 199 | // If the log does not contain req, RoundTrip returns an error. 200 | func (rr *RecordReplay) RoundTrip(req *http.Request) (*http.Response, error) { 201 | // rkey is the scrubbed request used as a lookup key. 202 | rkey := req.Clone(context.Background()) 203 | if req.Body != nil { 204 | body, err := io.ReadAll(req.Body) 205 | req.Body.Close() 206 | if err != nil { 207 | return nil, err 208 | } 209 | req.Body = &Body{Data: body} 210 | rkey.Body = &Body{Data: bytes.Clone(body)} 211 | rkey.ContentLength = -1 212 | } 213 | 214 | if len(rr.scrub) > 0 { 215 | // Canonicalize and scrub body. 216 | for _, scrub := range rr.scrub { 217 | if err := scrub(rkey); err != nil { 218 | return nil, err 219 | } 220 | } 221 | if rkey.Body != nil { 222 | rkey.ContentLength = int64(len(rkey.Body.(*Body).Data)) 223 | } 224 | } 225 | 226 | // Use WriteProxy instead of Write to preserve the URL scheme. 
227 | var bkey strings.Builder 228 | if err := rkey.WriteProxy(&bkey); err != nil { 229 | return nil, err 230 | } 231 | key := bkey.String() 232 | 233 | if rr.replay != nil { 234 | if respWire, ok := rr.replay[key]; ok { 235 | resp, err := http.ReadResponse(bufio.NewReader(strings.NewReader(respWire)), req) 236 | if err != nil { 237 | return nil, fmt.Errorf("read %s: corrupt httprr trace: %v", rr.file, err) 238 | } 239 | return resp, nil 240 | } 241 | return nil, fmt.Errorf("cached HTTP response not found for:\n%s", key) 242 | } 243 | 244 | rr.mu.Lock() 245 | err := rr.broken 246 | rr.mu.Unlock() 247 | if err != nil { 248 | return nil, err 249 | } 250 | 251 | resp, err := rr.real.RoundTrip(req) 252 | if err != nil { 253 | return nil, err 254 | } 255 | 256 | var respBuf strings.Builder 257 | if err := resp.Write(&respBuf); err != nil { 258 | return nil, err 259 | } 260 | respWire := respBuf.String() 261 | 262 | resp, err = http.ReadResponse(bufio.NewReader(strings.NewReader(respWire)), req) 263 | if err != nil { 264 | // unreachable unless resp.Write does not round-trip with http.ReadResponse 265 | return nil, err 266 | } 267 | 268 | rr.mu.Lock() 269 | defer rr.mu.Unlock() 270 | if rr.broken != nil { 271 | // unreachable unless concurrent I/O error; checked above 272 | return nil, rr.broken 273 | } 274 | _, err1 := fmt.Fprintf(rr.record, "%d %d\n", len(key), len(respWire)) 275 | _, err2 := rr.record.WriteString(key) 276 | _, err3 := rr.record.WriteString(respWire) 277 | if err := cmp.Or(err1, err2, err3); err != nil { 278 | rr.broken = err 279 | rr.record.Close() 280 | os.Remove(rr.file) 281 | return nil, err 282 | } 283 | return resp, nil 284 | } 285 | 286 | // Close closes the RecordReplay. 287 | // It is a no-op in replay mode. 
288 | func (rr *RecordReplay) Close() error { 289 | if rr.broken != nil { 290 | return rr.broken 291 | } 292 | if rr.record != nil { 293 | return rr.record.Close() 294 | } 295 | return nil 296 | } 297 | -------------------------------------------------------------------------------- /internal/related/related.go: -------------------------------------------------------------------------------- 1 | // Copyright 2024 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | /*TODO 6 | 7 | p.EnableProject("golang/go") 8 | p.IgnoreBody("— [watchflakes](https://go.dev/wiki/Watchflakes)") 9 | p.IgnoreTitlePrefix("x/tools/gopls: release version v") 10 | p.IgnoreTitleSuffix(" backport]") 11 | 12 | */ 13 | 14 | // Package related implements posting about related issues to GitHub. 15 | package related 16 | 17 | import ( 18 | "bytes" 19 | "fmt" 20 | "log/slog" 21 | "strings" 22 | "time" 23 | 24 | "rsc.io/gaby/internal/docs" 25 | "rsc.io/gaby/internal/github" 26 | "rsc.io/gaby/internal/storage" 27 | "rsc.io/gaby/internal/storage/timed" 28 | "rsc.io/ordered" 29 | ) 30 | 31 | // A Poster posts to GitHub about related issues (and eventually other documents). 32 | type Poster struct { 33 | slog *slog.Logger 34 | db storage.DB 35 | vdb storage.VectorDB 36 | github *github.Client 37 | docs *docs.Corpus 38 | projects map[string]bool 39 | watcher *timed.Watcher[*github.Event] 40 | name string 41 | timeLimit time.Time 42 | ignores []func(*github.Issue) bool 43 | maxResults int 44 | scoreCutoff float64 45 | post bool 46 | } 47 | 48 | // New creates and returns a new Poster. It logs to lg, stores state in db, 49 | // watches for new GitHub issues using gh, looks up related documents in vdb, 50 | // and reads the document content from docs. 51 | // For the purposes of storing its own state, it uses the given name. 52 | // Future calls to New with the same name will use the same state. 
53 | // 54 | // Use the [Poster] methods to configure the posting parameters 55 | // (especially [Poster.EnableProject] and [Poster.EnablePosts]) 56 | // before calling [Poster.Run]. 57 | func New(lg *slog.Logger, db storage.DB, gh *github.Client, vdb storage.VectorDB, docs *docs.Corpus, name string) *Poster { 58 | return &Poster{ 59 | slog: lg, 60 | db: db, 61 | vdb: vdb, 62 | github: gh, 63 | docs: docs, 64 | projects: make(map[string]bool), 65 | watcher: gh.EventWatcher("related.Poster:" + name), 66 | name: name, 67 | timeLimit: time.Now().Add(-defaultTooOld), 68 | maxResults: defaultMaxResults, 69 | scoreCutoff: defaultScoreCutoff, 70 | } 71 | } 72 | 73 | // SetTimeLimit controls how old an issue can be for the Poster to post to it. 74 | // Issues created before time t will be skipped. 75 | // The default is not to post to issues that are more than 48 hours old 76 | // at the time of the call to [New]. 77 | func (p *Poster) SetTimeLimit(t time.Time) { 78 | p.timeLimit = t 79 | } 80 | 81 | const defaultTooOld = 48 * time.Hour 82 | 83 | // SetMaxResults sets the maximum number of related documents to 84 | // post to the issue. 85 | // The default is 10. 86 | func (p *Poster) SetMaxResults(max int) { 87 | p.maxResults = max 88 | } 89 | 90 | const defaultMaxResults = 10 91 | 92 | // SetMinScore sets the minimum vector search score that a 93 | // [storage.VectorResult] must have to be considered a related document 94 | // The default is 0.82, which was determined empirically. 95 | func (p *Poster) SetMinScore(min float64) { 96 | p.scoreCutoff = min 97 | } 98 | 99 | const defaultScoreCutoff = 0.82 100 | 101 | // SkipBodyContains configures the Poster to skip issues with a body containing 102 | // the given text. 
103 | func (p *Poster) SkipBodyContains(text string) { 104 | p.ignores = append(p.ignores, func(issue *github.Issue) bool { 105 | return strings.Contains(issue.Body, text) 106 | }) 107 | } 108 | 109 | // SkipTitlePrefix configures the Poster to skip issues with a title starting 110 | // with the given prefix. 111 | func (p *Poster) SkipTitlePrefix(prefix string) { 112 | p.ignores = append(p.ignores, func(issue *github.Issue) bool { 113 | return strings.HasPrefix(issue.Title, prefix) 114 | }) 115 | } 116 | 117 | // SkipTitleSuffix configures the Poster to skip issues with a title ending 118 | // with the given suffix. 119 | func (p *Poster) SkipTitleSuffix(suffix string) { 120 | p.ignores = append(p.ignores, func(issue *github.Issue) bool { 121 | return strings.HasSuffix(issue.Title, suffix) 122 | }) 123 | } 124 | 125 | // EnableProject enables the Poster to post on issues in the given GitHub project (for example "golang/go"). 126 | // See also [Poster.EnablePosts], which must also be called to post anything to GitHub. 127 | func (p *Poster) EnableProject(project string) { 128 | p.projects[project] = true 129 | } 130 | 131 | // EnablePosts enables the Poster to post to GitHub. 132 | // If EnablePosts has not been called, [Poster.Run] logs what it would post but does not post the messages. 133 | // See also [Poster.EnableProject], which must also be called to set the projects being considered. 134 | func (p *Poster) EnablePosts() { 135 | p.post = true 136 | } 137 | 138 | // deletePosted deletes all the “posted on this issue” notes. 139 | func (p *Poster) deletePosted() { 140 | p.db.DeleteRange(ordered.Encode("triage.Posted"), ordered.Encode("triage.Posted", ordered.Inf)) 141 | } 142 | 143 | // Run runs a single round of posting to GitHub. 144 | // It scans all open issues that have been created since the last call to [Poster.Run] 145 | // using a Poster with the same name (see [New]). 146 | // Run skips closed issues, and it also skips pull requests. 
147 | // 148 | // For each issue that matches the configured posting constraints 149 | // (see [Poster.EnableProject], [Poster.SetTimeLimit], [Poster.SkipBodyContains], [Poster.SkipTitlePrefix], and [Poster.SkipTitleSuffix]), 150 | // Run computes an embedding of the issue body text (ignoring comments) 151 | // and looks in the vector database for other documents (currently only issues) 152 | // that are aligned closely enough with that body text 153 | // (see [Poster.SetMinScore]) and posts a limited number of matches 154 | // (see [Poster.SetMaxResults]). 155 | // 156 | // Run logs each post to the [slog.Logger] passed to [New]. 157 | // If [Poster.EnablePosts] has been called, then [Run] also posts the comment to GitHub, 158 | // records in the database that it has posted to GitHub to make sure it never posts to that issue again, 159 | // and advances its GitHub issue watcher's incremental cursor to speed future calls to [Run]. 160 | // 161 | // When [Poster.EnablePosts] has not been called, Run only logs the comments it would post. 162 | // Future calls to Run will reprocess the same issues and re-log the same comments. 
163 | func (p *Poster) Run() { 164 | p.slog.Info("related.Poster start", "name", p.name) 165 | defer p.slog.Info("related.Poster end", "name", p.name) 166 | 167 | defer p.watcher.Flush() 168 | 169 | Watcher: 170 | for e := range p.watcher.Recent() { 171 | if !p.projects[e.Project] || e.API != "/issues" { 172 | continue 173 | } 174 | issue := e.Typed.(*github.Issue) 175 | if issue.State == "closed" || issue.PullRequest != nil { 176 | continue 177 | } 178 | tm, err := time.Parse(time.RFC3339, issue.CreatedAt) 179 | if err != nil { 180 | p.slog.Error("triage parse createdat", "CreatedAt", issue.CreatedAt, "err", err) 181 | continue 182 | } 183 | if tm.Before(p.timeLimit) { 184 | continue 185 | } 186 | for _, ig := range p.ignores { 187 | if ig(issue) { 188 | continue Watcher 189 | } 190 | } 191 | 192 | // TODO: Perhaps this key should include p.name, but perhaps not. 193 | // This makes sure we only every post to each issue once. 194 | posted := ordered.Encode("triage.Posted", e.Project, e.Issue) 195 | if _, ok := p.db.Get(posted); ok { 196 | continue 197 | } 198 | 199 | u := fmt.Sprintf("https://github.com/%s/issues/%d", e.Project, e.Issue) 200 | p.slog.Debug("triage client consider", "url", u) 201 | vec, ok := p.vdb.Get(u) 202 | if !ok { 203 | p.slog.Error("triage lookup failed", "url", u) 204 | continue 205 | } 206 | results := p.vdb.Search(vec, p.maxResults+5) 207 | if len(results) > 0 && results[0].ID == u { 208 | results = results[1:] 209 | } 210 | for i, r := range results { 211 | if r.Score < p.scoreCutoff { 212 | results = results[:i] 213 | break 214 | } 215 | } 216 | if len(results) > p.maxResults { 217 | results = results[:p.maxResults] 218 | } 219 | if len(results) == 0 { 220 | if p.post { 221 | p.watcher.MarkOld(e.DBTime) 222 | } 223 | continue 224 | } 225 | var buf bytes.Buffer 226 | fmt.Fprintf(&buf, "**Related Issues**\n\n") 227 | for _, r := range results { 228 | title := r.ID 229 | if d, ok := p.docs.Get(r.ID); ok { 230 | title = d.Title 231 | } 232 
| info := "" 233 | if issue, err := p.github.LookupIssueURL(r.ID); err == nil { 234 | info = fmt.Sprint(" #", issue.Number) 235 | if issue.ClosedAt != "" { 236 | info += " (closed)" 237 | } 238 | } 239 | // The score is recorded in an invisible HTML comment so the Markdown rendering 240 | // is unchanged but the value survives in the posted text for later analysis. 241 | // (The original format string had three verbs but four arguments, which go vet's 242 | // printf check flags and which rendered a %!(EXTRA float64=...) artifact.) 243 | fmt.Fprintf(&buf, " - [%s%s](%s) <!-- score=%.5f -->\n", markdownEscape(title), info, r.ID, r.Score) 244 | } 245 | fmt.Fprintf(&buf, "\n(Emoji vote if this was helpful or unhelpful; more detailed feedback welcome in [this discussion](https://github.com/golang/go/discussions/67901).)\n") 246 | 247 | p.slog.Info("related.Poster post", "name", p.name, "project", e.Project, "issue", e.Issue, "comment", buf.String()) 248 | 249 | if !p.post { 250 | continue 251 | } 252 | 253 | if err := p.github.PostIssueComment(issue, &github.IssueCommentChanges{Body: buf.String()}); err != nil { 254 | p.slog.Error("PostIssueComment", "issue", e.Issue, "err", err) 255 | continue 256 | } 257 | p.db.Set(posted, nil) 258 | p.watcher.MarkOld(e.DBTime) 259 | 260 | // Flush immediately to make sure we don't re-post if interrupted later in the loop. 261 | p.watcher.Flush() 262 | p.db.Flush() 263 | } 264 | } 265 | 266 | // markdownEscaper escapes Markdown metacharacters so issue titles render literally. 267 | var markdownEscaper = strings.NewReplacer( 268 | "_", `\_`, 269 | "*", `\*`, 270 | "`", "\\`", 271 | "[", `\[`, 272 | "]", `\]`, 273 | "<", `\<`, 274 | ">", `\>`, 275 | "&", `\&`, 276 | ) 277 | 278 | // markdownEscape returns s with Markdown metacharacters backslash-escaped. 279 | func markdownEscape(s string) string { 280 | return markdownEscaper.Replace(s) 281 | } 282 | -------------------------------------------------------------------------------- /internal/storage/timed/timed_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2024 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 
4 | 5 | package timed 6 | 7 | import ( 8 | "slices" 9 | "strings" 10 | "testing" 11 | 12 | "rsc.io/gaby/internal/storage" 13 | ) 14 | 15 | func Test(t *testing.T) { 16 | db := storage.MemDB() 17 | b := db.Batch() 18 | 19 | Set(db, b, "kind", []byte("key"), []byte("val")) 20 | if e, ok := Get(db, "kind", []byte("key")); e != nil || ok != false { 21 | t.Errorf("Set wrote to db instead of b: Get = %v, %v, want nil, false", e, ok) 22 | } 23 | b.Apply() 24 | if e, ok := Get(db, "kind", []byte("key")); !ok || e == nil || e.Kind != "kind" || string(e.Key) != "key" || string(e.Val) != "val" || e.ModTime == 0 { 25 | t.Errorf("Get after Set = %+v, %v, want {>0, kind, key, val}, true", e, ok) 26 | } 27 | 28 | Delete(db, b, "kind", []byte("missing")) 29 | b.Apply() 30 | if e, ok := Get(db, "kind", []byte("key")); !ok || e == nil || e.Kind != "kind" || string(e.Key) != "key" || string(e.Val) != "val" || e.ModTime == 0 { 31 | t.Errorf("Get after Delete = %+v, %v, want {>0, kind, key, val}, true", e, ok) 32 | } 33 | 34 | Delete(db, b, "kind", []byte("key")) 35 | b.Apply() 36 | if e, ok := Get(db, "kind", []byte("key")); e != nil || ok != false { 37 | t.Errorf("Delete didn't delete key: Get = %v, %v, want nil, false", e, ok) 38 | } 39 | 40 | var keys []string 41 | var last DBTime 42 | do := func(e *Entry) { 43 | t.Helper() 44 | if last != -1 { 45 | if e.ModTime <= last { 46 | t.Fatalf("%+v: ModTime %v <= last %v", e, e.ModTime, last) 47 | } 48 | last = e.ModTime 49 | } 50 | if string(e.Kind) != "kind" { 51 | t.Fatalf("%+v: Kind=%q, want %q", e, e.Kind, "kind") 52 | } 53 | key := string(e.Key) 54 | if !strings.HasPrefix(key, "k") { 55 | t.Fatalf("%+v: Key=%q, want k prefix", e, e.Key) 56 | } 57 | if want := "v" + key[1:]; string(e.Val) != want { 58 | t.Fatalf("%+v: Val=%q, want %q", e, e.Val, want) 59 | } 60 | keys = append(keys, key) 61 | } 62 | 63 | Set(db, b, "kind", []byte("k1"), []byte("v1")) 64 | Set(db, b, "kind", []byte("k3"), []byte("v3")) 65 | Set(db, b, "kind", 
[]byte("k2"), []byte("v2")) 66 | b.Apply() 67 | 68 | // Basic iteration. 69 | last = -1 70 | keys = nil 71 | for e := range Scan(db, "kind", nil, []byte("\xff")) { 72 | do(e) 73 | } 74 | if want := []string{"k1", "k2", "k3"}; !slices.Equal(keys, want) { 75 | t.Errorf("Scan() = %v, want %v", keys, want) 76 | } 77 | 78 | keys = nil 79 | for e := range Scan(db, "kind", []byte("k1x"), []byte("k2z")) { 80 | do(e) 81 | } 82 | if want := []string{"k2"}; !slices.Equal(keys, want) { 83 | t.Errorf("Scan(k1x, k2z) = %v, want %v", keys, want) 84 | } 85 | 86 | keys = nil 87 | for e := range Scan(db, "kind", []byte("k2"), []byte("\xff")) { 88 | do(e) 89 | } 90 | if want := []string{"k2", "k3"}; !slices.Equal(keys, want) { 91 | t.Errorf("Scan(k2) = %v, want %v", keys, want) 92 | } 93 | 94 | keys = nil 95 | for e := range Scan(db, "kind", []byte("k2"), []byte("\xff")) { 96 | do(e) 97 | break 98 | } 99 | if want := []string{"k2"}; !slices.Equal(keys, want) { 100 | t.Errorf("Scan(k2) with break = %v, want %v", keys, want) 101 | } 102 | 103 | // Timed iteration. 104 | last = 0 105 | keys = nil 106 | for e := range ScanAfter(db, "kind", 0, nil) { 107 | do(e) 108 | } 109 | if want := []string{"k1", "k3", "k2"}; !slices.Equal(keys, want) { 110 | t.Errorf("ScanAfter(0) = %v, want %v", keys, want) 111 | } 112 | t123 := last 113 | 114 | // Watcher. 115 | last = 0 116 | keys = nil 117 | w := NewWatcher(db, "name", "kind", func(e *Entry) *Entry { return e }) 118 | for e := range w.Recent() { 119 | do(e) 120 | w.MarkOld(e.ModTime) 121 | w.MarkOld(e.ModTime - 1) // no-op 122 | } 123 | if want := []string{"k1", "k3", "k2"}; !slices.Equal(keys, want) { 124 | t.Errorf("Watcher.Recent() = %v, want %v", keys, want) 125 | } 126 | 127 | // Timed iteration with break. 
128 | last = 0 129 | keys = nil 130 | for e := range ScanAfter(db, "kind", 0, nil) { 131 | do(e) 132 | break 133 | } 134 | if want := []string{"k1"}; !slices.Equal(keys, want) { 135 | t.Errorf("ScanAfter(0) with break = %v, want %v", keys, want) 136 | } 137 | 138 | // Incremental iteration 139 | Set(db, b, "kind", []byte("k5"), []byte("v5")) 140 | Set(db, b, "kind", []byte("k4"), []byte("v4")) 141 | Set(db, b, "kind", []byte("k2"), []byte("v2")) 142 | b.Apply() 143 | 144 | // Check full scan. 145 | last = 0 146 | keys = nil 147 | for e := range ScanAfter(db, "kind", 0, nil) { 148 | do(e) 149 | } 150 | if want := []string{"k1", "k3", "k5", "k4", "k2"}; !slices.Equal(keys, want) { 151 | t.Errorf("ScanAfter(0) = %v, want %v", keys, want) 152 | } 153 | 154 | // Check incremental scan. 155 | last = 0 156 | keys = nil 157 | for e := range ScanAfter(db, "kind", t123, nil) { 158 | do(e) 159 | } 160 | if want := []string{"k5", "k4", "k2"}; !slices.Equal(keys, want) { 161 | t.Errorf("ScanAfter(t123) = %v, want %v", keys, want) 162 | } 163 | 164 | // Full (new) watcher. 165 | last = 0 166 | keys = nil 167 | w = NewWatcher(db, "name2", "kind", func(e *Entry) *Entry { return e }) 168 | for e := range w.Recent() { 169 | do(e) 170 | } 171 | if want := []string{"k1", "k3", "k5", "k4", "k2"}; !slices.Equal(keys, want) { 172 | t.Errorf("Watcher.Recent() full = %v, want %v", keys, want) 173 | } 174 | 175 | // Watcher with break 176 | last = 0 177 | keys = nil 178 | w = NewWatcher(db, "name2", "kind", func(e *Entry) *Entry { return e }) 179 | for e := range w.Recent() { 180 | do(e) 181 | break 182 | } 183 | if want := []string{"k1"}; !slices.Equal(keys, want) { 184 | t.Errorf("Watcher.Recent() full = %v, want %v", keys, want) 185 | } 186 | 187 | // Incremental (old) watcher. 
188 | last = 0 189 | keys = nil 190 | w = NewWatcher(db, "name", "kind", func(e *Entry) *Entry { return e }) 191 | for e := range w.Recent() { 192 | do(e) 193 | } 194 | if want := []string{"k5", "k4", "k2"}; !slices.Equal(keys, want) { 195 | t.Errorf("Watcher.Recent() incremental = %v, want %v", keys, want) 196 | } 197 | 198 | // Restart incremental watcher. 199 | last = 0 200 | keys = nil 201 | w.Restart() 202 | for e := range w.Recent() { 203 | do(e) 204 | } 205 | if want := []string{"k1", "k3", "k5", "k4", "k2"}; !slices.Equal(keys, want) { 206 | t.Errorf("Watcher.Recent() after Reset = %v, want %v", keys, want) 207 | } 208 | 209 | // Filtered scan. 210 | last = 0 211 | keys = nil 212 | filter := func(key []byte) bool { return strings.HasSuffix(string(key), "3") } 213 | for e := range ScanAfter(db, "kind", 0, filter) { 214 | do(e) 215 | } 216 | if want := []string{"k3"}; !slices.Equal(keys, want) { 217 | t.Errorf("ScanAfter(0, suffix3) = %v, want %v", keys, want) 218 | } 219 | 220 | // Accidentally doing multiple Sets of a single key 221 | // will leave behind a stale timestamp record. 222 | Set(db, b, "kind", []byte("k3"), []byte("v3")) 223 | Set(db, b, "kind", []byte("k3"), []byte("v3")) 224 | b.Apply() 225 | 226 | // Stale timestamp should not result in multiple k3 visits. 227 | last = 0 228 | keys = nil 229 | for e := range ScanAfter(db, "kind", 0, nil) { 230 | do(e) 231 | } 232 | if want := []string{"k1", "k5", "k4", "k2", "k3"}; !slices.Equal(keys, want) { 233 | t.Errorf("ScanAfter(0) = %v, want %v", keys, want) 234 | } 235 | 236 | // Deleting k3 now will still leave the stale timestamp record. 237 | // Make sure it is ignored and doesn't cause a lookup crash. 238 | Delete(db, b, "kind", []byte("k3")) 239 | b.Apply() 240 | 241 | // Stale timestamp should not crash on k3. 
242 | last = 0 243 | keys = nil 244 | for e := range ScanAfter(db, "kind", 0, nil) { 245 | do(e) 246 | } 247 | if want := []string{"k1", "k5", "k4", "k2"}; !slices.Equal(keys, want) { 248 | t.Errorf("ScanAfter(0) = %v, want %v", keys, want) 249 | } 250 | 251 | // Range deletion. 252 | DeleteRange(db, b, "kind", []byte("k1z"), []byte("k33")) 253 | b.Apply() 254 | 255 | last = -1 256 | keys = nil 257 | for e := range Scan(db, "kind", nil, []byte("\xff")) { 258 | do(e) 259 | } 260 | if want := []string{"k1", "k4", "k5"}; !slices.Equal(keys, want) { 261 | t.Errorf("Scan() after DeleteRange = %v, want %v", keys, want) 262 | } 263 | 264 | last = 0 265 | keys = nil 266 | for e := range ScanAfter(db, "kind", 0, nil) { 267 | do(e) 268 | } 269 | if want := []string{"k1", "k5", "k4"}; !slices.Equal(keys, want) { 270 | t.Errorf("ScanAfter(0) after DeleteRange = %v, want %v", keys, want) 271 | } 272 | 273 | Set(db, b, "kind", []byte("k2"), []byte("v2")) 274 | b.Apply() 275 | } 276 | 277 | func TestLocking(t *testing.T) { 278 | db := storage.MemDB() 279 | b := db.Batch() 280 | Set(db, b, "kind", []byte("key"), []byte("val")) 281 | b.Apply() 282 | 283 | w := NewWatcher(db, "name", "kind", func(e *Entry) *Entry { return e }) 284 | callRecover := func() { recover() } 285 | 286 | w.lock() 287 | func() { 288 | defer callRecover() 289 | w.lock() 290 | t.Fatalf("second w.lock did not panic") 291 | }() 292 | 293 | w.unlock() 294 | func() { 295 | defer callRecover() 296 | w.unlock() 297 | t.Fatalf("second w.unlock did not panic") 298 | }() 299 | 300 | func() { 301 | defer callRecover() 302 | w.MarkOld(0) 303 | t.Fatalf("MarkOld outside iteration did not panic") 304 | }() 305 | 306 | did := false 307 | for _ = range w.Recent() { 308 | did = true 309 | func() { 310 | defer callRecover() 311 | w.Restart() 312 | t.Fatalf("Restart inside iteration did not panic") 313 | }() 314 | 315 | func() { 316 | defer callRecover() 317 | for _ = range w.Recent() { 318 | } 319 | t.Fatalf("iteration inside 
iteration did not panic") 320 | }() 321 | } 322 | if !did { 323 | t.Fatalf("range over Recent did not find any entries") 324 | } 325 | } 326 | 327 | func TestNow(t *testing.T) { 328 | t1 := now() 329 | for range 1000 { 330 | t2 := now() 331 | if t2 <= t1 { 332 | t.Errorf("now(), now() = %d, %d (out of order)", t1, t2) 333 | } 334 | t1 = t2 335 | } 336 | } 337 | -------------------------------------------------------------------------------- /internal/testdata/markdown3.httprr: -------------------------------------------------------------------------------- 1 | httprr trace v1 2 | 207 6198 3 | GET https://api.github.com/repos/rsc/markdown/issues?direction=asc&page=1&per_page=100&since=2024-06-04T02%3A57%3A22Z&sort=updated&state=all HTTP/1.1 4 | Host: api.github.com 5 | User-Agent: Go-http-client/1.1 6 | 7 | HTTP/2.0 200 OK 8 | Accept-Ranges: bytes 9 | Access-Control-Allow-Origin: * 10 | Access-Control-Expose-Headers: ETag, Link, Location, Retry-After, X-GitHub-OTP, X-RateLimit-Limit, X-RateLimit-Remaining, X-RateLimit-Used, X-RateLimit-Resource, X-RateLimit-Reset, X-OAuth-Scopes, X-Accepted-OAuth-Scopes, X-Poll-Interval, X-GitHub-Media-Type, X-GitHub-SSO, X-GitHub-Request-Id, Deprecation, Sunset 11 | Cache-Control: public, max-age=60, s-maxage=60 12 | Content-Security-Policy: default-src 'none' 13 | Content-Type: application/json; charset=utf-8 14 | Date: Tue, 04 Jun 2024 12:28:22 GMT 15 | Etag: W/"97372b4a1a5b329a038fade04cd5934c5acca1f74d0515121101e2ce66b1ba4e" 16 | Referrer-Policy: origin-when-cross-origin, strict-origin-when-cross-origin 17 | Server: GitHub.com 18 | Strict-Transport-Security: max-age=31536000; includeSubdomains; preload 19 | Vary: Accept, Accept-Encoding, Accept, X-Requested-With 20 | X-Content-Type-Options: nosniff 21 | X-Frame-Options: deny 22 | X-Github-Api-Version-Selected: 2022-11-28 23 | X-Github-Media-Type: github.v3; format=json 24 | X-Github-Request-Id: DD96:493B9:15322F77:248A5191:665F0866 25 | X-Ratelimit-Limit: 60 26 | 
X-Ratelimit-Remaining: 59 27 | X-Ratelimit-Reset: 1717507702 28 | X-Ratelimit-Resource: core 29 | X-Ratelimit-Used: 1 30 | X-Xss-Protection: 0 31 | 32 | [{"url":"https://api.github.com/repos/rsc/markdown/issues/18","repository_url":"https://api.github.com/repos/rsc/markdown","labels_url":"https://api.github.com/repos/rsc/markdown/issues/18/labels{/name}","comments_url":"https://api.github.com/repos/rsc/markdown/issues/18/comments","events_url":"https://api.github.com/repos/rsc/markdown/issues/18/events","html_url":"https://github.com/rsc/markdown/pull/18","id":2276848742,"node_id":"PR_kwDOKnFwjc5ubgV0","number":18,"title":"markdown: emit Info in CodeBlock markdown","user":{"login":"juliaogris","id":1596871,"node_id":"MDQ6VXNlcjE1OTY4NzE=","avatar_url":"https://avatars.githubusercontent.com/u/1596871?v=4","gravatar_id":"","url":"https://api.github.com/users/juliaogris","html_url":"https://github.com/juliaogris","followers_url":"https://api.github.com/users/juliaogris/followers","following_url":"https://api.github.com/users/juliaogris/following{/other_user}","gists_url":"https://api.github.com/users/juliaogris/gists{/gist_id}","starred_url":"https://api.github.com/users/juliaogris/starred{/owner}{/repo}","subscriptions_url":"https://api.github.com/users/juliaogris/subscriptions","organizations_url":"https://api.github.com/users/juliaogris/orgs","repos_url":"https://api.github.com/users/juliaogris/repos","events_url":"https://api.github.com/users/juliaogris/events{/privacy}","received_events_url":"https://api.github.com/users/juliaogris/received_events","type":"User","site_admin":false},"labels":[],"state":"closed","locked":false,"assignee":null,"assignees":[],"milestone":null,"comments":2,"created_at":"2024-05-03T03:59:00Z","updated_at":"2024-06-04T02:57:22Z","closed_at":"2024-06-03T21:40:04Z","author_association":"CONTRIBUTOR","active_lock_reason":null,"draft":false,"pull_request":{"url":"https://api.github.com/repos/rsc/markdown/pulls/18","html_url":"https://github.
com/rsc/markdown/pull/18","diff_url":"https://github.com/rsc/markdown/pull/18.diff","patch_url":"https://github.com/rsc/markdown/pull/18.patch","merged_at":"2024-06-03T21:40:04Z"},"body":"Emit the Info field of CodeBlock in the CodeBlock.printMardown function so \r\nthat a round trip from markdown to markdown will preserve the language Info.","reactions":{"url":"https://api.github.com/repos/rsc/markdown/issues/18/reactions","total_count":1,"+1":1,"-1":0,"laugh":0,"hooray":0,"confused":0,"heart":0,"rocket":0,"eyes":0},"timeline_url":"https://api.github.com/repos/rsc/markdown/issues/18/timeline","performed_via_github_app":null,"state_reason":null},{"url":"https://api.github.com/repos/rsc/markdown/issues/19","repository_url":"https://api.github.com/repos/rsc/markdown","labels_url":"https://api.github.com/repos/rsc/markdown/issues/19/labels{/name}","comments_url":"https://api.github.com/repos/rsc/markdown/issues/19/comments","events_url":"https://api.github.com/repos/rsc/markdown/issues/19/events","html_url":"https://github.com/rsc/markdown/issues/19","id":2308816936,"node_id":"I_kwDOKnFwjc6JncAo","number":19,"title":"feature: synthesize lowercase anchors for 
heading","user":{"login":"adonovan","id":5658175,"node_id":"MDQ6VXNlcjU2NTgxNzU=","avatar_url":"https://avatars.githubusercontent.com/u/5658175?v=4","gravatar_id":"","url":"https://api.github.com/users/adonovan","html_url":"https://github.com/adonovan","followers_url":"https://api.github.com/users/adonovan/followers","following_url":"https://api.github.com/users/adonovan/following{/other_user}","gists_url":"https://api.github.com/users/adonovan/gists{/gist_id}","starred_url":"https://api.github.com/users/adonovan/starred{/owner}{/repo}","subscriptions_url":"https://api.github.com/users/adonovan/subscriptions","organizations_url":"https://api.github.com/users/adonovan/orgs","repos_url":"https://api.github.com/users/adonovan/repos","events_url":"https://api.github.com/users/adonovan/events{/privacy}","received_events_url":"https://api.github.com/users/adonovan/received_events","type":"User","site_admin":false},"labels":[],"state":"open","locked":false,"assignee":null,"assignees":[],"milestone":null,"comments":1,"created_at":"2024-05-21T17:56:12Z","updated_at":"2024-06-04T12:27:49Z","closed_at":null,"author_association":"NONE","active_lock_reason":null,"body":"GitHub's markdown renderer creates lowercase anchors for headings. For example, this heading, `## Diagnostic`, can be found using either of these two URLs, which differ in the case of their fragment ID:\r\n\r\nhttps://github.com/golang/tools/blob/master/gopls/doc/settings.md#diagnostic\r\nhttps://github.com/golang/tools/blob/master/gopls/doc/settings.md#Diagnostic\r\n\r\nPerhaps your markdown renderer (which has been really useful--thanks!) 
could do the same.\r\n","reactions":{"url":"https://api.github.com/repos/rsc/markdown/issues/19/reactions","total_count":0,"+1":0,"-1":0,"laugh":0,"hooray":0,"confused":0,"heart":0,"rocket":0,"eyes":0},"timeline_url":"https://api.github.com/repos/rsc/markdown/issues/19/timeline","performed_via_github_app":null,"state_reason":null}]193 2704 33 | GET https://api.github.com/repos/rsc/markdown/issues/comments?direction=asc&page=1&since=2024-06-04T02%3A57%3A21Z&sort=updated HTTP/1.1 34 | Host: api.github.com 35 | User-Agent: Go-http-client/1.1 36 | 37 | HTTP/2.0 200 OK 38 | Accept-Ranges: bytes 39 | Access-Control-Allow-Origin: * 40 | Access-Control-Expose-Headers: ETag, Link, Location, Retry-After, X-GitHub-OTP, X-RateLimit-Limit, X-RateLimit-Remaining, X-RateLimit-Used, X-RateLimit-Resource, X-RateLimit-Reset, X-OAuth-Scopes, X-Accepted-OAuth-Scopes, X-Poll-Interval, X-GitHub-Media-Type, X-GitHub-SSO, X-GitHub-Request-Id, Deprecation, Sunset 41 | Cache-Control: public, max-age=60, s-maxage=60 42 | Content-Security-Policy: default-src 'none' 43 | Content-Type: application/json; charset=utf-8 44 | Date: Tue, 04 Jun 2024 12:28:22 GMT 45 | Etag: W/"1075157bc09a784524c573b03b8c28dabbce2697b7dc47f380a9ad6d44a8badf" 46 | Referrer-Policy: origin-when-cross-origin, strict-origin-when-cross-origin 47 | Server: GitHub.com 48 | Strict-Transport-Security: max-age=31536000; includeSubdomains; preload 49 | Vary: Accept, Accept-Encoding, Accept, X-Requested-With 50 | X-Content-Type-Options: nosniff 51 | X-Frame-Options: deny 52 | X-Github-Api-Version-Selected: 2022-11-28 53 | X-Github-Media-Type: github.v3; format=json 54 | X-Github-Request-Id: DD96:493B9:15323000:248A5264:665F0866 55 | X-Ratelimit-Limit: 60 56 | X-Ratelimit-Remaining: 58 57 | X-Ratelimit-Reset: 1717507702 58 | X-Ratelimit-Resource: core 59 | X-Ratelimit-Used: 2 60 | X-Xss-Protection: 0 61 | 62 | 
[{"url":"https://api.github.com/repos/rsc/markdown/issues/comments/2146475274","html_url":"https://github.com/rsc/markdown/pull/18#issuecomment-2146475274","issue_url":"https://api.github.com/repos/rsc/markdown/issues/18","id":2146475274,"node_id":"IC_kwDOKnFwjc5_8J0K","user":{"login":"rsc","id":104030,"node_id":"MDQ6VXNlcjEwNDAzMA==","avatar_url":"https://avatars.githubusercontent.com/u/104030?v=4","gravatar_id":"","url":"https://api.github.com/users/rsc","html_url":"https://github.com/rsc","followers_url":"https://api.github.com/users/rsc/followers","following_url":"https://api.github.com/users/rsc/following{/other_user}","gists_url":"https://api.github.com/users/rsc/gists{/gist_id}","starred_url":"https://api.github.com/users/rsc/starred{/owner}{/repo}","subscriptions_url":"https://api.github.com/users/rsc/subscriptions","organizations_url":"https://api.github.com/users/rsc/orgs","repos_url":"https://api.github.com/users/rsc/repos","events_url":"https://api.github.com/users/rsc/events{/privacy}","received_events_url":"https://api.github.com/users/rsc/received_events","type":"User","site_admin":false},"created_at":"2024-06-04T02:57:21Z","updated_at":"2024-06-04T02:57:21Z","author_association":"OWNER","body":"Thanks very much!\r\n","reactions":{"url":"https://api.github.com/repos/rsc/markdown/issues/comments/2146475274/reactions","total_count":0,"+1":0,"-1":0,"laugh":0,"hooray":0,"confused":0,"heart":0,"rocket":0,"eyes":0},"performed_via_github_app":null}]231 1235 63 | GET https://api.github.com/repos/rsc/markdown/issues/events?page=1&per_page=100 HTTP/1.1 64 | Host: api.github.com 65 | User-Agent: Go-http-client/1.1 66 | If-None-Match: W/"5f8cdae3e0a577c993191ba0140691c76a0df6b824580833fc3662906ef5aaf3" 67 | 68 | HTTP/2.0 304 Not Modified 69 | Access-Control-Allow-Origin: * 70 | Access-Control-Expose-Headers: ETag, Link, Location, Retry-After, X-GitHub-OTP, X-RateLimit-Limit, X-RateLimit-Remaining, X-RateLimit-Used, X-RateLimit-Resource, X-RateLimit-Reset, 
X-OAuth-Scopes, X-Accepted-OAuth-Scopes, X-Poll-Interval, X-GitHub-Media-Type, X-GitHub-SSO, X-GitHub-Request-Id, Deprecation, Sunset 71 | Cache-Control: public, max-age=60, s-maxage=60 72 | Content-Encoding: gzip 73 | Content-Security-Policy: default-src 'none' 74 | Content-Type: application/json; charset=utf-8 75 | Date: Tue, 04 Jun 2024 12:28:23 GMT 76 | Etag: W/"5f8cdae3e0a577c993191ba0140691c76a0df6b824580833fc3662906ef5aaf3" 77 | Referrer-Policy: origin-when-cross-origin, strict-origin-when-cross-origin 78 | Server: GitHub.com 79 | Strict-Transport-Security: max-age=31536000; includeSubdomains; preload 80 | Vary: Accept, Accept-Encoding, Accept, X-Requested-With 81 | X-Content-Type-Options: nosniff 82 | X-Frame-Options: deny 83 | X-Github-Api-Version-Selected: 2022-11-28 84 | X-Github-Media-Type: github.v3; format=json 85 | X-Github-Request-Id: DD96:493B9:1532304C:248A52EE:665F0866 86 | X-Ratelimit-Limit: 60 87 | X-Ratelimit-Remaining: 57 88 | X-Ratelimit-Reset: 1717507702 89 | X-Ratelimit-Resource: core 90 | X-Ratelimit-Used: 3 91 | X-Xss-Protection: 0 92 | 93 | -------------------------------------------------------------------------------- /internal/github/sync_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2024 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 
4 | 5 | package github 6 | 7 | import ( 8 | "bytes" 9 | "errors" 10 | "iter" 11 | "net/http" 12 | "os" 13 | "path/filepath" 14 | "slices" 15 | "strings" 16 | "testing" 17 | 18 | "rsc.io/gaby/internal/httprr" 19 | "rsc.io/gaby/internal/secret" 20 | "rsc.io/gaby/internal/storage" 21 | "rsc.io/gaby/internal/storage/timed" 22 | "rsc.io/gaby/internal/testutil" 23 | ) 24 | 25 | func githubAuth() (string, string) { 26 | data, err := os.ReadFile(filepath.Join(os.Getenv("HOME"), ".netrc")) 27 | if err != nil { 28 | return "", "" 29 | } 30 | for _, line := range strings.Split(string(data), "\n") { 31 | f := strings.Fields(line) 32 | if len(f) == 6 && f[0] == "machine" && f[1] == "api.github.com" && f[2] == "login" && f[4] == "password" { 33 | return f[3], f[5] 34 | } 35 | } 36 | return "", "" 37 | } 38 | 39 | func TestMarkdown(t *testing.T) { 40 | check := testutil.Checker(t) 41 | lg := testutil.Slogger(t) 42 | db := storage.MemDB() 43 | 44 | // Initial load. 45 | rr, err := httprr.Open("../testdata/markdown.httprr", http.DefaultTransport) 46 | check(err) 47 | rr.Scrub(Scrub) 48 | sdb := secret.Empty() 49 | if rr.Recording() { 50 | sdb = secret.Netrc() 51 | } 52 | c := New(lg, db, sdb, rr.Client()) 53 | check(c.Add("rsc/markdown")) 54 | check(c.Sync()) 55 | 56 | w := c.EventWatcher("test1") 57 | for e := range w.Recent() { 58 | w.MarkOld(e.DBTime) 59 | } 60 | 61 | // Incremental update. 62 | rr, err = httprr.Open("../testdata/markdown2.httprr", http.DefaultTransport) 63 | check(err) 64 | rr.Scrub(Scrub) 65 | sdb = secret.Empty() 66 | if rr.Recording() { 67 | sdb = secret.Netrc() 68 | } 69 | c = New(lg, db, sdb, rr.Client()) 70 | check(c.Sync()) 71 | 72 | // Test that EventWatcher sees the updates. 73 | diffEvents(t, 74 | collectEventsAfter(t, 0, c.EventWatcher("test1").Recent()), 75 | markdownNewEvents) 76 | 77 | // Test that without MarkOld, Recent leaves the cursor where it was. 
78 | diffEvents(t, 79 | collectEventsAfter(t, 0, c.EventWatcher("test1").Recent()), 80 | markdownNewEvents) 81 | 82 | // Incremental update. 83 | rr, err = httprr.Open("../testdata/markdown3.httprr", http.DefaultTransport) 84 | check(err) 85 | rr.Scrub(Scrub) 86 | sdb = secret.Empty() 87 | if rr.Recording() { 88 | sdb = secret.Netrc() 89 | } 90 | c = New(lg, db, sdb, rr.Client()) 91 | check(c.Sync()) 92 | 93 | testMarkdownEvents(t, c) 94 | } 95 | 96 | func TestMarkdownIncrementalSync(t *testing.T) { 97 | check := testutil.Checker(t) 98 | lg := testutil.Slogger(t) 99 | db := storage.MemDB() 100 | 101 | // Initial load. 102 | rr, err := httprr.Open("../testdata/markdowninc.httprr", http.DefaultTransport) 103 | check(err) 104 | rr.Scrub(Scrub) 105 | sdb := secret.Empty() 106 | if rr.Recording() { 107 | sdb = secret.Netrc() 108 | } 109 | c := New(lg, db, sdb, rr.Client()) 110 | check(c.Add("rsc/markdown")) 111 | 112 | testFullSyncStop = errors.New("stop for testing") 113 | defer func() { 114 | testFullSyncStop = nil 115 | }() 116 | for { 117 | err := c.Sync() 118 | if err == nil { 119 | break 120 | } 121 | if !errors.Is(err, testFullSyncStop) { 122 | t.Fatal(err) 123 | } 124 | } 125 | 126 | testMarkdownEvents(t, c) 127 | } 128 | 129 | func testMarkdownEvents(t *testing.T, c *Client) { 130 | // All the events should be present in order. 131 | have := collectEvents(c.Events("rsc/markdown", -1, -1)) 132 | diffEvents(t, have, markdownEvents) 133 | 134 | // Again with an early break. 135 | have = have[:0] 136 | for e := range c.Events("rsc/markdown", -1, 100) { 137 | have = append(have, o(e.Project, e.Issue, e.API, e.ID)) 138 | if len(have) == len(markdownEvents)/2 { 139 | break 140 | } 141 | } 142 | diffEvents(t, have, markdownEvents[:len(markdownEvents)/2]) 143 | 144 | // Again with a different project. 
145 | for _ = range c.Events("fauxlang/faux", -1, 100) { 146 | t.Errorf("EventsAfter: project filter failed") 147 | } 148 | 149 | // The EventsByTime list should not have any duplicates, even though 150 | // the incremental sync revisited some issues. 151 | have = collectEventsAfter(t, 0, c.EventsAfter(0, "")) 152 | diffEvents(t, have, markdownEvents) 153 | 154 | // Again with an early break. 155 | have = have[:0] 156 | for e := range c.EventsAfter(0, "") { 157 | have = append(have, o(e.Project, e.Issue, e.API, e.ID)) 158 | if len(have) == len(markdownEarlyEvents) { 159 | break 160 | } 161 | } 162 | diffEvents(t, have, markdownEarlyEvents) 163 | 164 | // Again with a different project. 165 | for _ = range c.EventsAfter(0, "fauxlang/faux") { 166 | t.Errorf("EventsAfter: project filter failed") 167 | } 168 | } 169 | 170 | func diffEvents(t *testing.T, have, want [][]byte) { 171 | t.Helper() 172 | for _, key := range have { 173 | for len(want) > 0 && bytes.Compare(want[0], key) < 0 { 174 | t.Errorf("Events: missing %s", storage.Fmt(want[0])) 175 | want = want[1:] 176 | } 177 | if len(want) > 0 && bytes.Equal(key, want[0]) { 178 | want = want[1:] 179 | continue 180 | } 181 | t.Errorf("Events: unexpected %s", storage.Fmt(key)) 182 | } 183 | for len(want) > 0 { 184 | t.Errorf("Events: missing %s", storage.Fmt(want[0])) 185 | want = want[1:] 186 | } 187 | } 188 | 189 | func collectEvents(seq iter.Seq[*Event]) [][]byte { 190 | var keys [][]byte 191 | for e := range seq { 192 | keys = append(keys, o(e.Project, e.Issue, e.API, e.ID)) 193 | } 194 | return keys 195 | } 196 | 197 | func collectEventsAfter(t *testing.T, dbtime timed.DBTime, seq iter.Seq[*Event]) [][]byte { 198 | var keys [][]byte 199 | for e := range seq { 200 | if e.DBTime <= dbtime { 201 | // TODO(rsc): t.Helper probably doesn't apply here but should. 
202 | t.Errorf("EventsSince: DBTime inversion: e.DBTime %d <= last %d", e.DBTime, dbtime) 203 | } 204 | dbtime = e.DBTime 205 | keys = append(keys, o(e.Project, e.Issue, e.API, e.ID)) 206 | } 207 | slices.SortFunc(keys, bytes.Compare) 208 | return keys 209 | } 210 | 211 | func TestIvy(t *testing.T) { 212 | check := testutil.Checker(t) 213 | lg := testutil.Slogger(t) 214 | db := storage.MemDB() 215 | rr, err := httprr.Open("../testdata/ivy.httprr", http.DefaultTransport) 216 | check(err) 217 | rr.Scrub(Scrub) 218 | sdb := secret.Empty() 219 | if rr.Recording() { 220 | sdb = secret.Netrc() 221 | } 222 | c := New(lg, db, sdb, rr.Client()) 223 | check(c.Add("robpike/ivy")) 224 | check(c.Sync()) 225 | } 226 | 227 | func TestOmap(t *testing.T) { 228 | check := testutil.Checker(t) 229 | lg := testutil.Slogger(t) 230 | db := storage.MemDB() 231 | rr, err := httprr.Open("../testdata/omap.httprr", http.DefaultTransport) 232 | check(err) 233 | rr.Scrub(Scrub) 234 | sdb := secret.Empty() 235 | if rr.Recording() { 236 | sdb = secret.Netrc() 237 | } 238 | c := New(lg, db, sdb, rr.Client()) 239 | check(c.Add("rsc/omap")) 240 | check(c.Sync()) 241 | } 242 | 243 | var markdownEarlyEvents = [][]byte{ 244 | o("rsc/markdown", 3, "/issues", 2038510799), 245 | o("rsc/markdown", 2, "/issues", 2038502414), 246 | o("rsc/markdown", 4, "/issues", 2038521730), 247 | o("rsc/markdown", 1, "/issues", 2038380363), 248 | o("rsc/markdown", 6, "/issues", 2038573328), 249 | } 250 | 251 | var markdownNewEvents = [][]byte{ 252 | o("rsc/markdown", 16, "/issues", 2189605425), 253 | o("rsc/markdown", 16, "/issues/comments", 2146194902), 254 | o("rsc/markdown", 16, "/issues/events", 13027435265), 255 | o("rsc/markdown", 17, "/issues", 2189605911), 256 | o("rsc/markdown", 17, "/issues/comments", 2146194573), 257 | o("rsc/markdown", 17, "/issues/comments", 2146421109), 258 | o("rsc/markdown", 17, "/issues/events", 13027432818), 259 | o("rsc/markdown", 17, "/issues/events", 13028910699), 260 | 
o("rsc/markdown", 17, "/issues/events", 13028910702), 261 | o("rsc/markdown", 18, "/issues", 2276848742), 262 | o("rsc/markdown", 18, "/issues/comments", 2097019306), 263 | o("rsc/markdown", 18, "/issues/comments", 2146475274), 264 | o("rsc/markdown", 18, "/issues/events", 13027289256), 265 | o("rsc/markdown", 18, "/issues/events", 13027289270), 266 | o("rsc/markdown", 18, "/issues/events", 13027289466), 267 | o("rsc/markdown", 19, "/issues", 2308816936), 268 | o("rsc/markdown", 19, "/issues/comments", 2146197528), 269 | } 270 | 271 | var markdownEvents = [][]byte{ 272 | o("rsc/markdown", 1, "/issues", 2038380363), 273 | o("rsc/markdown", 1, "/issues/events", 11230676272), 274 | o("rsc/markdown", 2, "/issues", 2038502414), 275 | o("rsc/markdown", 2, "/issues/events", 11230676151), 276 | o("rsc/markdown", 3, "/issues", 2038510799), 277 | o("rsc/markdown", 3, "/issues/comments", 1852808662), 278 | o("rsc/markdown", 3, "/issues/events", 11228615168), 279 | o("rsc/markdown", 3, "/issues/events", 11228628324), 280 | o("rsc/markdown", 3, "/issues/events", 11230676181), 281 | o("rsc/markdown", 4, "/issues", 2038521730), 282 | o("rsc/markdown", 4, "/issues/events", 11230676170), 283 | o("rsc/markdown", 5, "/issues", 2038530418), 284 | o("rsc/markdown", 5, "/issues/comments", 1852919031), 285 | o("rsc/markdown", 5, "/issues/comments", 1854409176), 286 | o("rsc/markdown", 5, "/issues/events", 11230676200), 287 | o("rsc/markdown", 5, "/issues/events", 11239005964), 288 | o("rsc/markdown", 6, "/issues", 2038573328), 289 | o("rsc/markdown", 6, "/issues/events", 11230676238), 290 | o("rsc/markdown", 7, "/issues", 2040197050), 291 | o("rsc/markdown", 7, "/issues/events", 11241620840), 292 | o("rsc/markdown", 8, "/issues", 2040277497), 293 | o("rsc/markdown", 8, "/issues/comments", 1854835554), 294 | o("rsc/markdown", 8, "/issues/comments", 1854837832), 295 | o("rsc/markdown", 8, "/issues/comments", 1856133592), 296 | o("rsc/markdown", 8, "/issues/comments", 1856151124), 297 | 
o("rsc/markdown", 8, "/issues/events", 11250194227), 298 | o("rsc/markdown", 9, "/issues", 2040303458), 299 | o("rsc/markdown", 9, "/issues/events", 11241620809), 300 | o("rsc/markdown", 10, "/issues", 2076625629), 301 | o("rsc/markdown", 10, "/issues/comments", 1894927765), 302 | o("rsc/markdown", 10, "/issues/events", 11456466988), 303 | o("rsc/markdown", 10, "/issues/events", 11506360992), 304 | o("rsc/markdown", 11, "/issues", 2076798270), 305 | o("rsc/markdown", 11, "/issues/comments", 1894929190), 306 | o("rsc/markdown", 11, "/issues/events", 11506369300), 307 | o("rsc/markdown", 12, "/issues", 2137605063), 308 | o("rsc/markdown", 12, "/issues/events", 11822212932), 309 | o("rsc/markdown", 12, "/issues/events", 11942808811), 310 | o("rsc/markdown", 12, "/issues/events", 11942812866), 311 | o("rsc/markdown", 12, "/issues/events", 12028957331), 312 | o("rsc/markdown", 12, "/issues/events", 12028957356), 313 | o("rsc/markdown", 12, "/issues/events", 12028957676), 314 | o("rsc/markdown", 13, "/issues", 2182527101), 315 | o("rsc/markdown", 13, "/issues/events", 12122378461), 316 | o("rsc/markdown", 14, "/issues", 2182534654), 317 | o("rsc/markdown", 14, "/issues/events", 12122340938), 318 | o("rsc/markdown", 14, "/issues/events", 12122495521), 319 | o("rsc/markdown", 14, "/issues/events", 12122495545), 320 | o("rsc/markdown", 14, "/issues/events", 12122501258), 321 | o("rsc/markdown", 14, "/issues/events", 12122508555), 322 | o("rsc/markdown", 15, "/issues", 2187046263), 323 | o("rsc/markdown", 16, "/issues", 2189605425), 324 | o("rsc/markdown", 16, "/issues/comments", 2146194902), 325 | o("rsc/markdown", 16, "/issues/events", 13027435265), 326 | o("rsc/markdown", 17, "/issues", 2189605911), 327 | o("rsc/markdown", 17, "/issues/comments", 2146194573), 328 | o("rsc/markdown", 17, "/issues/comments", 2146421109), 329 | o("rsc/markdown", 17, "/issues/events", 12137686933), 330 | o("rsc/markdown", 17, "/issues/events", 12137688071), 331 | o("rsc/markdown", 17, 
"/issues/events", 13027432818), 332 | o("rsc/markdown", 17, "/issues/events", 13028910699), 333 | o("rsc/markdown", 17, "/issues/events", 13028910702), 334 | o("rsc/markdown", 18, "/issues", 2276848742), 335 | o("rsc/markdown", 18, "/issues/comments", 2097019306), 336 | o("rsc/markdown", 18, "/issues/comments", 2146475274), 337 | o("rsc/markdown", 18, "/issues/events", 12721108829), 338 | o("rsc/markdown", 18, "/issues/events", 13027289256), 339 | o("rsc/markdown", 18, "/issues/events", 13027289270), 340 | o("rsc/markdown", 18, "/issues/events", 13027289466), 341 | o("rsc/markdown", 19, "/issues", 2308816936), 342 | o("rsc/markdown", 19, "/issues/comments", 2146197528), 343 | } 344 | -------------------------------------------------------------------------------- /internal/commentfix/fix.go: -------------------------------------------------------------------------------- 1 | // Copyright 2024 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | // Package commentfix implements rule-based rewriting of issue comments. 6 | package commentfix 7 | 8 | import ( 9 | "fmt" 10 | "io" 11 | "log/slog" 12 | "os" 13 | "reflect" 14 | "regexp" 15 | "strings" 16 | "testing" 17 | "time" 18 | 19 | "rsc.io/gaby/internal/diff" 20 | "rsc.io/gaby/internal/github" 21 | "rsc.io/gaby/internal/storage/timed" 22 | "rsc.io/markdown" 23 | ) 24 | 25 | // A Fixer rewrites issue texts and issue comments using a set of rules. 26 | // After creating a fixer with [New], new rules can be added using 27 | // the [Fixer.AutoLink], [Fixer.ReplaceText], and [Fixer.ReplaceURL] methods, 28 | // and then repeated calls to [Fixer.Run] apply the replacements on GitHub. 29 | // 30 | // The zero value of a Fixer can be used in “offline” mode with [Fixer.Fix], 31 | // which returns rewritten Markdown. 32 | // 33 | // TODO(rsc): Separate the GitHub logic more cleanly from the rewrite logic. 
34 | type Fixer struct { 35 | slog *slog.Logger 36 | github *github.Client 37 | watcher *timed.Watcher[*github.Event] 38 | fixes []func(any, int) any 39 | projects map[string]bool 40 | edit bool 41 | timeLimit time.Time 42 | 43 | stderrw io.Writer 44 | } 45 | 46 | func (f *Fixer) stderr() io.Writer { 47 | if f.stderrw != nil { 48 | return f.stderrw 49 | } 50 | return os.Stderr 51 | } 52 | 53 | func (f *Fixer) SetStderr(w io.Writer) { 54 | f.stderrw = w 55 | } 56 | 57 | // New creates a new Fixer using the given logger and GitHub client. 58 | // 59 | // The Fixer logs status and errors to lg; if lg is nil, the Fixer does not log anything. 60 | // 61 | // The GitHub client is used to watch for new issues and comments 62 | // and to edit issues and comments. If gh is nil, the Fixer can still be 63 | // configured and applied to Markdown using [Fixer.Fix], but calling 64 | // [Fixer.Run] will panic. 65 | // 66 | // The name is the handle by which the Fixer's “last position” is retrieved 67 | // across multiple program invocations; each differently configured 68 | // Fixer needs a different name. 69 | func New(lg *slog.Logger, gh *github.Client, name string) *Fixer { 70 | f := &Fixer{ 71 | slog: lg, 72 | github: gh, 73 | projects: make(map[string]bool), 74 | timeLimit: time.Now().Add(-30 * 24 * time.Hour), 75 | } 76 | f.init() // set f.slog if lg==nil 77 | if gh != nil { 78 | f.watcher = gh.EventWatcher("commentfix.Fixer:" + name) 79 | } 80 | return f 81 | } 82 | 83 | // SetTimeLimit sets the time before which comments are not edited. 84 | func (f *Fixer) SetTimeLimit(limit time.Time) { 85 | f.timeLimit = limit 86 | } 87 | 88 | // init makes sure slog is non-nil. 
89 | func (f *Fixer) init() { 90 | if f.slog == nil { 91 | f.slog = slog.New(slog.NewTextHandler(io.Discard, nil)) 92 | } 93 | } 94 | 95 | func (f *Fixer) EnableProject(name string) { 96 | f.init() 97 | if f.github == nil { 98 | panic("commentfix.Fixer: EnableProject missing GitHub client") 99 | } 100 | f.projects[name] = true 101 | } 102 | 103 | // EnableEdits configures the fixer to make edits to comments on GitHub. 104 | // If EnableEdits is not called, the Fixer only prints what it would do, 105 | // and it does not mark the issues and comments as “old”. 106 | // This default mode is useful for experimenting with a Fixer 107 | // to gauge its effects. 108 | // 109 | // EnableEdits panics if the Fixer was not constructed by calling [New] 110 | // with a non-nil [github.Client]. 111 | func (f *Fixer) EnableEdits() { 112 | f.init() 113 | if f.github == nil { 114 | panic("commentfix.Fixer: EnableEdits missing GitHub client") 115 | } 116 | f.edit = true 117 | } 118 | 119 | // AutoLink instructs the fixer to turn any text matching the 120 | // regular expression pattern into a link to the URL. 121 | // The URL can contain substitution values like $1 122 | // as supported by [regexp.Regexp.Expand]. 
123 | // 124 | // For example, to link CL nnn to https://go.dev/cl/nnn, 125 | // you could use: 126 | // 127 | // f.AutoLink(`\bCL (\d+)\b`, "https://go.dev/cl/$1") 128 | func (f *Fixer) AutoLink(pattern, url string) error { 129 | f.init() 130 | re, err := regexp.Compile(pattern) 131 | if err != nil { 132 | return err 133 | } 134 | f.fixes = append(f.fixes, func(x any, flags int) any { 135 | if flags&flagLink != 0 { 136 | // already inside link 137 | return nil 138 | } 139 | plain, ok := x.(*markdown.Plain) 140 | if !ok { 141 | return nil 142 | } 143 | var out []markdown.Inline 144 | start := 0 145 | text := plain.Text 146 | for _, m := range re.FindAllStringSubmatchIndex(text, -1) { 147 | if start < m[0] { 148 | out = append(out, &markdown.Plain{Text: text[start:m[0]]}) 149 | } 150 | link := string(re.ExpandString(nil, url, text, m)) 151 | out = append(out, &markdown.Link{ 152 | Inner: []markdown.Inline{&markdown.Plain{Text: text[m[0]:m[1]]}}, 153 | URL: link, 154 | }) 155 | start = m[1] 156 | } 157 | if start == 0 { 158 | return nil 159 | } 160 | out = append(out, &markdown.Plain{Text: text[start:]}) 161 | return out 162 | }) 163 | return nil 164 | } 165 | 166 | // ReplaceText instructs the fixer to replace any text 167 | // matching the regular expression pattern with the replacement repl. 168 | // The replacement can contain substitution values like $1 169 | // as supported by [regexp.Regexp.Expand]. 170 | // 171 | // ReplaceText only applies in Markdown plain text. 172 | // It does not apply in backticked code text, or in backticked 173 | // or indented code blocks, or to URLs. 174 | // It does apply to the plain text inside headings, 175 | // inside bold, italic, or link markup. 
176 | // 177 | // For example, you could correct “cancelled” to “canceled”, 178 | // following Go's usual conventions, with: 179 | // 180 | // f.ReplaceText(`cancelled`, "canceled") 181 | func (f *Fixer) ReplaceText(pattern, repl string) error { 182 | f.init() 183 | re, err := regexp.Compile(pattern) 184 | if err != nil { 185 | return err 186 | } 187 | f.fixes = append(f.fixes, func(x any, flags int) any { 188 | plain, ok := x.(*markdown.Plain) 189 | if !ok { 190 | return nil 191 | } 192 | if re.FindStringSubmatchIndex(plain.Text) == nil { 193 | return nil 194 | } 195 | plain.Text = re.ReplaceAllString(plain.Text, repl) 196 | return plain 197 | }) 198 | return nil 199 | } 200 | 201 | // ReplaceURL instructs the fixer to replace any linked URLs 202 | // matching the regular expression pattern with the replacement URL repl. 203 | // The replacement can contain substitution values like $1 204 | // as supported by [regexp.Regexp.Expand]. 205 | // 206 | // The regular expression pattern is automatically anchored 207 | // to the start of the URL: there is no need to start it with \A or ^. 
208 | // 209 | // For example, to replace links to golang.org with links to go.dev, 210 | // you could use: 211 | // 212 | // f.ReplaceURL(`https://golang\.org(/?)`, "https://go.dev$1") 213 | func (f *Fixer) ReplaceURL(pattern, repl string) error { 214 | f.init() 215 | re, err := regexp.Compile(`\A(?:` + pattern + `)`) 216 | if err != nil { 217 | return err 218 | } 219 | f.fixes = append(f.fixes, func(x any, flags int) any { 220 | switch x := x.(type) { 221 | case *markdown.AutoLink: 222 | old := x.URL 223 | x.URL = re.ReplaceAllString(x.URL, repl) 224 | if x.URL == old { 225 | return nil 226 | } 227 | if x.Text == old { 228 | x.Text = x.URL 229 | } 230 | return x 231 | case *markdown.Link: 232 | old := x.URL 233 | x.URL = re.ReplaceAllString(x.URL, repl) 234 | if x.URL == old { 235 | return nil 236 | } 237 | if len(x.Inner) == 1 { 238 | if p, ok := x.Inner[0].(*markdown.Plain); ok && p.Text == old { 239 | p.Text = x.URL 240 | } 241 | } 242 | return x 243 | } 244 | return nil 245 | }) 246 | return nil 247 | } 248 | 249 | // Run applies the configured rewrites to issue texts and comments on GitHub 250 | // that have been updated since the last call to Run for this fixer with edits enabled 251 | // (including in different program invocations using the same fixer name). 252 | // Run ignores issues texts and comments more than 30 days old. 253 | // 254 | // Run prints diffs of its edits to standard error in addition to logging them, 255 | // because slog logs the diffs as single-line Go quoted strings that are 256 | // too difficult to skim. 257 | // 258 | // If [Fixer.EnableEdits] has not been called, Run processes recent issue texts 259 | // and comments and prints diffs of its intended edits to standard error, 260 | // but it does not make the changes. It also does not mark the issues and comments as processed, 261 | // so that a future call to Run with edits enabled can rewrite them on GitHub. 262 | // 263 | // Run sleeps for 1 second after each GitHub edit. 
264 | // 265 | // Run panics if the Fixer was not constructed by calling [New] 266 | // with a non-nil [github.Client]. 267 | func (f *Fixer) Run() { 268 | if f.watcher == nil { 269 | panic("commentfix.Fixer: Run missing GitHub client") 270 | } 271 | for e := range f.watcher.Recent() { 272 | if !f.projects[e.Project] { 273 | continue 274 | } 275 | var ic *issueOrComment 276 | switch x := e.Typed.(type) { 277 | default: 278 | continue 279 | case *github.Issue: 280 | if x.PullRequest != nil { 281 | // Do not edit pull request bodies, 282 | // because they turn into commit messages 283 | // and cannot contain things like hyperlinks. 284 | continue 285 | } 286 | ic = &issueOrComment{issue: x} 287 | case *github.IssueComment: 288 | ic = &issueOrComment{comment: x} 289 | } 290 | if tm, err := time.Parse(time.RFC3339, ic.updatedAt()); err == nil && tm.Before(f.timeLimit) { 291 | if f.edit { 292 | f.watcher.MarkOld(e.DBTime) 293 | } 294 | continue 295 | } 296 | body, updated := f.Fix(ic.body()) 297 | if !updated { 298 | continue 299 | } 300 | live, err := ic.download(f.github) 301 | if err != nil { 302 | // unreachable unless github error 303 | f.slog.Error("commentfix download error", "project", e.Project, "issue", e.Issue, "url", ic.url(), "err", err) 304 | continue 305 | } 306 | if live.body() != ic.body() { 307 | f.slog.Info("commentfix stale", "project", e.Project, "issue", e.Issue, "url", ic.url()) 308 | continue 309 | } 310 | f.slog.Info("commentfix rewrite", "project", e.Project, "issue", e.Issue, "url", ic.url(), "edit", f.edit, "diff", bodyDiff(ic.body(), body)) 311 | fmt.Fprintf(f.stderr(), "Fix %s:\n%s\n", ic.url(), bodyDiff(ic.body(), body)) 312 | if f.edit { 313 | f.slog.Info("commentfix editing github", "url", ic.url()) 314 | if err := ic.editBody(f.github, body); err != nil { 315 | // unreachable unless github error 316 | f.slog.Error("commentfix edit", "project", e.Project, "issue", e.Issue, "err", err) 317 | continue 318 | } 319 | 
f.watcher.MarkOld(e.DBTime) 320 | f.watcher.Flush() 321 | if !testing.Testing() { 322 | // unreachable in tests 323 | time.Sleep(1 * time.Second) 324 | } 325 | } 326 | } 327 | } 328 | 329 | type issueOrComment struct { 330 | issue *github.Issue 331 | comment *github.IssueComment 332 | } 333 | 334 | func (ic *issueOrComment) updatedAt() string { 335 | if ic.issue != nil { 336 | return ic.issue.UpdatedAt 337 | } 338 | return ic.comment.UpdatedAt 339 | } 340 | 341 | func (ic *issueOrComment) body() string { 342 | if ic.issue != nil { 343 | return ic.issue.Body 344 | } 345 | return ic.comment.Body 346 | } 347 | 348 | func (ic *issueOrComment) download(gh *github.Client) (*issueOrComment, error) { 349 | if ic.issue != nil { 350 | live, err := gh.DownloadIssue(ic.issue.URL) 351 | return &issueOrComment{issue: live}, err 352 | } 353 | live, err := gh.DownloadIssueComment(ic.comment.URL) 354 | return &issueOrComment{comment: live}, err 355 | } 356 | 357 | func (ic *issueOrComment) url() string { 358 | if ic.issue != nil { 359 | return ic.issue.URL 360 | } 361 | return ic.comment.URL 362 | } 363 | 364 | func (ic *issueOrComment) editBody(gh *github.Client, body string) error { 365 | if ic.issue != nil { 366 | return gh.EditIssue(ic.issue, &github.IssueChanges{Body: body}) 367 | } 368 | return gh.EditIssueComment(ic.comment, &github.IssueCommentChanges{Body: body}) 369 | } 370 | 371 | // Fix applies the configured rewrites to the markdown text. 372 | // If no fixes apply, it returns "", false. 373 | // If any fixes apply, it returns the updated text and true. 
374 | func (f *Fixer) Fix(text string) (newText string, fixed bool) { 375 | p := &markdown.Parser{ 376 | AutoLinkText: true, 377 | Strikethrough: true, 378 | HeadingIDs: true, 379 | Emoji: true, 380 | } 381 | doc := p.Parse(text) 382 | for _, fixer := range f.fixes { 383 | if f.fixOne(fixer, doc) { 384 | fixed = true 385 | } 386 | } 387 | if !fixed { 388 | return "", false 389 | } 390 | return markdown.ToMarkdown(doc), true 391 | } 392 | 393 | const ( 394 | // flagLink means this inline is link text, 395 | // so it is inappropriate/impossible to turn 396 | // it into a (nested) hyperlink. 397 | flagLink = 1 << iota 398 | ) 399 | 400 | // fixOne runs one fix function over doc, 401 | // reporting whether doc was changed. 402 | func (f *Fixer) fixOne(fix func(any, int) any, doc *markdown.Document) (fixed bool) { 403 | var ( 404 | fixBlock func(markdown.Block) 405 | fixInlines func(*[]markdown.Inline) 406 | ) 407 | fixBlock = func(x markdown.Block) { 408 | switch x := x.(type) { 409 | case *markdown.Document: 410 | for _, sub := range x.Blocks { 411 | fixBlock(sub) 412 | } 413 | case *markdown.Quote: 414 | for _, sub := range x.Blocks { 415 | fixBlock(sub) 416 | } 417 | case *markdown.List: 418 | for _, sub := range x.Items { 419 | fixBlock(sub) 420 | } 421 | case *markdown.Item: 422 | for _, sub := range x.Blocks { 423 | fixBlock(sub) 424 | } 425 | case *markdown.Heading: 426 | fixBlock(x.Text) 427 | case *markdown.Paragraph: 428 | fixBlock(x.Text) 429 | case *markdown.Text: 430 | fixInlines(&x.Inline) 431 | } 432 | } 433 | 434 | link := 0 435 | fixInlines = func(inlines *[]markdown.Inline) { 436 | changed := false 437 | var out []markdown.Inline 438 | for _, x := range *inlines { 439 | switch x := x.(type) { 440 | case *markdown.Del: 441 | fixInlines(&x.Inner) 442 | case *markdown.Emph: 443 | fixInlines(&x.Inner) 444 | case *markdown.Strong: 445 | fixInlines(&x.Inner) 446 | case *markdown.Link: 447 | link++ 448 | fixInlines(&x.Inner) 449 | link-- 450 | } 451 | flags 
:= 0 452 | if link > 0 { 453 | flags = flagLink 454 | } 455 | switch fx := fix(x, flags).(type) { 456 | default: 457 | // unreachable unless bug in fix func 458 | f.slog.Error("fixer returned invalid type", "old", reflect.TypeOf(x).String(), "new", reflect.TypeOf(fx).String()) 459 | out = append(out, x) 460 | case nil: 461 | out = append(out, x) 462 | case markdown.Inline: 463 | changed = true 464 | out = append(out, fx) 465 | case []markdown.Inline: 466 | changed = true 467 | out = append(out, fx...) 468 | } 469 | } 470 | if changed { 471 | *inlines = out 472 | fixed = true 473 | } 474 | } 475 | 476 | fixBlock(doc) 477 | return fixed 478 | } 479 | 480 | func bodyDiff(old, new string) string { 481 | old = strings.TrimRight(old, "\n") + "\n" 482 | old = strings.ReplaceAll(old, "\r\n", "\n") 483 | 484 | new = strings.TrimRight(new, "\n") + "\n" 485 | new = strings.ReplaceAll(new, "\r\n", "\n") 486 | 487 | return string(diff.Diff("old", []byte(old), "new", []byte(new))) 488 | } 489 | --------------------------------------------------------------------------------