├── pkg ├── fulltext │ ├── fulltext.go │ ├── scraper_test.go │ └── scraper.go ├── search │ ├── search.go │ └── sqlSearchProvider.go ├── logging │ └── logging.go ├── config │ └── config.go ├── persistence │ ├── testutils │ │ └── testutils.go │ ├── migrations │ │ ├── 01_init.sql │ │ └── 02_fts_fragments.sql │ ├── persistence_test.go │ └── persistence.go ├── util │ ├── util_test.go │ └── util.go ├── extractors │ ├── browserparrot.go │ ├── orion.go │ ├── safari.go │ ├── sigmaos.go │ ├── firefox.go │ ├── chromium.go │ ├── historyTrendsUnlimited.go │ └── extractors.go ├── types │ └── types.go ├── populate │ ├── populate.go │ ├── fulltext.go │ └── searchIndex.go └── tui │ └── tui.go ├── .gitignore ├── main.go ├── cmd ├── dev.go ├── import.go ├── devDb.go ├── dbPath.go ├── devReindex.go ├── dbMigrate.go ├── importBrowserparrot.go ├── root.go ├── importHistorytrends.go ├── backup.go ├── search.go ├── devFullText.go └── populate.go ├── Makefile ├── LICENSE ├── .vscode └── launch.json ├── .github └── workflows │ └── go-build.yml ├── go.mod ├── readme.md └── go.sum /pkg/fulltext/fulltext.go: -------------------------------------------------------------------------------- 1 | package fulltext 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | browser-gopher 2 | index.bleve 3 | .envrc 4 | dist 5 | tmp 6 | -------------------------------------------------------------------------------- /main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "github.com/iansinnott/browser-gopher/cmd" 5 | ) 6 | 7 | func main() { 8 | cmd.Execute() 9 | } 10 | -------------------------------------------------------------------------------- /cmd/dev.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "github.com/spf13/cobra" 5 | ) 6 
| 7 | var devCmd = &cobra.Command{ 8 | Use: "dev", 9 | Short: "Dev tools", 10 | Long: `Currently there are no dev tools...`, 11 | } 12 | 13 | func init() { 14 | rootCmd.AddCommand(devCmd) 15 | } 16 | -------------------------------------------------------------------------------- /cmd/import.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "github.com/spf13/cobra" 5 | ) 6 | 7 | var importCmd = &cobra.Command{ 8 | Use: "import", 9 | Short: "Import from various data sources. ", 10 | Long: `Importing is used for pulling data from non-browser sources into the URL database.`, 11 | } 12 | 13 | func init() { 14 | rootCmd.AddCommand(importCmd) 15 | } 16 | -------------------------------------------------------------------------------- /pkg/search/search.go: -------------------------------------------------------------------------------- 1 | package search 2 | 3 | import ( 4 | "github.com/iansinnott/browser-gopher/pkg/types" 5 | ) 6 | 7 | type URLQueryResult struct { 8 | Urls []types.UrlDbEntity 9 | Count uint 10 | } 11 | 12 | type SearchResult struct { 13 | Urls []types.SearchableEntity 14 | Count uint 15 | } 16 | 17 | type SearchProvider interface { 18 | SearchUrls(query string) (*SearchResult, error) 19 | } 20 | 21 | type DataProvider interface { 22 | SearchProvider 23 | RecentUrls(limit uint) (*SearchResult, error) 24 | } 25 | -------------------------------------------------------------------------------- /cmd/devDb.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | 7 | "github.com/iansinnott/browser-gopher/pkg/config" 8 | "github.com/iansinnott/browser-gopher/pkg/persistence" 9 | "github.com/spf13/cobra" 10 | ) 11 | 12 | var dbCmd = &cobra.Command{ 13 | Use: "db", 14 | Short: "Database related commands", 15 | } 16 | 17 | var dbInitCmd = &cobra.Command{ 18 | Use: "init", 19 | Short: "Initialize the database", 
20 | Run: func(cmd *cobra.Command, args []string) { 21 | // init the db 22 | _, err := persistence.InitDb(cmd.Context(), config.Config) 23 | if err != nil { 24 | fmt.Println("error initializing db", err) 25 | os.Exit(1) 26 | } 27 | 28 | fmt.Println("db initialized: " + config.Config.DBPath) 29 | }, 30 | } 31 | 32 | func init() { 33 | dbCmd.AddCommand(dbInitCmd) 34 | devCmd.AddCommand(dbCmd) 35 | } 36 | -------------------------------------------------------------------------------- /cmd/dbPath.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright © 2022 NAME HERE 3 | */ 4 | package cmd 5 | 6 | import ( 7 | "fmt" 8 | 9 | "github.com/iansinnott/browser-gopher/pkg/config" 10 | "github.com/spf13/cobra" 11 | ) 12 | 13 | // dbPathCmd represents the dbPath command 14 | var dbPathCmd = &cobra.Command{ 15 | Use: "db-path", 16 | Short: "Print the path to the database", 17 | Long: ` 18 | Print the path to the database. Useful if you want to use SQL on your database 19 | directly. 
20 | 21 | Example: 22 | # Print the path 23 | browser-gopher db-path 24 | 25 | # Use the path to connect via sqlite3 26 | sqlite3 $(browser-gopher db-path) 'SELECT * FROM urls LIMIT 3;' 27 | 28 | `, 29 | Run: func(cmd *cobra.Command, args []string) { 30 | fmt.Println(config.Config.DBPath) 31 | }, 32 | } 33 | 34 | func init() { 35 | rootCmd.AddCommand(dbPathCmd) 36 | } 37 | -------------------------------------------------------------------------------- /pkg/logging/logging.go: -------------------------------------------------------------------------------- 1 | package logging 2 | 3 | import ( 4 | "io" 5 | "log" 6 | "os" 7 | ) 8 | 9 | var debugLogger *log.Logger = log.New(os.Stderr, "DEBUG: ", log.Ldate|log.Ltime|log.Lshortfile) 10 | var warnLogger *log.Logger = log.New(os.Stderr, "WARN: ", log.Ldate|log.Ltime) 11 | var errLogger *log.Logger = log.New(os.Stderr, "ERROR: ", log.Ldate|log.Ltime|log.Lshortfile) 12 | var quietLogger *log.Logger = log.New(io.Discard, "", 0) 13 | 14 | const DEBUG = 1 15 | 16 | var LOG_LEVEL = 0 17 | 18 | func IsDebug() bool { 19 | return LOG_LEVEL == DEBUG 20 | } 21 | 22 | func SetLogLevel(level int) { 23 | LOG_LEVEL = level 24 | } 25 | 26 | // Debug returns a logger that will only log in debug mode. 
Set the 27 | func Debug() *log.Logger { 28 | if LOG_LEVEL != DEBUG { 29 | return quietLogger 30 | } 31 | 32 | return debugLogger 33 | } 34 | 35 | func Warn() *log.Logger { 36 | return warnLogger 37 | } 38 | 39 | func Error() *log.Logger { 40 | return errLogger 41 | } 42 | -------------------------------------------------------------------------------- /pkg/config/config.go: -------------------------------------------------------------------------------- 1 | package config 2 | 3 | import ( 4 | "log" 5 | "os" 6 | "path/filepath" 7 | 8 | "github.com/iansinnott/browser-gopher/pkg/util" 9 | ) 10 | 11 | type AppConfig struct { 12 | AppDataPath string 13 | BackupDir string 14 | DBPath string 15 | } 16 | 17 | // initialize the config object and perform setup tasks. 18 | func newConfig() *AppConfig { 19 | conf := &AppConfig{ 20 | AppDataPath: util.Expanduser(filepath.Join("~", ".config", "browser-gopher")), 21 | BackupDir: util.Expanduser(filepath.Join("~", ".cache", "browser-gopher")), 22 | } 23 | 24 | err := os.MkdirAll(conf.AppDataPath, 0755) 25 | if err != nil { 26 | log.Fatal("could not create app data path: "+conf.AppDataPath, err) 27 | } 28 | 29 | err = os.MkdirAll(conf.BackupDir, 0755) 30 | if err != nil { 31 | log.Fatal("could not create app data path: "+conf.AppDataPath, err) 32 | } 33 | 34 | conf.DBPath = filepath.Join(conf.AppDataPath, "db.sqlite") 35 | 36 | return conf 37 | } 38 | 39 | var Config *AppConfig = newConfig() 40 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | NAME = $(shell basename `pwd`) 2 | VERSION = $(shell git describe --tags --always) 3 | OUTDIR = dist/$(NAME)-$(VERSION) 4 | 5 | all: 6 | 7 | dist: 8 | @mkdir -p dist/$(NAME)-$(VERSION) 9 | 10 | .PHONY: outdir 11 | outdir: 12 | @echo "dist/$(NAME)-$(VERSION)" 13 | 14 | build: dist 15 | @echo "Building with system defaults..." 
16 | @CGO_ENABLED=0 go build -ldflags "-X github.com/iansinnott/browser-gopher/cmd.Version=$(VERSION)" 17 | @echo "Building ..." 18 | CGO_ENABLED=0 GOOS=darwin GOARCH=arm64 go build -ldflags "-X github.com/iansinnott/browser-gopher/cmd.Version=$(VERSION)" -o $(OUTDIR)/$(NAME)-darwin-arm64 19 | CGO_ENABLED=0 GOOS=darwin GOARCH=amd64 go build -ldflags "-X github.com/iansinnott/browser-gopher/cmd.Version=$(VERSION)" -o $(OUTDIR)/$(NAME)-darwin-amd64 20 | CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build -ldflags "-X github.com/iansinnott/browser-gopher/cmd.Version=$(VERSION)" -o $(OUTDIR)/$(NAME)-linux-amd64 21 | CGO_ENABLED=0 GOOS=linux GOARCH=arm64 go build -ldflags "-X github.com/iansinnott/browser-gopher/cmd.Version=$(VERSION)" -o $(OUTDIR)/$(NAME)-linux-arm64 22 | @echo "Done." -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Ian Sinnott 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /cmd/devReindex.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | "time" 7 | 8 | "github.com/iansinnott/browser-gopher/pkg/config" 9 | "github.com/iansinnott/browser-gopher/pkg/persistence" 10 | "github.com/iansinnott/browser-gopher/pkg/populate" 11 | "github.com/spf13/cobra" 12 | ) 13 | 14 | var reindexCmd = &cobra.Command{ 15 | Use: "reindex", 16 | Short: "Reindex all URL records in the search index", 17 | Run: func(cmd *cobra.Command, args []string) { 18 | limit, err := cmd.Flags().GetInt("limit") 19 | if err != nil { 20 | fmt.Println("could not parse --limit:", err) 21 | os.Exit(1) 22 | } 23 | 24 | dbConn, err := persistence.InitDb(cmd.Context(), config.Config) 25 | if err != nil { 26 | fmt.Println("could not open our db", err) 27 | os.Exit(1) 28 | } 29 | 30 | fmt.Println("Reindexing everything...") 31 | t := time.Now() 32 | n, err := populate.ReindexWithLimit(cmd.Context(), dbConn, limit) 33 | if err != nil { 34 | fmt.Println("encountered an error building the search index", err) 35 | os.Exit(1) 36 | } 37 | fmt.Printf("Indexed %d records in %v\n", n, time.Since(t)) 38 | }, 39 | } 40 | 41 | func init() { 42 | reindexCmd.Flags().Int("limit", 0, "Limit the number of records to index") 43 | devCmd.AddCommand(reindexCmd) 44 | } 45 | -------------------------------------------------------------------------------- /pkg/persistence/testutils/testutils.go: -------------------------------------------------------------------------------- 1 | package testutils 2 | 3 | import ( 4 | "database/sql" 5 | _ "embed" 6 | "sort" 7 | 
"strings" 8 | "testing" 9 | 10 | "github.com/iansinnott/browser-gopher/pkg/persistence" 11 | "github.com/pkg/errors" 12 | ) 13 | 14 | // get an in-memory db connection. Don't forget to close your connection when done. 15 | func GetTestDBConn(t *testing.T) (*sql.DB, error) { 16 | conn, err := sql.Open("sqlite", ":memory:") 17 | if err != nil { 18 | return nil, errors.Wrap(err, "could not open test db") 19 | } 20 | 21 | entries, err := persistence.MigrationsDir.ReadDir("migrations") 22 | if err != nil { 23 | return nil, err 24 | } 25 | 26 | // make sure the migrations are sorted 27 | sort.Slice(entries, func(i, j int) bool { 28 | return entries[i].Name() < entries[j].Name() 29 | }) 30 | 31 | for _, entry := range entries { 32 | // skip files that are not migrations 33 | if entry.IsDir() || !strings.HasSuffix(entry.Name(), ".sql") { 34 | continue 35 | } 36 | 37 | filePath := "migrations/" + entry.Name() 38 | 39 | migration, err := persistence.MigrationsDir.ReadFile(filePath) 40 | if err != nil { 41 | return nil, err 42 | } 43 | 44 | _, err = conn.Exec(string(migration)) 45 | if err != nil { 46 | return nil, err 47 | } 48 | } 49 | 50 | return conn, err 51 | } 52 | -------------------------------------------------------------------------------- /cmd/dbMigrate.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright © 2023 NAME HERE 3 | */ 4 | package cmd 5 | 6 | import ( 7 | "fmt" 8 | "os" 9 | 10 | "github.com/iansinnott/browser-gopher/pkg/config" 11 | "github.com/iansinnott/browser-gopher/pkg/persistence" 12 | "github.com/spf13/cobra" 13 | ) 14 | 15 | // dbMigrateCmd represents the dbMigrate command 16 | var dbMigrateCmd = &cobra.Command{ 17 | Use: "db-migrate", 18 | Short: "Migrate the database and do nothing else.", 19 | Long: `Migrate the database and do nothing else. 
This is useful in development.`, 20 | Run: func(cmd *cobra.Command, args []string) { 21 | db, err := persistence.InitDb(cmd.Context(), config.Config) 22 | if err != nil { 23 | fmt.Println(err) 24 | os.Exit(1) 25 | } 26 | defer db.Close() 27 | fmt.Println("Database migrated successfully.") 28 | }, 29 | } 30 | 31 | func init() { 32 | rootCmd.AddCommand(dbMigrateCmd) 33 | 34 | // Here you will define your flags and configuration settings. 35 | 36 | // Cobra supports Persistent Flags which will work for this command 37 | // and all subcommands, e.g.: 38 | // dbMigrateCmd.PersistentFlags().String("foo", "", "A help for foo") 39 | 40 | // Cobra supports local flags which will only run when this command 41 | // is called directly, e.g.: 42 | // dbMigrateCmd.Flags().BoolP("toggle", "t", false, "Help message for toggle") 43 | } 44 | -------------------------------------------------------------------------------- /pkg/util/util_test.go: -------------------------------------------------------------------------------- 1 | package util_test 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/iansinnott/browser-gopher/pkg/util" 7 | "github.com/stretchr/testify/require" 8 | ) 9 | 10 | func TestReverseSlice(t *testing.T) { 11 | 12 | table := []struct { 13 | name string 14 | a []string 15 | expected []string 16 | }{ 17 | { 18 | name: "empty slice", 19 | a: []string{}, 20 | expected: []string{}, 21 | }, 22 | {"single item slice", []string{"a"}, []string{"a"}}, 23 | {"two item slice", []string{"a", "b"}, []string{"b", "a"}}, 24 | {"three item slice", []string{"a", "b", "c"}, []string{"c", "b", "a"}}, 25 | {"three item slice", []string{"a", "bb", "c"}, []string{"c", "bb", "a"}}, 26 | {"multi-slice", []string{"abc", "bb", "heyo"}, []string{"heyo", "bb", "abc"}}, 27 | } 28 | 29 | for _, tt := range table { 30 | t.Run(tt.name, func(t *testing.T) { 31 | actual := util.ReverseSlice(tt.a) 32 | require.ElementsMatch(t, tt.expected, actual) 33 | }) 34 | } 35 | 36 | intsTable := []struct { 37 | 
name string 38 | a []int 39 | expected []int 40 | }{ 41 | {"empty slice", []int{}, []int{}}, 42 | {"single item slice", []int{12, 3, 58, 2}, []int{2, 58, 3, 12}}, 43 | } 44 | 45 | for _, tt := range intsTable { 46 | t.Run(tt.name, func(t *testing.T) { 47 | actual := util.ReverseSlice(tt.a) 48 | require.ElementsMatch(t, tt.expected, actual) 49 | }) 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /cmd/importBrowserparrot.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | 7 | "github.com/iansinnott/browser-gopher/pkg/extractors" 8 | "github.com/iansinnott/browser-gopher/pkg/populate" 9 | "github.com/iansinnott/browser-gopher/pkg/util" 10 | "github.com/spf13/cobra" 11 | ) 12 | 13 | // browserparrotCmd represents the browserparrot command 14 | var browserparrotCmd = &cobra.Command{ 15 | Use: "browserparrot", 16 | Short: "Import from a BrowserParrot database", 17 | Long: `If you have not previously used BrowserParrot this does not apply. This 18 | command will import all URLs from BrowserParrot since you may already have 19 | many URLs in there which are no longer present in the history databases of the 20 | original browsers. 
21 | 22 | Using the command without any args will try the default location for the 23 | BrowserParrot database, and should work in most cases.`, 24 | Run: func(cmd *cobra.Command, args []string) { 25 | dbPath, err := cmd.Flags().GetString("db-path") 26 | if err != nil { 27 | fmt.Println("could not parse --db-path:", err) 28 | os.Exit(1) 29 | } 30 | 31 | browserparrot := &extractors.BrowserParrotExtractor{ 32 | HistoryDBPath: util.Expanduser(dbPath), 33 | Name: "browserparrot", 34 | } 35 | err = populate.PopulateAll(browserparrot) 36 | if err != nil { 37 | fmt.Println(err) 38 | os.Exit(1) 39 | } 40 | fmt.Println("Done.") 41 | }, 42 | } 43 | 44 | func init() { 45 | importCmd.AddCommand(browserparrotCmd) 46 | browserparrotCmd.Flags().String("db-path", "~/.config/persistory/persistory.db", "The path to the database") 47 | } 48 | -------------------------------------------------------------------------------- /pkg/persistence/migrations/01_init.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE IF NOT EXISTS "urls" ( 2 | "url_md5" VARCHAR(32) PRIMARY KEY NOT NULL, 3 | "url" TEXT UNIQUE NOT NULL, 4 | "title" TEXT, 5 | "description" TEXT, 6 | "last_visit" INTEGER 7 | ); 8 | 9 | CREATE TABLE IF NOT EXISTS "urls_meta" ( 10 | "id" INTEGER PRIMARY KEY AUTOINCREMENT, 11 | "url_md5" VARCHAR(32) UNIQUE NOT NULL REFERENCES urls(url_md5), 12 | "indexed_at" INTEGER 13 | ); 14 | 15 | CREATE TABLE IF NOT EXISTS "visits" ( 16 | "id" INTEGER PRIMARY KEY AUTOINCREMENT, 17 | "url_md5" VARCHAR(32) NOT NULL REFERENCES urls(url_md5), 18 | "visit_time" INTEGER, 19 | "extractor_name" TEXT 20 | ); 21 | 22 | CREATE UNIQUE INDEX IF NOT EXISTS visits_unique ON visits(url_md5, visit_time); 23 | CREATE INDEX IF NOT EXISTS visits_url_md5 ON visits(url_md5); 24 | 25 | CREATE TABLE IF NOT EXISTS "documents" ( 26 | "document_md5" VARCHAR(32) PRIMARY KEY NOT NULL, 27 | "body" TEXT, 28 | "status_code" INTEGER, 29 | "accessed_at" INTEGER 30 | ); 31 | 32 
| CREATE TABLE IF NOT EXISTS "url_document_edges" ( 33 | "id" INTEGER PRIMARY KEY AUTOINCREMENT, 34 | "url_md5" VARCHAR(32) UNIQUE NOT NULL REFERENCES urls(url_md5), 35 | "document_md5" VARCHAR(32) NOT NULL REFERENCES documents(document_md5) 36 | ); 37 | 38 | CREATE VIEW IF NOT EXISTS "searchable_data" AS 39 | SELECT 40 | urls.rowid as url_rowid, 41 | urls.url_md5, 42 | urls.url, 43 | urls.title, 44 | urls.description, 45 | documents.document_md5, 46 | documents.body 47 | FROM 48 | urls 49 | LEFT OUTER JOIN url_document_edges ON urls.url_md5 = url_document_edges.url_md5 50 | LEFT OUTER JOIN documents ON url_document_edges.document_md5 = documents.document_md5; -------------------------------------------------------------------------------- /.vscode/launch.json: -------------------------------------------------------------------------------- 1 | { 2 | // Use IntelliSense to learn about possible attributes. 3 | // Hover to view descriptions of existing attributes. 4 | // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 5 | "version": "0.2.0", 6 | "configurations": [ 7 | { 8 | "name": "search", 9 | "type": "go", 10 | "request": "launch", 11 | "mode": "auto", 12 | "program": "${workspaceFolder}", 13 | "args": [ 14 | "search", 15 | "githu*" 16 | ] 17 | }, 18 | { 19 | "name": "dev bleve-search", 20 | "type": "go", 21 | "request": "launch", 22 | "mode": "auto", 23 | "program": "${workspaceFolder}", 24 | "args": [ 25 | "dev", 26 | "bleve-search" 27 | ] 28 | }, 29 | { 30 | "name": "dev reindex", 31 | "type": "go", 32 | "request": "launch", 33 | "mode": "auto", 34 | "program": "${workspaceFolder}", 35 | "args": [ 36 | "dev", 37 | "reindex" 38 | ] 39 | }, 40 | { 41 | "name": "populate", 42 | "type": "go", 43 | "request": "launch", 44 | "mode": "auto", 45 | "program": "${workspaceFolder}", 46 | "args": [ 47 | "populate", 48 | "--browser=vivaldi", 49 | "--fulltext" 50 | ] 51 | }, 52 | { 53 | "name": "dev full-text", 54 | "type": "go", 55 | "request": 
"launch", 56 | "mode": "auto", 57 | "program": "${workspaceFolder}", 58 | "args": [ 59 | "dev", 60 | "full-text", 61 | "https://old.reddit.com/r/vscode/comments/qromfk/comment/hka2z0n/?utm_source=reddit&utm_medium=web2x&context=3" 62 | ] 63 | } 64 | ] 65 | } -------------------------------------------------------------------------------- /cmd/root.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright © 2022 NAME HERE 3 | */ 4 | package cmd 5 | 6 | import ( 7 | "fmt" 8 | "os" 9 | 10 | "github.com/iansinnott/browser-gopher/pkg/logging" 11 | "github.com/pkg/errors" 12 | "github.com/spf13/cobra" 13 | ) 14 | 15 | // overwrite with: 16 | // go build -ldflags "-X github.com/iansinnott/browser-gopher/cmd.Version=$(git describe --tags)" 17 | var Version string = "v0.0.0-dev" 18 | 19 | // rootCmd represents the base command when called without any subcommands 20 | var rootCmd = &cobra.Command{ 21 | Use: "browser-gopher", 22 | Short: "A tool aggregate your browsing history", 23 | Long: `browser-gopher will aggregate and backup your browsing history. Use the 24 | populate command to populate all URLs from currently supported browsers. 25 | 26 | Example: 27 | 28 | browser-gopher populate 29 | 30 | `, 31 | // Uncomment the following line if your bare application 32 | // has an action associated with it: 33 | Run: func(cmd *cobra.Command, args []string) { 34 | v, err := cmd.Flags().GetBool("version") 35 | if err != nil { 36 | fmt.Println(errors.Wrap(err, "failed to get version flag")) 37 | os.Exit(1) 38 | } 39 | 40 | if v { 41 | fmt.Println(Version) 42 | } else { 43 | cmd.Help() 44 | } 45 | }, 46 | } 47 | 48 | // Execute adds all child commands to the root command and sets flags appropriately. 49 | // This is called by main.main(). It only needs to happen once to the rootCmd. 
50 | func Execute() { 51 | debug := os.Getenv("DEBUG") 52 | if debug != "" && debug != "0" && debug != "false" { 53 | logging.SetLogLevel(logging.DEBUG) 54 | } 55 | 56 | err := rootCmd.Execute() 57 | if err != nil { 58 | os.Exit(1) 59 | } 60 | } 61 | 62 | func init() { 63 | rootCmd.Flags().BoolP("version", "v", false, "Display the version number") 64 | } 65 | -------------------------------------------------------------------------------- /pkg/persistence/persistence_test.go: -------------------------------------------------------------------------------- 1 | package persistence_test 2 | 3 | import ( 4 | "context" 5 | "testing" 6 | 7 | "github.com/iansinnott/browser-gopher/pkg/persistence" 8 | "github.com/iansinnott/browser-gopher/pkg/persistence/testutils" 9 | "github.com/iansinnott/browser-gopher/pkg/types" 10 | "github.com/iansinnott/browser-gopher/pkg/util" 11 | "github.com/stretchr/testify/require" 12 | ) 13 | 14 | func TestInitDb(t *testing.T) { 15 | dbConn, err := testutils.GetTestDBConn(t) 16 | require.NoError(t, err) 17 | defer dbConn.Close() 18 | } 19 | 20 | func TestInsertUrlMetadata(t *testing.T) { 21 | ctx := context.Background() 22 | dbConn, err := testutils.GetTestDBConn(t) 23 | require.NoError(t, err) 24 | defer dbConn.Close() 25 | 26 | table := []struct { 27 | name string 28 | metas []types.UrlMetaRow 29 | expected []string 30 | }{ 31 | { 32 | name: "single item slice", 33 | metas: []types.UrlMetaRow{ 34 | {Url: "http://www.google.com"}, 35 | }, 36 | expected: []string{util.HashMd5String("http://www.google.com")}, 37 | }, 38 | { 39 | name: "single item slice", 40 | metas: []types.UrlMetaRow{ 41 | {Url: "http://abc"}, 42 | {Url: "http://123"}, 43 | }, 44 | expected: []string{ 45 | util.HashMd5String("http://abc"), 46 | util.HashMd5String("http://123"), 47 | }, 48 | }, 49 | } 50 | 51 | for _, tt := range table { 52 | t.Run(tt.name, func(t *testing.T) { 53 | err = persistence.InsertUrlMeta(ctx, dbConn, tt.metas...) 
54 | require.NoError(t, err) 55 | 56 | for i, expected := range tt.expected { 57 | var result string 58 | hash := util.HashMd5String(tt.metas[i].Url) 59 | dbConn.QueryRow("SELECT url_md5 FROM urls_meta where url_md5 = ?", hash).Scan(&result) 60 | require.Equal(t, expected, result) 61 | } 62 | }) 63 | } 64 | 65 | } 66 | -------------------------------------------------------------------------------- /cmd/importHistorytrends.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | 7 | "github.com/iansinnott/browser-gopher/pkg/extractors" 8 | "github.com/iansinnott/browser-gopher/pkg/populate" 9 | "github.com/iansinnott/browser-gopher/pkg/util" 10 | "github.com/spf13/cobra" 11 | ) 12 | 13 | var historytrendsCmd = &cobra.Command{ 14 | Use: "historytrends", 15 | Short: "Import from the History Trends Unlimited browser extension.", 16 | Long: `Using the command without any args will try the default location for 17 | the BrowserParrot database, and should work in most cases.`, 18 | Run: func(cmd *cobra.Command, args []string) { 19 | searchPath, err := cmd.Flags().GetString("search-path") 20 | if err != nil { 21 | fmt.Println("could not parse --db-path:", err) 22 | os.Exit(1) 23 | } 24 | if searchPath == "" { 25 | fmt.Println("--search-path is required") 26 | os.Exit(1) 27 | } 28 | 29 | dbs, err := extractors.FindHistoryTrendsDBs(util.Expanduser(searchPath)) 30 | if err != nil { 31 | fmt.Println("", err) 32 | os.Exit(1) 33 | } 34 | 35 | if len(dbs) == 0 { 36 | fmt.Println("History Trends Unlimited does not appear to be installed. 
Could not find it under the root path: ", searchPath) 37 | os.Exit(0) 38 | } 39 | 40 | for _, dbPath := range dbs { 41 | extractor := &extractors.HistoryTrendsExtractor{ 42 | HistoryDBPath: util.Expanduser(dbPath), 43 | Name: "historytrends", 44 | } 45 | fmt.Println("importing:", dbPath) 46 | err = populate.PopulateAll(extractor) 47 | if err != nil { 48 | fmt.Println(err) 49 | os.Exit(1) 50 | } 51 | } 52 | 53 | fmt.Println("Done.") 54 | }, 55 | } 56 | 57 | func init() { 58 | importCmd.AddCommand(historytrendsCmd) 59 | historytrendsCmd.Flags().String("search-path", "~/Library/Application Support/Google/Chrome/", "The path to the database") 60 | } 61 | -------------------------------------------------------------------------------- /pkg/extractors/browserparrot.go: -------------------------------------------------------------------------------- 1 | package extractors 2 | 3 | import ( 4 | "context" 5 | "database/sql" 6 | "fmt" 7 | "time" 8 | 9 | "github.com/iansinnott/browser-gopher/pkg/types" 10 | ) 11 | 12 | const browserParrotUrls = ` 13 | SELECT 14 | url, 15 | title 16 | FROM 17 | datasource_browsing_history; 18 | ` 19 | 20 | type BrowserParrotExtractor struct { 21 | Name string 22 | HistoryDBPath string 23 | } 24 | 25 | func (a *BrowserParrotExtractor) GetName() string { 26 | return a.Name 27 | } 28 | 29 | func (a *BrowserParrotExtractor) GetDBPath() string { 30 | return a.HistoryDBPath 31 | } 32 | func (a *BrowserParrotExtractor) SetDBPath(s string) { 33 | a.HistoryDBPath = s 34 | } 35 | 36 | func (a *BrowserParrotExtractor) VerifyConnection(ctx context.Context, conn *sql.DB) (bool, error) { 37 | row := conn.QueryRowContext(ctx, "SELECT count(*) FROM datasource_browsing_history;") 38 | err := row.Err() 39 | if err != nil { 40 | return false, err 41 | } 42 | return true, nil 43 | } 44 | 45 | func (a *BrowserParrotExtractor) GetAllUrlsSince(ctx context.Context, conn *sql.DB, since time.Time) ([]types.UrlRow, error) { 46 | rows, err := conn.QueryContext(ctx, 
browserParrotUrls) 47 | if err != nil { 48 | fmt.Println(err) 49 | return nil, err 50 | } 51 | defer rows.Close() 52 | 53 | var urls []types.UrlRow 54 | 55 | for rows.Next() { 56 | var x types.UrlRow 57 | err = rows.Scan(&x.Url, &x.Title) 58 | if err != nil { 59 | fmt.Println("individual row error", err) 60 | return nil, err 61 | } 62 | urls = append(urls, x) 63 | } 64 | 65 | err = rows.Err() 66 | if err != nil { 67 | fmt.Println("row error", err) 68 | return nil, err 69 | } 70 | 71 | return urls, nil 72 | } 73 | 74 | // Persistory / browser parrot does not map visits properly as of this commit 75 | func (a *BrowserParrotExtractor) GetAllVisitsSince(ctx context.Context, conn *sql.DB, since time.Time) ([]types.VisitRow, error) { 76 | return []types.VisitRow{}, nil 77 | } 78 | -------------------------------------------------------------------------------- /pkg/fulltext/scraper_test.go: -------------------------------------------------------------------------------- 1 | package fulltext_test 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/iansinnott/browser-gopher/pkg/fulltext" 7 | "github.com/stretchr/testify/require" 8 | ) 9 | 10 | // These tests are decidedly not good, in that they depend on the outside world. 11 | // They could fail randomly due to network conditions, DNS issuse, updates in 12 | // the resolved web apps, etc. 
13 | func TestScrapeUrls(t *testing.T) { 14 | table := []struct { 15 | name string 16 | url string 17 | }{ 18 | { 19 | name: "scrape a single website", 20 | url: "https://example.com", 21 | }, 22 | { 23 | name: "handle redirects", 24 | url: "https://iansinnott.com", // redirects to https://www.iansinnott.com 25 | }, 26 | } 27 | 28 | scraper := fulltext.NewScraper() 29 | 30 | for _, tt := range table { 31 | t.Run(tt.name, func(t *testing.T) { 32 | xm, err := scraper.ScrapeUrls(tt.url) 33 | require.Nil(t, err) 34 | require.NotEmpty(t, xm[tt.url]) 35 | body := xm[tt.url].Body 36 | require.NotEmpty(t, body) 37 | }) 38 | } 39 | 40 | t.Run("will scrape 404s", func(t *testing.T) { 41 | xm, err := scraper.ScrapeUrls("https://example.com/404") 42 | require.Nil(t, err) 43 | require.NotEmpty(t, xm["https://example.com/404"]) 44 | require.Equal(t, xm["https://example.com/404"].StatusCode, 404) 45 | }) 46 | } 47 | 48 | func TestScrapeMultipleUrls(t *testing.T) { 49 | scraper := fulltext.NewScraper() 50 | 51 | t.Run("scrape multiple urls", func(t *testing.T) { 52 | xm, err := scraper.ScrapeUrls("https://example.com", "https://iansinnott.com") 53 | require.Nil(t, err) 54 | require.NotEmpty(t, xm["https://example.com"].Body) 55 | require.NotEmpty(t, xm["https://iansinnott.com"].Body) 56 | }) 57 | 58 | t.Run("repeatable results", func(t *testing.T) { 59 | xm, err := scraper.ScrapeUrls("https://iansinnott.com", "https://example.com") 60 | require.Nil(t, err) 61 | require.NotEmpty(t, xm["https://example.com"].Body) 62 | require.NotEmpty(t, xm["https://iansinnott.com"].Body) 63 | }) 64 | } 65 | -------------------------------------------------------------------------------- /.github/workflows/go-build.yml: -------------------------------------------------------------------------------- 1 | name: Go Build 2 | 3 | on: 4 | push: 5 | tags: 6 | - "*" 7 | 8 | jobs: 9 | release: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - name: Check out code 13 | uses: actions/checkout@v2 14 | with: 15 | 
fetch-depth: 0 16 | - name: Setup env 17 | run: echo "VERSION=$(git describe --tags --always)" >> $GITHUB_ENV 18 | - name: Create Release 19 | id: create_release 20 | uses: actions/create-release@v1 21 | env: 22 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # This token is provided by Actions, you do not need to create your own token 23 | with: 24 | tag_name: ${{ env.VERSION }} 25 | release_name: Release ${{ env.VERSION }} 26 | draft: false 27 | prerelease: true 28 | 29 | build: 30 | name: Build 31 | runs-on: ubuntu-latest 32 | 33 | # Matrix strategy 34 | strategy: 35 | matrix: 36 | GOOS: [linux, darwin] 37 | GOARCH: [amd64, arm64] 38 | 39 | steps: 40 | - name: Set up Go 1.x 41 | uses: actions/setup-go@v2 42 | with: 43 | go-version: ^1.19 44 | 45 | - name: Check out code 46 | uses: actions/checkout@v2 47 | with: 48 | fetch-depth: 0 49 | 50 | - name: Setup env 51 | run: echo "VERSION=$(git describe --tags --always)" >> $GITHUB_ENV 52 | 53 | - name: Download modules 54 | run: go mod download 55 | 56 | - name: Get the version 57 | id: get_version 58 | run: echo ::set-output name=VERSION::$(echo $(git describe --tags --always)) 59 | 60 | - name: Go build 61 | run: | 62 | CGO_ENABLED=0 GOOS=${{ matrix.GOOS }} GOARCH=${{ matrix.GOARCH}} go build -ldflags "-X github.com/iansinnott/browser-gopher/cmd.Version=${{ env.VERSION }}" -o ./dist/browser-gopher-${{ env.VERSION }}-${{ matrix.GOOS }}-${{ matrix.GOARCH }} 63 | 64 | - name: Release 65 | uses: softprops/action-gh-release@v1 66 | if: startsWith(github.ref, 'refs/tags/') 67 | with: 68 | files: | 69 | ./dist/browser-gopher-${{ env.VERSION }}-${{ matrix.GOOS }}-${{ matrix.GOARCH }} 70 | -------------------------------------------------------------------------------- /cmd/backup.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "fmt" 5 | "io" 6 | "os" 7 | "path/filepath" 8 | "time" 9 | 10 | "github.com/iansinnott/browser-gopher/pkg/config" 11 | 
// copyFile copies the contents of the file at src into dst. dst is created if
// it does not exist and truncated if it does (os.Create semantics).
//
// It returns the first error met while opening, copying or closing. The
// destination's Close error is reported too, because some filesystems only
// surface a failed write at close time and a backup must not be reported as
// successful in that case.
func copyFile(src, dst string) (err error) {
	srcFile, err := os.Open(src)
	if err != nil {
		return err
	}
	// The source is only read, so its Close error carries no information.
	defer srcFile.Close()

	dstFile, err := os.Create(dst)
	if err != nil {
		return err
	}
	defer func() {
		// Always close, but never mask an earlier copy error.
		if closeErr := dstFile.Close(); err == nil {
			err = closeErr
		}
	}()

	_, err = io.Copy(dstFile, srcFile)
	return err
}
-- "fragment" stores the text that is made searchable. Every row is mirrored
-- into the "fragment_fts" FTS5 table by the insert/update/delete triggers
-- defined further down in this migration; only "a" and "v" are indexed there,
-- the remaining columns are carried along UNINDEXED.
--
-- NOTE(review): the column names look like an entity/table/attribute/value
-- layout, and the "thread or message" wording below appears to be inherited
-- from another schema -- confirm what "e" refers to in this project.
CREATE TABLE
  if NOT EXISTS "fragment" (
    "id" INTEGER PRIMARY KEY, -- auto increment doesn't work well for synced tables. also, must be int for use in fts rowid (fragment_fts uses content_rowid = "id")
    "e" VARCHAR(255), -- references some other thing in the db. for now, either a thread or a message
    "t" VARCHAR(255), -- what table this belongs to. not quite using sql the way it was intended here
    "a" VARCHAR(255), -- the name of the a that this fragment is for (presumably the attribute name -- TODO confirm)
    "v" TEXT, -- the v of the a that this fragment is for (presumably the attribute's value, i.e. the searchable text -- TODO confirm)
    "created_at" TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
  );
// SQLiteDateTime is the time.Parse/Format layout for timestamps written as
// "YYYY-MM-DD HH:MM:SS", e.g. "2022-01-14 06:41:48". It carries no zone
// information.
const SQLiteDateTime = "2006-01-02 15:04:05"

// FormatDateOnly is the time.Parse/Format layout for a calendar date without
// a time of day, e.g. "2022-01-14".
const FormatDateOnly = "2006-01-02"
// CopyPath copies the file at frm to the path to.
//
// The destination is created with mode 0644 when missing. When it already
// exists its previous contents are discarded, so the result is always an
// exact copy of the source. The source is opened first, which means a missing
// or unreadable source never leaves an empty destination file behind.
//
// It returns the first error met while opening, copying or closing the
// destination.
func CopyPath(frm, to string) (err error) {
	src, err := os.Open(frm)
	if err != nil {
		return err
	}
	// The source is only read, so its Close error carries no information.
	defer src.Close()

	// O_TRUNC matters: without it, copying over a longer existing file would
	// keep that file's trailing bytes after the new contents.
	dest, err := os.OpenFile(to, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0644)
	if err != nil {
		return err
	}
	defer func() {
		// Report a failed close, but never mask an earlier copy error.
		if closeErr := dest.Close(); err == nil {
			err = closeErr
		}
	}()

	_, err = io.Copy(dest, src)
	return err
}
"github.com/iansinnott/browser-gopher/pkg/search" 10 | "github.com/iansinnott/browser-gopher/pkg/tui" 11 | "github.com/iansinnott/browser-gopher/pkg/util" 12 | "github.com/spf13/cobra" 13 | ) 14 | 15 | var searchCmd = &cobra.Command{ 16 | Use: "search", 17 | Short: "Find URLs you've visited", 18 | Run: func(cmd *cobra.Command, args []string) { 19 | noInteractive, err := cmd.Flags().GetBool("no-interactive") 20 | if err != nil { 21 | fmt.Println("could not parse --no-interactive:", err) 22 | os.Exit(1) 23 | } 24 | 25 | fmtJson, err := cmd.Flags().GetBool("json") 26 | if err != nil { 27 | fmt.Println("could not parse --json:", err) 28 | os.Exit(1) 29 | } 30 | 31 | dataProvider := search.NewSqlSearchProvider(cmd.Context(), config.Config) 32 | initialQuery := "" 33 | 34 | if len(args) > 0 { 35 | initialQuery = args[0] 36 | } 37 | 38 | if noInteractive { 39 | if len(args) < 1 { 40 | fmt.Println("No search query provided.") 41 | os.Exit(1) 42 | return 43 | } 44 | 45 | result, err := dataProvider.SearchUrls(initialQuery) 46 | if err != nil { 47 | fmt.Println("search error", err) 48 | os.Exit(1) 49 | return 50 | } 51 | 52 | if fmtJson { 53 | // output x as a JSON string 54 | bs, err := json.MarshalIndent(result.Urls, "", " ") 55 | 56 | if err != nil { 57 | fmt.Println("could not marshal json:", err) 58 | os.Exit(1) 59 | } 60 | 61 | fmt.Println(string(bs)) 62 | } else { 63 | for _, x := range util.ReverseSlice(result.Urls) { 64 | var title string 65 | var lastVisit string 66 | if x.Title != nil { 67 | title = *x.Title 68 | } else { 69 | title = "" 70 | } 71 | 72 | if x.LastVisit != nil { 73 | lastVisit = x.LastVisit.Format("2006-01-02") 74 | } 75 | 76 | fmt.Printf("%v %s %sv\n", lastVisit, title, x.Url) 77 | } 78 | 79 | fmt.Printf("Found %d results for \"%s\"\n", result.Count, initialQuery) 80 | os.Exit(0) 81 | } 82 | 83 | return 84 | } 85 | 86 | p, err := tui.GetSearchProgram(cmd.Context(), initialQuery, dataProvider, dataProvider, nil) 87 | if err != nil { 88 | 
fmt.Println("could not get search program:", err) 89 | os.Exit(1) 90 | } 91 | 92 | if err := p.Start(); err != nil { 93 | fmt.Println("Error running program:", err) 94 | os.Exit(1) 95 | } 96 | }, 97 | } 98 | 99 | func init() { 100 | searchCmd.Flags().Bool("no-interactive", false, "disable interactive terminal interface. useful for scripting") 101 | searchCmd.Flags().Bool("json", false, "output results as json. only works with --no-interactive") 102 | rootCmd.AddCommand(searchCmd) 103 | } 104 | -------------------------------------------------------------------------------- /pkg/extractors/orion.go: -------------------------------------------------------------------------------- 1 | package extractors 2 | 3 | import ( 4 | "context" 5 | "database/sql" 6 | "fmt" 7 | "time" 8 | 9 | "github.com/iansinnott/browser-gopher/pkg/types" 10 | "github.com/iansinnott/browser-gopher/pkg/util" 11 | ) 12 | 13 | const orionUrls = ` 14 | SELECT 15 | url, 16 | title 17 | FROM 18 | history_items; 19 | ` 20 | 21 | const orionVisits = ` 22 | SELECT 23 | v.VISIT_TIME, 24 | u.URL 25 | FROM 26 | visits v 27 | INNER JOIN history_items u ON u.ID = v.HISTORY_ITEM_ID 28 | ORDER BY 29 | VISIT_TIME DESC; 30 | ` 31 | 32 | type OrionExtractor struct { 33 | Name string 34 | HistoryDBPath string 35 | } 36 | 37 | func (a *OrionExtractor) GetName() string { 38 | return a.Name 39 | } 40 | 41 | func (a *OrionExtractor) GetDBPath() string { 42 | return a.HistoryDBPath 43 | } 44 | 45 | func (a *OrionExtractor) SetDBPath(s string) { 46 | a.HistoryDBPath = s 47 | } 48 | 49 | func (a *OrionExtractor) VerifyConnection(ctx context.Context, conn *sql.DB) (bool, error) { 50 | row := conn.QueryRowContext(ctx, "SELECT count(*) FROM history_items;") 51 | err := row.Err() 52 | if err != nil { 53 | return false, err 54 | } 55 | return true, nil 56 | } 57 | 58 | func (a *OrionExtractor) GetAllUrlsSince(ctx context.Context, conn *sql.DB, since time.Time) ([]types.UrlRow, error) { 59 | rows, err := conn.QueryContext(ctx, 
orionUrls) 60 | if err != nil { 61 | fmt.Println(err) 62 | return nil, err 63 | } 64 | defer rows.Close() 65 | 66 | var urls []types.UrlRow 67 | 68 | for rows.Next() { 69 | var x types.UrlRow 70 | err = rows.Scan(&x.Url, &x.Title) 71 | if err != nil { 72 | fmt.Println("individual row error", err) 73 | return nil, err 74 | } 75 | urls = append(urls, x) 76 | } 77 | 78 | err = rows.Err() 79 | if err != nil { 80 | fmt.Println("row error", err) 81 | return nil, err 82 | } 83 | 84 | return urls, nil 85 | } 86 | 87 | func (a *OrionExtractor) GetAllVisitsSince(ctx context.Context, conn *sql.DB, since time.Time) ([]types.VisitRow, error) { 88 | rows, err := conn.QueryContext(ctx, orionVisits) 89 | if err != nil { 90 | fmt.Println(err) 91 | return nil, err 92 | } 93 | defer rows.Close() 94 | 95 | var visits []types.VisitRow 96 | 97 | for rows.Next() { 98 | var x types.VisitRow 99 | var ts string 100 | err = rows.Scan(&ts, &x.Url) 101 | if err != nil { 102 | fmt.Println("individual row error", err) 103 | return nil, err 104 | } 105 | 106 | t, err := util.ParseISODatetime(ts) 107 | if err != nil { 108 | fmt.Println("datetime parsing error", ts, err) 109 | return nil, err 110 | } 111 | x.Datetime = t 112 | visits = append(visits, x) 113 | } 114 | 115 | err = rows.Err() 116 | if err != nil { 117 | fmt.Println("row error", err) 118 | return nil, err 119 | } 120 | 121 | return visits, nil 122 | } 123 | -------------------------------------------------------------------------------- /pkg/extractors/safari.go: -------------------------------------------------------------------------------- 1 | package extractors 2 | 3 | import ( 4 | "context" 5 | "database/sql" 6 | "fmt" 7 | "time" 8 | 9 | "github.com/iansinnott/browser-gopher/pkg/types" 10 | "github.com/iansinnott/browser-gopher/pkg/util" 11 | ) 12 | 13 | type SafariExtractor struct { 14 | Name string 15 | HistoryDBPath string 16 | } 17 | 18 | // Join with latest visit to get title, since safari doesn't store title with URL 19 | 
const safariUrls = ` 20 | SELECT 21 | u.url AS url, 22 | v.title AS title 23 | FROM 24 | history_items u 25 | INNER JOIN ( 26 | SELECT 27 | *, 28 | max(visit_time) AS last_visit_date 29 | FROM 30 | history_visits 31 | GROUP BY 32 | history_item) v ON v.history_item = u.id; 33 | ` 34 | 35 | const safariVisits = ` 36 | SELECT 37 | datetime(visit_time + 978307200, 'unixepoch') AS time, 38 | u.url 39 | FROM 40 | history_visits v 41 | INNER JOIN history_items u ON v.history_item = u.id; 42 | ` 43 | 44 | func (a *SafariExtractor) GetName() string { 45 | return a.Name 46 | } 47 | 48 | func (a *SafariExtractor) GetDBPath() string { 49 | return a.HistoryDBPath 50 | } 51 | 52 | func (a *SafariExtractor) SetDBPath(s string) { 53 | a.HistoryDBPath = s 54 | } 55 | 56 | func (a *SafariExtractor) VerifyConnection(ctx context.Context, conn *sql.DB) (bool, error) { 57 | row := conn.QueryRowContext(ctx, "SELECT count(*) FROM history_items;") 58 | err := row.Err() 59 | if err != nil { 60 | return false, err 61 | } 62 | return true, nil 63 | } 64 | 65 | func (a *SafariExtractor) GetAllUrlsSince(ctx context.Context, conn *sql.DB, since time.Time) ([]types.UrlRow, error) { 66 | rows, err := conn.QueryContext(ctx, safariUrls) 67 | if err != nil { 68 | fmt.Println(err) 69 | return nil, err 70 | } 71 | defer rows.Close() 72 | 73 | var urls []types.UrlRow 74 | 75 | for rows.Next() { 76 | var x types.UrlRow 77 | err = rows.Scan(&x.Url, &x.Title) 78 | if err != nil { 79 | fmt.Println("individual row error", err) 80 | return nil, err 81 | } 82 | urls = append(urls, x) 83 | } 84 | 85 | err = rows.Err() 86 | if err != nil { 87 | fmt.Println("row error", err) 88 | return nil, err 89 | } 90 | 91 | return urls, nil 92 | } 93 | 94 | func (a *SafariExtractor) GetAllVisitsSince(ctx context.Context, conn *sql.DB, since time.Time) ([]types.VisitRow, error) { 95 | rows, err := conn.QueryContext(ctx, safariVisits) 96 | if err != nil { 97 | fmt.Println(err) 98 | return nil, err 99 | } 100 | defer 
rows.Close() 101 | 102 | var visits []types.VisitRow 103 | 104 | for rows.Next() { 105 | var x types.VisitRow 106 | var ts string 107 | err = rows.Scan(&ts, &x.Url) 108 | if err != nil { 109 | fmt.Println("individual row error", err) 110 | return nil, err 111 | } 112 | 113 | t, err := util.ParseSQLiteDatetime(ts) 114 | if err != nil { 115 | fmt.Println("datetime parsing error", ts, err) 116 | return nil, err 117 | } 118 | x.Datetime = t 119 | visits = append(visits, x) 120 | } 121 | 122 | err = rows.Err() 123 | if err != nil { 124 | fmt.Println("row error", err) 125 | return nil, err 126 | } 127 | 128 | return visits, nil 129 | } 130 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/iansinnott/browser-gopher 2 | 3 | go 1.18 4 | 5 | require ( 6 | github.com/JohannesKaufmann/html-to-markdown v1.3.6 7 | github.com/charmbracelet/bubbles v0.14.0 8 | github.com/charmbracelet/bubbletea v0.22.1 9 | github.com/charmbracelet/lipgloss v0.6.0 10 | github.com/gocolly/colly/v2 v2.1.0 11 | github.com/pkg/errors v0.9.1 12 | github.com/samber/lo v1.33.0 13 | github.com/spf13/cobra v1.7.0 14 | github.com/stretchr/testify v1.8.1 15 | github.com/writeas/go-strip-markdown v2.0.1+incompatible 16 | modernc.org/sqlite v1.18.1 17 | ) 18 | 19 | require ( 20 | github.com/PuerkitoBio/goquery v1.8.0 // indirect 21 | github.com/andybalholm/cascadia v1.3.1 // indirect 22 | github.com/antchfx/htmlquery v1.2.3 // indirect 23 | github.com/antchfx/xmlquery v1.2.4 // indirect 24 | github.com/antchfx/xpath v1.1.8 // indirect 25 | github.com/atotto/clipboard v0.1.4 // indirect 26 | github.com/containerd/console v1.0.3 // indirect 27 | github.com/davecgh/go-spew v1.1.1 // indirect 28 | github.com/gobwas/glob v0.2.3 // indirect 29 | github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e // indirect 30 | github.com/golang/protobuf v1.4.2 // indirect 31 | 
github.com/google/uuid v1.3.0 // indirect 32 | github.com/inconshreveable/mousetrap v1.1.0 // indirect 33 | github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51 // indirect 34 | github.com/kennygrant/sanitize v1.2.4 // indirect 35 | github.com/lucasb-eyer/go-colorful v1.2.0 // indirect 36 | github.com/mattn/go-isatty v0.0.16 // indirect 37 | github.com/mattn/go-localereader v0.0.1 // indirect 38 | github.com/mattn/go-runewidth v0.0.13 // indirect 39 | github.com/mattn/go-sqlite3 v1.14.17 // indirect 40 | github.com/muesli/ansi v0.0.0-20211031195517-c9f0611b6c70 // indirect 41 | github.com/muesli/cancelreader v0.2.2 // indirect 42 | github.com/muesli/reflow v0.3.0 // indirect 43 | github.com/muesli/termenv v0.12.0 // indirect 44 | github.com/pmezard/go-difflib v1.0.0 // indirect 45 | github.com/remyoudompheng/bigfft v0.0.0-20200410134404-eec4a21b6bb0 // indirect 46 | github.com/rivo/uniseg v0.4.2 // indirect 47 | github.com/sahilm/fuzzy v0.1.0 // indirect 48 | github.com/saintfish/chardet v0.0.0-20120816061221-3af4cd4741ca // indirect 49 | github.com/spf13/pflag v1.0.5 // indirect 50 | github.com/temoto/robotstxt v1.1.1 // indirect 51 | golang.org/x/exp v0.0.0-20220303212507-bbda1eaf7a17 // indirect 52 | golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4 // indirect 53 | golang.org/x/net v0.0.0-20220909164309-bea034e7d591 // indirect 54 | golang.org/x/sys v0.4.0 // indirect 55 | golang.org/x/term v0.0.0-20220722155259-a9ba230a4035 // indirect 56 | golang.org/x/text v0.3.8 // indirect 57 | golang.org/x/tools v0.1.12 // indirect 58 | google.golang.org/appengine v1.6.6 // indirect 59 | google.golang.org/protobuf v1.24.0 // indirect 60 | gopkg.in/yaml.v3 v3.0.1 // indirect 61 | lukechampine.com/uint128 v1.1.1 // indirect 62 | modernc.org/cc/v3 v3.36.0 // indirect 63 | modernc.org/ccgo/v3 v3.16.8 // indirect 64 | modernc.org/libc v1.16.19 // indirect 65 | modernc.org/mathutil v1.4.1 // indirect 66 | modernc.org/memory v1.1.1 // indirect 67 | 
modernc.org/opt v0.1.1 // indirect 68 | modernc.org/strutil v1.1.1 // indirect 69 | modernc.org/token v1.0.0 // indirect 70 | ) 71 | -------------------------------------------------------------------------------- /pkg/extractors/sigmaos.go: -------------------------------------------------------------------------------- 1 | package extractors 2 | 3 | import ( 4 | "context" 5 | "database/sql" 6 | "fmt" 7 | "time" 8 | 9 | "github.com/iansinnott/browser-gopher/pkg/types" 10 | "github.com/iansinnott/browser-gopher/pkg/util" 11 | ) 12 | 13 | const sigmaUrls = ` 14 | SELECT 15 | u.ZURL AS url, 16 | v.ZTITLE AS title, 17 | DATETIME(MAX(v.ZVISITTIME) + 978307200, 'unixepoch') AS visit_time 18 | FROM 19 | ZHISTORYITEM u 20 | INNER JOIN ZHISTORYVISIT v ON u.Z_PK = v.ZHISTORYITEM 21 | GROUP BY v.ZHISTORYITEM 22 | ORDER BY 23 | v.ZVISITTIME DESC; 24 | ` 25 | 26 | const sigmaVisits = ` 27 | SELECT 28 | datetime (v.ZVISITTIME + 978307200, 'unixepoch') AS visit_time, 29 | u.ZURL AS url 30 | FROM 31 | ZHISTORYITEM u 32 | INNER JOIN ZHISTORYVISIT v ON u.Z_PK = v.ZHISTORYITEM 33 | ORDER BY 34 | v.ZVISITTIME DESC; 35 | ` 36 | 37 | type SigmaOSExtractor struct { 38 | Name string 39 | HistoryDBPath string 40 | } 41 | 42 | func (a *SigmaOSExtractor) GetName() string { 43 | return a.Name 44 | } 45 | 46 | func (a *SigmaOSExtractor) GetDBPath() string { 47 | return a.HistoryDBPath 48 | } 49 | func (a *SigmaOSExtractor) SetDBPath(s string) { 50 | a.HistoryDBPath = s 51 | } 52 | 53 | func (a *SigmaOSExtractor) VerifyConnection(ctx context.Context, conn *sql.DB) (bool, error) { 54 | row := conn.QueryRowContext(ctx, "SELECT count(*) FROM ZHISTORYITEM;") 55 | err := row.Err() 56 | if err != nil { 57 | return false, err 58 | } 59 | return true, nil 60 | } 61 | 62 | func (a *SigmaOSExtractor) GetAllUrlsSince(ctx context.Context, conn *sql.DB, since time.Time) ([]types.UrlRow, error) { 63 | rows, err := conn.QueryContext(ctx, sigmaUrls) 64 | if err != nil { 65 | fmt.Println(err) 66 | return nil, 
err 67 | } 68 | defer rows.Close() 69 | 70 | var urls []types.UrlRow 71 | 72 | for rows.Next() { 73 | var x types.UrlRow 74 | var visit_time string 75 | err = rows.Scan(&x.Url, &x.Title, &visit_time) 76 | if err != nil { 77 | fmt.Println("individual row error", err) 78 | return nil, err 79 | } 80 | t, err := util.ParseSQLiteDatetime(visit_time) 81 | if err != nil { 82 | fmt.Println("could not parse datetime", err) 83 | } 84 | x.LastVisit = &t 85 | urls = append(urls, x) 86 | } 87 | 88 | err = rows.Err() 89 | if err != nil { 90 | fmt.Println("row error", err) 91 | return nil, err 92 | } 93 | 94 | return urls, nil 95 | } 96 | 97 | func (a *SigmaOSExtractor) GetAllVisitsSince(ctx context.Context, conn *sql.DB, since time.Time) ([]types.VisitRow, error) { 98 | rows, err := conn.QueryContext(ctx, sigmaVisits) 99 | if err != nil { 100 | fmt.Println(err) 101 | return nil, err 102 | } 103 | defer rows.Close() 104 | 105 | var visits []types.VisitRow 106 | 107 | for rows.Next() { 108 | var x types.VisitRow 109 | var ts string 110 | err = rows.Scan(&ts, &x.Url) 111 | if err != nil { 112 | fmt.Println("individual row error", err) 113 | return nil, err 114 | } 115 | 116 | t, err := util.ParseSQLiteDatetime(ts) 117 | if err != nil { 118 | fmt.Println("datetime parsing error", ts, err) 119 | return nil, err 120 | } 121 | x.Datetime = t 122 | visits = append(visits, x) 123 | } 124 | 125 | err = rows.Err() 126 | if err != nil { 127 | fmt.Println("row error", err) 128 | return nil, err 129 | } 130 | 131 | return visits, nil 132 | } 133 | -------------------------------------------------------------------------------- /pkg/types/types.go: -------------------------------------------------------------------------------- 1 | package types 2 | 3 | import ( 4 | "context" 5 | "database/sql" 6 | "time" 7 | ) 8 | 9 | type UrlRow struct { 10 | Url string 11 | Title *string // Nullable 12 | Description *string // Nullable 13 | LastVisit *time.Time // Nullable 14 | } 15 | 16 | // Meta 
// Extractor is the contract implemented by each browser-history source. An
// extractor knows where its history database lives and how to read URLs and
// visits out of an already opened *sql.DB for that database.
type Extractor interface {
	// GetName returns the extractor's name.
	GetName() string
	// GetDBPath returns the path of the history database this extractor reads.
	GetDBPath() string
	// SetDBPath replaces the path returned by GetDBPath.
	SetDBPath(string)
	// GetAllUrlsSince returns the URLs found in conn. since is a lower bound
	// on the last visit time, but implementations are free to ignore it: the
	// Firefox extractor filters on it, while the Orion, Safari and SigmaOS
	// extractors return every URL.
	GetAllUrlsSince(ctx context.Context, conn *sql.DB, since time.Time) ([]UrlRow, error)
	// GetAllVisitsSince returns the individual visits found in conn. since is
	// treated the same way as in GetAllUrlsSince.
	GetAllVisitsSince(ctx context.Context, conn *sql.DB, since time.Time) ([]VisitRow, error)

	// Verify that the passed db can actually be connected to. In the case of
	// sqlite, it's not uncommon for a db to be locked. The Open call will work
	// but the db cannot be read.
	VerifyConnection(ctx context.Context, conn *sql.DB) (bool, error)
}
29 | ORDER BY 30 | lastVisitDate DESC; 31 | ; 32 | ` 33 | 34 | const firefoxVisits = ` 35 | SELECT 36 | datetime(v.visit_date / 1e6, 'unixepoch') AS visitDate, 37 | u.url 38 | FROM 39 | moz_historyvisits v 40 | INNER JOIN moz_places u ON v.place_id = u.id 41 | WHERE visitDate > ? 42 | ORDER BY 43 | visitDate DESC; 44 | ; 45 | ` 46 | 47 | func (a *FirefoxExtractor) GetName() string { 48 | return a.Name 49 | } 50 | 51 | func (a *FirefoxExtractor) GetDBPath() string { 52 | return a.HistoryDBPath 53 | } 54 | func (a *FirefoxExtractor) SetDBPath(s string) { 55 | a.HistoryDBPath = s 56 | } 57 | 58 | func (a *FirefoxExtractor) VerifyConnection(ctx context.Context, conn *sql.DB) (bool, error) { 59 | row := conn.QueryRowContext(ctx, "SELECT count(*) FROM moz_places;") 60 | err := row.Err() 61 | if err != nil { 62 | return false, err 63 | } 64 | return true, nil 65 | } 66 | 67 | func (a *FirefoxExtractor) GetAllUrlsSince(ctx context.Context, conn *sql.DB, since time.Time) ([]types.UrlRow, error) { 68 | rows, err := conn.QueryContext(ctx, firefoxUrls, since.UTC().Format(util.SQLiteDateTime)) 69 | if err != nil { 70 | fmt.Println(err) 71 | return nil, err 72 | } 73 | defer rows.Close() 74 | 75 | var urls []types.UrlRow 76 | 77 | for rows.Next() { 78 | var x types.UrlRow 79 | var visit_time *string 80 | err = rows.Scan(&x.Url, &x.Title, &x.Description, &visit_time) 81 | if err != nil { 82 | fmt.Println("individual row error", err) 83 | return nil, err 84 | } 85 | if visit_time != nil { 86 | t, err := util.ParseSQLiteDatetime(*visit_time) 87 | if err != nil { 88 | fmt.Println("could not parse datetime", err) 89 | } 90 | x.LastVisit = &t 91 | } 92 | 93 | urls = append(urls, x) 94 | } 95 | 96 | err = rows.Err() 97 | if err != nil { 98 | fmt.Println("row error", err) 99 | return nil, err 100 | } 101 | 102 | return urls, nil 103 | } 104 | 105 | func (a *FirefoxExtractor) GetAllVisitsSince(ctx context.Context, conn *sql.DB, since time.Time) ([]types.VisitRow, error) { 106 | rows, err 
// FindFirefoxDBs walks the directory tree rooted at root and returns the path
// of every entry named "places.sqlite".
//
// Directories that cannot be read are skipped so one unreadable profile does
// not hide the others. If root itself cannot be inspected (for example, it
// does not exist) the underlying error is returned together with an empty,
// non-nil slice.
func FindFirefoxDBs(root string) ([]string, error) {
	results := []string{}

	err := filepath.WalkDir(root, func(path string, d fs.DirEntry, err error) error {
		if err != nil {
			// WalkDir passes a nil DirEntry when the initial stat of root
			// fails; touching d here would panic, so surface the error.
			if d == nil {
				return err
			}
			// A directory below root could not be listed: skip it and keep
			// walking the rest of the tree.
			return nil
		}

		if d.Name() == "places.sqlite" {
			results = append(results, path)
		}
		return nil
	})

	return results, err
}
17 | 18 | ### Option 2: install from source: 19 | 20 | ```sh 21 | # Install go if you don't have it: https://go.dev/doc/install 22 | # If on a mac, install through brew: 23 | #brew install go 24 | # debian based: 25 | #apt install golang 26 | 27 | # Build browser-gopher 28 | git clone https://github.com/iansinnott/browser-gopher 29 | cd browser-gopher 30 | make build 31 | 32 | # Check the version 33 | ./browser-gopher --version 34 | 35 | # Populate the database 36 | ./browser-gopher populate --latest 37 | 38 | # Search 39 | ./browser-gopher search 40 | ``` 41 | 42 | ## Contributing 43 | 44 | Would be great! Send a PR. 45 | 46 | ## Project status 47 | 48 | Started recently, but should be roughly stable. Currently it extracts and stores all your browsing history in SQLite and indexes it with Bleve. You can search over all your browsing history, including full-text. However, full-text extraction requires use of the `--fulltext` flag (turned off by default). 49 | 50 | ## Supported browsers 51 | 52 | For now see the list of extractors here: https://github.com/iansinnott/browser-gopher/blob/master/pkg/extractors/extractors.go#L25 53 | 54 | I should probably add a command to print it to stdout though. 55 | 56 | ## Why? 57 | 58 | I created [BrowserParrot][] to have GUI access to all my browsing history with a quick fuzzy search. This worked out well, but the stack chosen at the time (Clojure/JVM) turned out not to be ideal for the problem. 59 | 60 | In this iteration I switched to Go, which can provide: 61 | 62 | - Lower memory usage 63 | - Quick startup time 64 | - Smaller binary 65 | - More consistent deployments 66 | 67 | ### Is this a rewrite of BrowserParrot? 68 | 69 | Not currently. For now the focus is on achieving the desired UX from the command line. To be a real BrowserParrot alternative we'd need a GUI. However, I've been investigating [Wails](https://wails.io/) for a separate project and quite like it. 
Since this repo uses Go we'd be in a good position to wrap the functionality in a UI using Wails. 70 | 71 | ## Importing from [BrowserParrot][] 72 | 73 | Import URLs from BrowserParrot: 74 | 75 | ```sh 76 | browser-gopher browserparrot 77 | ``` 78 | 79 | Same as above, but with a custom DB path: 80 | 81 | ```sh 82 | browser-gopher browserparrot --db-path ~/.config/uncloud/persistory.db 83 | ``` 84 | 85 | (This may be useful if you tried out [Uncloud](https://www.uncloud.gg/) and have a browserparrot-like database somewhere else on your system) 86 | 87 | [browserparrot]: https://www.browserparrot.com/ 88 | 89 | ## Todo / Wishlist 90 | 91 | - [x] search (yeah, need to add this) 92 | - [x] action: open 93 | - [ ] action: copy 94 | - [x] a TUI for searching and filtering for a more GUI-like experience 95 | - [x] full text indexing 96 | - ideally with more sophisticated extraction mechanisms than previous 97 | - [x] import history from History Trends Unlimited 98 | - It's already in sqlite so should be quick 99 | - ~~favicons~~ 100 | - Update: Tried https://github.com/trashhalo/imgcat/blob/master/component/load.go#L121. Cannot effectively render at such small sizes in the terminal. 101 | - If anyone has suggestions for how to render 32x32 pngs in the terminal please let me know. 
102 | - Will just use the GUI for this 103 | -------------------------------------------------------------------------------- /pkg/populate/populate.go: -------------------------------------------------------------------------------- 1 | package populate 2 | 3 | import ( 4 | "context" 5 | "database/sql" 6 | "fmt" 7 | "log" 8 | "os" 9 | "path/filepath" 10 | "strings" 11 | "time" 12 | 13 | "github.com/iansinnott/browser-gopher/pkg/config" 14 | "github.com/iansinnott/browser-gopher/pkg/logging" 15 | "github.com/iansinnott/browser-gopher/pkg/persistence" 16 | "github.com/iansinnott/browser-gopher/pkg/types" 17 | "github.com/iansinnott/browser-gopher/pkg/util" 18 | ) 19 | 20 | // inceptionTime is just an early time, assuming all observations will be after this time. 21 | var inceptionTime time.Time = time.Unix(0, 0) // 1970-01-01 22 | 23 | // PopulateAll populates all records from browsers, ignoring the last updated time 24 | func PopulateAll(extractor types.Extractor) error { 25 | return PopulateSinceTime(extractor, inceptionTime, nil) 26 | } 27 | 28 | type PopulateOptions struct { 29 | KeepTmpFiles bool 30 | } 31 | 32 | func PopulateSinceTime(extractor types.Extractor, since time.Time, opts *PopulateOptions) error { 33 | conn, err := sql.Open("sqlite", extractor.GetDBPath()) 34 | ctx := context.TODO() 35 | 36 | if err != nil { 37 | log.Println("could not connect to db at", extractor.GetDBPath(), err) 38 | return err 39 | } 40 | defer conn.Close() 41 | 42 | // Handle the case where the database is in use, or return if the database cannot be read or copied. 
43 | _, err = extractor.VerifyConnection(ctx, conn) 44 | if err != nil { 45 | if !strings.Contains(err.Error(), "SQLITE_BUSY") { 46 | log.Println("[err] Could read from DB", extractor.GetDBPath()) 47 | return err 48 | } 49 | 50 | tmpPath := filepath.Join(os.TempDir(), extractor.GetName()+"_backup.sqlite") 51 | 52 | log.Println("[" + extractor.GetName() + "] database locked, copying for read access: " + extractor.GetDBPath()) 53 | 54 | err := util.CopyPath(extractor.GetDBPath(), tmpPath) 55 | if err != nil { 56 | fmt.Println("could not copy:", tmpPath) 57 | return err 58 | } 59 | // Remove interim file afterwards (otherwise these files eventually take up quite a bit of space) 60 | defer func() { 61 | keepTmpFiles := false 62 | if opts != nil { 63 | keepTmpFiles = opts.KeepTmpFiles 64 | } 65 | 66 | if keepTmpFiles { 67 | logging.Debug().Println("keeping tmp file:", tmpPath) 68 | return 69 | } 70 | 71 | err := os.Remove(tmpPath) 72 | if err != nil { 73 | log.Println("could not remove tmp file:", tmpPath) 74 | } 75 | }() 76 | 77 | if extractor.GetDBPath() == tmpPath { 78 | return fmt.Errorf("recursive populate call detected. 
db tmp path must be different than initial db path") 79 | } 80 | 81 | // Update extractor to use the tmp path 82 | extractor.SetDBPath(tmpPath) 83 | 84 | // Retry with udpated db path 85 | return PopulateSinceTime(extractor, since, opts) 86 | } 87 | 88 | urls, err := extractor.GetAllUrlsSince(ctx, conn, since) 89 | if err != nil { 90 | return err 91 | } 92 | 93 | visits, err := extractor.GetAllVisitsSince(ctx, conn, since) 94 | if err != nil { 95 | log.Println(err) 96 | os.Exit(1) 97 | } 98 | 99 | var sinceString string 100 | if since != inceptionTime { 101 | sinceString = "since:" + since.Format(time.RFC3339) 102 | } 103 | 104 | log.Printf("["+extractor.GetName()+"] %s urls:%d visits:%d source:%s", sinceString, len(urls), len(visits), extractor.GetDBPath()) 105 | 106 | db, err := persistence.InitDb(ctx, config.Config) 107 | if err != nil { 108 | return err 109 | } 110 | defer db.Close() 111 | 112 | for _, x := range urls { 113 | err := persistence.InsertUrl(ctx, db, &x) 114 | if err != nil { 115 | log.Println("could not insert row", err) 116 | } 117 | } 118 | 119 | for _, x := range visits { 120 | if x.ExtractorName == "" { 121 | x.ExtractorName = extractor.GetName() 122 | } 123 | 124 | err := persistence.InsertVisit(ctx, db, &x) 125 | if err != nil { 126 | log.Println("could not insert row", err) 127 | } 128 | } 129 | 130 | return nil 131 | } 132 | -------------------------------------------------------------------------------- /pkg/extractors/chromium.go: -------------------------------------------------------------------------------- 1 | package extractors 2 | 3 | import ( 4 | "context" 5 | "database/sql" 6 | "fmt" 7 | "io/fs" 8 | "path/filepath" 9 | "time" 10 | 11 | "github.com/iansinnott/browser-gopher/pkg/logging" 12 | "github.com/iansinnott/browser-gopher/pkg/types" 13 | "github.com/iansinnott/browser-gopher/pkg/util" 14 | ) 15 | 16 | const chromiumUrls = ` 17 | SELECT 18 | url, 19 | title, 20 | datetime(last_visit_time / 1e6 + strftime('%s', '1601-01-01'), 
'unixepoch') as lastVisitDate 21 | FROM 22 | urls 23 | WHERE lastVisitDate > ? 24 | ORDER BY 25 | lastVisitDate DESC; 26 | ` 27 | 28 | const chromiumVisits = ` 29 | SELECT 30 | datetime(visit_time / 1e6 + strftime('%s', '1601-01-01'), 'unixepoch') AS visitDate, 31 | u.url 32 | FROM 33 | visits v 34 | INNER JOIN urls u ON v.url = u.id 35 | WHERE visitDate > ? 36 | ORDER BY 37 | visitDate DESC; 38 | ` 39 | 40 | type ChromiumExtractor struct { 41 | Name string 42 | HistoryDBPath string 43 | } 44 | 45 | func (a *ChromiumExtractor) GetName() string { 46 | return a.Name 47 | } 48 | 49 | func (a *ChromiumExtractor) GetDBPath() string { 50 | return a.HistoryDBPath 51 | } 52 | 53 | func (a *ChromiumExtractor) SetDBPath(s string) { 54 | a.HistoryDBPath = s 55 | } 56 | 57 | func (a *ChromiumExtractor) VerifyConnection(ctx context.Context, conn *sql.DB) (bool, error) { 58 | row := conn.QueryRowContext(ctx, "SELECT count(*) FROM urls;") 59 | err := row.Err() 60 | if err != nil { 61 | return false, err 62 | } 63 | return true, nil 64 | } 65 | 66 | func (a *ChromiumExtractor) GetAllUrlsSince(ctx context.Context, conn *sql.DB, since time.Time) ([]types.UrlRow, error) { 67 | // NOTE it is very important to use UTC. Otherwise the timezone will be unintentionally stripped (this was a bug before) 68 | // aside: we should probably use the ints rather than string formatting. 
69 | sinceString := since.UTC().Format(util.SQLiteDateTime) 70 | logging.Debug().Println("sinceString", sinceString) 71 | rows, err := conn.QueryContext(ctx, chromiumUrls, sinceString) 72 | if err != nil { 73 | fmt.Println(err) 74 | return nil, err 75 | } 76 | defer rows.Close() 77 | 78 | var urls []types.UrlRow 79 | 80 | for rows.Next() { 81 | var x types.UrlRow 82 | var visit_time string 83 | err = rows.Scan(&x.Url, &x.Title, &visit_time) 84 | if err != nil { 85 | fmt.Println("individual row error", err) 86 | return nil, err 87 | } 88 | t, err := util.ParseSQLiteDatetime(visit_time) 89 | if err != nil { 90 | fmt.Println("could not parse datetime", err) 91 | } 92 | x.LastVisit = &t 93 | urls = append(urls, x) 94 | } 95 | 96 | err = rows.Err() 97 | if err != nil { 98 | fmt.Println("row error", err) 99 | return nil, err 100 | } 101 | 102 | return urls, nil 103 | } 104 | 105 | func (a *ChromiumExtractor) GetAllVisitsSince(ctx context.Context, conn *sql.DB, since time.Time) ([]types.VisitRow, error) { 106 | rows, err := conn.QueryContext(ctx, chromiumVisits, since.UTC().Format(util.SQLiteDateTime)) 107 | if err != nil { 108 | fmt.Println(err) 109 | return nil, err 110 | } 111 | defer rows.Close() 112 | 113 | var visits []types.VisitRow 114 | 115 | for rows.Next() { 116 | var x types.VisitRow 117 | var ts string 118 | err = rows.Scan(&ts, &x.Url) 119 | if err != nil { 120 | fmt.Println("individual row error", err) 121 | return nil, err 122 | } 123 | 124 | t, err := util.ParseSQLiteDatetime(ts) 125 | if err != nil { 126 | fmt.Println("datetime parsing error", ts, err) 127 | return nil, err 128 | } 129 | x.Datetime = t 130 | visits = append(visits, x) 131 | } 132 | 133 | err = rows.Err() 134 | if err != nil { 135 | fmt.Println("row error", err) 136 | return nil, err 137 | } 138 | 139 | return visits, nil 140 | } 141 | 142 | func FindChromiumDBs(root string) ([]string, error) { 143 | results := []string{} 144 | 145 | err := filepath.WalkDir(root, func(path string, d 
fs.DirEntry, err error) error { 146 | if !d.IsDir() && d.Name() == "History" { 147 | results = append(results, path) 148 | } 149 | return nil 150 | }) 151 | 152 | return results, err 153 | } 154 | -------------------------------------------------------------------------------- /pkg/extractors/historyTrendsUnlimited.go: -------------------------------------------------------------------------------- 1 | package extractors 2 | 3 | import ( 4 | "context" 5 | "database/sql" 6 | "fmt" 7 | "io/fs" 8 | "path/filepath" 9 | "strings" 10 | "time" 11 | 12 | "github.com/iansinnott/browser-gopher/pkg/types" 13 | "github.com/iansinnott/browser-gopher/pkg/util" 14 | ) 15 | 16 | // @note We cannot use lastVisitDate in the where clause due to MAX(...) aggregation. 17 | const historyTrendsUrls = ` 18 | SELECT 19 | u.url, 20 | u.title, 21 | datetime(max(v.visit_time) / 1e3, 'unixepoch') AS lastVisitDate 22 | FROM 23 | visits v 24 | INNER JOIN urls u ON u.urlid = v.urlid 25 | WHERE datetime(v.visit_time / 1e3, 'unixepoch') > ? 26 | GROUP BY 27 | v.urlid 28 | ORDER BY 29 | lastVisitDate DESC; 30 | ` 31 | 32 | const historyTrendsVisits = ` 33 | SELECT 34 | datetime(v.visit_time / 1e3, 'unixepoch') AS visitDate, 35 | u.url 36 | FROM 37 | visits v 38 | INNER JOIN urls u ON u.urlid = v.urlid 39 | WHERE visitDate > ? 
40 | ORDER BY 41 | visitDate DESC; 42 | ` 43 | 44 | type HistoryTrendsExtractor struct { 45 | Name string 46 | HistoryDBPath string 47 | } 48 | 49 | func (a *HistoryTrendsExtractor) GetName() string { 50 | return a.Name 51 | } 52 | 53 | func (a *HistoryTrendsExtractor) GetDBPath() string { 54 | return a.HistoryDBPath 55 | } 56 | 57 | func (a *HistoryTrendsExtractor) SetDBPath(s string) { 58 | a.HistoryDBPath = s 59 | } 60 | 61 | func (a *HistoryTrendsExtractor) VerifyConnection(ctx context.Context, conn *sql.DB) (bool, error) { 62 | row := conn.QueryRowContext(ctx, "SELECT count(*) FROM urls;") 63 | err := row.Err() 64 | if err != nil { 65 | return false, err 66 | } 67 | return true, nil 68 | } 69 | 70 | func (a *HistoryTrendsExtractor) GetAllUrlsSince(ctx context.Context, conn *sql.DB, since time.Time) ([]types.UrlRow, error) { 71 | rows, err := conn.QueryContext(ctx, historyTrendsUrls, since.UTC().Format(util.SQLiteDateTime)) 72 | if err != nil { 73 | fmt.Println(err) 74 | return nil, err 75 | } 76 | defer rows.Close() 77 | 78 | var urls []types.UrlRow 79 | 80 | for rows.Next() { 81 | var x types.UrlRow 82 | var visit_time string 83 | err = rows.Scan(&x.Url, &x.Title, &visit_time) 84 | if err != nil { 85 | fmt.Println("individual row error", err) 86 | return nil, err 87 | } 88 | t, err := util.ParseSQLiteDatetime(visit_time) 89 | if err != nil { 90 | fmt.Println("could not parse datetime", err) 91 | } 92 | x.LastVisit = &t 93 | urls = append(urls, x) 94 | } 95 | 96 | err = rows.Err() 97 | if err != nil { 98 | fmt.Println("row error", err) 99 | return nil, err 100 | } 101 | 102 | return urls, nil 103 | } 104 | 105 | func (a *HistoryTrendsExtractor) GetAllVisitsSince(ctx context.Context, conn *sql.DB, since time.Time) ([]types.VisitRow, error) { 106 | rows, err := conn.QueryContext(ctx, historyTrendsVisits, since.UTC().Format(util.SQLiteDateTime)) 107 | if err != nil { 108 | fmt.Println(err) 109 | return nil, err 110 | } 111 | defer rows.Close() 112 | 113 | var 
visits []types.VisitRow 114 | 115 | for rows.Next() { 116 | var x types.VisitRow 117 | var ts string 118 | err = rows.Scan(&ts, &x.Url) 119 | if err != nil { 120 | fmt.Println("individual row error", err) 121 | return nil, err 122 | } 123 | 124 | t, err := util.ParseSQLiteDatetime(ts) 125 | if err != nil { 126 | fmt.Println("datetime parsing error", ts, err) 127 | return nil, err 128 | } 129 | x.Datetime = t 130 | visits = append(visits, x) 131 | } 132 | 133 | err = rows.Err() 134 | if err != nil { 135 | fmt.Println("row error", err) 136 | return nil, err 137 | } 138 | 139 | return visits, nil 140 | } 141 | 142 | func FindHistoryTrendsDBs(root string) ([]string, error) { 143 | results := []string{} 144 | 145 | fmt.Println("Trying root", root) 146 | err := filepath.WalkDir(root, func(path string, d fs.DirEntry, err error) error { 147 | if !d.IsDir() && d.Name() == "1" && strings.Contains(path, "chrome-extension_pnmchffiealhkdloeffcdnbgdnedheme_0") { 148 | results = append(results, path) 149 | } 150 | return nil 151 | }) 152 | 153 | return results, err 154 | } 155 | -------------------------------------------------------------------------------- /cmd/devFullText.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "fmt" 5 | "io" 6 | "net/url" 7 | "os" 8 | "path/filepath" 9 | "strings" 10 | 11 | md "github.com/JohannesKaufmann/html-to-markdown" 12 | "github.com/iansinnott/browser-gopher/pkg/fulltext" 13 | "github.com/iansinnott/browser-gopher/pkg/logging" 14 | "github.com/iansinnott/browser-gopher/pkg/util" 15 | "github.com/spf13/cobra" 16 | stripmd "github.com/writeas/go-strip-markdown" 17 | ) 18 | 19 | // get user agent returns a valid user agent for use in scraping. in the future 20 | // the idea is to have it generated at runtime, either by reading from local 21 | // data or calling a remote api. thus the error return value. 
22 | func GetUserAgent() (string, error) { 23 | return "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36", nil 24 | } 25 | 26 | var devFullTextCmd = &cobra.Command{ 27 | Use: "full-text", 28 | Short: "Get the full text of URLs", 29 | Long: ` 30 | Get the full text of a URL or stdin. This is used for dev in order to 31 | easily check the FTS output of a given site. FTS processing is done 32 | automatically for URLs. 33 | 34 | Example: 35 | 36 | browser-gopher dev full-text 'https://example.com' 37 | curl 'https://example.com' | browser-gopher dev full-text - 38 | 39 | `, 40 | Run: func(cmd *cobra.Command, args []string) { 41 | if len(args) == 0 { 42 | fmt.Println("No url provided") 43 | cmd.Help() 44 | os.Exit(1) 45 | } 46 | 47 | targetUrl := args[0] 48 | urlMd5 := util.HashMd5String(targetUrl) 49 | logging.Debug().Println("processing", urlMd5, targetUrl) 50 | 51 | var html []byte 52 | var err error 53 | var hostname string 54 | var pathname string 55 | 56 | cacheDir := filepath.Join("tmp", "scrape_cache") 57 | err = os.MkdirAll(cacheDir, 0755) 58 | if err != nil { 59 | fmt.Fprintf(os.Stderr, "mkdir: %s\n", err) 60 | os.Exit(1) 61 | } 62 | 63 | if targetUrl == "-" { 64 | html, err = io.ReadAll(os.Stdin) 65 | hostname = "stdin" 66 | pathname = "-" 67 | if err != nil { 68 | fmt.Fprintf(os.Stderr, "error: %s\n", err) 69 | os.Exit(1) 70 | } 71 | } else { 72 | u, err := url.Parse(targetUrl) 73 | if err != nil { 74 | fmt.Fprintf(os.Stderr, "could not parse: %s\n", err) 75 | os.Exit(1) 76 | } 77 | 78 | hostname = u.Hostname() 79 | pathname = strings.ReplaceAll(strings.Trim(u.Path, "/"), "/", "_") 80 | scraper := fulltext.NewScraper() 81 | htmls, err := scraper.ScrapeUrls(targetUrl) 82 | 83 | if err != nil { 84 | fmt.Fprintf(os.Stderr, "error: %s ", err) 85 | os.Exit(1) 86 | } 87 | 88 | if len(htmls) != 1 { 89 | fmt.Fprintf(os.Stderr, "no html body found") 90 | os.Exit(1) 91 | } 92 | 93 | // @note the urls 
in the htmls map may not match the passed-in URLs. this is not a good API 94 | html = htmls[targetUrl].Body 95 | } 96 | 97 | outFile := fmt.Sprintf("%s_%s_%s", urlMd5, hostname, pathname) 98 | 99 | var outPath string 100 | outPath = filepath.Join("tmp", fmt.Sprintf("%s.html", outFile)) 101 | err = os.WriteFile(outPath, html, 0644) 102 | if err != nil { 103 | fmt.Fprintf(os.Stderr, "error: %s\n", err) 104 | os.Exit(1) 105 | } 106 | fmt.Println("wrote: " + outPath) 107 | 108 | converter := md.NewConverter(targetUrl, true, nil) 109 | bs, err := converter.ConvertBytes(html) 110 | if err != nil { 111 | fmt.Fprintf(os.Stderr, "error: %s\n", err) 112 | os.Exit(1) 113 | } 114 | 115 | outPath = filepath.Join("tmp", fmt.Sprintf("%s.md", outFile)) 116 | err = os.WriteFile(outPath, bs, 0644) 117 | if err != nil { 118 | fmt.Fprintf(os.Stderr, "html2markdown: %s\n", err) 119 | os.Exit(1) 120 | } 121 | fmt.Println("wrote: " + outPath) 122 | 123 | outPath = filepath.Join("tmp", fmt.Sprintf("%s.txt", outFile)) 124 | err = os.WriteFile(outPath, []byte(stripmd.Strip(string(bs))), 0644) 125 | if err != nil { 126 | fmt.Fprintf(os.Stderr, "strip error: %s\n", err) 127 | os.Exit(1) 128 | } 129 | fmt.Println("wrote: " + outPath) 130 | }, 131 | } 132 | 133 | func init() { 134 | devCmd.AddCommand(devFullTextCmd) 135 | } 136 | -------------------------------------------------------------------------------- /pkg/search/sqlSearchProvider.go: -------------------------------------------------------------------------------- 1 | package search 2 | 3 | import ( 4 | "context" 5 | "time" 6 | 7 | "github.com/iansinnott/browser-gopher/pkg/config" 8 | "github.com/iansinnott/browser-gopher/pkg/persistence" 9 | "github.com/iansinnott/browser-gopher/pkg/types" 10 | "github.com/pkg/errors" 11 | "github.com/samber/lo" 12 | ) 13 | 14 | type SqlSearchProvider struct { 15 | ctx context.Context 16 | conf *config.AppConfig 17 | } 18 | 19 | func NewSqlSearchProvider(ctx context.Context, conf *config.AppConfig) 
SqlSearchProvider { 20 | return SqlSearchProvider{ctx: ctx, conf: conf} 21 | } 22 | 23 | func (p SqlSearchProvider) SearchUrls(query string) (*SearchResult, error) { 24 | conn, err := persistence.OpenConnection(p.ctx, p.conf) 25 | if err != nil { 26 | return nil, err 27 | } 28 | defer conn.Close() 29 | 30 | var count uint 31 | row := conn.QueryRowContext(p.ctx, ` 32 | SELECT 33 | count(*) 34 | FROM ( 35 | SELECT 36 | e 37 | FROM 38 | fragment_fts 39 | WHERE 40 | fragment_fts MATCH ? 41 | GROUP BY 42 | e); 43 | `, query) 44 | if row.Err() != nil { 45 | return nil, errors.Wrap(row.Err(), "row count error") 46 | } 47 | err = row.Scan(&count) 48 | if err != nil { 49 | return nil, errors.Wrap(err, "row count error") 50 | } 51 | 52 | rows, err := conn.QueryContext(p.ctx, ` 53 | WITH 54 | search_fragments AS ( 55 | SELECT 56 | fts.rank, 57 | fts.e, 58 | fts.a, 59 | snippet (fragment_fts, 60 | - 1, 61 | '', 62 | '', 63 | '…', 64 | 64) AS snippet 65 | FROM 66 | fragment_fts fts 67 | LEFT OUTER JOIN urls d ON d.url_md5 = fts.e 68 | WHERE 69 | fragment_fts MATCH ? 
70 | ORDER BY 71 | d.last_visit DESC 72 | LIMIT 73 | 500 74 | ) 75 | SELECT 76 | t.url_md5, 77 | t.url, 78 | t.title, 79 | t.description, 80 | t.last_visit, 81 | group_concat (m.snippet, '\n') AS 'match', 82 | count(m.snippet) as 'match_count', 83 | sum(m.rank) as 'sum_rank' 84 | FROM 85 | search_fragments m 86 | inner join urls t on t.url_md5 = m.e 87 | GROUP BY 88 | m.e 89 | ORDER BY t.last_visit DESC 90 | LIMIT 100; 91 | `, query) 92 | 93 | if err != nil { 94 | return nil, errors.Wrap(err, "query error") 95 | } 96 | if rows.Err() != nil { 97 | return nil, errors.Wrap(rows.Err(), "query error") 98 | } 99 | 100 | xs := []types.UrlDbSearchEntity{} 101 | 102 | for rows.Next() { 103 | var x types.UrlDbSearchEntity 104 | var ts int64 105 | err := rows.Scan(&x.UrlMd5, &x.Url, &x.Title, &x.Description, &ts, &x.Match, &x.MatchCount, &x.SumRank) 106 | if err != nil { 107 | return nil, errors.Wrap(err, "row error") 108 | } 109 | t := time.Unix(ts, 0) 110 | x.LastVisit = &t 111 | xs = append(xs, x) 112 | } 113 | 114 | searchResult := lo.Map(xs, func(x types.UrlDbSearchEntity, i int) types.SearchableEntity { 115 | return types.UrlDbSearchEntityToSearchableEntity(x) 116 | }) 117 | 118 | return &SearchResult{Urls: searchResult, Count: count}, nil 119 | } 120 | 121 | func (p SqlSearchProvider) RecentUrls(limit uint) (*SearchResult, error) { 122 | conn, err := persistence.OpenConnection(p.ctx, p.conf) 123 | if err != nil { 124 | return nil, err 125 | } 126 | defer conn.Close() 127 | 128 | var count uint 129 | row := conn.QueryRowContext(p.ctx, ` 130 | SELECT 131 | COUNT(*) 132 | FROM 133 | urls; 134 | `) 135 | if row.Err() != nil { 136 | return nil, errors.Wrap(row.Err(), "row count error") 137 | } 138 | err = row.Scan(&count) 139 | if err != nil { 140 | return nil, errors.Wrap(err, "row count error") 141 | } 142 | 143 | rows, err := conn.QueryContext(p.ctx, ` 144 | SELECT 145 | url_md5, 146 | url, 147 | title, 148 | description, 149 | last_visit 150 | FROM 151 | urls 152 | 
ORDER BY 153 | last_visit DESC 154 | LIMIT ?; 155 | `, limit) 156 | 157 | if err != nil { 158 | return nil, errors.Wrap(err, "query error") 159 | } 160 | if rows.Err() != nil { 161 | return nil, errors.Wrap(rows.Err(), "query error") 162 | } 163 | 164 | xs := []types.UrlDbEntity{} 165 | 166 | for rows.Next() { 167 | var x types.UrlDbEntity 168 | var ts int64 169 | err := rows.Scan(&x.UrlMd5, &x.Url, &x.Title, &x.Description, &ts) 170 | if err != nil { 171 | return nil, errors.Wrap(err, "row error") 172 | } 173 | t := time.Unix(ts, 0) 174 | x.LastVisit = &t 175 | xs = append(xs, x) 176 | } 177 | 178 | searchResult := lo.Map(xs, func(x types.UrlDbEntity, i int) types.SearchableEntity { 179 | return types.UrlDbEntityToSearchableEntity(x) 180 | }) 181 | 182 | return &SearchResult{Urls: searchResult, Count: count}, nil 183 | } 184 | -------------------------------------------------------------------------------- /pkg/fulltext/scraper.go: -------------------------------------------------------------------------------- 1 | package fulltext 2 | 3 | import ( 4 | "fmt" 5 | "net/http" 6 | "net/url" 7 | "os" 8 | "strings" 9 | "sync" 10 | 11 | "github.com/gocolly/colly/v2" 12 | "github.com/iansinnott/browser-gopher/pkg/logging" 13 | ) 14 | 15 | type WebPage struct { 16 | Url string 17 | Body []byte 18 | Redirected bool 19 | StatusCode int 20 | } 21 | 22 | type Scraper struct { 23 | collector *colly.Collector 24 | scrapedPages map[string]WebPage 25 | redirects map[string]string 26 | lock sync.RWMutex 27 | } 28 | 29 | // currently unused 30 | func (s *Scraper) onRequest(r *colly.Request) {} 31 | 32 | func (s *Scraper) redirectHandler(req *http.Request, via []*http.Request) error { 33 | var sb strings.Builder 34 | sb.WriteString("Redirect: ") 35 | for _, v := range via { 36 | sb.WriteString(v.URL.String()) 37 | sb.WriteString(" -> ") 38 | } 39 | sb.WriteString(req.URL.String()) 40 | logging.Debug().Println(sb.String()) 41 | 42 | // store the redirect so we can get back to the 
original url later 43 | s.lock.Lock() 44 | s.redirects[req.URL.String()] = via[0].URL.String() 45 | s.lock.Unlock() 46 | 47 | if len(via) > 10 { 48 | return fmt.Errorf("too many redirects") 49 | } 50 | 51 | return nil 52 | } 53 | 54 | func (s *Scraper) handleResponse(r *colly.Response) { 55 | u, redirected := s.UnredirectUrl(r.Request.URL.String()) 56 | 57 | s.lock.Lock() 58 | defer s.lock.Unlock() 59 | 60 | s.scrapedPages[u] = WebPage{ 61 | Url: u, 62 | Body: r.Body, 63 | Redirected: redirected, 64 | StatusCode: r.StatusCode, 65 | } 66 | } 67 | 68 | // get user agent returns a valid user agent for use in scraping. in the future 69 | // the idea is to have it generated at runtime, either by reading from local 70 | // data or calling a remote api. thus the error return value. 71 | func GetUserAgent() (string, error) { 72 | return "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36", nil 73 | } 74 | 75 | func NewScraper() *Scraper { 76 | ua, _ := GetUserAgent() 77 | collector := colly.NewCollector( 78 | colly.UserAgent(ua), 79 | colly.MaxDepth(1), // 0 means unlimited. 
not sure how this actually works since I thought it does NOT spider by default 80 | colly.Async(true), 81 | colly.IgnoreRobotsTxt(), 82 | colly.AllowURLRevisit(), 83 | // colly.CacheDir(cacheDir), // without cachedir colly will re-request every site (which may be what you want, just note) 84 | ) 85 | 86 | // if logging.IsDebug() { 87 | // collector.SetDebugger(&debug.LogDebugger{}) 88 | // } 89 | 90 | perSiteConcurrency := 2 91 | logging.Debug().Printf("setting max concurrency to %d", perSiteConcurrency) 92 | err := collector.Limit(&colly.LimitRule{ 93 | DomainGlob: "*", 94 | Parallelism: perSiteConcurrency, 95 | Delay: 1, 96 | RandomDelay: 1, 97 | }) 98 | 99 | if err != nil { 100 | fmt.Fprintf(os.Stderr, "could not set limit rule: %s\n", err) 101 | } 102 | 103 | scraper := &Scraper{ 104 | collector: collector, 105 | scrapedPages: map[string]WebPage{}, 106 | redirects: map[string]string{}, 107 | } 108 | 109 | collector.OnRequest(scraper.onRequest) 110 | collector.OnResponseHeaders(func(r *colly.Response) { 111 | logging.Debug().Println(r.StatusCode, r.Request.URL) 112 | }) 113 | collector.SetRedirectHandler(scraper.redirectHandler) 114 | collector.OnResponse(scraper.handleResponse) 115 | 116 | collector.OnError(func(r *colly.Response, err error) { 117 | if r.StatusCode < 400 { 118 | logging.Debug().Printf("error: %v %s\n", r.StatusCode, err) 119 | } 120 | scraper.handleResponse(r) 121 | }) 122 | 123 | return scraper 124 | } 125 | 126 | func (s *Scraper) UnredirectUrl(url string) (u string, redirected bool) { 127 | s.lock.RLock() 128 | defer s.lock.RUnlock() 129 | 130 | if s.redirects[url] != "" { 131 | return s.redirects[url], true 132 | } 133 | 134 | return url, false 135 | } 136 | 137 | func (s *Scraper) ScrapeUrls(urls ...string) (map[string]WebPage, error) { 138 | for _, targetUrl := range urls { 139 | _, err := url.Parse(targetUrl) 140 | 141 | if err != nil { 142 | fmt.Fprintf(os.Stderr, "warn: could not parse: %s\n", err) 143 | continue 144 | } 145 | 146 | 
err = s.collector.Visit(targetUrl) 147 | 148 | if err != nil { 149 | logging.Debug().Println("could not visit", targetUrl, err) 150 | return nil, err 151 | } 152 | } 153 | 154 | // make sure async requests have finished 155 | s.collector.Wait() 156 | 157 | // result := map[string]WebPage{} 158 | 159 | // for url, webPage := range s.scrapedPages { 160 | // result[url] = webPage 161 | // delete(s.scrapedPages, url) 162 | // } 163 | 164 | return s.scrapedPages, nil 165 | } 166 | -------------------------------------------------------------------------------- /cmd/populate.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright © 2022 NAME HERE 3 | */ 4 | package cmd 5 | 6 | import ( 7 | "fmt" 8 | "log" 9 | "os" 10 | "strings" 11 | "time" 12 | 13 | "github.com/iansinnott/browser-gopher/pkg/config" 14 | ex "github.com/iansinnott/browser-gopher/pkg/extractors" 15 | "github.com/iansinnott/browser-gopher/pkg/logging" 16 | "github.com/iansinnott/browser-gopher/pkg/persistence" 17 | "github.com/iansinnott/browser-gopher/pkg/populate" 18 | "github.com/pkg/errors" 19 | "github.com/spf13/cobra" 20 | ) 21 | 22 | var populateCmd = &cobra.Command{ 23 | Use: "populate", 24 | Short: "Populate URLs from all known sources", 25 | Long: ``, 26 | Run: func(cmd *cobra.Command, args []string) { 27 | var err error 28 | 29 | browserName, err := cmd.Flags().GetString("browser") 30 | if err != nil { 31 | fmt.Println("could not parse --browser:", err) 32 | os.Exit(1) 33 | } 34 | 35 | onlyLatest, err := cmd.Flags().GetBool("latest") 36 | if err != nil { 37 | fmt.Println("could not parse --latest:", err) 38 | os.Exit(1) 39 | } 40 | 41 | shouldBuildIndex, err := cmd.Flags().GetBool("build-index") 42 | if err != nil { 43 | fmt.Println("could not parse --build-index:", err) 44 | os.Exit(1) 45 | } 46 | 47 | shouldScrapeFulltext, err := cmd.Flags().GetBool("fulltext") 48 | if err != nil { 49 | fmt.Println("could not parse --fulltext:", err) 50 | 
os.Exit(1) 51 | } 52 | 53 | keepTmpFiles, err := cmd.Flags().GetBool("keep-tmp-files") 54 | if err != nil { 55 | fmt.Println("could not parse --keep-tmp-files:", err) 56 | os.Exit(1) 57 | } 58 | 59 | extractors, err := ex.BuildExtractorList() 60 | if err != nil { 61 | log.Println("error getting extractors", err) 62 | os.Exit(1) 63 | } 64 | 65 | dbConn, err := persistence.InitDb(cmd.Context(), config.Config) 66 | if err != nil { 67 | fmt.Println("could not open our db", err) 68 | os.Exit(1) 69 | } 70 | defer dbConn.Close() 71 | 72 | errs := []error{} 73 | 74 | // Without a browser name, populate everything 75 | for _, x := range extractors { 76 | if browserName != "" && x.GetName() != browserName { 77 | continue 78 | } 79 | 80 | since := time.Unix(0, 0) // 1970-01-01 81 | if onlyLatest { 82 | latestTime, err := persistence.GetLatestTime(cmd.Context(), dbConn, x) 83 | if err != nil { 84 | fmt.Println("could not get latest time", err) 85 | os.Exit(1) 86 | } 87 | 88 | logging.Debug().Println("latest time:", latestTime, latestTime.Format(time.RFC3339)) 89 | 90 | since = *latestTime 91 | } 92 | 93 | var err error 94 | if onlyLatest { 95 | err = populate.PopulateSinceTime(x, since, &populate.PopulateOptions{KeepTmpFiles: keepTmpFiles}) 96 | } else { 97 | err = populate.PopulateAll(x) 98 | } 99 | if err != nil { 100 | errs = append(errs, errors.Wrap(err, x.GetName()+" populate:")) 101 | } 102 | } 103 | 104 | if len(errs) > 0 { 105 | for _, e := range errs { 106 | logging.Warn().Println("browser failure:", e) 107 | } 108 | 109 | if len(errs) == len(extractors) { 110 | err = fmt.Errorf("all browsers failed to populate. exiting") 111 | } 112 | } 113 | 114 | if err != nil { 115 | logging.Error().Println("Encountered an error", err) 116 | os.Exit(1) 117 | } 118 | 119 | if shouldScrapeFulltext { 120 | var n int 121 | retries := 5 122 | t := time.Now() 123 | 124 | // @note It's not clear why sqlite is throwing busy errors. 
Concurrency is 125 | // used under the hood by colly but not directly in our code, so in theory 126 | // there should be only one goroutine accessing the database. 127 | // The retry loop is a workaround for episodic sqlite busy errors. 128 | for retries > 0 { 129 | n, err = populate.PopulateFulltext(cmd.Context(), dbConn) 130 | if err != nil { 131 | // if the error is sqlite_busy then retry once 132 | if strings.Contains(err.Error(), "database is locked") { 133 | fmt.Println("database is locked, retrying in 5 seconds") 134 | time.Sleep(5 * time.Second) 135 | retries-- 136 | continue 137 | } 138 | 139 | logging.Error().Printf("could not populate fulltext: %v\n", err) 140 | os.Exit(1) 141 | } 142 | 143 | // if no error then break out 144 | break 145 | } 146 | 147 | log.Printf("Scraped %d pages in %v\n", n, time.Since(t)) 148 | } 149 | 150 | if shouldBuildIndex { 151 | fmt.Println("Indexing results...") 152 | t := time.Now() 153 | n, err := populate.BuildIndex(cmd.Context(), dbConn, 0) 154 | if err != nil { 155 | logging.Error().Printf("building the search index: %v\n", err) 156 | os.Exit(1) 157 | } 158 | log.Printf("Indexed %d records in %v\n", n, time.Since(t)) 159 | } 160 | }, 161 | } 162 | 163 | func init() { 164 | rootCmd.AddCommand(populateCmd) 165 | populateCmd.Flags().StringP("browser", "b", "", "Specify the browser name you'd like to extract") 166 | populateCmd.Flags().Bool("latest", false, "Only populate data that's newer than last import (Recommended, likely will be default in future version)") 167 | populateCmd.Flags().Bool("build-index", true, "Whether or not to build the search index. Required for search to work.") 168 | populateCmd.Flags().Bool("fulltext", false, "Whether or not to collect the full-text of each page in your browsing history and make it searchable.") 169 | populateCmd.Flags().Bool("keep-tmp-files", false, "Whether or not to keep temporary files created during the populate process. 
Probably only useful for debugging.") 170 | } 171 | -------------------------------------------------------------------------------- /pkg/extractors/extractors.go: -------------------------------------------------------------------------------- 1 | package extractors 2 | 3 | import ( 4 | "errors" 5 | "os" 6 | 7 | "github.com/iansinnott/browser-gopher/pkg/logging" 8 | "github.com/iansinnott/browser-gopher/pkg/types" 9 | "github.com/iansinnott/browser-gopher/pkg/util" 10 | ) 11 | 12 | type browserDataSource struct { 13 | name string 14 | // @todo Hrm, so make this []string as a way to support multiple OSs? is there a case where any of the other logic would not be platform agnostic? 15 | paths []string 16 | findDBs func(string) ([]string, error) 17 | createExtractor func(name string, dbPath string) types.Extractor 18 | } 19 | 20 | // Build a list of relevant extractors for this system 21 | // @todo If we want to go multi platform this is currently the place to specify 22 | // the logic to determine paths on a per-platform basis. The extractors should 23 | // all Just Work if they are pointed to an appropriate sqlite db. 
24 | func BuildExtractorList() ([]types.Extractor, error) { 25 | result := []types.Extractor{} 26 | 27 | candidateBrowsers := []browserDataSource{ 28 | // Chrome-like 29 | { 30 | name: "chrome", 31 | paths: []string{util.Expanduser("~/Library/Application Support/Google/Chrome/")}, 32 | findDBs: FindChromiumDBs, 33 | createExtractor: func(name, dbPath string) types.Extractor { 34 | return &ChromiumExtractor{Name: name, HistoryDBPath: dbPath} 35 | }, 36 | }, 37 | { 38 | name: "brave", 39 | paths: []string{util.Expanduser("~/Library/Application Support/BraveSoftware/Brave-Browser")}, 40 | findDBs: FindChromiumDBs, 41 | createExtractor: func(name, dbPath string) types.Extractor { 42 | return &ChromiumExtractor{Name: name, HistoryDBPath: dbPath} 43 | }, 44 | }, 45 | { 46 | name: "brave-beta", 47 | paths: []string{util.Expanduser("~/Library/Application Support/BraveSoftware/Brave-Browser-Beta")}, 48 | findDBs: FindChromiumDBs, 49 | createExtractor: func(name, dbPath string) types.Extractor { 50 | return &ChromiumExtractor{Name: name, HistoryDBPath: dbPath} 51 | }, 52 | }, 53 | { 54 | name: "arc", 55 | paths: []string{util.Expanduser("~/Library/Application Support/Arc/User Data")}, 56 | findDBs: FindChromiumDBs, 57 | createExtractor: func(name, dbPath string) types.Extractor { 58 | return &ChromiumExtractor{Name: name, HistoryDBPath: dbPath} 59 | }, 60 | }, 61 | { 62 | name: "vivaldi", 63 | paths: []string{util.Expanduser("~/Library/Application Support/Vivaldi")}, 64 | findDBs: FindChromiumDBs, 65 | createExtractor: func(name, dbPath string) types.Extractor { 66 | return &ChromiumExtractor{Name: name, HistoryDBPath: dbPath} 67 | }, 68 | }, 69 | { 70 | name: "sidekick", 71 | paths: []string{util.Expanduser("~/Library/Application Support/Sidekick")}, 72 | findDBs: FindChromiumDBs, 73 | createExtractor: func(name, dbPath string) types.Extractor { 74 | return &ChromiumExtractor{Name: name, HistoryDBPath: dbPath} 75 | }, 76 | }, 77 | { 78 | name: "edge", 79 | paths: 
[]string{util.Expanduser("~/Library/Application Support/Microsoft Edge")}, 80 | findDBs: FindChromiumDBs, 81 | createExtractor: func(name, dbPath string) types.Extractor { 82 | return &ChromiumExtractor{Name: name, HistoryDBPath: dbPath} 83 | }, 84 | }, 85 | 86 | // Firefox-like 87 | // @todo What is the path for FF dev edition? 88 | { 89 | name: "firefox", 90 | paths: []string{ 91 | util.Expanduser("~/Library/Application Support/Firefox/Profiles/"), // osx 92 | util.Expanduser("~/.mozilla/firefox/"), // lin 93 | }, 94 | findDBs: FindFirefoxDBs, 95 | createExtractor: func(name, dbPath string) types.Extractor { 96 | return &FirefoxExtractor{Name: name, HistoryDBPath: dbPath} 97 | }, 98 | }, 99 | 100 | // Safari-like 101 | // @todo What is the path for safari preview edition? 102 | { 103 | name: "safari", 104 | paths: []string{util.Expanduser("~/Library/Safari/")}, 105 | findDBs: func(s string) ([]string, error) { 106 | dbPath := s + "History.db" 107 | if _, err := os.Stat(dbPath); err != nil { 108 | return nil, err 109 | } 110 | return []string{dbPath}, nil 111 | }, 112 | createExtractor: func(name, dbPath string) types.Extractor { 113 | return &SafariExtractor{Name: name, HistoryDBPath: dbPath} 114 | }, 115 | }, 116 | 117 | // Orion 118 | { 119 | name: "orion", 120 | paths: []string{util.Expanduser("~/Library/Application Support/Orion/Defaults/")}, 121 | findDBs: func(s string) ([]string, error) { 122 | dbPath := s + "history" 123 | if _, err := os.Stat(dbPath); err != nil { 124 | return nil, err 125 | } 126 | return []string{dbPath}, nil 127 | }, 128 | createExtractor: func(name, dbPath string) types.Extractor { 129 | return &OrionExtractor{Name: name, HistoryDBPath: dbPath} 130 | }, 131 | }, 132 | 133 | // @note Of all the browsers listed above sigmaos seems to be the most 134 | // actively changing with the most novel data model. So this may well break 135 | // with some future update. 
136 | { 137 | name: "sigmaos", 138 | paths: []string{util.Expanduser("~/Library/Containers/com.sigmaos.sigmaos.macos/Data/Library/Application Support/SigmaOS/")}, 139 | findDBs: func(s string) ([]string, error) { 140 | dbPath := s + "Model.sqlite" 141 | if _, err := os.Stat(dbPath); err != nil { 142 | return nil, err 143 | } 144 | return []string{dbPath}, nil 145 | }, 146 | createExtractor: func(name, dbPath string) types.Extractor { 147 | return &SigmaOSExtractor{Name: name, HistoryDBPath: dbPath} 148 | }, 149 | }, 150 | } 151 | 152 | // @note it is assumed that the same browser is not installed at multiple 153 | // paths. I.e. the paths are mutually exclusive. the first path to be found 154 | // will be used. 155 | for _, browser := range candidateBrowsers { 156 | found := false 157 | 158 | for _, p := range browser.paths { 159 | _, err := os.Stat(p) 160 | if errors.Is(err, os.ErrNotExist) { 161 | continue 162 | } 163 | 164 | dbs, err := browser.findDBs(p) 165 | if err != nil { 166 | return nil, err 167 | } 168 | for _, dbPath := range dbs { 169 | result = append(result, browser.createExtractor(browser.name, dbPath)) 170 | found = true 171 | } 172 | } 173 | 174 | if !found { 175 | logging.Debug().Println("[" + browser.name + "] not found. 
skipping:") 176 | } 177 | } 178 | 179 | return result, nil 180 | } 181 | -------------------------------------------------------------------------------- /pkg/populate/fulltext.go: -------------------------------------------------------------------------------- 1 | package populate 2 | 3 | import ( 4 | "context" 5 | "database/sql" 6 | "fmt" 7 | "time" 8 | 9 | md "github.com/JohannesKaufmann/html-to-markdown" 10 | "github.com/iansinnott/browser-gopher/pkg/fulltext" 11 | "github.com/iansinnott/browser-gopher/pkg/logging" 12 | "github.com/iansinnott/browser-gopher/pkg/persistence" 13 | "github.com/iansinnott/browser-gopher/pkg/types" 14 | "github.com/iansinnott/browser-gopher/pkg/util" 15 | "github.com/pkg/errors" 16 | "github.com/samber/lo" 17 | stripmd "github.com/writeas/go-strip-markdown" 18 | ) 19 | 20 | // @note the `order by random()` is meant to avoid trying to scrape from the same website all at once. No DoS! 21 | const queryUrlsWithoutDocuments = ` 22 | SELECT 23 | u.url_md5, 24 | u.url, 25 | u.title, 26 | u.description, 27 | u.last_visit 28 | FROM 29 | urls u 30 | LEFT OUTER JOIN url_document_edges edge ON u.url_md5 = edge.url_md5 31 | WHERE 32 | edge.url_md5 IS NULL 33 | ORDER BY 34 | RANDOM() 35 | LIMIT ?; 36 | ` 37 | 38 | const countUrlsWithoutDocuments = ` 39 | SELECT 40 | COUNT(*) 41 | FROM 42 | urls u 43 | LEFT OUTER JOIN url_document_edges edge ON u.url_md5 = edge.url_md5 44 | WHERE 45 | edge.url_md5 IS NULL; 46 | ` 47 | 48 | const scrapeBatchSize = 10 49 | 50 | func PopulateFulltext(ctx context.Context, db *sql.DB) (int, error) { 51 | indexedCount := 0 52 | var todoCount int 53 | row := db.QueryRowContext(ctx, countUrlsWithoutDocuments) 54 | err := row.Scan(&todoCount) 55 | if err != nil { 56 | return 0, errors.Wrap(err, "failed to count urls without documents") 57 | } 58 | 59 | scraper := fulltext.NewScraper() 60 | 61 | // Do the scraping 62 | for indexedCount < todoCount { 63 | fmt.Printf("scraping: (%d/%d) %.2f\n", indexedCount, todoCount, 
float32(indexedCount)/float32(todoCount)) 64 | 65 | n, err := batchScrape(ctx, db, scraper) 66 | 67 | // break early if there was an error 68 | if err != nil { 69 | return 0, err 70 | } 71 | 72 | // Break early if we get back fewer URLs than batch size, indicating there 73 | // are less than batch size left to scrape 74 | if n == 0 { 75 | break 76 | } 77 | 78 | indexedCount += n 79 | } 80 | 81 | indexedCount = 0 82 | toIndexCount, err := persistence.CountUrlsWhere(ctx, db, 83 | `documents.body NOT NULL 84 | AND documents.body != '' 85 | AND urls_meta.indexed_at < documents.accessed_at`) 86 | if err != nil { 87 | return 0, errors.Wrap(err, "failed to count urls to index") 88 | } 89 | 90 | // Do the indexing 91 | for indexedCount < toIndexCount { 92 | ents, err := getUnindexedBodyRows(ctx, db) 93 | if err != nil { 94 | return 0, errors.Wrap(err, "error getting unindexed bodies") 95 | } 96 | 97 | n, err := batchIndex(ctx, db, ents...) 98 | if err != nil { 99 | return 0, errors.Wrap(err, "error indexing batch") 100 | } 101 | 102 | if n == 0 { 103 | logging.Debug().Println("nothing left to process") 104 | break 105 | } 106 | 107 | logging.Debug().Printf("indexing with bodies (%d/%d) %.2f\n", indexedCount, toIndexCount, float32(indexedCount)/float32(toIndexCount)) 108 | indexedCount += n 109 | } 110 | 111 | return indexedCount, nil 112 | } 113 | 114 | func getUnindexedBodyRows(ctx context.Context, db *sql.DB) ([]types.UrlDbEntity, error) { 115 | qry := ` 116 | SELECT 117 | u.url_md5, 118 | u.url, 119 | u.title, 120 | u.description, 121 | u.last_visit, 122 | d.body 123 | FROM 124 | urls u 125 | JOIN url_document_edges edge ON u.url_md5 = edge.url_md5 126 | JOIN documents d ON edge.document_md5 = d.document_md5 127 | JOIN urls_meta m ON u.url_md5 = m.url_md5 128 | WHERE 129 | d.body NOT NULL 130 | AND d.body != '' 131 | AND m.indexed_at < d.accessed_at 132 | LIMIT ?; 133 | ` 134 | 135 | rows, err := db.QueryContext(ctx, qry, batchSize) 136 | if err != nil { 137 | return 
nil, errors.Wrap(err, "failed to query for urls with unindexed documents") 138 | } 139 | defer rows.Close() 140 | 141 | var ents []types.UrlDbEntity 142 | 143 | for rows.Next() { 144 | var ( 145 | ent types.UrlDbEntity 146 | ts int64 147 | t time.Time 148 | ) 149 | 150 | err := rows.Scan( 151 | &ent.UrlMd5, 152 | &ent.Url, 153 | &ent.Title, 154 | &ent.Description, 155 | &ts, 156 | &ent.Body, 157 | ) 158 | if err != nil { 159 | return nil, errors.Wrap(err, "failed to scan row") 160 | } 161 | 162 | if ts == 0 { 163 | t = time.Unix(ts, 0) 164 | ent.LastVisit = &t 165 | } 166 | 167 | if ent.Body != nil { 168 | // @todo index this plaintext. Will do this from our data base, since fulltext is now stored there 169 | plaintext := stripmd.Strip(*ent.Body) 170 | ent.Body = &plaintext 171 | } 172 | 173 | ents = append(ents, ent) 174 | } 175 | 176 | return ents, nil 177 | } 178 | 179 | func batchScrape(ctx context.Context, db *sql.DB, scraper *fulltext.Scraper) (int, error) { 180 | // get all urls that do not have an associated record in the documents table 181 | rows, err := db.QueryContext(ctx, queryUrlsWithoutDocuments, scrapeBatchSize) 182 | if err != nil { 183 | return 0, errors.Wrap(err, "failed to query for urls without documents") 184 | } 185 | 186 | var urls []types.UrlDbEntity 187 | 188 | // for each document, get the text and index it 189 | for rows.Next() { 190 | var u types.UrlDbEntity 191 | var ts int64 192 | 193 | err := rows.Scan(&u.UrlMd5, &u.Url, &u.Title, &u.Description, &ts) 194 | 195 | // @note last visit time can be zero, indicating unknown visit time. 
This 196 | // will happen if importing from browserparrot/persistory because the visits 197 | // table had a bug 198 | if ts > 0 { 199 | t := time.Unix(ts, 0) 200 | u.LastVisit = &t 201 | } 202 | 203 | if err != nil { 204 | return 0, errors.Wrap(err, "error scanning row") 205 | } 206 | 207 | urls = append(urls, u) 208 | } 209 | 210 | rows.Close() 211 | 212 | xs := lo.Map(urls, func(u types.UrlDbEntity, i int) string { return u.Url }) 213 | xm, err := scraper.ScrapeUrls(xs...) 214 | if err != nil { 215 | return 0, errors.Wrap(err, "error scraping urls") 216 | } 217 | 218 | for _, u := range urls { 219 | doc := xm[u.Url] 220 | converter := md.NewConverter(doc.Url, true, nil) 221 | md, err := converter.ConvertString(string(doc.Body)) 222 | if err != nil { 223 | return 0, err 224 | } 225 | 226 | urlMd5 := util.HashMd5String(doc.Url) 227 | docMd5 := util.HashMd5String(md) // @note that we use the distilled md hash in order to avoid duplication when content hasn't noticably changed 228 | accessedAt := time.Now() 229 | 230 | err = persistence.InsertDocument(ctx, db, &types.DocumentRow{ 231 | DocumentMd5: docMd5, 232 | UrlMd5: urlMd5, 233 | StatusCode: doc.StatusCode, 234 | AccessedAt: &accessedAt, 235 | Body: &md, 236 | }) 237 | if err != nil { 238 | return 0, errors.Wrap(err, "error inserting document") 239 | } 240 | 241 | } 242 | 243 | return len(urls), nil 244 | } 245 | -------------------------------------------------------------------------------- /pkg/tui/tui.go: -------------------------------------------------------------------------------- 1 | package tui 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "os" 7 | "os/exec" 8 | "regexp" 9 | "strings" 10 | "time" 11 | 12 | "github.com/charmbracelet/bubbles/list" 13 | "github.com/charmbracelet/bubbles/textinput" 14 | tea "github.com/charmbracelet/bubbletea" 15 | "github.com/charmbracelet/lipgloss" 16 | "github.com/iansinnott/browser-gopher/pkg/search" 17 | "github.com/iansinnott/browser-gopher/pkg/util" 18 | 
"github.com/pkg/errors" 19 | ) 20 | 21 | var docStyle = lipgloss.NewStyle().Margin(1, 2) 22 | var titleStyle = lipgloss.NewStyle().Bold(true).Foreground(lipgloss.Color("#fafafa")) 23 | var urlStyle = lipgloss.NewStyle().Bold(true).Foreground(lipgloss.Color("#87BCF7")) 24 | 25 | var HighlightStyle = lipgloss.NewStyle().Background(lipgloss.Color("#D8D7A0")).Foreground(lipgloss.Color("#000000")) 26 | 27 | const UNTITLED = "" 28 | 29 | type ListItem struct { 30 | // @note ItemTitle is thus named so as not to conflict with the Title() method, which is used by bubbletea 31 | ItemTitle, Desc, query string 32 | Body *string 33 | Date *time.Time 34 | } 35 | 36 | func (i ListItem) Title() string { 37 | var sb strings.Builder 38 | 39 | if i.Date != nil { 40 | sb.WriteString(i.Date.Format(util.FormatDateOnly)) 41 | sb.WriteString(" ") 42 | } 43 | 44 | sb.WriteString(titleStyle.Render(i.ItemTitle)) 45 | 46 | return sb.String() 47 | } 48 | func (i ListItem) Description() string { return urlStyle.Render(i.Desc) } 49 | func (i ListItem) FilterValue() string { return i.ItemTitle + i.Desc } 50 | 51 | // @todo Support other systems that don't have `open` 52 | // @todo should prob store a list of the `item` structs that have the URL rather than doing this string manipulation 53 | func OpenItem(item list.Item) error { 54 | filterVal := item.FilterValue() 55 | re := regexp.MustCompile(`https?://`) 56 | loc := re.FindStringIndex(filterVal) 57 | url := filterVal[loc[0]:] 58 | fmt.Println("open", url) 59 | return exec.Command("open", url).Run() 60 | } 61 | 62 | type model struct { 63 | input textinput.Model 64 | list list.Model 65 | searchProvider search.SearchProvider 66 | dataProvider search.DataProvider 67 | mapItem ItemMapping 68 | } 69 | 70 | func (m model) Init() tea.Cmd { 71 | return nil 72 | } 73 | 74 | func (m model) Update(msg tea.Msg) (tea.Model, tea.Cmd) { 75 | var cmd tea.Cmd 76 | switch msg := msg.(type) { 77 | 78 | case tea.KeyMsg: 79 | switch msg.String() { 80 | case 
"ctrl-c", "esc": 81 | return m, tea.Quit 82 | case "ctrl+n", "ctrl+j", "down": 83 | m.list, cmd = m.list.Update(msg) 84 | return m, cmd 85 | case "ctrl+p", "ctrl+k", "up": 86 | m.list, cmd = m.list.Update(msg) 87 | return m, cmd 88 | case "enter": 89 | item := m.list.SelectedItem() 90 | OpenItem(item) // @todo wrap this in a tea.Cmd to preserve purity 91 | return m, tea.Quit 92 | default: 93 | var inputCmd tea.Cmd 94 | var result *search.SearchResult 95 | var err error 96 | m.input, inputCmd = m.input.Update(msg) 97 | query := m.input.Value() 98 | if query == "" { 99 | result, err = m.dataProvider.RecentUrls(100) 100 | } else { 101 | result, err = m.searchProvider.SearchUrls(query) 102 | } 103 | // @note we ignored parse errors since they are quite expected when a user is typing 104 | if err != nil && !AcceptibleSearchError(err) { 105 | fmt.Println("search error", err) 106 | os.Exit(1) 107 | } 108 | items := ResultToItems(result, query, m.mapItem) 109 | listCmd := m.list.SetItems(items) 110 | return m, tea.Batch(inputCmd, listCmd) 111 | } 112 | 113 | case tea.WindowSizeMsg: 114 | h, v := docStyle.GetFrameSize() 115 | m.list.SetSize(msg.Width-h*2, msg.Height-v*2) 116 | } 117 | 118 | return m, cmd 119 | } 120 | 121 | func (m model) View() string { 122 | listView := m.list.View() 123 | return docStyle.Render(m.input.View()) + "\n" + listView 124 | } 125 | 126 | type ItemMapping func(x ListItem) list.Item 127 | 128 | var identityMapping ItemMapping = func(x ListItem) list.Item { 129 | return x 130 | } 131 | 132 | // @todo Rather than taking providers this should probably take a search 133 | // function that can handle customized querying. I.e. 
if I want to return only 134 | // full-text documents with this current setup i would need to create a new 135 | // SearchProvider that returns full-text docs for the SearchUrls call 136 | func GetSearchProgram( 137 | ctx context.Context, 138 | initialQuery string, 139 | dataProvider search.DataProvider, 140 | searchProvider search.SearchProvider, 141 | mapItem *func(x ListItem) list.Item, 142 | ) (*tea.Program, error) { 143 | var err error 144 | var result *search.SearchResult 145 | 146 | var mapping ItemMapping 147 | if mapItem != nil { 148 | mapping = ItemMapping(*mapItem) 149 | } else { 150 | mapping = identityMapping 151 | } 152 | 153 | if initialQuery == "" { 154 | result, err = dataProvider.RecentUrls(100) 155 | } else { 156 | result, err = searchProvider.SearchUrls(initialQuery) 157 | } 158 | 159 | if err != nil && !AcceptibleSearchError(err) { 160 | return nil, errors.Wrap(err, "failed to get initial search results") 161 | } 162 | 163 | items := ResultToItems(result, "", mapping) 164 | 165 | // Input el 166 | input := textinput.New() 167 | input.Placeholder = "Search..." 
168 | input.SetValue(initialQuery) 169 | input.Focus() 170 | 171 | // Search results list el 172 | listDelegate := list.NewDefaultDelegate() 173 | listDelegate.SetHeight(2) 174 | listDelegate.SetSpacing(1) 175 | list := list.New(items, listDelegate, 0, 0) 176 | list.SetFilteringEnabled(false) 177 | list.SetShowTitle(false) 178 | list.SetShowStatusBar(false) 179 | 180 | m := model{ 181 | list: list, 182 | input: input, 183 | searchProvider: searchProvider, 184 | dataProvider: dataProvider, 185 | mapItem: mapping, 186 | } 187 | 188 | return tea.NewProgram(m, tea.WithAltScreen()), nil 189 | } 190 | 191 | func ResultToItems(result *search.SearchResult, query string, mapItem ItemMapping) []list.Item { 192 | if result == nil || len(result.Urls) == 0 { 193 | return []list.Item{ListItem{ItemTitle: "No results found"}} 194 | } 195 | 196 | urls := result.Urls 197 | items := []list.Item{} 198 | 199 | for _, u := range urls { 200 | displayUrl := u.Url 201 | displayTitle := UNTITLED 202 | if u.Title != nil { 203 | displayTitle = *u.Title 204 | } 205 | 206 | // @todo commented out while moving to sqlite 207 | // Highlighting 208 | // if result.Meta != nil { 209 | // hit, ok := lo.Find(result.Meta.Hits, func(x *bs.DocumentMatch) bool { 210 | // return x.ID == u.Id 211 | // }) 212 | 213 | // if ok { 214 | // for k, locations := range hit.Locations { 215 | // switch k { 216 | // case "title": 217 | // displayTitle = search.HighlightAll(locations, displayTitle, HighlightStyle.Render) 218 | // case "url": 219 | // displayUrl = search.HighlightAll(locations, displayUrl, HighlightStyle.Render) 220 | // default: 221 | // } 222 | // } 223 | // } 224 | // } 225 | 226 | items = append(items, mapItem(ListItem{ 227 | ItemTitle: displayTitle, 228 | Desc: displayUrl, 229 | Date: u.LastVisit, 230 | query: query, 231 | Body: u.Match, 232 | })) 233 | } 234 | 235 | return items 236 | } 237 | 238 | func AcceptibleSearchError(err error) bool { 239 | return strings.Contains(err.Error(), "parse 
error") || strings.Contains(err.Error(), "syntax error") 240 | } 241 | -------------------------------------------------------------------------------- /pkg/populate/searchIndex.go: -------------------------------------------------------------------------------- 1 | package populate 2 | 3 | import ( 4 | "context" 5 | "database/sql" 6 | "fmt" 7 | "strconv" 8 | "strings" 9 | "time" 10 | 11 | "github.com/iansinnott/browser-gopher/pkg/persistence" 12 | "github.com/iansinnott/browser-gopher/pkg/types" 13 | "github.com/iansinnott/browser-gopher/pkg/util" 14 | "github.com/pkg/errors" 15 | stripmd "github.com/writeas/go-strip-markdown" 16 | ) 17 | 18 | // how many urls to index at a time 19 | const batchSize = 1000 20 | 21 | func BuildIndex(ctx context.Context, db *sql.DB, limit int) (int, error) { 22 | indexedCount := 0 23 | toIndexCount, err := persistence.CountUrlsWhere(ctx, db, "indexed_at IS NULL") 24 | if err != nil { 25 | return 0, errors.Wrap(err, "error getting count of urls to index") 26 | } 27 | 28 | if limit > 0 && limit < toIndexCount { 29 | toIndexCount = limit 30 | } 31 | 32 | for indexedCount < toIndexCount { 33 | // get documents to index 34 | ents, err := getUnindexed(ctx, db) 35 | if err != nil { 36 | return 0, err 37 | } 38 | 39 | // index them 40 | n, err := batchIndex(ctx, db, ents...) 41 | if err != nil { 42 | return 0, err 43 | } 44 | 45 | // Break out if indexedCount is not increasing 46 | if n == 0 { 47 | break 48 | } 49 | 50 | fmt.Printf("indexing: (%d/%d) %.2f\n", indexedCount, toIndexCount, float32(indexedCount)/float32(toIndexCount)) 51 | 52 | indexedCount += n 53 | } 54 | 55 | return indexedCount, err 56 | } 57 | 58 | // Index (or reindex) an individual document. If doc.Id is already present in 59 | // the search index it will be overwritten. 
60 | func IndexDocument(ctx context.Context, db *sql.DB, doc types.UrlDbEntity) error { 61 | _, err := batchIndex(ctx, db, doc) 62 | if err != nil { 63 | return errors.Wrap(err, "error indexing document") 64 | } 65 | 66 | t := time.Now() 67 | meta := types.UrlMetaRow{ 68 | Url: doc.Url, 69 | IndexedAt: &t, 70 | } 71 | 72 | err = persistence.InsertUrlMeta(ctx, db, meta) 73 | if err != nil { 74 | return err 75 | } 76 | 77 | return nil 78 | } 79 | 80 | // The reason we need an int ID is due to the int requirement on rowid in the fts table. 81 | func generateEavId(e string, t string, a string, v string) (int64, error) { 82 | shasum := util.HashSha1String(fmt.Sprintf("%s%s%s%s", e, t, a, v)) 83 | 84 | // take the first 15 characters of the sha1 hash, as this is the max that will 85 | // fit into sqlite int column (it's signed, i think. otherwise we could take 86 | // 16 chars) 87 | hexId := shasum[0:15] 88 | 89 | // convert to a base 10 number 90 | return strconv.ParseInt(hexId, 16, 64) 91 | } 92 | 93 | /** 94 | * Index an entity 95 | */ 96 | func indexEav(ctx context.Context, db *sql.Tx, e string, t string, a string, v string) error { 97 | // insert into the fragments table 98 | const qry = ` 99 | INSERT OR REPLACE INTO 100 | fragment(id, e, t, a, v) 101 | VALUES(?, ?, ?, ?, ?); 102 | ` 103 | 104 | id, err := generateEavId(e, t, a, v) 105 | if err != nil { 106 | return errors.Wrap(err, "error generating eav id") 107 | } 108 | 109 | _, err = db.ExecContext(ctx, qry, id, e, t, a, v) 110 | if err != nil { 111 | return errors.Wrap(err, "error inserting fragment") 112 | } 113 | 114 | return nil 115 | } 116 | 117 | func batchIndex(ctx context.Context, db *sql.DB, ents ...types.UrlDbEntity) (int, error) { 118 | tx, err := db.BeginTx(ctx, nil) 119 | if err != nil { 120 | return 0, err 121 | } 122 | 123 | cleanupWithError := func(err error) error { 124 | tx.Rollback() 125 | return err 126 | } 127 | 128 | // @todo Could be much more efficient by not committing transaction for 
each row 129 | for _, ent := range ents { 130 | 131 | // vs is an array of structs containing `a` and `v` fields 132 | vs := []struct { 133 | a string 134 | v *string 135 | }{{"url", &ent.Url}, {"title", ent.Title}, {"description", ent.Description}} 136 | 137 | // Insert basic URL data 138 | for _, x := range vs { 139 | table := "urls" 140 | if x.v != nil { 141 | err := indexEav(ctx, tx, ent.UrlMd5, table, x.a, *x.v) 142 | if err != nil { 143 | return 0, cleanupWithError(err) 144 | } 145 | } 146 | } 147 | 148 | // Insert fulltext data 149 | if ent.Body != nil { 150 | table := "documents" 151 | chunk := "" 152 | // Chunk documents by paragraphs, for now 153 | for _, paragraph := range strings.Split(*ent.Body, "\n\n") { 154 | chunk += strings.TrimSpace(paragraph) 155 | if len(chunk) < 180 { 156 | chunk += " " 157 | continue 158 | } 159 | 160 | err := indexEav(ctx, tx, ent.UrlMd5, table, "content", chunk) 161 | if err != nil { 162 | return 0, cleanupWithError(err) 163 | } 164 | 165 | chunk = "" 166 | } 167 | 168 | // Grab straggling chunk. i.e. if the whole document is less than the threshold. 169 | if len(chunk) > 0 { 170 | err := indexEav(ctx, tx, ent.UrlMd5, table, "content", chunk) 171 | if err != nil { 172 | return 0, cleanupWithError(err) 173 | } 174 | } 175 | } 176 | } 177 | 178 | err = tx.Commit() 179 | if err != nil { 180 | return 0, err 181 | } 182 | 183 | // var err error 184 | 185 | metas := []types.UrlMetaRow{} 186 | 187 | // Mark docs as indexed so that we don't re-index them 188 | for _, doc := range ents { 189 | t := time.Now() 190 | metas = append(metas, types.UrlMetaRow{ 191 | Url: doc.Url, 192 | IndexedAt: &t, 193 | }) 194 | } 195 | 196 | err = persistence.InsertUrlMeta(ctx, db, metas...) 197 | 198 | if err != nil { 199 | // check if SQLITE_BUSY error 200 | if strings.Contains(err.Error(), "database is locked") { 201 | fmt.Println("database is locked, retrying...") 202 | time.Sleep(1000 * time.Millisecond) 203 | return batchIndex(ctx, db, ents...) 
204 | } 205 | 206 | return 0, errors.Wrap(err, "error marking doc as indexed") 207 | } 208 | 209 | return len(ents), nil 210 | } 211 | 212 | func getUnindexed(ctx context.Context, db *sql.DB) ([]types.UrlDbEntity, error) { 213 | const qry = ` 214 | SELECT 215 | u.url_md5, 216 | u.url, 217 | u.title, 218 | u.description, 219 | u.last_visit, 220 | doc.document_md5, 221 | doc.body 222 | FROM 223 | urls u 224 | LEFT OUTER JOIN urls_meta um ON u.url_md5 = um.url_md5 225 | LEFT OUTER JOIN url_document_edges ON u.url_md5 = url_document_edges.url_md5 226 | LEFT OUTER JOIN documents doc ON url_document_edges.document_md5 = doc.document_md5 227 | WHERE 228 | um.indexed_at IS NULL 229 | ORDER BY 230 | last_visit DESC 231 | LIMIT ?; 232 | ` 233 | 234 | rows, err := db.QueryContext(ctx, qry, batchSize) 235 | if err != nil { 236 | return nil, errors.Wrap(err, "error querying unindexed urls") 237 | } 238 | defer rows.Close() 239 | 240 | // Put docs into a slice so that we can iterate over them to mark them as 241 | // indexed. Otherwies we could add them to the batch directly. 242 | var docs []types.UrlDbEntity 243 | 244 | for rows.Next() { 245 | var ent types.UrlDbEntity 246 | var ts int64 247 | err := rows.Scan(&ent.UrlMd5, &ent.Url, &ent.Title, &ent.Description, &ts, &ent.BodyMd5, &ent.Body) 248 | if err != nil { 249 | return nil, errors.Wrap(err, "error scanning row") 250 | } 251 | 252 | // @note last visit time can be zero, indicating unknown visit time. 
This 253 | // will happen if importing from browserparrot/persistory because the visits 254 | // table had a bug 255 | if ts > 0 { 256 | t := time.Unix(ts, 0) 257 | ent.LastVisit = &t 258 | } 259 | 260 | if ent.Body != nil { 261 | plaintext := stripmd.Strip(*ent.Body) 262 | ent.Body = &plaintext 263 | } 264 | 265 | docs = append(docs, ent) 266 | } 267 | 268 | return docs, nil 269 | } 270 | 271 | func ReindexWithLimit(ctx context.Context, db *sql.DB, limit int) (int, error) { 272 | var err error 273 | qry := ` 274 | UPDATE 275 | urls_meta 276 | SET 277 | indexed_at = NULL 278 | WHERE 279 | indexed_at NOT NULL; 280 | ` 281 | _, err = db.ExecContext(ctx, qry) 282 | if err != nil { 283 | return 0, errors.Wrap(err, "error removing indexed status") 284 | } 285 | 286 | return BuildIndex(ctx, db, limit) 287 | } 288 | 289 | // Reindex documents that have already been indexed. This does not remove 290 | // anything from the index, but will overwrite documents that have been updated. 291 | func ReindexAll(ctx context.Context, db *sql.DB) (int, error) { 292 | return ReindexWithLimit(ctx, db, 0) 293 | } 294 | -------------------------------------------------------------------------------- /pkg/persistence/persistence.go: -------------------------------------------------------------------------------- 1 | package persistence 2 | 3 | import ( 4 | "context" 5 | "database/sql" 6 | "fmt" 7 | "sort" 8 | "strings" 9 | "sync" 10 | "time" 11 | 12 | "embed" 13 | 14 | _ "modernc.org/sqlite" 15 | // _ "github.com/mattn/go-sqlite3" 16 | 17 | "github.com/iansinnott/browser-gopher/pkg/config" 18 | "github.com/iansinnott/browser-gopher/pkg/logging" 19 | "github.com/iansinnott/browser-gopher/pkg/types" 20 | "github.com/iansinnott/browser-gopher/pkg/util" 21 | "github.com/samber/lo" 22 | ) 23 | 24 | // @note Initially visits had a unique index on `extractor_name, url_md5, 25 | // visit_time`, however, this lead to duplicate visits. 
The visits were 26 | // duplicated because some browsers will immport the history of other browsers, 27 | // or in cases like the history trends chrome extension duplication is 28 | // explicitly part of the goal. Thus, in order to minimize duplication visits 29 | // are considered unique by url and unix timestamp. 30 | // 31 | //go:embed migrations/* 32 | var MigrationsDir embed.FS 33 | 34 | var writeLock sync.Mutex 35 | 36 | // Open a connection to the database. Calling code should close the connection when done. 37 | // @note It is assumed that the database is already initialized. Thus this may be less useful than `InitDB` 38 | func OpenConnection(ctx context.Context, c *config.AppConfig) (*sql.DB, error) { 39 | dbPath := c.DBPath 40 | conn, err := sql.Open("sqlite", dbPath) 41 | if err != nil { 42 | return nil, err 43 | } 44 | 45 | return conn, err 46 | } 47 | 48 | // Initialize the database. Create tables and indexes 49 | func InitDb(ctx context.Context, c *config.AppConfig) (*sql.DB, error) { 50 | conn, err := OpenConnection(ctx, c) 51 | if err != nil { 52 | return nil, err 53 | } 54 | 55 | entries, err := MigrationsDir.ReadDir("migrations") 56 | if err != nil { 57 | return nil, err 58 | } 59 | 60 | // pull out `select current_timestamp;` from the database 61 | // this is used to set the version of the database 62 | var version string 63 | err = conn.QueryRowContext(ctx, "PRAGMA user_version;").Scan(&version) 64 | if err != nil { 65 | return nil, err 66 | } 67 | 68 | if version == "0" { 69 | _, err = conn.ExecContext(ctx, "PRAGMA user_version = 1;") 70 | if err != nil { 71 | return nil, err 72 | } 73 | } 74 | 75 | // make sure the migrations are sorted 76 | sort.Slice(entries, func(i, j int) bool { 77 | return entries[i].Name() < entries[j].Name() 78 | }) 79 | 80 | for _, entry := range entries { 81 | // skip files that are not migrations 82 | if entry.IsDir() || !strings.HasSuffix(entry.Name(), ".sql") { 83 | continue 84 | } 85 | 86 | migrationVersion := 
strings.Split(entry.Name(), "_")[0] 87 | migrationVersion = strings.TrimPrefix(migrationVersion, "0") 88 | 89 | // skip migrations that have already been run 90 | if migrationVersion <= version { 91 | continue 92 | } 93 | 94 | filePath := "migrations/" + entry.Name() 95 | 96 | migration, err := MigrationsDir.ReadFile(filePath) 97 | if err != nil { 98 | return nil, err 99 | } 100 | 101 | _, err = conn.ExecContext(ctx, string(migration)) 102 | if err != nil { 103 | return nil, err 104 | } 105 | 106 | _, err = conn.ExecContext(ctx, fmt.Sprintf("PRAGMA user_version = %s;", migrationVersion)) 107 | if err != nil { 108 | return nil, err 109 | } 110 | 111 | version = migrationVersion 112 | } 113 | 114 | return conn, err 115 | } 116 | 117 | func GetLatestTime(ctx context.Context, db *sql.DB, extractor types.Extractor) (*time.Time, error) { 118 | qry := ` 119 | SELECT 120 | visit_time 121 | FROM 122 | visits 123 | WHERE extractor_name = ? 124 | ORDER BY 125 | visit_time DESC 126 | LIMIT 1; 127 | ` 128 | row := db.QueryRowContext(ctx, qry, extractor.GetName()) 129 | if err := row.Err(); err != nil { 130 | return nil, err 131 | } 132 | 133 | var ts int64 134 | err := row.Scan(&ts) 135 | if err != nil { 136 | return nil, err 137 | } 138 | 139 | t := time.Unix(ts, 0) 140 | 141 | return &t, nil 142 | 143 | } 144 | 145 | func InsertUrl(ctx context.Context, db *sql.DB, row *types.UrlRow) error { 146 | const qry = ` 147 | INSERT OR REPLACE INTO 148 | urls(url_md5, url, title, description, last_visit) 149 | VALUES(?, ?, ?, ?, ?); 150 | ` 151 | var lastVisit int64 152 | if row.LastVisit != nil { 153 | lastVisit = row.LastVisit.Unix() 154 | } 155 | md5 := util.HashMd5String(row.Url) 156 | 157 | _, err := db.ExecContext(ctx, qry, md5, row.Url, row.Title, row.Description, lastVisit) 158 | return err 159 | } 160 | 161 | func InsertUrlMeta(ctx context.Context, db *sql.DB, rows ...types.UrlMetaRow) error { 162 | // sql to insert multiple rows at once 163 | qry := ` 164 | INSERT OR REPLACE 
INTO 165 | urls_meta(url_md5, indexed_at) 166 | VALUES` 167 | 168 | n := len(rows) 169 | 170 | for i, row := range rows { 171 | if i == 0 { 172 | qry += "\n" 173 | } else { 174 | qry += ",\n" 175 | } 176 | 177 | md5 := util.HashMd5String(row.Url) 178 | var indexed_at int64 179 | 180 | if row.IndexedAt != nil { 181 | indexed_at = row.IndexedAt.Unix() 182 | } 183 | 184 | qry += fmt.Sprintf("('%s', %d)", md5, indexed_at) 185 | 186 | if i == n-1 { 187 | qry += ";" 188 | } 189 | } 190 | 191 | _, err := db.ExecContext(ctx, qry) 192 | 193 | if err != nil { 194 | logging.Debug().Println("error sql", qry) 195 | } 196 | 197 | return err 198 | } 199 | 200 | func InsertDocument(ctx context.Context, db *sql.DB, row *types.DocumentRow) error { 201 | writeLock.Lock() 202 | defer writeLock.Unlock() 203 | 204 | var accessed_at int64 205 | var err error 206 | 207 | if row.AccessedAt != nil { 208 | accessed_at = row.AccessedAt.Unix() 209 | } 210 | 211 | // @note these are separate transactions because of how Exec handles 212 | // positional args with multiple statements. There is no way to pass different 213 | // args to subsequent statements, the arg list order is reset for each one. 214 | // I.e. the first positional arg is the first in _all_ statements. 
215 | 216 | _, err = db.ExecContext(ctx, 217 | ` 218 | INSERT OR REPLACE INTO 219 | documents(document_md5, status_code, accessed_at, body) 220 | VALUES(?, ?, ?, ?); 221 | `, 222 | row.DocumentMd5, row.StatusCode, accessed_at, row.Body, 223 | ) 224 | if err != nil { 225 | return err 226 | } 227 | 228 | _, err = db.ExecContext(ctx, 229 | ` 230 | INSERT OR IGNORE INTO 231 | url_document_edges(url_md5, document_md5) 232 | VALUES(?, ?); 233 | `, 234 | row.UrlMd5, row.DocumentMd5, 235 | ) 236 | if err != nil { 237 | return err 238 | } 239 | 240 | return nil 241 | } 242 | 243 | func InsertVisit(ctx context.Context, db *sql.DB, row *types.VisitRow) error { 244 | const qry = ` 245 | INSERT OR IGNORE INTO 246 | visits(url_md5, visit_time, extractor_name) 247 | VALUES(?, ?, ?); 248 | ` 249 | md5 := util.HashMd5String(row.Url) 250 | 251 | _, err := db.ExecContext(ctx, qry, md5, row.Datetime.Unix(), row.ExtractorName) 252 | return err 253 | } 254 | 255 | // Count the number of urls that match the given where clause. URL meta is available in the where clause as well. 256 | func CountUrlsWhere(ctx context.Context, db *sql.DB, where string, args ...interface{}) (int, error) { 257 | var qry = ` 258 | SELECT 259 | COUNT(*) 260 | FROM 261 | urls 262 | LEFT OUTER JOIN urls_meta ON urls.url_md5 = urls_meta.url_md5 263 | LEFT OUTER JOIN url_document_edges ON urls.url_md5 = url_document_edges.url_md5 264 | LEFT OUTER JOIN documents ON url_document_edges.document_md5 = documents.document_md5 265 | WHERE %s; 266 | ` 267 | qry = fmt.Sprintf(qry, where) 268 | row := db.QueryRowContext(ctx, qry, args...) 
269 | if err := row.Err(); err != nil { 270 | return 0, err 271 | } 272 | 273 | var count int 274 | err := row.Scan(&count) 275 | if err != nil { 276 | return 0, err 277 | } 278 | 279 | return count, nil 280 | } 281 | 282 | func UrlsById(ctx context.Context, db *sql.DB, ids ...string) ([]types.UrlDbEntity, error) { 283 | qry := fmt.Sprintf( 284 | `SELECT 285 | url_md5, 286 | url, 287 | title, 288 | description, 289 | last_visit 290 | FROM 291 | urls 292 | WHERE 293 | url_md5 IN (%s); 294 | `, 295 | strings.Join( 296 | lo.Map(ids, func(id string, _ int) string { return "?" }), 297 | ",", 298 | ), 299 | ) 300 | 301 | // C'mon Go, don't expose your implementation details (this conversion is 302 | // necessary becuase of underlying mem representation): 303 | // https://go.dev/doc/faq#convert_slice_of_interface 304 | var args []any 305 | for _, id := range ids { 306 | args = append(args, id) 307 | } 308 | 309 | rows, err := db.QueryContext(ctx, qry, args...) 310 | if err != nil { 311 | return nil, err 312 | } 313 | defer rows.Close() 314 | 315 | var urls []types.UrlDbEntity 316 | for rows.Next() { 317 | var url types.UrlDbEntity 318 | var ts int64 319 | 320 | err := rows.Scan(&url.UrlMd5, &url.Url, &url.Title, &url.Description, &ts) 321 | if err != nil { 322 | return nil, err 323 | } 324 | 325 | if ts != 0 { 326 | t := time.Unix(ts, 0) 327 | url.LastVisit = &t 328 | } 329 | 330 | urls = append(urls, url) 331 | } 332 | 333 | return urls, nil 334 | } 335 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= 2 | github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= 3 | github.com/JohannesKaufmann/html-to-markdown v1.3.6 h1:i3Ma4RmIU97gqArbxZXbFqbWKm7XtImlMwVNUouQ7Is= 4 | github.com/JohannesKaufmann/html-to-markdown v1.3.6/go.mod 
h1:Ol3Jv/xw8jt8qsaLeSh/6DBBw4ZBJrTqrOu3wbbUUg8= 5 | github.com/PuerkitoBio/goquery v1.5.1/go.mod h1:GsLWisAFVj4WgDibEWF4pvYnkVQBpKBKeU+7zCJoLcc= 6 | github.com/PuerkitoBio/goquery v1.8.0 h1:PJTF7AmFCFKk1N6V6jmKfrNH9tV5pNE6lZMkG0gta/U= 7 | github.com/PuerkitoBio/goquery v1.8.0/go.mod h1:ypIiRMtY7COPGk+I/YbZLbxsxn9g5ejnI2HSMtkjZvI= 8 | github.com/andybalholm/cascadia v1.1.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y= 9 | github.com/andybalholm/cascadia v1.2.0/go.mod h1:YCyR8vOZT9aZ1CHEd8ap0gMVm2aFgxBp0T0eFw1RUQY= 10 | github.com/andybalholm/cascadia v1.3.1 h1:nhxRkql1kdYCc8Snf7D5/D3spOX+dBgjA6u8x004T2c= 11 | github.com/andybalholm/cascadia v1.3.1/go.mod h1:R4bJ1UQfqADjvDa4P6HZHLh/3OxWWEqc0Sk8XGwHqvA= 12 | github.com/antchfx/htmlquery v1.2.3 h1:sP3NFDneHx2stfNXCKbhHFo8XgNjCACnU/4AO5gWz6M= 13 | github.com/antchfx/htmlquery v1.2.3/go.mod h1:B0ABL+F5irhhMWg54ymEZinzMSi0Kt3I2if0BLYa3V0= 14 | github.com/antchfx/xmlquery v1.2.4 h1:T/SH1bYdzdjTMoz2RgsfVKbM5uWh3gjDYYepFqQmFv4= 15 | github.com/antchfx/xmlquery v1.2.4/go.mod h1:KQQuESaxSlqugE2ZBcM/qn+ebIpt+d+4Xx7YcSGAIrM= 16 | github.com/antchfx/xpath v1.1.6/go.mod h1:Yee4kTMuNiPYJ7nSNorELQMr1J33uOpXDMByNYhvtNk= 17 | github.com/antchfx/xpath v1.1.8 h1:PcL6bIX42Px5usSx6xRYw/wjB3wYGkj0MJ9MBzEKVgk= 18 | github.com/antchfx/xpath v1.1.8/go.mod h1:Yee4kTMuNiPYJ7nSNorELQMr1J33uOpXDMByNYhvtNk= 19 | github.com/atotto/clipboard v0.1.4 h1:EH0zSVneZPSuFR11BlR9YppQTVDbh5+16AmcJi4g1z4= 20 | github.com/atotto/clipboard v0.1.4/go.mod h1:ZY9tmq7sm5xIbd9bOK4onWV4S6X0u6GY7Vn0Yu86PYI= 21 | github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= 22 | github.com/charmbracelet/bubbles v0.14.0 h1:DJfCwnARfWjZLvMglhSQzo76UZ2gucuHPy9jLWX45Og= 23 | github.com/charmbracelet/bubbles v0.14.0/go.mod h1:bbeTiXwPww4M031aGi8UK2HT9RDWoiNibae+1yCMtcc= 24 | github.com/charmbracelet/bubbletea v0.21.0/go.mod h1:GgmJMec61d08zXsOhqRC/AiOx4K4pmz+VIcRIm1FKr4= 25 | github.com/charmbracelet/bubbletea 
v0.22.1 h1:z66q0LWdJNOWEH9zadiAIXp2GN1AWrwNXU8obVY9X24= 26 | github.com/charmbracelet/bubbletea v0.22.1/go.mod h1:8/7hVvbPN6ZZPkczLiB8YpLkLJ0n7DMho5Wvfd2X1C0= 27 | github.com/charmbracelet/harmonica v0.2.0/go.mod h1:KSri/1RMQOZLbw7AHqgcBycp8pgJnQMYYT8QZRqZ1Ao= 28 | github.com/charmbracelet/lipgloss v0.5.0/go.mod h1:EZLha/HbzEt7cYqdFPovlqy5FZPj0xFhg5SaqxScmgs= 29 | github.com/charmbracelet/lipgloss v0.6.0 h1:1StyZB9vBSOyuZxQUcUwGr17JmojPNm87inij9N3wJY= 30 | github.com/charmbracelet/lipgloss v0.6.0/go.mod h1:tHh2wr34xcHjC2HCXIlGSG1jaDF0S0atAUvBMP6Ppuk= 31 | github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= 32 | github.com/containerd/console v1.0.3 h1:lIr7SlA5PxZyMV30bDW0MGbiOPXwc63yRuCP0ARubLw= 33 | github.com/containerd/console v1.0.3/go.mod h1:7LqA/THxQ86k76b8c/EMSiaJ3h1eZkMkXar0TQ1gf3U= 34 | github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= 35 | github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 36 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 37 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 38 | github.com/dustin/go-humanize v1.0.0 h1:VSnTsYCnlFHaM2/igO1h6X3HA71jcobQuxemgkq4zYo= 39 | github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk= 40 | github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= 41 | github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= 42 | github.com/gobwas/glob v0.2.3 h1:A4xDbljILXROh+kObIiy5kIaPYD8e96x1tgBhUI5J+Y= 43 | github.com/gobwas/glob v0.2.3/go.mod h1:d3Ez4x06l9bZtSvzIay5+Yzi0fmZzPgnTbPcKjJAkT8= 44 | github.com/gocolly/colly v1.2.0/go.mod h1:Hof5T3ZswNVsOHYmba1u03W65HDWgpV5HifSuueE0EA= 45 | github.com/gocolly/colly/v2 v2.1.0 h1:k0DuZkDoCsx51bKpRJNEmcxcp+W5N8ziuwGaSDuFoGs= 
46 | github.com/gocolly/colly/v2 v2.1.0/go.mod h1:I2MuhsLjQ+Ex+IzK3afNS8/1qP3AedHOusRPcRdC5o0= 47 | github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= 48 | github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e h1:1r7pUrabqp18hOBcwBwiTsbnFeTZHV9eER/QT5JVZxY= 49 | github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= 50 | github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= 51 | github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= 52 | github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= 53 | github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= 54 | github.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8= 55 | github.com/golang/protobuf v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:xKAWHe0F5eneWXFV3EuXVDTCmh+JuBKY0li0aMyXATA= 56 | github.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrUpVNzEA03Pprs= 57 | github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:WU3c8KckQ9AFe+yFwt9sWVRKCVIyN9cPHBJSNnbL67w= 58 | github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0= 59 | github.com/golang/protobuf v1.4.1/go.mod h1:U8fpvMrcmy5pZrNK1lt4xCsGvpyWQ/VVv6QDs8UjoX8= 60 | github.com/golang/protobuf v1.4.2 h1:+Z5KGCizgyZCbGh1KZqA0fcLLkwbsjIzS4aV2v7wJX0= 61 | github.com/golang/protobuf v1.4.2/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= 62 | github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= 63 | github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= 64 | github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= 65 | github.com/google/go-cmp v0.4.0/go.mod 
h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= 66 | github.com/google/go-cmp v0.5.3 h1:x95R7cp+rSeeqAMI2knLtQ0DKlaBhv2NrtrOvafPHRo= 67 | github.com/google/go-cmp v0.5.3/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= 68 | github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I= 69 | github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= 70 | github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= 71 | github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= 72 | github.com/jawher/mow.cli v1.1.0/go.mod h1:aNaQlc7ozF3vw6IJ2dHjp2ZFiA4ozMIYY6PyuRJwlUg= 73 | github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51 h1:Z9n2FFNUXsshfwJMBgNA0RU6/i7WVaAegv3PtuIHPMs= 74 | github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51/go.mod h1:CzGEWj7cYgsdH8dAjBGEr58BoE7ScuLd+fwFZ44+/x8= 75 | github.com/kennygrant/sanitize v1.2.4 h1:gN25/otpP5vAsO2djbMhF/LQX6R7+O1TB4yv8NzpJ3o= 76 | github.com/kennygrant/sanitize v1.2.4/go.mod h1:LGsjYYtgxbetdg5owWB2mpgUL6e2nfw2eObZ0u0qvak= 77 | github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= 78 | github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= 79 | github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= 80 | github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= 81 | github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc= 82 | github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw= 83 | github.com/lucasb-eyer/go-colorful v1.2.0 h1:1nnpGOrhyZZuNyfu1QjKiUICQ74+3FNCN69Aj6K7nkY= 84 | github.com/lucasb-eyer/go-colorful v1.2.0/go.mod h1:R4dSotOR9KMtayYi1e77YzuveK+i7ruzyGqttikkLy0= 85 | github.com/mattn/go-isatty v0.0.12/go.mod h1:cbi8OIDigv2wuxKPP5vlRcQ1OAZbq2CE4Kysco4FUpU= 86 | github.com/mattn/go-isatty v0.0.14/go.mod 
h1:7GGIvUiUoEMVVmxf/4nioHXj79iQHKdU27kJ6hsGG94= 87 | github.com/mattn/go-isatty v0.0.16 h1:bq3VjFmv/sOjHtdEhmkEV4x1AJtvUvOJ2PFAZ5+peKQ= 88 | github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM= 89 | github.com/mattn/go-localereader v0.0.1 h1:ygSAOl7ZXTx4RdPYinUpg6W99U8jWvWi9Ye2JC/oIi4= 90 | github.com/mattn/go-localereader v0.0.1/go.mod h1:8fBrzywKY7BI3czFoHkuzRoWE9C+EiG4R1k4Cjx5p88= 91 | github.com/mattn/go-runewidth v0.0.10/go.mod h1:RAqKPSqVFrSLVXbA8x7dzmKdmGzieGRCM46jaSJTDAk= 92 | github.com/mattn/go-runewidth v0.0.12/go.mod h1:RAqKPSqVFrSLVXbA8x7dzmKdmGzieGRCM46jaSJTDAk= 93 | github.com/mattn/go-runewidth v0.0.13 h1:lTGmDsbAYt5DmK6OnoV7EuIF1wEIFAcxld6ypU4OSgU= 94 | github.com/mattn/go-runewidth v0.0.13/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w= 95 | github.com/mattn/go-sqlite3 v1.14.17 h1:mCRHCLDUBXgpKAqIKsaAaAsrAlbkeomtRFKXh2L6YIM= 96 | github.com/mattn/go-sqlite3 v1.14.17/go.mod h1:2eHXhiwb8IkHr+BDWZGa96P6+rkvnG63S2DGjv9HUNg= 97 | github.com/muesli/ansi v0.0.0-20211018074035-2e021307bc4b/go.mod h1:fQuZ0gauxyBcmsdE3ZT4NasjaRdxmbCS0jRHsrWu3Ho= 98 | github.com/muesli/ansi v0.0.0-20211031195517-c9f0611b6c70 h1:kMlmsLSbjkikxQJ1IPwaM+7LJ9ltFu/fi8CRzvSnQmA= 99 | github.com/muesli/ansi v0.0.0-20211031195517-c9f0611b6c70/go.mod h1:fQuZ0gauxyBcmsdE3ZT4NasjaRdxmbCS0jRHsrWu3Ho= 100 | github.com/muesli/cancelreader v0.2.0/go.mod h1:3XuTXfFS2VjM+HTLZY9Ak0l6eUKfijIfMUZ4EgX0QYo= 101 | github.com/muesli/cancelreader v0.2.2 h1:3I4Kt4BQjOR54NavqnDogx/MIoWBFa0StPA8ELUXHmA= 102 | github.com/muesli/cancelreader v0.2.2/go.mod h1:3XuTXfFS2VjM+HTLZY9Ak0l6eUKfijIfMUZ4EgX0QYo= 103 | github.com/muesli/reflow v0.2.1-0.20210115123740-9e1d0d53df68/go.mod h1:Xk+z4oIWdQqJzsxyjgl3P22oYZnHdZ8FFTHAQQt5BMQ= 104 | github.com/muesli/reflow v0.3.0 h1:IFsN6K9NfGtjeggFP+68I4chLZV2yIKsXJFNZ+eWh6s= 105 | github.com/muesli/reflow v0.3.0/go.mod h1:pbwTDkVPibjO2kyvBQRBxTWEEGDGq0FlB1BIKtnHY/8= 106 | github.com/muesli/termenv 
v0.11.1-0.20220204035834-5ac8409525e0/go.mod h1:Bd5NYQ7pd+SrtBSrSNoBBmXlcY8+Xj4BMJgh8qcZrvs= 107 | github.com/muesli/termenv v0.11.1-0.20220212125758-44cd13922739/go.mod h1:Bd5NYQ7pd+SrtBSrSNoBBmXlcY8+Xj4BMJgh8qcZrvs= 108 | github.com/muesli/termenv v0.12.0 h1:KuQRUE3PgxRFWhq4gHvZtPSLCGDqM5q/cYr1pZ39ytc= 109 | github.com/muesli/termenv v0.12.0/go.mod h1:WCCv32tusQ/EEZ5S8oUIIrC/nIuBcxCVqlN4Xfkv+7A= 110 | github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e h1:fD57ERR4JtEqsWbfPhv4DMiApHyliiK5xCTNVSPiaAs= 111 | github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= 112 | github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= 113 | github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= 114 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 115 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 116 | github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= 117 | github.com/remyoudompheng/bigfft v0.0.0-20200410134404-eec4a21b6bb0 h1:OdAsTTz6OkFY5QxjkYwrChwuRruF69c169dPK26NUlk= 118 | github.com/remyoudompheng/bigfft v0.0.0-20200410134404-eec4a21b6bb0/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo= 119 | github.com/rivo/uniseg v0.1.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= 120 | github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= 121 | github.com/rivo/uniseg v0.4.2 h1:YwD0ulJSJytLpiaWua0sBDusfsCZohxjxzVTYjwxfV8= 122 | github.com/rivo/uniseg v0.4.2/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88= 123 | github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= 124 | github.com/sahilm/fuzzy v0.1.0 h1:FzWGaw2Opqyu+794ZQ9SYifWv2EIXpwP4q8dY1kDAwI= 125 | github.com/sahilm/fuzzy v0.1.0/go.mod h1:VFvziUEIMCrT6A6tw2RFIXPXXmzXbOsSHF0DOI8ZK9Y= 126 | 
github.com/saintfish/chardet v0.0.0-20120816061221-3af4cd4741ca h1:NugYot0LIVPxTvN8n+Kvkn6TrbMyxQiuvKdEwFdR9vI= 127 | github.com/saintfish/chardet v0.0.0-20120816061221-3af4cd4741ca/go.mod h1:uugorj2VCxiV1x+LzaIdVa9b4S4qGAcH6cbhh4qVxOU= 128 | github.com/samber/lo v1.33.0 h1:2aKucr+rQV6gHpY3bpeZu69uYoQOzVhGT3J22Op6Cjk= 129 | github.com/samber/lo v1.33.0/go.mod h1:HLeWcJRRyLKp3+/XBJvOrerCQn9mhdKMHyd7IRlgeQ8= 130 | github.com/sebdah/goldie/v2 v2.5.3 h1:9ES/mNN+HNUbNWpVAlrzuZ7jE+Nrczbj8uFRjM7624Y= 131 | github.com/sebdah/goldie/v2 v2.5.3/go.mod h1:oZ9fp0+se1eapSRjfYbsV/0Hqhbuu3bJVvKI/NNtssI= 132 | github.com/sergi/go-diff v1.0.0/go.mod h1:0CfEIISq7TuYL3j771MWULgwwjU+GofnZX9QAmXWZgo= 133 | github.com/sergi/go-diff v1.2.0 h1:XU+rvMAioB0UC3q1MFrIQy4Vo5/4VsRDQQXHsEya6xQ= 134 | github.com/sergi/go-diff v1.2.0/go.mod h1:STckp+ISIX8hZLjrqAeVduY0gWCT9IjLuqbuNXdaHfM= 135 | github.com/spf13/cobra v1.7.0 h1:hyqWnYt1ZQShIddO5kBpj3vu05/++x6tJ6dg8EC572I= 136 | github.com/spf13/cobra v1.7.0/go.mod h1:uLxZILRyS/50WlhOIKD7W6V5bgeIt+4sICxh6uRMrb0= 137 | github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= 138 | github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= 139 | github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= 140 | github.com/stretchr/objx v0.2.0/go.mod h1:qt09Ya8vawLte6SNmTgCsAVtYtaKzEcn8ATUoHMkEqE= 141 | github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= 142 | github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= 143 | github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= 144 | github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= 145 | github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= 146 | github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= 147 | 
github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk= 148 | github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= 149 | github.com/temoto/robotstxt v1.1.1 h1:Gh8RCs8ouX3hRSxxK7B1mO5RFByQ4CmJZDwgom++JaA= 150 | github.com/temoto/robotstxt v1.1.1/go.mod h1:+1AmkuG3IYkh1kv0d2qEB9Le88ehNO0zwOr3ujewlOo= 151 | github.com/thoas/go-funk v0.9.1 h1:O549iLZqPpTUQ10ykd26sZhzD+rmR5pWhuElrhbC20M= 152 | github.com/writeas/go-strip-markdown v2.0.1+incompatible h1:IIqxTM5Jr7RzhigcL6FkrCNfXkvbR+Nbu1ls48pXYcw= 153 | github.com/writeas/go-strip-markdown v2.0.1+incompatible/go.mod h1:Rsyu10ZhbEK9pXdk8V6MVnZmTzRG0alMNLMwa0J01fE= 154 | github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= 155 | github.com/yuin/goldmark v1.4.14 h1:jwww1XQfhJN7Zm+/a1ZA/3WUiEBEroYFNTiV3dKwM8U= 156 | github.com/yuin/goldmark v1.4.14/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= 157 | golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= 158 | golang.org/x/crypto v0.0.0-20190605123033-f99c8df09eb5/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= 159 | golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= 160 | golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= 161 | golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= 162 | golang.org/x/exp v0.0.0-20220303212507-bbda1eaf7a17 h1:3MTrJm4PyNL9NBqvYDSj3DHl46qQakyfqfWo4jgfaEM= 163 | golang.org/x/exp v0.0.0-20220303212507-bbda1eaf7a17/go.mod h1:lgLbSvA5ygNOMpwM/9anMpWVlVJ7Z+cHWq/eFuinpGE= 164 | golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= 165 | golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU= 166 | 
golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= 167 | golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= 168 | golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4 h1:6zppjxzCulZykYSLyVDYbneBfbaBIQPYMevg0bEwv2s= 169 | golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= 170 | golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= 171 | golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= 172 | golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= 173 | golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= 174 | golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= 175 | golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= 176 | golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks= 177 | golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= 178 | golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= 179 | golang.org/x/net v0.0.0-20200421231249-e086a090c8fd/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= 180 | golang.org/x/net v0.0.0-20200602114024-627f9648deb9/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= 181 | golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= 182 | golang.org/x/net v0.0.0-20210916014120-12bc252f5db8/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= 183 | golang.org/x/net v0.0.0-20220909164309-bea034e7d591 h1:D0B/7al0LLrVC8aWF4+oxpv/m8bc7ViFfVS8/gXGdqI= 
184 | golang.org/x/net v0.0.0-20220909164309-bea034e7d591/go.mod h1:YDH+HFinaLZZlnHAfSS6ZXJJ9M9t4Dl22yv3iI2vPwk= 185 | golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= 186 | golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 187 | golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 188 | golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 189 | golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 190 | golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 191 | golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 192 | golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 193 | golang.org/x/sys v0.0.0-20200116001909-b77594299b42/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 194 | golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 195 | golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 196 | golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 197 | golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 198 | golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 199 | golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 200 | golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 201 | golang.org/x/sys v0.0.0-20211007075335-d3039528d8ac/go.mod 
h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 202 | golang.org/x/sys v0.0.0-20220204135822-1c1b9b1eba6a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 203 | golang.org/x/sys v0.0.0-20220209214540-3681064d5158/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 204 | golang.org/x/sys v0.0.0-20220728004956-3c1f35247d10/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 205 | golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 206 | golang.org/x/sys v0.4.0 h1:Zr2JFtRQNX3BCZ8YtxRE9hNJYC8J6I1MVbMg6owUp18= 207 | golang.org/x/sys v0.4.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 208 | golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= 209 | golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= 210 | golang.org/x/term v0.0.0-20220722155259-a9ba230a4035 h1:Q5284mrmYTpACcm+eAKjKJH48BBwSyfJqmmGDTtT8Vc= 211 | golang.org/x/term v0.0.0-20220722155259-a9ba230a4035/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= 212 | golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= 213 | golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= 214 | golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= 215 | golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= 216 | golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= 217 | golang.org/x/text v0.3.8 h1:nAL+RVCQ9uMn3vJZbV+MRnydTJFPf8qqY42YiA6MrqY= 218 | golang.org/x/text v0.3.8/go.mod h1:E6s5w1FMmriuDzIBO73fBruAKo1PCIq6d2Q6DHfQ8WQ= 219 | golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= 220 | golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= 221 | golang.org/x/tools 
v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY= 222 | golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= 223 | golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= 224 | golang.org/x/tools v0.0.0-20190606124116-d0a3d012864b/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= 225 | golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= 226 | golang.org/x/tools v0.0.0-20201124115921-2c860bdd6e78/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= 227 | golang.org/x/tools v0.1.12 h1:VveCTK38A2rkS8ZqFY25HIDFscX5X9OoEhJd3quQmXU= 228 | golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= 229 | golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 230 | golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 231 | golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 232 | golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 h1:go1bK/D/BFZV2I8cIQd1NKEZ+0owSTG1fDTci4IqFcE= 233 | golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 234 | google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= 235 | google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= 236 | google.golang.org/appengine v1.6.6 h1:lMO5rYAqUxkmaj76jAkRUvt5JZgFymx/+Q5Mzfivuhc= 237 | google.golang.org/appengine v1.6.6/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc= 238 | google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= 239 | google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod 
h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc= 240 | google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo= 241 | google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= 242 | google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg= 243 | google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= 244 | google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= 245 | google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0= 246 | google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM= 247 | google.golang.org/protobuf v1.20.1-0.20200309200217-e05f789c0967/go.mod h1:A+miEFZTKqfCUM6K7xSMQL9OKL/b6hQv+e19PK+JZNE= 248 | google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzikPIcrTAo= 249 | google.golang.org/protobuf v1.22.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= 250 | google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= 251 | google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= 252 | google.golang.org/protobuf v1.24.0 h1:UhZDfRO8JRQru4/+LlLE0BRKGF8L+PICnvYZmx/fEGA= 253 | google.golang.org/protobuf v1.24.0/go.mod h1:r/3tXBNzIEhYS9I1OUVjXDlt8tc493IdKGjtUeSXeh4= 254 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 255 | gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 256 | gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f h1:BLraFXnmrev5lT+xlilqcH8XK9/i0At2xKjWk4p6zsU= 257 | gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= 258 | gopkg.in/yaml.v2 v2.2.4/go.mod 
h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= 259 | gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= 260 | gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= 261 | gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 262 | gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= 263 | gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 264 | honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= 265 | honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= 266 | lukechampine.com/uint128 v1.1.1 h1:pnxCASz787iMf+02ssImqk6OLt+Z5QHMoZyUXR4z6JU= 267 | lukechampine.com/uint128 v1.1.1/go.mod h1:c4eWIwlEGaxC/+H1VguhU4PHXNWDCDMUlWdIWl2j1gk= 268 | modernc.org/cc/v3 v3.36.0 h1:0kmRkTmqNidmu3c7BNDSdVHCxXCkWLmWmCIVX4LUboo= 269 | modernc.org/cc/v3 v3.36.0/go.mod h1:NFUHyPn4ekoC/JHeZFfZurN6ixxawE1BnVonP/oahEI= 270 | modernc.org/ccgo/v3 v3.0.0-20220428102840-41399a37e894/go.mod h1:eI31LL8EwEBKPpNpA4bU1/i+sKOwOrQy8D87zWUcRZc= 271 | modernc.org/ccgo/v3 v3.0.0-20220430103911-bc99d88307be/go.mod h1:bwdAnOoaIt8Ax9YdWGjxWsdkPcZyRPHqrOvJxaKAKGw= 272 | modernc.org/ccgo/v3 v3.16.6/go.mod h1:tGtX0gE9Jn7hdZFeU88slbTh1UtCYKusWOoCJuvkWsQ= 273 | modernc.org/ccgo/v3 v3.16.8 h1:G0QNlTqI5uVgczBWfGKs7B++EPwCfXPWGD2MdeKloDs= 274 | modernc.org/ccgo/v3 v3.16.8/go.mod h1:zNjwkizS+fIFDrDjIAgBSCLkWbJuHF+ar3QRn+Z9aws= 275 | modernc.org/ccorpus v1.11.6 h1:J16RXiiqiCgua6+ZvQot4yUuUy8zxgqbqEEUuGPlISk= 276 | modernc.org/ccorpus v1.11.6/go.mod h1:2gEUTrWqdpH2pXsmTM1ZkjeSrUWDpjMu2T6m29L/ErQ= 277 | modernc.org/httpfs v1.0.6 h1:AAgIpFZRXuYnkjftxTAZwMIiwEqAfk8aVB2/oA6nAeM= 278 | modernc.org/httpfs v1.0.6/go.mod h1:7dosgurJGp0sPaRanU53W4xZYKh14wfzX420oZADeHM= 279 | modernc.org/libc v0.0.0-20220428101251-2d5f3daf273b/go.mod 
h1:p7Mg4+koNjc8jkqwcoFBJx7tXkpj00G77X7A72jXPXA= 280 | modernc.org/libc v1.16.0/go.mod h1:N4LD6DBE9cf+Dzf9buBlzVJndKr/iJHG97vGLHYnb5A= 281 | modernc.org/libc v1.16.1/go.mod h1:JjJE0eu4yeK7tab2n4S1w8tlWd9MxXLRzheaRnAKymU= 282 | modernc.org/libc v1.16.17/go.mod h1:hYIV5VZczAmGZAnG15Vdngn5HSF5cSkbvfz2B7GRuVU= 283 | modernc.org/libc v1.16.19 h1:S8flPn5ZeXx6iw/8yNa986hwTQDrY8RXU7tObZuAozo= 284 | modernc.org/libc v1.16.19/go.mod h1:p7Mg4+koNjc8jkqwcoFBJx7tXkpj00G77X7A72jXPXA= 285 | modernc.org/mathutil v1.2.2/go.mod h1:mZW8CKdRPY1v87qxC/wUdX5O1qDzXMP5TH3wjfpga6E= 286 | modernc.org/mathutil v1.4.1 h1:ij3fYGe8zBF4Vu+g0oT7mB06r8sqGWKuJu1yXeR4by8= 287 | modernc.org/mathutil v1.4.1/go.mod h1:mZW8CKdRPY1v87qxC/wUdX5O1qDzXMP5TH3wjfpga6E= 288 | modernc.org/memory v1.1.1 h1:bDOL0DIDLQv7bWhP3gMvIrnoFw+Eo6F7a2QK9HPDiFU= 289 | modernc.org/memory v1.1.1/go.mod h1:/0wo5ibyrQiaoUoH7f9D8dnglAmILJ5/cxZlRECf+Nw= 290 | modernc.org/opt v0.1.1 h1:/0RX92k9vwVeDXj+Xn23DKp2VJubL7k8qNffND6qn3A= 291 | modernc.org/opt v0.1.1/go.mod h1:WdSiB5evDcignE70guQKxYUl14mgWtbClRi5wmkkTX0= 292 | modernc.org/sqlite v1.18.1 h1:ko32eKt3jf7eqIkCgPAeHMBXw3riNSLhl2f3loEF7o8= 293 | modernc.org/sqlite v1.18.1/go.mod h1:6ho+Gow7oX5V+OiOQ6Tr4xeqbx13UZ6t+Fw9IRUG4d4= 294 | modernc.org/strutil v1.1.1 h1:xv+J1BXY3Opl2ALrBwyfEikFAj8pmqcpnfmuwUwcozs= 295 | modernc.org/strutil v1.1.1/go.mod h1:DE+MQQ/hjKBZS2zNInV5hhcipt5rLPWkmpbGeW5mmdw= 296 | modernc.org/tcl v1.13.1 h1:npxzTwFTZYM8ghWicVIX1cRWzj7Nd8i6AqqX2p+IYao= 297 | modernc.org/token v1.0.0 h1:a0jaWiNMDhDUtqOj09wvjWWAqd3q7WpBulmL9H2egsk= 298 | modernc.org/token v1.0.0/go.mod h1:UGzOrNV1mAFSEB63lOFHIpNRUVMvYTc6yu1SMY/XTDM= 299 | modernc.org/z v1.5.1 h1:RTNHdsrOpeoSeOF4FbzTo8gBYByaJ5xT7NgZ9ZqRiJM= 300 | --------------------------------------------------------------------------------