├── .envrc ├── .envrc.local.example ├── .github └── workflows │ ├── build.yml │ ├── deploy.yml │ └── format.yml ├── .gitignore ├── .golangci.yml ├── .mergify.yml ├── LICENSE ├── README.md ├── aboutpage.go ├── algorithmspage.go ├── app.go ├── archive.go ├── auth.go ├── canonicaldomain.go ├── compare-against-random-voter.sql ├── database.go ├── devbox.json ├── devbox.lock ├── domain_penalties.go ├── fly.toml ├── frontpage.go ├── go.mod ├── go.sum ├── health.go ├── httpserver.go ├── init.sql ├── justfile ├── logger.go ├── logo.svg ├── main.go ├── middleware.go ├── migrate-volume.sh ├── position.go ├── postprocessing.go ├── prometheus.go ├── rankcrawler.go ├── reset-prior-average-upvote-rate.sql ├── resources.go ├── score-page.go ├── scoring-formula.go ├── scraper.go ├── seed └── domain-penalties.csv ├── sql ├── cumulative-upvotes.sql ├── previous-crawl-index-old.sql ├── previous-crawl.sql ├── qnranks.sql ├── random-new-voter.sql ├── random-top-voter.sql ├── raw-ranks.sql ├── resubmissions.sql └── upvote-rates.sql ├── static ├── android-chrome-192x192.png ├── android-chrome-512x512.png ├── apple-touch-icon.png ├── browserconfig.xml ├── chart-646.png ├── expected-upvotes.png ├── favicon-16x16.png ├── favicon-32x32.png ├── favicon.ico ├── hn-top-page-upvotes-by-rank.png ├── hn-top-page-votehistogram.svg ├── logo.svg ├── mstile-144x144.png ├── mstile-150x150.png ├── mstile-310x150.png ├── mstile-310x310.png ├── mstile-70x70.png ├── rank-history.png ├── safari-pinned-tab.svg ├── site.webmanifest ├── upvote-rate.png └── upvote-share-by-rank.png ├── statspage.go ├── storage.go ├── story-details.go ├── storyplot-data.go ├── templates.go ├── templates ├── about-content.html.tmpl ├── about.html.tmpl ├── algorithms-content.html.tmpl ├── header.html.tmpl ├── index.html.tmpl ├── normalize.css.tmpl ├── ranksPlot.js.tmpl ├── score.html.tmpl ├── scorePlot.js.tmpl ├── spinner.css.tmpl ├── stats.html.tmpl ├── storyDetails.html.tmpl ├── storyplots.js.tmpl ├── styles.css.tmpl ├── upvoteRatePlot.js.tmpl ├── upvotesPlot.js.tmpl ├── vote.html.tmpl └── vote.js.tmpl ├── timeout.go ├── upvote-rate-model.go ├── upvotes-db.sh ├── utils.go ├── vote.go ├── voting-notes.md └── watch.sh /.envrc: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # 1) Enable devbox environment 4 | eval "$(devbox generate direnv --print-envrc)" 5 | 6 | # 2) Any shared env variables go here 7 | export SQLITE_DATA_DIR=data 8 | export CACHE_SIZE=100 9 | export LISTEN_ADDRESS=127.0.0.1 10 | export PORT=8080 11 | export R2_BUCKET=news-archive-dev 12 | export R2_USE_SSL=true 13 | export R2_ENDPOINT=https://9e2da4e2b5c6dd05d36f399d4afc7d4c.r2.cloudflarestorage.com 14 | 15 | # 3) Only on macOS unify DEVELOPER_DIR / DEVELOPER_DIR_FOR_TARGET 16 | if [[ "$(uname)" == "Darwin" ]]; then 17 | # Devbox may set both DEVELOPER_DIR and DEVELOPER_DIR_FOR_TARGET to different paths. 18 | # cgo doesn't like that, so unify them. 
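  # (Editor's illustration, not part of the original file: before the fix
  # below, `env | grep DEVELOPER_DIR` would typically show the two variables
  # pointing at different toolchains, e.g. an Xcode.app path vs. a
  # CommandLineTools path, which is the mismatch that breaks cgo builds.)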
19 | if [[ -n "$DEVELOPER_DIR" && -n "$DEVELOPER_DIR_FOR_TARGET" ]]; then 20 | export DEVELOPER_DIR_FOR_TARGET="$DEVELOPER_DIR" 21 | fi 22 | fi 23 | 24 | # 4) If there's a local override file, load it 25 | if [[ -f .envrc.local ]]; then 26 | source .envrc.local 27 | echo "Successfully loaded .envrc.local" 28 | fi 29 | 30 | echo "Successfully loaded .envrc" 31 | -------------------------------------------------------------------------------- /.envrc.local.example: -------------------------------------------------------------------------------- 1 | # If you use nix, you can run the nix-shell directly with the following command 2 | if command -v nix &> /dev/null 3 | then 4 | use nix 5 | # you can add parameters to the nix-shell as well, e.g. 6 | # use nix --command zsh 7 | # if you use lorri, replace `use nix` with (see https://github.com/nix-community/lorri) 8 | # eval "$(lorri direnv)" 9 | fi 10 | 11 | export R2_ACCESS_KEY_ID="DEV.ACCESS.KEY.ID" 12 | export R2_SECRET_ACCESS_KEY="DEV.SECRET.ACCESS.KEY" 13 | 14 | echo "Successfully loaded .envrc.local" 15 | -------------------------------------------------------------------------------- /.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | name: Build 2 | 3 | on: 4 | push: 5 | branches: ["master"] 6 | tags: [v*] 7 | pull_request: 8 | types: [opened, synchronize] 9 | workflow_dispatch: 10 | 11 | permissions: 12 | contents: read 13 | 14 | # automatically cancel previous runs on the same PR 15 | # https://stackoverflow.com/questions/66335225/how-to-cancel-previous-runs-in-the-pr-when-you-push-new-commitsupdate-the-curre/67939898#67939898 16 | concurrency: 17 | group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }} 18 | cancel-in-progress: true 19 | 20 | jobs: 21 | build: 22 | name: "Build" 23 | runs-on: ubuntu-22.04 24 | steps: 25 | - uses: actions/checkout@v3 26 | with: 27 | # https://github.com/actions/checkout/issues/626 28 | # This is correct, because we're using a merge queue (mergify) which only merges when built against the latest target branch. 29 | # https://docs.mergify.com/actions/queue/ 30 | ref: ${{ github.event.pull_request.head.sha }} 31 | - uses: actions/setup-go@v3 32 | with: 33 | go-version-file: go.mod 34 | cache: true 35 | - run: go build *.go 36 | - name: Check if working directory is clean 37 | run: git diff --quiet --exit-code || (git status && false) 38 | 39 | lint: 40 | name: "Lint" 41 | runs-on: ubuntu-22.04 42 | steps: 43 | - uses: actions/checkout@v3 44 | with: 45 | # https://github.com/actions/checkout/issues/626 46 | # This is correct, because we're using a merge queue (mergify) which only merges when built against the latest target branch. 47 | # https://docs.mergify.com/actions/queue/ 48 | ref: ${{ github.event.pull_request.head.sha }} 49 | - uses: actions/setup-go@v3 50 | with: 51 | go-version-file: go.mod 52 | cache: true 53 | - name: golangci-lint 54 | uses: golangci/golangci-lint-action@v3 55 | with: 56 | version: v1.50.1 57 | - name: Check if go code is formatted 58 | run: | 59 | UNFORMATTED_FILES=$(gofmt -l .) 
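        # gofmt -l prints the names of files whose formatting differs from
        # gofmt's output, one path per line; an empty result means the tree
        # is already formatted, so the quoted test below passes.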
60 |         test -z "$UNFORMATTED_FILES" || (echo -e "Go code not formatted:\n$UNFORMATTED_FILES\n"; exit 1)
61 | 
--------------------------------------------------------------------------------
/.github/workflows/deploy.yml:
--------------------------------------------------------------------------------
 1 | name: Deploy
 2 | 
 3 | on:
 4 |   push:
 5 |     branches: [master]
 6 | env:
 7 |   FLY_API_TOKEN: ${{ secrets.FLY_API_TOKEN }}
 8 | jobs:
 9 |   deploy:
10 |     name: Deploy app
11 |     runs-on: ubuntu-22.04
12 |     steps:
13 |       - uses: actions/checkout@v3
14 |       - uses: superfly/flyctl-actions/setup-flyctl@master
15 |       - run: flyctl deploy
16 | 
--------------------------------------------------------------------------------
/.github/workflows/format.yml:
--------------------------------------------------------------------------------
 1 | name: Formatter
 2 | 
 3 | on:
 4 |   pull_request:
 5 |     types: [opened]
 6 |   issue_comment:
 7 |     types: [created]
 8 | 
 9 | jobs:
10 |   format:
11 |     name: "Format"
12 |     runs-on: ubuntu-22.04
13 |     if: github.event.issue.pull_request
14 |     steps:
15 |       - uses: khan/pull-request-comment-trigger@v1.1.0
16 |         id: check
17 |         with:
18 |           trigger: '/format'
19 |           reaction: "+1" # Reaction must be one of the reactions here: https://developer.github.com/v3/reactions/#reaction-types
20 |         env:
21 |           GITHUB_TOKEN: '${{ secrets.GITHUB_TOKEN }}'
22 | 
23 |       - uses: actions/checkout@v3
24 |         if: steps.check.outputs.triggered == 'true'
25 | 
26 |       - name: Check out PR
27 |         if: steps.check.outputs.triggered == 'true'
28 |         run: gh pr checkout ${{ github.event.issue.number }}
29 |         env:
30 |           GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
31 | 
32 |       - uses: actions/setup-go@v3
33 |         with:
34 |           go-version-file: go.mod
35 |           cache: true
36 | 
37 |       - name: Format Go Code
38 |         if: steps.check.outputs.triggered == 'true'
39 |         run: go fmt .
40 | 
41 |       - name: Commit changes
42 |         if: steps.check.outputs.triggered == 'true'
43 |         run: |
44 |           git config user.name "GitHub Actions Bot"
45 |           git config user.email "<>"
46 | 
47 |           git status
48 |           git diff --stat
49 |           git commit -am "chore: format code"
50 | 
51 |           git log --oneline --max-count=10
52 | 
53 |           git push
54 | 
55 |       - uses: khan/pull-request-comment-trigger@v1.1.0
56 |         if: failure()
57 |         with:
58 |           trigger: '/format'
59 |           reaction: "confused" # Reaction must be one of the reactions here: https://developer.github.com/v3/reactions/#reaction-types
60 |         env:
61 |           GITHUB_TOKEN: '${{ secrets.GITHUB_TOKEN }}'
62 | 
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # mac
2 | .DS_Store
3 | 
4 | # other?
 5 | .history
 6 | 
 7 | # general
 8 | /node_modules
 9 | /data
10 | /.envrc.local
11 | 
12 | personal-notes.md
13 | working-query.sql
--------------------------------------------------------------------------------
/.golangci.yml:
--------------------------------------------------------------------------------
1 | linters:
2 |   disable:
3 |     - staticcheck
4 | 
5 | run:
6 |   skip-dirs:
7 |     - "go/pkg/mod"
8 |     - "/Cellar/go"
9 | 
--------------------------------------------------------------------------------
/.mergify.yml:
--------------------------------------------------------------------------------
 1 | queue_rules:
 2 |   - name: Merge dependency-update PRs
 3 |     queue_conditions:
 4 |       - label=dependencies
 5 |       - base=master
 6 |     merge_conditions: []
 7 |     merge_method: squash
 8 | 
 9 |   - name: Merge PRs using label (rebase)
10 |     queue_conditions:
11 |       - label=ready-to-merge-rebase
12 |       - base=master
13 |       - "#review-requested=0"
14 |       - "#changes-requested-reviews-by=0"
15 |       - "#review-threads-unresolved=0"
16 |     merge_conditions: []
17 |     merge_method: rebase
18 | 
19 |   - name: Merge PRs using label (squash)
20 |     queue_conditions:
21 |       - label=ready-to-merge-squash
22 |       - base=master
23 |       - "#review-requested=0"
24 |       - "#changes-requested-reviews-by=0"
25 |       - "#review-threads-unresolved=0"
26 |     merge_conditions: []
27 |     merge_method: squash
28 | 
29 | pull_request_rules:
30 |   - name: All PRs into queue
31 |     conditions: []
32 |     actions:
33 |       queue:
34 | 
--------------------------------------------------------------------------------
/aboutpage.go:
--------------------------------------------------------------------------------
 1 | package main
 2 | 
 3 | import (
 4 | 	"net/http"
 5 | 
 6 | 	"github.com/pkg/errors"
 7 | )
 8 | 
 9 | type AboutPageData struct {
10 | 	PageTemplateData
11 | }
12 | 
13 | func (d AboutPageData) IsAboutPage() bool {
14 | 	return true
15 | }
16 | 
17 | func (app app) aboutHandler() func(http.ResponseWriter, *http.Request, struct{}) error {
18 | 	return func(w http.ResponseWriter, r *http.Request, p struct{}) error {
19 | 		w.Header().Set("Content-Type", "text/html; charset=utf-8")
20 | 
21 | 		err := templates.ExecuteTemplate(w, "about.html.tmpl", AboutPageData{PageTemplateData{UserID: app.getUserID(r)}})
22 | 
23 | 		return errors.Wrap(err, "executing about page template")
24 | 	}
25 | }
26 | 
--------------------------------------------------------------------------------
/algorithmspage.go:
--------------------------------------------------------------------------------
 1 | package main
 2 | 
 3 | import (
 4 | 	"net/http"
 5 | 
 6 | 	"github.com/pkg/errors"
 7 | )
 8 | 
 9 | type AlgorithmsPageData struct {
10 | 	PageTemplateData
11 | }
12 | 
13 | func (d AlgorithmsPageData) IsAlgorithmsPage() bool {
14 | 	return true
15 | }
16 | 
17 | func (app app) algorithmsHandler() func(http.ResponseWriter, *http.Request, struct{}) error {
18 | 	return func(w http.ResponseWriter, r *http.Request, p struct{}) error {
19 | 		w.Header().Set("Content-Type", "text/html; charset=utf-8")
20 | 
21 | 		err := templates.ExecuteTemplate(w, "about.html.tmpl", AlgorithmsPageData{PageTemplateData{UserID: app.getUserID(r)}}) // both pages render via about.html.tmpl, which presumably switches on IsAboutPage/IsAlgorithmsPage
22 | 
23 | 		return errors.Wrap(err, "executing Algorithms page template")
24 | 	}
25 | }
26 | 
--------------------------------------------------------------------------------
/app.go:
--------------------------------------------------------------------------------
 1 | package main
 2 | 
 3 | import (
 4 | 	"context"
 5 | 	"net/http"
 6 | 	"os"
 7 | 	"strconv"
 8 | 	"time"
 9 | 
10 | 	"github.com/johnwarden/hn"
11 | 	"golang.org/x/exp/slog"
12 | 
13 | 	
retryablehttp "github.com/hashicorp/go-retryablehttp" 14 | ) 15 | 16 | type app struct { 17 | ndb newsDatabase 18 | hnClient *hn.Client 19 | httpClient *http.Client 20 | logger *slog.Logger 21 | cacheSize int 22 | archiveTriggerChan chan context.Context 23 | } 24 | 25 | func initApp() app { 26 | var err error 27 | var cacheSize int 28 | { 29 | s := os.Getenv("CACHE_SIZE") 30 | if s != "" { 31 | cacheSize, err = strconv.Atoi(s) 32 | if err != nil { 33 | LogFatal(slog.Default(), "CACHE_SIZE", err) 34 | } 35 | } 36 | } 37 | 38 | logLevelString := os.Getenv("LOG_LEVEL") 39 | logFormatString := os.Getenv("LOG_FORMAT") 40 | logger := newLogger(logLevelString, logFormatString) 41 | 42 | sqliteDataDir := os.Getenv("SQLITE_DATA_DIR") 43 | if sqliteDataDir == "" { 44 | panic("SQLITE_DATA_DIR not set") 45 | } 46 | 47 | db, err := openNewsDatabase(sqliteDataDir) 48 | if err != nil { 49 | LogFatal(logger, "openNewsDatabase", err) 50 | } 51 | 52 | retryClient := retryablehttp.NewClient() 53 | retryClient.RetryMax = 3 54 | retryClient.RetryWaitMin = 1 * time.Second 55 | retryClient.RetryWaitMax = 5 * time.Second 56 | 57 | retryClient.Logger = wrapLoggerForRetryableHTTPClient(logger) 58 | 59 | httpClient := retryClient.StandardClient() 60 | 61 | hnClient := hn.NewClient(httpClient) 62 | 63 | return app{ 64 | httpClient: httpClient, 65 | hnClient: hnClient, 66 | logger: logger, 67 | ndb: db, 68 | cacheSize: cacheSize, 69 | archiveTriggerChan: make(chan context.Context, 1), 70 | } 71 | } 72 | 73 | func (app app) cleanup() { 74 | app.ndb.close() 75 | } 76 | -------------------------------------------------------------------------------- /auth.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "database/sql" 5 | "math/rand" 6 | "net/http" 7 | "strconv" 8 | 9 | "github.com/johnwarden/httperror" 10 | "github.com/pkg/errors" 11 | ) 12 | 13 | func (app app) getUserID(r *http.Request) sql.NullInt64 { 14 | var id sql.NullInt64 15 | 16 | cookie, err := r.Cookie("userID") 17 | if err != nil { 18 | if !errors.Is(err, http.ErrNoCookie) { 19 | app.logger.Error("r.Cookie('UserID'", err) 20 | } 21 | return id 22 | } 23 | 24 | idInt, err := strconv.Atoi(cookie.Value) 25 | if err != nil { 26 | app.logger.Error("Parsing cookie", err) 27 | } 28 | 29 | id.Int64 = int64(idInt) 30 | id.Valid = true 31 | 32 | return id 33 | } 34 | 35 | type loginParams struct { 36 | UserID sql.NullInt64 37 | } 38 | 39 | func (app app) loginHandler() func(http.ResponseWriter, *http.Request, loginParams) error { 40 | return func(w http.ResponseWriter, r *http.Request, p loginParams) error { 41 | userID := p.UserID 42 | 43 | if !userID.Valid { 44 | loggedInUserID := app.getUserID(r) 45 | if loggedInUserID.Valid { 46 | http.Redirect(w, r, "/", http.StatusTemporaryRedirect) 47 | return nil 48 | } 49 | 50 | // Assign a random user ID if none specified as parameter 51 | userID.Int64 = rand.Int63() 52 | userID.Valid = true 53 | } 54 | 55 | if userID.Int64 == 0 { 56 | return httperror.PublicErrorf(http.StatusUnauthorized, "Can't login as user 0") 57 | } 58 | 59 | setUserIDCookie(w, userID) 60 | 61 | http.Redirect(w, r, "/score", http.StatusTemporaryRedirect) 62 | 63 | return nil 64 | } 65 | } 66 | 67 | func (app app) logoutHandler() func(http.ResponseWriter, *http.Request, struct{}) error { 68 | return func(w http.ResponseWriter, r *http.Request, p struct{}) error { 69 | var userID sql.NullInt64 70 | setUserIDCookie(w, userID) 71 | 72 | http.Redirect(w, r, "/", 
http.StatusTemporaryRedirect) 73 | 74 | return nil 75 | } 76 | } 77 | 78 | func setUserIDCookie(w http.ResponseWriter, userID sql.NullInt64) { 79 | value := strconv.Itoa(int(userID.Int64)) 80 | maxAge := 365 * 24 * 60 * 60 81 | if !userID.Valid { 82 | maxAge = -1 83 | value = "" 84 | } 85 | 86 | cookie := http.Cookie{ 87 | Name: "userID", 88 | Value: value, 89 | Path: "/", 90 | MaxAge: maxAge, 91 | HttpOnly: true, 92 | Secure: true, 93 | SameSite: http.SameSiteLaxMode, 94 | } 95 | 96 | // Use the http.SetCookie() function to send the cookie to the client. 97 | // Behind the scenes this adds a `Set-Cookie` header to the response 98 | // containing the necessary cookie data. 99 | http.SetCookie(w, &cookie) 100 | } 101 | -------------------------------------------------------------------------------- /canonicaldomain.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "net/http" 6 | "strings" 7 | 8 | "github.com/johnwarden/httperror" 9 | ) 10 | 11 | var nonCanonicalDomains = map[string]string{ 12 | "social-protocols-news.fly.dev": "news.social-protocols.org", 13 | "127.0.0.1:8080": "localhost:8080", // just for testing 14 | } 15 | 16 | var canonicalDomains = getValues(nonCanonicalDomains) 17 | 18 | func (app app) canonicalDomainMiddleware(handler http.Handler) http.Handler { 19 | return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 20 | // Redirect any non-canonical domain to the corresponding canonical domain. 21 | for nonCanonicalDomain, canonicalDomain := range nonCanonicalDomains { 22 | if r.Host == nonCanonicalDomain { 23 | url := "https://" + canonicalDomain + r.RequestURI 24 | http.Redirect(w, r, url, http.StatusMovedPermanently) 25 | return 26 | } 27 | } 28 | isCanonical := false 29 | for _, canonicalDomain := range canonicalDomains { 30 | if strings.HasPrefix(r.Host, canonicalDomain) { 31 | isCanonical = true 32 | break 33 | } 34 | } 35 | if !isCanonical { 36 | httperror.DefaultErrorHandler(w, httperror.New(http.StatusForbidden, fmt.Sprintf("Invalid request host: %s", r.Host))) 37 | return 38 | } 39 | 40 | handler.ServeHTTP(w, r) 41 | }) 42 | } 43 | -------------------------------------------------------------------------------- /compare-against-random-voter.sql: -------------------------------------------------------------------------------- 1 | with parameters as ( 2 | select 3 | 1.50 as priorWeight 4 | , 0.003462767 as fatigueFactor 5 | ), 6 | stories as ( 7 | select 8 | id 9 | , votes.entryTime is not null as mystory 10 | , entryUpvoteRate 11 | , max(cumulativeUpvotes) as cumulativeUpvotes 12 | , max(cumulativeExpectedUpvotes) as cumulativeExpectedUpvotes 13 | , max(score) as score 14 | , (cumulativeUpvotes + priorWeight)/((1-exp(-fatigueFactor*cumulativeExpectedUpvotes))/fatigueFactor + priorWeight) qualityScore 15 | 16 | , log((cumulativeUpvotes + priorWeight)/((1-exp(-fatigueFactor*cumulativeExpectedUpvotes))/fatigueFactor + priorWeight))*100 gain 17 | 18 | 19 | from dataset 20 | join parameters 21 | left join votes on 22 | votes.userID = 1 23 | and votes.storyID = dataset.id 24 | 25 | 26 | -- where id >= (select min(storyID) from votes where userID = 1 and storyID > 36754601) and id <= (select max(storyID) from votes where userID = 1 and storyID > 36754601) 27 | -- where id >= (select min(storyID) from votes where userID = 1 and storyID > 36780531) and id <= (select max(storyID) from votes where userID = 1 and storyID > 36780531) 28 | -- where id >= (select min(storyID) from 
votes where userID = 1) 29 | where id >= (select min(storyID) from votes where userID = 1) and id <= (select max(storyID) from votes where userID = 1) 30 | 31 | -- and id <= (select max(storyID) from votes where userID = 1) 32 | 33 | group by id 34 | ) 35 | 36 | -- select * from stories where id = 36805284; 37 | 38 | 39 | 40 | , sums as ( 41 | select 42 | sum(case when mystory then cumulativeUpvotes else null end) as myCumulativeUpvotes 43 | , sum(case when mystory then cumulativeExpectedUpvotes else null end) as myCumulativeExpectedUpvotes 44 | , avg(case when mystory then score else null end) as myAverageScore 45 | , avg(case when mystory then cumulativeUpvotes / cumulativeExpectedUpvotes else null end) as myAverageUpvoteRate 46 | 47 | -- The below doesn't make sense. Because cumulativeUpvotes are sometimes 0, and the log of 0 is not defined. 48 | -- , exp(avg(case when mystory then log(cumulativeUpvotes / cumulativeExpectedUpvotes) else null end)) as myGeoAverageUpvoteRate 49 | 50 | 51 | -- , sum(case when votes.entryTime is not null then score-1 else null end)/count(distinct votes.storyID) as myAverageScore 52 | , sum(cumulativeUpvotes) as overallCumulativeUpvotes 53 | , sum(cumulativeExpectedUpvotes) as overallCumulativeExpectedUpvotes 54 | , avg(score) as overallAverageScore 55 | , avg(cumulativeUpvotes / cumulativeExpectedUpvotes) as overallAverageUpvoteRate 56 | 57 | -- The below doesn't make sense. Because cumulativeUpvotes are sometimes 0, and the log of 0 is not defined. 58 | -- , exp(avg(log(cumulativeUpvotes / cumulativeExpectedUpvotes))) as overallGeoAverageUpvoteRate 59 | 60 | 61 | , exp(avg(log((cumulativeUpvotes + priorWeight)/((1-exp(-fatigueFactor*cumulativeExpectedUpvotes))/fatigueFactor + priorWeight)))) geoAverageQualityScore 62 | 63 | 64 | , sum(log((cumulativeUpvotes + priorWeight)/((1-exp(-fatigueFactor*cumulativeExpectedUpvotes))/fatigueFactor + priorWeight)) )*100 baselineGain 65 | 66 | 67 | -- , exp(avg(log((cumulativeUpvotes + priorWeight)/(cumulativeExpectedUpvotes + priorWeight)))) geoAverageQualityScore 68 | 69 | 70 | -- , sum(case when votes.entryTime is null then score-1 else null end)/(count(distinct dataset.id) - count(distinct votes.storyID)) as overallAverageScore 71 | from stories 72 | join parameters 73 | ) 74 | select 75 | -- * 76 | myAverageScore 77 | , myAverageUpvoteRate 78 | , myCumulativeUpvotes/myCumulativeExpectedUpvotes as myUpvoteRate 79 | , overallAverageScore 80 | , overallAverageUpvoteRate 81 | , overallCumulativeUpvotes/overallCumulativeExpectedUpvotes as overallUpvoteRate 82 | , geoAverageQualityScore 83 | , baselineGain 84 | from sums; 85 | 86 | 87 | -- Discussion: The geomean quality score is close to 1, as expected. The average score is greater than 1, because that's what will happen 88 | -- if you take the average of exp(x) when the average of x is 0. 
For example in R:
89 | -- (ins)> x = rnorm(10000, mean=0, sd=2)
90 | -- (ins)> mean(x)
91 | -- [1] -0.007797868
92 | -- (ins)> mean(exp(x))
93 | -- [1] 9.844065
94 | 
--------------------------------------------------------------------------------
/devbox.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "$schema": "https://raw.githubusercontent.com/jetify-com/devbox/0.13.4/.schema/devbox.schema.json",
 3 |   "packages": [
 4 |     "entr@latest",
 5 |     "git@latest",
 6 |     "gcc@latest",
 7 |     "gotools@latest",
 8 |     "golangci-lint@latest",
 9 |     "sqlite-interactive@latest",
10 |     "go@latest",
11 |     "just@latest"
12 |   ],
13 | }
14 | 
--------------------------------------------------------------------------------
/domain_penalties.go:
--------------------------------------------------------------------------------
 1 | package main
 2 | 
 3 | import (
 4 | 	"bytes"
 5 | 	"encoding/csv"
 6 | 	"fmt"
 7 | 	"io"
 8 | 	"strconv"
 9 | 
10 | 	"github.com/pkg/errors"
11 | 	"gorm.io/driver/sqlite"
12 | 	"gorm.io/gorm"
13 | 	"gorm.io/gorm/clause"
14 | )
15 | 
16 | type DomainPenalty struct {
17 | 	Domain     string `gorm:"primaryKey"`
18 | 	AvgPenalty float64
19 | }
20 | 
21 | func (ndb newsDatabase) importPenaltiesData(sqliteDataDir string) error {
22 | 	frontpageDatabaseFilename := fmt.Sprintf("%s/%s", sqliteDataDir, sqliteDataFilename)
23 | 
24 | 	db, err := gorm.Open(sqlite.Open(frontpageDatabaseFilename), &gorm.Config{})
25 | 	if err != nil {
26 | 		panic("failed to connect database")
27 | 	}
28 | 
29 | 	err = db.AutoMigrate(&DomainPenalty{})
30 | 	if err != nil {
31 | 		return errors.Wrap(err, "db.AutoMigrate Domain Penalties table")
32 | 	}
33 | 
34 | 	// Open domain penalty seed data file as CSV
35 | 	b, _ := resources.ReadFile("seed/domain-penalties.csv") // read error ignored; the file is embedded at build time, and a failure would surface as a "missing header row" error below
36 | 	buf := bytes.NewBuffer(b)
37 | 	r := csv.NewReader(buf)
38 | 
39 | 	// Read the header row.
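	// (Editor's note, inferred from how the record fields are used below;
	// the seed file itself is not shown in this export: it is assumed to be
	// a two-column CSV with a single header row, e.g.
	//     domain,avg_penalty
	//     example.com,0.35
	// where record[0] is the domain and record[1] the numeric penalty.)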
40 | _, err = r.Read() 41 | if err != nil { 42 | return errors.Wrap(err, "missing header row in domain penalties data") 43 | } 44 | 45 | for { 46 | record, err := r.Read() 47 | if err != nil { 48 | if errors.Is(err, io.EOF) { 49 | break 50 | } 51 | return errors.Wrapf(err, "Parsing penalty CSV") 52 | } 53 | 54 | avgPenalty, err := strconv.ParseFloat(record[1], 64) 55 | if err != nil { 56 | return errors.Wrapf(err, "Parsing penalty record %s, %s", record[0], record[1]) 57 | } 58 | err = db.Clauses(clause.OnConflict{ // adding this onConflict clause makes the create into an upsert 59 | UpdateAll: true, 60 | }).Create(&DomainPenalty{Domain: record[0], AvgPenalty: avgPenalty}).Error 61 | 62 | if err != nil { 63 | return errors.Wrapf(err, "Parsing inserting domain penalty %s, %f", record[0], avgPenalty) 64 | } 65 | 66 | } 67 | 68 | return nil 69 | } 70 | -------------------------------------------------------------------------------- /fly.toml: -------------------------------------------------------------------------------- 1 | # fly.toml file generated for social-protocols-news on 2022-09-14T17:00:08+02:00 2 | 3 | app = "social-protocols-news" 4 | kill_signal = "SIGINT" 5 | kill_timeout = 5 6 | processes = [] 7 | primary_region = "ewr" 8 | 9 | [build] 10 | builder = "paketobuildpacks/builder:base" 11 | buildpacks = ["gcr.io/paketo-buildpacks/go"] 12 | 13 | [env] 14 | PORT = "8080" 15 | SQLITE_DATA_DIR="/data" 16 | LOG_LEVEL="DEBUG" 17 | CACHE_SIZE="100" 18 | R2_BUCKET="news-archive" 19 | R2_USE_SSL="true" 20 | R2_ENDPOINT="https://9e2da4e2b5c6dd05d36f399d4afc7d4c.r2.cloudflarestorage.com" 21 | 22 | [experimental] 23 | allowed_public_ports = [] 24 | auto_rollback = true 25 | 26 | [[services]] 27 | http_checks = [] 28 | internal_port = 8080 29 | processes = ["app"] 30 | protocol = "tcp" 31 | script_checks = [] 32 | [services.concurrency] 33 | hard_limit = 25 34 | soft_limit = 20 35 | type = "connections" 36 | 37 | [[services.ports]] 38 | force_https = true 39 | handlers = ["http"] 40 | port = 80 41 | 42 | [[services.ports]] 43 | handlers = ["tls", "http"] 44 | port = 443 45 | 46 | [[services.tcp_checks]] 47 | grace_period = "1s" 48 | interval = "15s" 49 | restart_limit = 0 50 | timeout = "2s" 51 | 52 | 53 | 54 | # flyctl volumes create data --region ewr --size 3 55 | [[mounts]] 56 | source = "data3" 57 | destination = "/data" 58 | 59 | # prometheus metrics 60 | [metrics] 61 | port = 9091 62 | path = "/metrics" 63 | 64 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/social-protocols/news 2 | 3 | go 1.22 4 | 5 | toolchain go1.23.3 6 | 7 | require ( 8 | github.com/NYTimes/gziphandler v1.1.1 9 | github.com/VictoriaMetrics/metrics v1.23.0 10 | github.com/dustin/go-humanize v1.0.1 11 | github.com/gocolly/colly/v2 v2.1.0 12 | github.com/gorilla/schema v1.2.0 13 | github.com/hashicorp/go-retryablehttp v0.7.1 14 | github.com/johnwarden/hn v1.0.1 15 | github.com/johnwarden/httperror v1.6.0 16 | github.com/julienschmidt/httprouter v1.3.0 17 | github.com/mattn/go-sqlite3 v1.14.15 18 | github.com/minio/minio-go/v7 v7.0.80 19 | github.com/multiprocessio/go-sqlite3-stdlib v0.0.0-20220822170115-9f6825a1cd25 20 | github.com/pkg/errors v0.9.1 21 | github.com/weppos/publicsuffix-go v0.20.0 22 | golang.org/x/exp v0.0.0-20221114191408-850992195362 23 | gonum.org/v1/gonum v0.12.0 24 | gorm.io/driver/sqlite v1.4.3 25 | gorm.io/gorm v1.24.2 26 | ) 27 | 28 | //replace 
github.com/johnwarden/httperror v1.6.0 => ../httperror 29 | //replace "github.com/johnwarden/hn" v1.0.1 => "../hn" 30 | 31 | require ( 32 | github.com/PuerkitoBio/goquery v1.5.1 // indirect 33 | github.com/alitto/pond/v2 v2.1.4 // indirect 34 | github.com/andybalholm/cascadia v1.2.0 // indirect 35 | github.com/antchfx/htmlquery v1.2.3 // indirect 36 | github.com/antchfx/xmlquery v1.2.4 // indirect 37 | github.com/antchfx/xpath v1.1.8 // indirect 38 | github.com/araddon/dateparse v0.0.0-20210429162001-6b43995a97de // indirect 39 | github.com/fatih/color v1.13.0 // indirect 40 | github.com/go-ini/ini v1.67.0 // indirect 41 | github.com/gobwas/glob v0.2.3 // indirect 42 | github.com/goccy/go-json v0.10.3 // indirect 43 | github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e // indirect 44 | github.com/golang/protobuf v1.4.2 // indirect 45 | github.com/google/uuid v1.6.0 // indirect 46 | github.com/hashicorp/go-cleanhttp v0.5.2 // indirect 47 | github.com/hashicorp/go-hclog v0.16.2 // indirect 48 | github.com/jinzhu/inflection v1.0.0 // indirect 49 | github.com/jinzhu/now v1.1.5 // indirect 50 | github.com/kennygrant/sanitize v1.2.4 // indirect 51 | github.com/klauspost/compress v1.17.11 // indirect 52 | github.com/klauspost/cpuid/v2 v2.2.8 // indirect 53 | github.com/mattn/go-colorable v0.1.13 // indirect 54 | github.com/mattn/go-isatty v0.0.16 // indirect 55 | github.com/minio/md5-simd v1.1.2 // indirect 56 | github.com/rs/xid v1.6.0 // indirect 57 | github.com/saintfish/chardet v0.0.0-20120816061221-3af4cd4741ca // indirect 58 | github.com/temoto/robotstxt v1.1.1 // indirect 59 | github.com/valyala/fastrand v1.1.0 // indirect 60 | github.com/valyala/histogram v1.2.0 // indirect 61 | golang.org/x/crypto v0.28.0 // indirect 62 | golang.org/x/net v0.30.0 // indirect 63 | golang.org/x/sys v0.26.0 // indirect 64 | golang.org/x/text v0.19.0 // indirect 65 | google.golang.org/appengine v1.6.6 // indirect 66 | google.golang.org/protobuf v1.24.0 // indirect 67 | ) 68 | -------------------------------------------------------------------------------- /health.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "net/http" 6 | "time" 7 | 8 | "github.com/pkg/errors" 9 | ) 10 | 11 | const alertAfterMinutes = 5 12 | 13 | func (app app) healthHandler() func(http.ResponseWriter, *http.Request, loginParams) error { 14 | return func(w http.ResponseWriter, r *http.Request, p loginParams) error { 15 | w.Header().Set("Content-Type", "text/plain; charset=utf-8") 16 | 17 | if r.Method != http.MethodHead { 18 | _, err := w.Write([]byte("ok")) 19 | if err != nil { 20 | return errors.Wrap(err, "writing response") 21 | } 22 | } 23 | 24 | return nil 25 | } 26 | } 27 | 28 | func (app app) crawlHealthHandler() func(http.ResponseWriter, *http.Request, loginParams) error { 29 | return func(w http.ResponseWriter, r *http.Request, p loginParams) error { 30 | w.Header().Set("Content-Type", "text/plain; charset=utf-8") 31 | 32 | lastSampleTime, err := app.ndb.selectLastCrawlTime() 33 | if err != nil { 34 | return errors.Wrap(err, "getting last crawl time") 35 | } 36 | 37 | if time.Now().Unix()-int64(lastSampleTime) > alertAfterMinutes*60 { 38 | return fmt.Errorf("last successful crawl of %d is more than %d minutes ago", lastSampleTime, alertAfterMinutes) 39 | } 40 | 41 | if r.Method != http.MethodHead { 42 | _, err = w.Write([]byte("ok")) 43 | if err != nil { 44 | return errors.Wrap(err, "writing response") 45 | } 46 | } 47 | 48 | return 
nil 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /httpserver.go: -------------------------------------------------------------------------------- 1 | //nolint:typecheck 2 | package main 3 | 4 | import ( 5 | "embed" 6 | "io/fs" 7 | "net/http" 8 | "os" 9 | "time" 10 | 11 | "github.com/julienschmidt/httprouter" 12 | "github.com/pkg/errors" 13 | ) 14 | 15 | const ( 16 | // writeTimeout = 2500 * time.Millisecond 17 | writeTimeout = 60 * time.Second 18 | readHeaderTimeout = 5 * time.Second 19 | ) 20 | 21 | //go:embed static 22 | var staticFS embed.FS 23 | 24 | func (app app) httpServer(onPanic func(error)) *http.Server { 25 | l := app.logger 26 | 27 | port := os.Getenv("PORT") 28 | if port == "" { 29 | port = "8080" 30 | } 31 | 32 | listenAddress := os.Getenv("LISTEN_ADDRESS") 33 | 34 | staticRoot, err := fs.Sub(staticFS, "static") 35 | if err != nil { 36 | LogFatal(l, "fs.Sub", err) 37 | } 38 | 39 | server := &http.Server{ 40 | Addr: listenAddress + ":" + port, 41 | WriteTimeout: writeTimeout - 100*time.Millisecond, 42 | ReadHeaderTimeout: readHeaderTimeout, 43 | } 44 | 45 | router := httprouter.New() 46 | router.GET("/static/*filepath", app.serveFiles(http.FS(staticRoot))) 47 | 48 | router.GET("/", middleware("hntop", l, onPanic, app.frontpageHandler("hntop"))) 49 | router.GET("/new", middleware("new", l, onPanic, app.frontpageHandler("new"))) 50 | router.GET("/top", middleware("top", l, onPanic, app.frontpageHandler("hntop"))) 51 | router.GET("/best", middleware("best", l, onPanic, app.frontpageHandler("best"))) 52 | router.GET("/ask", middleware("ask", l, onPanic, app.frontpageHandler("ask"))) 53 | router.GET("/show", middleware("show", l, onPanic, app.frontpageHandler("show"))) 54 | router.GET("/raw", middleware("raw", l, onPanic, app.frontpageHandler("raw"))) 55 | router.GET("/fair", middleware("fair", l, onPanic, app.frontpageHandler("fair"))) 56 | router.GET("/upvoterate", middleware("upvoterate", l, onPanic, app.frontpageHandler("upvoterate"))) 57 | router.GET("/best-upvoterate", middleware("best-upvoterate", l, onPanic, app.frontpageHandler("best-upvoterate"))) 58 | router.GET("/penalties", middleware("penalties", l, onPanic, app.frontpageHandler("penalties"))) 59 | router.GET("/boosts", middleware("boosts", l, onPanic, app.frontpageHandler("boosts"))) 60 | router.GET("/resubmissions", middleware("resubmissions", l, onPanic, app.frontpageHandler("resubmissions"))) 61 | router.GET("/stats", middleware("stats", l, onPanic, app.statsHandler())) 62 | router.GET("/about", middleware("about", l, onPanic, app.aboutHandler())) 63 | router.GET("/algorithms", middleware("algorithms", l, onPanic, app.algorithmsHandler())) 64 | 65 | router.POST("/vote", middleware("upvote", l, onPanic, app.voteHandler())) 66 | 67 | router.GET("/score", middleware("score", l, onPanic, app.scoreHandler())) 68 | 69 | router.GET("/login", middleware("login", l, onPanic, app.loginHandler())) 70 | router.GET("/logout", middleware("logout", l, onPanic, app.logoutHandler())) 71 | 72 | router.GET("/health", middleware("health", l, onPanic, app.healthHandler())) 73 | router.HEAD("/health", middleware("health", l, onPanic, app.healthHandler())) 74 | router.GET("/crawl-health", middleware("crawl-health", l, onPanic, app.crawlHealthHandler())) 75 | router.HEAD("/crawl-health", middleware("crawl-health", l, onPanic, app.crawlHealthHandler())) 76 | 77 | server.Handler = app.preRouterMiddleware(router, writeTimeout-100*time.Millisecond) 78 | 79 | return server 80 | } 81 | 82 
| func (app app) frontpageHandler(ranking string) func(http.ResponseWriter, *http.Request, OptionalFrontPageParams) error {
 83 | 	return func(w http.ResponseWriter, r *http.Request, params OptionalFrontPageParams) error {
 84 | 		w.Header().Set("Content-Type", "text/html; charset=utf-8")
 85 | 
 86 | 		err := app.serveFrontPage(r, w, ranking, params.WithDefaults())
 87 | 		return errors.Wrap(err, "serveFrontPage")
 88 | 	}
 89 | }
 90 | 
 91 | func (app app) statsHandler() func(http.ResponseWriter, *http.Request, StatsPageParams) error {
 92 | 	return func(w http.ResponseWriter, r *http.Request, params StatsPageParams) error {
 93 | 		w.Header().Set("Content-Type", "text/html; charset=utf-8")
 94 | 
 95 | 		userID := app.getUserID(r)
 96 | 		return app.statsPage(w, r, params, userID)
 97 | 	}
 98 | }
 99 | 
100 | func (app app) serveFiles(root http.FileSystem) func(w http.ResponseWriter, r *http.Request, p httprouter.Params) {
101 | 	fileServer := http.FileServer(root)
102 | 
103 | 	return func(w http.ResponseWriter, r *http.Request, p httprouter.Params) {
104 | 		w.Header().Set("Cache-Control", "public, max-age=86400") // 1 day (86400 seconds)
105 | 		r.URL.Path = p.ByName("filepath")
106 | 		fileServer.ServeHTTP(w, r)
107 | 	}
108 | }
109 | 
--------------------------------------------------------------------------------
/init.sql:
--------------------------------------------------------------------------------
1 | attach database 'file:/Users/jwarden/hacker-news-data-datadir/frontpage.sqlite?mode=ro' as frontpage;
2 | 
--------------------------------------------------------------------------------
/justfile:
--------------------------------------------------------------------------------
 1 | set dotenv-load := true
 2 | 
 3 | # List available recipes in the order in which they appear in this file
 4 | _default:
 5 |     @just --list --unsorted
 6 | 
 7 | watch:
 8 |     ./watch.sh
 9 | 
10 | sqlite:
11 |     sqlite3 $SQLITE_DATA_DIR/frontpage.sqlite
12 | 
13 | upvotes-db:
14 |     ./upvotes-db.sh
15 | 
16 | format:
17 |     go fmt
--------------------------------------------------------------------------------
/logger.go:
--------------------------------------------------------------------------------
 1 | package main
 2 | 
 3 | import (
 4 | 	"fmt"
 5 | 	"os"
 6 | 	"strings"
 7 | 
 8 | 	"golang.org/x/exp/slog"
 9 | )
10 | 
11 | func newLogger(levelString, formatString string) *slog.Logger {
12 | 	if levelString == "" {
13 | 		levelString = "DEBUG"
14 | 	}
15 | 
16 | 	logLevels := map[string]slog.Leveler{
17 | 		"DEBUG": slog.DebugLevel,
18 | 		"INFO":  slog.InfoLevel,
19 | 		"WARN":  slog.WarnLevel,
20 | 		"ERROR": slog.ErrorLevel,
21 | 	}
22 | 
23 | 	l, ok := logLevels[strings.ToUpper(levelString)]
24 | 	if !ok {
25 | 		panic("Unrecognized log level: " + levelString)
26 | 	}
27 | 
28 | 	var lh slog.Handler
29 | 
30 | 	if strings.ToUpper(formatString) == "JSON" {
31 | 		lh = slog.HandlerOptions{Level: l}.NewJSONHandler(os.Stdout)
32 | 	} else {
33 | 		lh = slog.HandlerOptions{Level: l}.NewTextHandler(os.Stdout)
34 | 	}
35 | 
36 | 	logger := slog.New(lh)
37 | 	slog.SetDefault(logger)
38 | 	return logger
39 | }
40 | 
41 | func LogErrorf(logger *slog.Logger, msg string, args ...interface{}) {
42 | 	logger.Error(fmt.Sprintf(msg, args...), nil)
43 | }
44 | 
45 | func Debugf(logger *slog.Logger, msg string, args ...interface{}) {
46 | 	logger.Debug(fmt.Sprintf(msg, args...))
47 | }
48 | 
49 | func LogFatal(logger *slog.Logger, msg string, err error, args ...interface{}) {
50 | 	if len(args) > 0 {
51 | 		logger.Error(msg, err, args...)
52 | 	} else {
53 | 		logger.Error(msg, err)
54 | 	}
55 | 	os.Exit(2)
56 | }
57 | 
58 | type retryableHTTPClientloggerWrapper struct {
59 | 	*slog.Logger
60 | }
61 | 
62 | func (l retryableHTTPClientloggerWrapper) Error(msg string, keysAndValues ...interface{}) {
63 | 	l.Logger.Error("retryableHTTPClient: "+msg, nil, keysAndValues...)
64 | }
65 | 
66 | func (l retryableHTTPClientloggerWrapper) Debug(msg string, keysAndValues ...interface{}) {
67 | 	// ignore very verbose debug output from retryableHTTPClientloggerWrapper
68 | }
69 | 
70 | // wrapLoggerForRetryableHTTPClient wraps a logger so that it implements an interface required by retryableHTTPClient
71 | func wrapLoggerForRetryableHTTPClient(logger *slog.Logger) retryableHTTPClientloggerWrapper {
72 | 	// ignore debug messages from this retry client.
73 | 	l := slog.New(logger.Handler())
74 | 	return retryableHTTPClientloggerWrapper{l}
75 | }
76 | 
--------------------------------------------------------------------------------
/logo.svg:
--------------------------------------------------------------------------------
[logo.svg: inline SVG image (~76 lines); the XML markup was not preserved by this export, leaving only an "image/svg+xml" metadata fragment, so the content is omitted here.]
--------------------------------------------------------------------------------
/main.go:
--------------------------------------------------------------------------------
 1 | package main
 2 | 
 3 | import (
 4 | 	"context"
 5 | 	"net/http"
 6 | 	"os"
 7 | 	"os/signal"
 8 | 	"syscall"
 9 | 	"time"
10 | 
11 | 	"github.com/pkg/errors"
12 | )
13 | 
14 | const maxShutDownTimeout = 5 * time.Second
15 | 
16 | func main() {
17 | 	app := initApp()
18 | 	defer app.cleanup()
19 | 
20 | 	logger := app.logger
21 | 
22 | 	ctx, cancelContext := context.WithCancel(context.Background())
23 | 	defer cancelContext()
24 | 
25 | 	shutdownPrometheusServer := servePrometheusMetrics()
26 | 
27 | 	// Start the archive worker
28 | 	go app.archiveWorker(ctx)
29 | 
30 | 	// Listen for a soft kill signal (INT, TERM, HUP)
31 | 	c := make(chan os.Signal, 1)
32 | 	signal.Notify(c, syscall.SIGINT, syscall.SIGTERM, syscall.SIGHUP)
33 | 
34 | 	// shutdown function, called in case of 1) a panic or 2) a soft kill signal
35 | 	var httpServer *http.Server // this variable is captured by the shutdown closure
36 | 
37 | 	shutdown := func() {
38 | 		// cancel the current background context
39 | 		cancelContext()
40 | 
41 | 		err := shutdownPrometheusServer(ctx)
42 | 		if err != nil {
43 | 			logger.Error("shutdownPrometheusServer", err)
44 | 		}
45 | 
46 | 		if httpServer != nil {
47 | 			logger.Info("Shutting down HTTP server")
48 | 			// shut down the HTTP server with a timeout in case the server doesn't want to shut down.
49 | 			// use background context, because we just cancelled ctx
50 | 			ctxWithTimeout, cancel := context.WithTimeout(context.Background(), maxShutDownTimeout)
51 | 			defer cancel()
52 | 			err := httpServer.Shutdown(ctxWithTimeout)
53 | 			if err != nil {
54 | 				logger.Error("httpServer.Shutdown", err)
55 | 				// if the server doesn't respond to the shutdown signal, nothing remains but to panic.
56 | 				panic("HTTP server shutdown failed")
57 | 			}
58 | 
59 | 			logger.Info("HTTP server shutdown complete")
60 | 		}
61 | 	}
62 | 
63 | 	go func() {
64 | 		sig := <-c
65 | 
66 | 		// Clean shutdown
67 | 		logger.Info("Received shutdown signal", "signal", sig)
68 | 		shutdown()
69 | 
70 | 		// now exit process
71 | 		logger.Info("Main loop exited. Terminating process")
72 | 
73 | 		os.Exit(0)
74 | 	}()
75 | 
76 | 	httpServer = app.httpServer(
77 | 		func(error) {
78 | 			logger.Info("Panic in HTTP handler.
Shutting down") 79 | shutdown() 80 | os.Exit(2) 81 | }, 82 | ) 83 | 84 | go func() { 85 | logger.Info("HTTP server listening", "address", httpServer.Addr) 86 | err := httpServer.ListenAndServe() 87 | if err != nil && err != http.ErrServerClosed { 88 | logger.Error("server.ListenAndServe", err) 89 | } 90 | logger.Info("Server shut down") 91 | }() 92 | 93 | app.mainLoop(ctx) 94 | } 95 | 96 | func (app app) mainLoop(ctx context.Context) { 97 | logger := app.logger 98 | 99 | lastCrawlTime, err := app.ndb.selectLastCrawlTime() 100 | if err != nil { 101 | LogFatal(logger, "selectLastCrawlTime", err) 102 | } 103 | 104 | t := time.Now().Unix() 105 | 106 | elapsed := int(t) - lastCrawlTime 107 | 108 | // If it has been more than a minute since our last crawl, 109 | // then crawl right away. 110 | if elapsed >= 60 { 111 | logger.Info("60 seconds since last crawl. Crawling now.") 112 | if err = app.crawlAndPostprocess(ctx); err != nil { 113 | logger.Error("crawlAndPostprocess", err) 114 | 115 | if errors.Is(err, context.Canceled) { 116 | return 117 | } 118 | } 119 | } else { 120 | logger.Info("Less than 60 seconds since last crawl.", "waitSeconds", 60-time.Now().Unix()%60) 121 | } 122 | 123 | // And now set a ticker so we crawl every minute going forward 124 | ticker := make(chan int64) 125 | 126 | // Make the first tick happen at the next 127 | // Minute mark. 128 | go func() { 129 | t := time.Now().Unix() 130 | delay := 60 - t%60 131 | <-time.After(time.Duration(delay) * time.Second) 132 | ticker <- t + delay 133 | }() 134 | 135 | for { 136 | select { 137 | case <-ticker: 138 | t := time.Now().Unix() 139 | // Set the next tick at the minute mark. We use this instead of using 140 | // time.NewTicker because in dev mode our app can be suspended, and I 141 | // want to see all the timestamps in the DB as multiples of 60. 
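			// (Worked example of the alignment arithmetic below, with an
			// illustrative timestamp: if t = 1700000123 then t%60 = 23, so
			// delay = 37 and nextTickTime = 1700000160, an exact multiple
			// of 60.)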
142 | 			delay := 60 - t%60
143 | 			nextTickTime := t + delay
144 | 			go func() {
145 | 				<-time.After(time.Duration(delay) * time.Second)
146 | 				ticker <- nextTickTime
147 | 			}()
148 | 
149 | 			logger.Info("Beginning crawl")
150 | 
151 | 			// Create a context with deadline for both crawl and idle period
152 | 			crawlCtx, cancel := context.WithDeadline(ctx, time.Unix(nextTickTime-1, 0))
153 | 			defer cancel() // note: deferred inside mainLoop's loop, so these cancels only run when mainLoop returns at shutdown
154 | 
155 | 			if err = app.crawlAndPostprocess(crawlCtx); err != nil {
156 | 				logger.Error("crawlAndPostprocess", err)
157 | 			} else {
158 | 				app.logger.Info("Finished crawl and postprocess")
159 | 
160 | 				// Only send idle context if we have enough time (at least 5 seconds)
161 | 				if delay >= 5 {
162 | 					// Try to send the same context to the archive worker
163 | 					select {
164 | 					case app.archiveTriggerChan <- crawlCtx:
165 | 						app.logger.Debug("Sent idle context to archive worker")
166 | 					default:
167 | 						app.logger.Debug("Archive trigger channel full, skipping signal")
168 | 					}
169 | 				} else {
170 | 					app.logger.Debug("Skipping idle context - not enough time", "delay", delay)
171 | 				}
172 | 			}
173 | 
174 | 		case <-ctx.Done():
175 | 			return
176 | 		}
177 | 	}
178 | }
179 | 
--------------------------------------------------------------------------------
/middleware.go:
--------------------------------------------------------------------------------
 1 | package main
 2 | 
 3 | import (
 4 | 	"database/sql"
 5 | 	"encoding/json"
 6 | 	"net/http"
 7 | 	"reflect"
 8 | 	"strconv"
 9 | 	"strings"
10 | 	"time"
11 | 
12 | 	"github.com/pkg/errors"
13 | 	"golang.org/x/exp/slog"
14 | 
15 | 	"github.com/julienschmidt/httprouter"
16 | 
17 | 	"github.com/johnwarden/httperror"
18 | 
19 | 	"github.com/gorilla/schema"
20 | 
21 | 	"github.com/NYTimes/gziphandler"
22 | )
23 | 
24 | // middleware converts a handler of type httperror.XHandlerFunc[P] into an
25 | // httprouter.Handle. We use the former type for our http handler functions:
26 | // this is a clean function signature that accepts parameters as a struct and
27 | // returns an error. But we need to pass an httprouter.Handle to our router.
28 | // So we wrap our httperror.XHandlerFunc[P], parsing the URL parameters to
29 | // produce the parameter struct, passing it to the inner handler, then
30 | // handling any errors that are returned.
31 | func middleware[P any](routeName string, logger *slog.Logger, onPanic func(error), h httperror.XHandlerFunc[P]) httprouter.Handle {
32 | 	h = httperror.XPanicMiddleware[P](h)
33 | 
34 | 	h = prometheusMiddleware[P](routeName, h)
35 | 
36 | 	handleError := func(w http.ResponseWriter, err error) {
37 | 		if errors.Is(err, httperror.Panic) {
38 | 			// do this in a goroutine, otherwise we get a deadlock if onPanic shuts down the HTTP server,
39 | 			// because the http server shutdown function will wait for all requests to terminate,
40 | 			// including this one!
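			// (Spelled out, the cycle being avoided is roughly:
			// handleError -> onPanic -> httpServer.Shutdown -> wait for all
			// in-flight requests -> wait for this very request -> deadlock.
			// Running onPanic on its own goroutine lets this handler return
			// first, so the shutdown can complete.)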
41 | go onPanic(err) 42 | } 43 | httperror.DefaultErrorHandler(w, err) 44 | } 45 | 46 | return func(w http.ResponseWriter, r *http.Request, ps httprouter.Params) { 47 | var params P 48 | err := unmarshalRouterRequest(r, ps, ¶ms) 49 | if err != nil { 50 | err = httperror.Wrap(err, http.StatusBadRequest) 51 | logger.Error("unmarshalRouterRequest", err, "url", r.URL) 52 | handleError(w, err) 53 | return 54 | } 55 | 56 | err = h(w, r, params) 57 | if err != nil { 58 | if httperror.StatusCode(err) >= 500 { 59 | logger.Error("executing handler", err, "url", r.URL) 60 | requestErrorsTotal.Inc() 61 | } 62 | handleError(w, err) 63 | } 64 | } 65 | } 66 | 67 | var decoder = schema.NewDecoder() 68 | 69 | func nullInt64Converter(value string) reflect.Value { 70 | var result sql.NullInt64 71 | if value != "" { 72 | v, _ := strconv.ParseInt(value, 10, 64) 73 | result = sql.NullInt64{Int64: v, Valid: true} 74 | } 75 | return reflect.ValueOf(result) 76 | } 77 | 78 | func nullFloat64Converter(value string) reflect.Value { 79 | var result sql.NullFloat64 80 | if value != "" { 81 | v, _ := strconv.ParseFloat(value, 64) 82 | result = sql.NullFloat64{Float64: v, Valid: true} 83 | } 84 | return reflect.ValueOf(result) 85 | } 86 | 87 | func init() { 88 | decoder.RegisterConverter(sql.NullInt64{}, nullInt64Converter) 89 | decoder.RegisterConverter(sql.NullFloat64{}, nullFloat64Converter) 90 | } 91 | 92 | // unmarshalRouterRequest is a generic request URL unmarshaler for use with 93 | // httprouter. It unmarshals the request parameters parsed by httprouter, as 94 | // well as any URL parameters, into a struct of any type, matching query 95 | // names to struct field names. 96 | func unmarshalRouterRequest(r *http.Request, ps httprouter.Params, params any) error { 97 | if r.Method == "POST" { 98 | err := json.NewDecoder(r.Body).Decode(params) 99 | if err != nil { 100 | return errors.Wrap(err, "decode json") 101 | } 102 | return nil 103 | } 104 | 105 | m := make(map[string][]string) 106 | 107 | // First convert the httprouter.Params into a map 108 | for _, p := range ps { 109 | key := p.Key 110 | if v, ok := m[key]; ok { 111 | m[key] = append(v, p.Value) 112 | } else { 113 | m[key] = []string{p.Value} 114 | } 115 | } 116 | 117 | // Then merge in the URL query parameters. 118 | for key, values := range r.URL.Query() { 119 | if v, ok := m[key]; ok { 120 | m[key] = append(v, values...) 121 | } else { 122 | m[key] = values 123 | } 124 | } 125 | 126 | // Then unmarshal. 127 | err := decoder.Decode(params, m) 128 | if err != nil { 129 | if !strings.HasPrefix(err.Error(), "schema: invalid path") { 130 | // ignore errors due to unrecognized parameters 131 | return errors.Wrap(err, "decode parameters") 132 | } 133 | } 134 | 135 | return nil 136 | } 137 | 138 | // preRouterMiddleware wraps the router itself. It is for middleware that does 139 | // not need to know anything about the route (params, name, etc) 140 | func (app app) preRouterMiddleware(handler http.Handler, writeTimeout time.Duration) http.Handler { 141 | handler = app.cacheAndCompressMiddleware(handler) 142 | handler = app.canonicalDomainMiddleware(handler) // redirects must happen before caching! 143 | handler = app.timeoutMiddleware(handler, writeTimeout) // redirects must happen before caching! 144 | return handler 145 | } 146 | 147 | // We could improve this middleware. 
Currently we cache before we 148 | // compress, because the cache middleware we use here doesn't recognize the 149 | // accept-encoding header, and if we compressed before we cache, cache 150 | // entries would be randomly compressed or not, regardless of the 151 | // accept-encoding header. Unfortunately by caching before we compress, 152 | // requests are cached uncompressed. A compressed-cache middleware would be a 153 | // nice improvement. Also our cache-control headers should be synced with the 154 | // exact cache expiration time, which should be synced with the crawl. But 155 | // what we have here is simple and probably good enough. 156 | 157 | func (app app) cacheAndCompressMiddleware(handler http.Handler) http.Handler { 158 | // if app.cacheSize > 0 { 159 | 160 | // memorycached, err := memory.NewAdapter( 161 | // memory.AdapterWithAlgorithm(memory.LRU), 162 | // memory.AdapterWithCapacity(app.cacheSize), 163 | // ) 164 | // if err != nil { 165 | // LogFatal(app.logger, "memory.NewAdapater", err) 166 | // } 167 | 168 | // cacheClient, err := cache.NewClient( 169 | // cache.ClientWithAdapter(memorycached), 170 | // cache.ClientWithTTL(1*time.Minute), 171 | // cache.ClientWithRefreshKey("opn"), 172 | // ) 173 | // if err != nil { 174 | // LogFatal(app.logger, "cache.NewClient", err) 175 | // } 176 | 177 | // var h http.Handler = http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 178 | // // since we update data only every minute, tell browsers to cache for one minute 179 | // handler.ServeHTTP(w, r) 180 | // }) 181 | 182 | // h = cacheClient.Middleware(h) 183 | // } 184 | h := handler 185 | 186 | return gziphandler.GzipHandler(h) 187 | } 188 | -------------------------------------------------------------------------------- /migrate-volume.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e # Exit on any error 3 | 4 | # Configuration 5 | APP_NAME="news" 6 | OLD_VOLUME_NAME="data2" 7 | NEW_VOLUME_NAME="data3" 8 | NEW_VOLUME_SIZE="3" # Adjust this to your needs 9 | REGION="ewr" # Your current region 10 | 11 | # Function to wait for VM to be ready 12 | wait_for_vm() { 13 | echo "Waiting for VM to be ready..." 14 | while true; do 15 | STATUS=$(fly status --app $APP_NAME) 16 | if echo "$STATUS" | grep -q "running"; then 17 | echo "VM is ready" 18 | break 19 | fi 20 | echo "VM not ready yet, waiting..." 21 | sleep 5 22 | done 23 | } 24 | 25 | echo "Stopping the application..." 26 | fly scale count 0 --app $APP_NAME 27 | 28 | echo "Creating new volume..." 29 | fly volumes create $NEW_VOLUME_NAME --size $NEW_VOLUME_SIZE --region $REGION 30 | 31 | echo "Creating temporary machine with old volume..." 32 | cat > migrate-old.toml << EOL 33 | app = "$APP_NAME" 34 | primary_region = "$REGION" 35 | 36 | [build] 37 | image = "alpine:latest" 38 | 39 | [mounts] 40 | source = "$OLD_VOLUME_NAME" 41 | destination = "/data" 42 | 43 | [processes] 44 | app = "sleep infinity" 45 | EOL 46 | 47 | echo "Deploying temporary machine with old volume..." 48 | fly deploy --config migrate-old.toml --app $APP_NAME 49 | wait_for_vm 50 | 51 | echo "Copying data from old volume to temporary storage..." 52 | fly ssh console --command 'cd /data && tar czf frontpage.sqlite.gz frontpage.sqlite && tar czf frontpage.sqlite-shm.gz frontpage.sqlite-shm && tar czf frontpage.sqlite-wal.gz frontpage.sqlite-wal' --app $APP_NAME 53 | 54 | echo "Downloading database files from old volume..." 
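# (Editor's note on the three database files above: in SQLite's WAL mode the
# -wal file holds committed transactions that may not yet have been
# checkpointed into the main database file, so it must travel together with
# frontpage.sqlite; the -shm file is a shared-memory index that SQLite can
# rebuild, but copying it alongside is harmless.)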
55 | fly sftp shell --app $APP_NAME << EOF 56 | get /data/frontpage.sqlite.gz ~/social-protocols-data/recover/frontpage.sqlite.gz 57 | get /data/frontpage.sqlite-shm.gz ~/social-protocols-data/recover/frontpage.sqlite-shm.gz 58 | get /data/frontpage.sqlite-wal.gz ~/social-protocols-data/recover/frontpage.sqlite-wal.gz 59 | exit 60 | EOF 61 | 62 | echo "Destroying temporary machine..." 63 | fly scale count 0 --app $APP_NAME 64 | fly machines destroy $(fly machines list --json | jq -r '.[].id') --force --app $APP_NAME 65 | 66 | echo "Creating temporary machine with new volume..." 67 | cat > migrate-new.toml << EOL 68 | app = "$APP_NAME" 69 | primary_region = "$REGION" 70 | 71 | [build] 72 | image = "alpine:latest" 73 | 74 | [mounts] 75 | source = "$NEW_VOLUME_NAME" 76 | destination = "/data" 77 | 78 | [processes] 79 | app = "sleep infinity" 80 | EOL 81 | 82 | echo "Deploying temporary machine with new volume..." 83 | fly deploy --config migrate-new.toml --app $APP_NAME 84 | wait_for_vm 85 | 86 | echo "Uploading database files to new volume..." 87 | fly sftp shell --app $APP_NAME << EOF 88 | put ~/social-protocols-data/recover/frontpage.sqlite.gz /data/frontpage.sqlite.gz 89 | put ~/social-protocols-data/recover/frontpage.sqlite-shm.gz /data/frontpage.sqlite-shm.gz 90 | put ~/social-protocols-data/recover/frontpage.sqlite-wal.gz /data/frontpage.sqlite-wal.gz 91 | exit 92 | EOF 93 | 94 | echo "Extracting database files on new volume..." 95 | fly ssh console --command 'cd /data && gunzip frontpage.sqlite.gz && gunzip frontpage.sqlite-shm.gz && gunzip frontpage.sqlite-wal.gz' --app $APP_NAME 96 | 97 | echo "Updating mount configuration..." 98 | # Create a temporary file for the new fly.toml 99 | cat > fly.toml.new << EOL 100 | [mounts] 101 | source = "$NEW_VOLUME_NAME" 102 | destination = "/data" 103 | EOL 104 | 105 | # Backup the original fly.toml 106 | cp fly.toml fly.toml.backup 107 | 108 | # Update the mounts section in fly.toml 109 | sed -i.bak '/\[mounts\]/,/^$/c\' fly.toml 110 | cat fly.toml.new >> fly.toml 111 | rm fly.toml.new migrate-old.toml migrate-new.toml 112 | 113 | echo "Deploying application with new volume..." 114 | fly deploy 115 | wait_for_vm 116 | 117 | echo "Verifying application is running..." 
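# (Editor's sketch, not part of the original script: a stricter check than
# eyeballing the output below would be something like
#   fly status --app $APP_NAME | grep -q started || { echo "app not running"; exit 1; }
# assuming the machine state column reports "started" when healthy.)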
118 | fly status --app $APP_NAME 119 | 120 | echo "If everything looks good, you can delete the old volume with:" 121 | echo "fly volumes delete $OLD_VOLUME_NAME --app $APP_NAME" 122 | echo "" 123 | echo "To rollback, restore the original fly.toml:" 124 | echo "mv fly.toml.backup fly.toml" -------------------------------------------------------------------------------- /postprocessing.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bytes" 5 | "context" 6 | "database/sql" 7 | "fmt" 8 | "io" 9 | "strings" 10 | "time" 11 | 12 | "github.com/pkg/errors" 13 | "golang.org/x/exp/slog" 14 | ) 15 | 16 | const ( 17 | qnRankFormulaSQL = "pow(ageHours * (cumulativeUpvotes + overallPriorWeight)/((1-exp(-fatigueFactor*cumulativeExpectedUpvotes))/fatigueFactor + overallPriorWeight), 0.8) / pow(ageHours + 2, gravity/0.8) desc" 18 | 19 | // qnRankFormulaSQL = ` 20 | // pow( 21 | // ageHours * 22 | // sample_from_gamma_distribution( 23 | // cumulativeUpvotes + overallPriorWeight, 24 | // ( 25 | // 1-exp(-fatigueFactor*cumulativeExpectedUpvotes) 26 | // ) / fatigueFactor + overallPriorWeight 27 | // ) 28 | // , 0.8 29 | // ) / pow( 30 | // ageHours + 2 31 | // , gravity/0.8 32 | // ) desc` 33 | 34 | hnRankFormulaSQL = "(score-1) / pow(ageHours + 2, gravity/0.8) desc" 35 | ) 36 | 37 | func (app app) crawlPostprocess(ctx context.Context, tx *sql.Tx) error { 38 | t := time.Now() 39 | defer crawlPostprocessingDuration.UpdateDuration(t) 40 | 41 | var err error 42 | 43 | // for _, filename := range []string{"previous-crawl.sql", "resubmissions.sql", "raw-ranks.sql", "upvote-rates.sql"} { 44 | for _, filename := range []string{ 45 | "previous-crawl.sql", 46 | "resubmissions.sql", 47 | "raw-ranks.sql", 48 | } { 49 | app.logger.Info("Processing SQL file", slog.String("filename", filename)) 50 | err = executeSQLFile(ctx, tx, filename) 51 | if err != nil { 52 | return err 53 | } 54 | } 55 | 56 | err = app.updateQNRanks(ctx, tx) 57 | if err != nil { 58 | return errors.Wrap(err, "updateQNRanks") 59 | } 60 | 61 | app.logger.Info("Finished crawl postprocessing", slog.Duration("elapsed", time.Since(t))) 62 | 63 | return err 64 | } 65 | 66 | var qnRanksSQL = readSQLSource("qnranks.sql") 67 | 68 | func (app app) updateQNRanks(ctx context.Context, tx *sql.Tx) error { 69 | t := time.Now() 70 | 71 | d := defaultFrontPageParams 72 | sql := fmt.Sprintf(qnRanksSQL, d.PriorWeight, d.OverallPriorWeight, d.Gravity, d.PenaltyWeight, d.FatigueFactor, qnRankFormulaSQL) 73 | 74 | stmt, err := tx.Prepare(sql) 75 | if err != nil { 76 | return errors.Wrap(err, "preparing updateQNRanksSQL") 77 | } 78 | 79 | _, err = stmt.ExecContext(ctx) 80 | 81 | app.logger.Info("Finished executing updateQNRanks", slog.Duration("elapsed", time.Since(t))) 82 | 83 | return errors.Wrap(err, "executing updateQNRanksSQL") 84 | } 85 | 86 | func readSQLSource(filename string) string { 87 | f, err := resources.Open("sql/" + filename) 88 | if err != nil { 89 | panic(err) 90 | } 91 | defer f.Close() 92 | buf := bytes.NewBuffer(nil) 93 | _, err = io.Copy(buf, f) 94 | if err != nil { 95 | panic(err) 96 | } 97 | 98 | return buf.String() 99 | } 100 | 101 | func executeSQLFile(ctx context.Context, tx *sql.Tx, filename string) error { 102 | sql := readSQLSource(filename) 103 | 104 | sql = strings.Trim(sql, " \n\r;") 105 | 106 | parts := strings.Split(sql, ";\n") 107 | 108 | for _, sql := range parts { 109 | 110 | stmt, err := tx.Prepare(sql) 111 | if err != nil { 112 | return errors.Wrapf(err, 
"preparing SQL in file %s", filename) 113 | } 114 | 115 | _, err = stmt.ExecContext(ctx) 116 | 117 | if err != nil { 118 | return errors.Wrapf(err, "executing SQL in file %s", filename) 119 | } 120 | } 121 | return nil 122 | } 123 | -------------------------------------------------------------------------------- /prometheus.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "net/http" 6 | "os" 7 | "time" 8 | 9 | "github.com/VictoriaMetrics/metrics" 10 | "github.com/johnwarden/httperror" 11 | "golang.org/x/exp/slog" 12 | ) 13 | 14 | // Register various metrics. 15 | // Metric name may contain labels in Prometheus format - see below. 16 | 17 | var ( 18 | crawlErrorsTotal = metrics.NewCounter(`errors_total{type="crawl"}`) 19 | archiveErrorsTotal = metrics.NewCounter(`errors_total{type="archive"}`) 20 | requestErrorsTotal = metrics.NewCounter(`errors_total{type="request"}`) 21 | crawlDuration = metrics.NewHistogram("crawl_duration_seconds") 22 | crawlPostprocessingDuration = metrics.NewHistogram("crawl_postprocessing_duration_seconds") 23 | 24 | upvotesTotal = metrics.NewCounter(`upvotes_total`) 25 | submissionsTotal = metrics.NewCounter(`submissions_total`) 26 | storiesArchivedTotal = metrics.NewCounter(`stories_archived_total`) 27 | storiesPurgedTotal = metrics.NewCounter(`stories_purged_total`) 28 | 29 | vacuumOperationsTotal = metrics.NewCounter(`database_vacuum_operations_total{database="frontpage"}`) 30 | 31 | // Store histograms per route to avoid duplicate registration 32 | routeHistograms = make(map[string]*metrics.Histogram) 33 | ) 34 | 35 | // getRouteHistogram returns an existing histogram for a route or creates a new one 36 | func getRouteHistogram(routeName string) *metrics.Histogram { 37 | if h, exists := routeHistograms[routeName]; exists { 38 | return h 39 | } 40 | h := metrics.NewHistogram(`requests_duration_seconds{route="` + routeName + `"}`) 41 | routeHistograms[routeName] = h 42 | return h 43 | } 44 | 45 | func servePrometheusMetrics() func(ctx context.Context) error { 46 | mux := http.NewServeMux() 47 | 48 | // Export all the registered metrics in Prometheus format at `/metrics` http path. 
49 | mux.HandleFunc("/metrics", func(w http.ResponseWriter, req *http.Request) { 50 | metrics.WritePrometheus(w, true) 51 | }) 52 | 53 | listenAddress := os.Getenv("LISTEN_ADDRESS") 54 | 55 | s := &http.Server{ 56 | Addr: listenAddress + ":9091", 57 | Handler: mux, 58 | } 59 | 60 | go func() { 61 | LogFatal(slog.Default(), "Listen and serve prometheus", s.ListenAndServe()) 62 | }() 63 | 64 | return s.Shutdown 65 | } 66 | 67 | func prometheusMiddleware[P any](routeName string, h httperror.XHandler[P]) httperror.XHandlerFunc[P] { 68 | requestDuration := getRouteHistogram(routeName) 69 | 70 | return func(w http.ResponseWriter, r *http.Request, p P) error { 71 | var startTime time.Time 72 | if r.Method != http.MethodHead { 73 | startTime = time.Now() 74 | } 75 | 76 | err := h.Serve(w, r, p) 77 | 78 | if r.Method != http.MethodHead && routeName != "health" && routeName != "crawl-health" { 79 | requestDuration.UpdateDuration(startTime) 80 | } 81 | 82 | return err 83 | } 84 | } 85 | -------------------------------------------------------------------------------- /reset-prior-average-upvote-rate.sql: -------------------------------------------------------------------------------- 1 | 2 | with parameters as ( 3 | select 4 | -- 2.2956 as priorWeight 5 | -- 4.0 as priorWeight 6 | 1.7 as priorWeight 7 | , 0.003462767 as fatigueFactor 8 | -- , 1.036 as priorAverage 9 | -- , 1.036 as priorAverage 10 | -- , .99 as priorAverage 11 | -- , 1.0 as priorAverage 12 | ), entryRates as ( 13 | select 14 | userID 15 | , storyID 16 | , entryTime 17 | , entryUpvoteRate 18 | , max(cumulativeUpvotes) cumulativeUpvotes 19 | , max(cumulativeExpectedUpvotes) cumulativeExpectedUpvotes 20 | , (cumulativeUpvotes + priorWeight)/((1-exp(-fatigueFactor*cumulativeExpectedUpvotes))/fatigueFactor + priorWeight) newEntryUpvoteRate 21 | -- , (cumulativeUpvotes + priorWeight*1.174)/((1-exp(-fatigueFactor*cumulativeExpectedUpvotes))/fatigueFactor + priorWeight) newEntryUpvoteRate 22 | -- , (cumulativeUpvotes + priorWeight*1.145)/(cumulativeExpectedUpvotes + priorWeight) as newEntryUpvoteRate 23 | 24 | 25 | from 26 | votes 27 | join dataset 28 | on dataset.id = storyID 29 | join parameters 30 | where 31 | dataset.sampleTime 32 | and sampleTime <= entryTime 33 | -- and votes.userID != 0 34 | group by userID, storyID, entryTime 35 | ) 36 | -- select * from entryRates where userID = 0 and storyID = 36805231 limit 10; 37 | 38 | update votes as u 39 | set entryUpvotes = entryRates.cumulativeUpvotes 40 | , entryExpectedUpvotes = entryRates.cumulativeExpectedUpvotes 41 | , entryUpvoteRate = entryRates.newEntryUpvoteRate 42 | from 43 | entryRates 44 | where entryRates.userID = u.userID 45 | and entryRates.storyID = u.storyID ; 46 | 47 | -------------------------------------------------------------------------------- /resources.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import "embed" 4 | 5 | //go:embed templates/* 6 | //go:embed sql/* 7 | //go:embed seed/* 8 | var resources embed.FS 9 | -------------------------------------------------------------------------------- /score-page.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "database/sql" 5 | "fmt" 6 | "net/http" 7 | 8 | "github.com/johnwarden/httperror" 9 | "github.com/pkg/errors" 10 | ) 11 | 12 | type ScorePageData struct { 13 | PageTemplateData 14 | Positions []Position 15 | Score float64 16 | ScorePlotData [][]any 17 | } 18 | 19 | // Override 
IsScorePage since it's not determined by Ranking 20 | func (d ScorePageData) IsScorePage() bool { 21 | return true 22 | } 23 | 24 | func (p ScorePageData) ScoreString() string { 25 | return fmt.Sprintf("%.2f", p.Score) 26 | } 27 | 28 | func (p ScorePageData) AverageScoreString() string { 29 | return fmt.Sprintf("%.2f", p.Score/float64(len(p.Positions))) 30 | } 31 | 32 | type ScorePageParams struct { 33 | UserID sql.NullInt64 34 | OptionalModelParams 35 | ScoringFormula string 36 | } 37 | 38 | func (app app) scoreHandler() func(http.ResponseWriter, *http.Request, ScorePageParams) error { 39 | return func(w http.ResponseWriter, r *http.Request, params ScorePageParams) error { 40 | nullUserID := params.UserID 41 | if !nullUserID.Valid { 42 | 43 | nullUserID = app.getUserID(r) 44 | 45 | if !nullUserID.Valid { 46 | return httperror.PublicErrorf(http.StatusUnauthorized, "not logged in") 47 | } 48 | } 49 | 50 | modelParams := params.OptionalModelParams.WithDefaults() 51 | 52 | userID := int(nullUserID.Int64) 53 | 54 | positions, err := app.getDetailedPositions(r.Context(), userID) 55 | if err != nil { 56 | return errors.Wrap(err, "getDetailedPositions") 57 | } 58 | 59 | var score float64 60 | for i, p := range positions { 61 | 62 | p.EntryUpvoteRate = modelParams.upvoteRate(p.EntryUpvotes, p.EntryExpectedUpvotes) 63 | p.CurrentUpvoteRate = modelParams.upvoteRate(p.CurrentUpvotes, p.CurrentExpectedUpvotes) 64 | p.Story.UpvoteRate = p.CurrentUpvoteRate 65 | 66 | if p.ExitUpvotes.Valid && p.ExitExpectedUpvotes.Valid { 67 | p.ExitUpvoteRate = sql.NullFloat64{ 68 | Float64: modelParams.upvoteRate(int(p.ExitUpvotes.Int64), p.ExitExpectedUpvotes.Float64), 69 | Valid: true, 70 | } 71 | } 72 | 73 | p.UserScore = UserScore(p, modelParams, params.ScoringFormula) 74 | 75 | score += p.UserScore 76 | p.RunningScore = score 77 | 78 | p.Story.UpvoteRate = p.UpvoteRate 79 | 80 | positions[i] = p 81 | } 82 | 83 | n := len(positions) 84 | for i := range positions { 85 | positions[i].RunningScore = score - positions[i].RunningScore + positions[i].UserScore 86 | positions[i].Label = intToAlphaLabel(n - i - 1) 87 | } 88 | 89 | scorePlotData := make([][]any, n) 90 | for i, p := range positions { 91 | scorePlotData[n-i-1] = []any{ 92 | p.EntryTime, p.RunningScore, fmt.Sprintf("%d", p.PositionID), p.Story.Title, p.UserScoreString(), p.Direction, p.EntryUpvoteRateString(), p.CurrentUpvoteRateString(), p.ExitUpvoteRateString(), 93 | } 94 | } 95 | 96 | pageSize := 1000 97 | if n > pageSize { 98 | n = pageSize 99 | } 100 | 101 | d := ScorePageData{ 102 | PageTemplateData: PageTemplateData{ 103 | UserID: nullUserID, 104 | Ranking: "score", 105 | }, 106 | Positions: positions[0:n], 107 | Score: score, 108 | ScorePlotData: scorePlotData, 109 | } 110 | 111 | if err = templates.ExecuteTemplate(w, "score.html.tmpl", d); err != nil { 112 | return errors.Wrap(err, "executing score template") 113 | } 114 | 115 | return nil 116 | } 117 | } 118 | 119 | // convert an integer into an alpha-numerical label starting with A through Z, then continuing AA, AB, etc. 
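// For example, the function below yields: 0 -> "A", 25 -> "Z", 26 -> "AA",
// 27 -> "AB", 52 -> "BA" (the same scheme as spreadsheet column names).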
120 | 121 | func intToAlphaLabel(i int) string { 122 | r := make([]byte, 0, 1) 123 | 124 | // result := "" 125 | n := 0 126 | for { 127 | digit := i % 26 128 | letter := 'A' + digit 129 | // result = string(letter) + result 130 | 131 | r = append(r, byte(letter)) 132 | 133 | i -= digit 134 | if i == 0 { 135 | break 136 | } 137 | i /= 26 138 | i -= 1 139 | n++ 140 | } 141 | 142 | n = len(r) 143 | for i := 0; i < n/2; i++ { 144 | j := n - i - 1 145 | 146 | r[i], r[j] = r[j], r[i] 147 | } 148 | 149 | return string(r) 150 | } 151 | -------------------------------------------------------------------------------- /scraper.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "strconv" 7 | "strings" 8 | "sync" 9 | "time" 10 | 11 | colly "github.com/gocolly/colly/v2" 12 | "github.com/pkg/errors" 13 | "golang.org/x/exp/slog" 14 | ) 15 | 16 | type rawStory struct { 17 | ID string 18 | row1 19 | row2 20 | } 21 | 22 | type row1 struct { 23 | Title string `selector:"span.titleline a"` 24 | FullTitle string `selector:"span.titleline"` 25 | URL string `selector:"span.titleline a" attr:"href"` 26 | Rank string `selector:"span.rank"` 27 | } 28 | 29 | type row2 struct { 30 | Author string `selector:"a.hnuser"` 31 | Score string `selector:"span.score"` 32 | SubmissionTime string `selector:"span.age" attr:"title"` 33 | AgeApprox string `selector:"span.age"` 34 | Links []string `selector:"a"` 35 | } 36 | 37 | type ScrapedStory struct { 38 | Story 39 | Rank int 40 | Source string 41 | } 42 | 43 | func (rs rawStory) Clean() (ScrapedStory, error) { 44 | story := ScrapedStory{ 45 | Story: Story{ 46 | Title: rs.Title, 47 | By: rs.Author, 48 | URL: rs.URL, 49 | }, 50 | Source: "scraper", 51 | } 52 | 53 | // parse id 54 | { 55 | id, err := strconv.Atoi(rs.ID) 56 | if err != nil { 57 | return story, errors.Wrapf(err, "parse story id %s", rs.ID) 58 | } 59 | story.ID = id 60 | } 61 | 62 | // fix url 63 | if strings.HasPrefix(story.Story.URL, "item?id=") { 64 | story.Story.URL = "https://news.ycombinator.com/" + story.Story.URL 65 | } 66 | 67 | // parse score. This field will look like "4 points" 68 | { 69 | if fs := strings.Fields(rs.Score); len(fs) > 0 { 70 | scoreStr := strings.Fields(rs.Score)[0] 71 | 72 | score, err := strconv.Atoi(scoreStr) 73 | story.Score = score 74 | if err != nil { 75 | return story, errors.Wrapf(err, "parse story score %s", rs.Score) 76 | } 77 | } else { 78 | // if there is no upvotes field, then this is an HN job. 79 | // we want to include these in the database because they get ranked 80 | story.Job = true 81 | } 82 | } 83 | 84 | // parse submission time 85 | { 86 | // submission times now contain a timestamp string, followed by a 87 | // space then a unix timestamp with what looks like the *current* 88 | // time which I suppose we can just ignore. 
For 89 | // example "2024-10-23T16:44:01 1729713776" 90 | parts := strings.Split(rs.SubmissionTime, " ") 91 | 92 | var submissionTime time.Time 93 | var err error 94 | 95 | if strings.HasSuffix(parts[0], "Z") { 96 | // Old format with "Z" indicating UTC 97 | submissionTime, err = time.Parse("2006-01-02T15:04:05Z", parts[0]) 98 | } else { 99 | // New format without "Z" 100 | submissionTime, err = time.Parse("2006-01-02T15:04:05", parts[0]) 101 | } 102 | 103 | if err != nil { 104 | return story, errors.Wrapf(err, "parse submission time %s", rs.SubmissionTime) 105 | } 106 | story.SubmissionTime = submissionTime.Unix() 107 | story.OriginalSubmissionTime = story.SubmissionTime 108 | } 109 | 110 | // parse approximate age 111 | { 112 | // this will be something like "1 minute ago" or "3 hours ago" 113 | if fs := strings.Fields(rs.AgeApprox); len(fs) > 1 { 114 | n, err := strconv.Atoi(fs[0]) 115 | if err != nil { 116 | return story, errors.Wrapf(err, "parse relative age %s", rs.AgeApprox) 117 | } 118 | 119 | var units int64 120 | if strings.HasPrefix(fs[1], "minute") { // "minute" or "minutes" 121 | units = 60 122 | } else if strings.HasPrefix(fs[1], "hour") { 123 | units = 3600 124 | } else if strings.HasPrefix(fs[1], "day") { 125 | units = 3600 * 24 126 | } else if strings.HasPrefix(fs[1], "month") { 127 | units = 3600 * 24 * 30 128 | } else if strings.HasPrefix(fs[1], "year") { 129 | units = 3600 * 24 * 365 130 | } 131 | 132 | story.AgeApprox = int64(n) * units 133 | } else { 134 | return story, fmt.Errorf("parse age %s", rs.AgeApprox) 135 | } 136 | 137 | // parse rank. we know the rank because of the order it appears in. 138 | // we just use this to do an integrity check later. 139 | { 140 | tRank := strings.Trim(rs.Rank, ".") 141 | var err error 142 | story.Rank, err = strconv.Atoi(tRank) 143 | if err != nil || story.Rank == 0 { 144 | return story, errors.Wrapf(err, "parse rank %s", rs.Rank) 145 | } 146 | } 147 | 148 | // parse the number of comments 149 | { 150 | // if there are comments, this will be the last tag. Unfortunately, it doesn't have an id or class. 151 | commentString := rs.Links[len(rs.Links)-1] 152 | 153 | // this string will be a single word like "comment" or "hide" if there are no comments.
154 | // otherwise it will be something like "12 comments" 155 | if fs := strings.Fields(commentString); len(fs) > 1 { 156 | c, err := strconv.Atoi(fs[0]) 157 | if err != nil { 158 | return story, errors.Wrapf(err, "parse comments %s", commentString) 159 | } 160 | story.Comments = c 161 | } 162 | } 163 | 164 | // parse [flagged] and [dupe] tags 165 | { 166 | if strings.Contains(rs.FullTitle, "[flagged]") { 167 | story.Flagged = true 168 | } 169 | if strings.Contains(rs.FullTitle, "[dupe]") { 170 | story.Dupe = true 171 | } 172 | } 173 | 174 | return story, nil 175 | } 176 | } 177 | 178 | func (app app) newScraper(resultCh chan ScrapedStory, errCh chan error, moreLinkCh chan string) *colly.Collector { 179 | c := colly.NewCollector() 180 | c.SetClient(app.httpClient) 181 | 182 | var rs rawStory 183 | 184 | c.OnHTML("a.morelink", func(e *colly.HTMLElement) { 185 | moreLinkCh <- e.Attr("href") 186 | }) 187 | 188 | c.OnHTML("tr table", func(e *colly.HTMLElement) { 189 | n := 0 190 | lastStoryRownum := 0 191 | e.ForEach("tr", func(i int, e *colly.HTMLElement) { 192 | class := e.Attr("class") 193 | 194 | // stories will always start with a tr of class athing 195 | if strings.Contains(class, "athing") && n < 30 { 196 | n = n + 1 197 | lastStoryRownum = i 198 | if n > 30 { 199 | return 200 | } 201 | 202 | rs = rawStory{ 203 | ID: e.Attr("id"), 204 | } 205 | err := e.Unmarshal(&rs.row1) 206 | if err != nil { 207 | errCh <- err 208 | } 209 | } else if class == "" && i == lastStoryRownum+1 && n > 0 && n <= 30 { 210 | // the first tr after the "athing" contains the second row of 211 | // details for the story. Note also we must skip any trs 212 | // before the first athing because sometimes they contain 213 | // general page content. 214 | 215 | err := e.Unmarshal(&rs.row2) 216 | 217 | if err != nil { 218 | errCh <- err 219 | } else { 220 | st, err := rs.Clean() 221 | rank := st.Rank 222 | 223 | // Do an integrity check. If the rank shown for the story matches the row 224 | // count we are keeping, we are all good. 225 | if err == nil && ((rank-1)%30)+1 != n { 226 | err = fmt.Errorf("Ranks out of order. Expected %d but parsed %d", n, (rank-1)%30+1) 227 | } 228 | 229 | if err != nil { 230 | Debugf(app.logger, "Failed to parse story %d.
Raw story %#v", n, rs) 231 | errCh <- err 232 | } else { 233 | resultCh <- st 234 | } 235 | } 236 | } 237 | }) 238 | }) 239 | 240 | c.OnError(func(r *colly.Response, err error) { 241 | err = errors.Wrapf(err, "Failed to parse page %s", r.Request.URL) 242 | errCh <- err 243 | }) 244 | 245 | return c 246 | } 247 | 248 | func (app app) scrapeHN(pageType string, resultCh chan ScrapedStory, errCh chan error) { 249 | baseUrl := "https://news.ycombinator.com/" 250 | url := baseUrl 251 | if pageType == "new" { 252 | url = url + "newest" 253 | } else if pageType != "top" { 254 | url = url + pageType 255 | } 256 | for p := 1; p <= 3; p++ { 257 | moreLinkCh := make(chan string, 1) 258 | c := app.newScraper(resultCh, errCh, moreLinkCh) 259 | err := c.Visit(url) 260 | if err != nil { 261 | errCh <- err 262 | } 263 | select { 264 | case relativeURL := <-moreLinkCh: 265 | url = baseUrl + relativeURL 266 | default: 267 | // there won't always be a next link, in particular the show page could have fewer than 3 pages' worth of stories 268 | } 269 | 270 | } 271 | close(resultCh) 272 | close(errCh) 273 | } 274 | 275 | func (app app) scrapeFrontPageStories(ctx context.Context) (map[int]ScrapedStory, error) { 276 | app.logger.Info("Scraping front page stories") 277 | 278 | stories := map[int]ScrapedStory{} 279 | 280 | pageTypeName := "top" 281 | 282 | nSuccess := 0 283 | 284 | resultCh := make(chan ScrapedStory) 285 | errCh := make(chan error) 286 | 287 | var wg sync.WaitGroup 288 | 289 | t := time.Now() 290 | 291 | // scrape in a goroutine. the scraper will write results to the channel 292 | // we provide 293 | wg.Add(1) 294 | go func() { 295 | defer wg.Done() 296 | app.scrapeHN(pageTypeName, resultCh, errCh) 297 | }() 298 | 299 | // read from the error channel and print errors in a separate goroutine. 300 | // The scraper will block writing to the error channel if nothing is reading 301 | // from it.
302 | wg.Add(1) 303 | go func() { 304 | defer wg.Done() 305 | for err := range errCh { 306 | app.logger.Error("Error parsing story", err) 307 | crawlErrorsTotal.Inc() 308 | } 309 | }() 310 | 311 | for story := range resultCh { 312 | id := story.ID 313 | 314 | stories[id] = story 315 | 316 | nSuccess += 1 317 | } 318 | 319 | if nSuccess == 0 { 320 | return stories, fmt.Errorf("Didn't successfully parse any stories from %s page", pageTypeName) 321 | } 322 | Debugf(app.logger, "Crawled %d stories on %s page", nSuccess, pageTypeName) 323 | 324 | wg.Wait() 325 | 326 | app.logger.Info("Scraped stories", "pageTypeName", pageTypeName, slog.Duration("elapsed", time.Since(t))) 327 | 328 | return stories, nil 329 | } 330 | -------------------------------------------------------------------------------- /seed/domain-penalties.csv: -------------------------------------------------------------------------------- 1 | domain,avgPenalty 2 | www.phoronix.com,0.255983153311316 3 | arstechnica.com,0.236416125252806 4 | www.theguardian.com,0.304580202256946 5 | old.reddit.com,0.329146893687822 6 | twitter.com,0.343862039075023 7 | www.theregister.com,0.281049828586506 8 | www.theatlantic.com,0.269041168885244 9 | www.cnn.com,0.287677200012903 10 | www.latimes.com,0.442553344733379 11 | apnews.com,0.306319646291724 12 | www.wired.com,0.247088570701928 13 | torrentfreak.com,0.378240055055204 14 | www.fastcompany.com,0.259302489090707 15 | www.protocol.com,0.295085830042498 16 | www.forbes.com,0.373620224179877 17 | reason.com,0.393667070368929 18 | drewdevault.com,0.426765471415533 19 | www.washingtonpost.com,0.281714743939709 20 | www.scmp.com,0.357802047615181 21 | www.politico.com,0.300041785931081 22 | medium.com,0.280016222997068 23 | www.wsj.com,0.246757013133426 24 | www.cnbc.com,0.292165034038854 25 | www.ft.com,0.271791649480615 26 | nypost.com,0.426552760633264 27 | www.nytimes.com,0.247017260946097 28 | reclaimthenet.org,0.449452850524531 29 | tech.marksblogg.com,0.681258036075086 30 | gizmodo.com,0.235581227145393 31 | www.vice.com,0.273699442401756 32 | www.bbc.com,0.243614162442268 33 | techcrunch.com,0.267693218312698 34 | en.wikipedia.org,0.200554150996098 35 | www.macrumors.com,0.252190681850287 36 | www.bleepingcomputer.com,0.298953498978752 37 | www.telegraph.co.uk,0.328513269271261 38 | www.tomshardware.com,0.225581267219099 39 | www.thedrive.com,0.253433255715287 40 | www.sfchronicle.com,0.295735132759634 41 | www.businessinsider.com,0.327460617838536 42 | www.theverge.com,0.233671896247822 43 | www.eff.org,0.30545264142663 44 | theconversation.com,0.238252498612625 45 | www.bbc.co.uk,0.266873076770723 46 | astralcodexten.substack.com,0.274378958851422 47 | www.engadget.com,0.222515465389002 48 | www.marketwatch.com,0.345967596506456 49 | www.nasa.gov,0.235891052875635 50 | www.nationalreview.com,0.386958948863856 51 | web.archive.org,0.299904628615894 52 | www.dw.com,0.319977862059942 53 | nationalpost.com,0.442595555825156 54 | www.newsweek.com,0.389042499337081 55 | www.bloomberg.com,0.242968773325337 56 | www.nbcnews.com,0.272889000950255 57 | www.technologyreview.com,0.257431302810684 58 | lite.cnn.com,0.250700151641931 59 | venturebeat.com,0.276781089118411 60 | www.sfgate.com,0.281372733111948 61 | phys.org,0.191695575116859 62 | petapixel.com,0.202737368232906 63 | jalopnik.com,0.224182778568468 64 | www.cbsnews.com,0.215748434523396 65 | www.sciencealert.com,0.291226974242294 66 | appleinsider.com,0.358670726355805 67 | hackernoon.com,0.314810633979195 68 | 
www.space.com,0.209511817616723 69 | www.techdirt.com,0.279776442812103 70 | www.cbc.ca,0.247250210810386 71 | slate.com,0.265835164534057 72 | 9to5mac.com,0.251672380939431 73 | quillette.com,0.30212810108685 74 | www.independent.co.uk,0.287744881377527 75 | news.yahoo.com,0.306129953355535 76 | www.newscientist.com,0.248626857306742 77 | marginalrevolution.com,0.240319697857452 78 | www.cnet.com,0.240582214090158 79 | www.usatoday.com,0.249900498713825 80 | futurism.com,0.311144077242749 81 | www.scientificamerican.com,0.250479576916355 82 | thehill.com,0.297360919489742 83 | www.indiehackers.com,0.280577764502318 84 | finance.yahoo.com,0.242354394239265 85 | docs.google.com,0.246768467189723 86 | therecord.media,0.265035130188274 87 | blogs.nasa.gov,0.234768976839632 88 | www.micahlerner.com,0.228182741047012 89 | themarkup.org,0.169215301594475 90 | restofworld.org,0.277497103221563 91 | www.politico.eu,0.231618820954611 92 | www.france24.com,0.298707256113616 93 | betterprogramming.pub,0.239944839492751 94 | time.com,0.220345275200395 95 | www.schneier.com,0.24690892186614 96 | www.lesswrong.com,0.198347134793858 97 | www.pcmag.com,0.252328948155656 98 | thebulletin.org,0.185234962067628 99 | www.teslaoracle.com,0.218183317161904 100 | hbr.org,0.193063289207588 101 | thenewstack.io,0.226202908952288 102 | www.productlessons.xyz,0.165012975629553 103 | www.polygon.com,0.209155781150655 104 | medicalxpress.com,0.187450245477553 105 | www.anandtech.com,0.174405165433974 106 | electrek.co,0.142642514874113 107 | -------------------------------------------------------------------------------- /sql/cumulative-upvotes.sql: -------------------------------------------------------------------------------- 1 | -- this query updates cumulativeUpvotes and cumulativeExpectedUpvotes 2 | -- accounting for possible gaps in the data (stories in the latest crawl but not the previous crawl). 3 | -- We only want cumulativeUpvotes or cumulativeExpectedUpvotes to increase if we have two consecutive data 4 | -- points (one minute apart). 5 | 6 | with latest as ( 7 | select * from dataset where sampleTime = (select max(sampleTime) from dataset) 8 | ) 9 | update dataset as d 10 | set 11 | cumulativeUpvotes = case 12 | when not gapInData then previousCrawl.cumulativeUpvotes + latest.score - previousCrawl.score 13 | else previousCrawl.cumulativeUpvotes 14 | end 15 | , cumulativeExpectedUpvotes = case 16 | when not gapInData then latest.cumulativeExpectedUpvotes 17 | else previousCrawl.cumulativeExpectedUpvotes 18 | end 19 | from latest left join previousCrawl using (id) 20 | where 21 | d.id = latest.id 22 | and d.sampleTime = (select max(sampleTime) from dataset) 23 | -------------------------------------------------------------------------------- /sql/previous-crawl-index-old.sql: -------------------------------------------------------------------------------- 1 | create index previousCrawl_id_idx on previousCrawl (id); -------------------------------------------------------------------------------- /sql/previous-crawl.sql: -------------------------------------------------------------------------------- 1 | -- This query selects the previous datapoint for every story in the latest crawl 2 | -- It is a bit tricky because the sampleTime may be different for each story, because 3 | -- Some stories may appear and disappear from crawl results if they fall off the front page and reappear. 
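-- A hypothetical example: if the latest crawl is at 12:05 and story A appeared in the
-- previous crawl at 12:04, its previous datapoint is the 12:04 row and gapInData is false.
-- If story B was last seen at 11:30 (it fell off the front page and came back), its previous
-- datapoint is the 11:30 row and gapInData is true, so downstream queries such as
-- cumulative-upvotes.sql do not accumulate upvotes across the gap.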
4 | 5 | create view if not exists previousCrawl as 6 | with latest as ( 7 | select * from dataset 8 | where sampleTime = (select max(sampleTime) from dataset) 9 | ) 10 | -- identify stories that are in the previous crawl. This is a quick indexed lookup 11 | , previousCrawl as ( 12 | select 13 | id 14 | , sampleTime 15 | from dataset 16 | where sampleTime = (select max(sampleTime) from dataset where sampleTime != (select max(sampleTime) from dataset)) 17 | ) 18 | -- this query finds the sampleTime of the last time this story was 19 | -- crawled, for all stories that were not in the previous crawl. This 20 | -- subquery can be slow, so only do it for stories that weren't in the 21 | -- previous crawl. 22 | , previousSampleForStory as ( 23 | select 24 | latest.id 25 | , ifnull(previousCrawl.sampleTime, max(dataset.sampleTime)) as sampleTime 26 | , previousCrawl.sampleTime is null as gapInData 27 | from latest left join previousCrawl using (id) 28 | left join dataset on ( 29 | previousCrawl.id is null 30 | and latest.id = dataset.id 31 | and dataset.sampleTime < (select max(sampleTime) from dataset) 32 | ) 33 | group by 1 34 | ) 35 | select dataset.*, gapInData from previousSampleForStory join dataset using (id, sampleTime); 36 | -------------------------------------------------------------------------------- /sql/qnranks.sql: -------------------------------------------------------------------------------- 1 | with parameters as (select %f as priorWeight, %f as overallPriorWeight, %f as gravity, %f as penaltyWeight, %f as fatigueFactor) 2 | , latestData as ( 3 | select 4 | id 5 | , score 6 | , sampleTime 7 | , cast(sampleTime-submissionTime as real)/3600 as ageHours 8 | , cumulativeUpvotes 9 | , cumulativeExpectedUpvotes 10 | , penalty 11 | from dataset 12 | where sampleTime = (select max(sampleTime) from dataset) 13 | and score >= 3 -- story can't reach front page until score >= 3 14 | and coalesce(topRank, bestRank, newRank, askRank, showRank) is not null -- let's not rank stories if they aren't accumulating attention 15 | ), 16 | qnRanks as ( 17 | select 18 | id 19 | , dense_rank() over(order by %s) as rank 20 | , sampleTime 21 | , penalty 22 | from latestData join parameters 23 | ) 24 | update dataset as d set qnRank = qnRanks.rank 25 | from qnRanks 26 | where d.id = qnRanks.id and d.sampleTime = qnRanks.sampleTime; 27 | -------------------------------------------------------------------------------- /sql/random-new-voter.sql: -------------------------------------------------------------------------------- 1 | with limits as ( 2 | select 3 | count(*) / 1000 as n 4 | , abs(random()) % 10 as m 5 | from dataset 6 | ) 7 | , randomFrontpageSample as ( 8 | select id, sampleTime, cumulativeUpvotes, cumulativeExpectedUpvotes 9 | from dataset 10 | join stories using (id) 11 | join limits 12 | where timestamp > ( select min(sampleTime) from dataset ) -- only stories submitted since we started crawling 13 | and newRank is not null 14 | and not job 15 | and ( ( dataset.rowid - (select min(rowid) from dataset) ) % n ) = m 16 | ) 17 | , storiesToUpvote as ( 18 | select id as storyID 19 | , min(sampleTime) as minSampleTime 20 | , min(cumulativeUpvotes) as minUpvotes 21 | , min(cumulativeExpectedUpvotes) as minExpectedUpvotes 22 | from randomFrontpageSample 23 | group by id 24 | order by sampleTime 25 | ) 26 | , positions as ( 27 | select 28 | 0 as userID 29 | , storiesToUpvote.storyID 30 | , 1 as direction 31 | , minSampleTime as entryTime 32 | , minUpvotes as entryUpvotes 33 | , minExpectedUpvotes
as entryExpectedUpvotes 34 | , row_number() over () as positionID 35 | from storiesToUpvote 36 | -- left join votes existingVotes using (storyID) 37 | -- where existingVotes.storyID is null 38 | ) select 39 | userID 40 | , storyID 41 | , positionID 42 | , direction 43 | , entryTime 44 | , entryUpvotes 45 | , entryExpectedUpvotes 46 | , null as exitTime 47 | , null as exitUpvotes 48 | , null as exitExpectedUpvotes 49 | , cumulativeUpvotes 50 | , cumulativeExpectedUpvotes 51 | , title 52 | , url 53 | , by 54 | , unixepoch() - sampleTime + coalesce(ageApprox, sampleTime - submissionTime) ageApprox 55 | , score 56 | , descendants as comments 57 | from positions 58 | join dataset on 59 | positions.storyID = id 60 | join stories using (id) 61 | group by positionID 62 | having max(dataset.sampleTime) 63 | order by entryTime desc 64 | ; 65 | -------------------------------------------------------------------------------- /sql/random-top-voter.sql: -------------------------------------------------------------------------------- 1 | with randomDatapoints as ( 2 | select 3 | id, sampleTime , cumulativeUpvotes, cumulativeExpectedUpvotes 4 | -- , row_number() over () as 5 | , row_number() over () as i 6 | , count() over () as nIDs 7 | from dataset 8 | join stories using (id) 9 | where 10 | timestamp > ( select min(sampleTime) from dataset ) -- only stories submitted since we started crawling 11 | and sampleTime > ( select max(sampleTime) from dataset ) - 24 * 60 * 60 12 | and topRank is not null 13 | ), 14 | limits as ( 15 | select abs(random()) % ( nIds / 100 ) as n 16 | from randomDatapoints 17 | where i = 1 18 | ) 19 | , storiesToUpvote as ( 20 | select id as storyID 21 | , min(sampleTime) as minSampleTime 22 | , min(cumulativeUpvotes) as minUpvotes 23 | , min(cumulativeExpectedUpvotes) as minExpectedUpvotes 24 | from randomDatapoints join limits 25 | -- sampleTime % nIDs = n 26 | where 27 | ( i ) % (nIDs / 100) = n 28 | group by id 29 | order by sampleTime 30 | ) 31 | , positions as ( 32 | select 33 | ? 
as userID 34 | , storiesToUpvote.storyID 35 | , 1 as direction 36 | , minSampleTime as entryTime 37 | , minUpvotes as entryUpvotes 38 | , minExpectedUpvotes as entryExpectedUpvotes 39 | , row_number() over () as positionID 40 | from storiesToUpvote 41 | -- left join votes existingVotes using (storyID) 42 | -- where existingVotes.storyID is null 43 | ) select 44 | userID 45 | , storyID 46 | , positionID 47 | , direction 48 | , entryTime 49 | , entryUpvotes 50 | , entryExpectedUpvotes 51 | , null as exitTime 52 | , null as exitUpvotes 53 | , null as exitExpectedUpvotes 54 | , cumulativeUpvotes 55 | , cumulativeExpectedUpvotes 56 | , title 57 | , url 58 | , by 59 | , unixepoch() - sampleTime + coalesce(ageApprox, sampleTime - submissionTime) ageApprox 60 | , score 61 | , descendants as comments 62 | from positions 63 | join dataset on 64 | positions.storyID = id 65 | join stories using (id) 66 | group by positionID 67 | having max(dataset.sampleTime) 68 | order by entryTime desc; -------------------------------------------------------------------------------- /sql/raw-ranks.sql: -------------------------------------------------------------------------------- 1 | with rankingScores as ( 2 | select 3 | id 4 | , sampleTime 5 | , topRank 6 | , pow(score-1, 0.8) / pow(cast(sampleTime - submissionTime as real)/3600+2, 1.8) as rankingScore -- pre-penalty HN ranking formula 7 | , ageApprox 8 | , job 9 | , score 10 | , timeStamp != submissionTime as resubmitted 11 | from dataset join stories using (id) 12 | where sampleTime = (select max(sampleTime) from dataset) 13 | -- normally a story is eligible to rank on front page once score >= 3 14 | -- but jobs can be on the front page without a score, and sometimes I see 15 | -- stories on the front page with a score of only 2. We want to calculate 16 | -- raw rank for any story that is ranked, or **should** be ranked. 17 | and (score >= 3 or topRank is not null) 18 | order by topRank asc, rankingScore desc 19 | ), 20 | rawRanks as ( 21 | select 22 | id 23 | , sampleTime 24 | , job 25 | , resubmitted 26 | , topRank as rank 27 | , score 28 | , count(*) over (order by rankingScore desc) as rawRank 29 | from rankingScores 30 | order by rank nulls last 31 | ) 32 | update dataset as d 33 | set rawRank = count(*) over ( 34 | order by case when rawRanks.job then rawRanks.rank else rawRanks.rawRank end, rawRanks.job desc 35 | ) 36 | from rawRanks 37 | where d.id = rawRanks.id 38 | and d.sampleTime = rawRanks.sampleTime 39 | ; 40 | -------------------------------------------------------------------------------- /sql/resubmissions.sql: -------------------------------------------------------------------------------- 1 | -- ESTIMATING RESUBMISSION TIME 2 | 3 | -- THE PROBLEM 4 | 5 | -- When a story is resubmitted, its submission time is updated to the current 6 | -- time, which gives it a rankings boost. 7 | 8 | -- We want to know what this new submission time is, so our algorithm can give 9 | -- stories the same boost. Also our penalty calculation requires knowing each 10 | -- story's pre-penalty ranking score, which requires knowing their submission 11 | -- times. 12 | 13 | -- Unfortunately exact resubmission times are not currently published by HN. The API always 14 | -- gives the story's original submission time. 15 | 16 | -- Each story's submission time datestamp is also included in the HTML when 17 | -- the story is displayed: you can see it when you hover the mouse over the 18 | -- age field ("20 minutes ago").
19 | 20 | -- Unfortunately, although the approximate age field ("20 minutes ago") 21 | -- reflects the resubmission time, the datestamp in the HTML is the original 22 | -- submission time. 23 | 24 | -- So we can only estimate the resubmission time from this approximate age 25 | -- field. 26 | 27 | -- But the approximate age is neither precise nor accurate. It is always a 28 | -- whole number of minutes, hours, or days, rounded down: 1 hour 59 minutes 29 | -- is shown as "1 hour ago", and 1 day 23 hours is shown as "one day ago". 30 | 31 | -- When a story is less than an hour old, we have minute-level granularity. 32 | -- However, this number is imprecise: it can be off by a couple of minutes 33 | -- either way. 34 | 35 | -- Further, resubmitted stories don't seem to show up on the front page (at least 36 | -- not the top 90 ranks we crawl) until they are at least an hour old. 37 | 38 | 39 | -- THE SOLUTION: We wrote dang to ask if he can help us out here. But I have 40 | -- implemented a pretty accurate solution: 41 | 42 | -- We can tell a story has been resubmitted within the last 24 hours because 43 | -- the submission time will be far earlier (typically hours) than the 44 | -- approximate age parsed from the web page (e.g. 3 hours ago). 45 | 46 | -- If the story is less than 1 day old, we can then place lower and upper 47 | -- bounds on the resubmission time. If it says "3 hours", it means anywhere 48 | -- from 3:00 h to 3:59 h ago. 49 | 50 | -- So each time we crawl, we calculate a lower bound on the story's 51 | -- resubmission time (based on an upper bound on age), 52 | -- and then compare it to the previous lower bound and move the bound 53 | -- accordingly (taking the highest lower bound). 54 | 55 | -- So if a story was submitted "3 hours ago" we know the story is at most 4 56 | -- hours old. So we save sampleTime - 4 hours in the submissionTime field, 57 | -- understanding that this is a lower bound on submissionTime. Then in the 58 | -- next minute we redo the calculation. If it still says "3 hours old" then 59 | -- our new implied lower bound on submission time will be greater than the 60 | -- previous lower bound by one minute. So we move the lower bound up by a minute 61 | -- (lower bounds always move up as we discover higher lower bounds). 62 | 63 | -- When the age string changes to "4 hours ago", we will only know the story is at 64 | -- most 4 hours 59 minutes old. But the implied submission time will be about one hour earlier 65 | -- than the lower bound we calculated one minute before. So we keep the 66 | -- current lower bound. At this point, we have the exact resubmission time 67 | -- within a couple of minutes either way. 68 | 69 | -- Other considerations: We can't detect resubmission times for stories more than a day old 70 | -- (unless they were resubmitted several days later). It is possible that a 71 | -- resubmitted story is more than a day old, and is still on the front page. 72 | -- In that case, we cannot determine it is a resubmitted story. So we need to 73 | -- calculate the resubmission time before the story is a day old. We then 74 | -- remember this time, updating each subsequent datapoint to use this time.
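-- A worked example with hypothetical times: at sampleTime T the age field reads "3 hours
-- ago". The query below takes an upper bound on age of 3:59:00 plus a 100-second staleness
-- allowance, i.e. 4:00:40, so the implied lower bound on the resubmission time is T - 4:00:40.
-- One minute later the field still reads "3 hours ago", so the implied lower bound is one
-- minute higher and the bound moves up. When the field flips to "4 hours ago" the implied
-- bound drops by roughly an hour, so the previous (higher) bound is kept.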
75 | 76 | with latest as ( 77 | -- first, get the data from the latest crawl, determine which stories have 78 | -- been resubmitted, and estimate a lower bound on submission time 79 | select 80 | * 81 | , timestamp as originalSubmissionTime 82 | , sampleTime - ageApprox - timestamp > 7200 and ageApprox < 3600*24 as resubmitted 83 | , cast( 84 | case 85 | when 86 | -- we know a story has been resubmitted if the submission time implied 87 | -- by the approximate age differs by too much. Because age is rounded 88 | -- down, the difference can be up to one hour plus a few minutes 89 | -- because of data delay. In practice, the difference is always 90 | -- several hours. Using a cutoff at two hours should be good. Also, 91 | -- we should filter out stories more than a day old: if we just saw 92 | -- these stories for the first time, we don't know if they have been 93 | -- resubmitted or not (and thus don't know how old they really are) 94 | sampleTime - ageApprox - timestamp > 3600*2 and ageApprox < 3600*24 95 | and not job then 96 | -- calculate an upper bound on age 97 | case 98 | when ageApprox < 3600 then ageApprox+59 -- e.g. if a story is "5 minutes old", it could be up to 5 minutes and 59 seconds old 99 | when ageApprox < 3600*24 then (ageApprox+59*60) -- if a story is "1 hour old" it could be up to 1h 59m old 100 | end + 100 -- add another 100 seconds because the age field tends to be a little stale. 101 | else sampleTime - timestamp 102 | end 103 | as real) / 3600 as ageHours 104 | from dataset join stories using (id) 105 | where sampleTime = (select max(sampleTime) from dataset) 106 | ) 107 | update dataset as d 108 | -- And use the greater of the lower-bound submission time from the last crawl, and the one we just calculated. 109 | set submissionTime = case when latest.sampleTime - ageHours*3600 > ifnull(previousCrawl.submissionTime,0) then cast(latest.sampleTime - ageHours*3600 as int) else previousCrawl.submissionTime end 110 | from latest 111 | left join previousCrawl using (id) 112 | where d.id = latest.id and d.sampleTime = latest.sampleTime; 113 | -------------------------------------------------------------------------------- /sql/upvote-rates.sql: -------------------------------------------------------------------------------- 1 | /*Calculate the moving average upvote rate. The moving average window is based 2 | on expected upvotes, instead of time. As a result, the length of the window 3 | in terms of number of rows of data is variable. The calculation to identify 4 | the rows that fall within the window could be very inefficient: the query 5 | will scan the entire dataset to find rows where the difference between 6 | cumulativeExpectedUpvotes and the latest cumulativeExpectedUpvotes falls 7 | within the window. So we save the sampleTime of the start of the window in 8 | the database, so the query only needs to scan rows within this window.
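For example, with the windowSize of 50 set below: if a story's latest
cumulativeExpectedUpvotes is 180, the window covers the datapoints that are
within 50 expected upvotes of that total, and upvoteRateWindow records the
sampleTime of the newest datapoint that falls outside the window, so the next
crawl can skip older rows.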
9 | */ 10 | with parameters as ( 11 | select 50 as windowSize 12 | , 2.3 as priorWeight 13 | , 0.003462767 as fatigueFactor 14 | ), latest as ( 15 | select 16 | latest.id 17 | , latest.sampleTime 18 | , latest.score 19 | , latest.cumulativeUpvotes 20 | , latest.cumulativeExpectedUpvotes 21 | , ifnull(previous.upvoteRateWindow,0) as upvoteRateWindow 22 | from dataset latest join previousCrawl previous using (id) 23 | where latest.sampleTime = (select max(sampleTime) from dataset) 24 | ) 25 | , windows as ( 26 | select 27 | latest.id 28 | , latest.sampleTime 29 | , latest.cumulativeUpvotes as cumulativeUpvotes 30 | , latest.cumulativeExpectedUpvotes as cumulativeExpectedUpvotes 31 | , max(dataset.sampleTime) as newWindow 32 | , min(latest.cumulativeUpvotes - dataset.cumulativeUpvotes) as upvotesInWindow 33 | , min(latest.cumulativeExpectedUpvotes - dataset.cumulativeExpectedUpvotes) as expectedUpvotesInWindow 34 | , min(latest.cumulativeExpectedUpvotes - dataset.cumulativeExpectedUpvotes) - windowSize as over 35 | , parameters.* 36 | from latest 37 | join parameters 38 | left join dataset on 39 | latest.id = dataset.id 40 | and dataset.sampleTime >= latest.upvoteRateWindow 41 | and latest.cumulativeExpectedUpvotes - dataset.cumulativeExpectedUpvotes > windowSize 42 | group by latest.id 43 | ) 44 | update dataset 45 | set 46 | upvoteRate = case 47 | when upvotesInWindow is null then ( dataset.cumulativeUpvotes + priorWeight ) / ( (1-exp(-fatigueFactor*dataset.cumulativeExpectedUpvotes))/fatigueFactor + priorWeight) 48 | else ( upvotesInWindow + priorWeight ) / ( 49 | -- The formula for adjusting expected upvotes for fatigue comes from the assumption that expected upvote rate decays 50 | -- exponentially: fatigueAdjustedExpectedUpvoteRate = exp(-fatigueFactor*cumulativeExpectedUpvotes). 51 | -- So fatigueAdjustedExpectedUpvotes is the total area under this curve, or the integral of 52 | -- fatigueAdjustedExpectedUpvoteRate from 0 to max(cumulativeExpectedUpvotes), which is: 53 | -- ( 1-exp(-fatigueFactor*max(cumulativeExpectedUpvotes)) ) / fatigueFactor 54 | -- But now we want the area under the curve within the moving average window, 55 | -- so we integrate from max(cumulativeExpectedUpvotes) - expectedUpvotesInWindow to max(cumulativeExpectedUpvotes), 56 | -- which gives us the below formula.
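-- Restated symbolically (writing f = fatigueFactor, E = cumulativeExpectedUpvotes,
-- and w = expectedUpvotesInWindow): the integral of exp(-f*x) from E - w to E is
-- ( exp(-f*(E - w)) - exp(-f*E) ) / f, which is exactly the denominator term below.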
57 | 58 | ( 59 | exp(-fatigueFactor*(dataset.cumulativeExpectedUpvotes - expectedUpvotesInWindow)) 60 | -exp(-fatigueFactor*dataset.cumulativeExpectedUpvotes) 61 | )/fatigueFactor 62 | + priorWeight) 63 | end 64 | , upvoteRateWindow = newWindow 65 | from windows 66 | where windows.id = dataset.id and windows.sampleTime = dataset.sampleTime; 67 | 68 | -- select 69 | -- id 70 | -- , sampleTime 71 | -- , newWindow 72 | -- , cumulativeUpvotes 73 | -- , cumulativeExpectedUpvotes 74 | -- , upvotesInWindow 75 | -- , expectedUpvotesInWindow 76 | -- , ( upvotesInWindow + priorWeight ) / ( expectedUpvotesInWindow + priorWeight) as movingAverageUpvoteRate 77 | -- , ( cumulativeUpvotes + priorWeight ) / ( cumulativeExpectedUpvotes + priorWeight) as upvoteRate 78 | -- from windows 79 | -- where movingAverageUpvoteRate is not null 80 | -- limit 10; 81 | 82 | 83 | 84 | 85 | -- where datset.id = windows.id 86 | 87 | 88 | -- select 89 | -- id 90 | -- , newWindow 91 | -- , cumulativeUpvotes 92 | -- , cumulativeExpectedUpvotes 93 | -- , upvotesInWindow 94 | -- , expectedUpvotesInWindow 95 | -- , ( upvotesInWindow + priorWeight ) / ( expectedUpvotesInWindow + priorWeight) as movingAverageUpvoteRate 96 | -- , ( cumulativeUpvotes + priorWeight ) / ( cumulativeExpectedUpvotes + priorWeight) as upvoteRate 97 | -- from windows join parameters 98 | -- -- where movingAverageUpvoteRate is not null 99 | -- limit 10; 100 | 101 | 102 | -------------------------------------------------------------------------------- /static/android-chrome-192x192.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/social-protocols/quality-news/17d482e0ee499116207c67aa66a787c7808c95a2/static/android-chrome-192x192.png -------------------------------------------------------------------------------- /static/android-chrome-512x512.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/social-protocols/quality-news/17d482e0ee499116207c67aa66a787c7808c95a2/static/android-chrome-512x512.png -------------------------------------------------------------------------------- /static/apple-touch-icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/social-protocols/quality-news/17d482e0ee499116207c67aa66a787c7808c95a2/static/apple-touch-icon.png -------------------------------------------------------------------------------- /static/browserconfig.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | #4a9ced 7 | 8 | 9 | 10 | -------------------------------------------------------------------------------- /static/chart-646.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/social-protocols/quality-news/17d482e0ee499116207c67aa66a787c7808c95a2/static/chart-646.png -------------------------------------------------------------------------------- /static/expected-upvotes.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/social-protocols/quality-news/17d482e0ee499116207c67aa66a787c7808c95a2/static/expected-upvotes.png -------------------------------------------------------------------------------- /static/favicon-16x16.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/social-protocols/quality-news/17d482e0ee499116207c67aa66a787c7808c95a2/static/favicon-16x16.png -------------------------------------------------------------------------------- /static/favicon-32x32.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/social-protocols/quality-news/17d482e0ee499116207c67aa66a787c7808c95a2/static/favicon-32x32.png -------------------------------------------------------------------------------- /static/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/social-protocols/quality-news/17d482e0ee499116207c67aa66a787c7808c95a2/static/favicon.ico -------------------------------------------------------------------------------- /static/hn-top-page-upvotes-by-rank.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/social-protocols/quality-news/17d482e0ee499116207c67aa66a787c7808c95a2/static/hn-top-page-upvotes-by-rank.png -------------------------------------------------------------------------------- /static/logo.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /static/mstile-144x144.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/social-protocols/quality-news/17d482e0ee499116207c67aa66a787c7808c95a2/static/mstile-144x144.png -------------------------------------------------------------------------------- /static/mstile-150x150.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/social-protocols/quality-news/17d482e0ee499116207c67aa66a787c7808c95a2/static/mstile-150x150.png -------------------------------------------------------------------------------- /static/mstile-310x150.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/social-protocols/quality-news/17d482e0ee499116207c67aa66a787c7808c95a2/static/mstile-310x150.png -------------------------------------------------------------------------------- /static/mstile-310x310.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/social-protocols/quality-news/17d482e0ee499116207c67aa66a787c7808c95a2/static/mstile-310x310.png -------------------------------------------------------------------------------- /static/mstile-70x70.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/social-protocols/quality-news/17d482e0ee499116207c67aa66a787c7808c95a2/static/mstile-70x70.png -------------------------------------------------------------------------------- /static/rank-history.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/social-protocols/quality-news/17d482e0ee499116207c67aa66a787c7808c95a2/static/rank-history.png -------------------------------------------------------------------------------- /static/safari-pinned-tab.svg: -------------------------------------------------------------------------------- 1 | 2 | 4 | 7 | 8 | Created by potrace 1.14, written by Peter Selinger 2001-2017 9 | 10 | 12 | 28 | 29 | 30 | 
-------------------------------------------------------------------------------- /static/site.webmanifest: -------------------------------------------------------------------------------- 1 | { 2 | "name": "Quality News", 3 | "short_name": "Quality News", 4 | "start_url": "/", 5 | "icons": [ 6 | { 7 | "src": "android-chrome-192x192.png", 8 | "sizes": "192x192", 9 | "type": "image/png" 10 | }, 11 | { 12 | "src": "android-chrome-512x512.png", 13 | "sizes": "512x512", 14 | "type": "image/png" 15 | } 16 | ], 17 | "theme_color": "#ffffff", 18 | "background_color": "#ffffff", 19 | "display": "standalone" 20 | } 21 | -------------------------------------------------------------------------------- /static/upvote-rate.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/social-protocols/quality-news/17d482e0ee499116207c67aa66a787c7808c95a2/static/upvote-rate.png -------------------------------------------------------------------------------- /static/upvote-share-by-rank.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/social-protocols/quality-news/17d482e0ee499116207c67aa66a787c7808c95a2/static/upvote-share-by-rank.png -------------------------------------------------------------------------------- /statspage.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "database/sql" 6 | "encoding/json" 7 | "fmt" 8 | "html/template" 9 | "io" 10 | "net/http" 11 | "time" 12 | 13 | "github.com/pkg/errors" 14 | 15 | "github.com/johnwarden/httperror" 16 | ) 17 | 18 | type StatsPageParams struct { 19 | StoryID int `schema:"id,required"` 20 | OptionalModelParams 21 | } 22 | 23 | type StatsData struct { 24 | RanksPlotDataJSON template.JS 25 | UpvotesPlotDataJSON template.JS 26 | MaxSampleTime int 27 | } 28 | 29 | type StatsPageData struct { 30 | StatsPageParams 31 | EstimatedUpvoteRate int 32 | StoryTemplateData 33 | StatsData 34 | } 35 | 36 | func (s StatsPageData) MaxSampleTimeISOString() string { 37 | return time.Unix(int64(s.MaxSampleTime), 0).UTC().Format("2006-01-02T15:04") 38 | } 39 | 40 | func (s StatsPageData) OriginalSubmissionTimeISOString() string { 41 | return time.Unix(s.OriginalSubmissionTime, 0).UTC().Format("2006-01-02T15:04") 42 | } 43 | 44 | func (s StatsPageData) MaxAgeHours() int { 45 | return (s.MaxSampleTime - int(s.OriginalSubmissionTime)) / 3600 46 | } 47 | 48 | var ErrStoryIDNotFound = httperror.New(404, "Story ID not found") 49 | 50 | func (app app) statsPage(w io.Writer, r *http.Request, params StatsPageParams, userID sql.NullInt64) error { 51 | s, stats, err := app.loadStoryAndStats(r.Context(), params.StoryID, params.OptionalModelParams) 52 | if err != nil { 53 | return err 54 | } 55 | 56 | modelParams := params.OptionalModelParams.WithDefaults() 57 | s.UpvoteRate = modelParams.upvoteRate(s.CumulativeUpvotes, s.CumulativeExpectedUpvotes) 58 | 59 | pageTemplate := PageTemplateData{ 60 | UserID: userID, 61 | } 62 | 63 | storyTemplate := StoryTemplateData{ 64 | Story: s, 65 | PageTemplateData: pageTemplate, 66 | } 67 | 68 | d := StatsPageData{ 69 | StatsPageParams: params, 70 | EstimatedUpvoteRate: 1.0, 71 | StoryTemplateData: storyTemplate, 72 | StatsData: stats, 73 | } 74 | 75 | err = templates.ExecuteTemplate(w, "stats.html.tmpl", d) 76 | return errors.Wrap(err, "executing stats page template") 77 | } 78 | 79 | func (app app) loadStoryAndStats(ctx context.Context, 
storyID int, modelParams OptionalModelParams) (Story, StatsData, error) { 80 | ndb := app.ndb 81 | 82 | // Try to get story from DB first 83 | s, err := ndb.selectStoryDetails(ctx, storyID) 84 | dbRecordExists := (err == nil) 85 | isArchived := (dbRecordExists && s.Archived) 86 | 87 | // If story doesn't exist in DB or is archived, try to load from archive 88 | if !dbRecordExists || isArchived { 89 | app.logger.Info("Loading story from archive", "storyID", storyID, "dbRecordExists", dbRecordExists, "isArchived", isArchived) 90 | 91 | sc, err := NewStorageClient() 92 | if err != nil { 93 | return Story{}, StatsData{}, errors.Wrap(err, "create storage client") 94 | } 95 | 96 | // Try v2 archive first 97 | filename := fmt.Sprintf("%d.v2.json", storyID) 98 | jsonData, err := sc.DownloadFile(ctx, filename) 99 | isV2 := err == nil 100 | if err != nil { 101 | // Try legacy archive 102 | filename = fmt.Sprintf("%d.json", storyID) 103 | jsonData, err = sc.DownloadFile(ctx, filename) 104 | if err != nil { 105 | if !dbRecordExists { 106 | return Story{}, StatsData{}, ErrStoryIDNotFound 107 | } 108 | return Story{}, StatsData{}, errors.Wrapf(err, "failed to load archive file %s for story marked as archived", filename) 109 | } 110 | } 111 | 112 | var archiveData ArchiveData 113 | err = json.Unmarshal(jsonData, &archiveData) 114 | if err != nil { 115 | return Story{}, StatsData{}, errors.Wrap(err, "unmarshal archive data") 116 | } 117 | 118 | if isV2 { 119 | // Calculate AgeApprox as current time minus submission time 120 | ageApprox := time.Now().Unix() - archiveData.SubmissionTime 121 | 122 | s = Story{ 123 | ID: archiveData.ID, 124 | By: archiveData.By, 125 | Title: archiveData.Title, 126 | URL: archiveData.URL, 127 | SubmissionTime: archiveData.SubmissionTime, 128 | OriginalSubmissionTime: archiveData.OriginalSubmissionTime, 129 | AgeApprox: ageApprox, 130 | Score: archiveData.Score, 131 | Comments: archiveData.Comments, 132 | CumulativeUpvotes: archiveData.CumulativeUpvotes, 133 | CumulativeExpectedUpvotes: archiveData.CumulativeExpectedUpvotes, 134 | TopRank: archiveData.TopRank, 135 | QNRank: archiveData.QNRank, 136 | RawRank: archiveData.RawRank, 137 | Flagged: archiveData.Flagged, 138 | Dupe: archiveData.Dupe, 139 | Job: archiveData.Job, 140 | Archived: archiveData.Archived, 141 | } 142 | } else { 143 | // For legacy archives, we need story details from DB 144 | return Story{}, StatsData{}, ErrStoryIDNotFound 145 | } 146 | 147 | // Convert plot data to JSON 148 | ranksJson, err := json.Marshal(archiveData.RanksPlotData) 149 | if err != nil { 150 | return Story{}, StatsData{}, errors.Wrap(err, "marshal ranks plot data") 151 | } 152 | 153 | upvotesJson, err := json.Marshal(archiveData.UpvotesPlotData) 154 | if err != nil { 155 | return Story{}, StatsData{}, errors.Wrap(err, "marshal upvotes plot data") 156 | } 157 | 158 | stats := StatsData{ 159 | RanksPlotDataJSON: template.JS(string(ranksJson)), 160 | UpvotesPlotDataJSON: template.JS(string(upvotesJson)), 161 | MaxSampleTime: archiveData.MaxSampleTime, 162 | } 163 | 164 | return s, stats, nil 165 | } 166 | 167 | // Story is not archived, get stats from DB 168 | maxSampleTime, err := maxSampleTime(ctx, ndb, storyID) 169 | if err != nil { 170 | return Story{}, StatsData{}, errors.Wrap(err, "maxSampleTime") 171 | } 172 | 173 | ranks, err := rankDatapoints(ctx, ndb, storyID) 174 | if err != nil { 175 | return Story{}, StatsData{}, errors.Wrap(err, "rankDatapoints") 176 | } 177 | 178 | ranksJson, err := json.Marshal(ranks) 179 | if err != nil { 180 
| return Story{}, StatsData{}, errors.Wrap(err, "marshal ranks plot data") 181 | } 182 | 183 | upvotes, err := upvotesDatapoints(ctx, ndb, storyID, modelParams.WithDefaults()) 184 | if err != nil { 185 | return Story{}, StatsData{}, errors.Wrap(err, "upvotesDatapoints") 186 | } 187 | 188 | upvotesJson, err := json.Marshal(upvotes) 189 | if err != nil { 190 | return Story{}, StatsData{}, errors.Wrap(err, "marshal upvotes plot data") 191 | } 192 | 193 | stats := StatsData{ 194 | RanksPlotDataJSON: template.JS(string(ranksJson)), 195 | UpvotesPlotDataJSON: template.JS(string(upvotesJson)), 196 | MaxSampleTime: maxSampleTime, 197 | } 198 | 199 | return s, stats, nil 200 | } 201 | -------------------------------------------------------------------------------- /storage.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bytes" 5 | "compress/gzip" 6 | "context" 7 | "fmt" 8 | "io" 9 | "os" 10 | 11 | minio "github.com/minio/minio-go/v7" 12 | "github.com/minio/minio-go/v7/pkg/credentials" 13 | ) 14 | 15 | type StorageClient struct { 16 | minioClient *minio.Client 17 | bucket string 18 | } 19 | 20 | func NewStorageClient() (*StorageClient, error) { 21 | endpoint := os.Getenv("R2_ENDPOINT") 22 | if endpoint == "" { 23 | return nil, fmt.Errorf("R2_ENDPOINT environment variable not set") 24 | } 25 | 26 | accessKeyID := os.Getenv("R2_ACCESS_KEY_ID") 27 | if accessKeyID == "" { 28 | return nil, fmt.Errorf("R2_ACCESS_KEY_ID environment variable not set") 29 | } 30 | 31 | secretAccessKey := os.Getenv("R2_SECRET_ACCESS_KEY") 32 | if secretAccessKey == "" { 33 | return nil, fmt.Errorf("R2_SECRET_ACCESS_KEY environment variable not set") 34 | } 35 | 36 | useSSL := os.Getenv("R2_USE_SSL") 37 | if useSSL == "" { 38 | return nil, fmt.Errorf("R2_USE_SSL environment variable not set") 39 | } 40 | 41 | bucket := os.Getenv("R2_BUCKET") 42 | if bucket == "" { 43 | return nil, fmt.Errorf("R2_BUCKET environment variable not set") 44 | } 45 | 46 | // Convert useSSL to boolean 47 | var ssl bool 48 | if useSSL == "true" || useSSL == "1" { 49 | ssl = true 50 | } else { 51 | ssl = false 52 | } 53 | 54 | // Remove "https://" or "http://" prefix from endpoint if present 55 | endpoint = trimEndpointScheme(endpoint) 56 | 57 | minioClient, err := minio.New(endpoint, &minio.Options{ 58 | Creds: credentials.NewStaticV4(accessKeyID, secretAccessKey, ""), 59 | Secure: ssl, 60 | Region: "auto", // For Cloudflare R2 61 | }) 62 | if err != nil { 63 | return nil, fmt.Errorf("failed to create MinIO client: %v", err) 64 | } 65 | 66 | return &StorageClient{minioClient: minioClient, bucket: bucket}, nil 67 | } 68 | 69 | // UploadFile uploads a file with the specified content type and optional compression 70 | func (sc *StorageClient) UploadFile(ctx context.Context, objectName string, content []byte, contentType string, compress bool) error { 71 | var reader *bytes.Reader 72 | var size int64 73 | 74 | if compress { 75 | // Compress the content using gzip 76 | var compressedContent bytes.Buffer 77 | gzipWriter := gzip.NewWriter(&compressedContent) 78 | _, err := gzipWriter.Write(content) 79 | if err != nil { 80 | return fmt.Errorf("failed to compress content: %v", err) 81 | } 82 | gzipWriter.Close() // Make sure to close the writer to flush the data 83 | 84 | reader = bytes.NewReader(compressedContent.Bytes()) 85 | size = int64(compressedContent.Len()) 86 | } else { 87 | reader = bytes.NewReader(content) 88 | size = int64(len(content)) 89 | } 90 | 91 | // Set appropriate 
options 92 | opts := minio.PutObjectOptions{ 93 | ContentType: contentType, 94 | } 95 | if compress { 96 | opts.ContentEncoding = "gzip" 97 | } 98 | 99 | // Upload the content 100 | _, err := sc.minioClient.PutObject(ctx, sc.bucket, objectName, reader, size, opts) 101 | if err != nil { 102 | return fmt.Errorf("failed to upload object %s: %v", objectName, err) 103 | } 104 | 105 | return nil 106 | } 107 | 108 | // DownloadFile downloads a file from storage and returns its content 109 | func (sc *StorageClient) DownloadFile(ctx context.Context, objectName string) ([]byte, error) { 110 | // Get the object from storage 111 | object, err := sc.minioClient.GetObject(ctx, sc.bucket, objectName, minio.GetObjectOptions{}) 112 | if err != nil { 113 | return nil, fmt.Errorf("failed to get object %s: %v", objectName, err) 114 | } 115 | defer object.Close() 116 | 117 | // Read the object content 118 | var buf bytes.Buffer 119 | _, err = buf.ReadFrom(object) 120 | if err != nil { 121 | return nil, fmt.Errorf("failed to read object %s: %v", objectName, err) 122 | } 123 | 124 | // Check if the content is compressed 125 | info, err := object.Stat() 126 | if err != nil { 127 | return nil, fmt.Errorf("failed to stat object %s: %v", objectName, err) 128 | } 129 | 130 | content := buf.Bytes() 131 | if info.Metadata.Get("Content-Encoding") == "gzip" { 132 | // Decompress the content 133 | gzipReader, err := gzip.NewReader(bytes.NewReader(content)) 134 | if err != nil { 135 | return nil, fmt.Errorf("failed to create gzip reader for object %s: %v", objectName, err) 136 | } 137 | defer gzipReader.Close() 138 | 139 | decompressedContent, err := io.ReadAll(gzipReader) 140 | if err != nil { 141 | return nil, fmt.Errorf("failed to decompress object %s: %v", objectName, err) 142 | } 143 | 144 | content = decompressedContent 145 | } 146 | 147 | return content, nil 148 | } 149 | 150 | func (sc *StorageClient) FileExists(ctx context.Context, objectName string) (bool, error) { 151 | // Attempt to get object information 152 | _, err := sc.minioClient.StatObject(ctx, sc.bucket, objectName, minio.StatObjectOptions{}) 153 | if err != nil { 154 | // If the error is because the object does not exist, return false 155 | if minio.ToErrorResponse(err).Code == "NoSuchKey" { 156 | return false, nil 157 | } 158 | // Otherwise, return the error 159 | return false, fmt.Errorf("error checking if object %s exists: %v", objectName, err) 160 | } 161 | // If no error, the object exists 162 | return true, nil 163 | } 164 | 165 | // DeleteFile deletes a file from storage 166 | func (sc *StorageClient) DeleteFile(ctx context.Context, objectName string) error { 167 | err := sc.minioClient.RemoveObject(ctx, sc.bucket, objectName, minio.RemoveObjectOptions{}) 168 | if err != nil { 169 | return fmt.Errorf("failed to delete object %s: %v", objectName, err) 170 | } 171 | return nil 172 | } 173 | 174 | // Helper function to trim scheme from endpoint 175 | func trimEndpointScheme(endpoint string) string { 176 | if len(endpoint) >= 8 && endpoint[:8] == "https://" { 177 | return endpoint[8:] 178 | } 179 | if len(endpoint) >= 7 && endpoint[:7] == "http://" { 180 | return endpoint[7:] 181 | } 182 | return endpoint 183 | } 184 | -------------------------------------------------------------------------------- /story-details.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "database/sql" 5 | "fmt" 6 | "net/url" 7 | "strings" 8 | "time" 9 | 10 | 
"github.com/weppos/publicsuffix-go/publicsuffix" 11 | 12 | humanize "github.com/dustin/go-humanize" 13 | ) 14 | 15 | type Story struct { 16 | ID int 17 | By string 18 | Title string 19 | URL string 20 | SubmissionTime int64 21 | OriginalSubmissionTime int64 22 | AgeApprox int64 23 | Score int 24 | Comments int 25 | CumulativeUpvotes int 26 | CumulativeExpectedUpvotes float64 27 | UpvoteRate float64 28 | TopRank sql.NullInt32 29 | QNRank sql.NullInt32 30 | RawRank sql.NullInt32 31 | Job bool 32 | Flagged bool 33 | Dupe bool 34 | Archived bool 35 | } 36 | 37 | // PageTemplateData contains the common template data for all pages 38 | type PageTemplateData struct { 39 | Ranking string 40 | UserID sql.NullInt64 41 | } 42 | 43 | // StoryTemplateData combines a Story with page context for use in templates 44 | type StoryTemplateData struct { 45 | Story // embed Story instead of having it as a named field 46 | PageTemplateData 47 | } 48 | 49 | // Page-specific methods for ranking-based pages 50 | func (p PageTemplateData) IsHNTopPage() bool { 51 | return p.Ranking == "hntop" 52 | } 53 | 54 | func (p PageTemplateData) IsFairPage() bool { 55 | return p.Ranking == "fair" 56 | } 57 | 58 | func (p PageTemplateData) IsUpvoteratePage() bool { 59 | return p.Ranking == "upvoterate" 60 | } 61 | 62 | func (p PageTemplateData) IsBestUpvoteratePage() bool { 63 | return p.Ranking == "best-upvoterate" 64 | } 65 | 66 | func (p PageTemplateData) IsNewPage() bool { 67 | return p.Ranking == "new" 68 | } 69 | 70 | func (p PageTemplateData) IsBestPage() bool { 71 | return p.Ranking == "best" 72 | } 73 | 74 | func (p PageTemplateData) IsAskPage() bool { 75 | return p.Ranking == "ask" 76 | } 77 | 78 | func (p PageTemplateData) IsShowPage() bool { 79 | return p.Ranking == "show" 80 | } 81 | 82 | func (p PageTemplateData) IsRawPage() bool { 83 | return p.Ranking == "raw" 84 | } 85 | 86 | func (p PageTemplateData) IsPenaltiesPage() bool { 87 | return p.Ranking == "penalties" 88 | } 89 | 90 | func (p PageTemplateData) IsBoostsPage() bool { 91 | return p.Ranking == "boosts" 92 | } 93 | 94 | func (p PageTemplateData) IsResubmissionsPage() bool { 95 | return p.Ranking == "resubmissions" 96 | } 97 | 98 | // Default implementations for non-ranking based pages 99 | func (p PageTemplateData) IsAboutPage() bool { 100 | return false 101 | } 102 | 103 | func (p PageTemplateData) IsAlgorithmsPage() bool { 104 | return false 105 | } 106 | 107 | func (p PageTemplateData) IsScorePage() bool { 108 | return false 109 | } 110 | 111 | func (p PageTemplateData) IsStatsPage() bool { 112 | return false 113 | } 114 | 115 | func (p PageTemplateData) IsAlternativeFrontPage() bool { 116 | return p.IsHNTopPage() || p.IsRawPage() || p.IsPenaltiesPage() || p.IsBoostsPage() || p.IsResubmissionsPage() || p.IsFairPage() || p.IsUpvoteratePage() || p.IsBestUpvoteratePage() || p.IsNewPage() || p.IsBestPage() || p.IsAskPage() || p.IsShowPage() 117 | } 118 | 119 | func (s Story) AgeString() string { 120 | // return humanize.Time(time.Unix(int64(time.Now().Unix()-s.AgeApprox), 0)) 121 | return humanize.Time(time.Unix(int64(time.Now().Unix()-s.AgeApprox), 0)) 122 | } 123 | 124 | func (s Story) OriginalAgeString() string { 125 | return humanize.Time(time.Unix(s.OriginalSubmissionTime, 0)) 126 | } 127 | 128 | func (s Story) IsResubmitted() bool { 129 | return s.SubmissionTime != s.OriginalSubmissionTime 130 | } 131 | 132 | func (s Story) UpvoteRateString() string { 133 | return fmt.Sprintf("%.2f", s.UpvoteRate) 134 | } 135 | 136 | func (s Story) RankDiff() int32 { 
137 | if !s.RawRank.Valid { 138 | return 0 139 | } 140 | rawRank := s.RawRank.Int32 141 | topRank := s.TopRank.Int32 142 | 143 | if !s.TopRank.Valid { 144 | topRank = 91 145 | } 146 | 147 | return rawRank - topRank 148 | } 149 | 150 | func abs(a int32) int32 { 151 | if a >= 0 { 152 | return a 153 | } 154 | return -a 155 | } 156 | 157 | func (s Story) RankDiffAbs() int32 { 158 | return abs(s.RankDiff()) 159 | } 160 | 161 | func (s Story) OverRanked() bool { 162 | return s.RankDiff() > 0 163 | } 164 | 165 | func (s Story) UnderRanked() bool { 166 | return s.RankDiff() < 0 167 | } 168 | 169 | func (s Story) Domain() string { 170 | u, err := url.Parse(s.URL) 171 | if err != nil { 172 | return "" 173 | } 174 | 175 | domain, err := publicsuffix.Domain(u.Host) 176 | if err != nil { 177 | return "" 178 | } 179 | 180 | // some domains are treated specially: 181 | 182 | // twitter.com/x 183 | // github.com/x 184 | // x.substack.com 185 | // x.notion.site 186 | // x.dreamhosters.com 187 | 188 | if u.Host == "news.ycombinator.com" { 189 | return "" 190 | } 191 | if domain == "twitter.com" || domain == "github.com" { 192 | // keep first part of path; guard against empty or bare "/" paths, which would otherwise panic 193 | parts := strings.SplitN(u.Path, "/", 3) 194 | if len(parts) > 1 && parts[1] != "" { 195 | return domain + "/" + parts[1] 196 | } 197 | return domain 198 | } 199 | 200 | if domain == "substack.com" || domain == "notion.site" || domain == "dreamhosters.com" { 201 | // keep subdomain 202 | return strings.Split(u.Host, ".")[0] + "." + domain 203 | } 204 | 205 | return domain 206 | } 207 | 208 | func (s Story) ISOTimestamp() string { 209 | return time.Unix(s.SubmissionTime, 0).UTC().Format("2006-01-02T15:04:05") 210 | } 211 | 212 | func (s Story) OriginalISOTimestamp() string { 213 | return time.Unix(s.OriginalSubmissionTime, 0).UTC().Format("2006-01-02T15:04:05") 214 | } 215 | -------------------------------------------------------------------------------- /storyplot-data.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "database/sql" 6 | 7 | "github.com/pkg/errors" 8 | ) 9 | 10 | func maxSampleTime(ctx context.Context, ndb newsDatabase, storyID int) (int, error) { 11 | var n int 12 | err := ndb.db.QueryRowContext(ctx, ` 13 | select max(sampleTime) from dataset 14 | where id = ?
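-- note: dataset has one row per (id, sampleTime) pair, i.e. one per crawl sample, so max(sampleTime) is the latest sample for this story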
15 | `, storyID).Scan(&n) 16 | 17 | return n, errors.Wrap(err, "QueryRow count: select max(sampleTime)") 18 | } 19 | 20 | func rankDatapoints(ctx context.Context, ndb newsDatabase, storyID int) ([][]any, error) { 21 | var n int 22 | if err := ndb.db.QueryRowContext(ctx, "select count(*) from dataset where id = ?", storyID).Scan(&n); err != nil { 23 | return nil, errors.Wrap(err, "QueryRow: select count") 24 | } 25 | 26 | if n == 0 { 27 | return nil, ErrStoryIDNotFound 28 | } 29 | 30 | var submissionTime int64 31 | if err := ndb.db.QueryRowContext(ctx, "select timestamp from stories where id = ?", storyID).Scan(&submissionTime); err != nil { 32 | return nil, errors.Wrap(err, "QueryRow: select submissionTime") 33 | } 34 | 35 | ranks := make([][]any, n) 36 | 37 | rows, err := ndb.db.QueryContext(ctx, "select sampleTime, rawRank, topRank, newRank, bestRank, askRank, showRank from dataset where id = ?", storyID) 38 | if err != nil { 39 | return nil, errors.Wrap(err, "Query: select ranks") 40 | } 41 | defer rows.Close() 42 | 43 | // rawRank, top, new, best, ask, show 44 | const nRanks = 6 45 | 46 | i := 0 47 | for rows.Next() { 48 | var sampleTime int64 49 | 50 | var nullableRanks [nRanks]sql.NullInt32 51 | 52 | err = rows.Scan(&sampleTime, &nullableRanks[0], &nullableRanks[1], &nullableRanks[2], &nullableRanks[3], &nullableRanks[4], &nullableRanks[5]) 53 | 54 | if err != nil { 55 | return nil, errors.Wrap(err, "rows.Scan") 56 | } 57 | 58 | ranks[i] = make([]any, nRanks+1) 59 | ranks[i][0] = sampleTime 60 | 61 | for j, rank := range nullableRanks { 62 | if rank.Valid { 63 | ranks[i][j+1] = rank.Int32 64 | } else { 65 | ranks[i][j+1] = 91 66 | } 67 | } 68 | 69 | i++ 70 | } 71 | 72 | err = rows.Err() 73 | 74 | return ranks, errors.Wrap(err, "rows.Err") 75 | } 76 | 77 | func upvotesDatapoints(ctx context.Context, ndb newsDatabase, storyID int, modelParams ModelParams) ([][]any, error) { 78 | var n int 79 | if err := ndb.db.QueryRowContext(ctx, "select count(*) from dataset where id = ?", storyID).Scan(&n); err != nil { 80 | return nil, errors.Wrap(err, "QueryRow: select count") 81 | } 82 | 83 | if n == 0 { 84 | return nil, ErrStoryIDNotFound 85 | } 86 | 87 | var submissionTime int64 88 | if err := ndb.db.QueryRowContext(ctx, "select timestamp from stories where id = ?", storyID).Scan(&submissionTime); err != nil { 89 | return nil, errors.Wrap(err, "QueryRow: select submissionTime") 90 | } 91 | 92 | upvotesData := make([][]any, n) 93 | 94 | rows, err := ndb.db.QueryContext(ctx, `select sampleTime, cumulativeUpvotes, cumulativeExpectedUpvotes 95 | from dataset where id = ?`, storyID) 96 | if err != nil { 97 | return nil, errors.Wrap(err, "Query: select upvotes") 98 | } 99 | defer rows.Close() 100 | 101 | i := 0 102 | for rows.Next() { 103 | var sampleTime int64 104 | var upvotes int 105 | var expectedUpvotes float64 106 | 107 | err = rows.Scan(&sampleTime, &upvotes, &expectedUpvotes) 108 | 109 | if err != nil { 110 | return nil, errors.Wrap(err, "rows.Scan") 111 | } 112 | 113 | upvotesData[i] = []any{ 114 | sampleTime, 115 | int32(upvotes), 116 | expectedUpvotes, 117 | modelParams.upvoteRate(upvotes, expectedUpvotes), 118 | } 119 | i++ 120 | } 121 | 122 | err = rows.Err() 123 | 124 | return upvotesData, errors.Wrap(err, "rows.Err") 125 | } 126 | -------------------------------------------------------------------------------- /templates.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "html/template" 6 | "io/fs" 7 | 8 |
"github.com/pkg/errors" 9 | // some templates functions we might use 10 | // "github.com/Masterminds/sprig/v3" 11 | ) 12 | 13 | var templates = template.Must(ParseFSStrict(resources, "templates")) 14 | 15 | // ParseFSStrict works like template.ParseFS, but is more strict: 16 | // - each template will be given the same name as the file it is defined in 17 | // - each filename can contain only one template and may not {{define}} subtemplates 18 | // - filenames must end in .tmpl 19 | // 20 | // This approach eliminates the possibility of inconsistency between the names 21 | // of templates and the names of template files, reducing decision overhead 22 | // and opportunities for surprises for developers. It also eliminates the 23 | // possibility of two templates accidentally being given the same name, which 24 | // will result in one template being overwritten by the other and can create 25 | // surprising bugs (this was the immediate motivation for creating this 26 | // function). 27 | // 28 | // The returned template's name will have the base name and parsed contents of 29 | // the first file. There must be at least one file. If an error occurs, 30 | // parsing stops and the returned *Template is nil. 31 | // 32 | // Templates in subdirectories of the provided directory will be parsed. The 33 | // names of templates in subdirectories will be prefixed with the name 34 | // of subdirectory (e.g. "charts/chart1.html.tmpl") 35 | // 36 | // TODO: submit pull request to add this to the html/template library. 37 | 38 | func ParseFSStrict(resources fs.FS, dir string) (*template.Template, error) { 39 | var ts *template.Template 40 | 41 | templateFiles, err := fs.ReadDir(resources, dir) 42 | if err != nil { 43 | return ts, errors.Wrapf(err, "fs.ReadDir(%s)", dir) 44 | } 45 | 46 | for _, dirEntry := range templateFiles { 47 | if dirEntry.IsDir() { 48 | subDirName := dir + "/" + dirEntry.Name() 49 | subTemplates, err := ParseFSStrict(resources, subDirName) 50 | if err != nil { 51 | return ts, errors.Wrapf(err, "fs.ReadDir(%s)", subDirName) 52 | } 53 | 54 | for _, t := range subTemplates.Templates() { 55 | fileName := dirEntry.Name() + "/" + t.Name() 56 | if ts == nil { 57 | ts = t 58 | } 59 | _, err := ts.AddParseTree(fileName, t.Tree) 60 | if err != nil { 61 | return ts, errors.Wrapf(err, "ts.AddParseTree(%s)", fileName) 62 | } 63 | } 64 | 65 | continue 66 | } 67 | fileName := dirEntry.Name() 68 | 69 | // use this to add sprig functions. 70 | // t, err := template.New(fileName).Funcs(sprig.FuncMap()).ParseFS(resources, dir+"/"+fileName) 71 | t, err := template.New(fileName).ParseFS(resources, dir+"/"+fileName) 72 | if err != nil { 73 | return ts, errors.Wrapf(err, "parsing template %s", dir+"/"+fileName) 74 | } 75 | 76 | for _, t := range t.Templates() { 77 | if t.Name() != fileName { 78 | return ts, fmt.Errorf(`{{define "%v"}} in file %v not allowed when using ParseFSStrict. 
Each template file must contain one template whose name will be equal to the filename.`, t.Name(), fileName) 79 | } 80 | } 81 | if ts == nil { 82 | ts = t 83 | } 84 | 85 | _, err = ts.AddParseTree(fileName, t.Tree) 86 | if err != nil { 87 | return ts, errors.Wrapf(err, "ts.AddParseTree(%s)", fileName) 88 | } 89 | } 90 | 91 | if ts == nil { 92 | return ts, fmt.Errorf("No template files found in directory %s", dir) 93 | } 94 | return ts, nil 95 | } 96 | -------------------------------------------------------------------------------- /templates/about-content.html.tmpl: -------------------------------------------------------------------------------- 1 |

Quality News is a Hacker News client with:

  1. additional story performance stats shown below each story
  2. detailed historical charts (click on the story's ×upvoteRate stats)
  3. additional ranking algorithms

For more details, see the Readme on GitHub.

This is a collective intelligence experiment by Social Protocols. Follow us on Mastodon or Twitter, or send a mail.

Definitions

Upvote Rate

The ×upvoteRate quantifies how much more or less likely users are to upvote this story compared to the average story. It is calculated as the story's total upvotes divided by total expected upvotes.

Expected Upvotes

The expected upvotes for a story is an estimate of the number of upvotes the average story would have received if it were shown at the same times at the same ranks.

Raw Rank

The raw rank is the rank that a story would have according to the "raw" Hacker News ranking formula:
upvotes^0.8 / (ageHours+2)^1.8
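{{/* A minimal Go sketch of the formula above, for illustration only (the helper name and signature are hypothetical, not taken from this codebase):

    func rawRankScore(upvotes int, ageHours float64) float64 {
        // age decays faster (exponent 1.8) than upvotes accumulate (exponent 0.8)
        return math.Pow(float64(upvotes), 0.8) / math.Pow(ageHours+2, 1.8)
    }

Sorting stories by this score in descending order yields the raw rank. */}}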
This formula produces the ranking that you can see on the raw page. But the actual HN ranking is further influenced by moderator actions, user flags, and other factors which boost or penalize stories.

Rank Delta

The delta between the raw rank and the front page rank. An over-ranked story may have received a boost from HN moderators, while an under-ranked story may have received a penalty.

{{/*
Rank Delta

The rank delta is the difference between the story's actual rank and its raw rank (described above).

A value of +1 means that a story is ranked 1 position higher on the front page than if it were ranked using the raw formula only.
*/}}

{{/*
Second-Chance Age

The second-chance age is the story's revised age after being re-posted from the second-chance queue.
*/}}

-------------------------------------------------------------------------------- /templates/about.html.tmpl: --------------------------------------------------------------------------------

About Quality News

{{template "header.html.tmpl" .}}

{{if .IsAboutPage}}
{{template "about-content.html.tmpl" .}}
{{end}}

{{if .IsAlgorithmsPage}}
{{template "algorithms-content.html.tmpl" .}}
{{end}}

-------------------------------------------------------------------------------- /templates/algorithms-content.html.tmpl: --------------------------------------------------------------------------------

Ranking Algorithms

-------------------------------------------------------------------------------- /templates/header.html.tmpl: --------------------------------------------------------------------------------
2 | 3 | Quality News 4 |
5 | 6 | new | 7 | top | 8 | ask | 9 | show | 10 | best | 11 | 12 | {{if .IsRawPage}}raw |{{end}} 13 | {{if .IsFairPage}}fair |{{end}} 14 | 15 | {{if .IsUpvoteratePage}}upvoterate |{{end}} 16 | {{if .IsBestUpvoteratePage}}best-upvoterate |{{end}} 17 | 18 | {{if .IsPenaltiesPage}}penalties |{{end}} 19 | {{if .IsBoostsPage}}boosts |{{end}} 20 | {{if .IsResubmissionsPage}}resubmissions |{{end}} 21 | 22 | algorithms | 23 | 24 | {{ if .UserID.Valid }} score | {{ end }} 25 | about 26 |
27 | 28 |
29 | {{if .UserID.Valid}}Hello User {{.UserID.Int64}}{{end}} 30 | 31 | 32 | 33 |
34 | 35 |
-------------------------------------------------------------------------------- /templates/index.html.tmpl: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 24 | 25 | 26 | 27 | 33 | 34 | Quality News: Hacker News Rankings 35 | 36 | 37 | 38 | {{template "header.html.tmpl" .}} 39 | 40 | 41 |
42 | {{if .IsRawPage}} 43 | 44 | These are the current stories on the Hacker News Front page ranked by the raw score produced by the HN formula, with no moderation penalties or bonuses applied. This makes the impact of moderation on the HN frontpage visible (e.g. off-topic/non-tech stories are ranked higher). 45 | 46 | 47 | {{else if .IsFairPage}} 48 | 49 | This is an alternative Hacker News front page with a "fairer" ranking formula as described here. 50 | 51 | {{else if .IsUpvoteratePage}} 52 | 53 | This is an alternative Hacker News front page based on ×UpvoteRate (?) instead of upvotes. Ignores HN moderator boosts/penalties. 54 | 55 | {{else if .IsBestUpvoteratePage}} 56 | 57 | This page ranks Hacker News stories based on all-time highest ×UpvoteRate (?). 58 | 59 | {{else if .IsPenaltiesPage}} 60 | 61 | This page shows stories whose rank on the Hacker News front page is significantly worse than their raw rank, indicating that they have been penalized by Hacker News moderators. See this blog post for a discussion of how Hacker News applies penalties. 62 | 63 | {{else if .IsBoostsPage}} 64 | 65 | This page shows stories whose rank on the Hacker News front page is significantly better than their raw rank, indicating action by Hacker News moderators such as addition to the second-chance pool. 66 | 67 | {{else if .IsResubmissionsPage}} 68 | 69 | This page shows stories that have been randomly selected from the second-chance pool and added to the front page. Sorted by most recent. 70 | 71 | {{else}} 72 | 73 | This is the current Hacker News 74 | 75 | {{if .IsHNTopPage}} 76 | Front 77 | {{end}} 78 | {{if .IsNewPage}} 79 | "New" 80 | {{end}} 81 | {{if .IsAskPage}} 82 | "Ask HN" 83 | {{end}} 84 | {{if .IsShowPage}} 85 | "Show HN" 86 | {{end}} 87 | {{if .IsBestPage}} 88 | "Best" 89 | {{end}} Page, with some additional performance stats. 90 | 91 | {{end}} 92 | 93 | Click on the colorful stats below each story to see detailed historical charts. Click here for additional ranking algorithms. 94 | 95 |
96 | 97 |
key: 98 | ×UpvoteRate (?) 99 | {{/*if (or .IsHNTopPage .IsPenaltyOrBoostPage)}}  +/- rank delta (?){{end*/}} 100 | 101 | {{if (not .IsHNTopPage)}} 102 |   #rank on front page 103 | {{end}} 104 | 105 |   rankDelta (?) 106 | 107 | {{/*  original 2nd-chance age (?)*/}} 108 |
109 | 110 | 111 |
    112 | {{range .Stories}} 113 |
  1. 114 | {{template "storyDetails.html.tmpl" .}} 115 |
  2. 116 | {{end}} 117 |
118 | 119 | {{/* 120 |
121 |

stats

122 | 127 |
128 | 129 | 130 |
131 |

parameters

132 | 142 |
143 | */}} 144 | 145 | 146 | 147 | 148 | -------------------------------------------------------------------------------- /templates/normalize.css.tmpl: -------------------------------------------------------------------------------- 1 | /* Normalize.css 2 | ----------------------------------------------- */ 3 | 4 | article,aside,details,figcaption,figure,footer,header,hgroup,nav,section,summary{display:block;}audio,canvas,video{display:inline-block;*display:inline;*zoom:1;}audio:not([controls]){display:none;height:0;}[hidden]{display:none;}html{font-size:100%;-webkit-text-size-adjust:100%;-ms-text-size-adjust:100%;}html,button,input,select,textarea{font-family:sans-serif;}body{margin:0;}a:focus{outline:thin dotted;}a:active,a:hover{outline:0;}h1{font-size:2em;margin:0.67em 0;}h2{font-size:1.5em;margin:0.83em 0;}h3{font-size:1.17em;margin:1em 0;}h4{font-size:1em;margin:1.33em 0;}h5{font-size:0.83em;margin:1.67em 0;}h6{font-size:0.75em;margin:2.33em 0;}abbr[title]{border-bottom:1px dotted;}b,strong{font-weight:bold;}blockquote{margin:1em 40px;}dfn{font-style:italic;}mark{background:#ff0;color:#000;}p,pre{margin:1em 0;}code,kbd,pre,samp{font-family:monospace,serif;_font-family:'courier new',monospace;font-size:1em;}pre{white-space:pre;white-space:pre-wrap;word-wrap:break-word;}q{quotes:none;}q:before,q:after{content:'';content:none;}small{font-size:75%;}sub,sup{font-size:75%;line-height:0;position:relative;vertical-align:baseline;}sup{top:-0.5em;}sub{bottom:-0.25em;}dl,menu,ol,ul{margin:1em 0;}dd{margin:0 0 0 40px;}menu,ol,ul{padding:0 0 0 40px;}nav ul,nav ol{list-style:none;list-style-image:none;}img{border:0;-ms-interpolation-mode:bicubic;}svg:not(:root){overflow:hidden;}figure{margin:0;}form{margin:0;}fieldset{border:1px solid #c0c0c0;margin:0 2px;padding:0.35em 0.625em 0.75em;}legend{border:0;padding:0;white-space:normal;*margin-left:-7px;}button,input,select,textarea{font-size:100%;margin:0;vertical-align:baseline;*vertical-align:middle;}button,input{line-height:normal;}button,html input[type="button"],input[type="reset"],input[type="submit"]{-webkit-appearance:button;cursor:pointer;*overflow:visible;}button[disabled],input[disabled]{cursor:default;}input[type="checkbox"],input[type="radio"]{box-sizing:border-box;padding:0;*height:13px;*width:13px;}input[type="search"]{-webkit-appearance:textfield;-moz-box-sizing:content-box;-webkit-box-sizing:content-box;box-sizing:content-box;}input[type="search"]::-webkit-search-cancel-button,input[type="search"]::-webkit-search-decoration{-webkit-appearance:none;}button::-moz-focus-inner,input::-moz-focus-inner{border:0;padding:0;}textarea{overflow:auto;vertical-align:top;}table{border-collapse:collapse;border-spacing:0;} 5 | -------------------------------------------------------------------------------- /templates/ranksPlot.js.tmpl: -------------------------------------------------------------------------------- 1 | 2 | function prepareRanksPlotData(dataPoints, submissionTime, endTime) { 3 | 4 | var length 5 | for (var i = 0; i < dataPoints.length && dataPoints[i][0] <= endTime; i++) { 6 | length = i+1 7 | } 8 | 9 | var results = [] 10 | 11 | // plot only age, qntop, hntop, new, and best 12 | // so 5 columns of data (x axis plus 4 ranks) 13 | var n = 5 14 | var lastValue = [null,null,null,null,null] 15 | for (var i = 0; i < length; i++) { 16 | 17 | var p = dataPoints[i].slice(0, n) 18 | 19 | // convert timestamp to age in hours 20 | p[0] = (p[0] - submissionTime)/3600 21 | 22 | // only plot a single point when a line leaves/exits the chart from 
below rank 91 23 | for (var j = 1; j < n; j++) { 24 | var lastValueIsOffChart = ( lastValue[j] == 91 || lastValue[j] == null ) 25 | var nextValueIsOnChart = ( i+1 < length && dataPoints[i+1][j] != null && dataPoints[i+1][j] != 91) 26 | 27 | if ( p[j] == 91 && (lastValueIsOffChart && !nextValueIsOnChart) ) { 28 | p[j] = null 29 | } else { 30 | lastValue[j] = p[j] 31 | } 32 | } 33 | results[i] = p 34 | } 35 | return results 36 | } 37 | 38 | function ranksPlot(dataPoints, submissionTime, startTime, endTime) { 39 | var plotDiv = document.getElementById('ranks_plot_div') 40 | 41 | var data = new google.visualization.DataTable(); 42 | data.addColumn('number', 'Age'); 43 | // data.addColumn('number', 'QN Rank'); 44 | data.addColumn('number', 'Raw Rank'); 45 | data.addColumn('number', '"Top" Rank'); 46 | data.addColumn('number', '"New" Rank'); 47 | data.addColumn('number', '"Best" Rank'); 48 | 49 | data.addRows(prepareRanksPlotData(dataPoints, submissionTime, endTime)); 50 | 51 | var ageFormatter = new ageFormat(); 52 | 53 | ageFormatter.format(data, 0); 54 | 55 | var rankFormatter = new rankFormat() 56 | rankFormatter.format(data, 1); 57 | rankFormatter.format(data, 2); 58 | rankFormatter.format(data, 3); 59 | rankFormatter.format(data, 4); 60 | 61 | 62 | // https://developers.google.com/chart/interactive/docs/gallery/linechart#configuration-options 63 | var options = { 64 | backgroundColor: {fill: 'transparent'}, 65 | dataOpacity: 0.85, 66 | hAxis: { 67 | title: 'Age [hours]', 68 | logScale: false, 69 | viewWindow: { 70 | min: (startTime-submissionTime)/3600, 71 | max: (endTime-submissionTime)/3600, 72 | } 73 | }, 74 | vAxis: { 75 | title: 'Rank', 76 | logScale: true, 77 | direction: -1, 78 | viewWindow: { 79 | max: 1, 80 | min: 91 81 | }, 82 | ticks: [1,2,4,8,16,32,64,{v: 91, f: "> 90"}], 83 | }, 84 | interpolateNulls: false, 85 | series: { 86 | 0: {pointShape: 'diamond', pointSize: 5, interpolateNulls: false}, 87 | 1: {pointShape: 'circle', pointSize: 3, interpolateNulls: false}, 88 | 2: {pointShape: 'square', pointSize: 3, interpolateNulls: false}, 89 | 3: {pointShape: 'square', pointSize: 3, interpolateNulls: false}, 90 | // 4: {pointShape: 'square', pointSize: 0}, 91 | // 5: {pointShape: 'square', pointSize: 0} 92 | }, 93 | 94 | lineDashStyle: [1,1], 95 | lineWidth: 1, 96 | colors: ['black', '#FF6600', "#AF7FDF", "#6FAEAE", "green","pink"], 97 | // colors: ['#0089F4', '#FF6600', "#AF7FDF", "#6FAEAE", "green","pink"], 98 | chartArea:{left:80, top:50, bottom: 80, right: 80}, 99 | height: 350, 100 | legend: { position: 'bottom' }, 101 | crosshair: { trigger: 'both' }, 102 | title: "Rank", 103 | }; 104 | 105 | var chart = new google.visualization.LineChart(plotDiv); 106 | 107 | chart.draw(data, options); 108 | } 109 | -------------------------------------------------------------------------------- /templates/score.html.tmpl: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 74 | 75 | 76 | 79 | 80 | 81 | 82 | 83 | 102 | 103 | 104 | 105 | Quality News: User Scoreboard 106 | 107 | 108 | 109 | {{template "header.html.tmpl" .}} 110 | 111 | 112 |
113 | 114 |
115 |

Score History. Current Score: {{.ScoreString}}. Average Score: {{.AverageScoreString}}

116 | 117 |
118 |
119 | 120 | 121 |
122 |

Vote History

123 |
124 | 125 |
126 | 127 | 128 |
129 | 130 | 219 | 220 |
221 | 222 | 223 | 224 | -------------------------------------------------------------------------------- /templates/scorePlot.js.tmpl: -------------------------------------------------------------------------------- 1 | 2 | function prepareScorePlotData(dataPoints, startTime, endTime) { 3 | 4 | var length 5 | for (var i = 0; i < dataPoints.length && dataPoints[i][0] <= endTime; i++) { 6 | length = i+1 7 | } 8 | 9 | var results = [] 10 | 11 | for (var i = 0; i < length; i++) { 12 | var p = dataPoints[i] 13 | var date = new Date(p[0]*1000) 14 | 15 | var userScore = p[4] 16 | var scoreClass = "" 17 | 18 | if (userScore > 0) { 19 | scoreClass = "gain" 20 | } else if (userScore < 0) { 21 | scoreClass = "loss" 22 | } 23 | 24 | 25 | var direction = p[5] 26 | var voteType = "upvote" 27 | if (direction == -1) { 28 | voteType = "downvote" 29 | } 30 | 31 | var upvoteRateString = p[7] 32 | var entryUpvoteRateString = p[6] 33 | 34 | // , p.EntryUpvoteRateString(), p.CurrentUpvoteRateString(), p.ExitUpvoteRateString() 35 | 36 | // p[1] rounded to two decimal places 37 | results[i] = [i, p[1], p[2], 38 | "\n" + p[3] + "\n" 39 | + "×" + upvoteRateString + "\n" 40 | 41 | + "\n" 42 | + "" + entryUpvoteRateString + "\n" 43 | + "" + userScore + "\n" 44 | + "\n" 45 | + "Total: " + Math.round(p[1] * 100) / 100 46 | 47 | ] 48 | } 49 | return results 50 | } 51 | 52 | var chart 53 | function scorePlot(dataPoints, startTime, endTime) { 54 | var plotDiv = document.getElementById('score_plot_div') 55 | 56 | var data = new google.visualization.DataTable(); 57 | data.addColumn('number', 'i'); 58 | // data.addColumn('number', 'QN Rank'); 59 | data.addColumn('number', 'Score'); 60 | // data.addColumn('string', 'Position ID'); 61 | data.addColumn({type: 'string', role: 'annotationText'}); 62 | data.addColumn({type: 'string', role: 'tooltip', 'p': {'html': true}}); 63 | // data.addColumn('string', 'Story Title'); 64 | 65 | 66 | dataPoints = prepareScorePlotData(dataPoints, startTime, endTime) 67 | 68 | data.addRows(dataPoints); 69 | 70 | // https://developers.google.com/chart/interactive/docs/gallery/linechart#configuration-options 71 | var options = { 72 | isStacked:true, 73 | series: { 74 | 0: { 75 | areaOpacity: 1, 76 | color: '#EF9A9A', 77 | type: 'area', 78 | visibleInLegend: false 79 | } 80 | }, 81 | backgroundColor: {fill: 'transparent'}, 82 | dataOpacity: 0.85, 83 | // hAxis: { 84 | // title: 'Date', 85 | // logScale: false, 86 | // // viewWindow: { 87 | // // min: (startTime-submissionTime)/3600, 88 | // // max: (endTime-submissionTime)/3600, 89 | // // } 90 | // }, 91 | vAxis: { 92 | title: 'Score', 93 | // logScale: true, 94 | // direction: -1, 95 | // viewWindow: { 96 | // max: 1, 97 | // min: 91 98 | // }, 99 | // ticks: [1,2,4,8,16,32,64,{v: 91, f: "> 90"}], 100 | }, 101 | 102 | 103 | annotations: { 104 | textStyle: { 105 | color: 'black', 106 | fontSize: 11, 107 | }, 108 | alwaysOutside: true, 109 | style: "point", 110 | }, 111 | // interpolateNulls: false, 112 | // series: { 113 | // 0: {pointShape: 'diamond', pointSize: 5, interpolateNulls: false}, 114 | // 1: {pointShape: 'circle', pointSize: 3, interpolateNulls: false}, 115 | // 2: {pointShape: 'square', pointSize: 3, interpolateNulls: false}, 116 | // 3: {pointShape: 'square', pointSize: 3, interpolateNulls: false}, 117 | // // 4: {pointShape: 'square', pointSize: 0}, 118 | // // 5: {pointShape: 'square', pointSize: 0} 119 | // }, 120 | 121 | // lineDashStyle: [1,1], 122 | // lineWidth: 1, 123 | // colors: ['black', '#FF6600', "#AF7FDF", "#6FAEAE", 
"green","pink"], 124 | // colors: ['#0089F4', '#FF6600', "#AF7FDF", "#6FAEAE", "green","pink"], 125 | chartArea:{left:60, top:20, bottom: 20, right: 0}, 126 | height: 275, 127 | width: 800, 128 | legend: { position: 'bottom' }, 129 | crosshair: { trigger: 'both' }, 130 | 131 | tooltip: {isHtml: true}, 132 | 133 | 134 | title: "Score", 135 | 136 | 137 | // annotationsWidth: 0, 138 | // displayAnnotationsFilter: false, 139 | // fill: 50, 140 | // allowHtml: true, 141 | }; 142 | 143 | var chart = new google.visualization.LineChart(plotDiv); 144 | // chart = new google.visualization.AnnotationChart(plotDiv); 145 | 146 | // google.visualization.events.addListener(chart, 'rangechange', rangechange_handler); 147 | google.visualization.events.addListener(chart, 'select', select_handler); 148 | 149 | 150 | chart.draw(data, options); 151 | 152 | 153 | function select_handler(e) { 154 | console.log("In select handler", e) 155 | console.log(chart.getSelection()) 156 | var n = chart.getSelection()[0].row 157 | window.location.hash = '#position-' + scorePlotData[n][2] 158 | 159 | // zoomIn(n) 160 | } 161 | 162 | } 163 | 164 | function clickPositionLabel(i) { 165 | zoomIn(scorePlotData.length-i-1) 166 | } 167 | 168 | function zoomIn(i) { 169 | var mid = i - 1 170 | var lower = i - 1 171 | var upper = i + 1 172 | 173 | if (lower < 0) { 174 | lower = 0 175 | } 176 | if (upper > scorePlotData.length-1) { 177 | upper = scorePlotData.length-1 178 | } 179 | 180 | var lowerTime = scorePlotData[lower][0] 181 | var midTime = scorePlotData[i][0] 182 | var upperTime = scorePlotData[upper][0] 183 | 184 | var windowLength = 3600 185 | if( (upperTime - midTime) > (midTime - lowerTime) ) { 186 | // if( (upperTime - midTime) > 3600 ) { 187 | windowLength = (upperTime - midTime) 188 | // } 189 | } else { 190 | windowLength = (midTime - lowerTime) 191 | } 192 | upperTime = midTime + windowLength 193 | lowerTime = midTime - windowLength 194 | 195 | position = scorePlotData[i] 196 | chart.setVisibleChartRange(new Date(lowerTime*1000), new Date(upperTime*1000)) 197 | } 198 | 199 | 200 | -------------------------------------------------------------------------------- /templates/spinner.css.tmpl: -------------------------------------------------------------------------------- 1 | .spinner { 2 | margin: 50px; 3 | display: block; 4 | width: 24px; 5 | height: 24px; 6 | border-color: currentColor; 7 | border-style: solid; 8 | border-radius: 99999px; 9 | border-width: 2px; 10 | border-left-color: transparent; 11 | color: palevioletred; 12 | opacity: 0; 13 | animation-name: rotate, fadeIn; 14 | animation-duration: 450ms, 600ms; 15 | animation-timing-function: linear, ease; 16 | animation-iteration-count: infinite, 1; 17 | animation-delay: 400ms; 18 | animation-fill-mode: forwards; 19 | } 20 | 21 | @keyframes rotate { 22 | 0% { 23 | transform: rotate(0deg); 24 | } 25 | 100% { 26 | transform: rotate(360deg); 27 | } 28 | } 29 | 30 | @keyframes fadeIn { 31 | 0% { 32 | opacity: 0; 33 | } 34 | 100% { 35 | opacity: 1; 36 | } 37 | } 38 | 39 | -------------------------------------------------------------------------------- /templates/stats.html.tmpl: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 30 | 31 | 32 | 35 | 36 | 37 | 38 | 43 | 44 | 45 | 46 | 47 | {{.Title}} | Hacker News Story Stats 48 | 49 | 50 | 51 | 52 | 53 | 54 | {{template "header.html.tmpl" .}} 55 | 56 |
57 | 58 |
59 | 60 | {{template "storyDetails.html.tmpl" .StoryTemplateData}} 61 | 62 |
63 |

Story Stats

64 | 65 | 66 | 68 | 69 | 83 | 84 | 85 | 97 | 98 | 99 |
100 | 101 |
102 | 103 |
104 |
105 | This chart shows the history of this story's rank on the Hacker News "Top" (Front) Page, 106 | "New" Page, 107 | and "Best" Page, as well as its raw rank given the Hacker News ranking formula. 108 |
109 | 110 |
111 | 112 |
113 |
114 | This chart shows the history of this story's upvotes compared to the expected upvotes for stories shown at the same ranks and times. 115 |
116 | 117 |
118 | 119 |
120 |
121 | This chart shows the history of this story's estimated true upvote rate: the predicted long-term ratio of upvotes to expected upvotes. 122 |
123 | 124 |
125 | 126 |
127 |
128 | 129 | 130 | 131 | -------------------------------------------------------------------------------- /templates/storyDetails.html.tmpl: -------------------------------------------------------------------------------- 1 |
2 | 3 |
4 |
5 | {{.Title}} 6 | {{if ne .Domain ""}} ({{.Domain}}){{end}} 7 |
8 |
9 | {{if .Flagged}}[flagged]{{end}} 10 | {{if .Dupe}}[dupe]{{end}} 11 | 12 | {{if (not .Job)}} 13 | 14 | 15 | 19 | ×{{.UpvoteRateString}} 20 | 21 | 22 | 23 | {{if and (.IsAlternativeFrontPage) .OverRanked}}{{.RankDiffAbs}} {{end}} 24 | {{if and (.IsAlternativeFrontPage) .UnderRanked}}{{.RankDiffAbs}} {{end}} 25 | 26 | 27 | 28 | {{if and .TopRank.Valid (not .IsHNTopPage)}}#{{.TopRank.Value}} {{end}} 29 | 30 | 31 | 32 | | 33 | 34 | {{.Score}} points by {{.By}} 35 | 36 | {{end}} 37 | 38 | {{.AgeString}} 39 | 40 | | 41 | 42 | {{if (eq .Comments 0)}}discuss{{else}}{{.Comments}} comments{{end}} 43 | 44 | 45 | | 46 | 47 | 48 | 49 | ??? 50 | 51 | ??? 52 | 53 | 54 |
55 |
56 |
57 | -------------------------------------------------------------------------------- /templates/storyplots.js.tmpl: -------------------------------------------------------------------------------- 1 | 2 | google.charts.load('current', {packages: ['corechart', 'line']}); 3 | google.charts.setOnLoadCallback(drawCharts); 4 | 5 | window.addEventListener('resize', drawCharts, false); 6 | 7 | var submissionTime = {{.Story.SubmissionTime}}; 8 | 9 | var ranksPlotData = {{.RanksPlotDataJSON}}; 10 | var upvotesPlotData = {{.UpvotesPlotDataJSON}}; 11 | var upvoteRatePlotData = upvotesPlotData; 12 | 13 | function drawCharts() { 14 | // make all charts have the same x-axis range as the ranks plot chart 15 | var startTime = ranksPlotData[0][0] 16 | 17 | date = document.getElementsByName("storyplot-date-selector")[0] 18 | var endTime = {{.Story.SubmissionTime}} + date.value*3600 19 | if (endTime > {{.MaxSampleTime}}) { 20 | endTime = {{.MaxSampleTime}} 21 | } 22 | 23 | ranksPlot(ranksPlotData, submissionTime, startTime, endTime) 24 | upvotesPlot(upvotesPlotData, submissionTime, startTime, endTime) 25 | upvoteRatePlot(upvoteRatePlotData, submissionTime, startTime, endTime) 26 | // penaltyPlot(penaltyPlotData, submissionTime, startTime, endTime) 27 | } 28 | 29 | // how rank is displayed when hovering over a datapoint 30 | class rankFormat { 31 | format(dt, column) { 32 | for (var i=0;i [(dataPoints[i][0] - submissionTime)/3600, dataPoints[i][3], 1, dataPoints[i][4]]) 3 | return dataPoints.filter((dataPoint, i) => dataPoints[i][0] <= endTime).map((dataPoint, i) => [ 4 | (dataPoints[i][0] - submissionTime)/3600, 5 | dataPoints[i][3], 6 | 1, 7 | // i == 3 ? "you voted at certain time" : null]) 8 | null]) 9 | } 10 | 11 | function upvoteRatePlot(upvoteRatePlotData, submissionTime, startTime, endTime) { 12 | 13 | var plotDiv = document.getElementById('upvoterate_plot_div') 14 | 15 | var data = new google.visualization.DataTable(); 16 | data.addColumn('number', 'Age'); 17 | data.addColumn('number', 'Estimated True Upvote Rate'); 18 | data.addColumn('number', 'Expected Upvote Rate'); 19 | data.addColumn({type: 'string', role: 'annotation'}); 20 | // data.addColumn('number', 'Moving-Average Upvote Rate'); 21 | 22 | data.addRows(prepareUpvoteRatePlotData(upvoteRatePlotData, submissionTime, endTime)); 23 | 24 | var ageFormatter = new ageFormat() 25 | ageFormatter.format(data, 0); 26 | 27 | // https://developers.google.com/chart/interactive/docs/gallery/linechart#configuration-options 28 | var options = { 29 | backgroundColor: {fill: 'transparent'}, 30 | hAxis: { 31 | title: 'Age [hours]', 32 | logScale: false, 33 | viewWindow: { 34 | min: (startTime-submissionTime)/3600, 35 | max: (endTime-submissionTime)/3600, 36 | } 37 | }, 38 | vAxis: { 39 | title: 'Upvote Rate', 40 | logScale: true, 41 | viewWindow: { 42 | min: 0 43 | }, 44 | }, 45 | series: { 46 | 0: {}, 47 | 1: {lineDashStyle: [5,5], lineWidth: 2}, 48 | 2: {} 49 | }, 50 | 51 | lineWidth: 3, 52 | colors: ['#0089F4', 'black', 'darkblue'], 53 | chartArea:{left:80, top:50, bottom: 80, right: 80}, 54 | height: 350, 55 | legend: { position: 'bottom' }, 56 | crosshair: { trigger: 'both' }, 57 | title: "Upvote Rate", 58 | annotations: {style: 'line'}, 59 | }; 60 | 61 | var chart = new google.visualization.LineChart(plotDiv); 62 | chart.draw(data, options) 63 | 64 | } 65 | 66 | 67 | -------------------------------------------------------------------------------- /templates/upvotesPlot.js.tmpl: 
-------------------------------------------------------------------------------- 1 | function prepareUpvotesPlotData(dataPoints, submissionTime, endTime) { 2 | 3 | var fatigueFactor = 0.003462767 4 | var results = [] 5 | 6 | var length 7 | for (var i = 0; i < dataPoints.length && dataPoints[i][0] <= endTime; i++) { 8 | length = i+1 9 | } 10 | 11 | // Modify our dataset so that we only plot points immediately 12 | // before or after a change. This way the plot looks more like a staircase 13 | // where the line is horizontal until there is an upvote then jumps up. Then we 14 | // only have diagonal lines where there is missing data. 15 | var lastValue = null 16 | for (var i = 0; i < length; i++) { 17 | var p = dataPoints[i] 18 | var upvotes = p[1] 19 | if ( upvotes != lastValue || i+1 == length || upvotes != dataPoints[i+1][1] ) { 20 | lastValue = upvotes 21 | } else { 22 | upvotes = null 23 | } 24 | results[i] = [(p[0] - submissionTime)/3600, upvotes, (1-Math.exp(-fatigueFactor*p[2]))/fatigueFactor] 25 | } 26 | return results; 27 | } 28 | 29 | function upvotesPlot(upvotesData, submissionTime, startTime, endTime) { 30 | 31 | var plotDiv = document.getElementById('upvotes_plot_div') 32 | 33 | var data = new google.visualization.DataTable(); 34 | data.addColumn('number', 'Age'); 35 | data.addColumn('number', 'Upvotes'); 36 | data.addColumn('number', 'Expected Upvotes'); 37 | 38 | data.addRows(prepareUpvotesPlotData(upvotesData, submissionTime, endTime)); 39 | 40 | var ageFormatter = new ageFormat() 41 | ageFormatter.format(data, 0); 42 | 43 | // https://developers.google.com/chart/interactive/docs/gallery/linechart#configuration-options 44 | var options = { 45 | backgroundColor: {fill: 'transparent'}, 46 | hAxis: { 47 | title: 'Age [hours]', 48 | logScale: false, 49 | viewWindow: { 50 | min: (startTime-submissionTime)/3600, 51 | max: (endTime-submissionTime)/3600, 52 | } 53 | }, 54 | vAxis: { 55 | title: 'Upvotes', 56 | viewWindow: 'pretty', 57 | }, 58 | series: { 59 | 0: {lineWidth: 3}, 60 | 1: {lineWidth: 2, lineDashStyle: [5,5]}, 61 | 2: {lineWidth: 2, lineDashStyle: [5,5]}, 62 | }, 63 | 64 | // we want a line to be drawn over intervals where the upvotes is null 65 | // since we set the upvotes to null above if there wasn't an increase in upvotes 66 | interpolateNulls: true, 67 | 68 | colors: ['#55cccc', 'black'], 69 | chartArea:{left:80, top:50, bottom: 80, right: 80}, 70 | height: 350, 71 | legend: { position: 'bottom' }, 72 | crosshair: { trigger: 'both' }, 73 | title: "Upvotes", 74 | }; 75 | 76 | var chart = new google.visualization.LineChart(plotDiv); 77 | chart.draw(data, options); 78 | } 79 | 80 | 81 | -------------------------------------------------------------------------------- /templates/vote.html.tmpl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/social-protocols/quality-news/17d482e0ee499116207c67aa66a787c7808c95a2/templates/vote.html.tmpl -------------------------------------------------------------------------------- /templates/vote.js.tmpl: -------------------------------------------------------------------------------- 1 | 2 | async function vote(id, direction) { 3 | 4 | console.log("Vote", id, direction) 5 | var response = await navigator.locks.request("vote-" + id, async (lock) => { 6 | 7 | // Default options are marked with * 8 | return await fetch("/vote", { 9 | method: 'POST', 10 | mode: 'cors', 11 | cache: 'no-cache', 12 | credentials: 'same-origin', 13 | headers: { 14 | 'Content-Type': 
'application/json' 15 | }, 16 | redirect: 'follow', 17 | referrerPolicy: 'no-referrer', 18 | body: JSON.stringify({storyID: id, direction: direction}) 19 | }); 20 | 21 | }); 22 | 23 | console.log("Response from vote endpoint", response, id, direction) 24 | 25 | return response.json() 26 | } 27 | 28 | 29 | async function toggleUpvote(id) { 30 | console.log("toggling upvote", id) 31 | 32 | // console.log(response.json()) 33 | 34 | var element = document.getElementById("vote-" + id) 35 | 36 | if (element.classList.contains("upvoted")) { 37 | var response = await vote(id, 0) 38 | console.log("Response clear vote", response) 39 | if (response.error||"" != "") { 40 | console.log("Error setting response", response) 41 | return 42 | } 43 | element.classList.remove("upvoted", "voted"); 44 | return 45 | } 46 | 47 | var response = await vote(id, 1) 48 | if (response.error||"" != "") { 49 | console.log("Error setting upvote", response) 50 | return 51 | } 52 | 53 | element.classList.remove("upvoted", "voted"); 54 | console.log("Response set vote", response) 55 | 56 | element.classList.add("upvoted", "voted"); 57 | element.classList.remove("downvoted"); 58 | 59 | element.getElementsByClassName("price")[0].getElementsByClassName("upvoterate")[0].innerHTML = response.entryUpvoteRate.toFixed(2) 60 | 61 | var userScoreElem = element.getElementsByClassName("gainorloss")[0] 62 | userScoreElem.innerHTML = "" 63 | 64 | } 65 | 66 | 67 | 68 | async function toggleDownvote(id) { 69 | console.log("Toggle downvote", id) 70 | 71 | var element = document.getElementById("vote-" + id) 72 | if (element.classList.contains("downvoted")) { 73 | var response = await vote(id, 0) 74 | console.log("Response clear vote", response) 75 | if (response.error||"" != "") { 76 | console.log("Error setting response", response) 77 | return 78 | } 79 | element.classList.remove("downvoted", "voted"); 80 | return 81 | } 82 | 83 | var response = await vote(id, -1) 84 | if (response.error||"" != "") { 85 | console.log("Error setting downvote", response) 86 | return 87 | } 88 | 89 | element.classList.add("downvoted", "voted"); 90 | element.classList.remove("upvoted"); 91 | 92 | element.getElementsByClassName("price")[0].getElementsByClassName("upvoterate")[0].innerHTML = response.entryUpvoteRate.toFixed(2) 93 | 94 | var userScoreElem = element.getElementsByClassName("gainorloss")[0] 95 | userScoreElem.innerHTML = "" 96 | 97 | 98 | } 99 | 100 | function setVotes() { 101 | if (userID == undefined) { 102 | return; 103 | } 104 | 105 | var elements = document.getElementsByClassName("vote") 106 | for (var i = 0; i < elements.length; i++) { 107 | elements[i].classList.add("logged-in") 108 | } 109 | 110 | for (var i = 0; i < positions.length; i++) { 111 | // find the story details element for this story 112 | var storyID = positions[i][0] 113 | var direction = positions[i][1] 114 | var currentUpvoteRate = positions[i][2] 115 | var entryUpvoteRate = positions[i][3] 116 | var userScore = positions[i][4] 117 | 118 | console.log("SEtting position", storyID, direction) 119 | 120 | if (direction == 0) { 121 | continue; 122 | } 123 | 124 | var element = document.getElementById("vote-" + storyID) 125 | 126 | if (element == null) { 127 | console.warn("Didn't find story on page", storyID) 128 | continue; 129 | } 130 | 131 | // console.log("Element",element) 132 | if (direction == 1) { 133 | element.classList.add("upvoted", "voted"); 134 | } else if (direction == -1) { 135 | element.classList.add("downvoted", "voted"); 136 | } 137 | 138 | console.log("SEtting 
upvote rate", entryUpvoteRate) 139 | 140 | console.log("Got element", storyID, element) 141 | element.getElementsByClassName("price")[0].getElementsByClassName("upvoterate")[0].innerHTML = entryUpvoteRate.toFixed(2) 142 | 143 | 144 | 145 | var userScoreString = userScore.toFixed(2) 146 | if ( Math.abs(userScore) < .01 ) { 147 | userScoreString = "" 148 | } 149 | else if (userScore > 0) { 150 | userScoreString = "+" + userScoreString 151 | } 152 | var userScoreElem = element.getElementsByClassName("gainorloss")[0] 153 | userScoreElem.innerHTML = userScoreString 154 | 155 | if (userScore > 0) { 156 | userScoreElem.classList.add("gain") 157 | } else if (userScore < 0) { 158 | userScoreElem.classList.add("loss") 159 | } 160 | 161 | 162 | 163 | } 164 | 165 | } 166 | -------------------------------------------------------------------------------- /timeout.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "net/http" 6 | "time" 7 | ) 8 | 9 | func (app app) timeoutMiddleware(handler http.Handler, timeoutSeconds time.Duration) http.Handler { 10 | return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 11 | ctx, cancel := context.WithTimeout(r.Context(), timeoutSeconds) 12 | defer cancel() 13 | handler.ServeHTTP(w, r.WithContext(ctx)) 14 | }) 15 | } 16 | -------------------------------------------------------------------------------- /upvote-rate-model.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "database/sql" 5 | "math" 6 | ) 7 | 8 | const ( 9 | nPages = 3 // page 1 (rank 1-30), page 2, ... 10 | nPageTypes = 5 // new, top, etc 11 | ) 12 | 13 | type pageCoefficients = struct { 14 | pageTypeCoefficient float64 15 | pageCoefficient float64 16 | rankCoefficient float64 17 | } 18 | 19 | // These coefficients are the output of bayesian-quality-pagetype-rank.R 20 | // from the hacker-news-data repository. 
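// Each entry is the (pageTypeCoefficient, pageCoefficient, rankCoefficient)
// triple for one of the nPageTypes page types. expectedUpvoteShare below
// combines them in a log-linear model,
//
//	log(share) = pageTypeCoefficient + pageCoefficient*log(page) + rankCoefficient*log(rankOnPage)/page
//
// (with one-based page and rankOnPage), so the expected share of upvotes
// falls off with both the page number and the rank within the page.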
21 | var ( 22 | coefficients = [nPageTypes]pageCoefficients{ 23 | // {-2.733096, -3.492384, -0.5636350}, 24 | // {-5.806347, -2.680377, -0.3879157}, 25 | // {-7.365239, -1.141086, -0.2927700}, 26 | // {-5.743499, -4.986510, -1.0510611}, 27 | // {-7.237460, -4.884862, -0.8878165}, 28 | {-2.886938, -3.316492, -0.5193376}, 29 | {-5.856364, -2.564690, -0.3937709}, 30 | {-7.175409, -1.280364, -0.3717084}, 31 | {-5.316879, -5.469948, -1.2944215}, 32 | {-6.292276, -5.912105, -1.1996512}, 33 | } 34 | // fatigueFactor = 0.003462767 35 | // priorWeight = 1.7 36 | // priorWeight = 2.2956 37 | // priorWeight = 0.5 38 | ) 39 | 40 | type ModelParams struct { 41 | FatigueFactor float64 42 | PriorWeight float64 43 | } 44 | 45 | type OptionalModelParams struct { 46 | FatigueFactor sql.NullFloat64 47 | PriorWeight sql.NullFloat64 48 | } 49 | 50 | func (p OptionalModelParams) WithDefaults() ModelParams { 51 | var results ModelParams 52 | 53 | if p.PriorWeight.Valid { 54 | results.PriorWeight = p.PriorWeight.Float64 55 | } else { 56 | results.PriorWeight = defaultModelParams.PriorWeight 57 | } 58 | 59 | if p.FatigueFactor.Valid { 60 | results.FatigueFactor = p.FatigueFactor.Float64 61 | } else { 62 | results.FatigueFactor = defaultModelParams.FatigueFactor 63 | } 64 | 65 | return results 66 | } 67 | 68 | // var defaultModelParams = ModelParams{0.003462767, 2.2956} 69 | var defaultModelParams = ModelParams{FatigueFactor: 0.003462767, PriorWeight: 0.75} 70 | 71 | func (p ModelParams) upvoteRate(upvotes int, expectedUpvotes float64) float64 { 72 | return (float64(upvotes) + p.PriorWeight) / float64((1-math.Exp(-p.FatigueFactor*expectedUpvotes))/p.FatigueFactor+p.PriorWeight) 73 | } 74 | 75 | func expectedUpvoteShare(pageType pageTypeInt, oneBasedRank int) float64 { 76 | zeroBasedPage := (oneBasedRank - 1) / 30 77 | oneBasedRankOnPage := ((oneBasedRank - 1) % 30) + 1 78 | 79 | cs := coefficients[pageType] 80 | 81 | logExpectedUpvoteShare := cs.pageTypeCoefficient + 82 | cs.pageCoefficient*math.Log(float64(zeroBasedPage+1)) + 83 | cs.rankCoefficient*math.Log(float64(oneBasedRankOnPage))/float64(zeroBasedPage+1) 84 | 85 | return math.Exp(logExpectedUpvoteShare) 86 | } 87 | 88 | var averageCrawlDelay = 10 89 | 90 | func expectedUpvoteShareNewPage(oneBasedRank, elapsedTime int, newRankChanges []int) float64 { 91 | rank := oneBasedRank 92 | exUpvoteShare := 0.0 93 | 94 | for j, current := range append(newRankChanges, elapsedTime+10) { 95 | 96 | r := rank - j 97 | if r < 1 { 98 | break 99 | } 100 | 101 | var previous int 102 | var timeAtRank int 103 | 104 | // Calculate the value of the variable previous, which is how many 105 | // seconds ago this story moved out of rank r 106 | if j > 0 { 107 | previous = newRankChanges[j-1] 108 | } else { 109 | // Most stories don't appear on the new page until about 10 seconds after submission. 110 | // So subtract 10 seconds from the age of the story at rank 1. 111 | previous = averageCrawlDelay 112 | } 113 | 114 | if current > elapsedTime+averageCrawlDelay { 115 | current = elapsedTime + averageCrawlDelay 116 | } 117 | timeAtRank = current - previous 118 | if timeAtRank <= 0 { 119 | // Some stories might appear on the new page after less than averageCrawlDelay seconds. So by subtracting averageCrawlDelay seconds from 120 | // the submission time, we can end up with a negative timeAtRank. But this needs to be positive, 121 | // because total attentionShare must be greater than zero. So instead of subtracting averageCrawlDelay, divide by 2. 
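// e.g. with previous = 10 (averageCrawlDelay) and current = 4, timeAtRank
// becomes 4/2 = 2 instead of -6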
122 | timeAtRank = current / 2 123 | } 124 | 125 | exUpvoteShare += expectedUpvoteShare(1, r) * float64(timeAtRank) / float64(elapsedTime) 126 | } 127 | 128 | return exUpvoteShare 129 | } 130 | -------------------------------------------------------------------------------- /upvotes-db.sh: -------------------------------------------------------------------------------- 1 | # Open the upvotes DB as read-write, then attach the frontpage DB as read-only. 2 | 3 | # We need to pass an init script filename to sqlite3 to run the attach command at the beginning of the shell session. 4 | initscript=$(mktemp /tmp/init-db.XXXXXX) 5 | echo "attach database 'file:/Users/jwarden/hacker-news-data-datadir/frontpage.sqlite?mode=ro' as frontpage; 6 | .mode column 7 | .header on 8 | " > $initscript 9 | 10 | # Delete the tempfile after sqlite3 has started 11 | (sleep 1 && rm "$initscript")& 12 | 13 | sqlite3 $SQLITE_DATA_DIR/upvotes.sqlite --init $initscript 14 | 15 | -------------------------------------------------------------------------------- /utils.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | func getKeys[K comparable, V comparable](m map[K]V) []K { 4 | keys := make([]K, len(m)) 5 | var i int 6 | for key := range m { 7 | keys[i] = key 8 | i++ 9 | } 10 | return keys 11 | } 12 | 13 | func getValues[K comparable, V any](m map[K]V) []V { 14 | values := make([]V, len(m)) 15 | var i int 16 | for _, value := range m { 17 | values[i] = value 18 | i++ 19 | } 20 | return values 21 | } 22 | 23 | func mapSlice[T, U any](ts []T, f func(T) U) []U { 24 | results := make([]U, len(ts)) 25 | for i, t := range ts { 26 | results[i] = f(t) 27 | } 28 | return results 29 | } 30 | -------------------------------------------------------------------------------- /vote.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "database/sql" 6 | "encoding/json" 7 | "fmt" 8 | "net/http" 9 | 10 | "github.com/johnwarden/httperror" 11 | "github.com/pkg/errors" 12 | ) 13 | 14 | type voteParams struct { 15 | StoryID int `json:"storyID"` 16 | Direction int8 `json:"direction"` 17 | } 18 | 19 | type voteResponse struct { 20 | Error string `json:"error,omitempty"` 21 | EntryUpvoteRate float64 `json:"entryUpvoteRate"` 22 | } 23 | 24 | var ( 25 | insertVoteStmt *sql.Stmt 26 | getLastVoteStatement *sql.Stmt 27 | ) 28 | 29 | func (app app) prepareVoteStatements() error { 30 | err := app.ndb.attachFrontpageDB() 31 | if err != nil { 32 | return errors.Wrap(err, "attachFrontpageDB") 33 | } 34 | 35 | if insertVoteStmt == nil { 36 | 37 | var e error 38 | insertVoteStmt, e = app.ndb.upvotesDB.Prepare(` 39 | with parameters as ( 40 | select 41 | ? as userID 42 | , ? as storyID 43 | , ? as direction 44 | ) 45 | , openPositions as ( 46 | select 47 | userID 48 | , storyID 49 | , direction 50 | , entryTime 51 | from votes 52 | group by userID, storyID 53 | having max(rowid) -- use rowID instead of entryTime because two votes can come in during the same second 54 | ) 55 | -- A vote is a duplicate only if the **latest** vote (in openPositions) for this userID and storyID 56 | -- has the same direction.
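-- (A direction of 0, which clears a vote, is a duplicate only when the user has
-- no open position, because ifnull() treats a missing position as direction 0.)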
57 | 		, duplicates as (
58 | 			select parameters.userID, parameters.storyID, parameters.direction == ifnull(openPositions.direction, 0) as duplicate
59 | 			from parameters
60 | 			left join openPositions using (userID, storyID)
61 | 		)
62 | 		insert into votes(userID, storyID, direction, entryUpvotes, entryExpectedUpvotes, entryTime)
63 | 		select
64 | 			parameters.userID
65 | 			, parameters.storyID
66 | 			, parameters.direction
67 | 			, cumulativeUpvotes
68 | 			, cumulativeExpectedUpvotes
69 | 			, unixepoch()
70 | 		from parameters
71 | 		-- join on dataset to get latest upvoteRate
72 | 		join dataset on
73 | 			id = parameters.storyID
74 | 			and sampleTime = ( select max(sampleTime) from dataset join parameters where id = storyID )
75 | 		-- but don't insert a vote unless it actually changes the user's position
76 | 		join stories using (id)
77 | 		join duplicates
78 | 		where
79 | 			not duplicate
80 | 			and not job
81 | 		`)
82 |
83 | 		if e != nil {
84 | 			return errors.Wrap(e, "Preparing insertVoteStmt")
85 | 		}
86 |
87 | 	}
88 |
89 | 	if getLastVoteStatement == nil {
90 | 		var e error
91 |
92 | 		getLastVoteStatement, e = app.ndb.upvotesDB.Prepare(`
93 | 		select
94 | 			entryUpvotes
95 | 			, entryExpectedUpvotes
96 | 			, entryTime
97 | 		from votes
98 | 		where userID = ? and storyID = ? and direction = ? order by rowid desc limit 1
99 | 		`)
100 | 		if e != nil {
101 | 			return errors.Wrap(e, "Preparing getLastVoteStatement")
102 | 		}
103 | 	}
104 | 	return nil
105 | }
106 |
107 | func (app app) vote(ctx context.Context, userID int64, storyID int, direction int8) (r float64, t int64, err error) {
108 | 	if userID < 100 {
109 | 		return 0, 0, httperror.PublicErrorf(http.StatusUnauthorized, "Can't vote for special user IDs")
110 | 	}
111 |
112 | 	err = app.prepareVoteStatements()
113 | 	if err != nil {
114 | 		return 0, 0, err
115 | 	}
116 |
117 | 	db, err := app.ndb.upvotesDBWithDataset(ctx)
118 | 	if err != nil {
119 | 		return 0, 0, errors.Wrap(err, "upvotesDBWithDataset")
120 | 	}
121 | 	tx, e := db.BeginTx(ctx, nil)
122 | 	if e != nil {
123 | 		err = errors.Wrap(e, "BeginTX")
124 | 		return
125 | 	}
126 |
127 | 	// Use the commit/rollback in a defer pattern described in:
128 | 	// https://stackoverflow.com/questions/16184238/database-sql-tx-detecting-commit-or-rollback
129 | 	defer func() {
130 | 		if err != nil {
131 | 			// https://go.dev/doc/database/execute-transactions
132 | 			// If the transaction succeeds, it will be committed before the function exits, making the deferred rollback call a no-op.
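			// (In this variant, Rollback is only reached when err is non-nil, so the
			// no-op case never actually arises.)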
133 | 			e := tx.Rollback()
134 | 			if e != nil {
135 | 				app.logger.Error("tx.Rollback in vote", e)
136 | 			}
137 | 			return
138 | 		}
139 | 		err = tx.Commit() // here we are setting the return value err
140 | 		if err != nil {
141 | 			return
142 | 		}
143 | 	}()
144 |
145 | 	res, err := tx.Stmt(insertVoteStmt).ExecContext(ctx, userID, storyID, direction)
146 | 	if err != nil {
147 | 		return 0, 0, errors.Wrap(err, "insertVoteStmt")
148 | 	}
149 |
150 | 	rows, _ := res.RowsAffected()
151 | 	if rows == 0 {
152 | 		Debugf(app.logger, "Duplicate vote: user %d, story %d, direction %d", userID, storyID, direction)
153 | 	} else {
154 | 		Debugf(app.logger, "Inserted vote statement %v, %d, %d", userID, storyID, direction)
155 | 	}
156 |
157 | 	row := tx.Stmt(getLastVoteStatement).QueryRowContext(ctx, userID, storyID, direction)
158 | 	var entryUpvotes int
159 | 	var entryExpectedUpvotes float64
160 | 	var entryTime int64
161 | 	err = row.Scan(&entryUpvotes, &entryExpectedUpvotes, &entryTime)
162 | 	if err != nil {
163 | 		return 0, 0, errors.Wrapf(err, "getLastVoteStatement %v %d %d", userID, storyID, direction)
164 | 	}
165 | 	entryUpvoteRate := defaultModelParams.upvoteRate(entryUpvotes, entryExpectedUpvotes)
166 |
167 | 	return entryUpvoteRate, entryTime, nil
168 | }
169 |
170 | func (app app) voteHandler() func(http.ResponseWriter, *http.Request, voteParams) error {
171 | 	return func(w http.ResponseWriter, r *http.Request, p voteParams) error {
172 | 		userID := app.getUserID(r)
173 |
174 | 		if !userID.Valid {
175 | 			return httperror.PublicErrorf(http.StatusUnauthorized, "not logged in")
176 | 		}
177 |
178 | 		w.Header().Set("Content-Type", "application/json; charset=utf-8")
179 |
180 | 		storyID := p.StoryID
181 | 		if storyID <= 0 {
182 | 			return fmt.Errorf("Invalid story ID %d", storyID)
183 | 		}
184 | 		Debugf(app.logger, "StoryID %d", storyID)
185 |
186 | 		direction := p.Direction
187 | 		if direction < -1 || direction > 1 {
188 | 			return fmt.Errorf("Invalid direction %d", direction)
189 | 		}
190 |
191 | 		var b []byte
192 | 		var err error
193 | 		entryUpvoteRate, _, err := app.vote(r.Context(), userID.Int64, storyID, direction)
194 |
195 | 		var response voteResponse
196 |
197 | 		if err != nil {
198 | 			app.logger.Error("Writing error response", err)
199 | 			response = voteResponse{Error: "Internal error"}
200 | 		} else {
201 | 			response = voteResponse{EntryUpvoteRate: entryUpvoteRate}
202 | 		}
203 |
204 | 		b, err = json.Marshal(response)
205 | 		if err != nil {
206 | 			_, _ = w.Write([]byte(`{"error": "internal error marshaling response"}`))
207 | 			return errors.Wrap(err, "Marshaling voteResponse")
208 | 		}
209 | 		_, err = w.Write(b)
210 | 		return errors.Wrap(err, "writing HTTP response")
211 | 	}
212 | }
213 |
--------------------------------------------------------------------------------
/voting-notes.md:
--------------------------------------------------------------------------------
1 | # Voting Notes
2 |
3 | ## TODO
4 |
5 | Disable upvote buttons for jobs.
6 |
7 | ## Login/Logout
8 |
9 | I have created simple login/logout functionality:
10 |
11 | Login with a random user ID:
12 |     /login
13 | Login with a specific user ID:
14 |     /login?userID=1234
15 | Logout user:
16 |     /logout
17 |
18 | If you are logged in, your user ID will be shown at the top right, and upvote/downvote buttons will be shown next to stories.
19 |
20 | You can toggle a vote to clear it. Switching from upvote to downvote, or vice versa, first clears the current vote.
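For reference, here is a minimal client sketch of this flow. It is a sketch under assumptions: the vote handler's route (`/vote`) and the listen address are not confirmed in these notes, and direction `0` meaning "clear" is inferred from the toggle behavior; the JSON fields match `voteParams` in vote.go.

```go
// Hypothetical voting client (route and address are assumptions, see above).
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"net/http"
	"net/http/cookiejar"
)

func main() {
	jar, _ := cookiejar.New(nil)
	client := &http.Client{Jar: jar} // keeps the session cookie from /login

	// Log in as user 1234.
	if _, err := client.Get("http://127.0.0.1:8080/login?userID=1234"); err != nil {
		panic(err)
	}

	// Upvote story 123: direction 1 = upvote, -1 = downvote, 0 presumably clears.
	body, _ := json.Marshal(map[string]any{"storyID": 123, "direction": 1})
	resp, err := client.Post("http://127.0.0.1:8080/vote", "application/json", bytes.NewReader(body))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	fmt.Println(resp.Status) // the response body carries entryUpvoteRate, or an error field
}
```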
21 |
22 | ## Votes and Positions Tables
23 |
24 | The `votes` table has one entry for each change of position (from upvoted to cleared, cleared to upvoted, downvoted to upvoted, etc.)
25 |
26 | The `positions` view is like the `votes` table, but it does not contain a record for when a vote is cleared. Instead, it contains one record for each upvote/downvote, along with score/price details for the moment the upvote/downvote happened and for the moment the position was exited (the moment the vote was cleared, if any).
27 |
28 | ## Scoring
29 |
30 | Voting is like buying a stock. Your score is based on your entry price and your final price, which is either the exit price (if you exited the position) or the current price.
31 |
32 | If the final price is greater than the entry price, you gain points; if it is less, you lose points. There are a couple of different scoring formulas.
33 |
34 | ## Score Page
35 |
36 | The score page is at:
37 |
38 |     /score
39 |
40 | The score page shows each "position". Since a user can enter/exit a position on a story multiple times, a story might be shown multiple times. The user's total score is the sum of the scores for all positions.
41 |
42 | You can look at the score for a particular user:
43 |
44 |     /score?userID=1234
45 |
46 | You can also use different scoring formulas:
47 |
48 |     /score?scoringFormula=InformationGain
49 |     /score?scoringFormula=PTS     # Peer Truth Serum
50 |     /score?scoringFormula=LogPTS  # Default formula: Log Peer Truth Serum
51 |
52 | And change the model parameters, the most important of which is the priorWeight:
53 |
54 |     /score?priorWeight=3.5
55 |
56 | ## Baseline User IDs
57 |
58 | UserID 0 randomly votes on stories on the new page.
59 |
60 |     /score?userID=0
61 |
62 | UserID 1 randomly votes on stories on the front page.
63 |
64 |     /score?userID=1
65 |
66 | UserID 2 upvotes all new stories immediately (on the first crawl where they appear).
67 |
68 |     /score?userID=2
69 |
70 | UserID 3 downvotes all new stories immediately.
71 |
72 |     /score?userID=3
73 |
74 |
75 | ## IMPORTANT FINDINGS
76 |
77 | - We need to constantly tune priorWeight so that the results of random voting average to 0.
78 |   We get different results for the total score for userID 0 than we get from compare-against-random-voter. This is because userID 0 has a starting price that is generally slightly smaller than the priorAverage: some stories accumulate some attention by the first data point, and userID 0 waits for that first data point before voting, thereby getting in at a slightly lower price. We want this not to be a viable strategy, and it seems we can prevent it by tuning down the priorWeight.
79 |
80 | - The scoring formula seems to work best if the user's vote is not counted in the upvote rate calculations -- either in the final upvote rate or in the entry upvote rate. This means that if users vote through our platform, we need to compute the entry upvote rate **before** their vote. Then we need to subtract their vote out when calculating the final upvote rate.
81 |   - Why is this? One, intuitively, the scoring formula seems to give more reasonable (e.g. higher) scores this way.
82 |   - Two, it is closer to PTS, where neither the numerator nor the denominator factors in the user's vote.
83 |
84 |
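To make the entry/exit scoring above concrete, here is a toy sketch. The upvoteRate function mirrors the formula in upvote-rate-model.go, but the log-ratio score is a hypothetical stand-in, **not** the actual formulas in scoring-formula.go. Per the findings above, the entry rate is taken before the user's own vote.

```go
// Toy position-scoring sketch. upvoteRate mirrors upvote-rate-model.go; the
// log-ratio score below is a hypothetical illustration, not the project's
// actual InformationGain/PTS/LogPTS implementations.
package main

import (
	"fmt"
	"math"
)

const (
	fatigueFactor = 0.003462767
	priorWeight   = 0.75
)

func upvoteRate(upvotes, expectedUpvotes float64) float64 {
	return (upvotes + priorWeight) /
		((1-math.Exp(-fatigueFactor*expectedUpvotes))/fatigueFactor + priorWeight)
}

func main() {
	entry := upvoteRate(3, 2.0)   // entry price, taken before the user's vote
	final := upvoteRate(40, 15.0) // exit (or current) price, user's vote subtracted out

	score := math.Log2(final / entry) // positive if the rate rose after entry, negative if it fell
	fmt.Printf("entry=%.3f final=%.3f score=%.3f\n", entry, final, score)
}
```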
85 | ## Information-Theory Scoring Formula
86 |
87 | Okay, let's say the user provides information that increases the upvote rate from S to R.
88 |
89 |     views = W = A*n
90 |     upvotes = U = A*n*p
91 |     upvoteRate = R = U/A, and thus
92 |     R = n*p
93 |
94 |
95 | The total surprise is the expected value of the surprise times the number of views (writing log for log base 2, and dropping the conventional minus sign, since only differences matter below):
96 |
97 |     A*n * (p*log(p) + (1-p)*log(1-p))
98 |
99 | If p is the posterior probability (before the user's vote) and q is the prior probability, then the surprise from the fully-informed point of view -- that is, the expected value (over p) of the surprise of q -- is
100 |
101 |     A*n * (p*log(q) + (1-p)*log(1-q))
102 |
103 | And the difference is
104 |
105 |     A*n * (p*log(p) + (1-p)*log(1-p))
106 |     - A*n * (p*log(q) + (1-p)*log(1-q))
107 |     = A*n * ( p*log(p/q) + (1-p)*log((1-p)/(1-q)) )
108 |
109 |
110 | Which is the KL divergence times the number of views:
111 |
112 |     A*n * Dkl(p||q)
113 |     = A*n * ( p*log(p/q) + (1-p)*log((1-p)/(1-q)) )
114 |
115 | Now, given
116 |
117 |     R = p*n
118 |     S = q*n
119 |     p = R/n
120 |     q = S/n
121 |
122 | we can rewrite that as
123 |
124 |     = A * ( n*p*log(p/q) + n*(1-p)*log((1-p)/(1-q)) )
125 |     = A * ( R*log((R/n)/(S/n)) + n*log((1-R/n)/(1-S/n)) - R*log((1-R/n)/(1-S/n)) )
126 |     = A * ( R*log(R/S) + n*log((1-R/n)/(1-S/n)) - R*log((n-R)/(n-S)) )
127 |     = A * ( R*log(R/S) + n*log(1-R/n) - n*log(1-S/n) - R*log((n-R)/(n-S)) )
128 |
129 | Now we want to find the limit of this as n approaches infinity.
130 |
131 | (n-R)/(n-S) approaches 1, therefore log((n-R)/(n-S)) approaches 0.
132 |
133 | So now what we want is
134 |
135 |     lim{n->∞} A * ( R*log(R/S) + n*log(1-R/n) - n*log(1-S/n) )
136 |
137 |
138 |
139 | Now here is a key insight!
140 |
141 |     lim{n->∞} n*ln(1 - c/n) = -c
142 |
143 | So, converting our formula to use the natural logarithm:
144 |
145 |     lim{n->∞} ( R*ln(R/S) + n*ln(1 - R/n) - n*ln(1 - S/n) ) * A / ln(2)
146 |
147 | And substituting lim{n->∞} n*ln(1 - S/n) = -S and lim{n->∞} n*ln(1 - R/n) = -R:
148 |
149 |     lim{n->∞} ( R*ln(R/S) - R + S ) * A / ln(2)
150 |
151 |     = ( R*ln(R/S) - R + S ) * A / ln(2)
152 |
153 |
154 |     = ( R*(ln(R/S) - 1) + S ) * A / ln(2)
155 |
156 | If R = U/A and S = V/A, this is
157 |
158 |     = ( U*(ln(R/S) - 1) + V ) / ln(2)
159 |
160 |
161 |     = A * ( R*log(R/S) + (S-R)/ln(2) )
162 |
163 | Okay, now the idea is that this is the **total** information value of all upvotes. Each individual upvote incrementally changes the estimated upvote rate from Rj to Rk, where j = k-1. The final upvote rate is R, the final probability is p, and the final views are A*n. That information gain is multiplied by all **subsequent** views, which is (A - Ak)*n:
164 |
165 |
166 |     (A - Ak)*n * (p*log(pk) + (1-p)*log(1-pk))
167 |     - (A - Ak)*n * (p*log(pj) + (1-p)*log(1-pj))
168 |     = (A - Ak)*n * ( p*log(pk/pj) + (1-p)*log((1-pk)/(1-pj)) )
169 |
170 |     = (A - Ak) * ( R*log(Rk/Rj) + n*log(1-Rk/n) - n*log(1-Rj/n) - R*log((n-Rk)/(n-Rj)) )
171 |
172 | The lim{n->∞} of that is
173 |
174 |     = (A - Ak) * ( R*log(Rk/Rj) + n*log(1-Rk/n) - n*log(1-Rj/n) )
175 |     = (A - Ak) * ( R*ln(Rk/Rj) + n*ln(1-Rk/n) - n*ln(1-Rj/n) ) / ln(2)
176 |
177 |     = (A - Ak) * ( R*log(Rk/Rj) + (Rj - Rk)/ln(2) )
178 |
179 | Or we can go with the KL divergence between two Poisson distributions, which is:
180 |
181 | https://stats.stackexchange.com/questions/145789/kl-divergence-between-two-univariate-poisson-distributions
182 |
183 |     Dkl(f1||f2) = λ1*log(λ1/λ2) + λ2 - λ1
184 |
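A direct transcription of these limiting formulas into code, as a sketch (the function and variable names are mine; A is attention as defined at the top of this derivation, and results are in bits):

```go
// Sketch of the information-value formulas derived above. R is the posterior
// (final) upvote rate, S the prior rate, A the total attention; Ak, Rk, Rj
// describe the k-th incremental vote.
package main

import (
	"fmt"
	"math"
)

// total information value: A * ( R*ln(R/S) - R + S ) / ln(2)
func totalInformationValue(a, rFinal, sPrior float64) float64 {
	return a * (rFinal*math.Log(rFinal/sPrior) - rFinal + sPrior) / math.Ln2
}

// incremental information value of the k-th vote, which moved the estimated
// rate from rj to rk at attention ak: (A - Ak) * ( R*log2(rk/rj) + (rj-rk)/ln(2) )
func incrementalInformationValue(a, ak, rFinal, rk, rj float64) float64 {
	return (a - ak) * (rFinal*math.Log2(rk/rj) + (rj-rk)/math.Ln2)
}

func main() {
	// e.g. prior rate S = 1, final rate R = 2, attention A = 100
	fmt.Printf("total: %.2f bits\n", totalInformationValue(100, 2, 1))
	// a vote at Ak = 10 that moved the estimate from 1.0 to 1.5
	fmt.Printf("incremental: %.2f bits\n", incrementalInformationValue(100, 10, 2, 1.5, 1.0))
}
```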
185 | ----
186 |
187 |
188 | Okay, but how do we convert this to value created?
189 |
190 |
191 | Do we credit users for more value creation for upvoting stories that ultimately get a lot of upvotes? I think not, actually. The value created on the home page is a result of all the information provided.
192 |
193 | So I think we should look at the total value created during some period of time, and give credit to users proportionally to the amount of information they provided during that period.
194 |
195 |
--------------------------------------------------------------------------------
/watch.sh:
--------------------------------------------------------------------------------
1 | if which humanlog > /dev/null ; then
2 | 	LOGFORMATTER="| humanlog --truncate=0"
3 | fi
4 |
5 | ls *.go **/**.tmpl **/**.sql | entr -ncr sh -c "go install; go run *.go $LOGFORMATTER"
--------------------------------------------------------------------------------