├── .envrc
├── .envrc.local.example
├── .github
│   └── workflows
│       ├── build.yml
│       ├── deploy.yml
│       └── format.yml
├── .gitignore
├── .golangci.yml
├── .mergify.yml
├── LICENSE
├── README.md
├── aboutpage.go
├── algorithmspage.go
├── app.go
├── archive.go
├── auth.go
├── canonicaldomain.go
├── compare-against-random-voter.sql
├── database.go
├── devbox.json
├── devbox.lock
├── domain_penalties.go
├── fly.toml
├── frontpage.go
├── go.mod
├── go.sum
├── health.go
├── httpserver.go
├── init.sql
├── justfile
├── logger.go
├── logo.svg
├── main.go
├── middleware.go
├── migrate-volume.sh
├── position.go
├── postprocessing.go
├── prometheus.go
├── rankcrawler.go
├── reset-prior-average-upvote-rate.sql
├── resources.go
├── score-page.go
├── scoring-formula.go
├── scraper.go
├── seed
│   └── domain-penalties.csv
├── sql
│   ├── cumulative-upvotes.sql
│   ├── previous-crawl-index-old.sql
│   ├── previous-crawl.sql
│   ├── qnranks.sql
│   ├── random-new-voter.sql
│   ├── random-top-voter.sql
│   ├── raw-ranks.sql
│   ├── resubmissions.sql
│   └── upvote-rates.sql
├── static
│   ├── android-chrome-192x192.png
│   ├── android-chrome-512x512.png
│   ├── apple-touch-icon.png
│   ├── browserconfig.xml
│   ├── chart-646.png
│   ├── expected-upvotes.png
│   ├── favicon-16x16.png
│   ├── favicon-32x32.png
│   ├── favicon.ico
│   ├── hn-top-page-upvotes-by-rank.png
│   ├── hn-top-page-votehistogram.svg
│   ├── logo.svg
│   ├── mstile-144x144.png
│   ├── mstile-150x150.png
│   ├── mstile-310x150.png
│   ├── mstile-310x310.png
│   ├── mstile-70x70.png
│   ├── rank-history.png
│   ├── safari-pinned-tab.svg
│   ├── site.webmanifest
│   ├── upvote-rate.png
│   └── upvote-share-by-rank.png
├── statspage.go
├── storage.go
├── story-details.go
├── storyplot-data.go
├── templates.go
├── templates
│   ├── about-content.html.tmpl
│   ├── about.html.tmpl
│   ├── algorithms-content.html.tmpl
│   ├── header.html.tmpl
│   ├── index.html.tmpl
│   ├── normalize.css.tmpl
│   ├── ranksPlot.js.tmpl
│   ├── score.html.tmpl
│   ├── scorePlot.js.tmpl
│   ├── spinner.css.tmpl
│   ├── stats.html.tmpl
│   ├── storyDetails.html.tmpl
│   ├── storyplots.js.tmpl
│   ├── styles.css.tmpl
│   ├── upvoteRatePlot.js.tmpl
│   ├── upvotesPlot.js.tmpl
│   ├── vote.html.tmpl
│   └── vote.js.tmpl
├── timeout.go
├── upvote-rate-model.go
├── upvotes-db.sh
├── utils.go
├── vote.go
├── voting-notes.md
└── watch.sh
/.envrc:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | # 1) Enable devbox environment
4 | eval "$(devbox generate direnv --print-envrc)"
5 |
6 | # 2) Any shared env variables go here
7 | export SQLITE_DATA_DIR=data
8 | export CACHE_SIZE=100
9 | export LISTEN_ADDRESS=127.0.0.1
10 | export PORT=8080
11 | export R2_BUCKET=news-archive-dev
12 | export R2_USE_SSL=true
13 | export R2_ENDPOINT=https://9e2da4e2b5c6dd05d36f399d4afc7d4c.r2.cloudflarestorage.com
14 |
15 | # 3) Only on macOS unify DEVELOPER_DIR / DEVELOPER_DIR_FOR_TARGET
16 | if [[ "$(uname)" == "Darwin" ]]; then
17 | # Devbox may set both DEVELOPER_DIR and DEVELOPER_DIR_FOR_TARGET to different paths.
18 | # cgo doesn't like that, so unify them.
19 | if [[ -n "$DEVELOPER_DIR" && -n "$DEVELOPER_DIR_FOR_TARGET" ]]; then
20 | export DEVELOPER_DIR_FOR_TARGET="$DEVELOPER_DIR"
21 | fi
22 | fi
23 |
24 | # 4) If there's a local override file, load it
25 | if [[ -f .envrc.local ]]; then
26 | source .envrc.local
27 | echo "Successfully loaded .envrc.local"
28 | fi
29 |
30 | echo "Successfully loaded .envrc"
31 |
--------------------------------------------------------------------------------
/.envrc.local.example:
--------------------------------------------------------------------------------
1 | # If you use nix, you can run the nix-shell directly with the following command
2 | if command -v nix &> /dev/null
3 | then
4 | use nix
5 | # you can add parameters to the nix-shell as well, e.g.
6 | # use nix --command zsh
7 | # if you use lorri, replace `use nix` with (see https://github.com/nix-community/lorri)
8 | # eval "$(lorri direnv)"
9 | fi
10 |
11 | export R2_ACCESS_KEY_ID="DEV.ACCESS.KEY.ID"
12 | export R2_SECRET_ACCESS_KEY="DEV.SECRET.ACCESS.KEY"
13 |
14 | echo "Successfully loaded .envrc.local"
15 |
--------------------------------------------------------------------------------
/.github/workflows/build.yml:
--------------------------------------------------------------------------------
1 | name: Build
2 |
3 | on:
4 | push:
5 | branches: ["master"]
6 | tags: [v*]
7 | pull_request:
8 | types: [opened, synchronize]
9 | workflow_dispatch:
10 |
11 | permissions:
12 | contents: read
13 |
14 | # automatically cancel previous runs on the same PR
15 | # https://stackoverflow.com/questions/66335225/how-to-cancel-previous-runs-in-the-pr-when-you-push-new-commitsupdate-the-curre/67939898#67939898
16 | concurrency:
17 | group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}
18 | cancel-in-progress: true
19 |
20 | jobs:
21 | build:
22 | name: "Build"
23 | runs-on: ubuntu-22.04
24 | steps:
25 | - uses: actions/checkout@v3
26 | with:
27 | # https://github.com/actions/checkout/issues/626
28 | # This is correct, because we're using a merge queue (mergify) which only merges when built against the latest target branch.
29 | # https://docs.mergify.com/actions/queue/
30 | ref: ${{ github.event.pull_request.head.sha }}
31 | - uses: actions/setup-go@v3
32 | with:
33 | go-version-file: go.mod
34 | cache: true
35 | - run: go build *.go
36 | - name: Check if working directory is clean
37 | run: git diff --quiet --exit-code || (git status && false)
38 |
39 | lint:
40 | name: "Lint"
41 | runs-on: ubuntu-22.04
42 | steps:
43 | - uses: actions/checkout@v3
44 | with:
45 | # https://github.com/actions/checkout/issues/626
46 | # This is correct, because we're using a merge queue (mergify) which only merges when built against the latest target branch.
47 | # https://docs.mergify.com/actions/queue/
48 | ref: ${{ github.event.pull_request.head.sha }}
49 | - uses: actions/setup-go@v3
50 | with:
51 | go-version-file: go.mod
52 | cache: true
53 | - name: golangci-lint
54 | uses: golangci/golangci-lint-action@v3
55 | with:
56 | version: v1.50.1
57 | - name: Check if go code is formatted
58 | run: |
59 | UNFORMATTED_FILES=$(gofmt -l .)
60 | test -z "$UNFORMATTED_FILES" || (echo -e "Go code not formatted:\n$UNFORMATTED_FILES\n"; exit 1)
61 |
--------------------------------------------------------------------------------
/.github/workflows/deploy.yml:
--------------------------------------------------------------------------------
1 | name: Deploy
2 |
3 | on:
4 | push:
5 | branches: [master]
6 | env:
7 | FLY_API_TOKEN: ${{ secrets.FLY_API_TOKEN }}
8 | jobs:
9 | deploy:
10 | name: Deploy app
11 | runs-on: ubuntu-22.04
12 | steps:
13 | - uses: actions/checkout@v3
14 | - uses: superfly/flyctl-actions/setup-flyctl@master
15 | - run: flyctl deploy
16 |
--------------------------------------------------------------------------------
/.github/workflows/format.yml:
--------------------------------------------------------------------------------
1 | name: Formatter
2 |
3 | on:
4 | pull_request:
5 | types: [opened]
6 | issue_comment:
7 | types: [created]
8 |
9 | jobs:
10 | format:
11 | name: "Format"
12 | runs-on: ubuntu-22.04
13 | if: github.event.issue.pull_request
14 | steps:
15 | - uses: khan/pull-request-comment-trigger@v1.1.0
16 | id: check
17 | with:
18 | trigger: '/format'
19 | reaction: "+1" # Reaction must be one of the reactions here: https://developer.github.com/v3/reactions/#reaction-types
20 | env:
21 | GITHUB_TOKEN: '${{ secrets.GITHUB_TOKEN }}'
22 |
23 | - uses: actions/checkout@v3
24 | if: steps.check.outputs.triggered == 'true'
25 |
26 | - name: Check out PR
27 | if: steps.check.outputs.triggered == 'true'
28 | run: gh pr checkout ${{ github.event.issue.number }}
29 | env:
30 | GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
31 |
32 | - uses: actions/setup-go@v3
33 | with:
34 | go-version-file: go.mod
35 | cache: true
36 |
37 | - name: Format Go Code
38 | if: steps.check.outputs.triggered == 'true'
39 | run: go fmt .
40 |
41 | - name: Commit changes
42 | if: steps.check.outputs.triggered == 'true'
43 | run: |
44 | git config user.name "GitHub Actions Bot"
45 | git config user.email "<>"
46 |
47 | git status
48 | git diff --stat
49 | git commit -am "chore: format code"
50 |
51 | git log --oneline --max-count=10
52 |
53 | git push
54 |
55 | - uses: khan/pull-request-comment-trigger@v1.1.0
56 | if: failure()
57 | with:
58 | trigger: '/format'
59 | reaction: "confused" # Reaction must be one of the reactions here: https://developer.github.com/v3/reactions/#reaction-types
60 | env:
61 | GITHUB_TOKEN: '${{ secrets.GITHUB_TOKEN }}'
62 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # mac
2 | .DS_Store
3 |
4 | # other?
5 | .history
6 |
7 | #general
8 | /node_modules
9 | /data
10 | /.envrc.local
11 |
12 | personal-notes.md
13 | working-query.sql
--------------------------------------------------------------------------------
/.golangci.yml:
--------------------------------------------------------------------------------
1 | linters:
2 | disable:
3 | - staticcheck
4 |
5 | run:
6 | skip-dirs:
7 | - "go/pkg/mod"
8 | - "/Cellar/go"
9 |
--------------------------------------------------------------------------------
/.mergify.yml:
--------------------------------------------------------------------------------
1 | queue_rules:
2 | - name: Merge dependency-update PRs
3 | queue_conditions:
4 | - label=dependencies
5 | - base=master
6 | merge_conditions: []
7 | merge_method: squash
8 |
9 | - name: Merge PRs using label (rebase)
10 | queue_conditions:
11 | - label=ready-to-merge-rebase
12 | - base=master
13 | - "#review-requested=0"
14 | - "#changes-requested-reviews-by=0"
15 | - "#review-threads-unresolved=0"
16 | merge_conditions: []
17 | merge_method: rebase
18 |
19 | - name: Merge PRs using label (squash)
20 | queue_conditions:
21 | - label=ready-to-merge-squash
22 | - base=master
23 | - "#review-requested=0"
24 | - "#changes-requested-reviews-by=0"
25 | - "#review-threads-unresolved=0"
26 | merge_conditions: []
27 | merge_method: squash
28 |
29 | pull_request_rules:
30 | - name: All PRs into queue
31 | conditions: []
32 | actions:
33 | queue:
34 |
--------------------------------------------------------------------------------
/aboutpage.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "net/http"
5 |
6 | "github.com/pkg/errors"
7 | )
8 |
9 | type AboutPageData struct {
10 | PageTemplateData
11 | }
12 |
13 | func (d AboutPageData) IsAboutPage() bool {
14 | return true
15 | }
16 |
17 | func (app app) aboutHandler() func(http.ResponseWriter, *http.Request, struct{}) error {
18 | return func(w http.ResponseWriter, r *http.Request, p struct{}) error {
19 | w.Header().Set("Content-Type", "text/html; charset=utf-8")
20 |
21 | err := templates.ExecuteTemplate(w, "about.html.tmpl", AboutPageData{PageTemplateData{UserID: app.getUserID(r)}})
22 |
23 | return errors.Wrap(err, "executing about page template")
24 | }
25 | }
26 |
--------------------------------------------------------------------------------
/algorithmspage.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "net/http"
5 |
6 | "github.com/pkg/errors"
7 | )
8 |
9 | type AlgorithmsPageData struct {
10 | PageTemplateData
11 | }
12 |
13 | func (d AlgorithmsPageData) IsAlgorithmsPage() bool {
14 | return true
15 | }
16 |
17 | func (app app) algorithmsHandler() func(http.ResponseWriter, *http.Request, struct{}) error {
18 | return func(w http.ResponseWriter, r *http.Request, p struct{}) error {
19 | w.Header().Set("Content-Type", "text/html; charset=utf-8")
20 |
21 | err := templates.ExecuteTemplate(w, "about.html.tmpl", AlgorithmsPageData{PageTemplateData{UserID: app.getUserID(r)}})
22 |
23 | return errors.Wrap(err, "executing Algorithms page template")
24 | }
25 | }
26 |
--------------------------------------------------------------------------------
/app.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "context"
5 | "net/http"
6 | "os"
7 | "strconv"
8 | "time"
9 |
10 | "github.com/johnwarden/hn"
11 | "golang.org/x/exp/slog"
12 |
13 | retryablehttp "github.com/hashicorp/go-retryablehttp"
14 | )
15 |
16 | type app struct {
17 | ndb newsDatabase
18 | hnClient *hn.Client
19 | httpClient *http.Client
20 | logger *slog.Logger
21 | cacheSize int
22 | archiveTriggerChan chan context.Context
23 | }
24 |
25 | func initApp() app {
26 | var err error
27 | var cacheSize int
28 | {
29 | s := os.Getenv("CACHE_SIZE")
30 | if s != "" {
31 | cacheSize, err = strconv.Atoi(s)
32 | if err != nil {
33 | LogFatal(slog.Default(), "CACHE_SIZE", err)
34 | }
35 | }
36 | }
37 |
38 | logLevelString := os.Getenv("LOG_LEVEL")
39 | logFormatString := os.Getenv("LOG_FORMAT")
40 | logger := newLogger(logLevelString, logFormatString)
41 |
42 | sqliteDataDir := os.Getenv("SQLITE_DATA_DIR")
43 | if sqliteDataDir == "" {
44 | panic("SQLITE_DATA_DIR not set")
45 | }
46 |
47 | db, err := openNewsDatabase(sqliteDataDir)
48 | if err != nil {
49 | LogFatal(logger, "openNewsDatabase", err)
50 | }
51 |
52 | retryClient := retryablehttp.NewClient()
53 | retryClient.RetryMax = 3
54 | retryClient.RetryWaitMin = 1 * time.Second
55 | retryClient.RetryWaitMax = 5 * time.Second
56 |
57 | retryClient.Logger = wrapLoggerForRetryableHTTPClient(logger)
58 |
59 | httpClient := retryClient.StandardClient()
60 |
61 | hnClient := hn.NewClient(httpClient)
62 |
63 | return app{
64 | httpClient: httpClient,
65 | hnClient: hnClient,
66 | logger: logger,
67 | ndb: db,
68 | cacheSize: cacheSize,
69 | archiveTriggerChan: make(chan context.Context, 1),
70 | }
71 | }
72 |
73 | func (app app) cleanup() {
74 | app.ndb.close()
75 | }
76 |
--------------------------------------------------------------------------------
/auth.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "database/sql"
5 | "math/rand"
6 | "net/http"
7 | "strconv"
8 |
9 | "github.com/johnwarden/httperror"
10 | "github.com/pkg/errors"
11 | )
12 |
13 | func (app app) getUserID(r *http.Request) sql.NullInt64 {
14 | var id sql.NullInt64
15 |
16 | cookie, err := r.Cookie("userID")
17 | if err != nil {
18 | if !errors.Is(err, http.ErrNoCookie) {
19 | app.logger.Error("r.Cookie('userID')", err)
20 | }
21 | return id
22 | }
23 |
24 | idInt, err := strconv.Atoi(cookie.Value)
25 | if err != nil {
26 | app.logger.Error("Parsing cookie", err)
27 | }
28 |
29 | id.Int64 = int64(idInt)
30 | id.Valid = true
31 |
32 | return id
33 | }
34 |
35 | type loginParams struct {
36 | UserID sql.NullInt64
37 | }
38 |
39 | func (app app) loginHandler() func(http.ResponseWriter, *http.Request, loginParams) error {
40 | return func(w http.ResponseWriter, r *http.Request, p loginParams) error {
41 | userID := p.UserID
42 |
43 | if !userID.Valid {
44 | loggedInUserID := app.getUserID(r)
45 | if loggedInUserID.Valid {
46 | http.Redirect(w, r, "/", http.StatusTemporaryRedirect)
47 | return nil
48 | }
49 |
50 | // Assign a random user ID if none specified as parameter
51 | userID.Int64 = rand.Int63()
52 | userID.Valid = true
53 | }
54 |
55 | if userID.Int64 == 0 {
56 | return httperror.PublicErrorf(http.StatusUnauthorized, "Can't login as user 0")
57 | }
58 |
59 | setUserIDCookie(w, userID)
60 |
61 | http.Redirect(w, r, "/score", http.StatusTemporaryRedirect)
62 |
63 | return nil
64 | }
65 | }
66 |
67 | func (app app) logoutHandler() func(http.ResponseWriter, *http.Request, struct{}) error {
68 | return func(w http.ResponseWriter, r *http.Request, p struct{}) error {
69 | var userID sql.NullInt64
70 | setUserIDCookie(w, userID)
71 |
72 | http.Redirect(w, r, "/", http.StatusTemporaryRedirect)
73 |
74 | return nil
75 | }
76 | }
77 |
78 | func setUserIDCookie(w http.ResponseWriter, userID sql.NullInt64) {
79 | value := strconv.Itoa(int(userID.Int64))
80 | maxAge := 365 * 24 * 60 * 60
81 | if !userID.Valid {
82 | maxAge = -1
83 | value = ""
84 | }
85 |
86 | cookie := http.Cookie{
87 | Name: "userID",
88 | Value: value,
89 | Path: "/",
90 | MaxAge: maxAge,
91 | HttpOnly: true,
92 | Secure: true,
93 | SameSite: http.SameSiteLaxMode,
94 | }
95 |
96 | // Use the http.SetCookie() function to send the cookie to the client.
97 | // Behind the scenes this adds a `Set-Cookie` header to the response
98 | // containing the necessary cookie data.
99 | http.SetCookie(w, &cookie)
100 | }
101 |
--------------------------------------------------------------------------------
/canonicaldomain.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "fmt"
5 | "net/http"
6 | "strings"
7 |
8 | "github.com/johnwarden/httperror"
9 | )
10 |
11 | var nonCanonicalDomains = map[string]string{
12 | "social-protocols-news.fly.dev": "news.social-protocols.org",
13 | "127.0.0.1:8080": "localhost:8080", // just for testing
14 | }
15 |
16 | var canonicalDomains = getValues(nonCanonicalDomains)
17 |
18 | func (app app) canonicalDomainMiddleware(handler http.Handler) http.Handler {
19 | return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
20 | // Redirect any non-canonical domain to the corresponding canonical domain.
21 | for nonCanonicalDomain, canonicalDomain := range nonCanonicalDomains {
22 | if r.Host == nonCanonicalDomain {
23 | url := "https://" + canonicalDomain + r.RequestURI
24 | http.Redirect(w, r, url, http.StatusMovedPermanently)
25 | return
26 | }
27 | }
28 | isCanonical := false
29 | for _, canonicalDomain := range canonicalDomains {
30 | if strings.HasPrefix(r.Host, canonicalDomain) {
31 | isCanonical = true
32 | break
33 | }
34 | }
35 | if !isCanonical {
36 | httperror.DefaultErrorHandler(w, httperror.New(http.StatusForbidden, fmt.Sprintf("Invalid request host: %s", r.Host)))
37 | return
38 | }
39 |
40 | handler.ServeHTTP(w, r)
41 | })
42 | }
43 |
--------------------------------------------------------------------------------
/compare-against-random-voter.sql:
--------------------------------------------------------------------------------
1 | with parameters as (
2 | select
3 | 1.50 as priorWeight
4 | , 0.003462767 as fatigueFactor
5 | ),
6 | stories as (
7 | select
8 | id
9 | , votes.entryTime is not null as mystory
10 | , entryUpvoteRate
11 | , max(cumulativeUpvotes) as cumulativeUpvotes
12 | , max(cumulativeExpectedUpvotes) as cumulativeExpectedUpvotes
13 | , max(score) as score
14 | , (cumulativeUpvotes + priorWeight)/((1-exp(-fatigueFactor*cumulativeExpectedUpvotes))/fatigueFactor + priorWeight) qualityScore
15 |
16 | , log((cumulativeUpvotes + priorWeight)/((1-exp(-fatigueFactor*cumulativeExpectedUpvotes))/fatigueFactor + priorWeight))*100 gain
17 |
18 |
19 | from dataset
20 | join parameters
21 | left join votes on
22 | votes.userID = 1
23 | and votes.storyID = dataset.id
24 |
25 |
26 | -- where id >= (select min(storyID) from votes where userID = 1 and storyID > 36754601) and id <= (select max(storyID) from votes where userID = 1 and storyID > 36754601)
27 | -- where id >= (select min(storyID) from votes where userID = 1 and storyID > 36780531) and id <= (select max(storyID) from votes where userID = 1 and storyID > 36780531)
28 | -- where id >= (select min(storyID) from votes where userID = 1)
29 | where id >= (select min(storyID) from votes where userID = 1) and id <= (select max(storyID) from votes where userID = 1)
30 |
31 | -- and id <= (select max(storyID) from votes where userID = 1)
32 |
33 | group by id
34 | )
35 |
36 | -- select * from stories where id = 36805284;
37 |
38 |
39 |
40 | , sums as (
41 | select
42 | sum(case when mystory then cumulativeUpvotes else null end) as myCumulativeUpvotes
43 | , sum(case when mystory then cumulativeExpectedUpvotes else null end) as myCumulativeExpectedUpvotes
44 | , avg(case when mystory then score else null end) as myAverageScore
45 | , avg(case when mystory then cumulativeUpvotes / cumulativeExpectedUpvotes else null end) as myAverageUpvoteRate
46 |
47 | -- The below doesn't make sense, because cumulativeUpvotes is sometimes 0 and the log of 0 is undefined.
48 | -- , exp(avg(case when mystory then log(cumulativeUpvotes / cumulativeExpectedUpvotes) else null end)) as myGeoAverageUpvoteRate
49 |
50 |
51 | -- , sum(case when votes.entryTime is not null then score-1 else null end)/count(distinct votes.storyID) as myAverageScore
52 | , sum(cumulativeUpvotes) as overallCumulativeUpvotes
53 | , sum(cumulativeExpectedUpvotes) as overallCumulativeExpectedUpvotes
54 | , avg(score) as overallAverageScore
55 | , avg(cumulativeUpvotes / cumulativeExpectedUpvotes) as overallAverageUpvoteRate
56 |
57 | -- The below doesn't make sense, because cumulativeUpvotes is sometimes 0 and the log of 0 is undefined.
58 | -- , exp(avg(log(cumulativeUpvotes / cumulativeExpectedUpvotes))) as overallGeoAverageUpvoteRate
59 |
60 |
61 | , exp(avg(log((cumulativeUpvotes + priorWeight)/((1-exp(-fatigueFactor*cumulativeExpectedUpvotes))/fatigueFactor + priorWeight)))) geoAverageQualityScore
62 |
63 |
64 | , sum(log((cumulativeUpvotes + priorWeight)/((1-exp(-fatigueFactor*cumulativeExpectedUpvotes))/fatigueFactor + priorWeight)) )*100 baselineGain
65 |
66 |
67 | -- , exp(avg(log((cumulativeUpvotes + priorWeight)/(cumulativeExpectedUpvotes + priorWeight)))) geoAverageQualityScore
68 |
69 |
70 | -- , sum(case when votes.entryTime is null then score-1 else null end)/(count(distinct dataset.id) - count(distinct votes.storyID)) as overallAverageScore
71 | from stories
72 | join parameters
73 | )
74 | select
75 | -- *
76 | myAverageScore
77 | , myAverageUpvoteRate
78 | , myCumulativeUpvotes/myCumulativeExpectedUpvotes as myUpvoteRate
79 | , overallAverageScore
80 | , overallAverageUpvoteRate
81 | , overallCumulativeUpvotes/overallCumulativeExpectedUpvotes as overallUpvoteRate
82 | , geoAverageQualityScore
83 | , baselineGain
84 | from sums;
85 |
86 |
87 | -- Discussion: The geomean quality score is close to 1, as expected. The average score is greater than 1, because that's what will happen
88 | -- if you take the average of exp(x) when the average of x is 0. For example in R:
89 | -- (ins)> x = rnorm(10000, mean=0, sd=2)
90 | -- (ins)> mean(x)
91 | -- [1] -0.007797868
92 | -- (ins)> mean(exp(x))
93 | -- [1] 9.844065
94 |
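95 | -- Added note (a toy illustration; assumes the same exp()/log() math functions used above are available):
96 | -- the gap between the arithmetic and geometric mean of a ratio can be checked directly, e.g.
97 | --
98 | --   select avg(x) as arith_mean, exp(avg(log(x))) as geo_mean
99 | --   from (select 0.5 as x union all select 2.0);
100 | --
101 | -- This returns arith_mean = 1.25 and geo_mean = 1.0: even when the "typical" (geometric-mean) ratio
102 | -- is exactly 1, the arithmetic average is pulled above 1, which is why the average score above
103 | -- exceeds 1 while geoAverageQualityScore stays near 1.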
--------------------------------------------------------------------------------
/devbox.json:
--------------------------------------------------------------------------------
1 | {
2 | "$schema": "https://raw.githubusercontent.com/jetify-com/devbox/0.13.4/.schema/devbox.schema.json",
3 | "packages": [
4 | "entr@latest",
5 | "git@latest",
6 | "gcc@latest",
7 | "gotools@latest",
8 | "golangci-lint@latest",
9 | "sqlite-interactive@latest",
10 | "go@latest",
11 | "just@latest"
12 | ]
13 | }
14 |
--------------------------------------------------------------------------------
/domain_penalties.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "bytes"
5 | "encoding/csv"
6 | "fmt"
7 | "io"
8 | "strconv"
9 |
10 | "github.com/pkg/errors"
11 | "gorm.io/driver/sqlite"
12 | "gorm.io/gorm"
13 | "gorm.io/gorm/clause"
14 | )
15 |
16 | type DomainPenalty struct {
17 | Domain string `gorm:"primaryKey"`
18 | AvgPenalty float64
19 | }
20 |
21 | func (ndb newsDatabase) importPenaltiesData(sqliteDataDir string) error {
22 | frontpageDatabaseFilename := fmt.Sprintf("%s/%s", sqliteDataDir, sqliteDataFilename)
23 |
24 | db, err := gorm.Open(sqlite.Open(frontpageDatabaseFilename), &gorm.Config{})
25 | if err != nil {
26 | panic("failed to connect database")
27 | }
28 |
29 | err = db.AutoMigrate(&DomainPenalty{})
30 | if err != nil {
31 | return errors.Wrap(err, "db.AutoMigrate Domain Penalties table")
32 | }
33 |
34 | // Open domain penalty seed data file as CSV
35 | b, _ := resources.ReadFile("seed/domain-penalties.csv")
36 | buf := bytes.NewBuffer(b)
37 | r := csv.NewReader(buf)
38 |
39 | // Read the header row.
40 | _, err = r.Read()
41 | if err != nil {
42 | return errors.Wrap(err, "missing header row in domain penalties data")
43 | }
44 |
45 | for {
46 | record, err := r.Read()
47 | if err != nil {
48 | if errors.Is(err, io.EOF) {
49 | break
50 | }
51 | return errors.Wrapf(err, "Parsing penalty CSV")
52 | }
53 |
54 | avgPenalty, err := strconv.ParseFloat(record[1], 64)
55 | if err != nil {
56 | return errors.Wrapf(err, "Parsing penalty record %s, %s", record[0], record[1])
57 | }
58 | err = db.Clauses(clause.OnConflict{ // adding this onConflict clause makes the create into an upsert
59 | UpdateAll: true,
60 | }).Create(&DomainPenalty{Domain: record[0], AvgPenalty: avgPenalty}).Error
61 |
62 | if err != nil {
63 | return errors.Wrapf(err, "Parsing inserting domain penalty %s, %f", record[0], avgPenalty)
64 | }
65 |
66 | }
67 |
68 | return nil
69 | }
70 |
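71 | // Added note: with the clause.OnConflict{UpdateAll: true} used above, gorm turns the Create into an
72 | // upsert roughly along the lines of
73 | //
74 | //   INSERT INTO domain_penalties (domain, avg_penalty) VALUES (?, ?)
75 | //   ON CONFLICT (domain) DO UPDATE SET avg_penalty = excluded.avg_penalty
76 | //
77 | // so re-importing the seed CSV refreshes existing rows instead of failing on the Domain primary key.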
--------------------------------------------------------------------------------
/fly.toml:
--------------------------------------------------------------------------------
1 | # fly.toml file generated for social-protocols-news on 2022-09-14T17:00:08+02:00
2 |
3 | app = "social-protocols-news"
4 | kill_signal = "SIGINT"
5 | kill_timeout = 5
6 | processes = []
7 | primary_region = "ewr"
8 |
9 | [build]
10 | builder = "paketobuildpacks/builder:base"
11 | buildpacks = ["gcr.io/paketo-buildpacks/go"]
12 |
13 | [env]
14 | PORT = "8080"
15 | SQLITE_DATA_DIR="/data"
16 | LOG_LEVEL="DEBUG"
17 | CACHE_SIZE="100"
18 | R2_BUCKET="news-archive"
19 | R2_USE_SSL="true"
20 | R2_ENDPOINT="https://9e2da4e2b5c6dd05d36f399d4afc7d4c.r2.cloudflarestorage.com"
21 |
22 | [experimental]
23 | allowed_public_ports = []
24 | auto_rollback = true
25 |
26 | [[services]]
27 | http_checks = []
28 | internal_port = 8080
29 | processes = ["app"]
30 | protocol = "tcp"
31 | script_checks = []
32 | [services.concurrency]
33 | hard_limit = 25
34 | soft_limit = 20
35 | type = "connections"
36 |
37 | [[services.ports]]
38 | force_https = true
39 | handlers = ["http"]
40 | port = 80
41 |
42 | [[services.ports]]
43 | handlers = ["tls", "http"]
44 | port = 443
45 |
46 | [[services.tcp_checks]]
47 | grace_period = "1s"
48 | interval = "15s"
49 | restart_limit = 0
50 | timeout = "2s"
51 |
52 |
53 |
54 | # flyctl volumes create data --region ewr --size 3
55 | [[mounts]]
56 | source = "data3"
57 | destination = "/data"
58 |
59 | # prometheus metrics
60 | [metrics]
61 | port = 9091
62 | path = "/metrics"
63 |
64 |
--------------------------------------------------------------------------------
/go.mod:
--------------------------------------------------------------------------------
1 | module github.com/social-protocols/news
2 |
3 | go 1.22
4 |
5 | toolchain go1.23.3
6 |
7 | require (
8 | github.com/NYTimes/gziphandler v1.1.1
9 | github.com/VictoriaMetrics/metrics v1.23.0
10 | github.com/dustin/go-humanize v1.0.1
11 | github.com/gocolly/colly/v2 v2.1.0
12 | github.com/gorilla/schema v1.2.0
13 | github.com/hashicorp/go-retryablehttp v0.7.1
14 | github.com/johnwarden/hn v1.0.1
15 | github.com/johnwarden/httperror v1.6.0
16 | github.com/julienschmidt/httprouter v1.3.0
17 | github.com/mattn/go-sqlite3 v1.14.15
18 | github.com/minio/minio-go/v7 v7.0.80
19 | github.com/multiprocessio/go-sqlite3-stdlib v0.0.0-20220822170115-9f6825a1cd25
20 | github.com/pkg/errors v0.9.1
21 | github.com/weppos/publicsuffix-go v0.20.0
22 | golang.org/x/exp v0.0.0-20221114191408-850992195362
23 | gonum.org/v1/gonum v0.12.0
24 | gorm.io/driver/sqlite v1.4.3
25 | gorm.io/gorm v1.24.2
26 | )
27 |
28 | //replace github.com/johnwarden/httperror v1.6.0 => ../httperror
29 | //replace "github.com/johnwarden/hn" v1.0.1 => "../hn"
30 |
31 | require (
32 | github.com/PuerkitoBio/goquery v1.5.1 // indirect
33 | github.com/alitto/pond/v2 v2.1.4 // indirect
34 | github.com/andybalholm/cascadia v1.2.0 // indirect
35 | github.com/antchfx/htmlquery v1.2.3 // indirect
36 | github.com/antchfx/xmlquery v1.2.4 // indirect
37 | github.com/antchfx/xpath v1.1.8 // indirect
38 | github.com/araddon/dateparse v0.0.0-20210429162001-6b43995a97de // indirect
39 | github.com/fatih/color v1.13.0 // indirect
40 | github.com/go-ini/ini v1.67.0 // indirect
41 | github.com/gobwas/glob v0.2.3 // indirect
42 | github.com/goccy/go-json v0.10.3 // indirect
43 | github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e // indirect
44 | github.com/golang/protobuf v1.4.2 // indirect
45 | github.com/google/uuid v1.6.0 // indirect
46 | github.com/hashicorp/go-cleanhttp v0.5.2 // indirect
47 | github.com/hashicorp/go-hclog v0.16.2 // indirect
48 | github.com/jinzhu/inflection v1.0.0 // indirect
49 | github.com/jinzhu/now v1.1.5 // indirect
50 | github.com/kennygrant/sanitize v1.2.4 // indirect
51 | github.com/klauspost/compress v1.17.11 // indirect
52 | github.com/klauspost/cpuid/v2 v2.2.8 // indirect
53 | github.com/mattn/go-colorable v0.1.13 // indirect
54 | github.com/mattn/go-isatty v0.0.16 // indirect
55 | github.com/minio/md5-simd v1.1.2 // indirect
56 | github.com/rs/xid v1.6.0 // indirect
57 | github.com/saintfish/chardet v0.0.0-20120816061221-3af4cd4741ca // indirect
58 | github.com/temoto/robotstxt v1.1.1 // indirect
59 | github.com/valyala/fastrand v1.1.0 // indirect
60 | github.com/valyala/histogram v1.2.0 // indirect
61 | golang.org/x/crypto v0.28.0 // indirect
62 | golang.org/x/net v0.30.0 // indirect
63 | golang.org/x/sys v0.26.0 // indirect
64 | golang.org/x/text v0.19.0 // indirect
65 | google.golang.org/appengine v1.6.6 // indirect
66 | google.golang.org/protobuf v1.24.0 // indirect
67 | )
68 |
--------------------------------------------------------------------------------
/health.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "fmt"
5 | "net/http"
6 | "time"
7 |
8 | "github.com/pkg/errors"
9 | )
10 |
11 | const alertAfterMinutes = 5
12 |
13 | func (app app) healthHandler() func(http.ResponseWriter, *http.Request, loginParams) error {
14 | return func(w http.ResponseWriter, r *http.Request, p loginParams) error {
15 | w.Header().Set("Content-Type", "text/plain; charset=utf-8")
16 |
17 | if r.Method != http.MethodHead {
18 | _, err := w.Write([]byte("ok"))
19 | if err != nil {
20 | return errors.Wrap(err, "writing response")
21 | }
22 | }
23 |
24 | return nil
25 | }
26 | }
27 |
28 | func (app app) crawlHealthHandler() func(http.ResponseWriter, *http.Request, loginParams) error {
29 | return func(w http.ResponseWriter, r *http.Request, p loginParams) error {
30 | w.Header().Set("Content-Type", "text/plain; charset=utf-8")
31 |
32 | lastSampleTime, err := app.ndb.selectLastCrawlTime()
33 | if err != nil {
34 | return errors.Wrap(err, "getting last crawl time")
35 | }
36 |
37 | if time.Now().Unix()-int64(lastSampleTime) > alertAfterMinutes*60 {
38 | return fmt.Errorf("last successful crawl of %d is more than %d minutes ago", lastSampleTime, alertAfterMinutes)
39 | }
40 |
41 | if r.Method != http.MethodHead {
42 | _, err = w.Write([]byte("ok"))
43 | if err != nil {
44 | return errors.Wrap(err, "writing response")
45 | }
46 | }
47 |
48 | return nil
49 | }
50 | }
51 |
--------------------------------------------------------------------------------
/httpserver.go:
--------------------------------------------------------------------------------
1 | //nolint:typecheck
2 | package main
3 |
4 | import (
5 | "embed"
6 | "io/fs"
7 | "net/http"
8 | "os"
9 | "time"
10 |
11 | "github.com/julienschmidt/httprouter"
12 | "github.com/pkg/errors"
13 | )
14 |
15 | const (
16 | // writeTimeout = 2500 * time.Millisecond
17 | writeTimeout = 60 * time.Second
18 | readHeaderTimeout = 5 * time.Second
19 | )
20 |
21 | //go:embed static
22 | var staticFS embed.FS
23 |
24 | func (app app) httpServer(onPanic func(error)) *http.Server {
25 | l := app.logger
26 |
27 | port := os.Getenv("PORT")
28 | if port == "" {
29 | port = "8080"
30 | }
31 |
32 | listenAddress := os.Getenv("LISTEN_ADDRESS")
33 |
34 | staticRoot, err := fs.Sub(staticFS, "static")
35 | if err != nil {
36 | LogFatal(l, "fs.Sub", err)
37 | }
38 |
39 | server := &http.Server{
40 | Addr: listenAddress + ":" + port,
41 | WriteTimeout: writeTimeout - 100*time.Millisecond,
42 | ReadHeaderTimeout: readHeaderTimeout,
43 | }
44 |
45 | router := httprouter.New()
46 | router.GET("/static/*filepath", app.serveFiles(http.FS(staticRoot)))
47 |
48 | router.GET("/", middleware("hntop", l, onPanic, app.frontpageHandler("hntop")))
49 | router.GET("/new", middleware("new", l, onPanic, app.frontpageHandler("new")))
50 | router.GET("/top", middleware("top", l, onPanic, app.frontpageHandler("hntop")))
51 | router.GET("/best", middleware("best", l, onPanic, app.frontpageHandler("best")))
52 | router.GET("/ask", middleware("ask", l, onPanic, app.frontpageHandler("ask")))
53 | router.GET("/show", middleware("show", l, onPanic, app.frontpageHandler("show")))
54 | router.GET("/raw", middleware("raw", l, onPanic, app.frontpageHandler("raw")))
55 | router.GET("/fair", middleware("fair", l, onPanic, app.frontpageHandler("fair")))
56 | router.GET("/upvoterate", middleware("upvoterate", l, onPanic, app.frontpageHandler("upvoterate")))
57 | router.GET("/best-upvoterate", middleware("best-upvoterate", l, onPanic, app.frontpageHandler("best-upvoterate")))
58 | router.GET("/penalties", middleware("penalties", l, onPanic, app.frontpageHandler("penalties")))
59 | router.GET("/boosts", middleware("boosts", l, onPanic, app.frontpageHandler("boosts")))
60 | router.GET("/resubmissions", middleware("resubmissions", l, onPanic, app.frontpageHandler("resubmissions")))
61 | router.GET("/stats", middleware("stats", l, onPanic, app.statsHandler()))
62 | router.GET("/about", middleware("about", l, onPanic, app.aboutHandler()))
63 | router.GET("/algorithms", middleware("algorithms", l, onPanic, app.algorithmsHandler()))
64 |
65 | router.POST("/vote", middleware("upvote", l, onPanic, app.voteHandler()))
66 |
67 | router.GET("/score", middleware("score", l, onPanic, app.scoreHandler()))
68 |
69 | router.GET("/login", middleware("login", l, onPanic, app.loginHandler()))
70 | router.GET("/logout", middleware("logout", l, onPanic, app.logoutHandler()))
71 |
72 | router.GET("/health", middleware("health", l, onPanic, app.healthHandler()))
73 | router.HEAD("/health", middleware("health", l, onPanic, app.healthHandler()))
74 | router.GET("/crawl-health", middleware("crawl-health", l, onPanic, app.crawlHealthHandler()))
75 | router.HEAD("/crawl-health", middleware("crawl-health", l, onPanic, app.crawlHealthHandler()))
76 |
77 | server.Handler = app.preRouterMiddleware(router, writeTimeout-100*time.Millisecond)
78 |
79 | return server
80 | }
81 |
82 | func (app app) frontpageHandler(ranking string) func(http.ResponseWriter, *http.Request, OptionalFrontPageParams) error {
83 | return func(w http.ResponseWriter, r *http.Request, params OptionalFrontPageParams) error {
84 | w.Header().Set("Content-Type", "text/html; charset=utf-8")
85 |
86 | err := app.serveFrontPage(r, w, ranking, params.WithDefaults())
87 | return errors.Wrap(err, "serveFrontPage")
88 | }
89 | }
90 |
91 | func (app app) statsHandler() func(http.ResponseWriter, *http.Request, StatsPageParams) error {
92 | return func(w http.ResponseWriter, r *http.Request, params StatsPageParams) error {
93 | w.Header().Set("Content-Type", "text/html; charset=utf-8")
94 |
95 | userID := app.getUserID(r)
96 | return app.statsPage(w, r, params, userID)
97 | }
98 | }
99 |
100 | func (app app) serveFiles(root http.FileSystem) func(w http.ResponseWriter, r *http.Request, p httprouter.Params) {
101 | fileServer := http.FileServer(root)
102 |
103 | return func(w http.ResponseWriter, r *http.Request, p httprouter.Params) {
104 | w.Header().Set("Cache-Control", "public, max-age=86400") // 1 hours
105 | r.URL.Path = p.ByName("filepath")
106 | fileServer.ServeHTTP(w, r)
107 | }
108 | }
109 |
--------------------------------------------------------------------------------
/init.sql:
--------------------------------------------------------------------------------
1 | attach database 'file:/Users/jwarden/hacker-news-data-datadir/frontpage.sqlite?mode=ro' as frontpage;
2 |
--------------------------------------------------------------------------------
/justfile:
--------------------------------------------------------------------------------
1 | set dotenv-load := true
2 |
3 | # List available recipes in the order in which they appear in this file
4 | _default:
5 | @just --list --unsorted
6 |
7 | watch:
8 | ./watch.sh
9 |
10 | sqlite:
11 | sqlite3 $SQLITE_DATA_DIR/frontpage.sqlite
12 |
13 | upvotes-db:
14 | ./upvotes-db.sh
15 |
16 | format:
17 | go fmt
--------------------------------------------------------------------------------
/logger.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "fmt"
5 | "os"
6 | "strings"
7 |
8 | "golang.org/x/exp/slog"
9 | )
10 |
11 | func newLogger(levelString, formatString string) *slog.Logger {
12 | if levelString == "" {
13 | levelString = "DEBUG"
14 | }
15 |
16 | logLevels := map[string]slog.Leveler{
17 | "DEBUG": slog.DebugLevel,
18 | "INFO": slog.InfoLevel,
19 | "WARN": slog.WarnLevel,
20 | "ERROR": slog.ErrorLevel,
21 | }
22 |
23 | l, ok := logLevels[strings.ToUpper(levelString)]
24 | if !ok {
25 | panic("Unrecognized log level: " + levelString)
26 | }
27 |
28 | var lh slog.Handler
29 |
30 | if strings.ToUpper(formatString) == "JSON" {
31 | lh = slog.HandlerOptions{Level: l}.NewJSONHandler(os.Stdout)
32 | } else {
33 | lh = slog.HandlerOptions{Level: l}.NewTextHandler(os.Stdout)
34 | }
35 |
36 | logger := slog.New(lh)
37 | slog.SetDefault(logger)
38 | return logger
39 | }
40 |
41 | func LogErrorf(logger *slog.Logger, msg string, args ...interface{}) {
42 | logger.Error(fmt.Sprintf(msg, args...), nil)
43 | }
44 |
45 | func Debugf(logger *slog.Logger, msg string, args ...interface{}) {
46 | logger.Debug(fmt.Sprintf(msg, args...))
47 | }
48 |
49 | func LogFatal(logger *slog.Logger, msg string, err error, args ...interface{}) {
50 | if len(args) > 0 {
51 | logger.Error(msg, err, args...)
52 | } else {
53 | logger.Error(msg, err)
54 | }
55 | os.Exit(2)
56 | }
57 |
58 | type retryableHTTPClientloggerWrapper struct {
59 | *slog.Logger
60 | }
61 |
62 | func (l retryableHTTPClientloggerWrapper) Error(msg string, keysAndValues ...interface{}) {
63 | l.Logger.Error("retryableHTTPClient: "+msg, nil, keysAndValues...)
64 | }
65 |
66 | func (l retryableHTTPClientloggerWrapper) Debug(msg string, keysAndValues ...interface{}) {
67 | // ignore very verbose debug output from retryableHTTPClientloggerWrapper
68 | }
69 |
70 | // wrapLoggerForRetryableHTTPClient wraps a logger so that it implements an interface required by retryableHTTPClient
71 | func wrapLoggerForRetryableHTTPClient(logger *slog.Logger) retryableHTTPClientloggerWrapper {
72 | // ignore debug messages from this retry client.
73 | l := slog.New(logger.Handler())
74 | return retryableHTTPClientloggerWrapper{l}
75 | }
76 |
--------------------------------------------------------------------------------
/logo.svg:
--------------------------------------------------------------------------------
1 |
2 |
76 |
--------------------------------------------------------------------------------
/main.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "context"
5 | "net/http"
6 | "os"
7 | "os/signal"
8 | "syscall"
9 | "time"
10 |
11 | "github.com/pkg/errors"
12 | )
13 |
14 | const maxShutDownTimeout = 5 * time.Second
15 |
16 | func main() {
17 | app := initApp()
18 | defer app.cleanup()
19 |
20 | logger := app.logger
21 |
22 | ctx, cancelContext := context.WithCancel(context.Background())
23 | defer cancelContext()
24 |
25 | shutdownPrometheusServer := servePrometheusMetrics()
26 |
27 | // Start the archive worker
28 | go app.archiveWorker(ctx)
29 |
30 | // Listen for a soft kill signal (INT, TERM, HUP)
31 | c := make(chan os.Signal, 1)
32 | signal.Notify(c, syscall.SIGINT, syscall.SIGTERM, syscall.SIGHUP)
33 |
34 | // shutdown function call in case of 1) panic 2) soft kill signal
35 | var httpServer *http.Server // this variable included in shutdown closure
36 |
37 | shutdown := func() {
38 | // cancel the current background context
39 | cancelContext()
40 |
41 | err := shutdownPrometheusServer(ctx)
42 | if err != nil {
43 | logger.Error("shutdownPrometheusServer", err)
44 | }
45 |
46 | if httpServer != nil {
47 | logger.Info("Shutting down HTTP server")
48 | // shut down the HTTP server with a timeout in case the server doesn't want to shut down.
49 | // use background context, because we just cancelled ctx
50 | ctxWithTimeout, cancel := context.WithTimeout(context.Background(), maxShutDownTimeout)
51 | defer cancel()
52 | err := httpServer.Shutdown(ctxWithTimeout)
53 | if err != nil {
54 | logger.Error("httpServer.Shutdown", err)
55 | // if server doesn't respond to shutdown signal, nothing remains but to panic.
56 | panic("HTTP server shutdown failed")
57 | }
58 |
59 | logger.Info("HTTP server shutdown complete")
60 | }
61 | }
62 |
63 | go func() {
64 | sig := <-c
65 |
66 | // Clean shutdown
67 | logger.Info("Received shutdown signal", "signal", sig)
68 | shutdown()
69 |
70 | // now exit process
71 | logger.Info("Main loop exited. Terminating process")
72 |
73 | os.Exit(0)
74 | }()
75 |
76 | httpServer = app.httpServer(
77 | func(error) {
78 | logger.Info("Panic in HTTP handler. Shutting down")
79 | shutdown()
80 | os.Exit(2)
81 | },
82 | )
83 |
84 | go func() {
85 | logger.Info("HTTP server listening", "address", httpServer.Addr)
86 | err := httpServer.ListenAndServe()
87 | if err != nil && err != http.ErrServerClosed {
88 | logger.Error("server.ListenAndServe", err)
89 | }
90 | logger.Info("Server shut down")
91 | }()
92 |
93 | app.mainLoop(ctx)
94 | }
95 |
96 | func (app app) mainLoop(ctx context.Context) {
97 | logger := app.logger
98 |
99 | lastCrawlTime, err := app.ndb.selectLastCrawlTime()
100 | if err != nil {
101 | LogFatal(logger, "selectLastCrawlTime", err)
102 | }
103 |
104 | t := time.Now().Unix()
105 |
106 | elapsed := int(t) - lastCrawlTime
107 |
108 | // If it has been more than a minute since our last crawl,
109 | // then crawl right away.
110 | if elapsed >= 60 {
111 | logger.Info("60 seconds since last crawl. Crawling now.")
112 | if err = app.crawlAndPostprocess(ctx); err != nil {
113 | logger.Error("crawlAndPostprocess", err)
114 |
115 | if errors.Is(err, context.Canceled) {
116 | return
117 | }
118 | }
119 | } else {
120 | logger.Info("Less than 60 seconds since last crawl.", "waitSeconds", 60-time.Now().Unix()%60)
121 | }
122 |
123 | // And now set a ticker so we crawl every minute going forward
124 | ticker := make(chan int64)
125 |
126 | // Make the first tick happen at the next
127 | // Minute mark.
128 | go func() {
129 | t := time.Now().Unix()
130 | delay := 60 - t%60
131 | <-time.After(time.Duration(delay) * time.Second)
132 | ticker <- t + delay
133 | }()
134 |
135 | for {
136 | select {
137 | case <-ticker:
138 | t := time.Now().Unix()
139 | // Set the next tick at the minute mark. We use this instead of using
140 | // time.NewTicker because in dev mode our app can be suspended, and I
141 | // want to see all the timestamps in the DB as multiples of 60.
142 | delay := 60 - t%60
143 | nextTickTime := t + delay
144 | go func() {
145 | <-time.After(time.Duration(delay) * time.Second)
146 | ticker <- nextTickTime
147 | }()
148 |
149 | logger.Info("Beginning crawl")
150 |
151 | // Create a context with deadline for both crawl and idle period
152 | crawlCtx, cancel := context.WithDeadline(ctx, time.Unix(nextTickTime-1, 0))
153 | defer cancel()
154 |
155 | if err = app.crawlAndPostprocess(crawlCtx); err != nil {
156 | logger.Error("crawlAndPostprocess", err)
157 | } else {
158 | app.logger.Info("Finished crawl and postprocess")
159 |
160 | // Only send idle context if we have enough time (at least 5 seconds)
161 | if delay >= 5 {
162 | // Try to send the same context to the archive worker
163 | select {
164 | case app.archiveTriggerChan <- crawlCtx:
165 | app.logger.Debug("Sent idle context to archive worker")
166 | default:
167 | app.logger.Debug("Archive trigger channel full, skipping signal")
168 | }
169 | } else {
170 | app.logger.Debug("Skipping idle context - not enough time", "delay", delay)
171 | }
172 | }
173 |
174 | case <-ctx.Done():
175 | return
176 | }
177 | }
178 | }
179 |
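180 | // Added note on the minute-mark scheduling above: delay = 60 - t%60 is the number of seconds until
181 | // the next whole minute, so nextTickTime = t + delay always falls on a multiple of 60. For example,
182 | // a tick that fires 23 seconds past the minute gets delay = 37, and the next tick lands exactly on
183 | // the following minute boundary. This keeps the crawl timestamps recorded in the database aligned to
184 | // multiples of 60 even if the process was briefly suspended (which time.NewTicker would not guarantee).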
--------------------------------------------------------------------------------
/middleware.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "database/sql"
5 | "encoding/json"
6 | "net/http"
7 | "reflect"
8 | "strconv"
9 | "strings"
10 | "time"
11 |
12 | "github.com/pkg/errors"
13 | "golang.org/x/exp/slog"
14 |
15 | "github.com/julienschmidt/httprouter"
16 |
17 | "github.com/johnwarden/httperror"
18 |
19 | "github.com/gorilla/schema"
20 |
21 | "github.com/NYTimes/gziphandler"
22 | )
23 |
24 | // middleware converts a handler of type httperror.XHandlerFunc[P] into an
25 | // httprouter.Handle. We use the former type for our http handler functions:
26 | // this is a clean function signature that accepts parameters as a struct and
27 | // returns an error. But we need to pass an httprouter.Handle to our router.
28 | // So we wrap our httperror.XHandlerFunc[P], parsing the URL parameters to
29 | // produce the parameter struct, passing it to the inner handler, then
30 | // handling any errors that are returned.
31 | func middleware[P any](routeName string, logger *slog.Logger, onPanic func(error), h httperror.XHandlerFunc[P]) httprouter.Handle {
32 | h = httperror.XPanicMiddleware[P](h)
33 |
34 | h = prometheusMiddleware[P](routeName, h)
35 |
36 | handleError := func(w http.ResponseWriter, err error) {
37 | if errors.Is(err, httperror.Panic) {
38 | // do this in a goroutine otherwise we get deadlock if onPanic shuts downs the HTTP server
39 | // because the http server shutdown function will wait for all requests to terminate,
40 | // including this one!
41 | go onPanic(err)
42 | }
43 | httperror.DefaultErrorHandler(w, err)
44 | }
45 |
46 | return func(w http.ResponseWriter, r *http.Request, ps httprouter.Params) {
47 | var params P
48 | err := unmarshalRouterRequest(r, ps, &params)
49 | if err != nil {
50 | err = httperror.Wrap(err, http.StatusBadRequest)
51 | logger.Error("unmarshalRouterRequest", err, "url", r.URL)
52 | handleError(w, err)
53 | return
54 | }
55 |
56 | err = h(w, r, params)
57 | if err != nil {
58 | if httperror.StatusCode(err) >= 500 {
59 | logger.Error("executing handler", err, "url", r.URL)
60 | requestErrorsTotal.Inc()
61 | }
62 | handleError(w, err)
63 | }
64 | }
65 | }
66 |
67 | var decoder = schema.NewDecoder()
68 |
69 | func nullInt64Converter(value string) reflect.Value {
70 | var result sql.NullInt64
71 | if value != "" {
72 | v, _ := strconv.ParseInt(value, 10, 64)
73 | result = sql.NullInt64{Int64: v, Valid: true}
74 | }
75 | return reflect.ValueOf(result)
76 | }
77 |
78 | func nullFloat64Converter(value string) reflect.Value {
79 | var result sql.NullFloat64
80 | if value != "" {
81 | v, _ := strconv.ParseFloat(value, 64)
82 | result = sql.NullFloat64{Float64: v, Valid: true}
83 | }
84 | return reflect.ValueOf(result)
85 | }
86 |
87 | func init() {
88 | decoder.RegisterConverter(sql.NullInt64{}, nullInt64Converter)
89 | decoder.RegisterConverter(sql.NullFloat64{}, nullFloat64Converter)
90 | }
91 |
92 | // unmarshalRouterRequest is a generic request URL unmarshaler for use with
93 | // httprouter. It unmarshals the request parameters parsed by httprouter, as
94 | // well as any URL parameters, into a struct of any type, matching query
95 | // names to struct field names.
96 | func unmarshalRouterRequest(r *http.Request, ps httprouter.Params, params any) error {
97 | if r.Method == "POST" {
98 | err := json.NewDecoder(r.Body).Decode(params)
99 | if err != nil {
100 | return errors.Wrap(err, "decode json")
101 | }
102 | return nil
103 | }
104 |
105 | m := make(map[string][]string)
106 |
107 | // First convert the httprouter.Params into a map
108 | for _, p := range ps {
109 | key := p.Key
110 | if v, ok := m[key]; ok {
111 | m[key] = append(v, p.Value)
112 | } else {
113 | m[key] = []string{p.Value}
114 | }
115 | }
116 |
117 | // Then merge in the URL query parameters.
118 | for key, values := range r.URL.Query() {
119 | if v, ok := m[key]; ok {
120 | m[key] = append(v, values...)
121 | } else {
122 | m[key] = values
123 | }
124 | }
125 |
126 | // Then unmarshal.
127 | err := decoder.Decode(params, m)
128 | if err != nil {
129 | if !strings.HasPrefix(err.Error(), "schema: invalid path") {
130 | // ignore errors due to unrecognized parameters
131 | return errors.Wrap(err, "decode parameters")
132 | }
133 | }
134 |
135 | return nil
136 | }
137 |
138 | // preRouterMiddleware wraps the router itself. It is for middleware that does
139 | // not need to know anything about the route (params, name, etc)
140 | func (app app) preRouterMiddleware(handler http.Handler, writeTimeout time.Duration) http.Handler {
141 | handler = app.cacheAndCompressMiddleware(handler)
142 | handler = app.canonicalDomainMiddleware(handler) // redirects must happen before caching!
143 | handler = app.timeoutMiddleware(handler, writeTimeout)
144 | return handler
145 | }
146 |
147 | // We could improve this middleware. Currently we cache before we
148 | // compress, because the cache middleware we use here doesn't recognize the
149 | // accept-encoding header, and if we compressed before we cache, cache
150 | // entries would be randomly compressed or not, regardless of the
151 | // accept-encoding header. Unfortunately by caching before we compress,
152 | // requests are cached uncompressed. A compressed-cache middleware would be a
153 | // nice improvement. Also our cache-control headers should be synced with the
154 | // exact cache expiration time, which should be synced with the crawl. But
155 | // what we have here is simple and probably good enough.
156 |
157 | func (app app) cacheAndCompressMiddleware(handler http.Handler) http.Handler {
158 | // if app.cacheSize > 0 {
159 |
160 | // memorycached, err := memory.NewAdapter(
161 | // memory.AdapterWithAlgorithm(memory.LRU),
162 | // memory.AdapterWithCapacity(app.cacheSize),
163 | // )
164 | // if err != nil {
165 | // LogFatal(app.logger, "memory.NewAdapater", err)
166 | // }
167 |
168 | // cacheClient, err := cache.NewClient(
169 | // cache.ClientWithAdapter(memorycached),
170 | // cache.ClientWithTTL(1*time.Minute),
171 | // cache.ClientWithRefreshKey("opn"),
172 | // )
173 | // if err != nil {
174 | // LogFatal(app.logger, "cache.NewClient", err)
175 | // }
176 |
177 | // var h http.Handler = http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
178 | // // since we update data only every minute, tell browsers to cache for one minute
179 | // handler.ServeHTTP(w, r)
180 | // })
181 |
182 | // h = cacheClient.Middleware(h)
183 | // }
184 | h := handler
185 |
186 | return gziphandler.GzipHandler(h)
187 | }
188 |
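189 | // Added illustration for unmarshalRouterRequest above (hypothetical types, not used elsewhere):
190 | // given a parameter struct such as
191 | //
192 | //   type exampleParams struct {
193 | //       StoryID int
194 | //       Compact bool
195 | //   }
196 | //
197 | // and a route registered as /example/:StoryID, a request for /example/42?Compact=true would be
198 | // decoded into exampleParams{StoryID: 42, Compact: true}: the :StoryID path segment and the
199 | // Compact query parameter are merged into one map and matched to struct fields by name via the
200 | // gorilla/schema decoder, while unrecognized query parameters are ignored.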
--------------------------------------------------------------------------------
/migrate-volume.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | set -e # Exit on any error
3 |
4 | # Configuration
5 | APP_NAME="news"
6 | OLD_VOLUME_NAME="data2"
7 | NEW_VOLUME_NAME="data3"
8 | NEW_VOLUME_SIZE="3" # Adjust this to your needs
9 | REGION="ewr" # Your current region
10 |
11 | # Function to wait for VM to be ready
12 | wait_for_vm() {
13 | echo "Waiting for VM to be ready..."
14 | while true; do
15 | STATUS=$(fly status --app $APP_NAME)
16 | if echo "$STATUS" | grep -q "running"; then
17 | echo "VM is ready"
18 | break
19 | fi
20 | echo "VM not ready yet, waiting..."
21 | sleep 5
22 | done
23 | }
24 |
25 | echo "Stopping the application..."
26 | fly scale count 0 --app $APP_NAME
27 |
28 | echo "Creating new volume..."
29 | fly volumes create $NEW_VOLUME_NAME --size $NEW_VOLUME_SIZE --region $REGION
30 |
31 | echo "Creating temporary machine with old volume..."
32 | cat > migrate-old.toml << EOL
33 | app = "$APP_NAME"
34 | primary_region = "$REGION"
35 |
36 | [build]
37 | image = "alpine:latest"
38 |
39 | [mounts]
40 | source = "$OLD_VOLUME_NAME"
41 | destination = "/data"
42 |
43 | [processes]
44 | app = "sleep infinity"
45 | EOL
46 |
47 | echo "Deploying temporary machine with old volume..."
48 | fly deploy --config migrate-old.toml --app $APP_NAME
49 | wait_for_vm
50 |
51 | echo "Copying data from old volume to temporary storage..."
52 | fly ssh console --command 'cd /data && gzip -c frontpage.sqlite > frontpage.sqlite.gz && gzip -c frontpage.sqlite-shm > frontpage.sqlite-shm.gz && gzip -c frontpage.sqlite-wal > frontpage.sqlite-wal.gz' --app $APP_NAME
53 |
54 | echo "Downloading database files from old volume..."
55 | fly sftp shell --app $APP_NAME << EOF
56 | get /data/frontpage.sqlite.gz ~/social-protocols-data/recover/frontpage.sqlite.gz
57 | get /data/frontpage.sqlite-shm.gz ~/social-protocols-data/recover/frontpage.sqlite-shm.gz
58 | get /data/frontpage.sqlite-wal.gz ~/social-protocols-data/recover/frontpage.sqlite-wal.gz
59 | exit
60 | EOF
61 |
62 | echo "Destroying temporary machine..."
63 | fly scale count 0 --app $APP_NAME
64 | fly machines destroy $(fly machines list --json | jq -r '.[].id') --force --app $APP_NAME
65 |
66 | echo "Creating temporary machine with new volume..."
67 | cat > migrate-new.toml << EOL
68 | app = "$APP_NAME"
69 | primary_region = "$REGION"
70 |
71 | [build]
72 | image = "alpine:latest"
73 |
74 | [mounts]
75 | source = "$NEW_VOLUME_NAME"
76 | destination = "/data"
77 |
78 | [processes]
79 | app = "sleep infinity"
80 | EOL
81 |
82 | echo "Deploying temporary machine with new volume..."
83 | fly deploy --config migrate-new.toml --app $APP_NAME
84 | wait_for_vm
85 |
86 | echo "Uploading database files to new volume..."
87 | fly sftp shell --app $APP_NAME << EOF
88 | put ~/social-protocols-data/recover/frontpage.sqlite.gz /data/frontpage.sqlite.gz
89 | put ~/social-protocols-data/recover/frontpage.sqlite-shm.gz /data/frontpage.sqlite-shm.gz
90 | put ~/social-protocols-data/recover/frontpage.sqlite-wal.gz /data/frontpage.sqlite-wal.gz
91 | exit
92 | EOF
93 |
94 | echo "Extracting database files on new volume..."
95 | fly ssh console --command 'cd /data && gunzip frontpage.sqlite.gz && gunzip frontpage.sqlite-shm.gz && gunzip frontpage.sqlite-wal.gz' --app $APP_NAME
96 |
97 | echo "Updating mount configuration..."
98 | # Create a temporary file for the new fly.toml
99 | cat > fly.toml.new << EOL
100 | [mounts]
101 | source = "$NEW_VOLUME_NAME"
102 | destination = "/data"
103 | EOL
104 |
105 | # Backup the original fly.toml
106 | cp fly.toml fly.toml.backup
107 |
108 | # Update the mounts section in fly.toml
109 | sed -i.bak '/\[mounts\]/,/^$/c\' fly.toml
110 | cat fly.toml.new >> fly.toml
111 | rm fly.toml.new migrate-old.toml migrate-new.toml
112 |
113 | echo "Deploying application with new volume..."
114 | fly deploy
115 | wait_for_vm
116 |
117 | echo "Verifying application is running..."
118 | fly status --app $APP_NAME
119 |
120 | echo "If everything looks good, you can delete the old volume with:"
121 | echo "fly volumes delete $OLD_VOLUME_NAME --app $APP_NAME"
122 | echo ""
123 | echo "To rollback, restore the original fly.toml:"
124 | echo "mv fly.toml.backup fly.toml"
--------------------------------------------------------------------------------
/postprocessing.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "bytes"
5 | "context"
6 | "database/sql"
7 | "fmt"
8 | "io"
9 | "strings"
10 | "time"
11 |
12 | "github.com/pkg/errors"
13 | "golang.org/x/exp/slog"
14 | )
15 |
16 | const (
17 | qnRankFormulaSQL = "pow(ageHours * (cumulativeUpvotes + overallPriorWeight)/((1-exp(-fatigueFactor*cumulativeExpectedUpvotes))/fatigueFactor + overallPriorWeight), 0.8) / pow(ageHours + 2, gravity/0.8) desc"
18 |
19 | // qnRankFormulaSQL = `
20 | // pow(
21 | // ageHours *
22 | // sample_from_gamma_distribution(
23 | // cumulativeUpvotes + overallPriorWeight,
24 | // (
25 | // 1-exp(-fatigueFactor*cumulativeExpectedUpvotes)
26 | // ) / fatigueFactor + overallPriorWeight
27 | // )
28 | // , 0.8
29 | // ) / pow(
30 | // ageHours + 2
31 | // , gravity/0.8
32 | // ) desc`
33 |
34 | hnRankFormulaSQL = "(score-1) / pow(ageHours + 2, gravity/0.8) desc"
35 | )
36 |
37 | func (app app) crawlPostprocess(ctx context.Context, tx *sql.Tx) error {
38 | t := time.Now()
39 | defer crawlPostprocessingDuration.UpdateDuration(t)
40 |
41 | var err error
42 |
43 | // for _, filename := range []string{"previous-crawl.sql", "resubmissions.sql", "raw-ranks.sql", "upvote-rates.sql"} {
44 | for _, filename := range []string{
45 | "previous-crawl.sql",
46 | "resubmissions.sql",
47 | "raw-ranks.sql",
48 | } {
49 | app.logger.Info("Processing SQL file", slog.String("filename", filename))
50 | err = executeSQLFile(ctx, tx, filename)
51 | if err != nil {
52 | return err
53 | }
54 | }
55 |
56 | err = app.updateQNRanks(ctx, tx)
57 | if err != nil {
58 | return errors.Wrap(err, "updateQNRanks")
59 | }
60 |
61 | app.logger.Info("Finished crawl postprocessing", slog.Duration("elapsed", time.Since(t)))
62 |
63 | return err
64 | }
65 |
66 | var qnRanksSQL = readSQLSource("qnranks.sql")
67 |
68 | func (app app) updateQNRanks(ctx context.Context, tx *sql.Tx) error {
69 | t := time.Now()
70 |
71 | d := defaultFrontPageParams
72 | sql := fmt.Sprintf(qnRanksSQL, d.PriorWeight, d.OverallPriorWeight, d.Gravity, d.PenaltyWeight, d.FatigueFactor, qnRankFormulaSQL)
73 |
74 | stmt, err := tx.Prepare(sql)
75 | if err != nil {
76 | return errors.Wrap(err, "preparing updateQNRanksSQL")
77 | }
78 |
79 | _, err = stmt.ExecContext(ctx)
80 |
81 | app.logger.Info("Finished executing updateQNRanks", slog.Duration("elapsed", time.Since(t)))
82 |
83 | return errors.Wrap(err, "executing updateQNRanksSQL")
84 | }
85 |
86 | func readSQLSource(filename string) string {
87 | f, err := resources.Open("sql/" + filename)
88 | if err != nil {
89 | panic(err)
90 | }
91 | defer f.Close()
92 | buf := bytes.NewBuffer(nil)
93 | _, err = io.Copy(buf, f)
94 | if err != nil {
95 | panic(err)
96 | }
97 |
98 | return buf.String()
99 | }
100 |
101 | func executeSQLFile(ctx context.Context, tx *sql.Tx, filename string) error {
102 | sql := readSQLSource(filename)
103 |
104 | sql = strings.Trim(sql, " \n\r;")
105 |
106 | parts := strings.Split(sql, ";\n")
107 |
108 | for _, sql := range parts {
109 |
110 | stmt, err := tx.Prepare(sql)
111 | if err != nil {
112 | return errors.Wrapf(err, "preparing SQL in file %s", filename)
113 | }
114 |
115 | _, err = stmt.ExecContext(ctx)
116 |
117 | if err != nil {
118 | return errors.Wrapf(err, "executing SQL in file %s", filename)
119 | }
120 | }
121 | return nil
122 | }
123 |
--------------------------------------------------------------------------------
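The qnRankFormulaSQL constant above is the order-by expression for the quality-news ranking. As a rough illustration only (the real computation happens in SQL), the same arithmetic written as a plain Go function:

```go
package main

import (
	"fmt"
	"math"
)

// qnRankingScore mirrors the arithmetic in qnRankFormulaSQL: a fatigue-adjusted,
// prior-weighted upvote rate, scaled by age and divided by an HN-style gravity term.
// Stories are ranked by this value in descending order.
func qnRankingScore(ageHours, cumulativeUpvotes, cumulativeExpectedUpvotes,
	overallPriorWeight, fatigueFactor, gravity float64) float64 {

	fatiguedExpected := (1 - math.Exp(-fatigueFactor*cumulativeExpectedUpvotes)) / fatigueFactor
	upvoteRate := (cumulativeUpvotes + overallPriorWeight) / (fatiguedExpected + overallPriorWeight)

	return math.Pow(ageHours*upvoteRate, 0.8) / math.Pow(ageHours+2, gravity/0.8)
}

func main() {
	// Parameter values here are illustrative only; the real defaults come from defaultFrontPageParams.
	fmt.Println(qnRankingScore(3.0, 40, 25, 2.3, 0.003462767, 1.4))
}
```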
/prometheus.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "context"
5 | "net/http"
6 | "os"
7 | "time"
8 |
9 | "github.com/VictoriaMetrics/metrics"
10 | "github.com/johnwarden/httperror"
11 | "golang.org/x/exp/slog"
12 | )
13 |
14 | // Register various metrics.
15 | // Metric name may contain labels in Prometheus format - see below.
16 |
17 | var (
18 | crawlErrorsTotal = metrics.NewCounter(`errors_total{type="crawl"}`)
19 | archiveErrorsTotal = metrics.NewCounter(`errors_total{type="archive"}`)
20 | requestErrorsTotal = metrics.NewCounter(`errors_total{type="request"}`)
21 | crawlDuration = metrics.NewHistogram("crawl_duration_seconds")
22 | crawlPostprocessingDuration = metrics.NewHistogram("crawl_postprocessing_duration_seconds")
23 |
24 | upvotesTotal = metrics.NewCounter(`upvotes_total`)
25 | submissionsTotal = metrics.NewCounter(`submissions_total`)
26 | storiesArchivedTotal = metrics.NewCounter(`stories_archived_total`)
27 | storiesPurgedTotal = metrics.NewCounter(`stories_purged_total`)
28 |
29 | vacuumOperationsTotal = metrics.NewCounter(`database_vacuum_operations_total{database="frontpage"}`)
30 |
31 | // Store histograms per route to avoid duplicate registration
32 | routeHistograms = make(map[string]*metrics.Histogram)
33 | )
34 |
35 | // getRouteHistogram returns an existing histogram for a route or creates a new one
36 | func getRouteHistogram(routeName string) *metrics.Histogram {
37 | if h, exists := routeHistograms[routeName]; exists {
38 | return h
39 | }
40 | h := metrics.NewHistogram(`requests_duration_seconds{route="` + routeName + `"}`)
41 | routeHistograms[routeName] = h
42 | return h
43 | }
44 |
45 | func servePrometheusMetrics() func(ctx context.Context) error {
46 | mux := http.NewServeMux()
47 |
48 | // Export all the registered metrics in Prometheus format at `/metrics` http path.
49 | mux.HandleFunc("/metrics", func(w http.ResponseWriter, req *http.Request) {
50 | metrics.WritePrometheus(w, true)
51 | })
52 |
53 | listenAddress := os.Getenv("LISTEN_ADDRESS")
54 |
55 | s := &http.Server{
56 | Addr: listenAddress + ":9091",
57 | Handler: mux,
58 | }
59 |
60 | go func() {
61 | LogFatal(slog.Default(), "Listen and serve prometheus", s.ListenAndServe())
62 | }()
63 |
64 | return s.Shutdown
65 | }
66 |
67 | func prometheusMiddleware[P any](routeName string, h httperror.XHandler[P]) httperror.XHandlerFunc[P] {
68 | requestDuration := getRouteHistogram(routeName)
69 |
70 | return func(w http.ResponseWriter, r *http.Request, p P) error {
71 | var startTime time.Time
72 | if r.Method != http.MethodHead {
73 | startTime = time.Now()
74 | }
75 |
76 | err := h.Serve(w, r, p)
77 |
78 | if r.Method != http.MethodHead && routeName != "health" && routeName != "crawl-health" {
79 | requestDuration.UpdateDuration(startTime)
80 | }
81 |
82 | return err
83 | }
84 | }
85 |
--------------------------------------------------------------------------------
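getRouteHistogram relies on all routes being registered before concurrent traffic arrives, since the routeHistograms map is not guarded by a lock. The VictoriaMetrics library also provides GetOrCreateHistogram, which is safe for concurrent use; a minimal sketch of the library calls involved (illustrative, not a change to the code above):

```go
package main

import (
	"net/http"
	"time"

	"github.com/VictoriaMetrics/metrics"
)

func main() {
	// GetOrCreateHistogram registers the metric on first use and returns the
	// existing histogram afterwards, so it can replace a hand-rolled lookup map.
	h := metrics.GetOrCreateHistogram(`requests_duration_seconds{route="frontpage"}`)

	start := time.Now()
	// ... handle the request here ...
	h.UpdateDuration(start)

	// Expose everything registered so far, as servePrometheusMetrics does.
	http.HandleFunc("/metrics", func(w http.ResponseWriter, r *http.Request) {
		metrics.WritePrometheus(w, true)
	})
}
```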
/reset-prior-average-upvote-rate.sql:
--------------------------------------------------------------------------------
1 |
2 | with parameters as (
3 | select
4 | -- 2.2956 as priorWeight
5 | -- 4.0 as priorWeight
6 | 1.7 as priorWeight
7 | , 0.003462767 as fatigueFactor
8 | -- , 1.036 as priorAverage
9 | -- , 1.036 as priorAverage
10 | -- , .99 as priorAverage
11 | -- , 1.0 as priorAverage
12 | ), entryRates as (
13 | select
14 | userID
15 | , storyID
16 | , entryTime
17 | , entryUpvoteRate
18 | , max(cumulativeUpvotes) cumulativeUpvotes
19 | , max(cumulativeExpectedUpvotes) cumulativeExpectedUpvotes
20 | , (cumulativeUpvotes + priorWeight)/((1-exp(-fatigueFactor*cumulativeExpectedUpvotes))/fatigueFactor + priorWeight) newEntryUpvoteRate
21 | -- , (cumulativeUpvotes + priorWeight*1.174)/((1-exp(-fatigueFactor*cumulativeExpectedUpvotes))/fatigueFactor + priorWeight) newEntryUpvoteRate
22 | -- , (cumulativeUpvotes + priorWeight*1.145)/(cumulativeExpectedUpvotes + priorWeight) as newEntryUpvoteRate
23 |
24 |
25 | from
26 | votes
27 | join dataset
28 | on dataset.id = storyID
29 | join parameters
30 | where
31 | dataset.sampleTime
32 | and sampleTime <= entryTime
33 | -- and votes.userID != 0
34 | group by userID, storyID, entryTime
35 | )
36 | -- select * from entryRates where userID = 0 and storyID = 36805231 limit 10;
37 |
38 | update votes as u
39 | set entryUpvotes = entryRates.cumulativeUpvotes
40 | , entryExpectedUpvotes = entryRates.cumulativeExpectedUpvotes
41 | , entryUpvoteRate = entryRates.newEntryUpvoteRate
42 | from
43 | entryRates
44 | where entryRates.userID = u.userID
45 | and entryRates.storyID = u.storyID ;
46 |
47 |
--------------------------------------------------------------------------------
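The newEntryUpvoteRate expression above is the fatigue-adjusted Bayesian average used throughout the repo. Written out, with $w$ the priorWeight and $f$ the fatigueFactor from the parameters CTE:

```latex
\text{newEntryUpvoteRate}
  = \frac{\text{cumulativeUpvotes} + w}
         {\dfrac{1 - e^{-f \,\cdot\, \text{cumulativeExpectedUpvotes}}}{f} + w}
```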
/resources.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import "embed"
4 |
5 | //go:embed templates/*
6 | //go:embed sql/*
7 | //go:embed seed/*
8 | var resources embed.FS
9 |
--------------------------------------------------------------------------------
/score-page.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "database/sql"
5 | "fmt"
6 | "net/http"
7 |
8 | "github.com/johnwarden/httperror"
9 | "github.com/pkg/errors"
10 | )
11 |
12 | type ScorePageData struct {
13 | PageTemplateData
14 | Positions []Position
15 | Score float64
16 | ScorePlotData [][]any
17 | }
18 |
19 | // Override IsScorePage since it's not determined by Ranking
20 | func (d ScorePageData) IsScorePage() bool {
21 | return true
22 | }
23 |
24 | func (p ScorePageData) ScoreString() string {
25 | return fmt.Sprintf("%.2f", p.Score)
26 | }
27 |
28 | func (p ScorePageData) AverageScoreString() string {
29 | return fmt.Sprintf("%.2f", p.Score/float64(len(p.Positions)))
30 | }
31 |
32 | type ScorePageParams struct {
33 | UserID sql.NullInt64
34 | OptionalModelParams
35 | ScoringFormula string
36 | }
37 |
38 | func (app app) scoreHandler() func(http.ResponseWriter, *http.Request, ScorePageParams) error {
39 | return func(w http.ResponseWriter, r *http.Request, params ScorePageParams) error {
40 | nullUserID := params.UserID
41 | if !nullUserID.Valid {
42 |
43 | nullUserID = app.getUserID(r)
44 |
45 | if !nullUserID.Valid {
46 | return httperror.PublicErrorf(http.StatusUnauthorized, "not logged in")
47 | }
48 | }
49 |
50 | modelParams := params.OptionalModelParams.WithDefaults()
51 |
52 | userID := int(nullUserID.Int64)
53 |
54 | positions, err := app.getDetailedPositions(r.Context(), userID)
55 | if err != nil {
56 | return errors.Wrap(err, "getDetailedPositions")
57 | }
58 |
59 | var score float64
60 | for i, p := range positions {
61 |
62 | p.EntryUpvoteRate = modelParams.upvoteRate(p.EntryUpvotes, p.EntryExpectedUpvotes)
63 | p.CurrentUpvoteRate = modelParams.upvoteRate(p.CurrentUpvotes, p.CurrentExpectedUpvotes)
64 | p.Story.UpvoteRate = p.CurrentUpvoteRate
65 |
66 | if p.ExitUpvotes.Valid && p.ExitExpectedUpvotes.Valid {
67 | p.ExitUpvoteRate = sql.NullFloat64{
68 | Float64: modelParams.upvoteRate(int(p.ExitUpvotes.Int64), p.ExitExpectedUpvotes.Float64),
69 | Valid: true,
70 | }
71 | }
72 |
73 | p.UserScore = UserScore(p, modelParams, params.ScoringFormula)
74 |
75 | score += p.UserScore
76 | p.RunningScore = score
77 |
78 | p.Story.UpvoteRate = p.UpvoteRate
79 |
80 | positions[i] = p
81 | }
82 |
83 | n := len(positions)
84 | for i := range positions {
85 | positions[i].RunningScore = score - positions[i].RunningScore + positions[i].UserScore
86 | positions[i].Label = intToAlphaLabel(n - i - 1)
87 | }
88 |
89 | scorePlotData := make([][]any, n)
90 | for i, p := range positions {
91 | scorePlotData[n-i-1] = []any{
92 | p.EntryTime, p.RunningScore, fmt.Sprintf("%d", p.PositionID), p.Story.Title, p.UserScoreString(), p.Direction, p.EntryUpvoteRateString(), p.CurrentUpvoteRateString(), p.ExitUpvoteRateString(),
93 | }
94 | }
95 |
96 | pageSize := 1000
97 | if n > pageSize {
98 | n = pageSize
99 | }
100 |
101 | d := ScorePageData{
102 | PageTemplateData: PageTemplateData{
103 | UserID: nullUserID,
104 | Ranking: "score",
105 | },
106 | Positions: positions[0:n],
107 | Score: score,
108 | ScorePlotData: scorePlotData,
109 | }
110 |
111 | if err = templates.ExecuteTemplate(w, "score.html.tmpl", d); err != nil {
112 | return errors.Wrap(err, "executing score template")
113 | }
114 |
115 | return nil
116 | }
117 | }
118 |
119 | // convert an integer into an alphabetical label starting with A through Z, then continuing AA, AB, etc.
120 |
121 | func intToAlphaLabel(i int) string {
122 | r := make([]byte, 0, 1)
123 |
124 | // result := ""
125 | n := 0
126 | for {
127 | digit := i % 26
128 | letter := 'A' + digit
129 | // result = string(letter) + result
130 |
131 | r = append(r, byte(letter))
132 |
133 | i -= digit
134 | if i == 0 {
135 | break
136 | }
137 | i /= 26
138 | i -= 1
139 | n++
140 | }
141 |
142 | n = len(r)
143 | for i := 0; i < n/2; i++ {
144 | j := n - i - 1
145 |
146 | r[i], r[j] = r[j], r[i]
147 | }
148 |
149 | return string(r)
150 | }
151 |
--------------------------------------------------------------------------------
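intToAlphaLabel produces spreadsheet-style column labels (bijective base-26) from 0-based indices. A quick usage sketch, calling the function defined above, with the expected outputs noted:

```go
package main

import "fmt"

func main() {
	// Expected output:
	//   0 -> A, 25 -> Z, 26 -> AA, 27 -> AB, 701 -> ZZ, 702 -> AAA
	for _, i := range []int{0, 25, 26, 27, 701, 702} {
		fmt.Printf("%d -> %s\n", i, intToAlphaLabel(i))
	}
}
```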
/scraper.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "context"
5 | "fmt"
6 | "strconv"
7 | "strings"
8 | "sync"
9 | "time"
10 |
11 | colly "github.com/gocolly/colly/v2"
12 | "github.com/pkg/errors"
13 | "golang.org/x/exp/slog"
14 | )
15 |
16 | type rawStory struct {
17 | ID string
18 | row1
19 | row2
20 | }
21 |
22 | type row1 struct {
23 | Title string `selector:"span.titleline a"`
24 | FullTitle string `selector:"span.titleline"`
25 | URL string `selector:"span.titleline a" attr:"href"`
26 | Rank string `selector:"span.rank"`
27 | }
28 |
29 | type row2 struct {
30 | Author string `selector:"a.hnuser"`
31 | Score string `selector:"span.score"`
32 | SubmissionTime string `selector:"span.age" attr:"title"`
33 | AgeApprox string `selector:"span.age"`
34 | Links []string `selector:"a"`
35 | }
36 |
37 | type ScrapedStory struct {
38 | Story
39 | Rank int
40 | Source string
41 | }
42 |
43 | func (rs rawStory) Clean() (ScrapedStory, error) {
44 | story := ScrapedStory{
45 | Story: Story{
46 | Title: rs.Title,
47 | By: rs.Author,
48 | URL: rs.URL,
49 | },
50 | Source: "scraper",
51 | }
52 |
53 | // parse id
54 | {
55 | id, err := strconv.Atoi(rs.ID)
56 | if err != nil {
57 | return story, errors.Wrapf(err, "parse story id %s", rs.ID)
58 | }
59 | story.ID = id
60 | }
61 |
62 | // fix url
63 | if strings.HasPrefix(story.Story.URL, "item?id=") {
64 | story.Story.URL = "https://news.ycombinator.com/" + story.Story.URL
65 | }
66 |
67 | // parse score. This field will look like "4 points"
68 | {
69 | if fs := strings.Fields(rs.Score); len(fs) > 0 {
70 | scoreStr := strings.Fields(rs.Score)[0]
71 |
72 | score, err := strconv.Atoi(scoreStr)
73 | story.Score = score
74 | if err != nil {
75 | return story, errors.Wrapf(err, "parse story score %s", rs.Score)
76 | }
77 | } else {
78 | // if there is no upvotes field, then this is an HN job.
79 | // we want to include these in the database because they get ranked
80 | story.Job = true
81 | }
82 | }
83 |
84 | // parse submission time
85 | {
86 | // submission times now contain a timestamp string, followed by a
87 | // space then a unix timestamp with what looks like the *current*
88 | // time which I suppose we can just ignore. For
89 | // example "2024-10-23T16:44:01 1729713776"
90 | parts := strings.Split(rs.SubmissionTime, " ")
91 |
92 | var submissionTime time.Time
93 | var err error
94 |
95 | if strings.HasSuffix(parts[0], "Z") {
96 | // Old format with "Z" indicating UTC
97 | submissionTime, err = time.Parse("2006-01-02T15:04:05Z", parts[0])
98 | } else {
99 | // New format without "Z"
100 | submissionTime, err = time.Parse("2006-01-02T15:04:05", parts[0])
101 | }
102 |
103 | if err != nil {
104 | return story, errors.Wrapf(err, "parse submission time %s", rs.SubmissionTime)
105 | }
106 | story.SubmissionTime = submissionTime.Unix()
107 | story.OriginalSubmissionTime = story.SubmissionTime
108 | }
109 |
110 | // parse approximate age
111 | {
112 | // this will be something like "1 minute ago" or "3 hours ago"
113 | if fs := strings.Fields(rs.AgeApprox); len(fs) > 1 {
114 | n, err := strconv.Atoi(fs[0])
115 | if err != nil {
116 | return story, errors.Wrapf(err, "parse relative age %s", rs.AgeApprox)
117 | }
118 |
119 | var units int64
120 | if strings.HasPrefix(fs[1], "minute") { // "minute" or "minutes"
121 | units = 60
122 | } else if strings.HasPrefix(fs[1], "hour") {
123 | units = 3600
124 | } else if strings.HasPrefix(fs[1], "day") {
125 | units = 3600 * 24
126 | } else if strings.HasPrefix(fs[1], "month") {
127 | units = 3600 * 24 * 30
128 | } else if strings.HasPrefix(fs[1], "year") {
129 | units = 3600 * 24 * 364
130 | }
131 |
132 | story.AgeApprox = int64(n) * units
133 | } else {
134 | return story, fmt.Errorf("parse age %s", rs.AgeApprox)
135 | }
136 |
137 | // parse rank. we know the rank because of the order it appears in.
138 | // we just use this to do an integrity check later.
139 | {
140 | tRank := strings.Trim(rs.Rank, ".")
141 | var err error
142 | story.Rank, err = strconv.Atoi(tRank)
143 | if err != nil || story.Rank == 0 {
144 | return story, errors.Wrapf(err, "parse rank %s", rs.Rank)
145 | }
146 | }
147 |
148 | // parse the number of comments
149 | {
150 | // if there are comments, this will be the last tag. Unfortunately, it doesn't have an id or class.
151 | commentString := rs.Links[len(rs.Links)-1]
152 |
153 | // this string will be a single word like "comment" or "hide" if there are no comments.
154 | // otherwise it will be something like "12 comments"
155 | if fs := strings.Fields(commentString); len(fs) > 1 {
156 | c, err := strconv.Atoi(fs[0])
157 | if err != nil {
158 | return story, errors.Wrapf(err, "parse comments %s", commentString)
159 | }
160 | story.Comments = c
161 | }
162 | }
163 |
164 | // parse [flagged] and [dupe] tags
165 | {
166 | if strings.Contains(rs.FullTitle, "[flagged]") {
167 | story.Flagged = true
168 | }
169 | if strings.Contains(rs.FullTitle, "[dupe]") {
170 | story.Dupe = true
171 | }
172 | }
173 |
174 | return story, nil
175 | }
176 | }
177 |
178 | func (app app) newScraper(resultCh chan ScrapedStory, errCh chan error, moreLinkCh chan string) *colly.Collector {
179 | c := colly.NewCollector()
180 | c.SetClient(app.httpClient)
181 |
182 | var rs rawStory
183 |
184 | c.OnHTML("a.morelink", func(e *colly.HTMLElement) {
185 | moreLinkCh <- e.Attr("href")
186 | })
187 |
188 | c.OnHTML("tr table", func(e *colly.HTMLElement) {
189 | n := 0
190 | lastStoryRownum := 0
191 | e.ForEach("tr", func(i int, e *colly.HTMLElement) {
192 | class := e.Attr("class")
193 |
194 | // stories will always start with a tr of class athing
195 | if strings.Contains(class, "athing") && n < 30 {
196 | n = n + 1
197 | lastStoryRownum = i
198 | if n > 30 {
199 | return
200 | }
201 |
202 | rs = rawStory{
203 | ID: e.Attr("id"),
204 | }
205 | err := e.Unmarshal(&rs.row1)
206 | if err != nil {
207 | errCh <- err
208 | }
209 | } else if class == "" && i == lastStoryRownum+1 && n > 0 && n <= 30 {
210 | // the first tr after the "athing" contains the second row of
211 | // details for the story. Note also we must skip any trs
212 | // before the first athing because sometimes they contain
213 | // general page content.
214 |
215 | err := e.Unmarshal(&rs.row2)
216 |
217 | if err != nil {
218 | errCh <- err
219 | } else {
220 | st, err := rs.Clean()
221 | rank := st.Rank
222 |
223 | // Do an integrity check. If the rank shown for the story matches the
224 | // count we are keeping, we are all good.
225 | if err == nil && ((rank-1)%30)+1 != n {
226 | err = fmt.Errorf("Ranks out of order. Expected %d but parsed %d", n, (rank-1)%30+1)
227 | }
228 |
229 | if err != nil {
230 | Debugf(app.logger, "Failed to parse story %d. Raw story %#v", n, rs)
231 | errCh <- err
232 | } else {
233 | resultCh <- st
234 | }
235 | }
236 | }
237 | })
238 | })
239 |
240 | c.OnError(func(r *colly.Response, err error) {
241 | err = errors.Wrapf(err, "Failed to parse page %s", r.Request.URL)
242 | errCh <- err
243 | })
244 |
245 | return c
246 | }
247 |
248 | func (app app) scrapeHN(pageType string, resultCh chan ScrapedStory, errCh chan error) {
249 | baseUrl := "https://news.ycombinator.com/"
250 | url := baseUrl
251 | if pageType == "new" {
252 | url = url + "newest"
253 | } else if pageType != "top" {
254 | url = url + pageType
255 | }
256 | for p := 1; p <= 3; p++ {
257 | moreLinkCh := make(chan string, 1)
258 | c := app.newScraper(resultCh, errCh, moreLinkCh)
259 | err := c.Visit(url)
260 | if err != nil {
261 | errCh <- err
262 | }
263 | select {
264 | case relativeURL := <-moreLinkCh:
265 | url = baseUrl + relativeURL
266 | default:
267 | // there won't always be a next link; in particular, the show page could have fewer than 3 pages' worth of stories
268 | }
269 |
270 | }
271 | close(resultCh)
272 | close(errCh)
273 | }
274 |
275 | func (app app) scrapeFrontPageStories(ctx context.Context) (map[int]ScrapedStory, error) {
276 | app.logger.Info("Scraping front page stories")
277 |
278 | stories := map[int]ScrapedStory{}
279 |
280 | pageTypeName := "top"
281 |
282 | nSuccess := 0
283 |
284 | resultCh := make(chan ScrapedStory)
285 | errCh := make(chan error)
286 |
287 | var wg sync.WaitGroup
288 |
289 | t := time.Now()
290 |
291 | // scrape in a goroutine. the scraper will write results to the channel
292 | // we provide
293 | wg.Add(1)
294 | go func() {
295 | defer wg.Done()
296 | app.scrapeHN(pageTypeName, resultCh, errCh)
297 | }()
298 |
299 | // read from the error channel and print errors in a separate goroutine.
300 | // The scraper will block writing to the error channel if nothing is reading
301 | // from it.
302 | wg.Add(1)
303 | go func() {
304 | defer wg.Done()
305 | for err := range errCh {
306 | app.logger.Error("Error parsing story", err)
307 | crawlErrorsTotal.Inc()
308 | }
309 | }()
310 |
311 | for story := range resultCh {
312 | id := story.ID
313 |
314 | stories[id] = story
315 |
316 | nSuccess += 1
317 | }
318 |
319 | if nSuccess == 0 {
320 | return stories, fmt.Errorf("Didn't successfully parse any stories from %s page", pageTypeName)
321 | }
322 | Debugf(app.logger, "Crawled %d stories on %s page", nSuccess, pageTypeName)
323 |
324 | wg.Wait()
325 |
326 | app.logger.Info("Scraped stories", "pageTypeName", pageTypeName, slog.Duration("elapsed", time.Since(t)))
327 |
328 | return stories, nil
329 | }
330 |
--------------------------------------------------------------------------------
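The approximate-age parsing in Clean() converts strings such as "3 hours ago" into seconds by multiplying the leading number by a unit size. A standalone sketch of just that step (simplified from the code above, using the same rough month/year approximations):

```go
package main

import (
	"fmt"
	"strconv"
	"strings"
)

// approxAgeSeconds reproduces the age parsing in (rawStory).Clean for strings
// such as "1 minute ago", "3 hours ago" or "2 days ago".
func approxAgeSeconds(s string) (int64, error) {
	fs := strings.Fields(s)
	if len(fs) < 2 {
		return 0, fmt.Errorf("parse age %s", s)
	}
	n, err := strconv.Atoi(fs[0])
	if err != nil {
		return 0, err
	}
	var unit int64
	switch {
	case strings.HasPrefix(fs[1], "minute"):
		unit = 60
	case strings.HasPrefix(fs[1], "hour"):
		unit = 3600
	case strings.HasPrefix(fs[1], "day"):
		unit = 3600 * 24
	case strings.HasPrefix(fs[1], "month"):
		unit = 3600 * 24 * 30
	case strings.HasPrefix(fs[1], "year"):
		unit = 3600 * 24 * 364 // same rough approximation as the scraper
	}
	return int64(n) * unit, nil
}

func main() {
	age, _ := approxAgeSeconds("3 hours ago")
	fmt.Println(age) // 10800
}
```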
/seed/domain-penalties.csv:
--------------------------------------------------------------------------------
1 | domain,avgPenalty
2 | www.phoronix.com,0.255983153311316
3 | arstechnica.com,0.236416125252806
4 | www.theguardian.com,0.304580202256946
5 | old.reddit.com,0.329146893687822
6 | twitter.com,0.343862039075023
7 | www.theregister.com,0.281049828586506
8 | www.theatlantic.com,0.269041168885244
9 | www.cnn.com,0.287677200012903
10 | www.latimes.com,0.442553344733379
11 | apnews.com,0.306319646291724
12 | www.wired.com,0.247088570701928
13 | torrentfreak.com,0.378240055055204
14 | www.fastcompany.com,0.259302489090707
15 | www.protocol.com,0.295085830042498
16 | www.forbes.com,0.373620224179877
17 | reason.com,0.393667070368929
18 | drewdevault.com,0.426765471415533
19 | www.washingtonpost.com,0.281714743939709
20 | www.scmp.com,0.357802047615181
21 | www.politico.com,0.300041785931081
22 | medium.com,0.280016222997068
23 | www.wsj.com,0.246757013133426
24 | www.cnbc.com,0.292165034038854
25 | www.ft.com,0.271791649480615
26 | nypost.com,0.426552760633264
27 | www.nytimes.com,0.247017260946097
28 | reclaimthenet.org,0.449452850524531
29 | tech.marksblogg.com,0.681258036075086
30 | gizmodo.com,0.235581227145393
31 | www.vice.com,0.273699442401756
32 | www.bbc.com,0.243614162442268
33 | techcrunch.com,0.267693218312698
34 | en.wikipedia.org,0.200554150996098
35 | www.macrumors.com,0.252190681850287
36 | www.bleepingcomputer.com,0.298953498978752
37 | www.telegraph.co.uk,0.328513269271261
38 | www.tomshardware.com,0.225581267219099
39 | www.thedrive.com,0.253433255715287
40 | www.sfchronicle.com,0.295735132759634
41 | www.businessinsider.com,0.327460617838536
42 | www.theverge.com,0.233671896247822
43 | www.eff.org,0.30545264142663
44 | theconversation.com,0.238252498612625
45 | www.bbc.co.uk,0.266873076770723
46 | astralcodexten.substack.com,0.274378958851422
47 | www.engadget.com,0.222515465389002
48 | www.marketwatch.com,0.345967596506456
49 | www.nasa.gov,0.235891052875635
50 | www.nationalreview.com,0.386958948863856
51 | web.archive.org,0.299904628615894
52 | www.dw.com,0.319977862059942
53 | nationalpost.com,0.442595555825156
54 | www.newsweek.com,0.389042499337081
55 | www.bloomberg.com,0.242968773325337
56 | www.nbcnews.com,0.272889000950255
57 | www.technologyreview.com,0.257431302810684
58 | lite.cnn.com,0.250700151641931
59 | venturebeat.com,0.276781089118411
60 | www.sfgate.com,0.281372733111948
61 | phys.org,0.191695575116859
62 | petapixel.com,0.202737368232906
63 | jalopnik.com,0.224182778568468
64 | www.cbsnews.com,0.215748434523396
65 | www.sciencealert.com,0.291226974242294
66 | appleinsider.com,0.358670726355805
67 | hackernoon.com,0.314810633979195
68 | www.space.com,0.209511817616723
69 | www.techdirt.com,0.279776442812103
70 | www.cbc.ca,0.247250210810386
71 | slate.com,0.265835164534057
72 | 9to5mac.com,0.251672380939431
73 | quillette.com,0.30212810108685
74 | www.independent.co.uk,0.287744881377527
75 | news.yahoo.com,0.306129953355535
76 | www.newscientist.com,0.248626857306742
77 | marginalrevolution.com,0.240319697857452
78 | www.cnet.com,0.240582214090158
79 | www.usatoday.com,0.249900498713825
80 | futurism.com,0.311144077242749
81 | www.scientificamerican.com,0.250479576916355
82 | thehill.com,0.297360919489742
83 | www.indiehackers.com,0.280577764502318
84 | finance.yahoo.com,0.242354394239265
85 | docs.google.com,0.246768467189723
86 | therecord.media,0.265035130188274
87 | blogs.nasa.gov,0.234768976839632
88 | www.micahlerner.com,0.228182741047012
89 | themarkup.org,0.169215301594475
90 | restofworld.org,0.277497103221563
91 | www.politico.eu,0.231618820954611
92 | www.france24.com,0.298707256113616
93 | betterprogramming.pub,0.239944839492751
94 | time.com,0.220345275200395
95 | www.schneier.com,0.24690892186614
96 | www.lesswrong.com,0.198347134793858
97 | www.pcmag.com,0.252328948155656
98 | thebulletin.org,0.185234962067628
99 | www.teslaoracle.com,0.218183317161904
100 | hbr.org,0.193063289207588
101 | thenewstack.io,0.226202908952288
102 | www.productlessons.xyz,0.165012975629553
103 | www.polygon.com,0.209155781150655
104 | medicalxpress.com,0.187450245477553
105 | www.anandtech.com,0.174405165433974
106 | electrek.co,0.142642514874113
107 |
--------------------------------------------------------------------------------
/sql/cumulative-upvotes.sql:
--------------------------------------------------------------------------------
1 | -- this query updates cumulativeUpvotes and cumulativeExpectedUpvotes
2 | -- accounting for possible gaps in the data (stories in the latest crawl but not the previous crawl).
3 | -- We only want cumulativeUpvotes or cumulativeExpectedUpvotes to increase if we have two consecutive data
4 | -- points (one minute apart).
5 |
6 | with latest as (
7 | select * from dataset where sampleTime = (select max(sampleTime) from dataset)
8 | )
9 | update dataset as d
10 | set
11 | cumulativeUpvotes = case
12 | when not gapInData then previousCrawl.cumulativeUpvotes + latest.score - previousCrawl.score
13 | else previousCrawl.cumulativeUpvotes
14 | end
15 | , cumulativeExpectedUpvotes = case
16 | when not gapInData then latest.cumulativeExpectedUpvotes
17 | else previousCrawl.cumulativeExpectedUpvotes
18 | end
19 | from latest left join previousCrawl using (id)
20 | where
21 | d.id = latest.id
22 | and d.sampleTime = (select max(sampleTime) from dataset)
23 |
--------------------------------------------------------------------------------
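The case expressions above boil down to one rule: when the story has two consecutive datapoints, grow cumulativeUpvotes by the score delta and take the latest cumulativeExpectedUpvotes; when there is a gap, carry the previous values forward unchanged. The same rule sketched in Go for illustration (integer types are a simplification):

```go
package main

import "fmt"

// accumulate applies the update rule from cumulative-upvotes.sql.
// It returns the new (cumulativeUpvotes, cumulativeExpectedUpvotes).
func accumulate(prevCumUpvotes, prevScore, latestScore, prevCumExpected, latestCumExpected int, gapInData bool) (int, int) {
	if gapInData {
		// The story was missing from the previous crawl: don't credit the gap.
		return prevCumUpvotes, prevCumExpected
	}
	return prevCumUpvotes + latestScore - prevScore, latestCumExpected
}

func main() {
	fmt.Println(accumulate(10, 12, 15, 8, 9, false)) // 13 9
	fmt.Println(accumulate(10, 12, 15, 8, 9, true))  // 10 8
}
```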
/sql/previous-crawl-index-old.sql:
--------------------------------------------------------------------------------
1 | create index previousCrawl_id_idx on previousCrawl (id);
--------------------------------------------------------------------------------
/sql/previous-crawl.sql:
--------------------------------------------------------------------------------
1 | -- This query selects the previous datapoint for every story in the latest crawl
2 | -- It is a bit tricky because the sampleTime may be different for each story, because
3 | -- some stories may appear and disappear from crawl results if they fall off the front page and reappear.
4 |
5 | create view if not exists previousCrawl as
6 | with latest as (
7 | select * from dataset
8 | where sampleTime = (select max(sampleTime) from dataset)
9 | )
10 | -- identify stories that are in the previous crawl. This is a quick indexed lookup
11 | , previousCrawl as (
12 | select
13 | id
14 | , sampleTime
15 | from dataset
16 | where sampleTime = (select max(sampleTime) from dataset where sampleTime != (select max(sampleTime) from dataset))
17 | )
18 | -- this query finds the sampleTime of the last time each story was
19 | -- crawled, for all stories that were not in the previous crawl. This
20 | -- subquery can be slow, so only do it for stories that weren't in the
21 | -- previous crawl.
22 | , previousSampleForStory as (
23 | select
24 | latest.id
25 | , ifnull(previousCrawl.sampleTime, max(dataset.sampleTime)) as sampleTime
26 | , previousCrawl.sampleTime is null as gapInData
27 | from latest left join previousCrawl using (id)
28 | left join dataset on (
29 | previousCrawl.id is null
30 | and latest.id = dataset.id
31 | and dataset.sampleTime < (select max(sampleTime) from dataset)
32 | )
33 | group by 1
34 | )
35 | select dataset.*, gapInData from previousSampleForStory join dataset using (id, sampleTime);
36 |
--------------------------------------------------------------------------------
/sql/qnranks.sql:
--------------------------------------------------------------------------------
1 | with parameters as (select %f as priorWeight, %f as overallPriorWeight, %f as gravity, %f as penaltyWeight, %f as fatigueFactor)
2 | , latestData as (
3 | select
4 | id
5 | , score
6 | , sampleTime
7 | , cast(sampleTime-submissionTime as real)/3600 as ageHours
8 | , cumulativeUpvotes
9 | , cumulativeExpectedUpvotes
10 | , penalty
11 | from dataset
12 | where sampleTime = (select max(sampleTime) from dataset)
13 | and score >= 3 -- story can't reach front page until score >= 3
14 | and coalesce(topRank, bestRank, newRank, askRank, showRank) is not null -- let's not rank stories if they aren't accumulating attention
15 | ),
16 | qnRanks as (
17 | select
18 | id
19 | , dense_rank() over(order by %s) as rank
20 | , sampleTime
21 | , penalty
22 | from latestData join parameters
23 | )
24 | update dataset as d set qnRank = qnRanks.rank
25 | from qnRanks
26 | where d.id = qnRanks.id and d.sampleTime = qnRanks.sampleTime;
27 |
--------------------------------------------------------------------------------
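For reference, the %f and %s placeholders in this file are filled by updateQNRanks in postprocessing.go, in the same order as the parameters CTE, with the order-by expression substituted last:

```go
// Excerpt from postprocessing.go; placeholder order is
// (priorWeight, overallPriorWeight, gravity, penaltyWeight, fatigueFactor, <order-by expression>).
sql := fmt.Sprintf(qnRanksSQL,
	d.PriorWeight,
	d.OverallPriorWeight,
	d.Gravity,
	d.PenaltyWeight,
	d.FatigueFactor,
	qnRankFormulaSQL,
)
```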
/sql/random-new-voter.sql:
--------------------------------------------------------------------------------
1 | with limits as (
2 | select
3 | count(*) / 1000 as n
4 | , abs(random()) % 10 as m
5 | from dataset
6 | )
7 | , randomFrontpageSample as (
8 | select id, sampleTime, cumulativeUpvotes, cumulativeExpectedUpvotes
9 | from dataset
10 | join stories using (id)
11 | join limits
12 | where timestamp > ( select min(sampleTime) from dataset ) -- only stories submitted since we started crawling
13 | and newRank is not null
14 | and not job
15 | and ( ( dataset.rowid - (select min(rowid) from dataset) ) % n ) = m
16 | )
17 | , storiesToUpvote as (
18 | select id as storyID
19 | , min(sampleTime) as minSampleTime
20 | , min(cumulativeUpvotes) as minUpvotes
21 | , min(cumulativeExpectedUpvotes) as minExpectedUpvotes
22 | from randomFrontpageSample
23 | group by id
24 | order by sampleTime
25 | )
26 | , positions as (
27 | select
28 | 0 as userID
29 | , storiesToUpvote.storyID
30 | , 1 as direction
31 | , minSampleTime as entryTime
32 | , minUpvotes as entryUpvotes
33 | , minExpectedUpvotes as entryExpectedUpvotes
34 | , row_number() over () as positionID
35 | from storiesToUpvote
36 | -- left join votes existingVotes using (storyID)
37 | -- where existingVotes.storyID is null
38 | ) select
39 | userID
40 | , storyID
41 | , positionID
42 | , direction
43 | , entryTime
44 | , entryUpvotes
45 | , entryExpectedUpvotes
46 | , null as exitTime
47 | , null as exitUpvotes
48 | , null as exitExpectedUpvotes
49 | , cumulativeUpvotes
50 | , cumulativeExpectedUpvotes
51 | , title
52 | , url
53 | , by
54 | , unixepoch() - sampleTime + coalesce(ageApprox, sampleTime - submissionTime) ageApprox
55 | , score
56 | , descendants as comments
57 | from positions
58 | join dataset on
59 | positions.storyID = id
60 | join stories using (id)
61 | group by positionID
62 | having max(dataset.sampleTime)
63 | order by entryTime desc
64 | ;
65 |
--------------------------------------------------------------------------------
/sql/random-top-voter.sql:
--------------------------------------------------------------------------------
1 | with randomDatapoints as (
2 | select
3 | id, sampleTime , cumulativeUpvotes, cumulativeExpectedUpvotes
4 | -- , row_number() over () as
5 | , row_number() over () as i
6 | , count() over () as nIDs
7 | from dataset
8 | join stories using (id)
9 | where
10 | timestamp > ( select min(sampleTime) from dataset ) -- only stories submitted since we started crawling
11 | and sampleTime > ( select max(sampleTime) from dataset ) - 24 * 60 * 60
12 | and topRank is not null
13 | ),
14 | limits as (
15 | select abs(random()) % ( nIds / 100 ) as n
16 | from randomDatapoints
17 | where i = 1
18 | )
19 | , storiesToUpvote as (
20 | select id as storyID
21 | , min(sampleTime) as minSampleTime
22 | , min(cumulativeUpvotes) as minUpvotes
23 | , min(cumulativeExpectedUpvotes) as minExpectedUpvotes
24 | from randomDatapoints join limits
25 | -- sampleTime % nIDs = n
26 | where
27 | ( i ) % (nIDs / 100) = n
28 | group by id
29 | order by sampleTime
30 | )
31 | , positions as (
32 | select
33 | ? as userID
34 | , storiesToUpvote.storyID
35 | , 1 as direction
36 | , minSampleTime as entryTime
37 | , minUpvotes as entryUpvotes
38 | , minExpectedUpvotes as entryExpectedUpvotes
39 | , row_number() over () as positionID
40 | from storiesToUpvote
41 | -- left join votes existingVotes using (storyID)
42 | -- where existingVotes.storyID is null
43 | ) select
44 | userID
45 | , storyID
46 | , positionID
47 | , direction
48 | , entryTime
49 | , entryUpvotes
50 | , entryExpectedUpvotes
51 | , null as exitTime
52 | , null as exitUpvotes
53 | , null as exitExpectedUpvotes
54 | , cumulativeUpvotes
55 | , cumulativeExpectedUpvotes
56 | , title
57 | , url
58 | , by
59 | , unixepoch() - sampleTime + coalesce(ageApprox, sampleTime - submissionTime) ageApprox
60 | , score
61 | , descendants as comments
62 | from positions
63 | join dataset on
64 | positions.storyID = id
65 | join stories using (id)
66 | group by positionID
67 | having max(dataset.sampleTime)
68 | order by entryTime desc;
--------------------------------------------------------------------------------
/sql/raw-ranks.sql:
--------------------------------------------------------------------------------
1 | with rankingScores as (
2 | select
3 | id
4 | , sampleTime
5 | , topRank
6 | , pow(score-1, 0.8) / pow(cast(sampleTime - submissionTime as real)/3600+2, 1.8) as rankingScore -- pre-penalty HN ranking formula
7 | , ageApprox
8 | , job
9 | , score
10 | , timeStamp != submissionTime as resubmitted
11 | from dataset join stories using (id)
12 | where sampleTime = (select max(sampleTime) from dataset)
13 | -- normally a story is eligible to rank on front page once score >= 3
14 | -- but jobs can be on the front page without a score, and sometimes I see
15 | -- stories on the front page with a score of only 2. We want to calculate
16 | -- raw rank for any story that is ranked, or **should** be ranked.
17 | and (score >= 3 or topRank is not null)
18 | order by topRank asc, rankingScore desc
19 | ),
20 | rawRanks as (
21 | select
22 | id
23 | , sampleTime
24 | , job
25 | , resubmitted
26 | , topRank as rank
27 | , score
28 | , count(*) over (order by rankingScore desc) as rawRank
29 | from rankingScores
30 | order by rank nulls last
31 | )
32 | update dataset as d
33 | set rawRank = count(*) over (
34 | order by case when rawRanks.job then rawRanks.rank else rawRanks.rawRank end, rawRanks.job desc
35 | )
36 | from rawRanks
37 | where d.id = rawRanks.id
38 | and d.sampleTime = rawRanks.sampleTime
39 | ;
40 |
--------------------------------------------------------------------------------
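The rankingScore column above is the pre-penalty Hacker News ranking formula. In conventional notation:

```latex
\text{rankingScore} = \frac{(\text{score} - 1)^{0.8}}{(\text{ageHours} + 2)^{1.8}}
```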
/sql/resubmissions.sql:
--------------------------------------------------------------------------------
1 | -- ESTIMATING RESUBMISSION TIME
2 |
3 | -- THE PROBLEM
4 |
5 | -- When a story is resubmitted, its submission time is updated to the current
6 | -- time, which gives it a rankings boost.
7 |
8 | -- We want to know what this new submission time is, so our algorithm can give
9 | -- stories the same boost. Also our penalty calculation requires knowing each
10 | -- story's pre-penalty ranking score, which requires knowing their submission
11 | -- times.
12 |
13 | -- Unfortunately exact resubmission times are not currently published by HN. The API always
14 | -- gives the story's original submission time.
15 |
16 | -- Each story's submission time datestamp is also included in the HTML when
17 | -- the story is displayed: you can see it when you hover the mouse over the
18 | -- age field ("20 minutes ago").
19 |
20 | -- Unfortunately, although the approximate age field ("20 minutes ago")
21 | -- reflects the resubmission time, the datestamp in the HTML is the original
22 | -- submission time.
23 |
24 | -- So we can only estimate the resubmission time from this approximate age
25 | -- field.
26 |
27 | -- But the approximate age is neither precise nor accurate. It is always a
28 | -- whole number of minutes, hours, or days, rounded down: 1 hour 59 minutes
29 | -- is shown as "1 hour ago", and 1 day 23 hours is shown as "one day ago".
30 |
31 | -- When a story is less than an hour old, we have minute-level granularity.
32 | -- However, this number is imprecise: it can be off by a couple of minutes
33 | -- either way.
34 |
35 | -- Further, resubmitted stories don't seem to show up on the front page (at least
36 | -- not the top 90 ranks we crawl) until they are at least an hour old.
37 |
38 |
39 | -- THE SOLUTION: We wrote dang to ask if he can help us out here. But I have
40 | -- implemented a pretty accurate solution:
41 |
42 | -- We can tell a story has been resubmitted within the last 24 hours because
43 | -- the original submission time will be far earlier (typically by hours) than the
44 | -- submission time implied by the approximate age parsed from the web page (e.g. "3 hours ago").
45 |
46 | -- If the story is less than 1 day old, we can then place lower and upper
47 | -- bounds on the resubmission time. If it says "3 hours", it means anywhere
48 | -- from 3:00 h to 3:59 h ago.
49 |
50 | -- So each time we crawl, we calculate a lower bound on the story's
51 | -- resubmission time (based on an upper bound on its age),
52 | -- and then compare it to the previous lower bound and keep whichever
53 | -- is greater.
54 |
55 | -- So if a story was submitted "3 hours ago" we know the story is at most 4
56 | -- hours old. So we save the sampleTime-4 hours in the submissionTime field,
57 | -- understanding that this is a lower bound on submissionTime. Then in the
58 | -- next minute we redo the calculation. If it still says "3 hours old" then
59 | -- our new implied lower bound on submission time will be greater than the
60 | -- previous lower bound by one minute. So we move the lower bound up by a minute.
61 | -- (lower bounds always move up as we discover higher lower bounds).
62 |
63 | -- When the age string changes to "4 hours ago", we will know the story is at
64 | -- most 4 hours 59 minutes old. But the implied submission time will be about one hour less
65 | -- than the lower bound we calculated one minute before. So we keep the
66 | -- current lower bound. At this point, we have the exact resubmission time
67 | -- within a couple of minutes either way.
68 |
69 | -- Other considerations: We can't detect resubmission times for stories more than a day old
70 | -- (unless they were resubmitted several days later). It is possible that a
71 | -- resubmitted story is more than a day old, and is still on the front page.
72 | -- In that case, we cannot determine it is a resubmitted story. So we need to
73 | -- calculate the resubmission time before the story is a day old. We then
74 | -- remember this time, updating each subsequent datapoint to use this time.
75 |
76 | with latest as (
77 | -- first, get the data from the latest crawl, determine which stories have
78 | -- been resubmitted, and estimate a lower bound on submission time
79 | select
80 | *
81 | , timestamp as originalSubmissionTime
82 | , sampleTime - ageApprox - timestamp > 7200 and ageApprox < 3600*24 as resubmitted
83 | , cast(
84 | case
85 | when
86 | -- we know a story has been resubmitted if the submission time implied
87 | -- by the approximate age differs by too much. Because age is rounded
88 | -- down, the difference can be up to one hour plus a few minutes
89 | -- because of data delay. In practice, the difference is always
90 | -- several hours. Using a cutoff at two hours should be good. Also,
91 | -- we should filter out stories more than a day old: if we just saw
92 | -- these stories for the first time, we don't know if they have been
93 | -- resubmitted or not (and thus don't know how old they really are)
94 | sampleTime - ageApprox - timestamp > 3600*2 and ageApprox < 3600*24
95 | and not job then
96 | -- calculate an upper bound on age
97 | case
98 | when ageApprox < 3600 then ageApprox+59 -- e.g. if a story is "5 minutes old", it could be up to 5 minutes and 59 seconds old
99 | when ageApprox < 3600*24 then (ageApprox+59*60) -- if a story is "1 hour old" it could be up to 1:59m old
100 | end + 100 -- add another 100 seconds because the age field tends to be a little stale.
101 | else sampleTime - timestamp
102 | end
103 | as real) / 3600 as ageHours
104 | from dataset join stories using (id)
105 | where sampleTime = (select max(sampleTime) from dataset)
106 | )
107 | update dataset as d
108 | -- And use the greater of the lower-bound submission time from the last crawl, and the one we just calculated.
109 | set submissionTime = case when latest.sampleTime - ageHours*3600 > ifnull(previousCrawl.submissionTime,0) then cast(latest.sampleTime - ageHours*3600 as int) else previousCrawl.submissionTime end
110 | from latest
111 | left join previousCrawl using (id)
112 | where d.id = latest.id and d.sampleTime = latest.sampleTime;
113 |
--------------------------------------------------------------------------------
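The bound-keeping described in the comment reduces to one comparison per crawl: compute a fresh lower bound on the (re)submission time from the upper bound on age, then keep whichever lower bound is greater. A small sketch of that rule in Go (times in Unix seconds; illustrative only):

```go
package main

import "fmt"

// resubmissionLowerBound mirrors the update in resubmissions.sql:
// the new lower bound on submission time is sampleTime minus an upper bound
// on the story's age, and lower bounds only ever move up.
func resubmissionLowerBound(sampleTime, ageApprox, previousBound int64) int64 {
	var upperBoundAge int64
	if ageApprox < 3600 {
		upperBoundAge = ageApprox + 59 // "5 minutes ago" can mean up to 5m59s
	} else {
		upperBoundAge = ageApprox + 59*60 // "1 hour ago" can mean up to 1h59m
	}
	upperBoundAge += 100 // the age field tends to be slightly stale

	newBound := sampleTime - upperBoundAge
	if newBound > previousBound {
		return newBound
	}
	return previousBound
}

func main() {
	// A story that still reads "3 hours ago" one crawl later: the bound moves up by one minute.
	fmt.Println(resubmissionLowerBound(1_700_000_000, 3*3600, 0))
	fmt.Println(resubmissionLowerBound(1_700_000_060, 3*3600, 1_700_000_000-3*3600-59*60-100))
}
```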
/sql/upvote-rates.sql:
--------------------------------------------------------------------------------
1 | /*Calculate the moving average upvote rate. The moving average window is based
2 | on expected upvotes, instead of time. As a result, the length of the window
3 | in terms of number of rows of data is variable. The calculation to identify
4 | the rows that fall within the window could be very inefficient: the query
5 | will scan the entire dataset to find rows where the difference between
6 | cumulativeExpectedUpvotes and the latest cumulativeExpectedUpvotes falls
7 | within the window. So we save the sampleTime of the start of the window in
8 | the database, so the query only needs to scan rows within this window.
9 | */
10 | with parameters as (
11 | select 50 as windowSize
12 | , 2.3 as priorWeight
13 | , 0.003462767 as fatigueFactor
14 | ), latest as (
15 | select
16 | latest.id
17 | , latest.sampleTime
18 | , latest.score
19 | , latest.cumulativeUpvotes
20 | , latest.cumulativeExpectedUpvotes
21 | , ifnull(previous.upvoteRateWindow,0) as upvoteRateWindow
22 | from dataset latest join previousCrawl previous using (id)
23 | where latest.sampleTime = (select max(sampleTime) from dataset)
24 | )
25 | , windows as (
26 | select
27 | latest.id
28 | , latest.sampleTime
29 | , latest.cumulativeUpvotes as cumulativeUpvotes
30 | , latest.cumulativeExpectedUpvotes as cumulativeExpectedUpvotes
31 | , max(dataset.sampleTime) as newWindow
32 | , min(latest.cumulativeUpvotes - dataset.cumulativeUpvotes) as upvotesInWindow
33 | , min(latest.cumulativeExpectedUpvotes - dataset.cumulativeExpectedUpvotes) as expectedUpvotesInWindow
34 | , min(latest.cumulativeExpectedUpvotes - dataset.cumulativeExpectedUpvotes) - windowSize as over
35 | , parameters.*
36 | from latest
37 | join parameters
38 | left join dataset on
39 | latest.id = dataset.id
40 | and dataset.sampleTime >= latest.upvoteRateWindow
41 | and latest.cumulativeExpectedUpvotes - dataset.cumulativeExpectedUpvotes > windowSize
42 | group by latest.id
43 | )
44 | update dataset
45 | set
46 | upvoteRate = case
47 | when upvotesInWindow is null then ( dataset.cumulativeUpvotes + priorWeight ) / ( (1-exp(-fatigueFactor*dataset.cumulativeExpectedUpvotes))/fatigueFactor + priorWeight)
48 | else ( upvotesInWindow + priorWeight ) / (
49 | -- The formula for adjusting expected upvotes for fatigue comes from the assumption that expected upvote rate decays
50 | -- exponentially: fatigueAdjustedExpectedUpvoteRate = exp(-fatigueFactor*cumulativeExpectedUpvotes).
51 | -- So fatigueAdjustedExpectedUpvotes is the total area under this curve, or the integral of
52 | -- fatigueAdjustedExpectedUpvoteRate from 0 to max(cumulativeExpectedUpvotes), which is:
53 | -- ( 1-exp(-fatigueFactor*max(cumulativeExpectedUpvotes)) ) / fatigueFactor
54 | -- But now we want the area under the curve within the moving average window,
55 | -- So we integrate from max(cumulativeExpectedUpvotes) - expectedUpvotesInWindow to max(cumulativeExpectedUpvotes),
56 | -- which gives us the below formula.
57 |
58 | (
59 | exp(-fatigueFactor*(dataset.cumulativeExpectedUpvotes - expectedUpvotesInWindow))
60 | -exp(-fatigueFactor*dataset.cumulativeExpectedUpvotes)
61 | )/fatigueFactor
62 | + priorWeight)
63 | end
64 | , upvoteRateWindow = newWindow
65 | from windows
66 | where windows.id = dataset.id and windows.sampleTime = dataset.sampleTime;
67 |
68 | -- select
69 | -- id
70 | -- , sampleTime
71 | -- , newWindow
72 | -- , cumulativeUpvotes
73 | -- , cumulativeExpectedUpvotes
74 | -- , upvotesInWindow
75 | -- , expectedUpvotesInWindow
76 | -- , ( upvotesInWindow + priorWeight ) / ( expectedUpvotesInWindow + priorWeight) as movingAverageUpvoteRate
77 | -- , ( cumulativeUpvotes + priorWeight ) / ( cumulativeExpectedUpvotes + priorWeight) as upvoteRate
78 | -- from windows
79 | -- where movingAverageUpvoteRate is not null
80 | -- limit 10;
81 |
82 |
83 |
84 |
85 | -- where datset.id = windows.id
86 |
87 |
88 | -- select
89 | -- id
90 | -- , newWindow
91 | -- , cumulativeUpvotes
92 | -- , cumulativeExpectedUpvotes
93 | -- , upvotesInWindow
94 | -- , expectedUpvotesInWindow
95 | -- , ( upvotesInWindow + priorWeight ) / ( expectedUpvotesInWindow + priorWeight) as movingAverageUpvoteRate
96 | -- , ( cumulativeUpvotes + priorWeight ) / ( cumulativeExpectedUpvotes + priorWeight) as upvoteRate
97 | -- from windows join parameters
98 | -- -- where movingAverageUpvoteRate is not null
99 | -- limit 10;
100 |
101 |
102 |
--------------------------------------------------------------------------------
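The comment inside the update above derives the fatigue-adjusted expected upvotes within the moving-average window by integrating the decaying rate over that window. With $f$ the fatigueFactor, $E$ the latest cumulativeExpectedUpvotes and $W$ the expectedUpvotesInWindow:

```latex
\int_{E - W}^{E} e^{-f x}\,dx \;=\; \frac{e^{-f (E - W)} - e^{-f E}}{f}
```

so the moving-average upvote rate is (upvotesInWindow + priorWeight) divided by this quantity plus priorWeight, matching the else branch of the case expression.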
/static/android-chrome-192x192.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/social-protocols/quality-news/17d482e0ee499116207c67aa66a787c7808c95a2/static/android-chrome-192x192.png
--------------------------------------------------------------------------------
/static/android-chrome-512x512.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/social-protocols/quality-news/17d482e0ee499116207c67aa66a787c7808c95a2/static/android-chrome-512x512.png
--------------------------------------------------------------------------------
/static/apple-touch-icon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/social-protocols/quality-news/17d482e0ee499116207c67aa66a787c7808c95a2/static/apple-touch-icon.png
--------------------------------------------------------------------------------
/static/browserconfig.xml:
--------------------------------------------------------------------------------
1 | Quality News is a Hacker News client with: … For more details, see the Readme on GitHub.
2 |
3 | This is a collective intelligence experiment by Social Protocols. Follow us on Mastodon or Twitter, or send a mail.
4 |
5 | Definitions
6 |
7 | Upvote Rate: The ×upvoteRate quantifies how much more or less likely users are to upvote this story compared to the average story. It is calculated as the story's total upvotes divided by total expected upvotes.
8 |
9 | Expected Upvotes: The expected upvotes for a story is an estimate of the number of upvotes the average story would have received if it were shown at the same times at the same ranks.
10 |
11 | Raw Rank: The raw rank is the rank that a story would have according to the "raw" Hacker News ranking formula: upvotes^0.8 / (ageHours+2)^1.8. This formula produces a certain ranking that you can see on the raw page. But the HN ranking is further influenced by moderator actions, user flags, and other factors which boost or penalize stories.
12 |
13 | Rank Delta: The rank delta is the difference between the story's actual rank and its raw rank (described above). A value of +1 means that a story is ranked 1 position higher on the front page than if it were ranked using the raw formula only. An over-ranked page may have received a boost by HN moderators, while an under-ranked page may have received a penalty.
14 |
15 | Second-Chance Age: The second-chance age is the story's revised age after being re-posted from the second-chance queue.
16 |
17 | Ranking Algorithms