├── end_to_end_tests ├── .ruby-version ├── .solargraph.yml ├── Gemfile ├── Rakefile ├── .rubocop.yml ├── Makefile ├── test │ ├── search_engines.rb │ └── extension_popup.rb ├── Gemfile.lock └── setup.rb ├── extension ├── src │ ├── help │ │ ├── help.ts │ │ ├── help.scss │ │ └── help.html │ ├── icons │ │ ├── green_heart-48.png │ │ ├── green_heart-72.png │ │ └── green_heart-128.png │ ├── popup │ │ ├── controllers │ │ │ ├── help_button_controller.ts │ │ │ ├── settings_page_controller.ts │ │ │ ├── icon_setting_controller.ts │ │ │ └── voting_button_controller.ts │ │ ├── popup.ts │ │ ├── popup.scss │ │ └── popup.html │ ├── background_scripts │ │ └── index.ts │ ├── types.ts │ ├── api.ts │ ├── settings.ts │ ├── content_scripts │ │ └── index.ts │ ├── settings.test.ts │ ├── search_engine.ts │ ├── manifest.firefox.json │ └── manifest.chrome.json ├── tsconfig.json ├── .eslintrc.cjs ├── package.json ├── webpack.config.js └── Makefile ├── backend ├── lambda │ ├── .gitignore │ ├── data │ │ └── vote.json │ ├── Makefile │ ├── Cargo.toml │ └── src │ │ ├── scoring.rs │ │ ├── main.rs │ │ ├── routes.rs │ │ ├── validate.rs │ │ ├── types.rs │ │ └── dynamodb.rs ├── samconfig.production.toml ├── samconfig.development.toml ├── pyproject.toml ├── fixtures │ └── database.yaml ├── process_submissions.py ├── README.md ├── test_scraper.py ├── Makefile ├── hacker_news_scraper.py ├── template.development.yaml ├── template.production.yaml ├── integration_tests │ └── test_backend.py ├── config │ └── Database-NoSQLWorkbench-Model.json └── database.py ├── docs ├── assets │ ├── button_edge.png │ ├── button_opera.png │ ├── button_chrome.png │ ├── button_firefox.png │ ├── URL_description.png │ ├── screenshot_popup_main.png │ ├── screenshot_popup_settings.png │ ├── chrome │ │ ├── screenshot_popup_main.jpg │ │ └── screenshot_good_and_bad_links.jpg │ └── screenshot_good_and_bad_links_cropped.jpg ├── privacy.md ├── journal.md ├── contributing.md └── architecture.md ├── sonar-project.properties ├── .gitignore 
├── Makefile ├── .github └── workflows │ └── continuous-integration.yml └── README.md /end_to_end_tests/.ruby-version: -------------------------------------------------------------------------------- 1 | 3.1.2 2 | -------------------------------------------------------------------------------- /extension/src/help/help.ts: -------------------------------------------------------------------------------- 1 | import "./help.scss"; 2 | import "bootstrap"; 3 | -------------------------------------------------------------------------------- /backend/lambda/.gitignore: -------------------------------------------------------------------------------- 1 | # Needed so that `cargo lambda start` doesn't fall over 2 | target 3 | -------------------------------------------------------------------------------- /docs/assets/button_edge.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tom-barone/discontent/HEAD/docs/assets/button_edge.png -------------------------------------------------------------------------------- /docs/assets/button_opera.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tom-barone/discontent/HEAD/docs/assets/button_opera.png -------------------------------------------------------------------------------- /docs/assets/button_chrome.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tom-barone/discontent/HEAD/docs/assets/button_chrome.png -------------------------------------------------------------------------------- /docs/assets/button_firefox.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tom-barone/discontent/HEAD/docs/assets/button_firefox.png -------------------------------------------------------------------------------- /docs/assets/URL_description.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/tom-barone/discontent/HEAD/docs/assets/URL_description.png -------------------------------------------------------------------------------- /end_to_end_tests/.solargraph.yml: -------------------------------------------------------------------------------- 1 | --- 2 | include: 3 | - "**/*.rb" 4 | require: [] 5 | domains: [] 6 | reporters: 7 | - rubocop -------------------------------------------------------------------------------- /extension/src/help/help.scss: -------------------------------------------------------------------------------- 1 | @import "~bootstrap/scss/bootstrap"; 2 | @import "~bootstrap-icons/font/bootstrap-icons.css"; 3 | -------------------------------------------------------------------------------- /docs/assets/screenshot_popup_main.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tom-barone/discontent/HEAD/docs/assets/screenshot_popup_main.png -------------------------------------------------------------------------------- /extension/src/icons/green_heart-48.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tom-barone/discontent/HEAD/extension/src/icons/green_heart-48.png -------------------------------------------------------------------------------- /extension/src/icons/green_heart-72.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tom-barone/discontent/HEAD/extension/src/icons/green_heart-72.png -------------------------------------------------------------------------------- /extension/src/icons/green_heart-128.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tom-barone/discontent/HEAD/extension/src/icons/green_heart-128.png 
-------------------------------------------------------------------------------- /docs/assets/screenshot_popup_settings.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tom-barone/discontent/HEAD/docs/assets/screenshot_popup_settings.png -------------------------------------------------------------------------------- /docs/assets/chrome/screenshot_popup_main.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tom-barone/discontent/HEAD/docs/assets/chrome/screenshot_popup_main.jpg -------------------------------------------------------------------------------- /docs/assets/chrome/screenshot_good_and_bad_links.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tom-barone/discontent/HEAD/docs/assets/chrome/screenshot_good_and_bad_links.jpg -------------------------------------------------------------------------------- /docs/assets/screenshot_good_and_bad_links_cropped.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tom-barone/discontent/HEAD/docs/assets/screenshot_good_and_bad_links_cropped.jpg -------------------------------------------------------------------------------- /backend/lambda/data/vote.json: -------------------------------------------------------------------------------- 1 | { 2 | "link": { 3 | "hostname": "abc.com" 4 | }, 5 | "vote_value": 1, 6 | "user_id": "b4a70900-1c86-4dd7-8d38-0dc05bfb1e0a" 7 | } 8 | -------------------------------------------------------------------------------- /extension/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "sourceMap": true, 4 | "strict": true, 5 | "target": "es6", 6 | "moduleResolution": "node" 7 | } 8 | } 9 | 
-------------------------------------------------------------------------------- /extension/.eslintrc.cjs: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | extends: ["eslint:recommended", "plugin:@typescript-eslint/recommended"], 3 | parser: "@typescript-eslint/parser", 4 | plugins: ["@typescript-eslint"], 5 | root: true, 6 | }; 7 | -------------------------------------------------------------------------------- /end_to_end_tests/Gemfile: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | source 'https://rubygems.org' 4 | 5 | gem 'capybara' 6 | gem 'debug', '>= 1.0.0' 7 | gem 'minitest' 8 | gem 'rake' 9 | gem 'rubocop' 10 | gem 'selenium-webdriver' 11 | gem 'solargraph' 12 | -------------------------------------------------------------------------------- /end_to_end_tests/Rakefile: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | require 'minitest/test_task' 4 | 5 | # Setup the `rake test` command 6 | Minitest::TestTask.create(:test) do |t| 7 | t.libs << 'test' 8 | t.warning = false 9 | t.test_globs = ['test/**/*.rb'] 10 | end 11 | 12 | task default: :test 13 | -------------------------------------------------------------------------------- /extension/src/popup/controllers/help_button_controller.ts: -------------------------------------------------------------------------------- 1 | import * as browser from "webextension-polyfill"; 2 | import { Controller } from "@hotwired/stimulus"; 3 | 4 | export default class extends Controller { 5 | open_help_page() { 6 | browser.tabs.create({ 7 | url: browser.runtime.getURL("/help/help.html"), 8 | }); 9 | } 10 | } 11 | -------------------------------------------------------------------------------- /end_to_end_tests/.rubocop.yml: -------------------------------------------------------------------------------- 1 | AllCops: 2 | 
TargetRubyVersion: 3.1.2 3 | 4 | Style/Documentation: 5 | Exclude: 6 | - "**/*.rb" 7 | Metrics/MethodLength: 8 | Exclude: 9 | - "**/*.rb" 10 | Metrics/AbcSize: 11 | Exclude: 12 | - "**/*.rb" 13 | Metrics/ClassLength: 14 | Exclude: 15 | - "**/*.rb" 16 | Metrics/BlockLength: 17 | Exclude: 18 | - "**/*.rb" 19 | -------------------------------------------------------------------------------- /backend/samconfig.production.toml: -------------------------------------------------------------------------------- 1 | version = 0.1 2 | [default] 3 | [default.deploy] 4 | [default.deploy.parameters] 5 | stack_name = "discontent-production" 6 | s3_bucket = "aws-sam-cli-managed-default-samclisourcebucket-1hrien2b75gym" 7 | s3_prefix = "discontent-production" 8 | region = "us-east-1" 9 | confirm_changeset = true 10 | capabilities = "CAPABILITY_IAM" 11 | image_repositories = [] 12 | -------------------------------------------------------------------------------- /backend/samconfig.development.toml: -------------------------------------------------------------------------------- 1 | version = 0.1 2 | [default] 3 | [default.deploy] 4 | [default.deploy.parameters] 5 | stack_name = "discontent-development" 6 | s3_bucket = "aws-sam-cli-managed-default-samclisourcebucket-1hrien2b75gym" 7 | s3_prefix = "discontent-development" 8 | region = "us-east-1" 9 | confirm_changeset = false 10 | capabilities = "CAPABILITY_IAM" 11 | image_repositories = [] 12 | -------------------------------------------------------------------------------- /sonar-project.properties: -------------------------------------------------------------------------------- 1 | # This is the name and version displayed in the SonarCloud UI. 
2 | sonar.projectName=Discontent 3 | sonar.projectKey=tom-barone_Discontent 4 | sonar.organization=tom-barone 5 | 6 | sonar.python.version=3.10 7 | 8 | sonar.sources=extension/src,backend/lambda/src 9 | sonar.exclusions=**/*.test.ts 10 | sonar.tests=end_to_end_tests/test,backend/integration_tests 11 | sonar.tests.inclusions=**/*.test.ts 12 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Generated by Cargo 2 | # will have compiled files and executables 3 | debug/ 4 | target/ 5 | 6 | # Remove Cargo.lock from gitignore if creating an executable, leave it for libraries 7 | # More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html 8 | Cargo.lock 9 | 10 | # These are backup files generated by rustfmt 11 | **/*.rs.bk 12 | 13 | # MSVC Windows builds of rustc generate these, which store debugging information 14 | *.pdb 15 | 16 | .aws-sam 17 | build 18 | samconfig.toml 19 | 20 | .env 21 | dist/ 22 | node_modules/ 23 | seed/ 24 | __pycache__/ 25 | docker/ 26 | ci/ 27 | tmp/ 28 | -------------------------------------------------------------------------------- /docs/privacy.md: -------------------------------------------------------------------------------- 1 | # Privacy 2 | 3 | This extension stores no Personally Identifiable Information. 4 | 5 | When you install the extension and before submitting a vote, a randomly generated [UUID](https://en.wikipedia.org/wiki/Universally_unique_identifier) is created for you and saved locally to your browser storage. 6 | 7 | When you vote, the following data is sent and stored in the database: 8 | - The URL of the page you are voting on. 9 | - Your vote value (+1 or -1). 10 | - The user UUID. 11 | - The timestamp (e.g. `2023-02-02T09:36:03Z`). 12 | 13 | Data is only sent and stored when you click a vote button. 
14 | 15 | Icon preferences are stored locally in your browser storage. 16 | -------------------------------------------------------------------------------- /backend/pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "discontent-backend" 3 | version = "0.1.0" 4 | description = "Python environment to manage the Discontent backend" 5 | authors = ["Tom Barone "] 6 | readme = "README.md" 7 | 8 | [tool.poetry.dependencies] 9 | python = "^3.10" 10 | fire = "^0.5.0" 11 | requests = "^2.28.2" 12 | beautifulsoup4 = "^4.11.1" 13 | tqdm = "^4.64.1" 14 | fake-useragent = "^1.1.1" 15 | pandas = "^1.5.3" 16 | boto3 = "^1.24.66" 17 | aiohttp = "^3.8.3" 18 | asyncio = "^3.4.3" 19 | cfn-flip = "^1.3.0" 20 | pytest = "^7.2.2" 21 | amazon-ion = "~0.12.0" 22 | 23 | 24 | [build-system] 25 | requires = ["poetry-core"] 26 | build-backend = "poetry.core.masonry.api" 27 | -------------------------------------------------------------------------------- /extension/src/popup/popup.ts: -------------------------------------------------------------------------------- 1 | // src/application.js 2 | import { Application } from "@hotwired/stimulus"; 3 | import IconSettingController from "./controllers/icon_setting_controller"; 4 | import SettingsPageController from "./controllers/settings_page_controller"; 5 | import VotingButton from "./controllers/voting_button_controller"; 6 | import HelpButton from "./controllers/help_button_controller"; 7 | import "./popup.scss"; 8 | import "bootstrap"; 9 | 10 | window.Stimulus = Application.start(); 11 | window.Stimulus.register("icon-setting", IconSettingController); 12 | window.Stimulus.register("settings-page", SettingsPageController); 13 | window.Stimulus.register("voting-button", VotingButton); 14 | window.Stimulus.register("help-button", HelpButton); 15 | -------------------------------------------------------------------------------- /end_to_end_tests/Makefile: 
-------------------------------------------------------------------------------- 1 | ifneq (,$(wildcard ../.env)) 2 | include ../.env 3 | export 4 | endif 5 | 6 | # Primary targets 7 | 8 | install: 9 | bundle install 10 | 11 | test: guard-BROWSERS_TO_TEST guard-CHROME_EXTENSION_ID guard-FIREFOX_EXTENSION_ID install 12 | cd ../extension && $(MAKE) build 13 | cd ../backend && $(MAKE) dev 14 | @echo "Check that the backend is running" 15 | @curl localhost:9000 || (echo "No local backend is running!" && exit 1) 16 | mkdir -p ci/screenshots 17 | bundle exec rake test > ci/test.log 18 | cat ci/test.log 19 | cd ../backend && $(MAKE) stop 20 | 21 | clean: 22 | rm -rf ci 23 | 24 | # Guard to fail the make target if the specified env variable doesn't exist 25 | # https://lithic.tech/blog/2020-05/makefile-wildcards 26 | guard-%: 27 | @if [ -z '${${*}}' ]; then echo 'ERROR: variable $* not set' && exit 1; fi 28 | -------------------------------------------------------------------------------- /backend/fixtures/database.yaml: -------------------------------------------------------------------------------- 1 | # Typing `github en.wikipedia twitter` into the main search engines consistently 2 | # returns the same results. 
Makes for good test fixtures: 3 | # - en.wikipedia.org 4 | # - github.com 5 | # - twitter.com 6 | # Use a search like `reddit quora` to be guaranteed some results for 7 | # - www.reddit.com 8 | # - www.quora.com 9 | development_links: 10 | - link: "en.wikipedia.org" # Good 11 | sum_of_votes: 20 12 | count_of_votes: 20 13 | - link: "github.com" # Controversial 14 | sum_of_votes: 10 15 | count_of_votes: 52 16 | - link: "twitter.com" # Bad 17 | sum_of_votes: -20 18 | count_of_votes: 20 19 | - link: "www.reddit.com" # NoScore - with some votes 20 | sum_of_votes: 2 21 | count_of_votes: 2 22 | 23 | settings: 24 | voting_is_disabled: false 25 | maximum_votes_per_user_per_day: 10 26 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # Check if a .env file exists, and then load it 2 | ifneq (,$(wildcard ./.env)) 3 | include .env 4 | export 5 | endif 6 | 7 | # Primary targets 8 | 9 | dev: stop 10 | cd backend && $(MAKE) dev # will run services as background processes 11 | cd extension && $(MAKE) dev 12 | 13 | build: 14 | cd extension && $(MAKE) build 15 | cd backend && $(MAKE) build 16 | 17 | stop: 18 | cd backend && $(MAKE) stop # will stop the background processes 19 | 20 | test: 21 | cd extension && $(MAKE) test 22 | cd backend && $(MAKE) test 23 | cd end_to_end_tests && $(MAKE) test 24 | @echo 'Tests succeeded' 25 | 26 | # Secondary targets 27 | 28 | # Guard to fail the make target if the specified env variable doesn't exist 29 | # https://lithic.tech/blog/2020-05/makefile-wildcards 30 | guard-%: 31 | @if [ -z '${${*}}' ]; then echo 'ERROR: variable $* not set' && exit 1; fi 32 | -------------------------------------------------------------------------------- /extension/src/popup/controllers/settings_page_controller.ts: -------------------------------------------------------------------------------- 1 | import { Controller } from 
"@hotwired/stimulus"; 2 | import { DEFAULT_ICONS } from "../../settings"; 3 | 4 | export default class extends Controller { 5 | static targets = ["goodInput", "controversialInput", "badInput"]; 6 | declare readonly goodInputTarget: HTMLInputElement; 7 | declare readonly controversialInputTarget: HTMLInputElement; 8 | declare readonly badInputTarget: HTMLInputElement; 9 | 10 | reset() { 11 | this.goodInputTarget.value = DEFAULT_ICONS.good; 12 | this.controversialInputTarget.value = DEFAULT_ICONS.controversial; 13 | this.badInputTarget.value = DEFAULT_ICONS.bad; 14 | 15 | this.goodInputTarget.dispatchEvent(new Event("input")); 16 | this.controversialInputTarget.dispatchEvent(new Event("input")); 17 | this.badInputTarget.dispatchEvent(new Event("input")); 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /extension/src/background_scripts/index.ts: -------------------------------------------------------------------------------- 1 | import * as browser from "webextension-polyfill"; 2 | import { fetchScores } from "../api"; 3 | import { Message } from "../types"; 4 | 5 | browser.runtime.onMessage.addListener( 6 | async (message: Message): Promise => { 7 | switch (message.type) { 8 | case "ScoresRequest": 9 | return fetchScores(message.data).then((scores) => ({ 10 | type: "ScoresResponse", 11 | data: scores, 12 | })); 13 | default: 14 | // TODO proper error handling 15 | } 16 | return Promise.reject("Unknown message type"); 17 | } 18 | ); 19 | 20 | // Display the help page when newly installed 21 | browser.runtime.onInstalled.addListener((details) => { 22 | if (details.reason === "install") { 23 | // Open the help page 24 | browser.tabs.create({ 25 | url: browser.runtime.getURL("/help/help.html"), 26 | }); 27 | } 28 | }); 29 | -------------------------------------------------------------------------------- /backend/lambda/Makefile: -------------------------------------------------------------------------------- 1 | ifneq 
(,$(wildcard ../../.env)) 2 | include ../../.env 3 | export 4 | endif 5 | 6 | dev: guard-TABLE_NAME guard-LOG_LEVEL guard-USE_LOCAL_DATABASE guard-RANDOMIZE_SCORES 7 | cargo build 8 | @echo "Start the lambda locally" 9 | # Send a ping after 3 seconds to the cargo-lambda server to wake it up 10 | sleep 3 && curl "http://localhost:9000/lambda-url/request-handler/v1" & 11 | cargo lambda watch --no-reload 12 | 13 | build: 14 | pipx install cargo-lambda 15 | cargo build 16 | cargo lambda build --release --arm64 17 | 18 | stop: 19 | @echo "Force stopping the lambda" 20 | pkill cargo-lambda || true 21 | 22 | clean: 23 | cargo clean 24 | 25 | test: 26 | cargo test -- --nocapture 27 | 28 | # Guard to fail the make target if the specified env variable doesn't exist 29 | # https://lithic.tech/blog/2020-05/makefile-wildcards 30 | guard-%: 31 | @if [ -z '${${*}}' ]; then echo 'ERROR: variable $* not set' && exit 1; fi 32 | -------------------------------------------------------------------------------- /backend/lambda/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "request-handler" 3 | version = "0.1.0" 4 | edition = "2021" 5 | 6 | # Used for `cargo build` 7 | [profile.dev] 8 | opt-level = 0 9 | debug = true 10 | 11 | # Used for `cargo build --release` 12 | [profile.release] 13 | opt-level = 3 14 | debug = false 15 | 16 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 17 | 18 | [dependencies] 19 | aws-config = "0.54.1" 20 | aws-sdk-dynamodb = "0.24" 21 | tokio = { version = "1", features = ["full"] } 22 | lambda_runtime = "0.7.3" 23 | serde_json = "1.0.91" 24 | lambda_http = "0.7.3" 25 | tracing = "0.1.37" 26 | tracing-subscriber = { version = "0.3.16", features = ["env-filter"] } 27 | serde = { version = "1.0.152", features = ["derive"] } 28 | validator = { version = "0.16", features = ["derive"] } 29 | uuid = { version = "1.3.0", features = ["serde", "v4"] 
# End-to-end checks that the extension decorates search results with the
# expected score icon on every supported search engine.
class TestSearchEngines < CapybaraTestCase
  # Fixture site searched for => icon (followed by some text) expected on the
  # results page.
  EXPECTED_ICONS = {
    'en.wikipedia.org' => /💚 .+/,
    'github.com' => /🤨 .+/,
    'twitter.com' => /❌ .+/
  }.freeze

  # Search engine name (used in the generated test name) => URL prefixes that
  # end right before the `=<query>` part of the search URL.
  SEARCH_URL_PREFIXES = {
    'google' => [
      'www.google.com/search?q',
      'www.google.it/search?q',
      'www.google.com.au/search?q'
    ],
    'bing' => ['www.bing.com/search?q'],
    'duckduckgo' => ['duckduckgo.com/?q', 'html.duckduckgo.com/html/?q']
  }.freeze

  # Run a `site:` search for each fixture site on the given search URL prefix
  # and assert that the matching icon shows up in the results.
  def check_search(domain)
    EXPECTED_ICONS.each do |site, icon_pattern|
      visit("https://#{domain}=site%3A#{site}")
      refresh
      assert_text(:all, icon_pattern)
    end
  end

  # Switch Capybara to the given browser driver and move to a fresh window.
  def prepare(browser)
    Capybara.current_driver = browser
    sleep(5) # Give the browser some time to load

    # Create a new window for the visits because of tab weirdness and the
    # extension auto opening the help page
    switch_to_window(open_new_window)
    sleep(5)
  end

  # Define one test per browser and search engine.
  BROWSERS_TO_TEST.each do |browser|
    SEARCH_URL_PREFIXES.each do |engine, url_prefixes|
      define_method("test_#{browser}_#{engine}_displays_icons_on_links") do
        prepare(browser)
        url_prefixes.each { |url_prefix| check_search(url_prefix) }
      end
    end
  end
end
def process_submissions(input_files,
                        output_file='output/processed_submissions.csv',
                        new_min=25,
                        new_max=100):
    """
    Process submissions from Hacker News

    This script takes the output from the hacker_news_scraper and processes it
    into something that Discontent can use. The steps are:
        1. Read the CSVs and merge them into a single pandas DataFrame
        2. Map all the urls to just the hostname part
        3. Combine duplicate submissions and sum their votes (drop the date
           column)
        4. Scale the summed votes linearly so the results lie between
           new_min and new_max
        5. Output the results to a CSV

    Example usage:
        process_submissions.py --input_files="output/submissions_*.csv"

    Args:
        input_files: Glob pattern matching the CSV files to read. The files
            are read without a header row, as the columns date, link, votes.
        output_file: Path of the CSV to write. Missing parent directories
            are created.
        new_min: Scaled value given to the hostname with the fewest votes.
        new_max: Scaled value given to the hostname with the most votes.

    Raises:
        ValueError: If no file matches input_files.
    """
    # Local import so this function does not rely on a new file-level import
    from pathlib import Path

    submission_files = glob.glob(input_files)
    if not submission_files:
        # pd.concat would raise an opaque "No objects to concatenate" error
        raise ValueError(f'No submission files match {input_files!r}')

    # Step 1
    df = pd.concat([
        pd.read_csv(f, names=['date', 'link', 'votes'])
        for f in submission_files
    ])

    # Step 2
    df['link'] = df['link'].map(lambda url: urlparse(url).hostname)

    # Step 3
    # Links without a hostname map to None, and groupby leaves those rows out
    df = df.groupby(['link'])['votes'].sum().to_frame()
    df = df.sort_values(by='votes', ascending=False)

    # Step 4
    current_min = df['votes'].min()
    current_max = df['votes'].max()
    vote_range = current_max - current_min
    if vote_range == 0:
        # Every hostname has the same total (e.g. a single hostname), so the
        # linear scaling would divide by zero and produce NaN.
        df['scaled_votes'] = int(new_min)
    else:
        df['scaled_votes'] = (
            (new_max - new_min) * (df['votes'] - current_min) / vote_range
            + new_min).astype(int)

    # Step 5
    Path(output_file).parent.mkdir(parents=True, exist_ok=True)
    df.to_csv(output_file, quoting=csv.QUOTE_ALL)
import { LinkScore, Score, ScoresRequest, ScoresResponse } from "./types";

// Base URL of the Discontent API. `process.env.LAMBDA_API_URL` is replaced
// at build time by the webpack DefinePlugin.
const ENDPOINT = process.env.LAMBDA_API_URL;

/**
 * Fetch the scores for the links in `request`.
 *
 * The JSON encoded links are sent in the `from` query parameter of a GET to
 * `/scores`. The response is expected to be an array of `{link, score}`
 * objects, which is flattened into a `hostname -> score` object.
 *
 * Rejects if the request fails, the API answers with a non-2xx status, or the
 * body is not an array.
 */
export async function fetchScores(
  request: ScoresRequest
): Promise<ScoresResponse> {
  const params = new URLSearchParams();
  params.append("from", request.json);
  const url = ENDPOINT + "/scores?" + params;

  const response = await fetch(url, {
    method: "GET",
  });
  if (!response.ok) {
    // Fail here rather than with a TypeError while walking an error payload
    throw new Error(`Scores request failed with status ${response.status}`);
  }

  const scores: unknown = await response.json();
  if (!Array.isArray(scores)) {
    throw new Error("Unexpected scores response: expected an array");
  }

  // TODO: Validate the shape of each entry
  const result = new Map<string, Score>();
  scores.forEach((link_score: LinkScore) => {
    result.set(link_score.link.hostname, link_score.score);
  });
  return Object.fromEntries(result);
}

/**
 * Submit a vote of `value` for `hostname` on behalf of `user_id`.
 *
 * Resolves to `true` when the API answers with status 200. Rejects with a
 * short user-facing string when the API cannot be reached or answers with any
 * other status; the details are logged with `console.error`.
 */
export async function submitVote(
  value: 1 | -1,
  hostname: string,
  user_id: string
): Promise<boolean> {
  const url = ENDPOINT + "/vote";

  let response: Response;
  try {
    response = await fetch(url, {
      method: "POST",
      // NOTE(review): backend/lambda/data/vote.json names this field
      // `vote_value` - confirm which name the lambda expects.
      body: JSON.stringify({
        link: {
          hostname,
        },
        value,
        user_id,
      }),
    });
  } catch (error) {
    console.error(error);
    return Promise.reject("Could not connect to the Discontent API");
  }

  const response_text = await response.text();
  if (response.status === 200) {
    return true;
  }

  console.error(`Request error ${response.status}`, response_text);
  // TODO:
  // Add nice user formatted messages to the lambda response
  // e.g. {error: "", user_message: ""}
  // Return the user message here
  // console.error the full message
  return Promise.reject("Could not submit your vote");
}
}), 68 | ], 69 | }); 70 | -------------------------------------------------------------------------------- /backend/lambda/src/scoring.rs: -------------------------------------------------------------------------------- 1 | use crate::scoring::Score::*; 2 | use std::collections::HashMap; 3 | 4 | use crate::types::{database::LinkDetail, *}; 5 | 6 | const GOOD_SCORE_BOUND: &i32 = &20; 7 | const BAD_SCORE_BOUND: &i32 = &-10; 8 | 9 | pub fn random_link_scores(links: &Vec) -> Vec { 10 | let score_enums = vec![Good, Bad, Controversial, NoScore]; 11 | let mut scores: Vec = vec![]; 12 | for link in links { 13 | // Choose random score from the enums 14 | let random_score = score_enums 15 | .get(rand::random::() % score_enums.len()) 16 | .unwrap() 17 | .to_owned(); 18 | scores.push(LinkScore::new(link.to_owned(), random_score)); 19 | } 20 | scores 21 | } 22 | 23 | pub fn calculate_link_scores( 24 | links: &Vec, 25 | link_details: &HashMap, 26 | ) -> Vec { 27 | let mut scores: Vec = vec![]; 28 | for link in links { 29 | match link_details.get(&link) { 30 | Some(link_detail) => { 31 | let LinkDetail { 32 | sum_of_votes, 33 | count_of_votes, 34 | .. 
35 | } = link_detail; 36 | 37 | if sum_of_votes >= GOOD_SCORE_BOUND { 38 | scores.push(LinkScore::new(link.to_owned(), Score::Good)); 39 | } else if sum_of_votes <= BAD_SCORE_BOUND { 40 | scores.push(LinkScore::new(link.to_owned(), Score::Bad)); 41 | } else if count_of_votes > &50 42 | && sum_of_votes > BAD_SCORE_BOUND 43 | && sum_of_votes < GOOD_SCORE_BOUND 44 | { 45 | scores.push(LinkScore::new(link.to_owned(), Score::Controversial)); 46 | } else { 47 | scores.push(LinkScore::new(link.to_owned(), Score::NoScore)); 48 | } 49 | } 50 | None => { 51 | scores.push(LinkScore::new(link.to_owned(), Score::NoScore)); 52 | } 53 | } 54 | } 55 | scores 56 | } 57 | -------------------------------------------------------------------------------- /backend/README.md: -------------------------------------------------------------------------------- 1 | # Discontent Backend 2 | 3 | The file `Database-NoSQLWorkbench-Model.json` should be opened the [NoSQL Workbench](https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/workbench.html) application, and it contains some sample data to help visualise the database. 4 | 5 | The file `Database-CloudFormation-Model.json` is an export from NoSQL Workbench and is used as the schema for AWS Cloudformation. 6 | 7 | # Seeding the database 8 | 9 | The problem we're up against is how to get things started? 10 | 11 | ## Good sites 12 | 13 | Hacker News submissions seemed like a decent starting point for building a set of "good sites". See `hacker_news_scraper.py` for a python script that scrapes the [front page submissions list](https://news.ycombinator.com/lists) from [HackerNews](https://news.ycombinator.com/news) and gets the links & upvotes. 14 | 15 | # Database 16 | 17 | This directory contains python scripts to manage the Discontent DynamoDB. 18 | 19 | 1. Run a local instance of DynamoDB for development and testing 20 | 1. 
Seed the DynamoDB 21 | 22 | ```bash 23 | poetry run python database.py setup 24 | ``` 25 | will create the database with the latest schema, and initialise it with seed data 26 | 27 | ```bash 28 | poetry run python database.py reset 29 | ``` 30 | 31 | - drop: delete the database 32 | - setup: create and seed 33 | - reset: drop and setup 34 | 35 | 36 | 37 | ## Requirements 38 | 39 | - [Poetry](https://python-poetry.org) 40 | 41 | ## Usage 42 | 43 | Setup the environment 44 | 45 | ```bash 46 | poetry install 47 | ``` 48 | 49 | Run the scripts 50 | 51 | ```bash 52 | poetry run python hacker_news_scraper.py --start_date=2023-01-10 --end_date=2023-01-29 53 | poetry run python process_submissions.py --input_files "output/submissions_*.csv" 54 | poetry run python send_post_requests.py --input_file "output/processed_submissions.csv" 55 | ``` 56 | 57 | Sorry for spamming your servers Hacker News. `hacker_news_scraper.py` will use a random `user-agent` on each request, but every 30 or so requests you'll be hit with a 403. Rotate your VPN to reset. 58 | 59 | I thought briefly about using their [unban API](https://news.ycombinator.com/item?id=4761102) to get around this... but that might be taking things a bit too far. 
60 | -------------------------------------------------------------------------------- /extension/Makefile: -------------------------------------------------------------------------------- 1 | ifneq (,$(wildcard ../.env)) 2 | include ../.env 3 | export 4 | endif 5 | 6 | WEBPACK_CMD := npx webpack --mode development --config webpack.config.js --env LAMBDA_API_URL=$(LAMBDA_API_URL) 7 | CHROME_VERSION := $(shell jq -r .version src/manifest.chrome.json) 8 | FIREFOX_VERSION := $(shell jq -r .version src/manifest.firefox.json) 9 | 10 | # Primary targets 11 | 12 | install: 13 | npm install 14 | 15 | dev: install guard-BROWSER 16 | $(MAKE) --jobs=2 dev-webpack-watch dev-$(BROWSER)-extension 17 | 18 | build: guard-LAMBDA_API_URL clean install 19 | $(WEBPACK_CMD) --env BROWSER=firefox 20 | $(WEBPACK_CMD) --env BROWSER=chrome 21 | mkdir -p dist/packed 22 | cd dist/firefox && npx web-ext build --overwrite-dest --artifacts-dir ../packed --filename discontent-firefox-$(FIREFOX_VERSION).zip 23 | cd dist/chrome && zip -r ../packed/discontent-chrome-$(CHROME_VERSION).zip * 24 | 25 | clean: 26 | rm -rf dist 27 | 28 | test: install 29 | npx jest 30 | 31 | # Secondary targets 32 | 33 | dev-webpack-watch: guard-LAMBDA_API_URL guard-BROWSER 34 | $(WEBPACK_CMD) --watch --env BROWSER=$(BROWSER) 35 | 36 | dev-firefox-extension: 37 | cd dist/firefox && npx web-ext run --start-url https://www.google.com/search?q=github+en.wikipedia+twitter https://www.bing.com/search?q=github+en.wikipedia+twitter https://duckduckgo.com/?q=github+en.wikipedia+twitter https://html.duckduckgo.com/html/?q=github+en.wikipedia+twitter about:debugging 38 | 39 | dev-chrome-extension: #build 40 | # Open up chrome with the extension loaded and two tabs that are helpful for development 41 | # TODO: Not very system independent :( 42 | /Applications/Google\ Chrome.app/Contents/MacOS/Google\ Chrome --load-extension=./dist/chrome/ https://www.google.com/search?q=github+en.wikipedia+twitter 
https://www.bing.com/search?q=github+en.wikipedia+twitter https://duckduckgo.com/?q=github+en.wikipedia+twitter https://html.duckduckgo.com/html/?q=github+en.wikipedia+twitter chrome://extensions/ chrome-extension://kglbdhongcfkafgfgofpgaehafnbgnhd/popup/popup.html 43 | 44 | # Guard to fail the make target if the specified env variable doesn't exist 45 | # https://lithic.tech/blog/2020-05/makefile-wildcards 46 | guard-%: 47 | @if [ -z '${${*}}' ]; then echo 'ERROR: variable $* not set' && exit 1; fi 48 | -------------------------------------------------------------------------------- /extension/src/settings.ts: -------------------------------------------------------------------------------- 1 | import { Browser } from "webextension-polyfill"; 2 | import * as uuid from "uuid"; 3 | 4 | export type IconName = "good" | "controversial" | "bad"; 5 | export const DEFAULT_ICONS = { 6 | good: "💚", 7 | controversial: "🤨", 8 | bad: "❌", 9 | }; 10 | 11 | export class Settings { 12 | private readonly _browser: Browser; 13 | 14 | constructor(browser: Browser) { 15 | this._browser = browser; 16 | } 17 | 18 | async get_icons() { 19 | return Promise.all([ 20 | this.get_icon("good"), 21 | this.get_icon("controversial"), 22 | this.get_icon("bad"), 23 | ]).then((values) => { 24 | return { 25 | good: values[0], 26 | controversial: values[1], 27 | bad: values[2], 28 | }; 29 | }); 30 | } 31 | 32 | async get_icon(icon_name: IconName) { 33 | return this._browser.storage.local.get(icon_name).then(async (settings) => { 34 | if (is_valid_emoji(settings[icon_name])) { 35 | return settings[icon_name] as string; 36 | } else { 37 | // Save the default icon to storage if it's not valid 38 | return this.set_icon(icon_name, DEFAULT_ICONS[icon_name]); 39 | } 40 | }); 41 | } 42 | 43 | async set_icon(icon_name: IconName, icon: string) { 44 | if (!is_valid_emoji(icon)) { 45 | return Promise.reject("Icon must be a single character"); 46 | } 47 | return this._browser.storage.local 48 | .set({ [icon_name]: 
icon }) 49 | .then(() => icon); 50 | } 51 | 52 | async get_user_id() { 53 | return this._browser.storage.local.get("user_id").then((settings) => { 54 | if (uuid.validate(settings["user_id"])) { 55 | return settings["user_id"]; 56 | } else { 57 | // Save a newly generated user ID to storage if it's not valid 58 | return this.set_user_id(uuid.v4()); 59 | } 60 | }); 61 | } 62 | 63 | async set_user_id(user_id: string) { 64 | if (!uuid.validate(user_id)) { 65 | return Promise.reject("User ID must be a valid UUID"); 66 | } 67 | return this._browser.storage.local 68 | .set({ ["user_id"]: user_id }) 69 | .then(() => user_id); 70 | } 71 | } 72 | 73 | function is_valid_emoji(emoji: string | undefined): boolean { 74 | // https://stackoverflow.com/questions/54369513/how-to-count-the-correct-length-of-a-string-with-emojis-in-javascript 75 | return ( 76 | emoji !== undefined && typeof emoji === "string" && [...emoji].length == 1 77 | ); 78 | } 79 | -------------------------------------------------------------------------------- /extension/src/content_scripts/index.ts: -------------------------------------------------------------------------------- 1 | import * as browser from "webextension-polyfill"; 2 | import { identify } from "../search_engine"; 3 | import { ScoresRequest, ScoresResponseMessage } from "../types"; 4 | import { Settings } from "../settings"; 5 | 6 | /* The flow of the content script is quite simple: 7 | * 1. Check if the content script has already run 8 | * 2. Find out which search engine we're on (if any) 9 | * 3. Get all the search engine links 10 | * 4. Send the links to the background script and request their scores 11 | * 5. 
Add the scores to the links 12 | */ 13 | 14 | function runDiscontent() { 15 | // Step 1 16 | if (window.hasRun) { 17 | return; 18 | } 19 | window.hasRun = true; 20 | 21 | // Step 2 22 | const search_engine = identify(window.location.hostname); 23 | if (search_engine == null) { 24 | // Just do nothing if we're not on a supported search engine 25 | return; 26 | } 27 | 28 | // Step 3 29 | search_engine 30 | .getAllLinks() 31 | .then((search_engine_links) => { 32 | if (search_engine_links.length === 0) { 33 | return; 34 | } 35 | // Step 4 36 | const settings = new Settings(browser); 37 | return Promise.all([ 38 | settings.get_icons(), 39 | browser.runtime.sendMessage({ 40 | type: "ScoresRequest", 41 | data: new ScoresRequest(search_engine_links), 42 | }), 43 | ]).then(([icons, message]) => { 44 | const scoresResponse = (message as ScoresResponseMessage).data; 45 | // Step 5 46 | search_engine_links.forEach((search_engine_link) => { 47 | switch (scoresResponse[search_engine_link.link.hostname]) { 48 | case "Good": 49 | search_engine_link.addSymbol(icons.good); 50 | break; 51 | case "Controversial": 52 | search_engine_link.addSymbol(icons.controversial); 53 | break; 54 | case "Bad": 55 | search_engine_link.addSymbol(icons.bad); 56 | break; 57 | case "NoScore": 58 | default: 59 | // Do nothing 60 | } 61 | }); 62 | }); 63 | }) 64 | .catch((error) => { 65 | // TODO: Handle the error somehow 66 | console.error(error); 67 | }); 68 | } 69 | 70 | if (navigator.userAgent.includes("Firefox")) { 71 | document.addEventListener( 72 | "DOMContentLoaded", 73 | function () { 74 | runDiscontent(); 75 | }, 76 | false, 77 | ); 78 | } 79 | 80 | if (navigator.userAgent.includes("Chrome")) { 81 | window.onload = function () { 82 | runDiscontent(); 83 | }; 84 | } 85 | -------------------------------------------------------------------------------- /end_to_end_tests/Gemfile.lock: -------------------------------------------------------------------------------- 1 | GEM 2 | remote: 
https://rubygems.org/ 3 | specs: 4 | addressable (2.8.1) 5 | public_suffix (>= 2.0.2, < 6.0) 6 | ast (2.4.2) 7 | backport (1.2.0) 8 | benchmark (0.2.1) 9 | capybara (3.38.0) 10 | addressable 11 | matrix 12 | mini_mime (>= 0.1.3) 13 | nokogiri (~> 1.8) 14 | rack (>= 1.6.0) 15 | rack-test (>= 0.6.3) 16 | regexp_parser (>= 1.5, < 3.0) 17 | xpath (~> 3.2) 18 | debug (1.7.1) 19 | irb (>= 1.5.0) 20 | reline (>= 0.3.1) 21 | diff-lcs (1.5.0) 22 | e2mmap (0.1.0) 23 | io-console (0.6.0) 24 | irb (1.6.2) 25 | reline (>= 0.3.0) 26 | jaro_winkler (1.5.4) 27 | json (2.6.3) 28 | kramdown (2.4.0) 29 | rexml 30 | kramdown-parser-gfm (1.1.0) 31 | kramdown (~> 2.0) 32 | matrix (0.4.2) 33 | mini_mime (1.1.2) 34 | minitest (5.17.0) 35 | nokogiri (1.14.2-x86_64-darwin) 36 | racc (~> 1.4) 37 | nokogiri (1.14.2-x86_64-linux) 38 | racc (~> 1.4) 39 | parallel (1.22.1) 40 | parser (3.2.1.0) 41 | ast (~> 2.4.1) 42 | public_suffix (5.0.1) 43 | racc (1.6.2) 44 | rack (3.0.4.1) 45 | rack-test (2.0.2) 46 | rack (>= 1.3) 47 | rainbow (3.1.1) 48 | rake (13.0.6) 49 | regexp_parser (2.7.0) 50 | reline (0.3.2) 51 | io-console (~> 0.5) 52 | reverse_markdown (2.1.1) 53 | nokogiri 54 | rexml (3.2.5) 55 | rubocop (1.46.0) 56 | json (~> 2.3) 57 | parallel (~> 1.10) 58 | parser (>= 3.2.0.0) 59 | rainbow (>= 2.2.2, < 4.0) 60 | regexp_parser (>= 1.8, < 3.0) 61 | rexml (>= 3.2.5, < 4.0) 62 | rubocop-ast (>= 1.26.0, < 2.0) 63 | ruby-progressbar (~> 1.7) 64 | unicode-display_width (>= 2.4.0, < 3.0) 65 | rubocop-ast (1.27.0) 66 | parser (>= 3.2.1.0) 67 | ruby-progressbar (1.11.0) 68 | rubyzip (2.3.2) 69 | selenium-webdriver (4.8.1) 70 | rexml (~> 3.2, >= 3.2.5) 71 | rubyzip (>= 1.2.2, < 3.0) 72 | websocket (~> 1.0) 73 | solargraph (0.48.0) 74 | backport (~> 1.2) 75 | benchmark 76 | bundler (>= 1.17.2) 77 | diff-lcs (~> 1.4) 78 | e2mmap 79 | jaro_winkler (~> 1.5) 80 | kramdown (~> 2.3) 81 | kramdown-parser-gfm (~> 1.1) 82 | parser (~> 3.0) 83 | reverse_markdown (>= 1.0.5, < 3) 84 | rubocop (>= 0.52) 85 | thor (~> 
1.0) 86 | tilt (~> 2.0) 87 | yard (~> 0.9, >= 0.9.24) 88 | thor (1.2.1) 89 | tilt (2.1.0) 90 | unicode-display_width (2.4.2) 91 | webrick (1.7.0) 92 | websocket (1.2.9) 93 | xpath (3.2.0) 94 | nokogiri (~> 1.8) 95 | yard (0.9.28) 96 | webrick (~> 1.7.0) 97 | 98 | PLATFORMS 99 | x86_64-darwin-20 100 | x86_64-linux 101 | 102 | DEPENDENCIES 103 | capybara 104 | debug (>= 1.0.0) 105 | minitest 106 | rake 107 | rubocop 108 | selenium-webdriver 109 | solargraph 110 | 111 | BUNDLED WITH 112 | 2.3.7 113 | -------------------------------------------------------------------------------- /end_to_end_tests/setup.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | require 'capybara/minitest' 4 | require 'date' 5 | require 'selenium-webdriver' 6 | 7 | class CapybaraTestCase < Minitest::Test 8 | include Capybara::DSL 9 | include Capybara::Minitest::Assertions 10 | 11 | # Chrome setup 12 | chrome_options = Selenium::WebDriver::Chrome::Options.new 13 | chrome_options.add_argument( 14 | '--disable-extensions-except=../extension/dist/chrome' 15 | ) 16 | chrome_options.add_argument('--load-extension=../extension/dist/chrome') 17 | chrome_options.add_argument('--headless=new') if ENV['HEADLESS'] == 'true' 18 | chrome_options.add_option('goog:loggingPrefs', { browser: 'ALL' }) 19 | Capybara.register_driver :chrome do |app| 20 | driver = Capybara::Selenium::Driver.new(app, browser: :chrome, options: chrome_options) 21 | driver.browser.manage.window.resize_to(1920, 1080) 22 | sleep(10) 23 | driver 24 | end 25 | 26 | # Firefox setup 27 | firefox_options = Selenium::WebDriver::Firefox::Options.new 28 | firefox_options.add_argument('-headless') if ENV['HEADLESS'] == 'true' 29 | Capybara.register_driver :firefox do |app| 30 | driver = Capybara::Selenium::Driver.new(app, browser: :firefox, options: firefox_options) 31 | driver.browser.manage.window.resize_to(1920, 1080) 32 | 
driver.browser.install_addon('../extension/dist/firefox', true) 33 | sleep(10) 34 | driver 35 | end 36 | 37 | Capybara.default_driver = :chrome 38 | Capybara.default_max_wait_time = 40 # Seconds 39 | Capybara.enable_aria_label = true 40 | 41 | BROWSERS_TO_TEST = ENV['BROWSERS_TO_TEST'].split(' ').map(&:to_sym) 42 | 43 | def extension_popup_url 44 | case Capybara.current_driver 45 | when :chrome 46 | return "chrome-extension://#{ENV['CHROME_EXTENSION_ID']}/popup/popup.html" 47 | when :firefox 48 | prefs_file = "#{page.driver.browser.capabilities['moz:profile']}/prefs.js" 49 | # Extract the webextensions uuids 50 | uuid_regex = /user_pref\("extensions.webextensions.uuids", "(.+)"\)/ 51 | uuid_match = uuid_regex.match(File.read(prefs_file))[1] 52 | # Remove all the backslashes 53 | uuid_match.gsub!('\\', '') 54 | # Parse the uuid_match as JSON 55 | uuids = JSON.parse(uuid_match) 56 | return "moz-extension://#{uuids[ENV['FIREFOX_EXTENSION_ID']]}/popup/popup.html" 57 | end 58 | throw 'Unknown driver' 59 | end 60 | 61 | def teardown 62 | # Take a screenshot and save the page html if the test failed 63 | timestamp = Time.now.strftime('%Y_%m_%d-%H_%M_%S') 64 | filename = "#{name}-#{timestamp}" 65 | unless passed? 66 | save_screenshot("ci/screenshots/#{filename}.png") 67 | File.write("ci/screenshots/#{filename}.html", page.html) 68 | if defined?(page.driver.browser.logs) 69 | File.write("ci/screenshots/#{filename}-console-logs.log", 70 | page.driver.browser.logs.get(:browser)) 71 | end 72 | end 73 | 74 | Capybara.reset_sessions! 75 | Capybara.use_default_driver 76 | end 77 | end 78 | -------------------------------------------------------------------------------- /docs/journal.md: -------------------------------------------------------------------------------- 1 | # Journal 2 | 3 | I figured I'd take some time out to document the process of this side project, but on a more personal level. 
The idea came from when someone posted the first review of the extension to the Chrome store, and it made me so happy I wanted to tell the world. 4 | 5 | Ironically though this isn't for anyone else to read, really only doing it for my own sake. 6 | 7 | # 15-03-2023 8 | 9 | The extension has been up and running for almost a week now and has a modest user base of about 90 people. 10 | 11 | Someone just posted this review to the Chrome store: 12 | 13 | > The best extension I have in my browser. It really has the potential to help me a lot with the recent upcoming struggles of just pure SEO results coming up and actual useful info getting burried below. That problem actually drives me crazy sometimes. 14 | > I hope this gets more attention, by good ppl only ofc <3. 15 | 16 | I don't know who you are `ohnesinn` but you are actually the biggest legend. 17 | 18 | Some of the design choices I made for the NoSQL database were a bit misguided. Where I thought I needed GSIs to accomplish certain tasks, turns out I can just use the main table itself. Also I definitely misunderstood the usage of sort keys, and thought I could query with them without providing a PK but nope. 19 | 20 | I've made a couple of promotional posts on r/chrome, r/firefox and HackerNews but don't really know how else to promote it. Perhaps asking existing users to share it and leveraging the fact that the more people use it, the more useful it'll become. 21 | 22 | People seem preoccupied with spam prevention, though that might just be the tech-minded folks doing a bit of bike shedding. Apart from the simple prevention stuff already in the extension I'm not gonna give it much more thought until it becomes a problem. Interestingly it looks like someone already cracked it, a few days ago there were a bunch of votes on DuckDuckGo from these user IDs, all within the span of a couple minutes. 
23 | 24 | ``` 25 | 8fe16b70-f4b9-475c-90c5-da078c28381c 26 | 8fe13b70-f4b9-475c-90c5-da078c28381c 27 | 8fe11b70-f4b9-475c-90c5-da278c28381c 28 | 8fe11b70-f4b9-475c-90c5-da278c28185c 29 | 8fe11b70-f4b9-475c-90c5-da278c28185b 30 | 8fe11b70-f4b9-475c-90c5-da278c28181c 31 | 8fe11b70-f4b9-4757-98c4-da298c28285b 32 | 8fe11b70-f4b9-4757-98c4-da298c28282b 33 | 8fe11b70-f4b9-4757-98c4-da298c28185b 34 | 8fe11b70-f4b9-4757-98c4-da278c28186b 35 | 8fe11b70-f4b9-4757-95c4-da278c28185b 36 | 8fe11b70-f4b9-4756-95c4-da278c28185b 37 | 8fe11b70-f4b9-4756-94c4-da278c28185b 38 | 8fe11b70-f4b9-4756-93c4-da278c28185b 39 | 8fe11b70-f4b9-4756-92c4-da278c28185b 40 | 8fe11b70-f4b9-4756-91c5-da278c28185b 41 | 8fe11b70-f4b9-4756-91c4-da278c28185b 42 | 8fe11b70-f4b9-4756-90c5-da278c28185b 43 | ``` 44 | 45 | I wondered if they would go on to do something more nefarious and so kept an eye out for any more shenanigans, but nope. I guess they were satisfied with themselves that they found a hack and then moved on - fine by me. Obviously it's not exactly a difficult hack to pull off, but enough to prevent people with 0 programming ability from causing mischief. 
46 | -------------------------------------------------------------------------------- /backend/test_scraper.py: -------------------------------------------------------------------------------- 1 | from hacker_news_scraper import date_range, get_submissions 2 | from datetime import datetime 3 | from bs4 import BeautifulSoup 4 | 5 | 6 | def test_date_range(): 7 | start = datetime(2022, 12, 29) 8 | end = datetime(2023, 1, 4) 9 | 10 | dates = list(date_range(start, end)) 11 | assert dates[0] == datetime(2022, 12, 29) 12 | assert dates[1] == datetime(2022, 12, 30) 13 | assert dates[2] == datetime(2022, 12, 31) 14 | assert dates[3] == datetime(2023, 1, 1) 15 | assert dates[4] == datetime(2023, 1, 2) 16 | assert dates[5] == datetime(2023, 1, 3) 17 | assert dates[6] == datetime(2023, 1, 4) 18 | assert len(dates) == 7 19 | 20 | 21 | def test_scraping(): 22 | # Truth set for the html file in test/hacker_news_front_2023_01_11.html 23 | truth_set = [ 24 | ('https://news.ycombinator.com/item?id=34338995', 123), 25 | ('https://elkue.com/nyc-slice/', 670), 26 | ('https://github.com/karpathy/nanoGPT', 1532), 27 | ('https://news.ycombinator.com/item?id=34322303', 461), 28 | ('https://www.beautifulpublicdata.com/the-style-guide-for-americas-highways-mutcd/', 29 | 198), ('https://www.starfivetech.com/en/site/new_details/976', 144), 30 | ('https://github.com/DesktopECHO/T95-H616-Malware', 221), 31 | ('https://www.val.town/', 332), 32 | ('https://devblogs.microsoft.com/oldnewthing/20230109-00/?p=107685', 33 | 230), ('https://github.com/ToolJet/ToolJet/releases/tag/v2.0.0', 210), 34 | ('https://github.com/sourcegraph/conc', 254), 35 | ('https://lcamtuf.coredump.cx/gcnc/', 383), 36 | ('https://lateblt.tripod.com/bit68.txt', 452), 37 | ('https://www.sapiens.org/culture/lebanon-solar-power/', 214), 38 | ('https://github.com/toblotron/praxis-ide', 167), 39 | ('https://renato.athaydes.com/posts/unison-revolution.html', 296), 40 | ('https://www.allaboutcircuits.com/textbook/', 315), 41 | 
('https://findthatmeme.com/blog/2023/01/08/image-stacks-and-iphone-racks-building-an-internet-scale-meme-search-engine-Qzrz7V6T.html', 42 | 785), ('https://store.steampowered.com/app/1261430/Kandria/', 487), 43 | ('https://www.infoq.com/news/2022/12/apple-swift-foundation-rewrite/', 44 | 434), 45 | ('https://www.vqronline.org/essay/john-hughes-goes-deep-unexpected-heaviosity-ferris-bueller%E2%80%99s-day', 46 | 230), 47 | ('http://uu.diva-portal.org/smash/record.jsf?pid=diva2%3A1721987&dswid=-4818', 48 | 165), ('https://usbc.wtf/', 214), 49 | ('https://networkx.org/documentation/stable/release/release_3.0.html', 50 | 195), 51 | ('https://www.nytimes.com/2023/01/09/climate/ozone-hole-restoration-montreal-protocol.html', 52 | 152), ('https://htmlwithsuperpowers.netlify.app/', 155), 53 | ('https://en.wikipedia.org/wiki/Grandma_Gatewood', 121), 54 | ('https://replicate.com/andreasjansson/cantable-diffuguesion', 68), 55 | ('https://arxiv.org/abs/2301.03149', 139), 56 | ('https://zapier.com/blog/secondments-at-zapier/', 91) 57 | ] 58 | 59 | with open('fixtures/scraper/hacker_news_front_2023_01_11.html', 'r') as f: 60 | html = f.read() 61 | soup = BeautifulSoup(html, 'html.parser') 62 | 63 | submissions = get_submissions(soup) 64 | assert len(submissions) == 30 65 | assert submissions == truth_set 66 | -------------------------------------------------------------------------------- /extension/src/settings.test.ts: -------------------------------------------------------------------------------- 1 | // Get the type of the browser object from the polyfill 2 | import { Browser } from "webextension-polyfill"; 3 | // Mock out the entire polyfill for testing 4 | import "jest-webextension-mock"; 5 | declare const browser: Browser; 6 | 7 | import { Settings, IconName } from "./settings"; 8 | import * as uuid from "uuid"; 9 | 10 | describe("Settings", () => { 11 | let settings: Settings; 12 | 13 | beforeEach(() => { 14 | settings = new Settings(browser); 15 | }); 16 | 17 | 
test("Getting a non-existant user_id generates a new one", async () => { 18 | settings.get_user_id().then((newly_generated_id) => { 19 | expect(uuid.validate(newly_generated_id)).toBe(true); 20 | 21 | // More requests for the user_id should return the same value 22 | for (let i = 0; i < 3; i++) { 23 | expect(settings.get_user_id()).resolves.toEqual(newly_generated_id); 24 | } 25 | }); 26 | }); 27 | 28 | test("Getting an existing user_id", async () => { 29 | expect( 30 | settings.set_user_id("e2f50e34-203a-4fc3-9952-7029bfe65838") 31 | ).resolves.toEqual("e2f50e34-203a-4fc3-9952-7029bfe65838"); 32 | 33 | for (let i = 0; i < 3; i++) { 34 | expect(settings.get_user_id()).resolves.toEqual( 35 | "e2f50e34-203a-4fc3-9952-7029bfe65838" 36 | ); 37 | } 38 | }); 39 | 40 | test("Setting an invalid user_id fails", async () => { 41 | const user_id = await settings.get_user_id(); 42 | 43 | expect(settings.set_user_id("[;.[;")).rejects.toEqual( 44 | "User ID must be a valid UUID" 45 | ); 46 | 47 | expect(settings.get_user_id()).resolves.toEqual(user_id); 48 | }); 49 | 50 | test("Getting icons returns the default values", async () => { 51 | // Do it a few times to make sure 52 | for (let i = 0; i < 3; i++) { 53 | expect(settings.get_icon("good")).resolves.toEqual("💚"); 54 | expect(settings.get_icon("controversial")).resolves.toEqual("🤨"); 55 | expect(settings.get_icon("bad")).resolves.toEqual("❌"); 56 | } 57 | }); 58 | 59 | test("Getting icons returns the existing values", async () => { 60 | expect(settings.set_icon("good", "a")).resolves.toEqual("a"); 61 | expect(settings.set_icon("controversial", "b")).resolves.toEqual("b"); 62 | expect(settings.set_icon("bad", "c")).resolves.toEqual("c"); 63 | 64 | // Do it a few times to make sure 65 | for (let i = 0; i < 3; i++) { 66 | expect(settings.get_icon("good")).resolves.toEqual("a"); 67 | expect(settings.get_icon("controversial")).resolves.toEqual("b"); 68 | expect(settings.get_icon("bad")).resolves.toEqual("c"); 69 | } 70 | }); 71 | 
72 | test("Setting incorrect icons fails", async () => { 73 | expect(settings.set_icon("good", "a")).resolves.toEqual("a"); 74 | expect(settings.set_icon("controversial", "b")).resolves.toEqual("b"); 75 | expect(settings.set_icon("bad", "c")).resolves.toEqual("c"); 76 | 77 | ["good", "controversial", "bad"].forEach((icon_name) => { 78 | expect(settings.set_icon(icon_name as IconName, "123")).rejects.toEqual( 79 | "Icon must be a single character" 80 | ); 81 | }); 82 | 83 | // Check our original values are still the same 84 | expect(settings.get_icon("good")).resolves.toEqual("a"); 85 | expect(settings.get_icon("controversial")).resolves.toEqual("b"); 86 | expect(settings.get_icon("bad")).resolves.toEqual("c"); 87 | }); 88 | }); 89 | -------------------------------------------------------------------------------- /extension/src/popup/popup.scss: -------------------------------------------------------------------------------- 1 | @import "~bootstrap/scss/bootstrap"; 2 | @import "~bootstrap-icons/font/bootstrap-icons.css"; 3 | 4 | $background-color: rgb(21, 20, 26); 5 | $text-color: rgb(234, 234, 234); 6 | 7 | html, 8 | body, 9 | div.offcanvas { 10 | // Size of the popup 11 | width: 160px; 12 | 13 | // White text dark background 14 | background: $background-color; 15 | color: $text-color; 16 | font-family: "Segoe UI", "Noto Sans", Helvetica, Arial, sans-serif, 17 | "Apple Color Emoji", "Segoe UI Emoji"; 18 | } 19 | 20 | i.bi { 21 | // Vertically center all icons 22 | line-height: 0; 23 | 24 | &.bi-check { 25 | color: green; 26 | } 27 | 28 | &.bi-exclamation-triangle { 29 | color: yellow; 30 | } 31 | } 32 | 33 | #settings-page { 34 | .icon-setting { 35 | padding: 0.1rem 0; 36 | display: flex; 37 | align-items: center; 38 | justify-content: flex-start; 39 | 40 | .input-group { 41 | // Fix display of status icons next to the input group 42 | width: fit-content; 43 | } 44 | 45 | &-label, 46 | &-input { 47 | padding: 0.1rem 0.4rem; 48 | font-size: 0.8em; 49 | } 50 | 51 | 
&-input { 52 | margin-right: 0.8rem; 53 | max-width: 2.8rem; 54 | width: 2.8rem; 55 | text-align: center; 56 | } 57 | 58 | &-spinner { 59 | width: 0.7rem; 60 | height: 0.7rem; 61 | } 62 | 63 | &-label { 64 | width: 3rem; 65 | 66 | // Right align the text 67 | display: flex; 68 | flex-direction: row-reverse; 69 | } 70 | } 71 | 72 | .reset-button-and-link { 73 | display: flex; 74 | justify-content: space-between; 75 | align-items: baseline; 76 | 77 | button { 78 | font-size: 0.8em; 79 | padding: 0.1em 0.6em; 80 | margin-top: 0.4em; 81 | } 82 | 83 | a { 84 | font-size: 0.6em; 85 | } 86 | } 87 | } 88 | 89 | .voting-button { 90 | width: 100%; 91 | margin: 3% auto; 92 | display: flex; 93 | align-items: center; 94 | justify-content: center; 95 | font-size: 1.5rem; 96 | 97 | & [data-voting-button-target="icon"] { 98 | // Get the icon itself to display in the middle 99 | margin-bottom: -0.3rem; 100 | } 101 | 102 | & [data-voting-button-target="spinner"] { 103 | // Make the spinner a nice size 104 | font-size: 0.5em; 105 | } 106 | } 107 | 108 | .header { 109 | // Make the header and settings gear display on the same line 110 | display: flex; 111 | justify-content: space-between; 112 | 113 | // Touch up to align the header 114 | padding: 5px 0; 115 | 116 | h1 { 117 | font-size: 10pt; 118 | margin: 0; 119 | 120 | // Vertically center the header 121 | display: flex; 122 | align-items: center; 123 | } 124 | 125 | button { 126 | &.help, 127 | &.settings { 128 | padding: 0.2rem; 129 | line-height: 0; 130 | font-size: 1rem; 131 | color: $text-color; 132 | } 133 | 134 | &.help { 135 | background: transparent; 136 | border: none; 137 | } 138 | } 139 | } 140 | 141 | button { 142 | border: none; 143 | border-radius: 5px; 144 | text-align: center; 145 | font-size: 1.5em; 146 | cursor: pointer; 147 | } 148 | 149 | // Remove the annoying box shadow from the input fields 150 | .form-control:focus { 151 | box-shadow: none; 152 | } 153 | 
-------------------------------------------------------------------------------- /extension/src/popup/controllers/icon_setting_controller.ts: -------------------------------------------------------------------------------- 1 | import * as browser from "webextension-polyfill"; 2 | import { Controller } from "@hotwired/stimulus"; 3 | import { Settings, IconName } from "../../settings"; 4 | import VotingButtonController from "./voting_button_controller"; 5 | 6 | const FADE_IN_AND_OUT_TIME = 300; // milliseconds 7 | const FADE_OUT_CHECK_AFTER = 3; // seconds 8 | 9 | export default class extends Controller { 10 | static targets = ["input", "spinner", "check", "error"]; 11 | static values = { 12 | iconName: String, 13 | }; 14 | static outlets = [ "voting-button" ] 15 | declare readonly inputTarget: HTMLInputElement; 16 | declare readonly spinnerTarget: HTMLDivElement; 17 | declare readonly checkTarget: HTMLElement; 18 | declare readonly errorTarget: HTMLElement; 19 | declare readonly iconNameValue: IconName; 20 | declare timer: NodeJS.Timeout; 21 | declare readonly votingButtonOutlets: Array; 22 | declare settings: Settings; 23 | 24 | connect() { 25 | this.settings = new Settings(browser); 26 | // Load icons from settings 27 | this._showSpinner(); 28 | this.settings 29 | .get_icon(this.iconNameValue) 30 | .then((icon) => { 31 | // Don't want to show the check mark on the first load 32 | this._hideAll(); 33 | this.inputTarget.value = icon; 34 | }) 35 | .catch((error) => { 36 | this._showError(error); 37 | }); 38 | } 39 | 40 | update() { 41 | this._showSpinner(); 42 | this.settings 43 | .set_icon(this.iconNameValue, this.inputTarget.value) 44 | .then(() => { 45 | this._showCheck(); 46 | }) 47 | .then(() => { 48 | // Reload icons on the voting page with new changes 49 | this.votingButtonOutlets.forEach((outlet) => { 50 | outlet.load_icon(); 51 | }); 52 | }) 53 | .catch((error) => { 54 | this._showError(error); 55 | }); 56 | } 57 | 58 | _fadeIn(element: HTMLElement) { 59 | 
element.animate([{ opacity: 0 }, { opacity: 1 }], FADE_IN_AND_OUT_TIME); 60 | element.classList.remove("d-none"); 61 | } 62 | 63 | _fadeOut(element: HTMLElement, after_milliseconds: number) { 64 | this.timer && clearTimeout(this.timer); 65 | this.timer = setTimeout(() => { 66 | element.animate([{ opacity: 1 }, { opacity: 0 }], FADE_IN_AND_OUT_TIME); 67 | setTimeout(() => { 68 | element.classList.add("d-none"); 69 | }, FADE_IN_AND_OUT_TIME); 70 | }, after_milliseconds); 71 | } 72 | 73 | _hideAll() { 74 | this.spinnerTarget.classList.add("d-none"); 75 | this.checkTarget.classList.add("d-none"); 76 | this.errorTarget.classList.add("d-none"); 77 | } 78 | 79 | _showSpinner() { 80 | this.checkTarget.classList.add("d-none"); 81 | this.errorTarget.classList.add("d-none"); 82 | this._fadeIn(this.spinnerTarget); 83 | } 84 | 85 | _showCheck() { 86 | // The check will disappear after some time 87 | this.spinnerTarget.classList.add("d-none"); 88 | this.errorTarget.classList.add("d-none"); 89 | this._fadeIn(this.checkTarget); 90 | this._fadeOut(this.checkTarget, FADE_OUT_CHECK_AFTER * 1000); 91 | } 92 | 93 | _showError(error: string) { 94 | this.spinnerTarget.classList.add("d-none"); 95 | this.checkTarget.classList.add("d-none"); 96 | this._fadeIn(this.errorTarget); 97 | this.errorTarget.title = error; 98 | } 99 | } 100 | -------------------------------------------------------------------------------- /backend/Makefile: -------------------------------------------------------------------------------- 1 | ifneq (,$(wildcard ../.env)) 2 | include ../.env 3 | export 4 | endif 5 | 6 | # Primary targets 7 | 8 | install: 9 | poetry install 10 | 11 | dev: install start 12 | poetry run python database.py load_development_votes 13 | 14 | start: build stop start-local-database start-lambda 15 | sleep 15 # Wait a bit for everything to get up and running 16 | stop: stop-lambda stop-local-database 17 | 18 | build: install 19 | @echo "Make sure the local dynamodb is pulled and ready to run" 20 
| docker pull amazon/dynamodb-local 21 | @echo "Build the lambda function" 22 | cd lambda && $(MAKE) build 23 | 24 | test: 25 | cd lambda && $(MAKE) test 26 | $(MAKE) start 27 | @curl -s localhost:8000 > /dev/null || (echo "No local database found!" && exit 1) 28 | @curl -s localhost:9000 || (echo "No local lambda found!" && exit 1) 29 | poetry run pytest # --capture=no to see stdout 30 | $(MAKE) stop 31 | 32 | deploy: build 33 | sam deploy --config-file samconfig.production.toml --template-file template.production.yaml 34 | 35 | deploy-dev: build 36 | sam deploy --config-file samconfig.development.toml --template-file template.development.yaml 37 | 38 | clean: stop-local-database 39 | rm -rf __pycache__ .pytest_cache tmp 40 | 41 | redeploy: 42 | sam delete 43 | $(MAKE) deploy 44 | 45 | seed: install seed/hacker_news_submissions seed/public_blocklists 46 | poetry run python database.py generate_production_seed_data 47 | @echo "Upload generated seed data to s3" 48 | aws s3 cp seed/seed.ion s3://discontent-seed-bucket/seed.ion 49 | @echo "When deploying a new CloudFormation stack, the database will be loaded with this seed data" 50 | 51 | # Secondary targets 52 | 53 | seed/public_blocklists: 54 | mkdir -p seed/public_blocklists 55 | curl https://raw.githubusercontent.com/arosh/ublacklist-github-translation/master/uBlacklist.txt -o seed/public_blocklists/github_splogs.txt 56 | curl https://raw.githubusercontent.com/arosh/ublacklist-stackoverflow-translation/master/uBlacklist.txt -o seed/public_blocklists/stack_overflow_translations.txt 57 | curl https://raw.githubusercontent.com/franga2000/aliexpress-fake-sites/main/domains_uBlacklist.txt -o seed/public_blocklists/fake_webstores.txt 58 | curl https://raw.githubusercontent.com/wdmpa/content-farm-list/main/uBlacklist.txt -o seed/public_blocklists/wdmpa_content_farms.txt 59 | curl https://danny0838.github.io/content-farm-terminator/files/blocklist/content-farms.txt -o seed/public_blocklists/danny0838_content_farms.txt 60 
| curl https://danny0838.github.io/content-farm-terminator/files/blocklist/nearly-content-farms.txt -o seed/public_blocklists/danny0838_nearly_content_farms.txt 61 | curl https://danny0838.github.io/content-farm-terminator/files/blocklist/extra-content-farms.txt -o seed/public_blocklists/danny0838_extra_content_farms.txt 62 | curl https://danny0838.github.io/content-farm-terminator/files/blocklist/bad-cloners.txt -o seed/public_blocklists/danny0838_bad_cloners.txt 63 | aws s3 cp s3://discontent-seed-bucket/bad_sites_still_active.csv seed/bad_sites_still_active.csv 64 | 65 | start-lambda: 66 | cd lambda && $(MAKE) dev & 67 | 68 | stop-lambda: 69 | cd lambda && $(MAKE) stop 70 | 71 | start-local-database: 72 | @echo "Start the local database" 73 | docker run --rm --name dynamodb -d -p 8000:8000 amazon/dynamodb-local -jar DynamoDBLocal.jar -port 8000 -inMemory -cors '*' 74 | @echo "Wait a couple seconds before running the setup scripts" 75 | sleep 5 && poetry run python database.py setup 76 | 77 | stop-local-database: 78 | docker stop dynamodb || true 79 | 80 | # Fetch the hacker news submissions from S3 81 | # This is better than scraping them all over again 82 | seed/hacker_news_submissions: 83 | aws s3 cp --recursive s3://discontent-seed-bucket/hacker_news_submissions seed/hacker_news_submissions 84 | 85 | guard-%: 86 | @if [ -z '${${*}}' ]; then echo 'ERROR: variable $* not set' && exit 1; fi 87 | -------------------------------------------------------------------------------- /backend/hacker_news_scraper.py: -------------------------------------------------------------------------------- 1 | import fire 2 | from datetime import datetime, timedelta 3 | import requests 4 | from bs4 import BeautifulSoup 5 | from tqdm import tqdm 6 | import csv 7 | import os 8 | from fake_useragent import UserAgent 9 | 10 | HACKER_NEWS_URL = "https://news.ycombinator.com/front" 11 | 12 | 13 | # Helper function to iterate through 2 dates (including end date) 14 | def 
date_range(start_date, end_date): 15 | for difference in range(int((end_date - start_date).days) + 1): 16 | yield start_date + timedelta(difference) 17 | 18 | 19 | # Extract the submission links and return them as a list 20 | def get_submission_links(soup): 21 | # All links are in a table row with class "athing" 22 | submission_rows = soup.find_all("tr", class_="athing") 23 | 24 | def extract_href(row_tag): 25 | href = row_tag.find("span", class_="titleline").find("a").get("href") 26 | # Fix relative urls that point to Hacker News itself 27 | if href.startswith("item?id="): 28 | href = f"https://news.ycombinator.com/{href}" 29 | return href 30 | 31 | return [extract_href(row) for row in submission_rows] 32 | 33 | 34 | # Extract the number of votes given the surrounding span tag 35 | def get_submission_votes(soup): 36 | # All span tags that hold the votes have the class "score" 37 | span_tags = soup.find_all("span", class_="score") 38 | 39 | def extract_votes(span_tag): 40 | vote_string = span_tag.get_text().strip().split(" ") 41 | assert len(vote_string) == 2 42 | assert vote_string[1] == "points" 43 | return int(vote_string[0]) 44 | 45 | return [extract_votes(span) for span in span_tags] 46 | 47 | 48 | def get_submissions(soup): 49 | submission_links = get_submission_links(soup) 50 | submission_votes = get_submission_votes(soup) 51 | 52 | assert len(submission_links) == 30 53 | assert len(submission_votes) == 30 54 | 55 | return list(zip(submission_links, submission_votes)) 56 | 57 | 58 | def hacker_news_scraper(start_date: str, end_date: str): 59 | """ 60 | Hacker News Scraper 61 | 62 | Scrapes links and updates from front page submissions on Hacker News. 
63 | Saves csv results to ./output/submissions__.csv 64 | 65 | Example usage: 66 | hacker_news_scraper.py --start_date=2023-01-10 --end_date=2023-01-20 67 | """ 68 | # Parse dates and prepare output 69 | FORMAT = '%Y-%m-%d' 70 | start = datetime.strptime(start_date, FORMAT) 71 | end = datetime.strptime(end_date, FORMAT) 72 | os.makedirs('output', exist_ok=True) 73 | ua = UserAgent() 74 | 75 | with open(f'output/submissions_{start_date}_{end_date}.csv', 76 | 'w', 77 | newline='') as f: 78 | writer = csv.writer(f, dialect='unix') 79 | progress = tqdm(date_range(start, end)) 80 | for day in progress: 81 | day_string = day.strftime(FORMAT) 82 | progress.set_description(f"Scraping {day_string}") 83 | # Sorry 84 | headers = {'User-Agent': ua.random} 85 | r = requests.get(HACKER_NEWS_URL, 86 | params={'day': day_string}, 87 | headers=headers) 88 | if r.ok: 89 | soup = BeautifulSoup(r.text, 'html.parser') 90 | submissions = get_submissions(soup) 91 | for submission in submissions: 92 | writer.writerow([day_string, submission[0], submission[1]]) 93 | else: 94 | print(f"\nError: {day_string} failed with " 95 | "{r.status_code} {r.reason}") 96 | 97 | return 98 | 99 | 100 | if __name__ == '__main__': 101 | # Use python-fire to give nice CLI argument parsing 102 | fire.Fire(hacker_news_scraper) 103 | -------------------------------------------------------------------------------- /.github/workflows/continuous-integration.yml: -------------------------------------------------------------------------------- 1 | # See here for list of installed packages under 'ubuntu-22.04' 2 | # https://github.com/actions/runner-images/blob/main/images/linux/Ubuntu2204-Readme.md 3 | 4 | name: "continuous integration" 5 | on: 6 | push: 7 | branches: ["develop"] 8 | jobs: 9 | run_tests: 10 | runs-on: "ubuntu-22.04" 11 | environment: "test" 12 | timeout-minutes: 60 13 | env: 14 | LAMBDA_API_URL: "http://localhost:9000/lambda-url/request-handler/v1" 15 | TABLE_NAME: "DiscontentProduction" 16 | 
LOG_LEVEL: "info" 17 | RANDOMIZE_SCORES: "false" 18 | USE_LOCAL_DATABASE: "true" 19 | USE_SYSTEM_TIME: "false" 20 | HEADLESS: "true" 21 | BROWSERS_TO_TEST: "chrome firefox" 22 | CHROME_EXTENSION_ID: "kglbdhongcfkafgfgofpgaehafnbgnhd" 23 | FIREFOX_EXTENSION_ID: "{3f504997-80b7-467d-9d7b-e2fbb6d55e34}" 24 | AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} 25 | AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} 26 | AWS_DEFAULT_REGION: ${{ secrets.AWS_DEFAULT_REGION }} 27 | steps: 28 | - name: "Checkout code" 29 | uses: "actions/checkout@v3" 30 | with: 31 | fetch-depth: 0 32 | 33 | - name: 'Scan with SonarCloud' 34 | uses: 'SonarSource/sonarcloud-github-action@v1.8' 35 | env: 36 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 37 | SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }} 38 | 39 | - name: 'Check SonarCloud results and fail pipeline if failed' 40 | run: | 41 | sleep 30s # Wait for results to be uploaded 42 | GATE_STATUS=$(curl --silent "https://sonarcloud.io/api/qualitygates/project_status?projectKey=tom-barone_Discontent" | jq -r .projectStatus.status) # -r strips the JSON quotes so the comparison below can match 43 | echo "Gate status: $GATE_STATUS" 44 | [[ "$GATE_STATUS" != "ERROR" ]] # Will fail if equal to ERROR 45 | 46 | - name: "Setup ruby for end to end tests" 47 | uses: "ruby/setup-ruby@v1" 48 | with: 49 | bundler-cache: true 50 | working-directory: "./end_to_end_tests" 51 | 52 | - name: "Install poetry" 53 | run: "pipx install poetry" 54 | - uses: "actions/setup-python@v4" 55 | with: 56 | python-version: "3.9" 57 | cache: "poetry" 58 | 59 | - name: "Set up Homebrew" 60 | uses: "Homebrew/actions/setup-homebrew@master" 61 | 62 | - name: "Install cargo-lambda" 63 | run: | 64 | brew install curl 65 | export HOMEBREW_FORCE_BREWED_CURL=1 66 | brew tap cargo-lambda/cargo-lambda 67 | brew install cargo-lambda 68 | 69 | - name: "Run tests" 70 | run: "make test" 71 | 72 | - name: "Save failed tests" 73 | if: ${{ failure() }} 74 | uses: "actions/upload-artifact@v3" 75 | with: 76 | name: "failed_tests" 77 | path: 
"end_to_end_tests/ci" 78 | 79 | build: 80 | needs: ["run_tests"] 81 | runs-on: "ubuntu-22.04" 82 | environment: "build" 83 | env: 84 | LAMBDA_API_URL: "https://2zeiy58jgk.execute-api.us-east-1.amazonaws.com/v1" 85 | TABLE_NAME: "DiscontentProduction" 86 | LOG_LEVEL: "info" 87 | RANDOMIZE_SCORES: "false" 88 | USE_LOCAL_DATABASE: "false" 89 | USE_SYSTEM_TIME: "true" 90 | steps: 91 | - name: "Checkout code" 92 | uses: "actions/checkout@v3" 93 | with: 94 | fetch-depth: 0 95 | 96 | - name: "Install poetry" 97 | run: "pipx install poetry" 98 | - uses: "actions/setup-python@v4" 99 | with: 100 | python-version: "3.9" 101 | cache: "poetry" 102 | 103 | - name: "Set up Homebrew" 104 | uses: "Homebrew/actions/setup-homebrew@master" 105 | 106 | - name: "Install cargo-lambda" 107 | run: | 108 | brew tap cargo-lambda/cargo-lambda 109 | brew install cargo-lambda 110 | 111 | - name: "Build extensions and backend" 112 | run: "make build" 113 | 114 | - name: "Save built extensions" 115 | uses: "actions/upload-artifact@v3" 116 | with: 117 | name: "packed_extensions" 118 | path: "extension/dist/packed" 119 | -------------------------------------------------------------------------------- /extension/src/popup/controllers/voting_button_controller.ts: -------------------------------------------------------------------------------- 1 | import * as browser from "webextension-polyfill"; 2 | import { Controller } from "@hotwired/stimulus"; 3 | import { Settings } from "../../settings"; 4 | import { submitVote } from "../../api"; 5 | 6 | const FADE_IN_AND_OUT_TIME = 200; // milliseconds 7 | const FADE_OUT_CHECK_AFTER = 1; // seconds 8 | const FADE_OUT_ERROR_AFTER = 10; // seconds 9 | 10 | export default class extends Controller { 11 | // TODO: Add displaying of errors somehow 12 | static targets = ["icon", "spinner", "check", "error"]; 13 | static values = { 14 | vote: String, 15 | }; 16 | declare readonly voteValue: "good" | "bad"; 17 | declare readonly iconTarget: HTMLDivElement; 18 | 
declare readonly spinnerTarget: HTMLDivElement; 19 | declare readonly checkTarget: HTMLElement; 20 | declare readonly errorTarget: HTMLElement; 21 | declare settings: Settings; 22 | declare timer: NodeJS.Timeout; 23 | 24 | connect() { 25 | this.settings = new Settings(browser); 26 | this.load_icon(); 27 | } 28 | 29 | load_icon() { 30 | this.settings.get_icon(this.voteValue).then((icon) => { 31 | this.iconTarget.innerText = icon; 32 | this.element.classList.remove("invisible"); 33 | }); 34 | } 35 | 36 | submit() { 37 | this.element.disabled = true; 38 | this._showSpinner(); 39 | Promise.all([ 40 | // Get the current user_id 41 | this.settings.get_user_id(), 42 | // Get the hostname of the current tab's URL 43 | browser.tabs.query({ active: true, currentWindow: true }).then((tabs) => { 44 | if (tabs.length === 0 || tabs[0].url === undefined) { 45 | return Promise.reject(new Error("No active tab found")); 46 | } 47 | const hostname = new URL(tabs[0].url).hostname; 48 | if (hostname === "") { 49 | return Promise.reject( 50 | new Error(`No hostname found for ${tabs[0].url}`) 51 | ); 52 | } 53 | return hostname; 54 | }), 55 | ]) 56 | .then(([user_id, hostname]) => { 57 | const vote_value = this.voteValue === "good" ? 
1 : -1; 58 | return submitVote(vote_value, hostname, user_id); 59 | }) 60 | .then(() => { 61 | this._showCheck(); 62 | }) 63 | .catch((error) => { 64 | this._showError(error); 65 | }) 66 | .finally(() => { 67 | this.element.disabled = false; 68 | }); 69 | } 70 | 71 | _showSpinner() { 72 | this.iconTarget.classList.add("d-none"); 73 | this.checkTarget.classList.add("d-none"); 74 | this.errorTarget.classList.add("d-none"); 75 | this._fadeIn(this.spinnerTarget); 76 | } 77 | 78 | _showCheck() { 79 | // The check will disappear after some time 80 | this.spinnerTarget.classList.add("d-none"); 81 | this.iconTarget.classList.add("d-none"); 82 | this.errorTarget.classList.add("d-none"); 83 | this._fadeIn(this.checkTarget); 84 | 85 | // Fade back in the icon 86 | this.timer && clearTimeout(this.timer); 87 | this.timer = setTimeout(() => { 88 | this.checkTarget.animate( 89 | [{ opacity: 1 }, { opacity: 0 }], 90 | FADE_IN_AND_OUT_TIME 91 | ); 92 | setTimeout(() => { 93 | this.checkTarget.classList.add("d-none"); 94 | this._fadeIn(this.iconTarget); 95 | }, FADE_IN_AND_OUT_TIME); 96 | }, FADE_OUT_CHECK_AFTER * 1000); 97 | } 98 | 99 | _fadeIn(element: HTMLElement) { 100 | element.animate([{ opacity: 0 }, { opacity: 1 }], FADE_IN_AND_OUT_TIME); 101 | element.classList.remove("d-none"); 102 | } 103 | 104 | _showError(error: string) { 105 | this.spinnerTarget.classList.add("d-none"); 106 | this.iconTarget.classList.add("d-none"); 107 | this.checkTarget.classList.add("d-none"); 108 | this._fadeIn(this.errorTarget); 109 | this.errorTarget.title = error; 110 | 111 | // Fade out the error (the element actually on screen), then fade the icon back in 112 | this.timer && clearTimeout(this.timer); 113 | this.timer = setTimeout(() => { 114 | this.errorTarget.animate( 115 | [{ opacity: 1 }, { opacity: 0 }], 116 | FADE_IN_AND_OUT_TIME 117 | ); 118 | setTimeout(() => { 119 | this.errorTarget.classList.add("d-none"); 120 | this._fadeIn(this.iconTarget); 121 | }, FADE_IN_AND_OUT_TIME); 122 | }, FADE_OUT_ERROR_AFTER * 1000); 123 | } 124 
| } 125 | -------------------------------------------------------------------------------- /extension/src/help/help.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Welcome 6 | 7 | 8 | 9 | 10 |
11 |
14 |

💚 Discontent

15 |
16 |  Home 23 |
24 |
25 |

26 |
27 | Hello friend, 28 |
29 |
30 | The vision here is to filter out unhelpful content on the web, and make 31 | it easier to find the good stuff. 32 |
33 |
34 | When you search on Google, Bing or DuckDuckGo: 35 |

36 |
    37 |
  • If a site has a bad score, it'll prepend the result with a ❌
  • 38 |
  • If it's spicy and there's lots of votes both ways, you see a 🤨
  • 39 |
  • If it's awesome and deserves to be cherished, you see a 💚
  • 40 |
  • When there aren't enough votes, nothing will show up.
  • 41 |
42 | 43 |

44 | Your votes are what makes this work! If you find a great site, share 45 | your love with 💚. When you find a site that is... not so great, give it 46 | a ❌. 47 |
48 |
49 | Here are some highly rated sites to get going: 50 |

51 | 52 |
53 |
54 | 69 |
70 | 71 |
72 | 89 |
90 |
91 | 92 |

93 | You can see this page again by clicking:   94 | 95 |
96 |
97 | Like or hate something? Have a suggestion? Send me an email at 98 | mail@tombarone.net 99 |
100 |
101 |

102 |
103 |

104 | But what do you do about all those nasty rotten spammers? 107 |
108 |
109 | There are some checks in place to stop abuse: 110 |

111 |
    112 |
  • You can only vote on a maximum of 10 websites a day
  • 113 |
  • You can only ever have 1 vote on a website
  • 114 |
  • 115 | If there are a bunch of suspicious votes from suspicious users, the 116 | system can flag it, ban the users and remove the votes. 117 |
  • 118 |
119 |

120 | It's not a perfect system, and a cheeky spammer could probably get 121 | around it if they analyse the code carefully. If that's you, send me an 122 | email and I'll pay you for your skillz. 123 |
124 |

125 |
126 |
127 | 128 | 129 | -------------------------------------------------------------------------------- /backend/lambda/src/main.rs: -------------------------------------------------------------------------------- 1 | mod routes; 2 | mod scoring; 3 | mod types; 4 | mod validate; 5 | mod dynamodb; 6 | 7 | use aws_sdk_dynamodb::Client; 8 | use lambda_http::{ 9 | http::{Method, StatusCode}, 10 | *, 11 | }; 12 | use routes::*; 13 | use std::env; 14 | use tracing::*; 15 | use tracing_subscriber::fmt; 16 | use types::Config; 17 | 18 | #[tokio::main] 19 | async fn main() -> Result<(), Error> { 20 | let (config, dynamo_db_client) = setup().await; 21 | info!("Loaded config [{:?}]", config); 22 | 23 | run(service_fn(|request: Request| async { 24 | root_handler(request, &config, &dynamo_db_client).await 25 | })) 26 | .await 27 | } 28 | 29 | async fn setup() -> (Config, Client) { 30 | let table_name = env::var("TABLE_NAME").expect("ERROR: Env variable TABLE_NAME should be set"); 31 | 32 | let log_level = env::var("LOG_LEVEL").expect("ERROR: Env variable LOG_LEVEL should be set"); 33 | fmt().with_env_filter(log_level).without_time().init(); 34 | 35 | let use_local_database = env::var("USE_LOCAL_DATABASE") 36 | .expect("ERROR: Env variable USE_LOCAL_DATABASE should be set") 37 | .parse::() 38 | .expect("ERROR: Env variable USE_LOCAL_DATABASE should be a boolean"); 39 | let sdk_config = aws_config::load_from_env().await; 40 | let mut dynamo_config_builder = aws_sdk_dynamodb::config::Builder::from(&sdk_config); 41 | if use_local_database { 42 | dynamo_config_builder = dynamo_config_builder.endpoint_url("http://localhost:8000"); 43 | } 44 | let dynamo_config = dynamo_config_builder.build(); 45 | let dynamo_db_client = Client::from_conf(dynamo_config); 46 | 47 | let randomize_scores = env::var("RANDOMIZE_SCORES") 48 | .expect("ERROR: Env variable RANDOMIZE_SCORES should be set") 49 | .parse::() 50 | .expect("ERROR: Env variable RANDOMIZE_SCORES should be a boolean"); 51 | 52 | let 
use_system_time = env::var("USE_SYSTEM_TIME") 53 | .expect("ERROR: Env variable USE_SYSTEM_TIME should be set") 54 | .parse::() 55 | .expect("ERROR: Env variable USE_SYSTEM_TIME should be a boolean"); 56 | 57 | return ( 58 | Config { 59 | table_name, 60 | // The following are for testing & development 61 | use_local_database, 62 | randomize_scores, 63 | use_system_time, 64 | }, 65 | dynamo_db_client, 66 | ); 67 | } 68 | 69 | #[instrument(level = "trace")] 70 | async fn root_handler( 71 | request: Request, 72 | config: &Config, 73 | dynamo_db_client: &Client, 74 | ) -> Result, Error> { 75 | let path = request.uri().path(); 76 | let method = request.method(); 77 | let response: Result; 78 | if path == "/v1/scores" && method == &Method::GET { 79 | response = scores(request, &config, &dynamo_db_client).await; 80 | } else if path == "/v1/vote" && method == &Method::POST { 81 | response = vote(request, &config, &dynamo_db_client).await; 82 | } else { 83 | return not_found(); 84 | } 85 | match response { 86 | Ok(body) => success(body), 87 | // TODO: Handle the HTTP errors better than just chucking them 88 | // all into a 500 response 89 | Err(e) => { 90 | warn!("Could not complete request [error={:#?}]", e); 91 | handle_error(e) 92 | } 93 | } 94 | } 95 | 96 | fn not_found() -> Result, Error> { 97 | Ok(Response::builder() 98 | .status(StatusCode::NOT_FOUND) 99 | .header("Access-Control-Allow-Headers", "*") 100 | .header("Access-Control-Allow-Origin", "*") 101 | .header("Access-Control-Allow-Methods", "POST, GET") 102 | .body(Body::Empty) 103 | .unwrap()) 104 | } 105 | 106 | fn success(body: Body) -> Result, Error> { 107 | Ok(Response::builder() 108 | .status(StatusCode::OK) 109 | .header("content-type", "application/json") 110 | .header("Access-Control-Allow-Headers", "*") 111 | .header("Access-Control-Allow-Origin", "*") 112 | .header("Access-Control-Allow-Methods", "POST, GET") 113 | .body(body) 114 | .unwrap()) 115 | } 116 | 117 | fn handle_error(error: Error) -> 
Result, Error> { 118 | let error_body = format!( 119 | r#"{{"error": {}}}"#, 120 | serde_json::to_string(&error.to_string()) 121 | .unwrap_or("Something bad and unknown".to_string()) 122 | ); 123 | server_error(Body::from(error_body)) 124 | } 125 | 126 | fn server_error(body: Body) -> Result, Error> { 127 | Ok(Response::builder() 128 | .status(StatusCode::INTERNAL_SERVER_ERROR) 129 | .header("content-type", "application/json") 130 | .header("Access-Control-Allow-Headers", "*") 131 | .header("Access-Control-Allow-Origin", "*") 132 | .header("Access-Control-Allow-Methods", "POST, GET") 133 | .body(body) 134 | .unwrap()) 135 | } 136 | -------------------------------------------------------------------------------- /backend/template.development.yaml: -------------------------------------------------------------------------------- 1 | # Config reference: https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/template-reference.html 2 | AWSTemplateFormatVersion: "2010-09-09" 3 | Transform: "AWS::Serverless-2016-10-31" 4 | Description: "Backend services for Discontent" 5 | 6 | Resources: 7 | Database: 8 | Type: AWS::DynamoDB::Table 9 | Properties: 10 | KeySchema: 11 | - AttributeName: PK 12 | KeyType: HASH 13 | - AttributeName: SK 14 | KeyType: RANGE 15 | AttributeDefinitions: 16 | - AttributeName: PK 17 | AttributeType: S 18 | - AttributeName: SK 19 | AttributeType: S 20 | - AttributeName: UserVotes_PK 21 | AttributeType: S 22 | - AttributeName: created_at 23 | AttributeType: S 24 | - AttributeName: DailyUserHistory_PK 25 | AttributeType: S 26 | - AttributeName: count_of_votes 27 | AttributeType: N 28 | - AttributeName: DailyLinkHistory_PK 29 | AttributeType: S 30 | - AttributeName: sum_of_votes 31 | AttributeType: N 32 | GlobalSecondaryIndexes: 33 | - IndexName: UserVotes 34 | KeySchema: 35 | - AttributeName: UserVotes_PK 36 | KeyType: HASH 37 | - AttributeName: created_at 38 | KeyType: RANGE 39 | Projection: 40 | ProjectionType: INCLUDE 41 | NonKeyAttributes: 
42 | - entity_type 43 | - value 44 | - PK 45 | - IndexName: DailyUserHistory 46 | KeySchema: 47 | - AttributeName: DailyUserHistory_PK 48 | KeyType: HASH 49 | - AttributeName: count_of_votes 50 | KeyType: RANGE 51 | Projection: 52 | ProjectionType: INCLUDE 53 | NonKeyAttributes: 54 | - entity_type 55 | - SK 56 | - sum_of_votes 57 | - IndexName: DailyLinkHistoryByCountOfVotes 58 | KeySchema: 59 | - AttributeName: DailyLinkHistory_PK 60 | KeyType: HASH 61 | - AttributeName: count_of_votes 62 | KeyType: RANGE 63 | Projection: 64 | ProjectionType: INCLUDE 65 | NonKeyAttributes: 66 | - entity_type 67 | - SK 68 | - sum_of_votes 69 | - IndexName: DailyLinkHistoryBySumOfVotes 70 | KeySchema: 71 | - AttributeName: DailyLinkHistory_PK 72 | KeyType: HASH 73 | - AttributeName: sum_of_votes 74 | KeyType: RANGE 75 | Projection: 76 | ProjectionType: INCLUDE 77 | NonKeyAttributes: 78 | - entity_type 79 | - SK 80 | - count_of_votes 81 | BillingMode: PAY_PER_REQUEST 82 | TableName: DiscontentDevelopment 83 | ImportSourceSpecification: 84 | InputFormat: ION 85 | InputCompressionType: NONE 86 | S3BucketSource: 87 | S3Bucket: "discontent-seed-bucket" 88 | S3KeyPrefix: "seed.ion" 89 | 90 | RequestHandler: 91 | Type: AWS::Serverless::Function 92 | Properties: 93 | MemorySize: 128 94 | Architectures: ["arm64"] 95 | PackageType: Zip 96 | Handler: bootstrap 97 | Runtime: provided.al2 98 | Timeout: 5 99 | CodeUri: lambda/target/lambda/request-handler 100 | Events: 101 | GetScores: 102 | Type: Api 103 | Properties: 104 | Path: /scores 105 | Method: get 106 | RestApiId: !Ref ApiGateway 107 | PostVote: 108 | Type: Api 109 | Properties: 110 | Path: /vote 111 | Method: post 112 | RestApiId: !Ref ApiGateway 113 | Environment: 114 | Variables: 115 | TABLE_NAME: !Ref Database 116 | LOG_LEVEL: info 117 | USE_LOCAL_DATABASE: false 118 | RANDOMIZE_SCORES: false 119 | USE_SYSTEM_TIME: true 120 | Policies: 121 | - DynamoDBCrudPolicy: # More info about SAM policy templates: 
https://docs.aws.amazon.com/serverless-application-model/latest/developerguide/serverless-policy-templates.html 122 | TableName: !Ref Database 123 | 124 | ApiGateway: 125 | Type: AWS::Serverless::Api 126 | Properties: 127 | StageName: v1 128 | 129 | Outputs: 130 | # ServerlessRestApi is an implicit API created out of Events key under Serverless::Function 131 | # Find out more about other implicit resources you can reference within SAM 132 | # https://github.com/awslabs/serverless-application-model/blob/master/docs/internals/generated_resources.rst#api 133 | ApiGateway: 134 | Description: "API Gateway endpoint URL for the production stage" 135 | Value: !Sub "https://${ApiGateway}.execute-api.${AWS::Region}.${AWS::URLSuffix}/v1/" 136 | RequestHandler: 137 | Description: "Lambda Function ARN that handles the API requests" 138 | Value: !GetAtt RequestHandler.Arn 139 | Database: 140 | Description: "DynamoDB table" 141 | Value: !GetAtt Database.Arn 142 | -------------------------------------------------------------------------------- /backend/template.production.yaml: -------------------------------------------------------------------------------- 1 | # Config reference: https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/template-reference.html 2 | AWSTemplateFormatVersion: "2010-09-09" 3 | Transform: "AWS::Serverless-2016-10-31" 4 | Description: "Backend services for Discontent" 5 | 6 | Resources: 7 | Database: 8 | Type: AWS::DynamoDB::Table 9 | Properties: 10 | KeySchema: 11 | - AttributeName: PK 12 | KeyType: HASH 13 | - AttributeName: SK 14 | KeyType: RANGE 15 | AttributeDefinitions: 16 | - AttributeName: PK 17 | AttributeType: S 18 | - AttributeName: SK 19 | AttributeType: S 20 | - AttributeName: UserVotes_PK 21 | AttributeType: S 22 | - AttributeName: created_at 23 | AttributeType: S 24 | - AttributeName: DailyUserHistory_PK 25 | AttributeType: S 26 | - AttributeName: count_of_votes 27 | AttributeType: N 28 | - AttributeName: DailyLinkHistory_PK 29 | 
AttributeType: S 30 | - AttributeName: sum_of_votes 31 | AttributeType: N 32 | GlobalSecondaryIndexes: 33 | - IndexName: UserVotes 34 | KeySchema: 35 | - AttributeName: UserVotes_PK 36 | KeyType: HASH 37 | - AttributeName: created_at 38 | KeyType: RANGE 39 | Projection: 40 | ProjectionType: INCLUDE 41 | NonKeyAttributes: 42 | - entity_type 43 | - value 44 | - PK 45 | - IndexName: DailyUserHistory 46 | KeySchema: 47 | - AttributeName: DailyUserHistory_PK 48 | KeyType: HASH 49 | - AttributeName: count_of_votes 50 | KeyType: RANGE 51 | Projection: 52 | ProjectionType: INCLUDE 53 | NonKeyAttributes: 54 | - entity_type 55 | - SK 56 | - sum_of_votes 57 | - IndexName: DailyLinkHistoryByCountOfVotes 58 | KeySchema: 59 | - AttributeName: DailyLinkHistory_PK 60 | KeyType: HASH 61 | - AttributeName: count_of_votes 62 | KeyType: RANGE 63 | Projection: 64 | ProjectionType: INCLUDE 65 | NonKeyAttributes: 66 | - entity_type 67 | - SK 68 | - sum_of_votes 69 | - IndexName: DailyLinkHistoryBySumOfVotes 70 | KeySchema: 71 | - AttributeName: DailyLinkHistory_PK 72 | KeyType: HASH 73 | - AttributeName: sum_of_votes 74 | KeyType: RANGE 75 | Projection: 76 | ProjectionType: INCLUDE 77 | NonKeyAttributes: 78 | - entity_type 79 | - SK 80 | - count_of_votes 81 | BillingMode: PAY_PER_REQUEST 82 | TableName: DiscontentProduction 83 | PointInTimeRecoverySpecification: 84 | PointInTimeRecoveryEnabled: true 85 | ImportSourceSpecification: 86 | InputFormat: ION 87 | InputCompressionType: NONE 88 | S3BucketSource: 89 | S3Bucket: "discontent-seed-bucket" 90 | S3KeyPrefix: "seed.ion" 91 | 92 | RequestHandler: 93 | Type: AWS::Serverless::Function 94 | Properties: 95 | MemorySize: 128 96 | Architectures: ["arm64"] 97 | PackageType: Zip 98 | Handler: bootstrap 99 | Runtime: provided.al2 100 | Timeout: 5 101 | CodeUri: lambda/target/lambda/request-handler 102 | Events: 103 | GetScores: 104 | Type: Api 105 | Properties: 106 | Path: /scores 107 | Method: get 108 | RestApiId: !Ref ApiGateway 109 | 
PostVote: 110 | Type: Api 111 | Properties: 112 | Path: /vote 113 | Method: post 114 | RestApiId: !Ref ApiGateway 115 | Environment: 116 | Variables: 117 | TABLE_NAME: !Ref Database 118 | LOG_LEVEL: info 119 | USE_LOCAL_DATABASE: false 120 | RANDOMIZE_SCORES: false 121 | USE_SYSTEM_TIME: true 122 | Policies: 123 | - DynamoDBCrudPolicy: # More info about SAM policy templates: https://docs.aws.amazon.com/serverless-application-model/latest/developerguide/serverless-policy-templates.html 124 | TableName: !Ref Database 125 | 126 | ApiGateway: 127 | Type: AWS::Serverless::Api 128 | Properties: 129 | StageName: v1 130 | 131 | Outputs: 132 | # ServerlessRestApi is an implicit API created out of Events key under Serverless::Function 133 | # Find out more about other implicit resources you can reference within SAM 134 | # https://github.com/awslabs/serverless-application-model/blob/master/docs/internals/generated_resources.rst#api 135 | ApiGateway: 136 | Description: "API Gateway endpoint URL for the production stage" 137 | Value: !Sub "https://${ApiGateway}.execute-api.${AWS::Region}.${AWS::URLSuffix}/v1/" 138 | RequestHandler: 139 | Description: "Lambda Function ARN that handles the API requests" 140 | Value: !GetAtt RequestHandler.Arn 141 | Database: 142 | Description: "DynamoDB table" 143 | Value: !GetAtt Database.Arn 144 | -------------------------------------------------------------------------------- /docs/contributing.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | This project is powered by [Makefiles](https://www.gnu.org/software/make/) and environment variables. 4 | 5 | Most normal tasks can be run with a make recipe and the correct environment variables. For example: 6 | 7 | - `BROWSER=firefox make dev` 8 | - `USE_SYSTEM_TIME=false make test` 9 | 10 | For ease, it's recommended to add a `.env` file in the repository root with all your values set in there. 
The make recipes should complain if an environment variable is not set when it should be. 11 | 12 | A sample `.env` for local development and testing would be: 13 | 14 | ``` 15 | BROWSER=firefox 16 | LAMBDA_API_URL=http://localhost:9000/lambda-url/request-handler/v1 17 | TABLE_NAME=Discontent 18 | LOG_LEVEL=info 19 | RANDOMIZE_SCORES=false 20 | USE_LOCAL_DATABASE=true 21 | USE_SYSTEM_TIME=false 22 | HEADLESS=true 23 | CHROME_EXTENSION_ID=kglbdhongcfkafgfgofpgaehafnbgnhd 24 | FIREFOX_EXTENSION_ID={3f504997-80b7-467d-9d7b-e2fbb6d55e34} 25 | ``` 26 | 27 | The environment variables are: 28 | 29 | | Variable | Values | Description | 30 | | -------------------- | ---------------------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------- | 31 | | BROWSER | `chrome` or `firefox` | When running `make dev`, it will open this browser | 32 | | LAMBDA_API_URL | `http://localhost:9000/lambda-url/request-handler/v1` or the production API `https://.execute-api.us-east-1.amazonaws.com/v1` | The endpoint for the extension to use when looking for scores or when voting | 33 | | TABLE_NAME | `Discontent` | The name of the database table, should always be `Discontent` | 34 | | LOG_LEVEL | `info`, `request_handler=trace`, ... | Logging levels for the lambda. See [here](https://docs.rs/env_logger/0.10.0/env_logger/#enabling-logging) for reference | 35 | | RANDOMIZE_SCORES | `true` or `false` | Whether the lambda should get scores from the database or generate random ones for development | 36 | | USE_LOCAL_DATABASE | `true` or `false` | Should the local lambda look at a local database or connect to the live production database | 37 | | USE_SYSTEM_TIME | `true` or `false` | Normally true but set to false when testing. 
Used to produce reproducible tests | 38 | | HEADLESS | `true` or `false` | Whether to run the end to end tests with headless browsers or not | 39 | | CHROME_EXTENSION_ID | | Local extension ID, used during end to end tests | 40 | | FIREFOX_EXTENSION_ID | | Local extension ID, used during end to end tests | 41 | | ACCESS_KEY | | AWS key used for deploying the backend | 42 | | SECRET_ACCESS_KEY | | AWS key used for deploying the backend | 43 | 44 | ## Building the extension 45 | 46 | You'll need: [npm](https://docs.npmjs.com/), [make](https://www.gnu.org/software/make/), [web-ext](https://github.com/mozilla/web-ext) 47 | 48 | Recommended root `.env` file: 49 | 50 | ``` 51 | LAMBDA_API_URL=https://2zeiy58jgk.execute-api.us-east-1.amazonaws.com/v1 52 | ``` 53 | 54 | Run `cd extension && make build` from the root directory. The builds will be in: 55 | 56 | - `/extension/dist/packed/discontent-<browser>-<version>.zip` 57 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 💚 Discontent 2 | 3 |
4 | Get Discontent for Chrome 5 | Get Discontent for Firefox 6 |
7 | 8 | [![Chrome Web Store](https://img.shields.io/chrome-web-store/users/kglbdhongcfkafgfgofpgaehafnbgnhd?label=Chrome%20users&color=blue)](https://chrome.google.com/webstore/detail/discontent/kglbdhongcfkafgfgofpgaehafnbgnhd) 9 | [![Mozilla Add-on](https://img.shields.io/amo/users/discontent?label=Firefox%20users&color=blue)](https://addons.mozilla.org/addon/discontent) 10 | [![Chrome Web Store](https://img.shields.io/chrome-web-store/stars/kglbdhongcfkafgfgofpgaehafnbgnhd?label=Chrome%20rating)](https://chrome.google.com/webstore/detail/discontent/kglbdhongcfkafgfgofpgaehafnbgnhd) 11 | [![GitHub Workflow Status](https://img.shields.io/github/actions/workflow/status/tom-barone/Discontent/continuous-integration.yml?label=Build)](https://github.com/tom-barone/Discontent/actions/workflows/continuous-integration.yml) 12 | 13 | This aims to be an open, crowdsourced browser extension to fight garbage content on the web. 14 | 15 | As of writing, if I type `difference between reddit and twitter` into [Google](https://www.google.com/search?q=difference%20between%20reddit%20and%20twitter), in the second result I get [this](https://askanydifference.com/difference-between-reddit-and-twitte/): 16 | 17 | > Reddit is dependent on the users of the website. They need to not be registered users. Whereas, Twitter is dependent on the general public who are registered profile users of Twitter along with their editors. 18 | 19 | Clearly this website is complete trash. With the advent of incredible AI tools like ChatGPT, I fear everyone out there that makes interesting and quality content will soon be buried by an immense ocean of AI generated, SEO optimised nonsense. All you lovely people producing good stuff, all I want to do is _find_ you. Use this extension to do so. 20 | 21 | Inspired by my **deep hate** of those AI customer support bots. And immoral SEO consultants. 22 | 23 | ## How it works? 24 | 25 | It's basically a like / dislike system, but for websites. 
26 | 27 | Links with icons prepended 28 | 29 | - If a website is in the database and it has a bad score, it'll prepend all your links with a ❌. 30 | - If it's spicy and there's lots of votes both ways, you see a 🤨. 31 | - If it's awesome and deserves to be cherished, you see a 💚. 32 | - When there aren't enough ratings, nothing will show up. 33 | 34 | 💚 When you find a site that is a beautiful smiling breath of fresh air; use the extension popup to vote and share your love. 35 | 36 | ❌ When you find a site that kicks down your door, calls you stupid and holds out its greasy hand demanding ad revenue; use the popup to warn the next bloke. 37 | 38 | Works for Google, Bing & DuckDuckGo. 39 | 40 | ## Motivation 41 | 42 | Historically we relied on the big search engines to filter out the good from the bad, and generally speaking they did a pretty good job. But all this generative AI stuff has them spooked and now we have to wade through a lot of rubbish to find anything half decent. 43 | 44 | As an aside, rating platforms like this become useless when they start accepting money in exchange for ratings. My promise is that Discontent will never do this, so don't bother asking. 45 | 46 | ## Initial Data 47 | 48 | To get things off the ground, I've used the [front page submissions list](https://news.ycombinator.com/lists) from [HackerNews](https://news.ycombinator.com/news). I've gone back a year and taken the best 30 from each day to build a set of good links. 49 | 50 | There is also some excellent work done by the legends at [uBlacklist](https://iorate.github.io/ublacklist/docs) [[1](https://github.com/arosh/ublacklist-github-translation),[2](https://github.com/arosh/ublacklist-stackoverflow-translation),[3](https://github.com/franga2000/aliexpress-fake-sites)], [Content Farm List](https://github.com/wdmpa/content-farm-list) & [Content Farm Terminator](https://danny0838.github.io/content-farm-terminator/en/). 
Using those lists I've compiled a set of initial bad links as well. 51 | 52 | ## Configuration 53 | 54 | You can change the good / spicy / bad icons in the settings to something custom. Perhaps something like this? 55 | 56 | | Setting | Icon | 57 | | ------- | :--: | 58 | | Good | 😍 | 59 | | Spicy | 🌶 | 60 | | Bad | 🤮 | 61 | 62 | ## Technical 63 | 64 | When you fire up a search engine, the extension grabs a list of all the relevant links on the page, then hits an API asking for their scores. 65 | When a user submits a vote, the API stores the vote, the timestamp and a randomly generated UUID for that user. No other user data is stored. 66 | 67 | The extension itself is written in TypeScript. The web API runs off an AWS Lambda built in Rust, and the whole thing is backed by DynamoDB. There are also some piecemeal Python & Ruby scripts for testing. 68 | 69 | For the nuts and bolts, see the [Architecture](./docs/architecture.md) page. 70 | 71 | For instructions on building, testing & developing see the [Contributing](./docs/contributing.md) page. 72 | 73 | ## Privacy 74 | 75 | It's an important thing. Take a look at the [Privacy](./docs/privacy.md) page. 
# frozen_string_literal: true

require_relative '../setup'

GOOD_ICON_SETTING_SELECTOR = 'input[data-settings-page-target="goodInput"]'
CONTROVERSIAL_ICON_SETTING_SELECTOR = 'input[data-settings-page-target="controversialInput"]'
BAD_ICON_SETTING_SELECTOR = 'input[data-settings-page-target="badInput"]'
SPINNER_SELECTOR = '[data-icon-setting-target="spinner"]'
TICK_SELECTOR = '[data-icon-setting-target="check"]'
ERROR_SELECTOR = '[data-icon-setting-target="error"]'

# End to end tests for the extension popup page.
#
# One copy of every test is generated per entry in BROWSERS_TO_TEST, named
# "test_<browser>_...". All assert_equal calls use the (expected, actual)
# argument order so that failure messages read the right way round.
class TestExtensionPopup < CapybaraTestCase
  # Switch to the given browser driver and load the popup in a fresh window.
  def prepare(browser)
    Capybara.current_driver = browser
    sleep(5) # Give the browser time to open

    # Create a new window because of tab weirdness and the
    # extension auto opening the help page
    new_window = open_new_window
    switch_to_window new_window
    visit(extension_popup_url)
    sleep(15) # Give the popup time to register everything
  end

  # Click the settings toggle and wait for the settings page to be shown.
  def open_settings
    sleep(5) # let any animation that is still running finish first
    click_on('Open settings')
    sleep(5) # enough for the open animation to finish
  end

  # Click the settings toggle and wait for the settings page to be hidden.
  def close_settings
    sleep(5) # let any animation that is still running finish first
    click_on('Close settings')
    sleep(5) # enough for the close animation to finish
  end

  # Run tests for multiple browsers
  BROWSERS_TO_TEST.each do |browser|
    define_method("test_#{browser}_the_popup_displays_correctly") do
      prepare(browser)
      assert_text('Discontent')
      assert_no_text('Settings')
      assert_button('Upvote button')
      assert_button('Downvote button')
      assert_equal '💚', find_button('Upvote button').text.strip
      assert_equal '❌', find_button('Downvote button').text.strip
    end

    define_method("test_#{browser}_can_show_and_hide_the_settings_page") do
      prepare(browser)
      open_settings
      assert_text('Settings')
      assert_text('Good')
      assert_text('Spicy')
      assert_text('Bad')
      assert_button('Reset')
      assert_link('Icon list')
      good_icon_input = find(GOOD_ICON_SETTING_SELECTOR)
      controversial_icon_input = find(CONTROVERSIAL_ICON_SETTING_SELECTOR)
      bad_icon_input = find(BAD_ICON_SETTING_SELECTOR)
      assert_equal '💚', good_icon_input.value
      assert_equal '🤨', controversial_icon_input.value
      assert_equal '❌', bad_icon_input.value

      close_settings
      assert_no_text('Settings')
      assert_no_text('Good')
      assert_no_text('Spicy')
      assert_no_text('Bad')
      assert_no_button('Reset')
      assert_no_link('Icon list')
    end

    define_method("test_#{browser}_changing_and_resetting_the_vote_settings") do
      prepare(browser)
      open_settings
      good_icon_input = find(GOOD_ICON_SETTING_SELECTOR)
      controversial_icon_input = find(CONTROVERSIAL_ICON_SETTING_SELECTOR)
      bad_icon_input = find(BAD_ICON_SETTING_SELECTOR)
      assert_equal '💚', good_icon_input.value
      assert_equal '🤨', controversial_icon_input.value
      assert_equal '❌', bad_icon_input.value

      # Set new icons
      good_icon_input.set('g')
      controversial_icon_input.set('c')
      bad_icon_input.set('b')

      # Check that 3 checkmarks appear
      assert_selector(TICK_SELECTOR, count: 3)

      # After some time they should disappear
      sleep(3)
      assert_no_selector(TICK_SELECTOR)

      assert_equal 'g', good_icon_input.value
      assert_equal 'c', controversial_icon_input.value
      assert_equal 'b', bad_icon_input.value

      # Check the vote buttons show the new icons
      close_settings
      assert_equal 'g', find_button('Upvote button').text.strip
      assert_equal 'b', find_button('Downvote button').text.strip

      # Open settings again and reset
      open_settings
      click_on('Reset')
      assert_selector(TICK_SELECTOR, count: 3)
      sleep(3)
      assert_no_selector(TICK_SELECTOR)

      assert_equal '💚', good_icon_input.value
      assert_equal '🤨', controversial_icon_input.value
      assert_equal '❌', bad_icon_input.value
      close_settings
      assert_equal '💚', find_button('Upvote button').text.strip
      assert_equal '❌', find_button('Downvote button').text.strip
    end

    define_method("test_#{browser}_setting_the_vote_icons_to_invalid_values") do
      prepare(browser)
      open_settings
      good_icon_input = find(GOOD_ICON_SETTING_SELECTOR)
      controversial_icon_input = find(CONTROVERSIAL_ICON_SETTING_SELECTOR)
      bad_icon_input = find(BAD_ICON_SETTING_SELECTOR)

      # Set invalid values: two inputs end up with too many characters
      # and one is emptied
      good_icon_input.send_keys('gg')
      controversial_icon_input.send_keys(:backspace)
      bad_icon_input.send_keys('as')

      # Check that 3 error messages show
      assert_selector(ERROR_SELECTOR, count: 3)
      # Check the error messages are correct
      assert_selector('[title="Icon must be a single character"]', count: 3)

      # Check they don't disappear
      sleep(5)
      assert_selector(ERROR_SELECTOR, count: 3)
    end

    define_method("test_#{browser}_clicking_the_icon_list_link") do
      prepare(browser)
      open_settings
      new_window = window_opened_by { click_link 'Icon list' }
      sleep(5)
      within_window new_window do
        assert_text('Emoji List, v15.0')
      end
    end

    define_method("test_#{browser}_clicking_the_help_page") do
      prepare(browser)
      new_window = window_opened_by { click_on('Open help page') }
      within_window new_window do
        assert_text('Hello friend,')
      end
    end
  end

  # Wipe the extension's stored settings so one test cannot leak into the next.
  def teardown
    # Run javascript to clear local storage
    # Need to deal with annoying browser / chrome name differences
    page.execute_script('let browser = window.browser || window.chrome; browser.storage.local.clear()')
    sleep(5)
    super
  end
end
import { Link } from "./types";

/**
 * Pick the search engine implementation matching the page's hostname.
 *
 * @param hostname Hostname of the page the content script is running on.
 * @returns A scraper for Google, Bing or DuckDuckGo, or `null` when the
 *          hostname does not look like any of them.
 */
export function identify(hostname: string): SearchEngine | null {
  if (hostname.includes("www.google.")) {
    return new Google();
  }
  if (hostname.includes("www.bing.")) {
    return new Bing();
  }
  if (
    hostname.includes("duckduckgo.com") ||
    hostname.includes(
      // The onion version of DuckDuckGo
      "duckduckgogg42xjoc72x3sjasowoarfbgcmvfimaftt6twagswzczad.onion",
    )
  ) {
    return new DuckDuckGo();
  }
  return null;
}

/**
 * Check that `url` parses as an absolute URL using http or https.
 *
 * @returns `false` for anything `new URL()` rejects or any other protocol.
 */
export function isValidHTTPURL(url: string): boolean {
  try {
    const url_obj = new URL(url);
    return url_obj.protocol === "http:" || url_obj.protocol === "https:";
  } catch (e) {
    return false;
  }
}

/**
 * Unwrap a search engine referral link.
 *
 * Returns the value of the `param` query parameter of `url` when it is
 * itself a valid http(s) URL, otherwise returns `url` unchanged. Without
 * the validity check an ordinary result that merely carries e.g.
 * `?url=/some/path` would be "unwrapped" into a string that makes
 * `new URL()` throw in the SearchEngineLink constructor and abort the scan
 * of the whole page.
 */
function unwrapReferral(url: string, param: string): string {
  const target = new URL(url).searchParams.get(param);
  return target !== null && isValidHTTPURL(target) ? target : url;
}

/** Base class: a search engine knows how to find result links on its page. */
class SearchEngine {
  /** Collect every search result link on the current page. */
  public async getAllLinks(): Promise<SearchEngineLink[]> {
    throw new Error("Not implemented");
  }
}

class Google extends SearchEngine {
  public async getAllLinks(): Promise<SearchEngineLink[]> {
    // Get all the anchor tags on the page
    const anchor_tags = document.getElementsByTagName("a");

    const search_links: SearchEngineLink[] = [];
    for (const tag of Array.from(anchor_tags)) {
      // All google search results have an h3 tag below them
      const headerElement = tag.querySelector("h3");
      if (headerElement != null && isValidHTTPURL(tag.href)) {
        search_links.push(
          // Remove any google referral stuff from the url
          new SearchEngineLink(
            this.removeGoogleReferral(tag.href),
            headerElement,
          ),
        );
      }
    }
    return search_links;
  }

  /** Google referral links carry the real destination in `url`. */
  private removeGoogleReferral(url: string): string {
    return unwrapReferral(url, "url");
  }
}

class Bing extends SearchEngine {
  public async getAllLinks(): Promise<SearchEngineLink[]> {
    // Get all the anchor tags on the page
    const anchor_tags = document.getElementsByTagName("a");

    // Referral links need a network round trip each, so resolve all tags
    // concurrently and simply drop the ones that reject.
    const results = await Promise.allSettled(
      Array.from(anchor_tags).map(async (tag) => {
        // All bing results have no siblings and a parent h2 element
        if (
          tag.parentElement?.tagName !== "H2" ||
          tag.parentElement?.children.length !== 1 ||
          !isValidHTTPURL(tag.href)
        ) {
          return Promise.reject("Not a bing result");
        }
        // For SOME REASON bing wraps results in firefox with a referral url
        // BUT NOT for chrome! Why??
        // firefox: https://www.bing.com/ck/a?!&&p=...
        // chrome: https://en.wikipedia.org/wiki/GitHub
        if (tag.href.includes("www.bing.com/ck")) {
          const link = await this.fetchLinkFromBingReferral(tag.href);
          return new SearchEngineLink(link, tag);
        }
        return new SearchEngineLink(tag.href, tag);
      }),
    );

    const search_links: SearchEngineLink[] = [];
    for (const result of results) {
      if (result.status === "fulfilled") {
        search_links.push(result.value);
      }
    }
    return search_links;
  }

  /**
   * Fetch a bing referral page and pull the destination out of its
   * `var u = "..."` script assignment.
   *
   * Rejects when the page cannot be fetched or contains no such assignment.
   */
  private async fetchLinkFromBingReferral(
    referral_url: string,
  ): Promise<string> {
    const response = await fetch(referral_url);
    const text = await response.text();
    // Stop at the first closing quote; a greedy `.*` would swallow any
    // later quoted string that happens to sit on the same line.
    const search_result = text.match(/var u = "([^"]*)"/);
    if (search_result === null) {
      return Promise.reject("Could not find link");
    }
    if (search_result.length !== 2) {
      return Promise.reject("Regex returned unexpected results");
    }
    return search_result[1];
  }
}

class DuckDuckGo extends SearchEngine {
  public async getAllLinks(): Promise<SearchEngineLink[]> {
    // TODO: Fix this hack and find a way to properly wait for DDG page links to load
    await new Promise((resolve) => setTimeout(resolve, 1000));
    // Get all the anchor tags on the page
    const anchor_tags = document.getElementsByTagName("a");
    const search_links: SearchEngineLink[] = [];
    for (const tag of Array.from(anchor_tags)) {
      // DuckDuckGo results are all under h2 tags
      if (
        tag.parentElement?.tagName !== "H2" ||
        tag.parentElement?.children.length !== 1 ||
        !isValidHTTPURL(tag.href)
      ) {
        continue;
      }
      // For the header element:
      // The HTML version of DDG (html.duckduckgo.com) uses the tag itself
      // The regular version uses a child of the tag
      let headerElement: HTMLElement;
      if (window.location.hostname.includes("html.duckduckgo.com")) {
        headerElement = tag;
      } else {
        headerElement = tag.children[0] as HTMLElement;
      }
      // children[0] is undefined for an anchor without child elements
      if (headerElement != null) {
        search_links.push(
          new SearchEngineLink(
            this.removeDuckDuckGoReferral(tag.href),
            headerElement,
          ),
        );
      }
    }
    return search_links;
  }

  /** DuckDuckGo referral links carry the real destination in `uddg`. */
  private removeDuckDuckGoReferral(url: string): string {
    return unwrapReferral(url, "uddg");
  }
}

/** A search result: the link's hostname plus the element showing its title. */
export class SearchEngineLink {
  private _link: Link;
  private _textElement: HTMLElement;

  /**
   * @param url Absolute URL of the result; `new URL()` throws if it is not.
   * @param textElement Element whose text gets the score symbol prepended.
   */
  constructor(url: string, textElement: HTMLElement) {
    this._link = { hostname: new URL(url).hostname };
    this._textElement = textElement;
  }

  get link(): Link {
    return this._link;
  }

  /** Prepend `symbol` to the result's text unless it is already there. */
  public addSymbol(symbol: string): void {
    if (this._textElement.innerText.startsWith(symbol)) {
      // Do nothing if there's already a symbol
      return;
    }
    this._textElement.innerText = `${symbol} ${this._textElement.innerText}`;
  }
}
"""Integration tests for the backend request handler.

These talk to a lambda served at ``API_ENDPOINT`` and a DynamoDB served at
``DATABASE_ENDPOINT``; both must already be running. ``TABLE_NAME`` must be
set in the environment and ``USE_SYSTEM_TIME`` must be ``false``.
"""
import json
import os

import boto3
import pytest
import requests

API_ENDPOINT = 'http://localhost:9000/lambda-url/request-handler'
DATABASE_ENDPOINT = 'http://localhost:8000'
SETTINGS_KEY = {'PK': {'S': 'settings'}, 'SK': {'S': 'settings'}}
TABLE_NAME = os.environ['TABLE_NAME']


@pytest.fixture
def dynamodb():
    """Return a DynamoDB client pointed at the local database endpoint."""
    return boto3.client(
        'dynamodb',
        endpoint_url=DATABASE_ENDPOINT,
    )


def update_maximum_votes_per_user_per_day(value, dynamodb):
    """Set ``maximum_votes_per_user_per_day`` on the settings item."""
    dynamodb.update_item(
        TableName=TABLE_NAME,
        Key=SETTINGS_KEY,
        UpdateExpression='SET maximum_votes_per_user_per_day = :val',
        ExpressionAttributeValues={':val': {
            'N': str(value)
        }},
    )


def set_is_banned(user_id, value, dynamodb):
    """Set the ``is_banned`` flag on the item of user ``user_id``."""
    dynamodb.update_item(
        TableName=TABLE_NAME,
        Key={
            'PK': {
                'S': f'user#{user_id}'
            },
            'SK': {
                'S': f'user#{user_id}'
            }
        },
        UpdateExpression='SET is_banned = :val',
        ExpressionAttributeValues={':val': {
            'BOOL': value
        }},
    )


def update_voting_is_disabled(value, dynamodb):
    """Set ``voting_is_disabled`` on the settings item."""
    dynamodb.update_item(
        TableName=TABLE_NAME,
        Key=SETTINGS_KEY,
        UpdateExpression='SET voting_is_disabled = :val',
        ExpressionAttributeValues={':val': {
            'BOOL': value
        }},
    )


def get_scores(hostnames):
    """Request the scores for ``hostnames`` and return them in response order.

    Asserts that the API answers with HTTP 200.
    """
    links = [{'hostname': hostname} for hostname in hostnames]
    params = {'from': json.dumps({'links': links})}
    response = requests.get(f'{API_ENDPOINT}/v1/scores', params=params)
    assert response.status_code == 200
    return [x['score'] for x in response.json()]


def vote(hostname, value, user_id):
    """Submit a vote and assert that the API accepts it with HTTP 200."""
    # Named `payload` so it does not shadow this function inside its own body.
    payload = {
        "link": {
            "hostname": hostname
        },
        "value": value,
        "user_id": user_id
    }
    response = requests.post(f'{API_ENDPOINT}/v1/vote', json=payload)
    assert response.status_code == 200


def assert_vote_fails(hostname, value, user_id, reason):
    """Submit a vote and assert it is rejected with HTTP 500 and ``reason``."""
    payload = {
        "link": {
            "hostname": hostname
        },
        "value": value,
        "user_id": user_id
    }
    response = requests.post(f'{API_ENDPOINT}/v1/vote', json=payload)
    assert response.status_code == 500
    assert response.json()['error'] == reason


def _check_good_scoring():
    """Check a site stays NoScore at 19 upvotes and is Good at 21."""
    for i in range(19):
        vote('good.com', 1, f"beda{i:04}-0822-4342-0990-b92d94d9489a")
    assert get_scores(['good.com']) == ['NoScore']
    for i in range(2):
        vote('good.com', 1, f"beda{i:04}-1822-4342-0990-b92d94d9489a")
    # Asked twice on purpose: a repeated read must give the same answer
    assert get_scores(['good.com']) == ['Good']
    assert get_scores(['good.com']) == ['Good']


def _check_bad_scoring():
    """Check a site stays NoScore at 9 downvotes and is Bad at 11."""
    for i in range(9):
        vote('bad.com', -1, f"beda{i:04}-0822-4342-0990-b92d94d9489a")
    assert get_scores(['bad.com']) == ['NoScore']
    for i in range(2):
        vote('bad.com', -1, f"beda{i:04}-1822-4342-0990-b92d94d9489a")
    assert get_scores(['bad.com']) == ['Bad']
    assert get_scores(['bad.com']) == ['Bad']


def _check_controversial_scoring():
    """Check 30 downvotes give Bad and 25 more upvotes give Controversial."""
    for i in range(30):
        vote('controversial.com', -1,
             f"beda{i:04}-0822-4342-0990-b92d94d9489a")
    assert get_scores(['controversial.com']) == ['Bad']
    for i in range(25):
        vote('controversial.com', 1, f"beda{i:04}-1822-4342-0990-b92d94d9489a")
    assert get_scores(['controversial.com']) == ['Controversial']
    assert get_scores(['controversial.com']) == ['Controversial']


def _check_maximum_votes_per_user_per_day(dynamodb):
    """Check the per-user daily vote limit, before and after raising it."""
    user = "beda9999-0822-4342-0990-b92d94d9489a"
    too_many = "User has voted too many times today"
    for i in range(10):
        # 10 votes no worries
        vote(f"shill{i}.com", 1, user)
    for i in range(10):
        # Changed the votes, still no worries
        vote(f"shill{i}.com", -1, user)
    for i in range(10, 15):
        # 11th vote for new sites fail
        assert_vote_fails(f"shill{i}.com", 1, user, too_many)
    update_maximum_votes_per_user_per_day(15, dynamodb)
    for i in range(10, 15):
        # Can now have 15 a day, so no worries
        vote(f"shill{i}.com", 1, user)
    for i in range(15, 20):
        # Any more though will fail. Starts at shill15, the first site past
        # the new limit, so the boundary itself is covered.
        assert_vote_fails(f"shill{i}.com", 1, user, too_many)
    # Put the limit back to the value the earlier assertions relied on
    update_maximum_votes_per_user_per_day(10, dynamodb)


def _check_voting_can_be_disabled(dynamodb):
    """Check every vote is rejected while voting_is_disabled is set."""
    update_voting_is_disabled(True, dynamodb)
    for i in range(5):
        user = f"beda{i:04}-0822-4342-0990-b92d94d9489a"
        for hostname in ('good.com', 'bad.com', 'controversial.com',
                         'random-site.com'):
            assert_vote_fails(hostname, 1, user, "Voting is disabled")
    update_voting_is_disabled(False, dynamodb)


def _check_banned_users_cannot_vote(dynamodb):
    """Check a user cannot vote while banned and can again once unbanned."""
    user = "beda0000-5822-4342-0990-b92d94d9489a"
    vote('good.com', 1, user)  # all good
    set_is_banned(user, True, dynamodb)
    assert_vote_fails('good.com', 1, user, "User is banned")
    assert_vote_fails('bad.com', 1, user, "User is banned")
    assert_vote_fails('other.com', 1, user, "User is banned")
    set_is_banned(user, False, dynamodb)
    vote('good.com', -1, user)  # all good again
    vote('bad.com', -1, user)  # all good again
    vote('other.com', -1, user)  # all good again


def test_backend(dynamodb):
    """Run the scoring, rate-limit, kill-switch and ban checks in order.

    The checks run as one test, in the original order, against the same
    database.
    """
    # Make sure we're using a stubbed out time
    assert os.environ['USE_SYSTEM_TIME'] == 'false'

    # Simple test to make sure we can get a score
    assert get_scores(['a.com', 'b.com']) == ['NoScore', 'NoScore']

    _check_good_scoring()
    _check_bad_scoring()
    _check_controversial_scoring()
    _check_maximum_votes_per_user_per_day(dynamodb)
    _check_voting_can_be_disabled(dynamodb)
    _check_banned_users_cannot_vote(dynamodb)

    # TODO: test incorrectly formatted requests
"*://*.google.az/*", 35 | "*://*.google.ba/*", 36 | "*://*.google.com.bd/*", 37 | "*://*.google.be/*", 38 | "*://*.google.bf/*", 39 | "*://*.google.bg/*", 40 | "*://*.google.com.bh/*", 41 | "*://*.google.bi/*", 42 | "*://*.google.bj/*", 43 | "*://*.google.com.bn/*", 44 | "*://*.google.com.bo/*", 45 | "*://*.google.com.br/*", 46 | "*://*.google.bs/*", 47 | "*://*.google.bt/*", 48 | "*://*.google.co.bw/*", 49 | "*://*.google.by/*", 50 | "*://*.google.com.bz/*", 51 | "*://*.google.ca/*", 52 | "*://*.google.cd/*", 53 | "*://*.google.cf/*", 54 | "*://*.google.cg/*", 55 | "*://*.google.ch/*", 56 | "*://*.google.ci/*", 57 | "*://*.google.co.ck/*", 58 | "*://*.google.cl/*", 59 | "*://*.google.cm/*", 60 | "*://*.google.cn/*", 61 | "*://*.google.com.co/*", 62 | "*://*.google.co.cr/*", 63 | "*://*.google.com.cu/*", 64 | "*://*.google.cv/*", 65 | "*://*.google.com.cy/*", 66 | "*://*.google.cz/*", 67 | "*://*.google.de/*", 68 | "*://*.google.dj/*", 69 | "*://*.google.dk/*", 70 | "*://*.google.dm/*", 71 | "*://*.google.com.do/*", 72 | "*://*.google.dz/*", 73 | "*://*.google.com.ec/*", 74 | "*://*.google.ee/*", 75 | "*://*.google.com.eg/*", 76 | "*://*.google.es/*", 77 | "*://*.google.com.et/*", 78 | "*://*.google.fi/*", 79 | "*://*.google.com.fj/*", 80 | "*://*.google.fm/*", 81 | "*://*.google.fr/*", 82 | "*://*.google.ga/*", 83 | "*://*.google.ge/*", 84 | "*://*.google.gg/*", 85 | "*://*.google.com.gh/*", 86 | "*://*.google.com.gi/*", 87 | "*://*.google.gl/*", 88 | "*://*.google.gm/*", 89 | "*://*.google.gr/*", 90 | "*://*.google.com.gt/*", 91 | "*://*.google.gy/*", 92 | "*://*.google.com.hk/*", 93 | "*://*.google.hn/*", 94 | "*://*.google.hr/*", 95 | "*://*.google.ht/*", 96 | "*://*.google.hu/*", 97 | "*://*.google.co.id/*", 98 | "*://*.google.ie/*", 99 | "*://*.google.co.il/*", 100 | "*://*.google.im/*", 101 | "*://*.google.co.in/*", 102 | "*://*.google.iq/*", 103 | "*://*.google.is/*", 104 | "*://*.google.it/*", 105 | "*://*.google.je/*", 106 | "*://*.google.com.jm/*", 107 | 
"*://*.google.jo/*", 108 | "*://*.google.co.jp/*", 109 | "*://*.google.co.ke/*", 110 | "*://*.google.com.kh/*", 111 | "*://*.google.ki/*", 112 | "*://*.google.kg/*", 113 | "*://*.google.co.kr/*", 114 | "*://*.google.com.kw/*", 115 | "*://*.google.kz/*", 116 | "*://*.google.la/*", 117 | "*://*.google.com.lb/*", 118 | "*://*.google.li/*", 119 | "*://*.google.lk/*", 120 | "*://*.google.co.ls/*", 121 | "*://*.google.lt/*", 122 | "*://*.google.lu/*", 123 | "*://*.google.lv/*", 124 | "*://*.google.com.ly/*", 125 | "*://*.google.co.ma/*", 126 | "*://*.google.md/*", 127 | "*://*.google.me/*", 128 | "*://*.google.mg/*", 129 | "*://*.google.mk/*", 130 | "*://*.google.ml/*", 131 | "*://*.google.com.mm/*", 132 | "*://*.google.mn/*", 133 | "*://*.google.ms/*", 134 | "*://*.google.com.mt/*", 135 | "*://*.google.mu/*", 136 | "*://*.google.mv/*", 137 | "*://*.google.mw/*", 138 | "*://*.google.com.mx/*", 139 | "*://*.google.com.my/*", 140 | "*://*.google.co.mz/*", 141 | "*://*.google.com.na/*", 142 | "*://*.google.com.ng/*", 143 | "*://*.google.com.ni/*", 144 | "*://*.google.ne/*", 145 | "*://*.google.nl/*", 146 | "*://*.google.no/*", 147 | "*://*.google.com.np/*", 148 | "*://*.google.nr/*", 149 | "*://*.google.nu/*", 150 | "*://*.google.co.nz/*", 151 | "*://*.google.com.om/*", 152 | "*://*.google.com.pa/*", 153 | "*://*.google.com.pe/*", 154 | "*://*.google.com.pg/*", 155 | "*://*.google.com.ph/*", 156 | "*://*.google.com.pk/*", 157 | "*://*.google.pl/*", 158 | "*://*.google.pn/*", 159 | "*://*.google.com.pr/*", 160 | "*://*.google.ps/*", 161 | "*://*.google.pt/*", 162 | "*://*.google.com.py/*", 163 | "*://*.google.com.qa/*", 164 | "*://*.google.ro/*", 165 | "*://*.google.ru/*", 166 | "*://*.google.rw/*", 167 | "*://*.google.com.sa/*", 168 | "*://*.google.com.sb/*", 169 | "*://*.google.sc/*", 170 | "*://*.google.se/*", 171 | "*://*.google.com.sg/*", 172 | "*://*.google.sh/*", 173 | "*://*.google.si/*", 174 | "*://*.google.sk/*", 175 | "*://*.google.com.sl/*", 176 | 
"*://*.google.sn/*", 177 | "*://*.google.so/*", 178 | "*://*.google.sm/*", 179 | "*://*.google.sr/*", 180 | "*://*.google.st/*", 181 | "*://*.google.com.sv/*", 182 | "*://*.google.td/*", 183 | "*://*.google.tg/*", 184 | "*://*.google.co.th/*", 185 | "*://*.google.com.tj/*", 186 | "*://*.google.tl/*", 187 | "*://*.google.tm/*", 188 | "*://*.google.tn/*", 189 | "*://*.google.to/*", 190 | "*://*.google.com.tr/*", 191 | "*://*.google.tt/*", 192 | "*://*.google.com.tw/*", 193 | "*://*.google.co.tz/*", 194 | "*://*.google.com.ua/*", 195 | "*://*.google.co.ug/*", 196 | "*://*.google.co.uk/*", 197 | "*://*.google.com.uy/*", 198 | "*://*.google.co.uz/*", 199 | "*://*.google.com.vc/*", 200 | "*://*.google.co.ve/*", 201 | "*://*.google.vg/*", 202 | "*://*.google.co.vi/*", 203 | "*://*.google.com.vn/*", 204 | "*://*.google.vu/*", 205 | "*://*.google.ws/*", 206 | "*://*.google.rs/*", 207 | "*://*.google.co.za/*", 208 | "*://*.google.co.zm/*", 209 | "*://*.google.co.zw/*", 210 | "*://*.google.cat/*", 211 | "*://*.bing.com/*", 212 | "*://duckduckgo.com/*", 213 | "*://*.duckduckgo.com/*", 214 | "*://duckduckgogg42xjoc72x3sjasowoarfbgcmvfimaftt6twagswzczad.onion/*" 215 | ], 216 | "js": [ 217 | "content_scripts/index.js" 218 | ], 219 | "run_at": "document_end" 220 | } 221 | ], 222 | "background": { 223 | "scripts": [ 224 | "background_scripts/index.js" 225 | ] 226 | }, 227 | "browser_specific_settings": { 228 | "gecko": { 229 | "id": "{3f504997-80b7-467d-9d7b-e2fbb6d55e34}" 230 | } 231 | } 232 | } 233 | -------------------------------------------------------------------------------- /extension/src/manifest.chrome.json: -------------------------------------------------------------------------------- 1 | { 2 | "manifest_version": 3, 3 | "name": "Discontent", 4 | "version": "1.5.1", 5 | "description": "Open source extension to fight garbage content on the web", 6 | "icons": { 7 | "48": "icons/green_heart-48.png" 8 | }, 9 | "permissions": [ 10 | "tabs", 11 | "storage" 12 | ], 13 | 
"action": { 14 | "default_icon": "icons/green_heart-48.png", 15 | "default_title": "Discontent", 16 | "default_popup": "popup/popup.html" 17 | }, 18 | "content_scripts": [ 19 | { 20 | "matches": [ 21 | "*://*.google.com/*", 22 | "*://*.google.ad/*", 23 | "*://*.google.ae/*", 24 | "*://*.google.com.af/*", 25 | "*://*.google.com.ag/*", 26 | "*://*.google.com.ai/*", 27 | "*://*.google.al/*", 28 | "*://*.google.am/*", 29 | "*://*.google.co.ao/*", 30 | "*://*.google.com.ar/*", 31 | "*://*.google.as/*", 32 | "*://*.google.at/*", 33 | "*://*.google.com.au/*", 34 | "*://*.google.az/*", 35 | "*://*.google.ba/*", 36 | "*://*.google.com.bd/*", 37 | "*://*.google.be/*", 38 | "*://*.google.bf/*", 39 | "*://*.google.bg/*", 40 | "*://*.google.com.bh/*", 41 | "*://*.google.bi/*", 42 | "*://*.google.bj/*", 43 | "*://*.google.com.bn/*", 44 | "*://*.google.com.bo/*", 45 | "*://*.google.com.br/*", 46 | "*://*.google.bs/*", 47 | "*://*.google.bt/*", 48 | "*://*.google.co.bw/*", 49 | "*://*.google.by/*", 50 | "*://*.google.com.bz/*", 51 | "*://*.google.ca/*", 52 | "*://*.google.cd/*", 53 | "*://*.google.cf/*", 54 | "*://*.google.cg/*", 55 | "*://*.google.ch/*", 56 | "*://*.google.ci/*", 57 | "*://*.google.co.ck/*", 58 | "*://*.google.cl/*", 59 | "*://*.google.cm/*", 60 | "*://*.google.cn/*", 61 | "*://*.google.com.co/*", 62 | "*://*.google.co.cr/*", 63 | "*://*.google.com.cu/*", 64 | "*://*.google.cv/*", 65 | "*://*.google.com.cy/*", 66 | "*://*.google.cz/*", 67 | "*://*.google.de/*", 68 | "*://*.google.dj/*", 69 | "*://*.google.dk/*", 70 | "*://*.google.dm/*", 71 | "*://*.google.com.do/*", 72 | "*://*.google.dz/*", 73 | "*://*.google.com.ec/*", 74 | "*://*.google.ee/*", 75 | "*://*.google.com.eg/*", 76 | "*://*.google.es/*", 77 | "*://*.google.com.et/*", 78 | "*://*.google.fi/*", 79 | "*://*.google.com.fj/*", 80 | "*://*.google.fm/*", 81 | "*://*.google.fr/*", 82 | "*://*.google.ga/*", 83 | "*://*.google.ge/*", 84 | "*://*.google.gg/*", 85 | "*://*.google.com.gh/*", 86 | 
"*://*.google.com.gi/*", 87 | "*://*.google.gl/*", 88 | "*://*.google.gm/*", 89 | "*://*.google.gr/*", 90 | "*://*.google.com.gt/*", 91 | "*://*.google.gy/*", 92 | "*://*.google.com.hk/*", 93 | "*://*.google.hn/*", 94 | "*://*.google.hr/*", 95 | "*://*.google.ht/*", 96 | "*://*.google.hu/*", 97 | "*://*.google.co.id/*", 98 | "*://*.google.ie/*", 99 | "*://*.google.co.il/*", 100 | "*://*.google.im/*", 101 | "*://*.google.co.in/*", 102 | "*://*.google.iq/*", 103 | "*://*.google.is/*", 104 | "*://*.google.it/*", 105 | "*://*.google.je/*", 106 | "*://*.google.com.jm/*", 107 | "*://*.google.jo/*", 108 | "*://*.google.co.jp/*", 109 | "*://*.google.co.ke/*", 110 | "*://*.google.com.kh/*", 111 | "*://*.google.ki/*", 112 | "*://*.google.kg/*", 113 | "*://*.google.co.kr/*", 114 | "*://*.google.com.kw/*", 115 | "*://*.google.kz/*", 116 | "*://*.google.la/*", 117 | "*://*.google.com.lb/*", 118 | "*://*.google.li/*", 119 | "*://*.google.lk/*", 120 | "*://*.google.co.ls/*", 121 | "*://*.google.lt/*", 122 | "*://*.google.lu/*", 123 | "*://*.google.lv/*", 124 | "*://*.google.com.ly/*", 125 | "*://*.google.co.ma/*", 126 | "*://*.google.md/*", 127 | "*://*.google.me/*", 128 | "*://*.google.mg/*", 129 | "*://*.google.mk/*", 130 | "*://*.google.ml/*", 131 | "*://*.google.com.mm/*", 132 | "*://*.google.mn/*", 133 | "*://*.google.ms/*", 134 | "*://*.google.com.mt/*", 135 | "*://*.google.mu/*", 136 | "*://*.google.mv/*", 137 | "*://*.google.mw/*", 138 | "*://*.google.com.mx/*", 139 | "*://*.google.com.my/*", 140 | "*://*.google.co.mz/*", 141 | "*://*.google.com.na/*", 142 | "*://*.google.com.ng/*", 143 | "*://*.google.com.ni/*", 144 | "*://*.google.ne/*", 145 | "*://*.google.nl/*", 146 | "*://*.google.no/*", 147 | "*://*.google.com.np/*", 148 | "*://*.google.nr/*", 149 | "*://*.google.nu/*", 150 | "*://*.google.co.nz/*", 151 | "*://*.google.com.om/*", 152 | "*://*.google.com.pa/*", 153 | "*://*.google.com.pe/*", 154 | "*://*.google.com.pg/*", 155 | "*://*.google.com.ph/*", 156 | 
"*://*.google.com.pk/*", 157 | "*://*.google.pl/*", 158 | "*://*.google.pn/*", 159 | "*://*.google.com.pr/*", 160 | "*://*.google.ps/*", 161 | "*://*.google.pt/*", 162 | "*://*.google.com.py/*", 163 | "*://*.google.com.qa/*", 164 | "*://*.google.ro/*", 165 | "*://*.google.ru/*", 166 | "*://*.google.rw/*", 167 | "*://*.google.com.sa/*", 168 | "*://*.google.com.sb/*", 169 | "*://*.google.sc/*", 170 | "*://*.google.se/*", 171 | "*://*.google.com.sg/*", 172 | "*://*.google.sh/*", 173 | "*://*.google.si/*", 174 | "*://*.google.sk/*", 175 | "*://*.google.com.sl/*", 176 | "*://*.google.sn/*", 177 | "*://*.google.so/*", 178 | "*://*.google.sm/*", 179 | "*://*.google.sr/*", 180 | "*://*.google.st/*", 181 | "*://*.google.com.sv/*", 182 | "*://*.google.td/*", 183 | "*://*.google.tg/*", 184 | "*://*.google.co.th/*", 185 | "*://*.google.com.tj/*", 186 | "*://*.google.tl/*", 187 | "*://*.google.tm/*", 188 | "*://*.google.tn/*", 189 | "*://*.google.to/*", 190 | "*://*.google.com.tr/*", 191 | "*://*.google.tt/*", 192 | "*://*.google.com.tw/*", 193 | "*://*.google.co.tz/*", 194 | "*://*.google.com.ua/*", 195 | "*://*.google.co.ug/*", 196 | "*://*.google.co.uk/*", 197 | "*://*.google.com.uy/*", 198 | "*://*.google.co.uz/*", 199 | "*://*.google.com.vc/*", 200 | "*://*.google.co.ve/*", 201 | "*://*.google.vg/*", 202 | "*://*.google.co.vi/*", 203 | "*://*.google.com.vn/*", 204 | "*://*.google.vu/*", 205 | "*://*.google.ws/*", 206 | "*://*.google.rs/*", 207 | "*://*.google.co.za/*", 208 | "*://*.google.co.zm/*", 209 | "*://*.google.co.zw/*", 210 | "*://*.google.cat/*", 211 | "*://*.bing.com/*", 212 | "*://duckduckgo.com/*", 213 | "*://*.duckduckgo.com/*" 214 | ], 215 | "js": [ 216 | "content_scripts/index.js" 217 | ], 218 | "run_at": "document_end" 219 | } 220 | ], 221 | "background": { 222 | "service_worker": "background_scripts/index.js" 223 | }, 224 | "key": 
"MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEA4VcYvE9S56OTCkJfJp5P2U42/rl9ANnGgIGFmvEA2QwToqtLPjy3bm19OAL5ENLL0qR8C3BNOo05IjpL9U09dsau/9te+ctg38+o7ixn3/FzOTK4RbKVtsW+VtOriZMqi2PTVHKZFbQAXDacAJ1xHrhC0bf12WA4UQKiA8visG/sGBO0SF/d/e/60u/+FpQ48ofCrLFOpKQ6B3VeNFHfVE4GgilzUa1pmguI/8ThanHluj0yWcysQI65Hk5dQCd6R5I/ZJz+vI4OgIE9z4H8RGJ9tpf9uKgPQ9vbn7HpPvkRTZFN+rwPysnA6YacIIzVM4XtOUznm12HWI4NR57ERwIDAQAB" 225 | } 226 | -------------------------------------------------------------------------------- /extension/src/popup/popup.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Discontent 6 | 7 | 8 | 9 |
10 | 11 |

Discontent

12 |
13 | 22 | 31 |
32 |
33 |
34 | 62 | 90 |
91 |
92 | 93 |
233 | 234 | 235 | -------------------------------------------------------------------------------- /backend/lambda/src/routes.rs: -------------------------------------------------------------------------------- 1 | use std::collections::HashMap; 2 | 3 | use crate::{ 4 | dynamodb::*, 5 | scoring::*, 6 | types::{database::*, Config}, 7 | validate::{validate_get_scores_request, validate_vote_request}, 8 | }; 9 | use aws_sdk_dynamodb::{ 10 | model::{AttributeValue::*, KeysAndAttributes, TransactWriteItem}, 11 | Client, 12 | }; 13 | use chrono::{SecondsFormat, Utc}; 14 | use lambda_http::{Body, Error, Request, RequestExt}; 15 | use tracing::*; 16 | use validator::Validate; 17 | 18 | #[instrument(level = "trace")] 19 | pub async fn vote( 20 | request: Request, 21 | config: &Config, 22 | dynamo_db_client: &Client, 23 | ) -> Result { 24 | let vote_request = validate_vote_request(request.body())?; 25 | 26 | let created_at: String; 27 | if config.use_system_time { 28 | // Always use "2018-01-26T18:30:09Z" format 29 | created_at = Utc::now().to_rfc3339_opts(SecondsFormat::Secs, true); 30 | } else { 31 | created_at = "2022-07-27T12:30:00Z".to_string(); // For testing, <3 bel 32 | } 33 | 34 | let vote = Vote { 35 | link: vote_request.link.clone(), 36 | user_id: vote_request.user_id.clone(), 37 | value: vote_request.value, 38 | created_at, 39 | }; 40 | // Extract day string `2023-02-09` 41 | let day = vote.created_at.clone()[..10].to_string(); 42 | 43 | info!("New vote request: {:?}", vote); 44 | 45 | // Get settings and user history 46 | let settings_and_user_request = dynamo_db_client 47 | .batch_get_item() 48 | .request_items( 49 | &config.table_name, 50 | KeysAndAttributes::builder() 51 | .set_keys(Some(vec![ 52 | get_settings(), 53 | get_user(&vote.user_id), 54 | get_daily_user_history(&day, &vote.user_id), 55 | get_vote(&vote), 56 | ])) 57 | .build(), 58 | ) 59 | .send() 60 | .await?; 61 | debug!( 62 | "Settings and User History response: {:#?}", 63 | 
settings_and_user_request 64 | ); 65 | 66 | let mut user_does_not_exist = true; 67 | let mut user_is_banned = false; 68 | let mut first_vote_on_link_for_user = true; 69 | let mut voting_is_disabled = false; 70 | let mut user_has_reached_max_vote_limit_for_today = false; 71 | let mut maximum_votes_per_user_per_day: u32 = 10; 72 | let mut old_vote: Option = None; 73 | for item in settings_and_user_request 74 | .responses() 75 | .ok_or("DynamoDB request error")? 76 | .get(&config.table_name) 77 | .ok_or("DynamoDB request error")? 78 | .iter() 79 | { 80 | let entity_type = item 81 | .get("entity_type") 82 | .ok_or("No entity_type")? 83 | .as_s() 84 | .or(Err("entity_type is not a string"))?; 85 | match entity_type.as_str() { 86 | "Settings" => { 87 | let settings = Settings::try_from(item)?; 88 | voting_is_disabled = settings.voting_is_disabled; 89 | maximum_votes_per_user_per_day = settings.maximum_votes_per_user_per_day; 90 | } 91 | "User" => { 92 | user_does_not_exist = false; 93 | let user = User::try_from(item)?; 94 | user_is_banned = user.is_banned; 95 | } 96 | "UserHistory" => { 97 | let daily_user_history = UserHistory::try_from(item)?; 98 | // Assumes the settings have already been retrieved 99 | user_has_reached_max_vote_limit_for_today = 100 | daily_user_history.count_of_votes >= maximum_votes_per_user_per_day; 101 | } 102 | "Vote" => { 103 | first_vote_on_link_for_user = false; 104 | old_vote = Some(Vote::try_from(item)?); 105 | } 106 | _ => { 107 | return Err("Unknown entity_type".into()); 108 | } 109 | } 110 | } 111 | 112 | if user_is_banned { 113 | return Err("User is banned".into()); 114 | } 115 | if voting_is_disabled { 116 | return Err("Voting is disabled".into()); 117 | } 118 | if first_vote_on_link_for_user && user_has_reached_max_vote_limit_for_today { 119 | return Err("User has voted too many times today".into()); 120 | } 121 | 122 | let mut write_requests: Vec = vec![]; 123 | if user_does_not_exist { 124 | 
write_requests.push(put_new_user(&vote.user_id, &vote.created_at, config)); 125 | } 126 | if first_vote_on_link_for_user { 127 | write_requests.push(put_vote(&vote, config)); 128 | write_requests.push(update_link_detail(&vote.link, vote.value, config)); 129 | write_requests.push(increment_link_history(&day, &vote, config)); 130 | write_requests.push(increment_user_history(&day, &vote, config)); 131 | } else if let Some(old_vote) = old_vote { 132 | let old_day = old_vote.created_at[..10].to_string(); 133 | write_requests.push(put_vote(&vote, config)); 134 | write_requests.push(update_existing_link_detail( 135 | &vote.link, 136 | -old_vote.value + vote.value, // The change in vote value 137 | config, 138 | )); 139 | // If updates are on the same day 140 | if old_day == day { 141 | // Update old day 142 | write_requests.push(update_link_history(&day, &old_vote, &vote, config)); 143 | write_requests.push(update_user_history(&day, &old_vote, &vote, config)); 144 | } else { 145 | // Revert old day, increment new day 146 | write_requests.push(revert_link_history(&old_vote, &vote.link, config)); 147 | write_requests.push(revert_user_history(&old_vote, &vote.user_id, config)); 148 | write_requests.push(increment_link_history(&day, &vote, config)); 149 | write_requests.push(increment_user_history(&day, &vote, config)); 150 | } 151 | } 152 | 153 | let write_result = dynamo_db_client 154 | .transact_write_items() 155 | .set_transact_items(Some(write_requests)) 156 | .send() 157 | .await?; 158 | 159 | debug!("Successfully submitted vote [result={:?}]", write_result); 160 | 161 | return Ok(Body::Empty); 162 | } 163 | 164 | #[instrument(level = "trace")] 165 | pub async fn scores( 166 | request: Request, 167 | config: &Config, 168 | dynamo_db_client: &Client, 169 | ) -> Result { 170 | // Extract the links from the query parameters and validate them 171 | let scores_request = validate_get_scores_request(request.query_string_parameters())?; 172 | 173 | if config.randomize_scores { 
174 | let link_scores = random_link_scores(&scores_request.links); 175 | let link_scores_json = serde_json::to_string(&link_scores)?; 176 | return Ok(link_scores_json.into()); 177 | } 178 | 179 | // Combine the requests for link details into a single DynamoDB request 180 | let mut dynamodb_request_builder = KeysAndAttributes::builder(); 181 | for link in &scores_request.links { 182 | dynamodb_request_builder = dynamodb_request_builder.keys(HashMap::from([ 183 | ("PK".to_string(), S(format!("link#{}", link.hostname))), 184 | ("SK".to_string(), S(format!("link#{}", link.hostname))), 185 | ])); 186 | } 187 | 188 | // Send the request to DynamoDB and wait for the results 189 | let dynamodb_response = dynamo_db_client 190 | .batch_get_item() 191 | .request_items(&config.table_name, dynamodb_request_builder.build()) 192 | .send() 193 | .await?; 194 | 195 | // Extract the link details 196 | //let mut link_details: Vec = vec![]; 197 | let mut link_details = HashMap::new(); 198 | for item in dynamodb_response 199 | .responses() 200 | .ok_or("DynamoDB request error")? 201 | .get(&config.table_name) 202 | .ok_or("DynamoDB request error")? 
203 | .iter() 204 | { 205 | let link_detail = LinkDetail::try_from(item)?; 206 | link_detail.validate()?; 207 | link_details.insert(link_detail.link.clone(), link_detail); 208 | } 209 | 210 | // Calculate the scores 211 | let link_scores = calculate_link_scores(&scores_request.links, &link_details); 212 | let link_scores_json = serde_json::to_string(&link_scores)?; 213 | 214 | return Ok(link_scores_json.into()); 215 | } 216 | -------------------------------------------------------------------------------- /backend/lambda/src/validate.rs: -------------------------------------------------------------------------------- 1 | use crate::types::api; 2 | use chrono::DateTime; 3 | use lambda_http::Error; 4 | use lambda_http::{aws_lambda_events::query_map::QueryMap, Body}; 5 | use lazy_static::lazy_static; 6 | use regex::Regex; 7 | use validator::{Validate, ValidationError}; 8 | 9 | pub fn validate_get_scores_request(query_map: QueryMap) -> Result { 10 | let links_query_parameter = query_map 11 | .first("from") 12 | .ok_or("Incorrect query parameters. Expected `from`")?; 13 | let links = serde_json::from_str::(links_query_parameter)?; 14 | links.validate()?; 15 | Ok(links) 16 | } 17 | 18 | pub fn validate_vote_request(body: &Body) -> Result { 19 | let vote_request = serde_json::from_slice::(body)?; 20 | vote_request.validate()?; 21 | Ok(vote_request) 22 | } 23 | 24 | lazy_static! 
{ 25 | // For timestamps in the format "2023-02-02T09:36:03Z" 26 | static ref TIMESTAMP_REGEX: Regex = Regex::new(r"^\d{4}-\d\d-\d\dT\d\d:\d\d:\d\dZ$").unwrap(); 27 | } 28 | 29 | pub fn is_timestamp_valid(timestamp: &String) -> Result<(), ValidationError> { 30 | if (!TIMESTAMP_REGEX.is_match(×tamp)) 31 | || (DateTime::parse_from_rfc3339(×tamp).is_err()) 32 | { 33 | return Err(ValidationError::new( 34 | "Timestamp should be in the RFC3339 format 2023-02-02T09:36:03Z", 35 | )); 36 | } 37 | Ok(()) 38 | } 39 | 40 | pub fn is_vote_value_valid(vote_value: i32) -> Result<(), ValidationError> { 41 | if (vote_value != -1) && (vote_value != 1) { 42 | return Err(ValidationError::new("Vote should be -1 or 1")); 43 | } 44 | Ok(()) 45 | } 46 | 47 | // Copyright 2018-2022 System76 48 | // SPDX-License-Identifier: MIT 49 | // https://docs.rs/hostname-validator 50 | /// Validate a hostname according to [IETF RFC 1123](https://tools.ietf.org/html/rfc1123). 51 | /// 52 | /// A hostname is valid if the following condition are true: 53 | /// 54 | /// - It does not start or end with `-` or `.`. 55 | /// - It does not contain any characters outside of the alphanumeric range, except for `-` and `.`. 56 | /// - It is not empty. 57 | /// - It is 253 or fewer characters. 58 | /// - Its labels (characters separated by `.`) are not empty. 59 | /// - Its labels are 63 or fewer characters. 60 | /// - Its lables do not start or end with '-' or '.'. 61 | pub fn is_hostname_valid(hostname: &str) -> Result<(), ValidationError> { 62 | fn is_valid_char(byte: u8) -> bool { 63 | (b'a'..=b'z').contains(&byte) 64 | || (b'A'..=b'Z').contains(&byte) 65 | || (b'0'..=b'9').contains(&byte) 66 | || byte == b'-' 67 | || byte == b'.' 
68 | } 69 | 70 | if hostname.bytes().any(|byte| !is_valid_char(byte)) 71 | || hostname.split('.').any(|label| { 72 | label.is_empty() || label.len() > 63 || label.starts_with('-') || label.ends_with('-') 73 | }) 74 | || hostname.is_empty() 75 | || hostname.len() > 253 76 | { 77 | Err(ValidationError::new("Hostname is invalid")) 78 | } else { 79 | Ok(()) 80 | } 81 | } 82 | 83 | #[cfg(test)] 84 | mod tests { 85 | use super::*; 86 | use crate::types::{api::ScoresRequest, Link}; 87 | use std::collections::HashMap; 88 | 89 | // Make it easy to create new ScoresRequest objects 90 | impl ScoresRequest { 91 | pub fn new(links: Vec<&str>) -> Self { 92 | ScoresRequest { 93 | links: links 94 | .into_iter() 95 | .map(|link| Link::new(link)) 96 | .collect::>(), 97 | } 98 | } 99 | } 100 | 101 | #[test] 102 | fn test_is_timestamp_valid() { 103 | // Valid timestamps 104 | for timestamp in &["VaLiD-HoStNaMe"] { 105 | assert_eq!(is_hostname_valid(timestamp), Ok(())); 106 | } 107 | 108 | // Invalid timestamps 109 | for invalid_timestamp in &[ 110 | "2023-02-0209:36:03Z", 111 | "a2023-02-02T09:36:03Z", 112 | "2023-02-02T09:3603Z", 113 | "2023-02-02T09:36:03UTC", 114 | "2023-99-02T09:36:03Z", 115 | "2023-09-02T99:36:03Z", 116 | "2023:09:02T09:36:03Z", 117 | "2020-12-31 21:07:14-05:00", 118 | ] { 119 | assert_eq!( 120 | is_timestamp_valid(&invalid_timestamp.to_string()), 121 | Err(ValidationError::new( 122 | "Timestamp should be in the RFC3339 format 2023-02-02T09:36:03Z" 123 | )) 124 | ); 125 | } 126 | } 127 | 128 | #[test] 129 | fn test_validate_get_scores_request() { 130 | fn test_helper( 131 | query_key: &str, 132 | query_value: &T, 133 | ) -> Result { 134 | validate_get_scores_request(QueryMap::from(HashMap::from([( 135 | query_key.to_string(), 136 | serde_json::to_string(query_value.into()).unwrap(), 137 | )]))) 138 | } 139 | 140 | // Happy path 141 | let key = "from"; 142 | let value = ScoresRequest::new(vec!["www.google.com", "abc.com", "domain.me"]); 143 | let result = 
test_helper(key, &value); 144 | assert!(result.is_ok()); 145 | 146 | // Not enough links 147 | let key = "from"; 148 | let value = ScoresRequest::new(vec![]); 149 | let result = test_helper(key, &value); 150 | assert!(result.is_err()); 151 | assert!(result 152 | .unwrap_err() 153 | .to_string() 154 | .contains("Validation error: length")); 155 | 156 | // Too many links 157 | let key = "from"; 158 | let value = ScoresRequest::new(vec!["www.google.com"; 101]); 159 | let result = test_helper(key, &value); 160 | assert!(result.is_err()); 161 | assert!(result 162 | .unwrap_err() 163 | .to_string() 164 | .contains("Validation error: length")); 165 | 166 | // Incorrect query key 167 | let key = "fromzzz"; 168 | let value = ScoresRequest::new(vec!["www.google.com", "abc.com", "domain.me"]); 169 | let result = test_helper(key, &value); 170 | assert!(result.is_err()); 171 | assert_eq!( 172 | result.unwrap_err().to_string(), 173 | "Incorrect query parameters. Expected `from`" 174 | ); 175 | 176 | // Incorrect value type 177 | let key = "from"; 178 | let value = "www.google.com"; // should be an array 179 | let result = test_helper(key, &value); 180 | assert!(result.is_err()); 181 | assert!(result.unwrap_err().to_string().contains("invalid type")); 182 | 183 | // Invalid hostname 184 | let key = "from"; 185 | let value = ScoresRequest::new(vec!["www.google.com", "abc;;;com"]); 186 | let result = test_helper(key, &value); 187 | assert!(result.is_err()); 188 | assert!(result 189 | .unwrap_err() 190 | .to_string() 191 | .contains("Hostname is invalid")); 192 | } 193 | 194 | #[test] 195 | fn test_is_vote_value_valid() { 196 | assert_eq!(is_vote_value_valid(1), Ok(())); 197 | assert_eq!(is_vote_value_valid(-1), Ok(())); 198 | for invalid_vote in [2, 0, 223, -5, -9999] { 199 | assert_eq!( 200 | is_vote_value_valid(invalid_vote), 201 | Err(ValidationError::new("Vote should be -1 or 1")) 202 | ); 203 | } 204 | } 205 | 206 | #[test] 207 | fn test_is_hostname_valid() { 208 | // Valid 
hostnames 209 | for hostname in &[ 210 | "VaLiD-HoStNaMe", 211 | "50-name", 212 | "235235", 213 | "example.com", 214 | "VaLid.HoStNaMe", 215 | "www.place.au", 216 | "123.456", 217 | ] { 218 | assert_eq!(is_hostname_valid(hostname), Ok(())); 219 | } 220 | 221 | // Invalid hostnames 222 | for hostname in &[ 223 | "-invalid-name", 224 | "also-invalid-", 225 | "asdf@fasd", 226 | "@asdfl", 227 | "asd f@", 228 | ".invalid", 229 | "invalid.name.", 230 | "foo.label-is-way-to-longgggggggggggggggggggggggggggggggggggggggggggg.org", 231 | "invalid.-starting.char", 232 | "invalid.ending-.char", 233 | "empty..label", 234 | ] { 235 | assert_eq!( 236 | is_hostname_valid(hostname), 237 | Err(ValidationError::new("Hostname is invalid")) 238 | ); 239 | } 240 | } 241 | } 242 | -------------------------------------------------------------------------------- /backend/lambda/src/types.rs: -------------------------------------------------------------------------------- 1 | use crate::validate::*; 2 | use serde::{Deserialize, Serialize}; 3 | use validator::Validate; 4 | 5 | #[derive(Debug)] 6 | pub struct Config { 7 | pub table_name: String, 8 | pub use_local_database: bool, 9 | pub randomize_scores: bool, 10 | pub use_system_time: bool, 11 | } 12 | 13 | #[derive(Debug, Validate, Serialize, Deserialize, PartialEq, Eq, Hash, Clone)] 14 | pub struct Link { 15 | #[validate(custom = "is_hostname_valid")] 16 | pub hostname: String, 17 | } 18 | impl Link { 19 | pub fn new(hostname: &str) -> Self { 20 | Link { 21 | hostname: hostname.to_string(), 22 | } 23 | } 24 | } 25 | 26 | #[derive(Debug, Serialize, Deserialize, Clone)] 27 | pub enum Score { 28 | Good, 29 | Bad, 30 | Controversial, 31 | NoScore, 32 | } 33 | 34 | #[derive(Debug, Validate, Serialize, Deserialize)] 35 | pub struct LinkScore { 36 | #[validate] 37 | link: Link, 38 | score: Score, 39 | } 40 | impl LinkScore { 41 | pub fn new(link: Link, score: Score) -> Self { 42 | LinkScore { link, score } 43 | } 44 | } 45 | 46 | pub mod api { 47 
| use super::{Link, LinkScore}; 48 | use crate::validate::is_vote_value_valid; 49 | use serde::{Deserialize, Serialize}; 50 | use uuid::Uuid; 51 | use validator::Validate; 52 | 53 | #[derive(Debug, Validate, Deserialize, PartialEq)] 54 | pub struct VoteRequest { 55 | #[validate] 56 | pub link: Link, 57 | #[validate(custom = "is_vote_value_valid")] 58 | pub value: i32, 59 | pub user_id: Uuid, 60 | } 61 | 62 | #[derive(Debug, Validate, Deserialize, Serialize, PartialEq)] 63 | pub struct ScoresRequest { 64 | #[validate] 65 | #[validate(length(min = 1, max = 100))] 66 | pub links: Vec, 67 | } 68 | 69 | #[derive(Debug, Validate, Serialize)] 70 | pub struct ScoresResponse { 71 | #[validate] 72 | #[validate(length(min = 1, max = 100))] 73 | scores: Vec, 74 | } 75 | 76 | #[derive(Debug, Serialize)] 77 | pub struct Error { 78 | pub error: String, 79 | pub description: serde_json::Value, 80 | } 81 | } 82 | 83 | pub mod database { 84 | use super::Link; 85 | use crate::validate::*; 86 | use aws_sdk_dynamodb::model::AttributeValue; 87 | use lambda_http::Error; 88 | use serde::Deserialize; 89 | use std::collections::HashMap; 90 | use uuid::Uuid; 91 | use validator::Validate; 92 | // TODO: Add validation to these database types 93 | 94 | #[derive(Debug, Validate, Deserialize, PartialEq)] 95 | pub struct Vote { 96 | #[validate] 97 | pub link: Link, 98 | #[validate(custom = "is_vote_value_valid")] 99 | pub value: i32, 100 | pub user_id: Uuid, 101 | #[validate(custom = "is_timestamp_valid")] 102 | pub created_at: String 103 | } 104 | impl TryFrom<&HashMap> for Vote { 105 | type Error = Error; 106 | fn try_from(hash_map: &HashMap) -> Result { 107 | let primary_key = hash_map 108 | .get("PK") 109 | .ok_or("No PK")? 110 | .as_s() 111 | .or(Err("PK is not a string"))?; 112 | let link = Link::new(primary_key.split('#').nth(1).ok_or("No link")?); 113 | let value = hash_map 114 | .get("value") 115 | .ok_or("No value")? 116 | .as_n() 117 | .or(Err("value is not a number"))? 
118 | .parse::()?; 119 | let sort_key = hash_map 120 | .get("SK") 121 | .ok_or("No SK")? 122 | .as_s() 123 | .or(Err("SK is not a string"))?; 124 | let user_id = Uuid::parse_str(sort_key.split('#').nth(1).ok_or("No user_id")?)?; 125 | let created_at = hash_map 126 | .get("created_at") 127 | .ok_or("No created_at")? 128 | .as_s() 129 | .or(Err("created_at is not a string"))? 130 | .to_string(); 131 | let vote = Vote { 132 | link, 133 | value, 134 | user_id, 135 | created_at, 136 | }; 137 | vote.validate()?; 138 | Ok(vote) 139 | } 140 | } 141 | 142 | #[derive(Debug)] 143 | pub struct UserHistory { 144 | pub day: String, 145 | pub count_of_votes: u32, 146 | pub sum_of_votes: i32, 147 | } 148 | impl TryFrom<&HashMap> for UserHistory { 149 | type Error = Error; 150 | fn try_from(hash_map: &HashMap) -> Result { 151 | let primary_key = hash_map 152 | .get("PK") 153 | .ok_or("No PK")? 154 | .as_s() 155 | .or(Err("PK is not a string"))?; 156 | let day = primary_key 157 | .split('#') 158 | .nth(1) 159 | .ok_or("No day found in PK")? 160 | .to_string(); 161 | let count_of_votes = hash_map 162 | .get("count_of_votes") 163 | .ok_or("No count_of_votes")? 164 | .as_n() 165 | .or(Err("count_of_votes is not a number"))? 166 | .parse::()?; 167 | let sum_of_votes = hash_map 168 | .get("sum_of_votes") 169 | .ok_or("No sum_of_votes")? 170 | .as_n() 171 | .or(Err("sum_of_votes is not a number"))? 172 | .parse::()?; 173 | 174 | Ok(UserHistory { 175 | day, 176 | count_of_votes, 177 | sum_of_votes, 178 | }) 179 | } 180 | } 181 | 182 | #[derive(Debug)] 183 | pub struct User { 184 | pub is_banned: bool, 185 | } 186 | impl TryFrom<&HashMap> for User { 187 | type Error = Error; 188 | fn try_from(hash_map: &HashMap) -> Result { 189 | let is_banned = hash_map 190 | .get("is_banned") 191 | .ok_or("No is_banned")? 192 | .as_bool() 193 | .or(Err("is_banned is not a bool"))? 
194 | .clone(); 195 | 196 | Ok(User { is_banned }) 197 | } 198 | } 199 | 200 | #[derive(Debug)] 201 | pub struct Settings { 202 | pub voting_is_disabled: bool, 203 | pub maximum_votes_per_user_per_day: u32, 204 | } 205 | impl TryFrom<&HashMap> for Settings { 206 | type Error = Error; 207 | fn try_from(hash_map: &HashMap) -> Result { 208 | let voting_is_disabled = hash_map 209 | .get("voting_is_disabled") 210 | .ok_or("No voting_is_disabled")? 211 | .as_bool() 212 | .or(Err("voting_is_disabled is not a bool"))? 213 | .clone(); 214 | let maximum_votes_per_user_per_day = hash_map 215 | .get("maximum_votes_per_user_per_day") 216 | .ok_or("No maximum_votes_per_user_per_day")? 217 | .as_n() 218 | .or(Err("maximum_votes_per_user_per_day is not a number"))? 219 | .parse::()?; 220 | 221 | Ok(Settings { 222 | voting_is_disabled, 223 | maximum_votes_per_user_per_day, 224 | }) 225 | } 226 | } 227 | 228 | #[derive(Debug, Validate, PartialEq)] 229 | pub struct LinkDetail { 230 | #[validate] 231 | pub link: super::Link, 232 | pub count_of_votes: u32, 233 | pub sum_of_votes: i32, 234 | } 235 | impl TryFrom<&HashMap> for LinkDetail { 236 | type Error = Error; 237 | fn try_from(hash_map: &HashMap) -> Result { 238 | let primary_key = hash_map 239 | .get("PK") 240 | .ok_or("No PK")? 241 | .as_s() 242 | .or(Err("PK is not a string"))?; 243 | let link = Link::new(primary_key.split('#').nth(1).ok_or("No link")?); 244 | let count_of_votes = hash_map 245 | .get("count_of_votes") 246 | .ok_or("No count_of_votes")? 247 | .as_n() 248 | .or(Err("count_of_votes is not a number"))? 249 | .parse::()?; 250 | let sum_of_votes = hash_map 251 | .get("sum_of_votes") 252 | .ok_or("No sum_of_votes")? 253 | .as_n() 254 | .or(Err("sum_of_votes is not a number"))? 
255 | .parse::()?; 256 | 257 | Ok(LinkDetail { 258 | link, 259 | count_of_votes, 260 | sum_of_votes, 261 | }) 262 | } 263 | } 264 | } 265 | -------------------------------------------------------------------------------- /docs/architecture.md: -------------------------------------------------------------------------------- 1 | # Architecture 2 | 3 | ## Data Structures 4 | 5 | ```mermaid 6 | %%{ init: { "er" : { "layoutDirection" : "LR" } } }%% 7 | erDiagram 8 | User ||--o{ Vote : submits 9 | Vote o{--|| Link : on 10 | Link o{--|| Score : has 11 | ``` 12 | 13 | ### Link 14 | 15 | The hostname `String` that represents the website, for example `"www.google.com" or "blog.myspecialplace.com"`. [See here](https://developer.mozilla.org/en-US/docs/Web/API/Location/hostname) for a good explanation of the different pieces in the URL: 16 |
Structure and components of a URL 17 | 18 | _Right now all voting just happens on the hostname, but there could be a future where voting happens on full URL paths. Like voting on individual Medium articles for example._ 19 | 20 | ### Vote 21 | 22 | An `Integer` that's either a +1 or -1, stored with a `Timestamp` when the vote was made. Always associated with a `User` and a `Link`. 23 | 24 | ### Timestamp 25 | 26 | Represented everywhere as an [RFC 3339](https://www.rfc-editor.org/rfc/rfc3339) string with the specific format `2023-02-02T09:36:03Z`. 27 | 28 | ### Score 29 | 30 | An `enum` that represents how good a website `Link` is. It has 4 possible values: 31 | 32 | | Enum | Definition | 33 | | --------------- | ------------------------------------------------------- | 34 | | `Good` | Sum of all votes >= 20 | 35 | | `Bad` | Sum of all votes <= -10 | 36 | | `Controversial` | (-10 < Sum of all votes < 20) && (Number of votes > 50) | 37 | | `NoScore` | If none of the above | 38 | 39 | The score is calculated in the API and exposed to the extension through the `/scores` request. 40 | 41 | In the future this will probably need to be tweaked for more nuanced scoring, like weighting recent votes higher. 42 | 43 | ### User 44 | 45 | Identified by a `UUID`. I wanted a passwordless system and this seemed like a flexible choice. Has a number of properties: 46 | 47 | - is_banned: `Boolean` 48 | - created_at: `Timestamp` 49 | 50 | ### Settings 51 | 52 | System-wide configuration that can change the behaviour of everything. 53 | 54 | - voting_is_disabled: `Boolean` 55 | - maximum_votes_per_user_per_day: 10 56 | 57 | The idea behind `voting_is_disabled` is in case there's a spam armageddon and all voting needs to be stopped.
58 | 59 | ## API 60 | 61 | | Request | Response | 62 | | ------------------------------------- | ------------------------------ | 63 | | `GET /scores?from=[link1, link2, ...]` | `[{link: Link, score: Score}]` | 64 | | `POST /vote {link, value, user_id}` | | 65 | 66 | ## Database 67 | 68 | I decided to go with a NoSQL database for two reasons: 69 | 70 | 1. It'd be cool to learn. 71 | 1. My extremely basic understanding of NoSQL leads me to believe that it's better suited for what this is trying to do. 72 | DynamoDB on AWS seems cheap enough and if this extension actually gets used and needs to scale then future Tom won't be boned. 73 | 74 | The access patterns are reasonably well defined: 75 | 76 | | Runtime Access Patterns | Description | Table - Filter | 77 | | ----------------------------- | -------------------------------------------------- | --------------------------------------------------------- | 78 | | Get vote summaries for a Link | Summaries are `sum_of_votes` & `count_of_votes` | `Table:Discontent - PK=link#, SK=link#` | 79 | | Get all votes for a Link | To calculate `sum_of_votes` & `count_of_votes` | `Table:Discontent - PK=link#, SK.startswith(user#)` | 80 | | Get vote for a Link and user | To make sure a user can't vote twice | `Table:Discontent - PK=link#, SK=user#` | 81 | | Get vote for a Link and user | To auto select the correct vote button | `Table:Discontent - PK=link#, SK=user#` | 82 | | Get vote summaries for a User | To limit the number of submissions in a time range | `Table:Discontent - PK=day#, SK=user#` | 83 | | Get banned state for a User | Prevent banned users from submitting more votes | `Table:Discontent - PK=user#, SK=user#` | 84 | 85 | The following are analysis access patterns, not really part of regular usage.
86 | 87 | | Analysis Access Patterns | Description | Table & Filter | 88 | | ------------------------------------- | ------------------------------------------- | ---------------------------------------------------------- | 89 | | Get User details | To carry out abuse investigations | `Table:Discontent - PK=user#, SK=user#,` | 90 | | Get all votes for a user | To carry out abuse investigations | `GSI:UserVotes - PK=user#, SK.within(timerange)` | 91 | | Get top users by daily count of votes | To identify possible abuse | `GSI:DailyUserHistory - PK=, SK.top(N)` | 92 | | Get top links by daily count of votes | To identify possible abuse | `GSI:DailyLinkHistoryByCountOfVotes - PK=, SK.top(N)` | 93 | | Get top links by daily sum of votes | To create a best links leaderboard | `GSI:DailyLinkHistoryBySumOfVotes - PK=, SK.top(N)` | 94 | | Get top links by daily count of votes | To create a controversial links leaderboard | `GSI:DailyLinkHistoryByCountOfVotes - PK=, SK.top(N)` | 95 | 96 | ## Sequence diagrams 97 | 98 | ### Get scores for links 99 | 100 | ```mermaid 101 | sequenceDiagram 102 | actor Extension 103 | participant API 104 | participant Database 105 | Extension->>API: GET /scores?for=[link1, link2, ...] 106 | activate API 107 | API->>API: Validate request 108 | alt Request Error 109 | API->>Extension: Request Error (Invalid params / authentication...) 110 | end 111 | API->>Database: BatchGetItem(Table:Discontent - PK,SK=) 112 | Note over API,Database: If a link does not yet exist in the table, it's not returned 113 | activate Database 114 | alt Database Error 115 | Database->>API: Database Error (connection / server...) 116 | API->>Extension: Server Error 117 | end 118 | Database->>API: Return [{link, sum_of_votes, count_of_votes}, ...] 119 | API->>API: Calculate scores 120 | API->>Extension: Return [{link, score}, ...] 
121 | deactivate Database 122 | deactivate API 123 | ``` 124 | 125 | ### Submit a vote for a link 126 | 127 | ```mermaid 128 | sequenceDiagram 129 | actor Extension 130 | participant API 131 | participant Database 132 | Extension->>API: POST /vote {link, vote, user_id} 133 | activate API 134 | API->>API: Validate parameters 135 | alt Invalid parameters 136 | API->>Extension: Error: Invalid parameters 137 | end 138 | API->>Database: Check user history & settings. GetBatchItems(___________) 139 | Note over API,Database: Voting disabled? GetItem(PK=settings, SK=settings) 140 | Note over API,Database: Too many votes? GetItem(PK=date, SK=user_id) 141 | Note over API,Database: User exists? User banned? GetItem(PK=user_id, SK=user_id) 142 | Note over API,Database: Already voted? GetItem(PK=link, SK=user_id) 143 | activate Database 144 | alt Database Error 145 | Database->>API: Database Error (connection / server...) 146 | API->>Extension: Server Error 147 | end 148 | Database->>API: Return UserHistory & Settings 149 | deactivate Database 150 | API->>API: Check user history & Settings 151 | alt Failed 152 | API->>Extension: 403 Forbidden: Too many votes / banned / voting disabled 153 | end 154 | activate Database 155 | API->>Database: Submit vote. 
BatchWriteItems(_________________) 156 | alt If user does not exist 157 | Note over API,Database: Put(PK=user_id, SK=user_id | not_banned,created_at) 158 | Note over API,Database: 159 | else First time user voting on link 160 | Note over API,Database: Put(PK=link, SK=user_id | vote) 161 | Note over API,Database: Update(PK=link, SK=link | count_of_votes++, sum_of_votes+=vote) 162 | Note over API,Database: -- Add history 163 | Note over API,Database: Update(PK=day, SK=link | count++, sum+=vote) 164 | Note over API,Database: Update(PK=day, SK=user | count++, sum+=vote) 165 | else If user already voted on link 166 | Note over API,Database: Put(PK=link, SK=user_id | vote) 167 | Note over API,Database: Update(PK=link, SK=link | sum_of_votes+=(-old_vote+new_vote)) 168 | Note over API,Database: -- Undo old history 169 | Note over API,Database: Update(PK=old_day, SK=link | count--, sum-=old_vote) 170 | Note over API,Database: Update(PK=old_day, SK=user | count--, sum-=old_vote) 171 | Note over API,Database: -- Add history 172 | Note over API,Database: Update(PK=day, SK=link | count++, sum+=vote) 173 | Note over API,Database: Update(PK=day, SK=user | count++, sum+=vote) 174 | end 175 | activate Database 176 | alt Database Error 177 | Database->>API: Database Error (connection / server...) 
178 | API->>Extension: Server Error 179 | end 180 | Database->>API: Return success 181 | API->>Extension: Return success 182 | deactivate Database 183 | deactivate API 184 | ``` 185 | 186 | # Testing 187 | 188 | Some manual tests are required right now: 189 | 190 | - Check that everything works on the Tor browser for the [DuckDuckGo onion site](https://duckduckgogg42xjoc72x3sjasowoarfbgcmvfimaftt6twagswzczad.onion/) 191 | -------------------------------------------------------------------------------- /backend/lambda/src/dynamodb.rs: -------------------------------------------------------------------------------- 1 | use aws_sdk_dynamodb::model::{ 2 | AttributeValue::{self, *}, 3 | *, 4 | }; 5 | use std::collections::HashMap; 6 | use uuid::Uuid; 7 | 8 | use crate::types::{database::Vote, Config, Link}; 9 | 10 | pub fn get_settings() -> HashMap { 11 | HashMap::from([ 12 | ("PK".to_string(), S("settings".to_string())), 13 | ("SK".to_string(), S("settings".to_string())), 14 | ]) 15 | } 16 | 17 | pub fn get_user(user_id: &Uuid) -> HashMap { 18 | HashMap::from([ 19 | ( 20 | "PK".to_string(), 21 | S(format!("user#{}", user_id.hyphenated())), 22 | ), 23 | ( 24 | "SK".to_string(), 25 | S(format!("user#{}", user_id.hyphenated())), 26 | ), 27 | ]) 28 | } 29 | 30 | pub fn get_daily_user_history(day: &String, user_id: &Uuid) -> HashMap { 31 | HashMap::from([ 32 | ("PK".to_string(), S(format!("day#{}", day))), 33 | ( 34 | "SK".to_string(), 35 | S(format!("user#{}", user_id.hyphenated())), 36 | ), 37 | ]) 38 | } 39 | 40 | pub fn get_vote(vote: &Vote) -> HashMap { 41 | HashMap::from([ 42 | ("PK".to_string(), S(format!("link#{}", vote.link.hostname))), 43 | ( 44 | "SK".to_string(), 45 | S(format!("user#{}", vote.user_id.hyphenated())), 46 | ), 47 | ]) 48 | } 49 | 50 | pub fn put_new_user(user_id: &Uuid, created_at: &String, config: &Config) -> TransactWriteItem { 51 | TransactWriteItem::builder() 52 | .put( 53 | Put::builder() 54 | .item("PK", S(format!("user#{}", 
user_id.hyphenated()))) 55 | .item("SK", S(format!("user#{}", user_id.hyphenated()))) 56 | .item("entity_type", S("User".to_string())) 57 | .item("created_at", S(created_at.clone())) 58 | .item("is_banned", Bool(false)) 59 | .table_name(&config.table_name) 60 | .build(), 61 | ) 62 | .build() 63 | } 64 | 65 | pub fn put_vote(vote: &Vote, config: &Config) -> TransactWriteItem { 66 | TransactWriteItem::builder() 67 | .put( 68 | Put::builder() 69 | .item("PK", S(format!("link#{}", vote.link.hostname))) 70 | .item("SK", S(format!("user#{}", vote.user_id.hyphenated()))) 71 | .item("entity_type", S("Vote".to_string())) 72 | .item("value", N(vote.value.to_string())) 73 | .item("created_at", S(vote.created_at.clone())) 74 | .item("UserVotes_PK", S(vote.user_id.hyphenated().to_string())) 75 | .table_name(&config.table_name) 76 | .build(), 77 | ) 78 | .build() 79 | } 80 | 81 | pub fn update_link_detail(link: &Link, vote_value: i32, config: &Config) -> TransactWriteItem { 82 | TransactWriteItem::builder() 83 | .update( 84 | Update::builder() 85 | .key("PK", S(format!("link#{}", link.hostname))) 86 | .key("SK", S(format!("link#{}", link.hostname))) 87 | .update_expression(format!( 88 | "SET {},{},{}", 89 | "count_of_votes = if_not_exists(count_of_votes, :zero) + :one", 90 | "sum_of_votes = if_not_exists(sum_of_votes, :zero) + :value", 91 | "entity_type = :entity_type", 92 | )) 93 | .expression_attribute_values(":value", N(vote_value.to_string())) 94 | .expression_attribute_values(":zero", N(0.to_string())) 95 | .expression_attribute_values(":one", N(1.to_string())) 96 | .expression_attribute_values(":entity_type", S("LinkDetail".to_string())) 97 | .table_name(&config.table_name) 98 | .build(), 99 | ) 100 | .build() 101 | } 102 | 103 | pub fn update_existing_link_detail( 104 | link: &Link, 105 | vote_value_change: i32, 106 | config: &Config, 107 | ) -> TransactWriteItem { 108 | TransactWriteItem::builder() 109 | .update( 110 | Update::builder() 111 | .key("PK", 
S(format!("link#{}", link.hostname))) 112 | .key("SK", S(format!("link#{}", link.hostname))) 113 | .update_expression(format!("SET {}", "sum_of_votes = sum_of_votes + :change",)) 114 | .expression_attribute_values(":change", N(vote_value_change.to_string())) 115 | .table_name(&config.table_name) 116 | .build(), 117 | ) 118 | .build() 119 | } 120 | 121 | pub fn increment_link_history(day: &String, vote: &Vote, config: &Config) -> TransactWriteItem { 122 | TransactWriteItem::builder() 123 | .update( 124 | Update::builder() 125 | .key("PK", S(format!("day#{}", day))) 126 | .key("SK", S(format!("link#{}", vote.link.hostname))) 127 | .update_expression(format!( 128 | "SET {},{},{},{}", 129 | "count_of_votes = if_not_exists(count_of_votes, :zero) + :one", 130 | "sum_of_votes = if_not_exists(sum_of_votes, :zero) + :value", 131 | "entity_type = :entity_type", 132 | "DailyLinkHistory_PK = :DailyLinkHistory_PK", 133 | )) 134 | .expression_attribute_values(":value", N(vote.value.to_string())) 135 | .expression_attribute_values(":zero", N(0.to_string())) 136 | .expression_attribute_values(":one", N(1.to_string())) 137 | .expression_attribute_values(":entity_type", S("LinkHistory".to_string())) 138 | .expression_attribute_values(":DailyLinkHistory_PK", S(format!("day#{}", day))) 139 | .table_name(&config.table_name) 140 | .build(), 141 | ) 142 | .build() 143 | } 144 | 145 | pub fn increment_user_history(day: &String, vote: &Vote, config: &Config) -> TransactWriteItem { 146 | TransactWriteItem::builder() 147 | .update( 148 | Update::builder() 149 | .key("PK", S(format!("day#{}", day))) 150 | .key("SK", S(format!("user#{}", vote.user_id.hyphenated()))) 151 | .update_expression(format!( 152 | "SET {},{},{},{}", 153 | "count_of_votes = if_not_exists(count_of_votes, :zero) + :one", 154 | "sum_of_votes = if_not_exists(sum_of_votes, :zero) + :value", 155 | "entity_type = :entity_type", 156 | "DailyUserHistory_PK = :DailyUserHistory_PK", 157 | )) 158 | 
.expression_attribute_values(":value", N(vote.value.to_string())) 159 | .expression_attribute_values(":zero", N(0.to_string())) 160 | .expression_attribute_values(":one", N(1.to_string())) 161 | .expression_attribute_values(":entity_type", S("UserHistory".to_string())) 162 | .expression_attribute_values(":DailyUserHistory_PK", S(format!("day#{}", day))) 163 | .table_name(&config.table_name) 164 | .build(), 165 | ) 166 | .build() 167 | } 168 | 169 | pub fn revert_link_history(old_vote: &Vote, link: &Link, config: &Config) -> TransactWriteItem { 170 | let old_day = &old_vote.created_at[..10].to_string(); 171 | TransactWriteItem::builder() 172 | .update( 173 | Update::builder() 174 | .key("PK", S(format!("day#{}", old_day))) 175 | .key("SK", S(format!("link#{}", link.hostname))) 176 | .update_expression(format!( 177 | "SET {},{}", 178 | "count_of_votes = count_of_votes - :one", 179 | "sum_of_votes = sum_of_votes - :value", 180 | )) 181 | .expression_attribute_values(":value", N(old_vote.value.to_string())) 182 | .expression_attribute_values(":one", N(1.to_string())) 183 | .table_name(&config.table_name) 184 | .build(), 185 | ) 186 | .build() 187 | } 188 | 189 | pub fn revert_user_history(old_vote: &Vote, user_id: &Uuid, config: &Config) -> TransactWriteItem { 190 | let old_day = &old_vote.created_at[..10].to_string(); 191 | TransactWriteItem::builder() 192 | .update( 193 | Update::builder() 194 | .key("PK", S(format!("day#{}", old_day))) 195 | .key("SK", S(format!("user#{}", user_id.hyphenated()))) 196 | .update_expression(format!( 197 | "SET {},{}", 198 | "count_of_votes = count_of_votes - :one", 199 | "sum_of_votes = sum_of_votes - :value", 200 | )) 201 | .expression_attribute_values(":value", N(old_vote.value.to_string())) 202 | .expression_attribute_values(":one", N(1.to_string())) 203 | .table_name(&config.table_name) 204 | .build(), 205 | ) 206 | .build() 207 | } 208 | 209 | pub fn update_link_history( 210 | day: &String, 211 | old_vote: &Vote, 212 | vote: 
&Vote, 213 | config: &Config, 214 | ) -> TransactWriteItem { 215 | let vote_value_change = vote.value - old_vote.value; 216 | TransactWriteItem::builder() 217 | .update( 218 | Update::builder() 219 | .key("PK", S(format!("day#{}", day))) 220 | .key("SK", S(format!("link#{}", vote.link.hostname))) 221 | .update_expression(format!("SET {}", "sum_of_votes = sum_of_votes + :change",)) 222 | .expression_attribute_values(":change", N(vote_value_change.to_string())) 223 | .table_name(&config.table_name) 224 | .build(), 225 | ) 226 | .build() 227 | } 228 | 229 | pub fn update_user_history( 230 | day: &String, 231 | old_vote: &Vote, 232 | vote: &Vote, 233 | config: &Config, 234 | ) -> TransactWriteItem { 235 | let vote_value_change = vote.value - old_vote.value; 236 | TransactWriteItem::builder() 237 | .update( 238 | Update::builder() 239 | .key("PK", S(format!("day#{}", day))) 240 | .key("SK", S(format!("user#{}", vote.user_id.hyphenated()))) 241 | .update_expression(format!("SET {}", "sum_of_votes = sum_of_votes + :change",)) 242 | .expression_attribute_values(":change", N(vote_value_change.to_string())) 243 | .table_name(&config.table_name) 244 | .build(), 245 | ) 246 | .build() 247 | } 248 | -------------------------------------------------------------------------------- /backend/config/Database-NoSQLWorkbench-Model.json: -------------------------------------------------------------------------------- 1 | { 2 | "ModelName": "Discontent Data Model", 3 | "ModelMetadata": { 4 | "Author": "Tom Barone", 5 | "DateCreated": "Feb 01, 2023, 10:42 AM", 6 | "DateLastModified": "Mar 03, 2023, 05:36 PM", 7 | "Description": "Database model for Discontent", 8 | "AWSService": "Amazon DynamoDB", 9 | "Version": "3.0" 10 | }, 11 | "DataModel": [ 12 | { 13 | "TableName": "Discontent", 14 | "KeyAttributes": { 15 | "PartitionKey": { 16 | "AttributeName": "PK", 17 | "AttributeType": "S" 18 | }, 19 | "SortKey": { 20 | "AttributeName": "SK", 21 | "AttributeType": "S" 22 | } 23 | }, 24 | 
"NonKeyAttributes": [ 25 | { 26 | "AttributeName": "entity_type", 27 | "AttributeType": "S" 28 | }, 29 | { 30 | "AttributeName": "count_of_votes", 31 | "AttributeType": "N" 32 | }, 33 | { 34 | "AttributeName": "sum_of_votes", 35 | "AttributeType": "N" 36 | }, 37 | { 38 | "AttributeName": "value", 39 | "AttributeType": "N" 40 | }, 41 | { 42 | "AttributeName": "created_at", 43 | "AttributeType": "S" 44 | }, 45 | { 46 | "AttributeName": "is_banned", 47 | "AttributeType": "BOOL" 48 | }, 49 | { 50 | "AttributeName": "UserVotes_PK", 51 | "AttributeType": "S" 52 | }, 53 | { 54 | "AttributeName": "DailyUserHistory_PK", 55 | "AttributeType": "S" 56 | }, 57 | { 58 | "AttributeName": "DailyLinkHistory_PK", 59 | "AttributeType": "S" 60 | }, 61 | { 62 | "AttributeName": "voting_is_disabled", 63 | "AttributeType": "BOOL" 64 | }, 65 | { 66 | "AttributeName": "maximum_votes_per_user_per_day", 67 | "AttributeType": "N" 68 | } 69 | ], 70 | "GlobalSecondaryIndexes": [ 71 | { 72 | "IndexName": "UserVotes", 73 | "KeyAttributes": { 74 | "PartitionKey": { 75 | "AttributeName": "UserVotes_PK", 76 | "AttributeType": "S" 77 | }, 78 | "SortKey": { 79 | "AttributeName": "created_at", 80 | "AttributeType": "S" 81 | } 82 | }, 83 | "Projection": { 84 | "ProjectionType": "INCLUDE", 85 | "NonKeyAttributes": [ 86 | "entity_type", 87 | "value", 88 | "PK" 89 | ] 90 | } 91 | }, 92 | { 93 | "IndexName": "DailyUserHistory", 94 | "KeyAttributes": { 95 | "PartitionKey": { 96 | "AttributeName": "DailyUserHistory_PK", 97 | "AttributeType": "S" 98 | }, 99 | "SortKey": { 100 | "AttributeName": "count_of_votes", 101 | "AttributeType": "N" 102 | } 103 | }, 104 | "Projection": { 105 | "ProjectionType": "INCLUDE", 106 | "NonKeyAttributes": [ 107 | "entity_type", 108 | "SK", 109 | "sum_of_votes" 110 | ] 111 | } 112 | }, 113 | { 114 | "IndexName": "DailyLinkHistoryByCountOfVotes", 115 | "KeyAttributes": { 116 | "PartitionKey": { 117 | "AttributeName": "DailyLinkHistory_PK", 118 | "AttributeType": "S" 119 | }, 120 | 
"SortKey": { 121 | "AttributeName": "count_of_votes", 122 | "AttributeType": "N" 123 | } 124 | }, 125 | "Projection": { 126 | "ProjectionType": "INCLUDE", 127 | "NonKeyAttributes": [ 128 | "entity_type", 129 | "SK", 130 | "sum_of_votes" 131 | ] 132 | } 133 | }, 134 | { 135 | "IndexName": "DailyLinkHistoryBySumOfVotes", 136 | "KeyAttributes": { 137 | "PartitionKey": { 138 | "AttributeName": "DailyLinkHistory_PK", 139 | "AttributeType": "S" 140 | }, 141 | "SortKey": { 142 | "AttributeName": "sum_of_votes", 143 | "AttributeType": "N" 144 | } 145 | }, 146 | "Projection": { 147 | "ProjectionType": "INCLUDE", 148 | "NonKeyAttributes": [ 149 | "entity_type", 150 | "SK", 151 | "count_of_votes" 152 | ] 153 | } 154 | } 155 | ], 156 | "TableData": [ 157 | { 158 | "PK": { 159 | "S": "link#www.wikipedia.org" 160 | }, 161 | "SK": { 162 | "S": "link#www.wikipedia.org" 163 | }, 164 | "entity_type": { 165 | "S": "LinkDetail" 166 | }, 167 | "count_of_votes": { 168 | "N": "2" 169 | }, 170 | "sum_of_votes": { 171 | "N": "2" 172 | } 173 | }, 174 | { 175 | "PK": { 176 | "S": "link#www.wikipedia.org" 177 | }, 178 | "SK": { 179 | "S": "user#john" 180 | }, 181 | "entity_type": { 182 | "S": "Vote" 183 | }, 184 | "value": { 185 | "N": "1" 186 | }, 187 | "created_at": { 188 | "S": "2023-01-10T20:18:29Z" 189 | }, 190 | "UserVotes_PK": { 191 | "S": "john" 192 | } 193 | }, 194 | { 195 | "PK": { 196 | "S": "link#www.wikipedia.org" 197 | }, 198 | "SK": { 199 | "S": "user#jeff" 200 | }, 201 | "entity_type": { 202 | "S": "Vote" 203 | }, 204 | "value": { 205 | "N": "1" 206 | }, 207 | "created_at": { 208 | "S": "2023-01-10T20:18:29Z" 209 | }, 210 | "UserVotes_PK": { 211 | "S": "jeff" 212 | } 213 | }, 214 | { 215 | "PK": { 216 | "S": "link#www.google.com" 217 | }, 218 | "SK": { 219 | "S": "link#www.google.com" 220 | }, 221 | "entity_type": { 222 | "S": "LinkDetail" 223 | }, 224 | "count_of_votes": { 225 | "N": "2" 226 | }, 227 | "sum_of_votes": { 228 | "N": "0" 229 | } 230 | }, 231 | { 232 | "PK": { 
233 | "S": "link#www.google.com" 234 | }, 235 | "SK": { 236 | "S": "user#john" 237 | }, 238 | "entity_type": { 239 | "S": "Vote" 240 | }, 241 | "value": { 242 | "N": "1" 243 | }, 244 | "created_at": { 245 | "S": "2023-01-10T20:18:29Z" 246 | }, 247 | "UserVotes_PK": { 248 | "S": "john" 249 | } 250 | }, 251 | { 252 | "PK": { 253 | "S": "link#www.google.com" 254 | }, 255 | "SK": { 256 | "S": "user#steve" 257 | }, 258 | "entity_type": { 259 | "S": "Vote" 260 | }, 261 | "value": { 262 | "N": "-1" 263 | }, 264 | "created_at": { 265 | "S": "2023-01-10T20:18:29Z" 266 | }, 267 | "UserVotes_PK": { 268 | "S": "steve" 269 | } 270 | }, 271 | { 272 | "PK": { 273 | "S": "user#jeff" 274 | }, 275 | "SK": { 276 | "S": "user#jeff" 277 | }, 278 | "entity_type": { 279 | "S": "User" 280 | }, 281 | "created_at": { 282 | "S": "2023-01-10T20:18:29Z" 283 | }, 284 | "is_banned": { 285 | "BOOL": true 286 | } 287 | }, 288 | { 289 | "PK": { 290 | "S": "user#john" 291 | }, 292 | "SK": { 293 | "S": "user#john" 294 | }, 295 | "entity_type": { 296 | "S": "User" 297 | }, 298 | "created_at": { 299 | "S": "2023-01-10T20:18:29Z" 300 | }, 301 | "is_banned": { 302 | "BOOL": false 303 | } 304 | }, 305 | { 306 | "PK": { 307 | "S": "user#steve" 308 | }, 309 | "SK": { 310 | "S": "user#steve" 311 | }, 312 | "entity_type": { 313 | "S": "User" 314 | }, 315 | "created_at": { 316 | "S": "2023-01-10T20:18:29Z" 317 | }, 318 | "is_banned": { 319 | "BOOL": false 320 | } 321 | }, 322 | { 323 | "PK": { 324 | "S": "day#2023-01-10" 325 | }, 326 | "SK": { 327 | "S": "user#john" 328 | }, 329 | "entity_type": { 330 | "S": "UserHistory" 331 | }, 332 | "count_of_votes": { 333 | "N": "22" 334 | }, 335 | "sum_of_votes": { 336 | "N": "20" 337 | }, 338 | "DailyUserHistory_PK": { 339 | "S": "2023-01-10" 340 | } 341 | }, 342 | { 343 | "PK": { 344 | "S": "day#2023-01-10" 345 | }, 346 | "SK": { 347 | "S": "user#jeff" 348 | }, 349 | "entity_type": { 350 | "S": "UserHistory" 351 | }, 352 | "count_of_votes": { 353 | "N": "12" 354 | }, 
355 | "sum_of_votes": { 356 | "N": "2" 357 | }, 358 | "DailyUserHistory_PK": { 359 | "S": "2023-01-10" 360 | } 361 | }, 362 | { 363 | "PK": { 364 | "S": "day#2023-01-10" 365 | }, 366 | "SK": { 367 | "S": "user#steve" 368 | }, 369 | "entity_type": { 370 | "S": "UserHistory" 371 | }, 372 | "count_of_votes": { 373 | "N": "1" 374 | }, 375 | "sum_of_votes": { 376 | "N": "-1" 377 | }, 378 | "DailyUserHistory_PK": { 379 | "S": "2023-01-10" 380 | } 381 | }, 382 | { 383 | "PK": { 384 | "S": "day#2023-01-10" 385 | }, 386 | "SK": { 387 | "S": "link#www.wikipedia.org" 388 | }, 389 | "entity_type": { 390 | "S": "LinkHistory" 391 | }, 392 | "count_of_votes": { 393 | "N": "20" 394 | }, 395 | "sum_of_votes": { 396 | "N": "10" 397 | }, 398 | "DailyLinkHistory_PK": { 399 | "S": "2023-01-10" 400 | } 401 | }, 402 | { 403 | "PK": { 404 | "S": "day#2023-01-10" 405 | }, 406 | "SK": { 407 | "S": "link#www.google.com" 408 | }, 409 | "entity_type": { 410 | "S": "LinkHistory" 411 | }, 412 | "count_of_votes": { 413 | "N": "10" 414 | }, 415 | "sum_of_votes": { 416 | "N": "-10" 417 | }, 418 | "DailyLinkHistory_PK": { 419 | "S": "2023-01-10" 420 | } 421 | }, 422 | { 423 | "PK": { 424 | "S": "settings" 425 | }, 426 | "SK": { 427 | "S": "settings" 428 | }, 429 | "entity_type": { 430 | "S": "Settings" 431 | }, 432 | "voting_is_disabled": { 433 | "BOOL": false 434 | }, 435 | "maximum_votes_per_user_per_day": { 436 | "N": "10" 437 | } 438 | } 439 | ], 440 | "DataAccess": { 441 | "MySql": {} 442 | }, 443 | "BillingMode": "PAY_PER_REQUEST" 444 | } 445 | ] 446 | } -------------------------------------------------------------------------------- /backend/database.py: -------------------------------------------------------------------------------- 1 | import fire 2 | import boto3 3 | import csv 4 | from cfn_tools import load_yaml 5 | import yaml 6 | import requests 7 | import glob 8 | import pandas as pd 9 | from urllib.parse import urlparse 10 | from tqdm import tqdm 11 | import amazon.ion.simpleion as 
CONFIG_FILE = './template.production.yaml'
API_ENDPOINT = 'http://localhost:9000/lambda-url/request-handler'
FIXTURES_FILE = 'fixtures/database.yaml'

# All table operations below go to the DynamoDB endpoint on localhost:8000.
dynamodb = boto3.client(
    'dynamodb',
    endpoint_url="http://localhost:8000",
)


def _load_fixtures():
    """Return the parsed contents of the fixtures YAML file."""
    with open(FIXTURES_FILE) as f:
        return yaml.safe_load(f)


def _ion_item(*fields):
    """Return one seed row: an Ion document wrapping ``Item:{field,...}``."""
    return '$ion_1_0 {Item:{' + ','.join(fields) + '}}'


def create():
    """Create the table described by the CloudFormation template."""
    dynamodb.create_table(**_load_config())


def drop():
    """Delete every table on the endpoint, waiting for each to disappear."""
    for table in dynamodb.list_tables()['TableNames']:
        dynamodb.delete_table(TableName=table)
        dynamodb.get_waiter('table_not_exists').wait(TableName=table)


def load_settings():
    """Write the settings item from the fixtures file into the table."""
    settings = _load_fixtures()['settings']
    config = _load_config()
    dynamodb.put_item(
        TableName=config['TableName'],
        Item={
            'PK': {
                'S': 'settings'
            },
            'SK': {
                'S': 'settings'
            },
            'entity_type': {
                'S': 'Settings'
            },
            'voting_is_disabled': {
                'BOOL': settings['voting_is_disabled']
            },
            'maximum_votes_per_user_per_day': {
                'N': str(settings['maximum_votes_per_user_per_day'])
            },
        })
    print('Loaded initial database settings')


def load_development_votes():
    """POST generated votes to the local API for every development link.

    For each entry of ``development_links`` in the fixtures file,
    ``count_of_votes`` votes are submitted, each from a different generated
    user id, with +1/-1 values chosen to steer the running total towards
    ``sum_of_votes``.

    Raises:
        requests.HTTPError: if the API rejects a vote.
    """
    fixtures = _load_fixtures()
    for link_detail in tqdm(fixtures['development_links']):
        count_of_votes = link_detail['count_of_votes']
        sum_of_votes = link_detail['sum_of_votes']
        link = link_detail['link']

        # Generate votes to fit the count and sum
        temp_sum = 0
        for i in tqdm(range(count_of_votes)):
            vote_value = 1 if temp_sum <= sum_of_votes else -1
            temp_sum += vote_value
            vote = {
                "link": {
                    "hostname": link
                },
                "value": vote_value,
                # Generate a UUID, <3 bel
                "user_id": f"beda{i:04}-4822-4342-0990-b92d94d9489a",
            }
            response = requests.post(f'{API_ENDPOINT}/v1/vote', json=vote)
            response.raise_for_status()
    print('Loaded development votes')


def seed_with_previous_votes(seed_endpoint=''):
    """Replay the votes in ``./seed/previous_votes.csv`` against an API.

    The CSV is expected to have ``PK`` (``link#<hostname>``), ``SK``
    (``user#<uuid>``) and ``value`` columns.

    Args:
        seed_endpoint: Base URL of the API to post to; ``/v1/vote`` is
            appended. Must be supplied, the default is an empty placeholder.

    Raises:
        ValueError: if ``seed_endpoint`` is empty.
        requests.HTTPError: if the API rejects a vote.
    """
    if not seed_endpoint:
        raise ValueError('seed_endpoint must be set to the API base URL')

    df = pd.read_csv('./seed/previous_votes.csv')
    df['link'] = df['PK'].apply(lambda x: x.replace('link#', ''))
    df['user'] = df['SK'].apply(lambda x: x.replace('user#', ''))
    # pandas parses the column as numbers, so comparing against the string
    # '1' never matched and turned every vote into -1. Compare numerically;
    # float() also accepts the value if it does arrive as text.
    df['vote_value'] = df['value'].apply(lambda x: 1 if float(x) > 0 else -1)
    for _, row in df.iterrows():
        vote = {
            "link": {
                "hostname": row['link']
            },
            # Plain int so the payload is always JSON serialisable
            "value": int(row['vote_value']),
            # Reuse the user id recorded with the original vote
            "user_id": row['user'],
        }
        response = requests.post(f'{seed_endpoint}/v1/vote', json=vote)
        response.raise_for_status()


def create_list_of_bad_sites_and_check_if_still_active():
    """Build a list of blocklisted hosts that still respond.

    1. Collect hosts from the public blocklists under
       ``./seed/public_blocklists``, as ``http://`` and ``https://`` URLs.
    2. Run ``lychee`` over those URLs.
    3. Drop every URL lychee reports as failed and write the remaining
       hostnames to ``./tmp/checked_bad_sites.txt``.
    """
    lychee_input = './tmp/bad_sites_to_check_with_lychee.txt'
    lychee_output_file = './tmp/lychee_output.json'
    bad_sites = set()

    def add_bad_site(site):
        site = site.replace(".*", "").replace("*.", "")
        bad_sites.add(f'http://{site}/')
        bad_sites.add(f'https://{site}/')

    # Lists with one domain per line, optionally followed by extra fields
    for blocklist in [
            'bad_cloners', 'content_farms', 'extra_content_farms',
            'nearly_content_farms'
    ]:
        with open(f'./seed/public_blocklists/danny0838_{blocklist}.txt') as f:
            for line in f:
                stripped_line = line.strip()
                if not stripped_line or stripped_line.startswith(('#', '/')):
                    continue
                site = stripped_line.split(' ')[0]  # The first part is the domain
                add_bad_site(site)
                add_bad_site(f'www.{site}')

    # Lists of full URLs: the host is the third '/'-separated part
    for blocklist in [
            'fake_webstores', 'github_splogs', 'stack_overflow_translations'
    ]:
        with open(f'./seed/public_blocklists/{blocklist}.txt') as f:
            for line in f:
                parts = line.strip().split('/')
                if len(parts) < 3:
                    # Blank or non-URL line; indexing [2] used to crash here
                    continue
                add_bad_site(parts[2])

    # Match patterns of the form *://*.<domain>/*
    with open('./seed/public_blocklists/wdmpa_content_farms.txt') as f:
        for line in f:
            stripped_line = line.strip()
            if not stripped_line.startswith('*://*.'):
                continue
            add_bad_site(stripped_line[6:-2])
            add_bad_site(f'www.{stripped_line[6:-2]}')

    # The working directory may not exist on a fresh checkout
    os.makedirs('./tmp', exist_ok=True)
    with open(lychee_input, 'w') as f:
        for site in bad_sites:
            f.write(f'{site}\n')

    os.system('lychee ./tmp/bad_sites_to_check_with_lychee.txt --output '
              './tmp/lychee_output.json --format json --max-redirects 1 '
              '--max-retries 0 --max-concurrency 5000 --timeout 10')

    with open(lychee_output_file) as f:
        lychee_output = json.load(f)
    # Tolerate a report without a failure entry for our input file
    failures = lychee_output.get('fail_map', {}).get(lychee_input, [])
    for failure in failures:
        bad_sites.discard(failure['url'])

    # Strip the scheme prefix and the trailing '/' to get back to hostnames
    checked_bad_sites = {
        site[8:-1] if site.startswith('https://') else site[7:-1]
        for site in bad_sites
    }

    with open('./tmp/checked_bad_sites.txt', 'w') as f:
        for site in checked_bad_sites:
            f.write(f'{site}\n')


def generate_production_seed_data():
    """
    Process submissions from Hacker News and output them into
    the DynamoDB seed format

    1. Read the CSV and merge them into a single pandas DataFrame
    2. Map all the urls to just the domain part
    3. Combine duplicate submissions and sum their votes (drop the date column)
    4. Scale the votes so we get results between reasonable numbers

    Every generated row is parsed with ``ion.loads`` before anything is
    written, so an invalid row aborts the run without touching the output.
    """
    hacker_news_submissions = glob.glob(
        'seed/hacker_news_submissions/submissions_*.csv')
    bad_sites = 'seed/bad_sites_still_active.csv'
    output = 'seed/seed.ion'

    # Step 1
    df_good_sites = pd.concat([
        pd.read_csv(f, names=['date', 'link', 'votes'])
        for f in hacker_news_submissions
    ])
    df_bad_sites = pd.read_csv(bad_sites, names=['link'])

    # Step 2
    df_good_sites['old_link'] = df_good_sites['link']
    df_good_sites['link'] = df_good_sites['link'].map(
        lambda link: urlparse(link).hostname)
    # Manual fix for _.0xffff.me
    df_good_sites['link'] = df_good_sites['link'].replace(
        '_.0xffff.me', 'me.0xffff.me')

    # Because it's interesting to look at
    ranked_list = df_good_sites.groupby(['link', 'old_link'
                                         ])['votes'].sum().to_frame()
    ranked_list = ranked_list.sort_values(by='votes', ascending=False)
    ranked_list.to_csv('seed/ranked_list_of_good_sites.csv',
                       quoting=csv.QUOTE_ALL)

    # Step 3
    df_good_sites = df_good_sites.groupby(['link'])['votes'].sum().to_frame()
    df_good_sites = df_good_sites.sort_values(by='votes', ascending=False)
    df_good_sites = df_good_sites.reset_index()

    # Step 4
    new_min = 25
    new_max = 50
    current_min = df_good_sites['votes'].min()
    current_max = df_good_sites['votes'].max()
    vote_range = current_max - current_min
    if vote_range == 0:
        # Every site has the same total: the linear rescale would divide by
        # zero, so give them all the bottom of the target range.
        df_good_sites['scaled_votes'] = new_min
    else:
        df_good_sites['scaled_votes'] = (
            (new_max - new_min) * (df_good_sites['votes'] - current_min) /
            vote_range + new_min).astype(int)
    df_good_sites = df_good_sites.drop(columns=['votes'])
    df_bad_sites['scaled_votes'] = -20

    seed_rows = []
    created_at = '2022-07-27T12:30:00Z'
    day = '2022-07-27'
    user_id = 'beda0000-4822-4342-0990-b92d94d9489a'
    df = pd.concat([df_good_sites, df_bad_sites])

    for _, row in df.iterrows():
        link = row['link']
        sum_votes = row['scaled_votes']
        count_votes = abs(row['scaled_votes'])
        vote_value = 1 if row['scaled_votes'] > 0 else -1

        # Vote (only need one, it's inconsistent but it works)
        seed_rows.append(
            _ion_item(f'PK:"link#{link}"', f'SK:"user#{user_id}"',
                      f'value:{vote_value}.', f'created_at:"{created_at}"',
                      f'UserVotes_PK:"{user_id}"', 'entity_type:"Vote"'))

        # Links
        seed_rows.append(
            _ion_item(f'PK:"link#{link}"', f'SK:"link#{link}"',
                      f'count_of_votes:{count_votes}.',
                      f'sum_of_votes:{sum_votes}.',
                      'entity_type:"LinkDetail"'))

        # Link histories
        seed_rows.append(
            _ion_item(f'PK:"day#{day}"', f'SK:"link#{link}"',
                      f'count_of_votes:{count_votes}.',
                      f'sum_of_votes:{sum_votes}.',
                      f'DailyLinkHistory_PK:"day#{day}"',
                      'entity_type:"LinkHistory"'))

    # Users
    # NOTE(review): the seed user is written as banned, presumably so this
    # id cannot vote afterwards - confirm.
    seed_rows.append(
        _ion_item(f'PK:"user#{user_id}"', f'SK:"user#{user_id}"',
                  'is_banned:true', f'created_at:"{created_at}"',
                  'entity_type:"User"'))

    # User histories
    # NOTE(review): is_banned on a UserHistory item looks like a copy of the
    # user row above; kept so the generated seed is unchanged - confirm.
    seed_rows.append(
        _ion_item(f'PK:"day#{day}"', f'SK:"user#{user_id}"',
                  'is_banned:true',
                  f'count_of_votes:{abs(df["scaled_votes"]).sum()}.',
                  f'sum_of_votes:{df["scaled_votes"].sum()}.',
                  f'DailyUserHistory_PK:"day#{day}"',
                  'entity_type:"UserHistory"'))

    # Settings
    seed_rows.append(
        _ion_item('PK:"settings"', 'SK:"settings"',
                  'voting_is_disabled:false',
                  'maximum_votes_per_user_per_day:10.',
                  'entity_type:"Settings"'))

    for item in seed_rows:
        # Check that the items are all valid ion objects
        ion.loads(item)

    # Write seed_rows to output file
    with open(output, "w") as outfile:
        outfile.write("\n".join(seed_rows))


def setup():
    """Drop all tables, recreate the table and load the settings item."""
    drop()
    create()
    load_settings()


def _load_config():
    """Return the ``create_table`` arguments from the template.

    Reads ``Resources.Database.Properties`` from ``CONFIG_FILE`` and keeps
    the table name, key schema, attribute definitions, global secondary
    indexes and billing mode.
    """
    with open(CONFIG_FILE) as f:
        config = load_yaml(f)
    properties = config['Resources']['Database']['Properties']
    wanted = ('TableName', 'KeySchema', 'AttributeDefinitions',
              'GlobalSecondaryIndexes', 'BillingMode')
    return {name: properties[name] for name in wanted}


if __name__ == '__main__':
    fire.Fire({
        'create': create,
        'drop': drop,
        'setup': setup,
        'load_settings': load_settings,
        'load_development_votes': load_development_votes,
        'generate_production_seed_data': generate_production_seed_data,
        'seed_with_previous_votes': seed_with_previous_votes
    })