├── .gitignore ├── INSTALL.md ├── LICENSE ├── README.md ├── admin ├── addBlacklistItem.py ├── clean.sh ├── cleanLogs.sh ├── cleanMongoDb.sh ├── init.sh ├── mongoCleaner.js ├── printUserStatistics.py ├── start.sh ├── startMongoDb.sh ├── startServer.sh ├── stop.sh ├── stopMongoDb.sh └── stopServer.sh ├── package.json ├── references ├── home-feed.html ├── sample-story.html └── weekly-report.html ├── screenshots └── email-report.png └── server ├── Cleanser.js ├── Logger.js ├── MongoClient.js ├── PropertyManager.js ├── ReportMailman.js ├── cleanser.properties ├── main.js ├── paths.js └── util.js /.gitignore: -------------------------------------------------------------------------------- 1 | /node_modules 2 | /logs 3 | package-lock.json 4 | -------------------------------------------------------------------------------- /INSTALL.md: -------------------------------------------------------------------------------- 1 | # Install 2 | 3 | The following instructions have been tested on macOS (for development) and Rocky 9 (for production). While it's certainly possible to host or test the Hacker News Cleanser in different environments, doing so may require adjustments to the installation steps. 4 | 5 | ## Requirements 6 | 7 | * An "always on" server at home or a virtual machine provided by [AWS](https://aws.amazon.com), [DigitalOcean](https://www.digitalocean.com), or other providers. I use a DigitalOcean ["Droplet"](https://www.digitalocean.com/products/droplets/). 8 | * A Linux flavor. I use [Rocky 9](https://rockylinux.org), but other distributions should work just fine. macOS also works fine for development or production (assuming "always on"). Windows *probably* works fine, but is untested and may require additional setup. 9 | * [node.js](https://www.digitalocean.com/community/tutorials/how-to-install-node-js-on-a-centos-7-server): I used v20, though newer versions should be fine. Older versions may or may not work. 10 | * [mongod](https://docs.mongodb.com/manual/tutorial/install-mongodb-on-red-hat/#configure-the-package-management-system-yum): I used v7, though newer versions should be fine. Older versions may or may not work. 11 | * The [pm2](http://pm2.keymetrics.io) process manager for node.js, which makes it easy to have the Cleanser automatically start on system boot (among other quality-of-life improvements that come from running it as a service). I used v5, though newer versions should be fine. Older versions may or may not work. If you don't want this, you can skip this dependency and set `export USE_PM2=false` in `admin/init.sh`. 12 | * If you want both pm2 & the Cleanser to start on boot, ensure `export USE_PM2=true` is set in `admin/init.sh`, start the Cleanser (or restart it if it's already running), execute `pm2 save` so pm2 will automatically restore the process whenever pm2 itself starts, and finally execute `pm2 startup` to have pm2 itself start on system boot (the full sequence is sketched just after this list). 13 | * [python](https://www.python.org) & [pip](https://pypi.org/project/pip/), which are used for various administrative scripts that make it easy to add blacklist items or gather statistics. Once in place, install the [pymongo](https://api.mongodb.com/python/current/) package with `pip install pymongo` (version 3.10.1 or newer required). Python 3 is required, as Python 2 is end-of-life. If you'd rather add blacklist items manually in MongoDB yourself, you can skip these dependencies.
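For reference, the boot-on-startup sequence described in the pm2 bullet above looks roughly like the following. This is a minimal sketch assuming pm2 v5, the default `admin/init.sh` settings, and that you run it from the project's root directory:

```bash
# admin/init.sh should contain (this is the default):
#   export USE_PM2=true

# Start (or restart) the Cleanser so it runs under pm2
admin/start.sh

# Persist the current pm2 process list so the Cleanser is restored whenever pm2 starts
pm2 save

# Have pm2 itself start on system boot; pm2 prints the exact command to run for your init system
pm2 startup
```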
14 | 15 | ## Setup 16 | 17 | After preparing the requirements, clone a copy of the project from GitHub. 18 | 19 | Then, some minor configuration is required in `server/cleanser.properties`. Please set your credentials for the `hacker.news.username` and `hacker.news.password` properties, and set the other optional ones to your liking. 20 | 21 | Next, run `npm install` in the project's root directory to pull in all of the Hacker News Cleanser's dependencies. 22 | 23 | Finally, you'll need to decide on a directory you want Mongo to use for writing the Hacker News Cleanser DB. Once you've decided on a location, set that full directory path as `MONGO_DB` in `admin/init.sh`. If you want to create a directory somewhere that typically requires root permissions (such as in `/var/lib`), you **must** manually create the directory with the necessary permissions in advance with a command such as `sudo mkdir /var/lib/mongo ; sudo chown -R $(whoami) /var/lib/mongo`. Alternatively, you can choose a directory your user account is guaranteed to have write permissions to, such as somewhere in your home directory. -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Marc Barrowclift 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Hacker News Cleanser 2 | 3 | This is a self-hosted service that will auto-hide Hacker News articles by: 4 | 5 | * Site 6 | * Title text matching 7 | * Title keyword matching 8 | * Title regex matching 9 | * Username 10 | 11 | You can download the latest release [here](https://github.com/barrowclift/hacker-news-cleanser/releases/latest). 12 | 13 | ## Why This Exists 14 | 15 | I've been a "lurker" on Hacker News for three years and have long enjoyed reading the thoughtful articles and sometimes heated but always informative comments. 16 | 17 | However, over the past year or so I've noticed a trend of articles covering an array of other topics making their way onto the front page more frequently and in greater numbers. *And that's fine*.
The Internet and Hacker News do not exist to exclusively serve my fussy requirements, and clearly the articles and their information are valuable to others. 18 | 19 | With that said, it does somewhat diminish my personal Hacker News experience, given that I tend to prefer articles that are strictly related to technology. That means either manually hiding the other articles or clicking past the first page, neither of which is ideal. 20 | 21 | That's where the Hacker News Cleanser comes in; it's a "set it and forget it" service that automatically tailors my home page to more closely fit my personal taste. 22 | 23 | ## The Compromise Between Being Informed and Information Separation 24 | 25 | 26 | 27 | While I do wish for my personal Hacker News page to be more focused, I also still want the ability to see the hidden articles and read them if I so choose. No amount of filter tweaking will prevent the occasional salient article or discussion from getting caught when I would have wished to still see it. Additionally, I still feel it's important to keep a finger on the pulse of Hacker News as a whole, only in a separate view and on my own time. 28 | 29 | To achieve this, the Hacker News Cleanser supports sending out email reports styled to look just like the Hacker News homepage, containing all cleansed articles since the last report. This requires setting up a Gmail "service" account to send emails through, or using an existing Gmail account (not recommended). 30 | 31 | Personally, I have the Cleanser set to send an email report of cleansed articles every week, but it can easily be set to daily, bi-weekly, or any other day-based frequency. 32 | 33 | ## Why Not A Browser Extension? 34 | 35 | My portable operating system of choice is iOS, and since Safari on iOS [does not support traditional browser extensions](https://apple.stackexchange.com/a/321213), a browser extension would mean my phone (where easily the majority of my browsing and reading occurs) would not benefit. 36 | 37 | Additionally, there are many different browsers out there, and I have a better chance of serving the needs of other similarly-minded Hacker News readers regardless of their preferred browser or platform by focusing on one product that's independent of the browser landscape. 38 | 39 | # Setup 40 | 41 | ## Installation 42 | 43 | See [INSTALL.md](https://github.com/barrowclift/hacker-news-cleanser/blob/master/INSTALL.md). 44 | 45 | ## Starting and Stopping 46 | 47 | Starting and stopping the Hacker News Cleanser is as simple as running `admin/start.sh` or `admin/stop.sh`, respectively. 48 | 49 | If you wish to start or stop specific components of the service, use their individual, respective start/stop scripts (not recommended). 50 | 51 | ## Adding Items To Filter 52 | 53 | You can filter articles by site, title, and username. To add a new filter item, execute `admin/addBlacklistItem.py`, and the usage string will explain the rest (example invocations are sketched at the end of this README). 54 | 55 | # License 56 | 57 | The Hacker News Cleanser is open source, licensed under the MIT License. 58 | 59 | See [LICENSE](https://github.com/barrowclift/hacker-news-cleanser/blob/master/LICENSE) for more.
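Below are a few example invocations of the blacklist script. This is just an illustrative sketch: the flags correspond to the argparse options defined in `admin/addBlacklistItem.py` (shown later in this repository), the site, keyword, and regex values are arbitrary examples, the user name is hypothetical, and `mongod` is assumed to be reachable on localhost.

```bash
# Hide every story from a given site
admin/addBlacklistItem.py --site newyorker.com

# Hide stories whose titles contain the whole word "election" (no substring matches)
admin/addBlacklistItem.py --keyword election

# Hide stories whose titles contain "gpt" anywhere, even inside longer words like "GPT3.5"
admin/addBlacklistItem.py --text gpt

# Hide stories whose titles match a JavaScript-flavored regex, plus all stories from one (hypothetical) user
admin/addBlacklistItem.py --regex "^Show HN:" --user someExampleUser
```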
60 | -------------------------------------------------------------------------------- /admin/addBlacklistItem.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # 3 | # Adds the specified items to the appropriate blacklist collection 4 | 5 | import sys 6 | import argparse 7 | import pymongo 8 | 9 | parser = argparse.ArgumentParser(description="Adds the specified items to the appropriate Mongo blacklist collection") 10 | parser.add_argument("-t", "--text", required=False, nargs='+', action="store", dest="text", metavar='string', help="Blacklist stories if their title contains this text anywhere. This will block words that contain this text as a substring (e.g. the text \"gpt\" will match stories containing \"GPT\", \"GPT3.5\", etc.)") 11 | parser.add_argument("-k", "--keyword", required=False, nargs='+', action="store", dest="keyword", metavar='string', help="Blacklist stories if their title contains this exact string. This will not block words that contain the keyword as a substring (e.g. the keyword \"trump\" will not match stories containing the word \"trumpet\")") 12 | parser.add_argument("-r", "--regex", required=False, nargs='+', action="store", dest="regex", metavar='string', help="Blacklist stories if their title matches this regex. Regex must be Javascript flavored") 13 | parser.add_argument("-s", "--site", required=False, nargs='+', action="store", dest="site", metavar='string', help="Blacklist stories if they're from this source (e.g. \"newyorker.com\")") 14 | parser.add_argument("-u", "--user", required=False, nargs='+', action="store", dest="user", metavar='string', help="Blacklist all stories added by a particular user") 15 | args = parser.parse_args() 16 | 17 | if not args.text and not args.keyword and not args.regex and not args.user and not args.site: 18 | print("You must provide at least one item to blacklist\n") 19 | parser.print_help() 20 | sys.exit(1) 21 | 22 | client = pymongo.MongoClient() 23 | db = client["hackerNewsCleanserDb"] 24 | if args.text or args.keyword or args.regex: 25 | blacklistedTitlesCollection = db["blacklistedTitles"] 26 | 27 | if args.text: 28 | textToAdd = [] 29 | for text in args.text: 30 | exists = blacklistedTitlesCollection.count_documents({"text":text}) != 0 31 | if exists: 32 | print("Title text \"{}\" is already blacklisted".format(text)) 33 | else: 34 | textDocument = { 35 | "text": text, 36 | "type": "text" 37 | } 38 | textToAdd.append(textDocument) 39 | if textToAdd: 40 | blacklistedTitlesCollection.insert_many(textToAdd) 41 | if args.keyword: 42 | keywordsToAdd = [] 43 | for keyword in args.keyword: 44 | exists = blacklistedTitlesCollection.count_documents({"keyword":keyword}) != 0 45 | if exists: 46 | print("Title keyword \"{}\" is already blacklisted".format(keyword)) 47 | else: 48 | keywordDocument = { 49 | "keyword": keyword, 50 | "type": "keyword" 51 | } 52 | keywordsToAdd.append(keywordDocument) 53 | if keywordsToAdd: 54 | blacklistedTitlesCollection.insert_many(keywordsToAdd) 55 | if args.regex: 56 | regexsToAdd = [] 57 | for r in args.regex: 58 | exists = blacklistedTitlesCollection.count_documents({"regex":r}) != 0 59 | if exists: 60 | print("Title regex \"{}\" is already blacklisted".format(r)) 61 | else: 62 | regexDocument = { 63 | "regex": r, 64 | "type": "regex" 65 | } 66 | regexsToAdd.append(regexDocument) 67 | if regexsToAdd: 68 | blacklistedTitlesCollection.insert_many(regexsToAdd) 69 | if args.site: 70 | blacklistedSitesCollection = db["blacklistedSites"] 71 |
sitesToAdd = [] 72 | for site in args.site: 73 | exists = blacklistedSitesCollection.count_documents({"site":site}) != 0 74 | if exists: 75 | print("Site \"{}\" is already blacklisted".format(site)) 76 | else: 77 | siteDocument = { 78 | "site": site 79 | } 80 | sitesToAdd.append(siteDocument) 81 | if sitesToAdd: 82 | blacklistedSitesCollection.insert_many(sitesToAdd) 83 | if args.user: 84 | blacklistedUsersCollection = db["blacklistedUsers"] 85 | usersToAdd = [] 86 | for user in args.user: 87 | exists = blacklistedUsersCollection.count_documents({"user":user}) != 0 88 | if exists: 89 | print("User \"{}\" is already blacklisted".format(user)) 90 | else: 91 | userDocument = { 92 | "user": user 93 | } 94 | usersToAdd.append(userDocument) 95 | if usersToAdd: 96 | blacklistedUsersCollection.insert_many(usersToAdd) 97 | -------------------------------------------------------------------------------- /admin/clean.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | export ADMIN_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null && pwd )" 4 | source "${ADMIN_DIR}"/init.sh 5 | 6 | # Confirm if they really want to clean MongoDB's data store 7 | echo "You are about to clean all Hacker News Cleanser data, and logs. This CANNOT be undone." 8 | read -p "Are you absolutely sure you want to proceed? (y/n): " -r 9 | if [[ $REPLY =~ ^[Yy]$ ]] 10 | then 11 | "${ADMIN_DIR}"/cleanMongoDb.sh 1 12 | "${ADMIN_DIR}"/cleanLogs.sh 13 | echo -e "\n${GREEN}All Hacker News Cleanser data & logs deleted${RESET}" 14 | fi 15 | -------------------------------------------------------------------------------- /admin/cleanLogs.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | export ADMIN_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null && pwd )" 4 | source "${ADMIN_DIR}"/init.sh 5 | 6 | rm "${LOGS_DIR}"/mongodb.log 2> /dev/null 7 | rm "${LOGS_DIR}"/server.log 2> /dev/null 8 | 9 | echo "Log directory cleaned" -------------------------------------------------------------------------------- /admin/cleanMongoDb.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | export ADMIN_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null && pwd )" 4 | source "${ADMIN_DIR}"/init.sh 5 | 6 | MONGODB_NOHUP_RUNNING=$(ps -ef | grep "mongod --dbpath ""$MONGO_DB" | grep -v grep) 7 | HAS_SERVICE_COMMAND=$(command -v service) 8 | HAS_BREW_COMMAND=$(command -v brew) 9 | if [ -n "$HAS_SERVICE_COMMAND" ]; then 10 | HAS_MONGODB_INITD_SERVICE=$(ls /etc/init.d/mongod 2>/dev/null) 11 | HAS_MONGODB_SYSTEMCTL_SERVICE=$(ls /usr/lib/systemd/system/mongod.service 2>/dev/null) 12 | if [ -n "$HAS_MONGODB_INITD_SERVICE" ]; then 13 | MONGODB_SERVICE_RUNNING=$(service mongod status | grep 'is running\|active (running)') 14 | elif [ -n "$HAS_MONGODB_SYSTEMCTL_SERVICE" ]; then 15 | MONGODB_SERVICE_RUNNING=$(systemctl status mongod | grep 'is running\|active (running)') 16 | fi 17 | elif [ -n "$HAS_BREW_COMMAND" ]; then 18 | HAS_MONGODB_TAPPED=$(brew services list | grep "mongodb-community" 2>/dev/null) 19 | if [ -n "$HAS_MONGODB_TAPPED" ]; then 20 | MONGODB_SERVICE_RUNNING=$(brew services info mongodb-community | grep "PID" 2>/dev/null) 21 | else 22 | echo -e "${RED}You don't seem to have MongoDB available through brew or systemctl${RESET}" 23 | exit 1 24 | fi 25 | fi 26 | 27 | # MongoDB may not be currently running. 
If that's the case, we want to briefly start 28 | # it for the cleaning process, then immediately turn it off again 29 | START_THEN_STOP=0 30 | if [ -z "$MONGODB_NOHUP_RUNNING" ] && [ -z "$MONGODB_SERVICE_RUNNING" ]; then 31 | START_THEN_STOP=1 32 | fi 33 | 34 | function cleanMongoDb { 35 | if [ $START_THEN_STOP -eq 1 ]; then 36 | "${ADMIN_DIR}"/startMongoDb.sh 37 | starterProcess=$! 38 | wait $starterProcess 39 | fi 40 | 41 | node "${ADMIN_DIR}"/mongoCleaner.js > "${LOGS_DIR}"/clean-mongodb.log 2>&1 & 42 | cleanerProcess=$! 43 | wait $cleanerProcess 44 | 45 | echo "Hacker News Cleanser database deleted" 46 | 47 | if [ $START_THEN_STOP -eq 1 ]; then 48 | "${ADMIN_DIR}"/stopMongoDb.sh 49 | fi 50 | } 51 | 52 | if [[ $1 == 1 ]] # If 1 is provided, skip safety prompt and just do it. 53 | then 54 | cleanMongoDb 55 | else 56 | # Confirm if they really want to clean MongoDB's data store 57 | echo "You are about to drop all collections in Hacker News Cleanser's database, this CANNOT be undone." 58 | read -p "Are you absolutely sure you want to proceed? (y/n): " -r 59 | if [[ $REPLY =~ ^[Yy]$ ]] 60 | then 61 | cleanMongoDb 62 | fi 63 | fi 64 | 65 | exit 0 -------------------------------------------------------------------------------- /admin/init.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | export RED='\033[0;31m' 4 | export RESET='\033[0m' 5 | export YELLOW='\033[0;33m' 6 | export GREEN='\033[0;32m' 7 | 8 | export REPO=$(dirname "${ADMIN_DIR}") 9 | export SERVER_DIR="${REPO}"/server 10 | export LOGS_DIR="${REPO}"/logs 11 | export MONGO_DB=set/me/somewhere 12 | 13 | export USE_PM2=true -------------------------------------------------------------------------------- /admin/mongoCleaner.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | 3 | // DEPENDENCIES 4 | // ------------ 5 | // External 6 | import path from "path"; 7 | import url from "url"; 8 | // Local 9 | const FILENAME = url.fileURLToPath(import.meta.url); 10 | const CLEANSER_ROOT_DIRECTORY_PATH = path.join(path.dirname(FILENAME), "../"); 11 | const PropertyManager = await import (path.join(CLEANSER_ROOT_DIRECTORY_PATH, "server/PropertyManager.js")); 12 | const MongoClient = await import (path.join(CLEANSER_ROOT_DIRECTORY_PATH, "server/MongoClient.js")); 13 | 14 | 15 | // CONSTANTS 16 | // --------- 17 | const PROPERTIES_FILE_NAME = path.join(CLEANSER_ROOT_DIRECTORY_PATH, "server/cleanser.properties"); 18 | 19 | 20 | // GLOBALS 21 | // ------- 22 | var propertyManager = null; 23 | 24 | 25 | async function cleanDbAndClose() { 26 | var mongoClient = new MongoClient.default(propertyManager); 27 | await mongoClient.connect(); 28 | await mongoClient.dropCollectionBlacklistedTitles(); 29 | await mongoClient.dropCollectionBlacklistedSites(); 30 | await mongoClient.dropCollectionBlacklistedUsers(); 31 | await mongoClient.dropCollectionCleansedItems(); 32 | await mongoClient.dropCollectionWeeklyReportsLog(); 33 | await mongoClient.close(); 34 | } 35 | 36 | async function main() { 37 | propertyManager = new PropertyManager.default(); 38 | await propertyManager.load(PROPERTIES_FILE_NAME); 39 | 40 | await cleanDbAndClose(); 41 | } 42 | 43 | main(); -------------------------------------------------------------------------------- /admin/printUserStatistics.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import sys 4 | import operator 5 | from pymongo import 
MongoClient 6 | 7 | client = MongoClient("localhost:27017") 8 | db = client.hackerNewsCleanserDb 9 | 10 | distribution = {} 11 | cursor = db.cleansedItems.find() 12 | for cleansedItem in cursor: 13 | user = cleansedItem["user"] 14 | if user in distribution: 15 | distribution[user] += 1 16 | else: 17 | distribution[user] = 1 18 | 19 | cursor = db.blacklistedUsers.find() 20 | for blacklistedUser in cursor: 21 | user = blacklistedUser["user"] 22 | if user in distribution: 23 | distribution.pop(user, None) 24 | 25 | sortedDistribution = sorted(distribution.items(), key=operator.itemgetter(1), reverse=True) 26 | maxPrint = 10 27 | count = 0 28 | for entry in sortedDistribution: 29 | print("%s:%d" % (entry[0], entry[1])) 30 | count += 1 31 | if count >= maxPrint: 32 | break 33 | -------------------------------------------------------------------------------- /admin/start.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | export ADMIN_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null && pwd )" 4 | source "${ADMIN_DIR}"/init.sh 5 | 6 | MONGODB_NOHUP_RUNNING=$(ps -ef | grep "mongod --dbpath ""$MONGO_DB" | grep -v grep) 7 | HAS_SERVICE_COMMAND=$(command -v service) 8 | HAS_BREW_COMMAND=$(command -v brew) 9 | if [ -n "$HAS_SERVICE_COMMAND" ]; then 10 | HAS_MONGODB_INITD_SERVICE=$(ls /etc/init.d/mongod 2>/dev/null) 11 | HAS_MONGODB_SYSTEMCTL_SERVICE=$(ls /usr/lib/systemd/system/mongod.service 2>/dev/null) 12 | if [ -n "$HAS_MONGODB_INITD_SERVICE" ]; then 13 | MONGODB_SERVICE_RUNNING=$(service mongod status | grep 'is running\|active (running)') 14 | elif [ -n "$HAS_MONGODB_SYSTEMCTL_SERVICE" ]; then 15 | MONGODB_SERVICE_RUNNING=$(systemctl status mongod | grep 'is running\|active (running)') 16 | fi 17 | elif [ -n "$HAS_BREW_COMMAND" ]; then 18 | HAS_MONGODB_TAPPED=$(brew services list | grep "mongodb-community" 2>/dev/null) 19 | if [ -n "$HAS_MONGODB_TAPPED" ]; then 20 | MONGODB_SERVICE_RUNNING=$(brew services info mongodb-community | grep "PID" 2>/dev/null) 21 | else 22 | echo -e "${RED}You don't seem to have MongoDB available through brew or systemctl${RESET}" 23 | exit 1 24 | fi 25 | fi 26 | SERVER_RUNNING=$(ps -ef | grep "node ""${SERVER_DIR}" | grep -v grep) 27 | 28 | # If all necessary ingredients are running successfully, nothing to do 29 | if [ -n "$SERVER_RUNNING" ] && { [ -n "$MONGODB_NOHUP_RUNNING" ] || [ -n "$MONGODB_SERVICE_RUNNING" ]; }; then 30 | echo -e "${GREEN}Hacker News Cleanser is already running${RESET}" 31 | exit 0 32 | fi 33 | 34 | "${ADMIN_DIR}"/cleanLogs.sh 35 | 36 | echo "Starting Hacker News Cleanser..." 37 | 38 | "${ADMIN_DIR}"/startMongoDb.sh 39 | result=$! 40 | wait $result 41 | 42 | "${ADMIN_DIR}"/startServer.sh 43 | SUCCESS=$? 
44 | 45 | if [ $SUCCESS -eq 0 ]; then 46 | echo -e "${GREEN}All started successfully${RESET}" 47 | exit 0 48 | else 49 | echo -e "${RED}Hacker News Cleanser failed to start${RESET}" 50 | exit 1 51 | fi 52 | -------------------------------------------------------------------------------- /admin/startMongoDb.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | export ADMIN_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null && pwd )" 4 | source "${ADMIN_DIR}"/init.sh 5 | 6 | MONGODB_NOHUP_RUNNING=$(ps -ef | grep "mongod --dbpath ""$MONGO_DB" | grep -v grep) 7 | HAS_SERVICE_COMMAND=$(command -v service) 8 | HAS_BREW_COMMAND=$(command -v brew) 9 | if [ -n "$HAS_SERVICE_COMMAND" ]; then 10 | HAS_MONGODB_INITD_SERVICE=$(ls /etc/init.d/mongod 2>/dev/null) 11 | HAS_MONGODB_SYSTEMCTL_SERVICE=$(ls /usr/lib/systemd/system/mongod.service 2>/dev/null) 12 | if [ -n "$HAS_MONGODB_INITD_SERVICE" ]; then 13 | MONGODB_SERVICE_RUNNING=$(service mongod status | grep 'is running\|active (running)') 14 | elif [ -n "$HAS_MONGODB_SYSTEMCTL_SERVICE" ]; then 15 | MONGODB_SERVICE_RUNNING=$(systemctl status mongod | grep 'is running\|active (running)') 16 | fi 17 | elif [ -n "$HAS_BREW_COMMAND" ]; then 18 | HAS_MONGODB_TAPPED=$(brew services list | grep "mongodb-community" 2>/dev/null) 19 | if [ -n "$HAS_MONGODB_TAPPED" ]; then 20 | MONGODB_SERVICE_RUNNING=$(brew services info mongodb-community | grep "PID" 2>/dev/null) 21 | else 22 | echo -e "${RED}You don't seem to have MongoDB available through brew or systemctl${RESET}" 23 | exit 1 24 | fi 25 | fi 26 | 27 | # If neither way of running MongoDB is running, start MongoDB 28 | if [ -z "$MONGODB_NOHUP_RUNNING" ] && [ -z "$MONGODB_SERVICE_RUNNING" ]; then 29 | if [ ! -d "$MONGO_DB" ]; then 30 | mkdir -p "$MONGO_DB" 31 | fi 32 | if [ ! -d "$LOGS_DIR" ]; then 33 | mkdir "$LOGS_DIR" 34 | fi 35 | 36 | rm "$LOGS_DIR"/mongodb.log 2> /dev/null 37 | 38 | if [ -n "$HAS_MONGODB_INITD_SERVICE" ]; then 39 | sudo service mongod start 40 | SUCCESS=$? 41 | elif [ -n "$HAS_MONGODB_SYSTEMCTL_SERVICE" ]; then 42 | sudo systemctl start mongod 43 | SUCCESS=$? 44 | elif [ -n "$HAS_MONGODB_TAPPED" ]; then 45 | brew services start mongodb-community 46 | SUCCESS=$? 47 | else 48 | nohup mongod --dbpath "$MONGO_DB" --bind_ip 127.0.0.1 > "$LOGS_DIR"/mongodb.log 2>&1 & 49 | SUCCESS=$? 50 | fi 51 | 52 | if [ $SUCCESS -eq 0 ]; then 53 | echo -e "${GREEN}MongoDB started${RESET}" 54 | exit 0 55 | else 56 | echo -e "${RED}MongoDB failed to start${RESET}" 57 | exit 1 58 | fi 59 | else 60 | echo "MongoDB already running" 61 | exit 0 62 | fi 63 | -------------------------------------------------------------------------------- /admin/startServer.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | export ADMIN_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null && pwd )" 4 | source "${ADMIN_DIR}"/init.sh 5 | 6 | if [ ! -d "$LOGS_DIR" ]; then 7 | mkdir "$LOGS_DIR"; 8 | fi 9 | 10 | SERVER_RUNNING=$(ps -ef | grep "node ""${SERVER_DIR}" | grep -v grep) 11 | 12 | if [ -n "$SERVER_RUNNING" ]; then 13 | echo -e "Server already running" 14 | else 15 | if [ "$USE_PM2" = true ] ; then 16 | pm2 --log "${LOGS_DIR}"/server.log --name hackerNewsCleanser --silent start "${SERVER_DIR}"/main.js 17 | else 18 | nohup node "${SERVER_DIR}"/main.js > "$LOGS_DIR"/server.log 2>&1 & 19 | fi 20 | 21 | SUCCESS=$? 
22 | if [ $SUCCESS -eq 0 ]; then 23 | echo -e "${GREEN}Server started${RESET}" 24 | exit 0 25 | else 26 | echo -e "${RED}Server failed to start${RESET}" 27 | exit 1 28 | fi 29 | fi 30 | -------------------------------------------------------------------------------- /admin/stop.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | export ADMIN_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null && pwd )" 4 | source "${ADMIN_DIR}"/init.sh 5 | 6 | MONGODB_NOHUP_RUNNING=$(ps -ef | grep "mongod --dbpath ""$MONGO_DB" | grep -v grep) 7 | HAS_SERVICE_COMMAND=$(command -v service) 8 | HAS_BREW_COMMAND=$(command -v brew) 9 | if [ -n "$HAS_SERVICE_COMMAND" ]; then 10 | HAS_MONGODB_INITD_SERVICE=$(ls /etc/init.d/mongod 2>/dev/null) 11 | HAS_MONGODB_SYSTEMCTL_SERVICE=$(ls /usr/lib/systemd/system/mongod.service 2>/dev/null) 12 | if [ -n "$HAS_MONGODB_INITD_SERVICE" ]; then 13 | MONGODB_SERVICE_RUNNING=$(service mongod status | grep 'is running\|active (running)') 14 | elif [ -n "$HAS_MONGODB_SYSTEMCTL_SERVICE" ]; then 15 | MONGODB_SERVICE_RUNNING=$(systemctl status mongod | grep 'is running\|active (running)') 16 | fi 17 | elif [ -n "$HAS_BREW_COMMAND" ]; then 18 | HAS_MONGODB_TAPPED=$(brew services list | grep "mongodb-community" 2>/dev/null) 19 | if [ -n "$HAS_MONGODB_TAPPED" ]; then 20 | MONGODB_SERVICE_RUNNING=$(brew services info mongodb-community | grep "PID" 2>/dev/null) 21 | else 22 | echo -e "${RED}You don't seem to have MongoDB available through brew or systemctl${RESET}" 23 | exit 1 24 | fi 25 | fi 26 | SERVER_RUNNING=$(ps -ef | grep "node ""${SERVER_DIR}" | grep -v grep) 27 | 28 | # If all necessary ingredients aren't running, then there's nothing to stop 29 | if [ -z "$SERVER_RUNNING" ] && { [ -z "$MONGODB_NOHUP_RUNNING" ] && [ -z "$MONGODB_SERVICE_RUNNING" ]; }; then 30 | echo "Hacker News Cleanser is not running" 31 | exit 0 32 | fi 33 | 34 | echo "Stopping Hacker News Cleanser..." 35 | 36 | # Ask to Stop MongoDB 37 | if [ -n "$MONGODB_NOHUP_RUNNING" ] || [ -n "$MONGODB_SERVICE_RUNNING" ]; then 38 | read -p "Do you want to stop MongoDB? (y/n): " -r 39 | if [[ $REPLY =~ ^[Yy]$ ]]; then 40 | read -p "Do you also want to clean MongoDB? (y/n): " -r 41 | if [[ $REPLY =~ ^[Yy]$ ]] 42 | then 43 | echo "Cleaning MongoDB..." 44 | "${ADMIN_DIR}"/cleanMongoDb.sh 1 45 | result=$! 46 | wait $result 47 | fi 48 | 49 | echo "Stopping MongoDB..." 
50 | "${ADMIN_DIR}"/stopMongoDb.sh 51 | elif [ -z "$SERVER_RUNNING" ]; then 52 | echo "Server is not running" 53 | exit 0 54 | fi 55 | else 56 | echo "MongoDB is not running" 57 | fi 58 | if [ -n "$SERVER_RUNNING" ]; then 59 | "${ADMIN_DIR}"/stopServer.sh 60 | else 61 | echo "Server is not running" 62 | fi 63 | 64 | echo -e "${GREEN}Hacker News Cleanser has been stopped${RESET}" -------------------------------------------------------------------------------- /admin/stopMongoDb.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | export ADMIN_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null && pwd )" 4 | source "${ADMIN_DIR}"/init.sh 5 | 6 | MONGODB_NOHUP_RUNNING=$(ps -ef | grep "mongod --dbpath ""$MONGO_DB" | grep -v grep) 7 | HAS_SERVICE_COMMAND=$(command -v service) 8 | HAS_BREW_COMMAND=$(command -v brew) 9 | if [ -n "$HAS_SERVICE_COMMAND" ]; then 10 | HAS_MONGODB_INITD_SERVICE=$(ls /etc/init.d/mongod 2>/dev/null) 11 | HAS_MONGODB_SYSTEMCTL_SERVICE=$(ls /usr/lib/systemd/system/mongod.service 2>/dev/null) 12 | if [ -n "$HAS_MONGODB_INITD_SERVICE" ]; then 13 | MONGODB_SERVICE_RUNNING=$(service mongod status | grep 'is running\|active (running)') 14 | elif [ -n "$HAS_MONGODB_SYSTEMCTL_SERVICE" ]; then 15 | MONGODB_SERVICE_RUNNING=$(systemctl status mongod | grep 'is running\|active (running)') 16 | fi 17 | elif [ -n "$HAS_BREW_COMMAND" ]; then 18 | HAS_MONGODB_TAPPED=$(brew services list | grep "mongodb-community" 2>/dev/null) 19 | if [ -n "$HAS_MONGODB_TAPPED" ]; then 20 | MONGODB_SERVICE_RUNNING=$(brew services info mongodb-community | grep "PID" 2>/dev/null) 21 | else 22 | echo -e "${RED}You don't seem to have MongoDB available through brew or systemctl${RESET}" 23 | exit 1 24 | fi 25 | fi 26 | 27 | # If either way of running MongoDB is active, stop MongoDB 28 | if [ -n "$MONGODB_NOHUP_RUNNING" ] || [ -n "$MONGODB_SERVICE_RUNNING" ]; then 29 | if [ -n "$HAS_MONGODB_INITD_SERVICE" ]; then 30 | sudo service mongod stop 31 | SUCCESS=$? 32 | elif [ -n "$HAS_MONGODB_SYSTEMCTL_SERVICE" ]; then 33 | sudo systemctl stop mongod 34 | SUCCESS=$? 35 | elif [ -n "$HAS_MONGODB_TAPPED" ]; then 36 | brew services stop mongodb-community 37 | SUCCESS=$? 38 | else 39 | ps -ef | grep "mongod --dbpath $MONGO_DB" | grep -v grep | awk '{print $2}' | xargs kill -9 40 | SUCCESS=$? 41 | fi 42 | 43 | if [ $SUCCESS -eq 0 ]; then 44 | echo -e "${GREEN}MongoDB stopped${RESET}" 45 | exit 0 46 | else 47 | echo -e "${RED}MongoDB failed to stop${RESET}" 48 | exit 1 49 | fi 50 | else 51 | echo "MongoDB is not running" 52 | exit 0 53 | fi 54 | -------------------------------------------------------------------------------- /admin/stopServer.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | export ADMIN_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null && pwd )" 4 | source "${ADMIN_DIR}"/init.sh 5 | 6 | SERVER_RUNNING=$(ps -ef | grep "node ""${SERVER_DIR}" | grep -v grep) 7 | 8 | if [ -n "$SERVER_RUNNING" ]; then 9 | if [ "$USE_PM2" = true ] ; then 10 | pm2 --silent stop hackerNewsCleanser 11 | else 12 | ps -ef | grep "node ""${SERVER_DIR}""/main.js" | grep -v grep | awk '{print $2}' | xargs kill -9 13 | fi 14 | 15 | SUCCESS=$? 
16 | if [ $SUCCESS -eq 0 ]; then 17 | echo -e "${GREEN}Server stopped${RESET}" 18 | exit 0 19 | else 20 | echo -e "${RED}Server failed to stop${RESET}" 21 | exit 1 22 | fi 23 | else 24 | echo "Server is not running" 25 | exit 0 26 | fi 27 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "Hacker News Cleanser", 3 | "version": "2.8", 4 | "type": "module", 5 | "description": "Taking out the trash, one New Yorker post at a time", 6 | "main": "server/main.js", 7 | "author": "Marc Barrowclift", 8 | "license": "MIT", 9 | "repository": { 10 | "type": "git", 11 | "url": "https://github.com/barrowclift/hacker-news-cleanser.git" 12 | }, 13 | "dependencies": { 14 | "got": "^14.4.1", 15 | "jsdom": "^24.1.0", 16 | "mongodb": "^6.7.0", 17 | "nodemailer": "^6.9.14", 18 | "properties": "^1.2.1", 19 | "tough-cookie": "^4.1.4" 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /references/home-feed.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Hacker News 10 | 11 | 12 | 13 |
[The remainder of home-feed.html does not survive this text dump (its HTML markup was stripped). It is a saved snapshot of the logged-in Hacker News front page for the user "barrowclift", listing 30 sample stories with their rank, title, source site, points, submitter, age, "hide" link, and comment count, and is kept in the repository as a parsing reference for the Cleanser.]
--------------------------------------------------------------------------------
/references/sample-story.html:
--------------------------------------------------------------------------------
[sample-story.html is the markup for a single story row from that snapshot ("Walmart Whistle-Blower Claims Cheating in Race with Amazon" from bloomberg.com, 68 points by kyleblarson, 1 hour ago, 41 comments), also kept as a parsing reference. Its markup likewise does not survive this dump.]
--------------------------------------------------------------------------------
/references/weekly-report.html:
--------------------------------------------------------------------------------
[weekly-report.html is the email report template, styled after the Hacker News front page, with TODO placeholders for the total number of articles cleansed and, for each cleansed story, its title, site, original submitter, and share time, followed by a "Generated by the Hacker News Cleanser" footer. Its markup likewise does not survive this dump.]
236 | 237 | -------------------------------------------------------------------------------- /screenshots/email-report.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/barrowclift/hacker-news-cleanser/465de8ec2ee61ce7c43994725cc81001ca10329b/screenshots/email-report.png -------------------------------------------------------------------------------- /server/Cleanser.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | 3 | // DEPENDENCIES 4 | // ------------ 5 | // External 6 | import { JSDOM } from "jsdom"; 7 | import got from "got"; 8 | import { CookieJar } from "tough-cookie"; 9 | // Local 10 | import Logger from "./Logger.js"; 11 | import util from "./util.js"; 12 | 13 | 14 | // CONSTANTS 15 | // --------- 16 | const CLASS_NAME = "Cleanser"; 17 | 18 | 19 | // GLOBALS 20 | // ------- 21 | let log = new Logger(CLASS_NAME); 22 | 23 | 24 | export default class Cleanser { 25 | 26 | /** 27 | * Initializes the cleanser, but does not automatically kick it off. 28 | * To start the cleanser, `start()` must be called. 29 | * 30 | * @param {PropertyManager} propertyManager 31 | * @param {MongoClient} mongoClient 32 | */ 33 | constructor(propertyManager, mongoClient) { 34 | this.propertyManager = propertyManager; 35 | this.mongoClient = mongoClient; 36 | 37 | this.isStopping = false; 38 | this.currentlyCleansing = false; 39 | this.cleanseIntervalId = null; 40 | 41 | this.totalCleansedStories = 0; 42 | 43 | /** 44 | * If a regex pattern for matching titles was provided that does not 45 | * compile, we'll only attempt the once then ignore it in all future 46 | * cleansings in this Hacker News Cleanser instance. That way, we're 47 | * not reattempting to parse the regex every cleanse and clogging up 48 | * the log file with the same errors. 49 | */ 50 | this.ignoredRegexes = new Set(); 51 | 52 | // Default options to be applied regardless of the request 53 | this.gotClient = got.extend({ 54 | prefixUrl: this.propertyManager.hackerNewsBaseUrl, 55 | cookieJar: new CookieJar() 56 | }); 57 | 58 | // Initializing option templates that the Cleanser will use when started 59 | this.loginOptions = { 60 | followRedirect: true, 61 | form: { 62 | acct: this.propertyManager.hackerNewsUsername, 63 | pw: this.propertyManager.hackerNewsPassword, 64 | goto: "news" 65 | }, 66 | headers: { 67 | "User-Agent": this.propertyManager.userAgent 68 | } 69 | } 70 | this.homePageOptions = { 71 | followRedirect: true, 72 | headers: { 73 | "User-Agent": this.propertyManager.userAgent 74 | } 75 | } 76 | this.hideOptions = { 77 | followRedirect: true, 78 | form: { 79 | id: "", 80 | goto: "news", 81 | auth: "" 82 | }, 83 | headers: { 84 | "User-Agent": this.propertyManager.userAgent 85 | } 86 | } 87 | } 88 | 89 | /** 90 | * ============== 91 | * PUBLIC METHODS 92 | * ============== 93 | */ 94 | 95 | async start() { 96 | log.info("Starting..."); 97 | 98 | // 1. Validate that all required properties were provided 99 | if (!this.propertyManager.requiredPropertiesWereProvided()) { 100 | throw "Required Hacker News Cleanser properties were not provided, cannot startup"; 101 | } 102 | 103 | // 2. Print total cleansed stories since the beginning of time 104 | this.totalCleansedStories = await this.mongoClient.countAllCleansedItems(); 105 | log.info("So far, the Hacker News Cleanser has cleansed " + this.totalCleansedStories + " total stories from your feed"); 106 | 107 | // 3. 
Login 108 | await this._login(); 109 | 110 | // 4. Run cleanse so the user doesn't have to wait for the first frequency time to elapse before results. 111 | await this._cleanse(); 112 | 113 | // 5. Finally, kick off the cleanse interval 114 | this.cleanseIntervalId = setInterval(async () => { 115 | if (this.isStopping) { 116 | log.info("Preventing cleanse, shutting down..."); 117 | } else if (this.currentlyCleansing) { 118 | log.info("Skipping cleanse, still processing previous one..."); 119 | } else { 120 | await this._cleanse(); 121 | } 122 | }, this.propertyManager.cleanserFrequencyInMillis); 123 | } 124 | 125 | async stop() { 126 | this.isStopping = true; 127 | log.info("Stopping..."); 128 | clearInterval(this.cleanseIntervalId); 129 | log.info("Stopped"); 130 | } 131 | 132 | /** 133 | * =============== 134 | * PRIVATE METHODS 135 | * =============== 136 | */ 137 | 138 | /** 139 | * The Node request instance will retain whatever authentication cookies 140 | * Hacker News returns from this login attempt, so all we need to do is 141 | * report back if it was successful or not and use the same request 142 | * instance going forward. 143 | */ 144 | async _login() { 145 | log.info("Logging into Hacker News"); 146 | 147 | let response = await this.gotClient.post("login", this.loginOptions); 148 | if (response != null) { 149 | if (response.body.indexOf("Bad login.") > -1) { 150 | log.error("Hacker News login failed, user='" + this.propertyManager.hackerNewsUsername + "', pass='" + this.propertyManager.hackerNewsPassword + "'"); 151 | throw "Hacker News login failed"; 152 | } else if (response.body.indexOf("Validation required.") > -1) { 153 | log.error("Hacker News login failed, too many bad login attempts, Hacker News now requesting Recaptcha validation. 
Unfortunately, the only way to fix this is time; please ensure your credentials are correct then try again at a later time."); 154 | throw "Too many bad login attempts, ReCAPTCHA validation now required"; 155 | } 156 | } 157 | this.lastAuthRefreshTime = new Date(); 158 | 159 | let frequencyString = this.propertyManager.cleanserFrequencyInMinutes + " minutes"; 160 | if (this.propertyManager.cleanserFrequencyInMinutes == 1) { 161 | frequencyString = "minute"; 162 | } 163 | log.info("Login successful, will cleanse Hacker News every " + frequencyString); 164 | } 165 | 166 | async _cleanse() { 167 | let homePage = await this._getHomePage(); 168 | if (!homePage) { 169 | return; 170 | } 171 | 172 | log.debug("Scanning home page for stories to cleanse"); 173 | 174 | let cleansedAtLeastOneStory = false; 175 | let nowCheckingStory = false; 176 | let idOfCurrentStory = ""; 177 | let title = "Untitled"; 178 | let storyLink = "#"; 179 | let source = "self"; 180 | 181 | let dom = new JSDOM(homePage); 182 | let rows = dom.window.document.querySelectorAll("tr"); 183 | for (let row of rows) { 184 | let className = row.getAttribute("class"); 185 | if (className && className.includes("athing")) { 186 | nowCheckingStory = true; 187 | 188 | idOfCurrentStory = row.getAttribute("id"); 189 | 190 | let titleElement = row.querySelector("span.titleline > a") 191 | title = "Untitled"; 192 | storyLink = "#"; 193 | if (titleElement) { 194 | title = titleElement.textContent; 195 | storyLink = titleElement.getAttribute("href"); 196 | } else { 197 | log.error("_cleanse", "Unable to find the story link & title"); 198 | } 199 | let sourceElement = row.querySelector("span.sitestr"); 200 | source = "self"; 201 | if (sourceElement) { 202 | source = sourceElement.textContent; 203 | } else { 204 | log.debug("Item '" + title + ' is a self post (no site link)'); 205 | } 206 | } else if (nowCheckingStory) { 207 | nowCheckingStory = false; 208 | 209 | let userElement = row.querySelector("a.hnuser"); 210 | let user = "anonymous"; 211 | let isAd = false; 212 | if (userElement) { 213 | user = userElement.textContent; 214 | } else { 215 | isAd = true; 216 | log.info("Found an ad!"); 217 | } 218 | 219 | // "verdict" is a JSON of boolean "shouldCleanse" and string "cleansedBy"; 220 | let verdict = await this._shouldCleanseStory(title, user, source); 221 | if (verdict.shouldCleanse || isAd) { 222 | log.info("Cleansing story, title=\"" + title + "\" from " + source); 223 | 224 | // Extracting auth token from "hide" href link 225 | let authForStory = ""; 226 | let actionRowLinks = row.querySelectorAll("a"); 227 | for (let link of actionRowLinks) { 228 | if ("hide" == link.textContent) { 229 | let hideLink = link.getAttribute("href"); 230 | authForStory = this._getParameterByName("auth", hideLink); 231 | break; 232 | } 233 | } 234 | if (!authForStory) { 235 | log.error("_cleanse", "No auth provided in \"" + title + "\"'s link, maybe session has expired?"); 236 | log.error(homePage); 237 | break; 238 | } 239 | 240 | // Save the cleansed story in Mongo and hide it 241 | let cleansedStoryDocument = { 242 | _id: idOfCurrentStory, 243 | title: title, 244 | user: user, 245 | source: source, 246 | cleansedBy: verdict.cleansedBy, 247 | link: storyLink, 248 | hideTime: new Date().getTime() 249 | }; 250 | try { 251 | await this._hideStory(idOfCurrentStory, authForStory); 252 | await this.mongoClient.insertCleansedStory(cleansedStoryDocument); 253 | } catch (error) { 254 | log.error("_cleanse", "Failed to cleanse Hacker News story, error=" + error); 255 
| } 256 | cleansedAtLeastOneStory = true; 257 | } 258 | 259 | continue; 260 | } 261 | } 262 | 263 | if (!cleansedAtLeastOneStory) { 264 | log.debug("No stories needed cleansing"); 265 | } 266 | } 267 | 268 | async _getHomePage() { 269 | log.debug("Getting current Hacker News home page"); 270 | 271 | let response = null; 272 | try { 273 | response = await this.gotClient.post(this.homePageOptions); 274 | this.lastAuthRefreshTime = new Date(); 275 | log.debug("Hacker News home page obtained"); 276 | } catch (error) { 277 | log.error("_getHomePage", "Failed to retrieve the Hacker News home page, error=" + error); 278 | } 279 | return response ? response.body : null; 280 | } 281 | 282 | async _shouldCleanseStory(title, user, source) { 283 | let blacklistedTitles = await this.mongoClient.findAllBlacklistedTitles(); 284 | if (blacklistedTitles) { 285 | for (let titleDocument of blacklistedTitles) { 286 | if ("text" == titleDocument.type) { 287 | if (title.toUpperCase().indexOf(titleDocument.text.toUpperCase()) > -1) { 288 | return { 289 | shouldCleanse: true, 290 | cleansedBy: this.propertyManager.collectionBlacklistedTitles 291 | }; 292 | } 293 | } else if ("keyword" == titleDocument.type) { 294 | if (new RegExp("\\b" + titleDocument.keyword + "\\b", "i").test(title)) { 295 | return { 296 | shouldCleanse: true, 297 | cleansedBy: this.propertyManager.collectionBlacklistedTitles 298 | }; 299 | } 300 | } else if ("regex" == titleDocument.type) { 301 | if (!this.ignoredRegexes.has(titleDocument.regex)) { 302 | try { 303 | if (new RegExp(titleDocument.regex).test(title)) { 304 | return { 305 | shouldCleanse: true, 306 | cleansedBy: this.propertyManager.collectionBlacklistedTitles 307 | }; 308 | } 309 | } catch (error) { 310 | log.error("_shouldCleanseStory", "Failed to parse title blacklist regex \"" + titleDocument.regex + "\", ignoring and skipping"); 311 | this.ignoredRegexes.add(titleDocument.regex); 312 | } 313 | } 314 | } 315 | } 316 | } 317 | 318 | let blacklistedSites = await this.mongoClient.findAllBlacklistedSites(); 319 | if (blacklistedSites) { 320 | for (let siteDocument of blacklistedSites) { 321 | if (siteDocument.site == source) { 322 | return { 323 | shouldCleanse: true, 324 | cleansedBy: this.propertyManager.collectionBlacklistedSites 325 | }; 326 | } 327 | } 328 | } 329 | 330 | let blacklistedUsers = await this.mongoClient.findAllBlacklistedUsers(); 331 | if (blacklistedUsers) { 332 | for (let userDocument of blacklistedUsers) { 333 | if (userDocument.user == user) { 334 | return { 335 | shouldCleanse: true, 336 | cleansedBy: this.propertyManager.collectionBlacklistedUsers 337 | }; 338 | } 339 | } 340 | } 341 | 342 | return { 343 | shouldCleanse: false 344 | }; 345 | } 346 | 347 | async _hideStory(storyId, auth) { 348 | this.hideOptions.form.id = storyId; 349 | this.hideOptions.form.auth = auth; 350 | /** 351 | * Always wait a few seconds before sending the "hide" form to Hacker 352 | * News. If you're on a fast system (or hiding a lot of articles in 353 | * one go), Hacker News eventually starts stonewalling requests with 354 | * 503 and an error that too many requests were made. There's no need 355 | * to rush on this, take a break between each hide to avoid the block.
356 | */ 357 | await util.sleepForSeconds(3); 358 | await this.gotClient.post("hide", this.hideOptions); 359 | } 360 | 361 | /** 362 | * Parse URL parameter values by key 363 | * 364 | * https://stackoverflow.com/users/1045296/jolly-exe 365 | * https://stackoverflow.com/a/901144 366 | */ 367 | _getParameterByName(name, url) { 368 | if (!url) { 369 | url = window.location.href 370 | } 371 | name = name.replace(/[\[\]]/g, "\\$&") 372 | let regex = new RegExp("[?&]" + name + "(=([^&#]*)|&|#|$)"); 373 | let results = regex.exec(url); 374 | if (!results) { 375 | return null 376 | } 377 | if (!results[2]) { 378 | return '' 379 | } 380 | return decodeURIComponent(results[2].replace(/\+/g, " ")) 381 | } 382 | } 383 | -------------------------------------------------------------------------------- /server/Logger.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | 3 | // DEPENDENCIES 4 | // ------------ 5 | // External 6 | import path from "path"; 7 | import properties from "properties"; 8 | // Local 9 | import paths from "./paths.js"; 10 | 11 | 12 | // CONSTANTS 13 | // --------- 14 | const DEFAULT_LOG_LEVEL = "INFO"; 15 | 16 | 17 | // GLOBALS 18 | // ------- 19 | let propertiesFileName = path.join(paths.SERVER_DIRECTORY_PATH, "cleanser.properties"); 20 | 21 | 22 | export default class Logger { 23 | 24 | /** 25 | * Initializes a new Logger instance for a particular class 26 | * @param {String} className 27 | */ 28 | constructor(className) { 29 | const THIS = this; // For referencing root-level "this" in promise context 30 | this.className = className; 31 | 32 | // Setting log level from setting in properties 33 | this.logLevel = DEFAULT_LOG_LEVEL; 34 | new Promise(function(resolve, reject) { 35 | properties.parse(propertiesFileName, 36 | { path: true }, 37 | function(error, theProperties) { 38 | if (error) { 39 | reject(Error(error)); 40 | } else { 41 | resolve(theProperties); 42 | } 43 | }); 44 | }).then(function(result) { 45 | if ("log.level" in result) { 46 | THIS.logLevel = result["log.level"].toUpperCase(); 47 | } 48 | }).catch(function(error) { 49 | console.log(error); 50 | }); 51 | } 52 | 53 | debug(message) { 54 | if ("DEBUG" == this.logLevel) { 55 | let debugLog = new Date().toISOString() + " [" + this.className + "] DEBUG"; 56 | this._log(debugLog, message); 57 | } 58 | } 59 | info(message) { 60 | if ("DEBUG" == this.logLevel 61 | || "INFO" == this.logLevel) { 62 | let infoLog = new Date().toISOString() + " [" + this.className + "] INFO"; 63 | this._log(infoLog, message); 64 | } 65 | } 66 | warning(method, message) { 67 | if ("DEBUG" == this.logLevel 68 | || "INFO" == this.logLevel 69 | || "WARN" == this.logLevel) { 70 | let warningLog = new Date().toISOString() + " [" + this.className + "] WARN " + method + "()"; 71 | this._log(warningLog, message); 72 | } 73 | } 74 | /** 75 | * Functionally the same as Logger@warning() 76 | */ 77 | warn(method, message) { 78 | this.warning(method, message); 79 | } 80 | error(method, message) { 81 | let errorLog = new Date().toISOString() + " [" + this.className + "] ERROR " + method + "()"; 82 | this._log(errorLog, message); 83 | } 84 | 85 | _log(logHeader, message) { 86 | if (message instanceof Object) { 87 | console.log(logHeader + ":"); 88 | console.log(message); 89 | } else { 90 | console.log(logHeader + " - " + message); 91 | } 92 | } 93 | } 94 | -------------------------------------------------------------------------------- /server/MongoClient.js: 
-------------------------------------------------------------------------------- 1 | "use strict" 2 | 3 | // DEPENDENCIES 4 | // ------------ 5 | // External 6 | import mongodb from "mongodb"; 7 | // Local 8 | import Logger from "./Logger.js"; 9 | 10 | 11 | // CONSTANTS 12 | // --------- 13 | const CLASS_NAME = "mongoConnection" 14 | 15 | 16 | // GLOBALS 17 | // ------- 18 | let log = new Logger(CLASS_NAME); 19 | 20 | /** 21 | * This was originally a "wrapper" to the old v3 `mongodb` client primarily to 22 | * sidestep the absolutely nightmare that was callback handling and instead 23 | * wrap everything in Promises so callers could simply `await` the necessary 24 | * calls. 25 | * 26 | * However, modern `mongodb` client versions (thankfully!) migrated natively 27 | * to Promises, so the vast majority of the value this wrapper class provided 28 | * is now moot. 29 | * 30 | * I'm keeping it around mostly for legacy reasons (it's more work to fully 31 | * remove than to remove the now unnecessary Promise wrapping). If this was 32 | * being written from scratch today with the modern `mongodb` package, I 33 | * wouldn't have made this class at all. 34 | */ 35 | export default class MongoClient { 36 | 37 | /** 38 | * @param {PropertyManager} propertyManager 39 | */ 40 | constructor(propertyManager) { 41 | this.propertyManager = propertyManager; 42 | this.mongo = null; 43 | 44 | log.debug("Initialized"); 45 | } 46 | 47 | /** 48 | * ============== 49 | * PUBLIC METHODS 50 | * ============== 51 | */ 52 | 53 | async connect() { 54 | let mongoServerUrl = "mongodb://" + this.propertyManager.mongoHost + ":" + this.propertyManager.mongoPort + "/" + this.propertyManager.db; 55 | log.info("Connecting to Mongo at " + mongoServerUrl); 56 | this.connection = new mongodb.MongoClient(mongoServerUrl); 57 | await this.connection.connect(); 58 | this.mongo = this.connection.db(this.propertyManager.db); 59 | } 60 | 61 | async close() { 62 | log.debug("Closing Mongo connection..."); 63 | await this.connection.close(); 64 | log.info("Closed Mongo connection"); 65 | } 66 | 67 | /** 68 | * Like Mongo's find, but wrapping up tiresome boilerplate for increased 69 | * safety and ease of use. Returned document array will be "null" as well 70 | * when no documents exist, as that's also far easier to check for and 71 | * handle than an empty array. 72 | */ 73 | find(collectionName, query, sortQuery) { 74 | if (sortQuery == null) { 75 | sortQuery = {}; 76 | } 77 | 78 | let collection = this.mongo.collection(collectionName); 79 | if (collection == null || query == null) { 80 | throw "Invalid find arguments, query='" + JSON.stringify(query) + "', collectionName=" + collectionName; 81 | } 82 | 83 | return collection.find(query).sort(sortQuery).toArray(); 84 | } 85 | findWeeklyReportLogs(query, sortQuery) { 86 | return this.find(this.propertyManager.collectionWeeklyReportsLog, query, sortQuery); 87 | } 88 | findCleansedItems(query, sortQuery) { 89 | return this.find(this.propertyManager.collectionCleansedItems, query, sortQuery); 90 | } 91 | 92 | /** 93 | * Equivalent to Mongo's find with an empty/blank query, but with increased 94 | * safety and ease of use. 
Returned document array will be "null" as well when 95 | * no documents exist, as that's also far ease 96 | */ 97 | findAll(collectionName) { 98 | return this.find(collectionName, {}); 99 | } 100 | findAllBlacklistedTitles() { 101 | return this.find(this.propertyManager.collectionBlacklistedTitles, {}); 102 | } 103 | findAllBlacklistedSites() { 104 | return this.find(this.propertyManager.collectionBlacklistedSites, {}); 105 | } 106 | findAllBlacklistedUsers() { 107 | return this.find(this.propertyManager.collectionBlacklistedUsers, {}); 108 | } 109 | 110 | /** 111 | * Like Mongo's updateOne, but wrapping up tiresome boilerplate for 112 | * increased safety and ease of use. This method has strict "insert" 113 | * behavior (no updates). 114 | */ 115 | insertOne(collectionName, documentToInsert, upsert) { 116 | let collection = this.mongo.collection(collectionName); 117 | if (collection == null || documentToInsert == null) { 118 | throw "Invalid insertOne arguments, document='" + documentToInsert + "', collectionName=" + collectionName; 119 | } 120 | collection.insertOne(documentToInsert, { upsert }); 121 | log.debug("Inserted one document, _id=" + documentToInsert._id + ", collectionName=" + collectionName); 122 | } 123 | insertWeeklyReportLog(documentToInsert) { 124 | return this.insertOne(this.propertyManager.collectionWeeklyReportsLog, documentToInsert); 125 | } 126 | insertCleansedStory(documentToInsert) { 127 | return this.insertOne(this.propertyManager.collectionCleansedItems, documentToInsert, true); 128 | } 129 | 130 | /** 131 | * Like Mongo's deleteOne, but wrapping up tiresome boilerplate for 132 | * increased safety and ease of use. This method has delete ONE behavior 133 | * ONLY, will result in an error if no document exists with the provided _id 134 | */ 135 | deleteById(collectionName, id) { 136 | let collection = this.mongo.collection(collectionName); 137 | if (collection == null || id == null) { 138 | "Invalid deleteById arguments, _id=" + id + ", collectionName=" + collectionName; 139 | } 140 | collection.deleteOne({ _id: id }); 141 | log.debug("Deleted document, _id=" + id + ", collectionName=" + collectionName); 142 | } 143 | 144 | /** 145 | * Like Mongo's drop, but wrapping up tiresome boilerplate for increased 146 | * safety and ease of use. This method will drop empty or full 147 | * collections, and will consider it a success if no collection exists 148 | * with the provided name. 149 | */ 150 | dropCollection(collectionName) { 151 | let collection = this.mongo.collection(collectionName); 152 | if (collection == null) { 153 | "Cannot drop 'null' collection"; 154 | } 155 | return collection.drop(); 156 | } 157 | dropCollectionBlacklistedTitles() { 158 | return this.dropCollection(this.propertyManager.collectionBlacklistedTitles); 159 | } 160 | dropCollectionBlacklistedSites() { 161 | return this.dropCollection(this.propertyManager.collectionBlacklistedSites); 162 | } 163 | dropCollectionBlacklistedUsers() { 164 | return this.dropCollection(this.propertyManager.collectionBlacklistedUsers); 165 | } 166 | dropCollectionCleansedItems() { 167 | return this.dropCollection(this.propertyManager.collectionCleansedItems); 168 | } 169 | dropCollectionWeeklyReportsLog() { 170 | return this.dropCollection(this.propertyManager.collectionWeeklyReportsLog); 171 | } 172 | 173 | /** 174 | * Like Mongo's find + count, but wrapping up tiresome boilerplate for 175 | * increased safety and ease of use. 
This method will count all matches 176 | * for the provided query in a particular collection. If no documents 177 | * match the query, a count of 0 is returned. 178 | */ 179 | count(collectionName, query) { 180 | let collection = this.mongo.collection(collectionName); 181 | if (collection && query != null) { 182 | return collection.countDocuments(query); 183 | } 184 | throw "Invalid count arguments, query='" + JSON.stringify(query) + "', collectionName=" + collectionName; 185 | } 186 | 187 | /** 188 | * Equivalent to Mongo's find + count with an empty/blank query, but with 189 | * increased safety and ease of use. Returned count will be 0 if no 190 | * documents exist in the collection. 191 | */ 192 | countAll(collectionName) { 193 | return this.count(collectionName, {}); 194 | } 195 | countAllCleansedItems() { 196 | return this.countAll(this.propertyManager.collectionCleansedItems); 197 | } 198 | 199 | } 200 | -------------------------------------------------------------------------------- /server/PropertyManager.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | 3 | // DEPENDENCIES 4 | // ------------ 5 | // External 6 | import nodePropertyLoader from "properties"; 7 | // Local 8 | import Logger from "./Logger.js"; 9 | import util from "./util.js"; 10 | 11 | 12 | // CONSTANTS 13 | // --------- 14 | const CLASS_NAME = "PropertyManager"; 15 | 16 | // Property default values 17 | const DEFAULT_HACKER_NEWS_BASE_URL = "https://news.ycombinator.com"; 18 | const DEFAULT_HACKER_NEWS_USERNAME = null; 19 | const DEFAULT_HACKER_NEWS_PASSWORD = null; 20 | 21 | const DEFAULT_CLEANSER_FREQUENCY_IN_MINUTES = 1; 22 | const DEFAULT_USER_AGENT_BASE = "HackerNewsCleanser/2.8 +https://github.com/barrowclift/hacker-news-cleanser" 23 | 24 | const DEFAULT_MONGO_HOST = "localhost"; 25 | const DEFAULT_MONGO_PORT = 27017; 26 | const DEFAULT_DB = "hackerNewsCleanserDb"; 27 | const DEFAULT_COLLECTION_BLACKLISTED_TITLES = "blacklistedTitles"; 28 | const DEFAULT_COLLECTION_BLACKLISTED_SITES = "blacklistedSites"; 29 | const DEFAULT_COLLECTION_BLACKLISTED_USERS = "blacklistedUsers"; 30 | const DEFAULT_COLLECTION_CLEANSED_ITEMS = "cleansedItems"; 31 | const DEFAULT_COLLECTION_WEEKLY_REPORTS_LOG = "weeklyReportsLog"; 32 | 33 | const DEFAULT_EMAIL_REPORT_ENABLED = false; 34 | const DEFAULT_EMAIL_REPORT_FREQUENCY_IN_DAYS = 7; 35 | const DEFAULT_EMAIL_REPORT_SENDER = null; 36 | const DEFAULT_EMAIL_REPORT_SENDER_PASSWORD = null; 37 | const DEFAULT_EMAIL_REPORT_RECIPIENTS = []; 38 | 39 | 40 | // GLOBALS 41 | // ------- 42 | let log = new Logger(CLASS_NAME); 43 | 44 | 45 | /** 46 | * Working with properties is a pain. You have to check for existance, have 47 | * default values defined, etc. This detracts from what the code using those 48 | * values actually wants: a sane default if not present, no boilerplate hiding 49 | * the core of their own logic. 50 | * 51 | * Thus, any and ALL Hacker News Cleanser properties are pre-loaded and 52 | * validated here, and if not provided or present fall back to sane defaults. 53 | * Thus, letting calling code get back to what's *actually* important to them: 54 | * their own work. 55 | */ 56 | export default class PropertyManager { 57 | 58 | /** 59 | * Does not automatically load any properties file, but simply initializes 60 | * all Hacker News Cleanser properties to their default values. To load 61 | * `cleanser.properties`, call load(). 
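     *
     * Typical usage (mirroring server/main.js):
     *
     *     let propertyManager = new PropertyManager();
     *     await propertyManager.load(propertiesFileName);
     *     let frequency = propertyManager.cleanserFrequencyInMinutes;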
62 | */ 63 | constructor() { 64 | this.hackerNewsBaseUrl = DEFAULT_HACKER_NEWS_BASE_URL; 65 | this.hackerNewsUsername = DEFAULT_HACKER_NEWS_USERNAME; 66 | this.hackerNewsPassword = DEFAULT_HACKER_NEWS_PASSWORD; 67 | 68 | this.cleanserFrequencyInMinutes = DEFAULT_CLEANSER_FREQUENCY_IN_MINUTES; 69 | this.userAgentBase = DEFAULT_USER_AGENT_BASE; 70 | 71 | // MongoDB 72 | this.mongoHost = DEFAULT_MONGO_HOST; 73 | this.mongoPort = DEFAULT_MONGO_PORT; 74 | this.db = DEFAULT_DB; 75 | this.collectionBlacklistedTitles = DEFAULT_COLLECTION_BLACKLISTED_TITLES; 76 | this.collectionBlacklistedSites = DEFAULT_COLLECTION_BLACKLISTED_SITES; 77 | this.collectionBlacklistedUsers = DEFAULT_COLLECTION_BLACKLISTED_USERS; 78 | this.collectionCleansedItems = DEFAULT_COLLECTION_CLEANSED_ITEMS; 79 | this.collectionWeeklyReportsLog = DEFAULT_COLLECTION_WEEKLY_REPORTS_LOG; 80 | 81 | // Email Report 82 | this.emailReportEnabled = DEFAULT_EMAIL_REPORT_ENABLED; 83 | this.emailReportFrequencyInDays = DEFAULT_EMAIL_REPORT_FREQUENCY_IN_DAYS; 84 | this.emailReportSender = DEFAULT_EMAIL_REPORT_SENDER; 85 | this.emailReportSenderPassword = DEFAULT_EMAIL_REPORT_SENDER_PASSWORD; 86 | this.emailReportRecipients = DEFAULT_EMAIL_REPORT_RECIPIENTS; 87 | } 88 | 89 | /** 90 | * ============== 91 | * PUBLIC METHODS 92 | * ============== 93 | */ 94 | 95 | async load(filename) { 96 | if (!filename) { 97 | throw "Properties filename is null"; 98 | } 99 | 100 | let properties = await this._load(filename); 101 | 102 | if ("hacker.news.base.url" in properties) { 103 | this.hackerNewsBaseUrl = properties["hacker.news.base.url"]; 104 | } 105 | if ("hacker.news.username" in properties) { 106 | this.hackerNewsUsername = properties["hacker.news.username"]; 107 | } 108 | if ("hacker.news.password" in properties) { 109 | this.hackerNewsPassword = properties["hacker.news.password"]; 110 | } 111 | 112 | if ("cleanser.frequency.in.minutes" in properties) { 113 | this.cleanserFrequencyInMinutes = properties["cleanser.frequency.in.minutes"]; 114 | } 115 | this.cleanserFrequencyInMillis = util.minutesToMillis(this.cleanserFrequencyInMinutes); 116 | if ("user.agent.base" in properties) { 117 | this.userAgentBase = properties["user.agent.base"]; 118 | } 119 | 120 | // MongoDB 121 | if ("mongodb.host" in properties) { 122 | this.mongoHost = properties["mongodb.host"]; 123 | } 124 | if ("mongodb.port" in properties) { 125 | this.mongoPort = properties["mongodb.port"]; 126 | } 127 | if ("db" in properties) { 128 | this.db = properties["db"]; 129 | } 130 | if ("collection.blacklisted.titles" in properties) { 131 | this.collectionBlacklistedTitles = properties["collection.blacklisted.titles"]; 132 | } 133 | if ("collection.blacklisted.sites" in properties) { 134 | this.collectionBlacklistedSites = properties["collection.blacklisted.sites"]; 135 | } 136 | if ("collection.blacklisted.users" in properties) { 137 | this.collectionBlacklistedUsers = properties["collection.blacklisted.users"]; 138 | } 139 | if ("collection.cleansed.items" in properties) { 140 | this.collectionCleansedItems = properties["collection.cleansed.items"]; 141 | } 142 | if ("collection.weekly.reports.log" in properties) { 143 | this.collectionWeeklyReportsLog = properties["collection.weekly.reports.log"]; 144 | } 145 | 146 | // Email Report 147 | if ("email.report.enabled" in properties) { 148 | this.emailReportEnabled = properties["email.report.enabled"]; 149 | } 150 | if ("email.report.frequency.in.days" in properties) { 151 | this.emailReportFrequencyInDays = 
properties["email.report.frequency.in.days"]; 152 | } 153 | if ("email.report.sender" in properties) { 154 | this.emailReportSender = properties["email.report.sender"]; 155 | } 156 | if ("email.report.sender.password" in properties) { 157 | this.emailReportSenderPassword = properties["email.report.sender.password"]; 158 | } 159 | if ("email.report.recipients" in properties) { 160 | this.emailReportRecipients = properties["email.report.recipients"]; 161 | } 162 | 163 | // Computed 164 | this.userAgent = this.hackerNewsUsername + " " + this.userAgentBase; 165 | } 166 | 167 | requiredPropertiesWereProvided() { 168 | return this.hackerNewsUsername != null 169 | && this.hackerNewsPassword != null 170 | } 171 | 172 | requiredEmailReportPropertiesWereProvided() { 173 | return this.emailReportSender != null 174 | && this.emailReportSenderPassword != null 175 | && this.emailReportRecipients != [] 176 | } 177 | 178 | /** 179 | * =============== 180 | * PRIVATE METHODS 181 | * =============== 182 | */ 183 | 184 | async _load(filename) { 185 | // The properties package does not currently support promises natively 186 | return new Promise((resolve, reject) => { 187 | nodePropertyLoader.parse(filename, 188 | { path: true }, 189 | (error, properties) => { 190 | if (error) { 191 | log.error("loadProperties", "An error occurred while loading properties"); 192 | reject(Error(error)); 193 | } else { 194 | log.info("Loaded properties"); 195 | resolve(properties); 196 | } 197 | }); 198 | }); 199 | } 200 | 201 | } 202 | -------------------------------------------------------------------------------- /server/ReportMailman.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | 3 | // DEPENDENCIES 4 | // ------------ 5 | // External 6 | import nodemailer from 'nodemailer'; 7 | // Local 8 | import Logger from "./Logger.js"; 9 | import util from "./util.js"; 10 | 11 | 12 | // CONSTANTS 13 | // --------- 14 | const CLASS_NAME = "ReportMailman"; 15 | 16 | 17 | // GLOBALS 18 | // ------- 19 | let log = new Logger(CLASS_NAME); 20 | 21 | 22 | export default class ReportMailman { 23 | 24 | /** 25 | * Initializes the report "mailman" that can be used to send out report 26 | * emails of cleaned stories. 27 | * 28 | * @param {PropertyManager} propertyManager 29 | * @param {MongoClient} mongoClient 30 | */ 31 | constructor(propertyManager, mongoClient) { 32 | this.propertyManager = propertyManager; 33 | this.mongoClient = mongoClient; 34 | 35 | this.isStopping = false; 36 | this.mailmanIntervalId = null; 37 | } 38 | 39 | /** 40 | * ============== 41 | * PUBLIC METHODS 42 | * ============== 43 | */ 44 | 45 | async start() { 46 | log.info("Starting..."); 47 | 48 | // Start the mailman interval to send occassional cleaner reports 49 | this.mailmanIntervalId = setInterval(async () => { 50 | await this.intervalProcess(); 51 | }, util.daysToMillis(this.propertyManager.emailReportFrequencyInDays)); 52 | await this.intervalProcess(); 53 | } 54 | 55 | async stop() { 56 | this.isStopping = true; 57 | log.info("Stopping..."); 58 | clearInterval(this.mailmanIntervalId); 59 | log.info("Stopped"); 60 | } 61 | 62 | async intervalProcess() { 63 | if (this.isStopping) { 64 | log.info("Preventing mailman, shutting down..."); 65 | } else { 66 | let shouldSend = await this.shouldSend(); 67 | if (shouldSend) { 68 | await this.send(); 69 | } 70 | } 71 | } 72 | 73 | /** 74 | * Determines whether or not the time since the last report send time has 75 | * elapsed the report frequency. 
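For example, with the default 7 day frequency a report sent last Monday at noon only becomes eligible to send again after the following Monday at noon; any check before then returns false.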
76 | * 77 | * Will only potentially return true if email reporting is enabled. 78 | * @return {boolean} true if the time since last report send time has elapsed 79 | * the report frequency, otherwise false. 80 | */ 81 | async shouldSend() { 82 | if (!this.propertyManager.emailReportEnabled) { 83 | log.info("Email Reports are disabled"); 84 | return false; 85 | } 86 | if (!this.propertyManager.requiredEmailReportPropertiesWereProvided()) { 87 | log.warning("Email Reports are enabled but required properties are missing"); 88 | return false; 89 | } 90 | 91 | // Find all documents 92 | const FIND_QUERY = {} 93 | // Sort the report documents in descending order (newest to oldest) 94 | const REPORT_SORT_QUERY = { 95 | sentTime: -1 96 | } 97 | 98 | let reports = await this.mongoClient.findWeeklyReportLogs(FIND_QUERY, REPORT_SORT_QUERY); 99 | let shouldSend = false; 100 | 101 | // We've sent at least one report in the past 102 | if (reports && reports.length > 0) { 103 | /** 104 | * Creating a Date instance exactly one frequency ago. If this date is 105 | * still after the last report's send time, then it's time to send a 106 | * new report (it's been a whole frequency since last report). 107 | */ 108 | let lookback = new Date(new Date().getTime() - util.daysToMillis(this.propertyManager.emailReportFrequencyInDays)); 109 | let lastSentTime = new Date(reports[0].sentTime); 110 | if (lookback.getTime() > lastSentTime.getTime()) { 111 | shouldSend = true; 112 | } else { 113 | shouldSend = false; 114 | } 115 | // We've never sent a report before 116 | } else { 117 | /** 118 | * The user has just enabled reports for the first time, so we don't 119 | * have any past report send times to check against. Instead, save a 120 | * fake "last sent" report time in Mongo. The emailReport will then 121 | * use that time for future checks, so eventually the first report 122 | * will be all stories cleansed since the feature was enabled, once the 123 | * frequency has elpased. 124 | */ 125 | let lastSentTime = new Date(); 126 | let sentTimeDocument = { 127 | sentTime: lastSentTime.getTime() 128 | } 129 | try { 130 | await this.mongoClient.insertWeeklyReportLog(sentTimeDocument); 131 | } catch (error) { 132 | log.error("shouldSend", "Failed to persist first Weekly Report Log, error=" + error); 133 | } 134 | 135 | shouldSend = false; 136 | } 137 | 138 | return shouldSend; 139 | } 140 | 141 | /** 142 | * Sends a new report with all cleansed stories since the last report. This will 143 | * include all stories cleansed since the last report time even if it's not yet 144 | * time to send the report (i.e. even if the frequency hasn't yet elapsed). 145 | * 146 | * If no stories were cleansed since the last report, no email will be sent. 
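Delivery goes through nodemailer's SMTP transport configured below against Gmail (smtp.gmail.com, port 465 over TLS), authenticating as email.report.sender with email.report.sender.password.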
147 | */ 148 | async send() { 149 | // Get stories cleansed since last report 150 | let cleansedStories = [] 151 | try { 152 | cleansedStories = await this._getStoriesSinceLastReport(); 153 | if (cleansedStories.length > 0) { 154 | // Count total cleansed stories of all time 155 | let totalStoriesCleansedSinceBeginningOfTime = -1 156 | try { 157 | totalStoriesCleansedSinceBeginningOfTime = await this.mongoClient.countAllCleansedItems(); 158 | } catch (error) { 159 | log.error("mongoClient.countAllCleansedItems", error); 160 | } 161 | 162 | let rightNow = new Date(); 163 | let today = this._formatDateToHumanReadableCalendarDate(rightNow); 164 | let emailSubject = "Hacker News Cleanser Weekly Report: " + today; 165 | let htmlBody = this._getHtmlBody(emailSubject, 166 | this.propertyManager.hackerNewsUsername, 167 | cleansedStories, 168 | totalStoriesCleansedSinceBeginningOfTime); 169 | 170 | // Create reusable transporter object using the default SMTP transport 171 | let transporter = nodemailer.createTransport({ 172 | host: "smtp.gmail.com", 173 | port: 465, 174 | secure: true, 175 | auth: { 176 | user: this.propertyManager.emailReportSender, 177 | pass: this.propertyManager.emailReportSenderPassword 178 | } 179 | }); 180 | 181 | // Setup email data with unicode symbols 182 | let mailOptions = { 183 | from: "\"Hacker News Cleanser\" <" + this.propertyManager.emailReportSender + ">", 184 | to: this.propertyManager.emailReportRecipients, // Comma delimited list of recievers 185 | subject: emailSubject, 186 | html: htmlBody 187 | } 188 | 189 | // Send mail with defined transport object 190 | let emailInfo = null; 191 | try { 192 | emailInfo = await transporter.sendMail(mailOptions); 193 | log.info("Weekly report has been sent: " + emailInfo.messageId); 194 | let sentTimeDocument = { 195 | sentTime: rightNow.getTime() 196 | } 197 | try { 198 | await this.mongoClient.insertWeeklyReportLog(sentTimeDocument); 199 | } catch (error) { 200 | log.error("mongoClient.insertWeeklyReportLog", "Couldn't save the timestamp of the weekly report into Mongo, will continue to send every cleanse cycle until this issue is resolved, error=" + error) 201 | } 202 | } catch (error) { 203 | log.error("transporter.sendMail", error) 204 | } 205 | } 206 | } catch (error) { 207 | log.error("_getStoriesSinceLastReport", error); 208 | return; 209 | } 210 | } 211 | 212 | /** 213 | * =============== 214 | * PRIVATE METHODS 215 | * =============== 216 | */ 217 | 218 | /** 219 | * Converts a Date into a human-readable calendar date (mm/dd/YYYY) 220 | * @param {Date} date The date to parse 221 | * @return {string} The Date object as a human-readable calendar date (mm/dd/YYYY) 222 | */ 223 | _formatDateToHumanReadableCalendarDate(date) { 224 | let dd = date.getDate(); 225 | let mm = date.getMonth() + 1; // January is 0! 226 | let yyyy = date.getFullYear(); 227 | let formattedDate = dd + "/" + mm + "/" + yyyy; 228 | return formattedDate; 229 | } 230 | 231 | /** 232 | * Fetches all stories cleansed since the last report. This is NOT equivalent 233 | * to shouldSend(), since this will return all stories cleansed since the last 234 | * report time even if it's not yet time to send the report (i.e. even if the 235 | * frequency hasn't yet elapsed). 236 | * 237 | * Will return results (if any) even if email reporting is disabled. 238 | * @return {array} All stories cleansed since the last report send time. If no 239 | * report has been sent before, this array will be empty. 
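The lookup keys off each cleansed story's hideTime, so any story hidden at or after the last report's sentTime is included.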
240 | */ 241 | async _getStoriesSinceLastReport() { 242 | // Find all documents 243 | const FIND_QUERY = {} 244 | // Sort the report documents in descending order (newest to oldest) 245 | const REPORT_SORT_QUERY = { 246 | sentTime: -1 247 | } 248 | 249 | let reports = await this.mongoClient.findWeeklyReportLogs(FIND_QUERY, REPORT_SORT_QUERY); 250 | // By default, no stories cleansed since last report 251 | let storiesSinceLastReport = []; 252 | 253 | // We've sent at least one report in the past 254 | if (reports.length > 0) { 255 | let lastSentTime = new Date(reports[0].sentTime); 256 | let allStoriesSinceLastReportQuery = { 257 | hideTime: { 258 | $gte: lastSentTime.getTime() 259 | } 260 | } 261 | // Sort the story documents in descending order (newest to oldest) 262 | const STORY_SORT_QUERY = { 263 | hideTime: -1 264 | } 265 | storiesSinceLastReport = await this.mongoClient.findCleansedItems(allStoriesSinceLastReportQuery, STORY_SORT_QUERY); 266 | // We've never sent a report before, no stories cleansed since last report 267 | } else { 268 | storiesSinceLastReport = []; 269 | } 270 | 271 | return storiesSinceLastReport; 272 | } 273 | 274 | /** 275 | * Builds Hacker News-styled HTML rows for each story in the provided array 276 | * @param {array} stories The MongoDB story documents 277 | * @return {string} A string of HTML rows for the provided stories 278 | */ 279 | _getHtmlStoryRows(stories) { 280 | let htmlStoryRows = ""; 281 | for (let story of stories) { 282 | let humanReadableHideTime = new Date(story.hideTime).toLocaleString(); 283 | let htmlStory = ` 284 | 285 | 286 | 287 | ` + story.title + ` (` + story.source + `) 288 | 289 | 290 | 291 | 292 | 293 | Originally shared by ` + story.user + ` at ` + humanReadableHideTime + ` 294 | 295 | 296 | `; 297 | htmlStoryRows = htmlStoryRows + htmlStory; 298 | } 299 | return htmlStoryRows; 300 | } 301 | 302 | /** 303 | * Builds a Hacker News-styled HTML page containing the cleansed stories. 
304 | * @param {string} htmlTitle The HTML of the page 305 | * @param {string} username The Hacker News user ID 306 | * @param {number} totalStoriesCleansedSinceBeginningOfTime The number of 307 | * cleansed stories since the beginning of time 308 | * @param {array} cleansedStories The number of cleansed 309 | * stories included in this report 310 | * @return {string} The built Hacker News-styled HTML page of the provided, 311 | * cleansed stories 312 | */ 313 | _getHtmlBody(htmlTitle, 314 | username, 315 | cleansedStories, 316 | totalStoriesCleansedSinceBeginningOfTime) { 317 | return ` 318 | <html op="news"> 319 | <head> 320 | <meta name="referrer" content="origin"> 321 | <meta name="viewport" content="width=device-width, initial-scale=1.0"> 322 | <style> 323 | body { font-family:Verdana, Geneva, sans-serif; font-size:10pt; color:#828282; background-color: rgb(246, 246, 239); } 324 | td { font-family:Verdana, Geneva, sans-serif; font-size:10pt; color:#828282; } 325 | .admin td { font-family:Verdana, Geneva, sans-serif; font-size:8.5pt; color:#000000; } 326 | .subtext td { font-family:Verdana, Geneva, sans-serif; font-size: 7pt; color:#828282; } 327 | .rank { color:#828282; } 328 | input { font-family:monospace; font-size:10pt; } 329 | input[type="submit"] { font-family:Verdana, Geneva, sans-serif; } 330 | textarea { font-family:monospace; font-size:10pt; } 331 | a:link { color:#000000; text-decoration:none; } 332 | a:visited { color:#828282; text-decoration:none; } 333 | .spacer { height: 6px; } 334 | .default { font-family:Verdana, Geneva, sans-serif; font-size: 10pt; color:#828282; } 335 | .admin { font-family:Verdana, Geneva, sans-serif; font-size:8.5pt; color:#000000; } 336 | .title { font-family:Verdana, Geneva, sans-serif; font-size: 10pt; color:#828282; padding-left:5px; padding-right:5px; } 337 | .subtext { font-family:Verdana, Geneva, sans-serif; font-size: 7pt; color:#828282; padding-left:5px;} 338 | .yclinks { font-family:Verdana, Geneva, sans-serif; font-size: 8pt; color:#828282; } 339 | .pagetop { font-family:Verdana, Geneva, sans-serif; font-size: 10pt; color:#222222; } 340 | .comhead { font-family:Verdana, Geneva, sans-serif; font-size: 8pt; color:#828282; } 341 | .comment { font-family:Verdana, Geneva, sans-serif; font-size: 9pt; } 342 | .hnname { margin-right: 5px; } 343 | .comment a:link, .comment a:visited { text-decoration: underline; } 344 | .noshow { display: none; } 345 | .nosee { visibility: hidden; pointer-events: none; cursor: default } 346 | .c00, .c00 a:link { color:#000000; } 347 | .c5a, .c5a a:link, .c5a a:visited { color:#5a5a5a; } 348 | .c73, .c73 a:link, .c73 a:visited { color:#737373; } 349 | .c82, .c82 a:link, .c82 a:visited { color:#828282; } 350 | .c88, .c88 a:link, .c88 a:visited { color:#888888; } 351 | .c9c, .c9c a:link, .c9c a:visited { color:#9c9c9c; } 352 | .cae, .cae a:link, .cae a:visited { color:#aeaeae; } 353 | .cbe, .cbe a:link, .cbe a:visited { color:#bebebe; } 354 | .cce, .cce a:link, .cce a:visited { color:#cecece; } 355 | .cdd, .cdd a:link, .cdd a:visited { color:#dddddd; } 356 | .pagetop a:visited { color:#000000;} 357 | .topsel a:link, .topsel a:visited { color:#ffffff; } 358 | .subtext a:link, .subtext a:visited { color:#828282; } 359 | .subtext a:hover { text-decoration:underline; } 360 | .comhead a:link, .subtext a:visited { color:#828282; } 361 | .comhead a:hover { text-decoration:underline; } 362 | .default p { margin-top: 8px; margin-bottom: 0px; } 363 | .pagebreak {page-break-before:always} 364 | pre { overflow: auto; padding: 
2px; } 365 | pre:hover { overflow:auto } 366 | .votearrow { 367 | width: 10px; 368 | height: 10px; 369 | border: 0px; 370 | margin: 3px 2px 6px; 371 | background: url("grayarrow.gif") 372 | no-repeat; 373 | } 374 | .votelinks.nosee div.votearrow.rotate180 { 375 | display: none; 376 | } 377 | @media only screen and (-webkit-min-device-pixel-ratio: 2), only screen and (min-device-pixel-ratio: 2) { 378 | .votearrow { background-size: 10px; background-image: url("grayarrow2x.gif"); } 379 | } 380 | .rotate180 { 381 | -webkit-transform: rotate(180deg); /* Chrome and other webkit browsers */ 382 | -moz-transform: rotate(180deg); /* FF */ 383 | -o-transform: rotate(180deg); /* Opera */ 384 | -ms-transform: rotate(180deg); /* IE9 */ 385 | transform: rotate(180deg); /* W3C complaint browsers */ 386 | 387 | /* IE8 and below */ 388 | -ms-filter: "progid:DXImageTransform.Microsoft.Matrix(M11=-1, M12=0, M21=0, M22=-1, DX=0, DY=0, SizingMethod=\'auto expand\')"; 389 | } 390 | /* mobile device */ 391 | @media only screen 392 | and (min-width : 300px) 393 | and (max-width : 750px) { 394 | #hnmain { width: 100%; } 395 | body { padding: 0; margin: 0; width: 100%; -webkit-text-size-adjust: none; } 396 | td { height: inherit !important; } 397 | .title, .comment { font-size: inherit; } 398 | span.pagetop { display: block; margin: 3px 5px; font-size: 12px; } 399 | span.pagetop b { display: block; font-size: 15px; } 400 | table.comment-tree .comment a { display: inline-block; max-width: 200px; overflow: hidden; white-space: nowrap; 401 | text-overflow: ellipsis; vertical-align:top; } 402 | img[src="s.gif"][width="40"] { width: 12px; } 403 | img[src="s.gif"][width="80"] { width: 24px; } 404 | img[src="s.gif"][width="120"] { width: 36px; } 405 | img[src="s.gif"][width="160"] { width: 48px; } 406 | img[src="s.gif"][width="200"] { width: 60px; } 407 | img[src="s.gif"][width="240"] { width: 72px; } 408 | img[src="s.gif"][width="280"] { width: 84px; } 409 | img[src="s.gif"][width="320"] { width: 96px; } 410 | img[src="s.gif"][width="360"] { width: 108px; } 411 | img[src="s.gif"][width="400"] { width: 120px; } 412 | img[src="s.gif"][width="440"] { width: 132px; } 413 | img[src="s.gif"][width="480"] { width: 144px; } 414 | img[src="s.gif"][width="520"] { width: 156px; } 415 | img[src="s.gif"][width="560"] { width: 168px; } 416 | img[src="s.gif"][width="600"] { width: 180px; } 417 | img[src="s.gif"][width="640"] { width: 192px; } 418 | img[src="s.gif"][width="680"] { width: 204px; } 419 | img[src="s.gif"][width="720"] { width: 216px; } 420 | img[src="s.gif"][width="760"] { width: 228px; } 421 | img[src="s.gif"][width="800"] { width: 240px; } 422 | img[src="s.gif"][width="840"] { width: 252px; } 423 | .title { font-size: 11pt; line-height: 14pt; } 424 | .subtext { font-size: 9pt; } 425 | .itemlist { padding-right: 5px;} 426 | .votearrow { transform: scale(1.3,1.3); margin-right: 6px; } 427 | .votearrow.rotate180 { 428 | -webkit-transform: rotate(180deg) scale(1.3,1.3); /* Chrome and other webkit browsers */ 429 | -moz-transform: rotate(180deg) scale(1.3,1.3); /* FF */ 430 | -o-transform: rotate(180deg) scale(1.3,1.3); /* Opera */ 431 | -ms-transform: rotate(180deg) scale(1.3,1.3); /* IE9 */ 432 | transform: rotate(180deg) scale(1.3,1.3); /* W3C complaint browsers */ 433 | } 434 | .votelinks a { display: block; margin-bottom: 9px; } 435 | input[type="text"], input[type="number"], textarea { font-size: 16px; width: 90%; } 436 | .votelinks { min-width: unset; } 437 | } 438 | .comment { max-width: 1215px; overflow: auto } 439 
| pre { max-width: 900px; } 440 | 441 | @media only screen and (min-width : 300px) and (max-width : 389px) { 442 | .comment { max-width: 270px; overflow: auto } 443 | pre { max-width: 200px; } 444 | } 445 | @media only screen and (min-width : 390px) and (max-width : 509px) { 446 | .comment { max-width: 350px; overflow: auto } 447 | pre { max-width: 260px; } 448 | } 449 | @media only screen and (min-width : 510px) and (max-width : 599px) { 450 | .comment { max-width: 460px; overflow: auto } 451 | pre { max-width: 340px; } 452 | } 453 | @media only screen and (min-width : 600px) and (max-width : 689px) { 454 | .comment { max-width: 540px; overflow: auto } 455 | pre { max-width: 400px; } 456 | } 457 | @media only screen and (min-width : 690px) and (max-width : 809px) { 458 | .comment { max-width: 620px; overflow: auto } 459 | pre { max-width: 460px; } 460 | } 461 | @media only screen and (min-width : 810px) and (max-width : 899px) { 462 | .comment { max-width: 730px; overflow: auto } 463 | pre { max-width: 540px; } 464 | } 465 | @media only screen and (min-width : 900px) and (max-width : 1079px) { 466 | .comment { max-width: 810px; overflow: auto } 467 | pre { max-width: 600px; } 468 | } 469 | @media only screen and (min-width : 1080px) and (max-width : 1169px) { 470 | .comment { max-width: 970px; overflow: auto } 471 | pre { max-width: 720px; } 472 | } 473 | @media only screen and (min-width : 1170px) and (max-width : 1259px) { 474 | .comment { max-width: 1050px; overflow: auto } 475 | pre { max-width: 780px; } 476 | } 477 | @media only screen and (min-width : 1260px) and (max-width : 1349px) { 478 | .comment { max-width: 1130px; overflow: auto } 479 | pre { max-width: 840px; } 480 | } 481 | </style> 482 | <link rel="shortcut icon" href="https://news.ycombinator.com/favicon.ico"> 483 | <meta http-equiv="Content-Type" content="text/html; charset=UTF-8" /> 484 | <title>` + htmlTitle + ` 485 | 486 | 487 |
488 | [HN-styled layout table: a header bar showing "Hacker News" alongside ` + username + ` (` + totalStoriesCleansedSinceBeginningOfTime + ` stories cleansed, ` + cleansedStories.length + ` this week), the cleansed story rows from ` + this._getHtmlStoryRows(cleansedStories) + `, and a footer reading "Generated by the Hacker News Cleanser"] 523 |
524 | 525 | 526 | `; 527 | } 528 | } 529 | -------------------------------------------------------------------------------- /server/cleanser.properties: -------------------------------------------------------------------------------- 1 | # Acceptable values are: 2 | # - error: Only logs error messages. Acceptable for production. 3 | # - warning: Logs warning messages in addition to error messages. Acceptable for production. 4 | # - info: Minimal logs for tracking purposes. Not recommended in production. 5 | # - debug: All possible logs are enabled. Strongly encouraged to use during development only. 6 | log.level=info 7 | 8 | # [Required] The username for the Hacker News user the Cleanser will auto-hide 9 | # stories for. If not provided, the Cleanser will automatically quit on 10 | # startup. 11 | hacker.news.username=REQUIRED 12 | 13 | # [Required] The password for the Hacker News user the Cleanser will auto-hide 14 | # stories for. If not provided, the Cleanser will automatically quit on 15 | # startup. 16 | hacker.news.password=REQUIRED 17 | 18 | # Enabled or disables sending an occassional email report of the cleansed 19 | # Hacker News stories. If enabled with `true`, ensure that the required 20 | # `email.report.*` properties below are also set. 21 | email.report.enabled=false 22 | 23 | # Determines how often the Hacker News Cleanser will cleanse. If not 24 | # provided, the default value of 1 minute is used. 25 | #cleanser.frequency.in.minutes=1 26 | 27 | # Set this property to adjust the frequency of the report email. If not set, 28 | # the default value of 7 days is used. 29 | #email.report.frequency.in.days=7 30 | 31 | # The email address that will send the report email. This is a required field 32 | # if the email report is enabled. If not provided, the report will be 33 | # disabled. 34 | #email.report.sender= 35 | 36 | # The password for the email address that will send the report email. This is 37 | # a required field if the email report is enabled. If not provided, the report 38 | # will be disabled. 39 | #email.report.sender.password= 40 | 41 | # Comma delimited list of email addresses to send the report to. This is a 42 | # required field if the email report is enabled. If not provided, the report 43 | # will be disabled. 44 | #email.report.recipients= -------------------------------------------------------------------------------- /server/main.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | 3 | // DEPENDENCIES 4 | // ------------ 5 | // External 6 | import path from "path"; 7 | // Local 8 | import MongoClient from "./MongoClient.js"; 9 | import Logger from "./Logger.js"; 10 | import paths from "./paths.js"; 11 | import PropertyManager from "./PropertyManager.js"; 12 | import ReportMailman from "./ReportMailman.js"; 13 | import Cleanser from "./Cleanser.js"; 14 | 15 | 16 | // CONSTANTS 17 | // --------- 18 | const CLASS_NAME = "main" 19 | 20 | 21 | // GLOBALS 22 | // ------- 23 | let propertyManager = null; 24 | let mongoClient = null; 25 | let cleanser = null; 26 | let mailman = null; 27 | 28 | let log = new Logger(CLASS_NAME); 29 | let propertiesFileName = path.join(paths.SERVER_DIRECTORY_PATH, "cleanser.properties"); 30 | 31 | 32 | // STARTUP 33 | // ------- 34 | log.info("Starting up..."); 35 | 36 | async function startup() { 37 | // 1. Load properties 38 | propertyManager = new PropertyManager(); 39 | await propertyManager.load(propertiesFileName); 40 | 41 | // 2. 
Connect to MongoDB 42 | mongoClient = new MongoClient(propertyManager); 43 | await mongoClient.connect(); 44 | 45 | // 3. Start cleanser 46 | cleanser = new Cleanser(propertyManager, mongoClient); 47 | cleanser.start(); 48 | 49 | // 4. Start Report Mailman 50 | mailman = new ReportMailman(propertyManager, mongoClient); 51 | mailman.start(); 52 | } 53 | 54 | try { 55 | startup(); 56 | } catch (error) { 57 | log.error("startup", error); 58 | } 59 | 60 | 61 | // SHUTDOWN 62 | // -------- 63 | ["SIGHUP", 64 | "SIGINT", 65 | "SIGQUIT", 66 | "SIGIL", 67 | "SIGTRAP", 68 | "SIGABRT", 69 | "SIGBUS", 70 | "SIGFPE", 71 | "SIGUSR1", 72 | "SIGSEGV", 73 | "SIGUSR2", 74 | "SIGTERM" 75 | ].forEach((signal) => { 76 | // Catching & handling all terminating signals 77 | process.on(signal, () => { 78 | log.info("Received signal=" + signal); 79 | shutdown(); 80 | 81 | // Force a shutdown anyway if still alive after ten seconds 82 | setTimeout(() => { 83 | log.warn("Shutdown still not complete, forcing shutdown... NOW"); 84 | process.exit(1); 85 | }, 10000); 86 | }); 87 | }) 88 | async function shutdown() { 89 | log.info("Shutting down..."); 90 | if (cleanser) { 91 | await cleanser.stop(); 92 | } 93 | if (mongoClient) { 94 | await mongoClient.close(); 95 | } 96 | if (mailman) { 97 | await mailman.stop(); 98 | } 99 | log.info("Completed shutdown"); 100 | process.exit(0); 101 | } -------------------------------------------------------------------------------- /server/paths.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | 3 | // DEPENDENCIES 4 | // ------------ 5 | // External 6 | import path from "path"; 7 | import url from "url"; 8 | // Local 9 | 10 | 11 | // CONSTANTS 12 | // --------- 13 | 14 | let __filename = url.fileURLToPath(import.meta.url); 15 | let CLEANSER_ROOT_DIRECTORY_PATH = path.join(path.dirname(__filename), "../"); 16 | let SERVER_DIRECTORY_PATH = path.join(CLEANSER_ROOT_DIRECTORY_PATH, "server"); 17 | 18 | export default { 19 | CLEANSER_ROOT_DIRECTORY_PATH, 20 | SERVER_DIRECTORY_PATH 21 | } -------------------------------------------------------------------------------- /server/util.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | 3 | let minutesToMillis = function(minutes) { 4 | return minutes * 60000; 5 | }; 6 | let daysToMillis = function(days) { 7 | return days * 86400000; 8 | }; 9 | 10 | /** 11 | * Forcing request-promise to return both the image AND the headers (in case 12 | * an image is not returned as expected) 13 | */ 14 | let includeHeaders = function(body, response, resolveWithFullResponse) { 15 | return { 16 | headers: response.headers, 17 | data: body 18 | }; 19 | }; 20 | 21 | let sleepForSeconds = function(seconds) { 22 | return new Promise(function(resolve, reject) { 23 | _sleepForSeconds( 24 | seconds 25 | ).then(function() { 26 | resolve(); 27 | }); 28 | }); 29 | }; 30 | function _sleepForSeconds(seconds) { 31 | return new Promise(resolve => setTimeout(resolve, seconds * 1000)); 32 | } 33 | 34 | export default { 35 | minutesToMillis, 36 | daysToMillis, 37 | includeHeaders, 38 | sleepForSeconds 39 | } --------------------------------------------------------------------------------