├── .gitignore ├── .npmignore ├── .travis.yml ├── LICENSE ├── README.md ├── decay.js ├── package.json ├── rating-equation.png └── test └── order.test.js /.gitignore: -------------------------------------------------------------------------------- 1 | node_modules 2 | decay-cov.js 3 | -------------------------------------------------------------------------------- /.npmignore: -------------------------------------------------------------------------------- 1 | test 2 | examples 3 | rating-equation.png 4 | .gitignore 5 | .travis.yml 6 | .npmignore 7 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: node_js 2 | sudo: false 3 | 4 | node_js: 5 | - 4 6 | - node 7 | 8 | notifications: 9 | email: false 10 | 11 | before_script: 12 | - npm link 13 | 14 | after_script: 15 | - npm install coveralls 16 | - npm run coverage | coveralls 17 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | (The MIT License) 2 | 3 | Copyright (c) 2011 Eirik Albrigtsen 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining 6 | a copy of this software and associated documentation files (the 7 | 'Software'), to deal in the Software without restriction, including 8 | without limitation the rights to use, copy, modify, merge, publish, 9 | distribute, sublicense, and/or sell copies of the Software, and to 10 | permit persons to whom the Software is furnished to do so, subject to 11 | the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be 14 | included in all copies or substantial portions of the Software. 
15 | 16 | THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, 17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 19 | IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 20 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 21 | TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 22 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # decay 2 | [![npm status](http://img.shields.io/npm/v/decay.svg)](https://www.npmjs.org/package/decay) 3 | [![build status](https://secure.travis-ci.org/clux/decay.svg)](http://travis-ci.org/clux/decay) 4 | [![dependency status](https://david-dm.org/clux/decay.svg)](https://david-dm.org/clux/decay) 5 | [![coverage status](http://img.shields.io/coveralls/clux/decay.svg)](https://coveralls.io/r/clux/decay) 6 | 7 | This library houses 3 popularity-estimating algorithms employed by bigger news sites to sort for the best content: 8 | 9 | 1. `wilsonScore` - Reddit's _best_ comment scoring system 10 | 2. `redditHot` - Reddit's _hot_ post scoring system for news posts 11 | 3. `hackerHot` - Hackernews' scoring system 12 | 13 | ![Wilson score equation](https://github.com/clux/decay/raw/master/rating-equation.png) 14 | 15 | Algorithms may cause scores to *decay* based on distance to post time. 16 | 17 | ## 1. Decaying algorithms 18 | Algorithms that are designed to decay based on time need continual recomputation of scores. 
An example of doing so would be keeping track of, and periodically computing the score(s) required in a node process on a set of suitable candidates: 19 | 20 | ```js 21 | var decay = require('decay') 22 | , hotScore = decay.redditHot(); 23 | 24 | setInterval(function () { 25 | candidates = []; // perhaps get recent posts saved in db here 26 | candidates.forEach(function (c) { 27 | c.score = hotScore(c.upVotes, c.dnVotes, c.date); 28 | // save so that next GET /entry/ gets an updated ordering 29 | save(c); 30 | }); 31 | }, 1000 * 60 * 5); // run every 5 minutes, say 32 | ``` 33 | 34 | ## 2. Non-decaying algorithms 35 | Algorithms that produce a time-agnostic popularity score are typically good for comments. For best results, simply recompute the score at every new vote: 36 | 37 | ```js 38 | var decay = require('decay') 39 | , wilsonScore = decay.wilsonScore(); 40 | 41 | // assume req.entry is the item being voted on 42 | app.post('/entry/upvote', middleWare, function (req, res) { 43 | // call wilsonScore with ups, downs to recompute 44 | req.entry.score = wilsonScore(req.entry.upVotes + 1, req.entry.dnVotes); 45 | 46 | // save new score in database so that new pageviews sort 47 | save(req.entry); 48 | }); 49 | ``` 50 | 51 | ## Usage 52 | Decay exports 3 scoring function factories. 53 | 54 | Two of these algorithms decay with time, and the other is based purely on statistical popularity. 55 | 56 | ```js 57 | // 1. zero decay 58 | var wilsonScore = decay.wilsonScore(zScore); 59 | var score = wilsonScore(upVotes, downVotes); 60 | 61 | // 2. decays 62 | var redditHotScore = decay.redditHot(halflife); 63 | var score = redditHotScore(upVotes, downVotes, date); 64 | 65 | // 3. decays 66 | var hackerHotScore = decay.hackerHot(gravity); 67 | var score = hackerHotScore(upVotes, date); 68 | ``` 69 | 70 | ## Parameter Explanation 71 | ### 1. Wilson Score 72 | AKA Reddit's *[Best](http://blog.reddit.com/2009/10/reddits-new-comment-sorting-system.html)* comment sorting system. 
[Source](https://github.com/reddit/reddit/blob/bd922104b971a5c6794b199f364a06fdf61359a2/r2/r2/lib/db/_sorts.pyx#L70-L85) 73 | 74 | Statistically, it is the lower bound of the [Wilson Score interval](http://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval) at the alpha level based on supplied Z score. 75 | 76 | The optional `zScore` parameter can be passed to the exported `wilsonScore` factory. 77 | The Z score is a statistical value which roughly means how many standard deviations of safety you want, so it maps directly onto the confidence level of the Wilson Score interval. 78 | 79 | It will default to `z=1.96` if left out, representing a `95%` confidence level in the lower bound. Otherwise, values from `1.0` (69%) to `3.3` (99.9%) are good alternatives. 80 | 81 | ### 2. Reddit Hot Sort 82 | Based on the difference between ups/downs, and decays with time. Causes hive mind effects in large crowds. 83 | 84 | An optional _halflife_ parameter can be passed to the exported `redditHot` factory. 85 | The half-life defaults to 45000 [s]. For info on the effects of this parameter read the original [blog post](https://medium.com/hacking-and-gonzo/how-reddit-ranking-algorithms-work-ef111e33d0d9) about it. See also the canonical [reddit source version](https://github.com/reddit/reddit/blob/bd922104b971a5c6794b199f364a06fdf61359a2/r2/r2/lib/db/_sorts.pyx#L47-L58). 86 | 87 | ### 3. HackerNews Hot Sort 88 | Based on simply the amount of upvotes, and decays with time. Prone to advertising abuse. 89 | 90 | An optional `gravity` parameter (defaulting to `1.8`) can be passed to the exported `hackerHot` factory. For info on the effects of this parameter read the original [blog post](https://medium.com/hacking-and-gonzo/how-hacker-news-ranking-algorithm-works-1d9b0cf2c08d) about it. 91 | 92 | ## Installation 93 | 94 | ```bash 95 | $ npm install decay 96 | ``` 97 | 98 | ## License 99 | MIT-Licensed. See LICENSE file for details. 
100 | -------------------------------------------------------------------------------- /decay.js: -------------------------------------------------------------------------------- 1 | // decaying 2 | 3 | /** 4 | * Reddit's hot sort 5 | * (popularized by reddit's news ranking) 6 | * https://medium.com/hacking-and-gonzo/how-reddit-ranking-algorithms-work-ef111e33d0d9 7 | * Corrected for decay errors in post 8 | */ 9 | exports.redditHot = function (decay) { 10 | if (decay == null) { 11 | decay = 45000; 12 | } 13 | return function (ups, downs, date) { 14 | var s = ups - downs 15 | , sign = Math.sign(s) 16 | , order = Math.log(Math.max(Math.abs(s), 1)) / Math.LN10 17 | , secAge = (Date.now() - date.getTime()) / 1000; 18 | return sign*order - secAge / decay; 19 | }; 20 | }; 21 | 22 | /** 23 | * Hackernews' hot sort 24 | * https://medium.com/hacking-and-gonzo/how-hacker-news-ranking-algorithm-works-1d9b0cf2c08d 25 | */ 26 | exports.hackerHot = function (gravity) { 27 | if (gravity == null) { 28 | gravity = 1.8; 29 | } 30 | return function (votes, itemDate) { 31 | var hourAge = (Date.now() - itemDate.getTime()) / (1000 * 3600); 32 | return (votes - 1) / Math.pow(hourAge + 2, gravity); 33 | }; 34 | }; 35 | 36 | // non-decaying 37 | 38 | /** 39 | * Wilson score interval sort 40 | * (popularized by reddit's best comment system) 41 | * http://www.evanmiller.org/how-not-to-sort-by-average-rating.html 42 | */ 43 | exports.wilsonScore = function (z) { 44 | if (z == null) { 45 | // z represents the statistical confidence 46 | // z = 1.0 => ~69%, 1.96 => ~95% (default) 47 | z = 1.96; 48 | } 49 | 50 | return function (ups, downs) { 51 | var n = ups + downs; 52 | if (n === 0) { 53 | return 0; 54 | } 55 | 56 | var p = ups / n 57 | , sqrtexpr = ( p*(1-p) + z*z/(4*n) ) / n; 58 | return ( p + z*z/(2*n) - z*Math.sqrt(sqrtexpr) ) / (1+z*z/n); 59 | }; 60 | }; 61 | -------------------------------------------------------------------------------- /package.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "author": "Eirik Albrigtsen ", 3 | "name": "decay", 4 | "description": "Famous sorting algorithms based on vote popularity and time", 5 | "version": "1.0.12", 6 | "repository": { 7 | "type": "git", 8 | "url": "clux/decay" 9 | }, 10 | "main": "decay.js", 11 | "keywords": [ 12 | "hotsort", 13 | "hackerhot", 14 | "wilsonscore", 15 | "popularity", 16 | "sorting", 17 | "voting" 18 | ], 19 | "scripts": { 20 | "test": "bndg test/*.test.js", 21 | "precoverage": "istanbul cover bndg test/*.test.js", 22 | "coverage": "cat coverage/lcov.info && rm -rf coverage" 23 | }, 24 | "dependencies": {}, 25 | "devDependencies": { 26 | "bandage": "^0.5.0", 27 | "istanbul": "^0.4.5" 28 | }, 29 | "license": "MIT" 30 | } 31 | -------------------------------------------------------------------------------- /rating-equation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clux/decay/6ce01aebb1d715deb02783d59d81d8b5fbbf1760/rating-equation.png -------------------------------------------------------------------------------- /test/order.test.js: -------------------------------------------------------------------------------- 1 | const decay = require('..'); 2 | const test = require('bandage'); 3 | 4 | /** 5 | * Tests basic reasonable assumptions: 6 | * 7 | * If upvotes increase, the score increases 8 | * If downvotes (if applicable) increases, the score decreases 9 | * If dates decrease, the score decreases 10 | * Certain instantiation parameters give a higher score than others 11 | */ 12 | 13 | test('wilsonScore', function *(t) { 14 | const s1 = decay.wilsonScore(1) 15 | , s2 = decay.wilsonScore(2) 16 | , s3 = decay.wilsonScore(); 17 | 18 | t.ok(s1(5, 0) > s1(4, 0), 'upvotes good'); 19 | t.ok(s1(5, 3) < s1(5, 2), 'downvotes bad'); 20 | 21 | t.ok(s1(10, 2) > s2(10, 2), 'higher confidence means lowers bounds'); 22 | 23 | t.equal(s3(0, 
0), 0, 'no votes gives a zero'); 24 | t.ok(s3(0, 100000) >= 0, 'scores always >= 0 (even if all downvotes)'); 25 | t.ok(s3(1000000, 0) < 1, 'and always less than 1'); 26 | 27 | // lock down some values - verified with redit's python version 28 | t.eq(s3(1, 0), 0.20654329147389294, '1 ups 0 downs'); 29 | t.eq(s3(10, 10), 0.2992949144298199, '10 ups 10 downs'); 30 | t.eq(s3(10, 0), 0.7224598312333834, '10 ups 0 downs'); 31 | t.eq(s3(0, 10), 0, '0 ups 10 downs'); 32 | t.eq(s3(100, 50), 0.5878960768592671, '100 ups 50 downs'); 33 | t.eq(s3(50, 100), 0.2628864565745068, '50 ups 100 downs'); 34 | }); 35 | 36 | 37 | // decaying algorithms need some dates 38 | const d1 = new Date(); 39 | const d2 = new Date(); 40 | d1.setTime(d1.getTime() - 1 * 60 * 1000); // turn back one minute 41 | d2.setTime(d2.getTime() - 61 * 60 * 1000); // turn back one hour extra 42 | 43 | test('redditHot', function *(t) { 44 | const hot = decay.redditHot(); 45 | 46 | t.ok(hot(10, 2, d1) > hot(9, 2, d1), 'upvotes good'); 47 | t.ok(hot(10, 3, d1) < hot(10, 2, d1), 'downvotes bad'); 48 | 49 | t.ok(hot(5, 1, d1) < hot(5, 1, new Date()), 'fresher post good'); 50 | 51 | t.ok(hot(5, 1, d1) > hot(5, 1, d2), 'age causes decay'); 52 | 53 | t.ok(hot(5, 2, d1) > hot(1, 5, d2), 'sign sanity'); 54 | 55 | const hotLow = decay.redditHot(20000); // lower number => faster decay 56 | t.ok(hotLow(5, 1, d1) < hot(5, 1, d1), 'faster decay => slightly lower numbers early on'); 57 | }); 58 | 59 | test('hackerHot', function *(t) { 60 | const hhot = decay.hackerHot(); 61 | 62 | t.ok(hhot(10, d1) > hhot(9, d1), 'upvotes good'); 63 | t.ok(hhot(10, d1) < hhot(10, new Date()), 'fresher post good'); 64 | t.ok(hhot(5, d1) > hhot(5, d2), 'age causes decay'); 65 | 66 | var hhothigh = decay.hackerHot(3); // more gravity => faster decay 67 | t.ok(hhothigh(5, d1) < hhot(5, d1), 'more gravity => slightly lower numbers in score early on'); 68 | }); 69 | --------------------------------------------------------------------------------