// HNBayes - a naive Bayes "will I like this story?" bookmarklet for Hacker News.
//
// Repository layout:
//   ├── images
//   │   ├── eye.png
//   │   ├── thumbs-down.png
//   │   └── thumbs-up.png
//   ├── bookmarklet.js
//   └── hnbayes.js
//
// Images:
//   /images/eye.png:         https://raw.githubusercontent.com/rogerbraun/HNBayes/HEAD/images/eye.png
//   /images/thumbs-down.png: https://raw.githubusercontent.com/rogerbraun/HNBayes/HEAD/images/thumbs-down.png
//   /images/thumbs-up.png:   https://raw.githubusercontent.com/rogerbraun/HNBayes/HEAD/images/thumbs-up.png
//
// /bookmarklet.js (the loader the user bookmarks; it injects hnbayes.js):
//   javascript:(function(){var script = document.createElement("script"); script.src="https://raw.github.com/rogerbraun/HNBayes/master/hnbayes.js"; document.body.appendChild(script);})();
//
// /hnbayes.js follows.

/**
 * Naive Bayes text classifier.
 *
 * Training data is kept in three public fields:
 *   - `data`:          word -> { klass -> number of training documents containing the word }
 *   - `klasses`:       klass -> number of training documents of that class
 *   - `documentCount`: total number of training documents
 *
 * All word/class lookups use own-property checks, so words such as
 * "constructor" or "__proto__" are counted like any other word instead of
 * reading from (or writing to) Object.prototype.
 *
 * @param {boolean} [useLocalStorage] When truthy, training data is loaded from
 *   and saved to `window.localStorage` under the key "BayesFilterData".
 */
var BayesFilter = (function () {
  var STORAGE_KEY = "BayesFilterData";
  var objectHasOwn = Object.prototype.hasOwnProperty;

  // True when `key` is an own property of `object` (never an inherited one).
  function hasOwn(object, key) {
    return objectHasOwn.call(object, key);
  }

  // Own-property read; inherited properties are reported as undefined.
  function getOwn(object, key) {
    return hasOwn(object, key) ? object[key] : undefined;
  }

  // Own-property write. defineProperty is used because a plain assignment to
  // the key "__proto__" would change the prototype instead of storing a value.
  function setOwn(object, key, value) {
    Object.defineProperty(object, key, {
      value: value,
      writable: true,
      enumerable: true,
      configurable: true
    });
  }

  // Adds one to the own counter `key` of `counts`, creating it when missing.
  function increment(counts, key) {
    setOwn(counts, key, (getOwn(counts, key) || 0) + 1);
  }

  function isRecord(value) {
    return value !== null && typeof value === "object";
  }

  function warn(message, error) {
    if (typeof console !== "undefined" && console.warn) {
      console.warn(message, error);
    }
  }

  // Share of training documents that belong to `klass`; 0 when the class (or
  // the whole filter) has not been trained, so callers never see NaN.
  function priorOf(filter, klass) {
    var klassCount = getOwn(filter.klasses, klass);
    if (!klassCount || !filter.documentCount) {
      return 0;
    }
    return klassCount / filter.documentCount;
  }

  function BayesFilter(useLocalStorage) {
    this.klasses = {};
    this.data = {};
    this.assumedProbability = 0.5;
    this.assumedProbabilityWeight = 1;
    this.documentCount = 0;
    this.useLocalStorage = !!useLocalStorage;

    if (this.useLocalStorage) {
      this.loadFromLocalStorage();
    }
  }

  // Local Storage

  /**
   * Replaces the in-memory training data with the copy stored in
   * localStorage, if there is one. Unreadable or malformed data is reported
   * with console.warn and ignored, leaving the filter as it was.
   */
  BayesFilter.prototype.loadFromLocalStorage = function () {
    var savedData;
    try {
      var raw = window.localStorage.getItem(STORAGE_KEY);
      if (!raw) {
        return;
      }
      savedData = JSON.parse(raw);
    } catch (error) {
      warn("BayesFilter: could not read saved training data.", error);
      return;
    }
    if (!isRecord(savedData)) {
      return;
    }
    this.data = isRecord(savedData.data) ? savedData.data : {};
    this.klasses = isRecord(savedData.klasses) ? savedData.klasses : {};
    this.documentCount =
      typeof savedData.documentCount === "number" ? savedData.documentCount : 0;
  };

  /**
   * Writes the training data to localStorage as JSON. A failing write (for
   * example a full quota) is reported with console.warn; the in-memory data
   * is unaffected.
   */
  BayesFilter.prototype.saveToLocalStorage = function () {
    var savedData = {
      data: this.data,
      klasses: this.klasses,
      documentCount: this.documentCount
    };
    try {
      window.localStorage.setItem(STORAGE_KEY, JSON.stringify(savedData));
    } catch (error) {
      warn("BayesFilter: could not save training data.", error);
    }
  };

  // Helpers

  BayesFilter.prototype.helpers = {
    /**
     * Splits text into its distinct lowercase words, in order of first
     * appearance. Anything that is not [A-Za-z0-9_] separates words.
     * TODO: Rethink for utf-8 (non-ASCII letters currently act as separators).
     *
     * @param {string} text Text to tokenize; null/undefined yields [].
     * @returns {string[]} Unique lowercase words.
     */
    getWordSet: function (text) {
      var tokens = String(text == null ? "" : text).split(/\W/);
      var seen = Object.create(null); // no prototype, so any word is a safe key
      var words = [];
      for (var i = 0; i < tokens.length; i++) {
        var word = tokens[i].toLowerCase();
        if (word !== "" && !seen[word]) {
          seen[word] = true;
          words.push(word);
        }
      }
      return words;
    },

    /**
     * Counts one more `klass` document for every word in `words`.
     * Note: `oldData` is updated in place and returned (it is not cloned).
     *
     * @param {Object} oldData word -> { klass -> count } table to update.
     * @param {string[]} words Words of the document.
     * @param {string} klass Class the document was labelled with.
     * @returns {Object} The same `oldData` object.
     */
    addWordSet: function (oldData, words, klass) {
      for (var i = 0; i < words.length; i++) {
        var word = words[i];
        var counts = getOwn(oldData, word);
        if (!counts) {
          counts = {};
          setOwn(oldData, word, counts);
        }
        increment(counts, klass);
      }
      return oldData;
    },

    /**
     * Counts one more document of class `klass`.
     * Note: `oldKlasses` is updated in place and returned.
     *
     * @param {Object} oldKlasses klass -> document count table to update.
     * @param {string} klass Class to count.
     * @returns {Object} The same `oldKlasses` object.
     */
    addKlass: function (oldKlasses, klass) {
      increment(oldKlasses, klass);
      return oldKlasses;
    }
  };

  // Functions

  /**
   * @param {string} word Word to look up (case-insensitive).
   * @param {string} klass Class name.
   * @returns {number} Number of `klass` training documents containing `word`.
   */
  BayesFilter.prototype.wordCount = function (word, klass) {
    var counts = getOwn(this.data, word.toLowerCase());
    return (counts && getOwn(counts, klass)) || 0;
  };

  /**
   * @param {string} word Word to look up (case-insensitive).
   * @returns {number} Number of training documents, of any class, containing `word`.
   */
  BayesFilter.prototype.totalWordCount = function (word) {
    var counts = getOwn(this.data, word.toLowerCase());
    var total = 0;
    if (counts) {
      for (var klass in counts) {
        if (hasOwn(counts, klass)) {
          total += counts[klass];
        }
      }
    }
    return total;
  };

  /**
   * @param {string} word Word to look up (case-insensitive).
   * @param {string} klass Class name.
   * @returns {number} Fraction of `klass` documents that contain `word`;
   *   0 when the word or the class is unknown.
   */
  BayesFilter.prototype.wordProbability = function (word, klass) {
    var wordCount = this.wordCount(word, klass);
    var klassCount = getOwn(this.klasses, klass);
    if (!wordCount || !klassCount) {
      return 0;
    }
    return wordCount / klassCount;
  };

  /**
   * Word probability pulled towards `assumedProbability`: the fewer documents
   * a word has been seen in, the closer the result stays to the assumption.
   *
   * @param {string} word Word to look up (case-insensitive).
   * @param {string} klass Class name.
   * @returns {number} Weighted probability of `word` given `klass`.
   */
  BayesFilter.prototype.weightedProbability = function (word, klass) {
    var unweightedProbability = this.wordProbability(word, klass);
    var totalWordCount = this.totalWordCount(word);
    var assumed = this.assumedProbability * this.assumedProbabilityWeight;
    return (
      (assumed + totalWordCount * unweightedProbability) /
      (this.assumedProbabilityWeight + totalWordCount)
    );
  };

  /**
   * Product over the document's distinct words of
   * `2 * weightedProbability(word, klass)`.
   *
   * The factor 2 is the same for every class, so it does not affect
   * comparisons between classes. For long documents this product can still
   * overflow to Infinity or underflow to 0; use categoryLogProbability when
   * comparing classes.
   *
   * @param {string} dokument Text to score.
   * @param {string} klass Class name.
   * @returns {number} Unnormalized score of the document given the class.
   */
  BayesFilter.prototype.documentProbability = function (dokument, klass) {
    var words = this.helpers.getWordSet(dokument);
    var probability = 1;
    for (var i = 0; i < words.length; i++) {
      probability = probability * this.weightedProbability(words[i], klass) * 2;
    }
    return probability;
  };

  /**
   * documentProbability multiplied by the share of training documents that
   * belong to `klass`.
   *
   * @param {string} dokument Text to score.
   * @param {string} klass Class name.
   * @returns {number} Unnormalized score; 0 when `klass` was never trained.
   */
  BayesFilter.prototype.categoryProbability = function (dokument, klass) {
    var prior = priorOf(this, klass);
    if (prior === 0) {
      return 0; // avoids NaN from undefined / 0 and from 0 * Infinity
    }
    return this.documentProbability(dokument, klass) * prior;
  };

  /**
   * Log-space counterpart of categoryProbability: the sum of
   * log(weightedProbability) over the document's distinct words plus the log
   * of the class prior. It omits the constant factor 2 per word, so for two
   * classes A and B, `Math.exp(logA - logB)` equals the ratio of their
   * categoryProbability values, but without overflow or underflow.
   *
   * @param {string} dokument Text to score.
   * @param {string} klass Class name.
   * @returns {number} Log score; -Infinity when `klass` was never trained.
   */
  BayesFilter.prototype.categoryLogProbability = function (dokument, klass) {
    var prior = priorOf(this, klass);
    if (prior === 0) {
      return -Infinity;
    }
    var words = this.helpers.getWordSet(dokument);
    var logProbability = Math.log(prior);
    for (var i = 0; i < words.length; i++) {
      logProbability += Math.log(this.weightedProbability(words[i], klass));
    }
    return logProbability;
  };

  /**
   * Learns that `text` belongs to `klass`, and persists the result when the
   * filter was created with local storage enabled.
   *
   * @param {string} text Document text.
   * @param {string} klass Class label, e.g. "good" or "bad".
   * @returns {BayesFilter} This filter, for chaining.
   */
  BayesFilter.prototype.train = function (text, klass) {
    var words = this.helpers.getWordSet(text);
    this.data = this.helpers.addWordSet(this.data, words, klass);
    this.klasses = this.helpers.addKlass(this.klasses, klass);
    this.documentCount += 1;

    if (this.useLocalStorage) {
      this.saveToLocalStorage();
    }
    return this;
  };

  return BayesFilter;
})();

// Browser bootstrap: make sure jQuery is available, then decorate the page.
// Skipped entirely when there is no window/document.
(function (win, doc) {
  if (!win || !doc) {
    return;
  }

  var JQUERY_VERSION = "1.7.1";
  var VIEWED_URLS_KEY = "viewedUrls";
  var IMAGE_BASE = "https://raw.githubusercontent.com/rogerbraun/HNBayes/HEAD/images/";

  // Numeric, component-wise version comparison ("1.10.0" is newer than "1.7.1").
  function isOlderVersion(actual, required) {
    var actualParts = String(actual).split(".");
    var requiredParts = String(required).split(".");
    var length = Math.max(actualParts.length, requiredParts.length);
    for (var i = 0; i < length; i++) {
      var have = parseInt(actualParts[i], 10) || 0;
      var want = parseInt(requiredParts[i], 10) || 0;
      if (have !== want) {
        return have < want;
      }
    }
    return false;
  }

  // Calls `callback(jQuery)` with the page's jQuery when it is at least
  // `minVersion`; otherwise loads that version from the Google CDN in
  // noConflict mode and calls `callback(jQuery, 1)` once it has loaded.
  function withJQuery(minVersion, callback) {
    var existing = win.jQuery;
    if (existing && existing.fn && !isOlderVersion(existing.fn.jquery, minVersion)) {
      callback(existing);
      return;
    }

    var done = false;
    var script = doc.createElement("script");
    script.type = "text/javascript";
    // https: an http script would be blocked as mixed content on an https page.
    script.src = "https://ajax.googleapis.com/ajax/libs/jquery/" + minVersion + "/jquery.min.js";
    script.onload = script.onreadystatechange = function () {
      var state = script.readyState;
      if (!done && (!state || state === "loaded" || state === "complete")) {
        done = true;
        var jq = win.jQuery.noConflict(true);
        callback(jq, 1);
        jq(script).remove();
      }
    };
    (doc.getElementsByTagName("head")[0] || doc.documentElement).appendChild(script);
  }

  // The original script added Array.prototype.contains / uniq as enumerable
  // properties. They are kept for backward compatibility, but non-enumerable
  // (so `for...in` over arrays on the host page is unaffected) and only when
  // the page does not already define them.
  function installArrayHelpers() {
    function define(name, implementation) {
      if (Array.prototype[name]) {
        return;
      }
      Object.defineProperty(Array.prototype, name, {
        value: implementation,
        writable: true,
        configurable: true,
        enumerable: false
      });
    }

    define("contains", function (element) {
      return this.some(function (cmpElement) {
        return element === cmpElement;
      });
    });

    define("uniq", function () {
      return this.reduce(function (result, element) {
        if (!result.contains(element)) {
          result.push(element);
        }
        return result;
      }, []);
    });
  }

  function warn(message, error) {
    if (typeof console !== "undefined" && console.warn) {
      console.warn(message, error);
    }
  }

  // URL of the text-extraction API call for `url`. The story URL is a query
  // parameter value, so it must be encoded with encodeURIComponent; encodeURI
  // leaves "&", "?", "=" and "#" intact and would corrupt the request.
  // NOTE(review): this endpoint is plain http; on an https page the JSONP
  // request is likely to be blocked as mixed content - confirm whether the
  // service is reachable over https.
  function viewTextRequest(url) {
    return "http://viewtext.org/api/text?url=" + encodeURIComponent(url) + "&callback=?";
  }

  function run($) {
    installArrayHelpers();

    // The original script exposed `filter` as a global; kept for backward
    // compatibility, but assigned explicitly.
    var filter = new BayesFilter(true); // Use local storage
    win.filter = filter;

    // Load viewed urls

    var viewedUrls = {};
    try {
      var maybeUrls = win.localStorage.getItem(VIEWED_URLS_KEY);
      if (maybeUrls) {
        viewedUrls = JSON.parse(maybeUrls) || {};
      }
    } catch (error) {
      warn("HNBayes: could not read the list of viewed urls.", error);
    }

    function saveViewedUrls() {
      try {
        win.localStorage.setItem(VIEWED_URLS_KEY, JSON.stringify(viewedUrls));
      } catch (error) {
        warn("HNBayes: could not save the list of viewed urls.", error);
      }
    }

    // Add like / dislike links

    // NOTE(review): the element markup was empty in the reviewed source (it
    // looks like the HTML was stripped). It is reconstructed here from the
    // "img.training, img.rate" style rule, the ".training" / ".rate_result"
    // selectors used by the handlers and the three images in the repository -
    // confirm against the intended markup.
    var like = $("<img class='training' alt='Like' src='" + IMAGE_BASE + "thumbs-up.png' />");
    var dislike = $("<img class='training' alt='Dislike' src='" + IMAGE_BASE + "thumbs-down.png' />");
    var rate = $("<img class='rate' alt='Rate' src='" + IMAGE_BASE + "eye.png' />");
    var rate_result = $("<span class='rate_result'></span>");

    // Fetches the story text and trains the filter with it; the story is
    // remembered as viewed right away so it gets no training buttons next time.
    function trainFromUrl(url, klass) {
      $.getJSON(viewTextRequest(url), function (response) {
        if (response && typeof response.content === "string") {
          filter.train(response.content, klass);
        }
      });
      viewedUrls[url] = true;
      saveViewedUrls();
    }

    // Some styles
    var style = doc.createElement("style");
    style.type = "text/css";
    style.innerHTML = "img.training, img.rate { padding-left:5px; cursor: pointer; }";
    doc.body.appendChild(style);

    // Fetches the story text and writes the verdict into `target`.
    // Classes are compared in log space so that long articles cannot
    // overflow or underflow into a "NaN times more likely" verdict.
    function rateFromUrl(url, target) {
      $.getJSON(viewTextRequest(url), function (response) {
        var content = response && typeof response.content === "string" ? response.content : "";
        var good = filter.categoryLogProbability(content, "good");
        var bad = filter.categoryLogProbability(content, "bad");
        var message;
        if (good === -Infinity && bad === -Infinity) {
          message = " Not enough training data yet.";
        } else if (good > bad) {
          message = " Probably good! " + Math.exp(good - bad).toPrecision(5) + " times more likely.";
        } else {
          message = " Probably bad! " + Math.exp(bad - good).toPrecision(5) + " times more likely.";
        }
        $(target).text(message);
      });
    }

    // href of the story link next to the clicked control, or null.
    function storyLinkFor(target) {
      var anchor = target.siblings("a")[0];
      return anchor ? anchor.href : null;
    }

    // Click handler that trains with `klass` and removes both training buttons.
    function trainingHandler(klass) {
      return function (event) {
        var target = $(event.target);
        var link = storyLinkFor(target);
        if (!link) {
          return;
        }
        target.siblings(".training").remove();
        target.remove();
        trainFromUrl(link, klass);
      };
    }

    // Handlers are attached before insertion on purpose: jQuery clones the
    // element (with its handlers) for every story it is inserted after.
    like.on("click", trainingHandler("good"));
    dislike.on("click", trainingHandler("bad"));

    rate.on("click", function (event) {
      var target = $(event.target);
      var link = storyLinkFor(target);
      var result_span = target.siblings(".rate_result")[0];
      if (link && result_span) {
        rateFromUrl(link, result_span);
      }
    });

    var stories = $(".title:nth-child(3) a:nth-child(1)");
    stories.after(rate_result);
    stories.after(rate);

    var newStories = stories.filter(function () {
      return !viewedUrls[this.href];
    });

    newStories.after(dislike);
    newStories.after(like);
  }

  withJQuery(JQUERY_VERSION, run);
})(
  typeof window !== "undefined" ? window : null,
  typeof document !== "undefined" ? document : null
);