├── images
│   ├── eye.png
│   ├── thumbs-down.png
│   └── thumbs-up.png
├── bookmarklet.js
└── hnbayes.js
/images/eye.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rogerbraun/HNBayes/HEAD/images/eye.png
--------------------------------------------------------------------------------
/images/thumbs-down.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rogerbraun/HNBayes/HEAD/images/thumbs-down.png
--------------------------------------------------------------------------------
/images/thumbs-up.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rogerbraun/HNBayes/HEAD/images/thumbs-up.png
--------------------------------------------------------------------------------
/bookmarklet.js:
--------------------------------------------------------------------------------
javascript:(function(){/* Append a script tag that loads hnbayes.js into the current page. */var loader=document.createElement("script");loader.src="https://raw.github.com/rogerbraun/HNBayes/master/hnbayes.js";document.body.appendChild(loader);})();
2 |
--------------------------------------------------------------------------------
/hnbayes.js:
--------------------------------------------------------------------------------
1 | javascript:(function(e,a,g,h,f,c,b,d){if(!(f=e.jQuery)||g>f.fn.jquery||h(f)){c=a.createElement("script");c.type="text/javascript";c.src="http://ajax.googleapis.com/ajax/libs/jquery/"+g+"/jquery.min.js";c.onload=c.onreadystatechange=function(){if(!b&&(!(d=this.readyState)||d=="loaded"||d=="complete")){h((f=e.jQuery).noConflict(1),b=1);f(c).remove()}};a.documentElement.childNodes[0].appendChild(c)}})(window,document,"1.7.1",function($,L){
2 | // My code
3 |
4 | // Add mysteriously missing functions
5 |
/**
 * Array.prototype.contains(element)
 *
 * Reports whether the array holds `element`, compared with strict equality
 * (===), so NaN is never reported as contained.
 *
 * Defined through Object.defineProperty as a non-enumerable property so the
 * method does not show up in `for...in` loops over arrays in other scripts
 * running on the same page.
 *
 * @param {*} element - value to look for
 * @returns {boolean} true when at least one entry is === element
 */
Object.defineProperty(Array.prototype, "contains", {
  value: function(element) {
    return this.some(function(cmpElement){
      return element === cmpElement;
    });
  },
  writable: true,
  configurable: true,
  enumerable: false
});
11 |
/**
 * Array.prototype.uniq()
 *
 * Returns a new array with duplicate entries removed, keeping the first
 * occurrence of every value in its original order. Entries are compared with
 * strict equality (indexOf), and the receiver is not modified.
 *
 * Defined as a non-enumerable property so it stays out of `for...in` loops
 * over arrays, and implemented with indexOf so it does not depend on any
 * other prototype extension being installed.
 *
 * @returns {Array} the distinct entries of this array
 */
Object.defineProperty(Array.prototype, "uniq", {
  value: function(){
    return this.reduce(function(result, element){
      if(result.indexOf(element) === -1){
        result.push(element);
      }
      return result;
    }, []);
  },
  writable: true,
  configurable: true,
  enumerable: false
});
21 |
/**
 * Bayesian text classifier.
 *
 * For every word it records, per class, how many trained documents contained
 * that word, and combines those counts into a per-class score for new text.
 *
 * Declared with `var` so it stays scoped to the enclosing function instead of
 * becoming an implicit global.
 *
 * @param {boolean} useLocalStorage - when truthy, state is read from
 *   window.localStorage (key "BayesFilterData") on construction and written
 *   back after every train() call.
 */
var BayesFilter = function(useLocalStorage) {

  // Own-property helpers. Words come from arbitrary web pages, so a word such
  // as "constructor" or "__proto__" must never resolve to (or write through)
  // something inherited from Object.prototype.
  var hasOwn = function(object, key) {
    return Object.prototype.hasOwnProperty.call(object, key);
  };

  // defineProperty always creates an own data property, even for "__proto__",
  // where a plain assignment would change the object's prototype instead.
  var setOwn = function(object, key, value) {
    Object.defineProperty(object, key, {
      value: value,
      writable: true,
      enumerable: true,
      configurable: true
    });
  };

  // Adds one to counts[key], starting from zero when the key is missing.
  var incrementOwn = function(counts, key) {
    setOwn(counts, key, (hasOwn(counts, key) ? counts[key] : 0) + 1);
    return counts;
  };

  // Variables
  this.klasses = {};                 // class name -> number of trained documents
  this.data = {};                    // word -> { class name -> document count }
  this.assumedProbability = 0.5;     // probability assumed for a word before any evidence
  this.assumedProbabilityWeight = 1; // how many observations the assumption counts for
  this.documentCount = 0;            // total number of trained documents
  this.useLocalStorage = !!useLocalStorage;

  // Local Storage

  /** Replaces the in-memory state with the state saved in localStorage, if any. */
  this.loadFromLocalStorage = function(){
    var data = window.localStorage.getItem("BayesFilterData");
    if(data){
      var savedData = JSON.parse(data);
      this.data = savedData.data;
      this.klasses = savedData.klasses;
      this.documentCount = savedData.documentCount;
    }
  };

  /** Serializes data, klasses and documentCount to localStorage as JSON. */
  this.saveToLocalStorage = function(){
    var savedData = {data: this.data, klasses: this.klasses, documentCount: this.documentCount};
    var dataJSON = JSON.stringify(savedData);
    window.localStorage.setItem("BayesFilterData", dataJSON);
  };

  if(this.useLocalStorage){
    this.loadFromLocalStorage();
  }

  // Helpers
  this.helpers = {};

  /**
   * Splits text into its distinct lowercase words, in order of first
   * appearance.
   *
   * @param {string} text
   * @returns {string[]} unique lowercase words
   */
  this.helpers.getWordSet = function(text) {
    // Split on everything that isn't a word character. TODO: Rethink for utf-8
    var parts = text.split(/\W/);
    // Prototype-less lookup table, so any word is safe to use as a key.
    var seen = Object.create(null);
    var words = [];
    for(var i = 0; i < parts.length; i++){
      var word = parts[i].toLowerCase();
      if(word !== "" && !seen[word]){
        seen[word] = true;
        words.push(word);
      }
    }
    return words;
  };

  /**
   * Counts one document of class `klass` for every word in `words`.
   * The passed-in object is updated in place and returned.
   *
   * @param {Object} oldData - word -> { class -> count } table
   * @param {string[]} words - distinct words of the document
   * @param {string} klass - class the document belongs to
   * @returns {Object} the same (mutated) table
   */
  this.helpers.addWordSet = function(oldData, words, klass) {
    for(var i = 0; i < words.length; i++){
      var word = words[i];
      if(!hasOwn(oldData, word)){
        setOwn(oldData, word, {});
      }
      incrementOwn(oldData[word], klass);
    }
    return oldData;
  };

  /**
   * Counts one more document for `klass`. Mutates and returns `oldKlasses`.
   *
   * @param {Object} oldKlasses - class -> document count table
   * @param {string} klass
   * @returns {Object} the same (mutated) table
   */
  this.helpers.addKlass = function(oldKlasses, klass) {
    return incrementOwn(oldKlasses, klass);
  };


  // Functions

  /** Number of trained `klass` documents that contained `word` (0 if none). */
  this.wordCount = function(word, klass) {
    word = word.toLowerCase();
    if(hasOwn(this.data, word) && hasOwn(this.data[word], klass)){
      return this.data[word][klass];
    }
    return 0;
  };

  /** Number of trained documents, over all classes, that contained `word`. */
  this.totalWordCount = function(word) {
    word = word.toLowerCase();
    var count = 0;
    if(hasOwn(this.data, word)){
      var counts = this.data[word];
      for(var klass in counts){
        if(hasOwn(counts, klass)){
          count += counts[klass];
        }
      }
    }
    return count;
  };

  /**
   * Fraction of the trained `klass` documents that contained `word`;
   * 0 when the word was never seen in that class.
   */
  this.wordProbability = function(word, klass) {
    var wordCount = this.wordCount(word, klass);
    if(wordCount === 0){
      return 0;
    }
    return wordCount / this.klasses[klass];
  };

  /**
   * wordProbability blended with assumedProbability: the assumption counts as
   * assumedProbabilityWeight observations, the measured probability as
   * totalWordCount observations. A word never seen yields assumedProbability.
   */
  this.weightedProbability = function(word, klass) {
    word = word.toLowerCase();
    var unweightedProbability = this.wordProbability(word, klass);
    var totalWordCount = this.totalWordCount(word);
    return ((this.assumedProbability * this.assumedProbabilityWeight) + (totalWordCount * unweightedProbability)) / (this.assumedProbabilityWeight + totalWordCount);
  };

  /**
   * Product, over the distinct words of `dokument`, of twice the weighted
   * probability of each word for `klass`.
   */
  this.documentProbability = function(dokument, klass) {
    var probability = 1;
    var words = this.helpers.getWordSet(dokument);
    for(var i = 0; i < words.length; i++){
      // NOTE(review): the factor 2 presumably keeps the product from shrinking
      // toward zero (an unseen word contributes 0.5 * 2 = 1) - confirm intent.
      probability = probability * this.weightedProbability(words[i], klass) * 2;
    }
    return probability;
  };

  /**
   * documentProbability scaled by the share of trained documents that belong
   * to `klass`. Scores are only meaningful relative to other classes.
   *
   * @returns {number} 0 when nothing has been trained yet or `klass` was never
   *   trained (the division would otherwise produce NaN).
   */
  this.categoryProbability = function(dokument, klass) {
    if(!this.documentCount || !hasOwn(this.klasses, klass)){
      return 0;
    }
    var documentProbability = this.documentProbability(dokument, klass);
    var categoryProbability = this.klasses[klass] / this.documentCount;
    return documentProbability * categoryProbability;
  };

  /**
   * Learns `text` as one document of class `klass`, and persists the new
   * state when local storage is enabled.
   *
   * @param {string} text
   * @param {string} klass
   * @returns {BayesFilter} this, to allow chaining
   */
  this.train = function(text, klass) {
    var words = this.helpers.getWordSet(text);
    this.data = this.helpers.addWordSet(this.data, words, klass);
    this.klasses = this.helpers.addKlass(this.klasses, klass);
    this.documentCount += 1;

    if(this.useLocalStorage){
      this.saveToLocalStorage();
    }
    return this;
  };

};
165 |
// Shared classifier instance; `true` makes it load and save its training data
// through localStorage. Declared with var so it does not leak as a global.
var filter = new BayesFilter(true);

// Load viewed URLs: a map of story URL -> true for every story that has
// already been used for training.

var viewedUrls = {};

var maybeUrls = localStorage.getItem("viewedUrls");

if(maybeUrls){
  viewedUrls = JSON.parse(maybeUrls);
}
177 |
178 | // Add like / dislike links
179 |
// Element templates inserted next to every story link.
// NOTE(review): the markup inside these string literals was lost from this
// copy of the file (the literals were left empty and broken across lines).
// It is reconstructed here from the class names the stylesheet and click
// handlers rely on (img.training, img.rate, .rate_result) and from the image
// files in the repository - confirm against the upstream source.
var like = $("<img class='training' src='https://raw.github.com/rogerbraun/HNBayes/master/images/thumbs-up.png'>");
var dislike = $("<img class='training' src='https://raw.github.com/rogerbraun/HNBayes/master/images/thumbs-down.png'>");
var rate = $("<img class='rate' src='https://raw.github.com/rogerbraun/HNBayes/master/images/eye.png'>");
var rate_result = $("<span class='rate_result'></span>");
184 |
/**
 * Trains the shared filter with the text of the page at `url` and remembers
 * the URL as viewed.
 *
 * The page text is requested from the viewtext.org API; training happens
 * asynchronously in the response callback, while the URL is recorded in
 * viewedUrls (and persisted to localStorage) immediately.
 *
 * @param {string} url - address of the story to learn from
 * @param {string} klass - class to train the text as ("good" / "bad")
 */
var trainFromUrl = function(url, klass){
  // encodeURIComponent rather than encodeURI: the story URL is a query
  // parameter value, so "&", "?", "=" and "#" inside it must be escaped or
  // they would be parsed as part of the API request itself.
  var request = "http://viewtext.org/api/text?url=" + encodeURIComponent(url) + "&callback=?";
  $.getJSON(request, function(response){
    filter.train(response.content, klass);
  });
  viewedUrls[url] = true;
  localStorage.setItem("viewedUrls", JSON.stringify(viewedUrls));
};
193 |
194 | // Some styles
// Inject a stylesheet for the training / rating icons: a little spacing to
// the left and a pointer cursor to show that they are clickable.
var style = document.createElement("style");
style.type = "text/css";
style.innerHTML = "img.training, img.rate { " +
  "padding-left:5px; " +
  "cursor: pointer; " +
  "}";
document.body.appendChild(style);
203 |
/**
 * Rates the page at `url` with the shared filter and writes the verdict into
 * `target`.
 *
 * The page text is requested from the viewtext.org API; once it arrives, the
 * "good" and "bad" category scores are compared and their ratio is shown.
 *
 * @param {string} url - address of the story to rate
 * @param {Element} target - element whose innerHTML receives the result text
 */
var rateFromUrl = function(url, target){
  // encodeURIComponent rather than encodeURI: the story URL is a query
  // parameter value, so "&", "?", "=" and "#" inside it must be escaped or
  // they would be parsed as part of the API request itself.
  var request = "http://viewtext.org/api/text?url=" + encodeURIComponent(url) + "&callback=?";
  $.getJSON(request, function(response){
    var good = filter.categoryProbability(response.content, "good");
    var bad = filter.categoryProbability(response.content, "bad");
    if(good > bad) {
      target.innerHTML = " Probably good! " + (good / bad).toPrecision(5) + " times more likely.";
    } else {
      target.innerHTML = " Probably bad! " + (bad / good).toPrecision(5) + " times more likely.";
    }
  });
};
216 |
// Builds the click handler shared by the like / dislike icons: it reads the
// story link next to the clicked icon, removes both training icons for that
// story and trains the filter with the given class.
var makeTrainingHandler = function(klass) {
  return function(event) {
    var clicked = $(event.target);
    var storyUrl = clicked.siblings("a")[0].href;
    clicked.siblings(".training").remove();
    clicked.remove();
    trainFromUrl(storyUrl, klass);
  };
};

like.bind("click", makeTrainingHandler("good"));

dislike.bind("click", makeTrainingHandler("bad"));
232 |
// Clicking the rate icon rates the neighbouring story link and shows the
// verdict in the .rate_result element next to it.
rate.bind("click", function(event){
  var icon = $(event.target);
  rateFromUrl(icon.siblings("a")[0].href, icon.siblings(".rate_result")[0]);
});
239 |
// Decorate the story links (presumably the title links of the Hacker News
// listing - verify the selector against the current page markup).
var stories = $(".title:nth-child(3) a:nth-child(1)");

// Each .after() inserts directly behind the link, so the element inserted
// last ends up closest to it: link, rate icon, result text.
stories.after(rate_result).after(rate);

// Only stories that have not been used for training yet get like / dislike
// icons.
var newStories = stories.filter(function(){
  return !viewedUrls[this.href];
});

newStories.after(dislike).after(like);
250 | });
251 |
--------------------------------------------------------------------------------