├── .gitignore
├── Falcon.gif
├── art
├── falcon-bw.png
└── falcon-color.psd
├── extension
├── assets
│ ├── 128.png
│ ├── 16.png
│ ├── 19.png
│ ├── 38.png
│ ├── 48.png
│ ├── style.css
│ ├── preferences.html
│ ├── popup.html
│ └── notie.css
├── js
│ ├── content.js
│ ├── relevanttext.js
│ ├── textprocessing.js
│ ├── popup.js
│ ├── queryparser.js
│ ├── lib
│ │ ├── stopwords_res.js
│ │ ├── notie.min.js
│ │ └── readability.js
│ ├── background.js
│ └── preferences.js
└── manifest.json
├── README.md
└── LICENSE.md
/.gitignore:
--------------------------------------------------------------------------------
1 | *zip
2 |
--------------------------------------------------------------------------------
/Falcon.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CennoxX/falcon/HEAD/Falcon.gif
--------------------------------------------------------------------------------
/art/falcon-bw.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CennoxX/falcon/HEAD/art/falcon-bw.png
--------------------------------------------------------------------------------
/art/falcon-color.psd:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CennoxX/falcon/HEAD/art/falcon-color.psd
--------------------------------------------------------------------------------
/extension/assets/128.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CennoxX/falcon/HEAD/extension/assets/128.png
--------------------------------------------------------------------------------
/extension/assets/16.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CennoxX/falcon/HEAD/extension/assets/16.png
--------------------------------------------------------------------------------
/extension/assets/19.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CennoxX/falcon/HEAD/extension/assets/19.png
--------------------------------------------------------------------------------
/extension/assets/38.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CennoxX/falcon/HEAD/extension/assets/38.png
--------------------------------------------------------------------------------
/extension/assets/48.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CennoxX/falcon/HEAD/extension/assets/48.png
--------------------------------------------------------------------------------
/extension/js/content.js:
--------------------------------------------------------------------------------
// Content script: one second after it starts, send a snapshot of the current
// page (capture time, URL, visible text and title) as a 'pageContent' message.
//
// An earlier revision passed the document through NLParser.getRelevantText and
// fell back to document.body.innerText when that produced nothing; that path
// is disabled and the raw body text is sent instead.
window.setTimeout(function reportPageContent() {
    var capturedAt = Date.now();
    var pageMessage = {
        "msg": 'pageContent',
        "time": capturedAt,
        "url": window.location.href,
        "text": document.body.innerText,
        "title": document.title
    };
    chrome.runtime.sendMessage(pageMessage);
}, 1000);
21 |
--------------------------------------------------------------------------------
/extension/js/relevanttext.js:
--------------------------------------------------------------------------------
/**
 * Removes listed words from this string.
 *
 * For every entry of `find`, the first occurrence of that entry surrounded by
 * single spaces is replaced with one space. Later occurrences of the same
 * entry are left alone, because String.prototype.replace with a string
 * pattern replaces only once.
 *
 * @param {string[]} find - words to remove
 * @returns {string} the string with the matched words removed
 */
String.prototype.deleteArray = function(find) {
    var result = this;
    for (var word of find) {
        result = result.replace(" " + word + " ", " ");
    }
    return result;
};
8 |
// NLParser: constructor whose instances expose getRelevantText(docstr).
var NLParser = (function NLParser() {

    return function NLParserConstructor() {
        /**
         * Parses an HTML string and returns the article text found by
         * readability.grabArticle, with whitespace stripped and stop words
         * removed, followed by " --- " and the article title.
         *
         * @param {string} docstr - HTML source of the document
         * @returns {?string} the extracted text, or null when no article is
         *     found or extraction throws
         */
        this.getRelevantText = function(docstr) {
            var doc = new DOMParser().parseFromString(docstr, "text/html");
            try {
                var article = readability.grabArticle(doc);
                if (article) {
                    // NOTE(review): this pattern removes every single space, after
                    // which deleteArray's " word " patterns cannot match - confirm
                    // against the upstream source whether a double space was meant.
                    var body = article.innerText.replace(/(\r\n|\n|\r|\t| )/gm,"").deleteArray(stops);
                    return body + " --- " + readability.getArticleTitle(doc);
                }
                return null;
            } catch (err) {
                // Best effort: any extraction failure is reported as "no text".
                return null;
            }
        };
    };
}());
28 |
--------------------------------------------------------------------------------
/extension/js/textprocessing.js:
--------------------------------------------------------------------------------
/**
 * Normalises text for indexing and querying: strips diacritics and
 * lower-cases the result.
 *
 * @param {string} str - raw text
 * @returns {string} normalised text
 */
function processPageText(str) {
    var withoutAccents = removeDiacritics(str);
    // NOTE(review): the pattern is a string, so replace() looks for the literal
    // text "[^a-zA-Z0-9-._~]" rather than applying a character class - confirm
    // whether a regular expression was intended.
    var cleaned = withoutAccents.replace('[^a-zA-Z0-9-._~]', "");
    return cleaned.toLowerCase();
}
4 |
/**
 * Strips combining diacritical marks from a string.
 *
 * The string is decomposed (NFD) so that accented letters become a base
 * letter followed by combining marks, and the marks in U+0300-U+036F are
 * then deleted.
 *
 * @param {string} str - input text
 * @returns {string} text without combining marks
 */
function removeDiacritics(str) {
    var decomposed = str.normalize("NFD");
    return decomposed.replace(/[\u0300-\u036f]/g, "");
}
8 |
/**
 * Escapes the five XML/HTML special characters in a string so it can be
 * embedded in markup.
 *
 * `"` becomes `&quot;`, `'` becomes `&apos;`, `<` becomes `&lt;`, `>` becomes
 * `&gt;` and `&` becomes `&amp;`; every other character is copied unchanged.
 *
 * @param {string} str - text to escape
 * @returns {string} the escaped text
 */
function escape(str) {
    var entities = {
        '"': '&quot;',
        '\'': '&apos;',
        '<': '&lt;',
        '>': '&gt;',
        '&': '&amp;'
    };
    // One pass over the input: each special character is replaced exactly
    // once, so the '&' of an emitted entity is never escaped again.
    return str.replace(/["'<>&]/g, function(ch) {
        return entities[ch];
    });
}
35 |
36 |
--------------------------------------------------------------------------------
/extension/js/popup.js:
--------------------------------------------------------------------------------
// Clicks on an element that has a non-empty href not starting with "https"
// open that href in a new tab and close the popup.
window.addEventListener('click', function(event) {
    var href = event.target.href;
    if (href === undefined || !(href.length > 0) || href.startsWith("https")) {
        return;
    }
    chrome.tabs.create({url: href});
    window.close();
});

// Once the popup's DOM is ready, wire the quick-blacklist button so that a
// click adds the current tab's URL to the blacklist.
window.addEventListener('DOMContentLoaded', function() {
    document.getElementById('quick-blacklist').addEventListener('click', function() {
        disableIfBlacklisted(true);
    });
});
14 |
/**
 * Marks the quick-blacklist button as disabled when the active tab's URL is
 * in the stored blacklist's 'SITE' list, optionally adding the URL first.
 *
 * @param {boolean} [add=false] - when true, append the active tab's URL to
 *     the 'SITE' list (if not already present) and persist the blacklist
 */
function disableIfBlacklisted(add=false){
    var button = document.getElementById('quick-blacklist');
    chrome.storage.local.get(['blacklist'], function(stored) {
        chrome.tabs.query({'active': true, 'lastFocusedWindow': true}, function (tabs) {
            var currentUrl = tabs[0].url;
            var blacklist = stored['blacklist'];
            var sites = blacklist['SITE'];
            if (add && !sites.includes(currentUrl)) {
                sites.push(currentUrl);
                chrome.storage.local.set({'blacklist':blacklist});
            }
            if (sites.includes(currentUrl)) {
                button.classList.add('disabled');
            }
        });
    });
}

// Reflect the current tab's blacklist state as soon as the script runs.
disableIfBlacklisted(false);
33 |
--------------------------------------------------------------------------------
/extension/manifest.json:
--------------------------------------------------------------------------------
1 | {
2 | "manifest_version": 2,
3 | "name": "Falcon",
4 | "short_name":"Falcon",
5 | "description": "Search your full text browsing history by typing 'f' and space in the address bar to start searching your previously visited websites!",
6 | "version": "0.4.1",
7 | "content_scripts": [
8 | {
9 | "matches": [
10 | " FalconPreferences
10 |
11 |
12 | Falcon History
13 |
17 |
18 |
19 |
20 |
Blacklisted Websites
21 | Enter any valid regex
22 |
23 |
25 |
26 |
27 |
28 |
29 |
24 | Websites Type Enabled Danger Zone!
30 |
31 |
32 |
33 |
34 |
35 |
36 |
--------------------------------------------------------------------------------
/extension/assets/popup.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
75 |
76 |
77 |
79 | Quick Blacklist
80 | Preferences
81 | GitHub
82 |
83 |
84 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # 🦅 Falcon
2 |
3 | Firefox extension for flexible full text browsing history search. **Press `f`, then `space`, in the omnibar to start searching your previously visited websites**!
4 |
5 | Every time you visit a website in Firefox, Falcon indexes all the text on the page so that the site can be easily found later. Then, for example, if you type `f
8 |
9 | ## Examples
10 |
11 | `before: "yesterday at 5pm" after: "three weeks ago" emscripten blog "anish athalye"`
12 | - Searches for websites that you browsed between yesterday at 5pm and 3 weeks ago containing the keywords "emscripten" and "blog" and "anish athalye"
13 |
14 | `-"cat food" just "a dog"`
15 | - Searches for websites you visited containing the keywords "just" and "a dog", and without the phrase "cat food".
16 |
17 | `ethereum medium`
18 | - Searches for websites you visited in the last 2 weeks containing the keywords "ethereum" and "medium"
19 |
20 | `ethereum medium after:11/29/2015 before:3/26/2016`
21 | - Searches for websites you visited between 11/29/2015 and 3/26/2016 containing the keywords "ethereum" and "medium"
22 |
23 | ## Transparent Installation
24 | If you don't feel comfortable installing, from the webstore, a Firefox extension that can read and modify all data on the websites you visit (we wouldn't either!), you can clone it to your local machine, read through our code to verify that it is not malicious, then sign it at addons.mozilla.org and install it. Note that an extension installed this way won't receive any automatic updates.
25 |
26 | ## Preferences Page
27 | To manage which URLs Falcon can index, delete websites from the index, and more, go to the preferences page.
28 |
29 | 
30 |
31 | ## FAQ
32 | ### Will this index my bank statements?
33 | Nope! We have a blacklist of domains to avoid indexing that includes the majority of banking websites. You can edit it and add your own entries on the preferences page! If a page that you don't want indexed gets indexed by accident, you can go into preferences and delete it from your indexed pages.
34 |
35 | ### Do you store any information remotely?
36 | No, all information is stored locally! No data leaves your computer over the network.
37 |
38 | ## More Details
39 | - Use `before:date` and `after:date` to search your history in a certain time range
40 | - You can use natural language along with quotes to specify dates as well, e.g. `before:"yesterday at 5pm"`
41 | - Use quotations to look for exact matches of strings containing whitespace
42 | - Only documents containing all words will be returned
43 |
--------------------------------------------------------------------------------
/extension/js/queryparser.js:
--------------------------------------------------------------------------------
// Operators recognised in a query; each must start the query or follow whitespace.
var BEFORE = /(\s|^)before:/i;
var AFTER = /(\s|^)after:/i;
var NEGATIVE = /(\s|^)-/i;
// Either quote character may delimit an exact phrase or an operator argument.
var QUOTEREGEX = /["']/;
// Default look-back window, in days.
var DEFAULT_DATE_OFFSET = 14;
// Parsing stages, applied to the query object in this order.
var PARSERS = [parseDate, parseNegative, parseExact, parseKeywords, normalize];

// Computed once at load time: DEFAULT_DATE_OFFSET days before now.
var CUTOFF_DATE = (function() {
    var cutoff = new Date();
    cutoff.setDate(cutoff.getDate() - DEFAULT_DATE_OFFSET);
    return cutoff;
}());
10 |
/**
 * Cuts one delimited token out of `text`.
 *
 * The delimiter is the character at index `i` when it is a quote, otherwise
 * a space. The token runs from just after index `i` to the next occurrence of
 * the delimiter. When the delimiter is a space and none follows, the token
 * runs to the end of the text.
 *
 * @param {number} i - index of the opening delimiter
 * @param {string} text - text to extract from
 * @returns {Array} `[token, remainingText]`, or `[false, false]` when an
 *     opening quote has no closing quote
 */
function extractTextBtwChars(i, text) {
    var delimiter = text[i].match(QUOTEREGEX) ? text[i] : " ";
    var close = text.indexOf(delimiter, i + 1);

    if (close > -1) {
        // Replace the delimited span with one space so neighbours stay apart.
        var rest = text.substring(0, i) + " " + text.substring(close + 1, text.length);
        return [text.substring(i + 1, close), rest];
    }

    if (delimiter === ' ') {
        return [text.substring(i + 1, text.length), text.substring(0, i)];
    }

    return [false, false];
}
31 |
/**
 * Finds the first occurrence of an operator (`regex`) that is not inside a
 * quoted span, and extracts the argument that follows it.
 *
 * The argument is either a quoted phrase or the run of characters up to the
 * next space. Both the operator and its argument are removed from the text.
 *
 * @param {string} text - query text
 * @param {RegExp} regex - operator pattern (e.g. BEFORE, AFTER, NEGATIVE)
 * @returns {Array} `[argument, remainingText]`, or `[false, false]` when no
 *     operator outside quotes is found, the operator has no argument, or the
 *     argument's opening quote is never closed
 */
function getArgumentForRegex(text, regex) {
    var textLen = text.length;

    // quoteMap[k] is 1 when index k is inside a quoted span (the opening quote
    // counts as inside, the closing quote as outside), otherwise 0.
    var quoteMap = new Array(textLen);
    var currQuote = false;
    for (var k = 0; k < textLen; k++) {
        var ch = text[k];
        if (currQuote === false) {
            if (ch.match(QUOTEREGEX) != null) {
                currQuote = ch;
            }
        } else if (ch === currQuote) {
            currQuote = false;
        }
        quoteMap[k] = currQuote === false ? 0 : 1;
    }

    // Scan the full text with a global copy of the pattern. Match indices are
    // then absolute, `^` only matches at the true start of the text, and
    // running out of matches ends the loop instead of dereferencing null.
    var flags = regex.flags.indexOf('g') === -1 ? regex.flags + 'g' : regex.flags;
    var scanner = new RegExp(regex.source, flags);
    var res = scanner.exec(text);
    while (res !== null && quoteMap[res.index] === 1) {
        if (res[0].length === 0) {
            scanner.lastIndex += 1; // guarantee progress on an empty match
        }
        res = scanner.exec(text);
    }

    if (res === null) return [false, false];

    // Skip spaces between the operator and its argument.
    var i = res.index + res[0].length;
    while (text[i] === ' ') {
        i += 1;
    }
    if (i >= textLen) {
        return [false, false];
    }

    // A quoted argument is delimited by its own quote; a bare argument is
    // treated as starting one character earlier so it ends at the next space.
    var start = text[i].match(QUOTEREGEX) ? i : i - 1;
    var extracted = extractTextBtwChars(start, text);
    var matched = extracted[0];
    var remaining = extracted[1];

    if (remaining === false) {
        return [false, false];
    }

    // Blank out the operator itself.
    remaining = remaining.substring(0, res.index) + " " + remaining.substring(res.index + res[0].length, remaining.length);
    return [matched, remaining];
}
90 |
/**
 * Returns the index of the first quote character (`"` or `'`) at or after
 * index `i`, or -1 when there is none.
 *
 * @param {number} i - index to start searching from
 * @param {string} text - text to search
 * @returns {number} index of the nearest quote, or -1
 */
function getNextQuote(i, text) {
    var found = [text.indexOf('"', i), text.indexOf("'", i)].filter(function(pos) {
        return pos !== -1;
    });
    return found.length === 0 ? -1 : Math.min.apply(Math, found);
}
104 |
/**
 * Moves every quoted phrase in `query.text` into `query.keywords`.
 *
 * Stops at the first quote that has no closing partner, leaving the text from
 * that point on as it was.
 *
 * @param {Object} query - query object with `text` and `keywords`
 * @returns {Object} the same query object, updated in place
 */
function parseExact(query) {
    var remaining = query.text;
    for (var pos = getNextQuote(0, remaining); pos > -1; pos = getNextQuote(pos, remaining)) {
        var result = extractTextBtwChars(pos, remaining);
        if (result[1] === false) {
            return query;
        }

        query.keywords.push(result[0]);
        remaining = result[1];
        query.text = remaining;
    }

    query.text = remaining;
    return query;
}
123 |
/**
 * Extracts `before:` and `after:` arguments from `query.text`.
 *
 * Each argument found is removed from the text. When chrono.parseDate can
 * turn it into a date, the date is stored on the query under the operator's
 * name and `query.shouldDate` is set.
 *
 * @param {Object} query - query object with `text`
 * @returns {Object} the same query object, updated in place
 */
function parseDate(query) {
    var remaining = query.text;
    var operators = [['before', BEFORE], ['after', AFTER]];
    operators.forEach(function(operator) {
        var found = getArgumentForRegex(remaining, operator[1]);
        // Loose comparison on purpose: an empty argument counts as "not found".
        if (found[0] != false) {
            var parsed = chrono.parseDate(found[0]);
            if (parsed != null) {
                query[operator[0]] = parsed;
                query.shouldDate = true;
            }
            remaining = found[1];
        }
    });

    query.text = remaining;
    return query;
}
143 |
/**
 * Moves every `-term` / `-"phrase"` argument from `query.text` into
 * `query.negative`.
 *
 * @param {Object} query - query object with `text` and `negative`
 * @returns {Object} the same query object, updated in place
 */
function parseNegative(query) {
    var remaining = query.text;
    var found = getArgumentForRegex(remaining, NEGATIVE);
    // Loose comparison on purpose: an empty argument ends the scan as well.
    while (found[0] != false) {
        query.negative.push(found[0]);
        remaining = found[1];
        found = getArgumentForRegex(remaining, NEGATIVE);
    }

    query.text = remaining;
    return query;
}
160 |
/**
 * Appends the whitespace-separated words of `query.text` to `query.keywords`.
 *
 * An empty or all-whitespace text contributes a single empty-string keyword,
 * because splitting an empty string yields one empty element.
 *
 * @param {Object} query - query object with `text` and `keywords`
 * @returns {Object} the same query object, updated in place
 */
function parseKeywords(query) {
    var words = query.text.trim().split(/\s+/);
    query.keywords = query.keywords.concat(words);
    return query;
}
166 |
/**
 * Runs every keyword through processPageText.
 *
 * @param {Object} query - query object with `keywords`
 * @returns {Object} the same query object, updated in place
 */
function normalize(query) {
    query.keywords = query.keywords.map(function(keyword) {
        return processPageText(keyword);
    });
    return query;
}
171 |
/**
 * Builds a structured query from raw search text by passing a fresh query
 * object through each stage in PARSERS, in order.
 *
 * @param {string} text - raw query text
 * @returns {Object} the query object returned by the last stage
 */
function makeQueryFromText(text) {
    var staged = {
        text: text,
        before: false,
        after: CUTOFF_DATE,
        keywords: [],
        negative:[],
        shouldDate: false // has the date been set
    };

    for (var stage = 0; stage < PARSERS.length; stage++) {
        staged = PARSERS[stage](staged);
    }

    // NOTE(review): `finalizedQuery` is never declared, so this assignment
    // creates or overwrites a global - confirm whether anything reads it.
    return finalizedQuery = staged;
}
188 |
--------------------------------------------------------------------------------
/extension/assets/notie.css:
--------------------------------------------------------------------------------
1 | .notie-transition {
2 | -moz-transition: all 0.3s ease;
3 | -webkit-transition: all 0.3s ease;
4 | transition: all 0.3s ease; }
5 |
6 | .notie-background-success {
7 | background-color: #57BF57 !important; }
8 |
9 | .notie-background-warning {
10 | background-color: #D6A14D !important; }
11 |
12 | .notie-background-error {
13 | background-color: #E1715B !important; }
14 |
15 | .notie-background-info {
16 | background-color: #4D82D6 !important; }
17 |
18 | #notie-alert-outer, #notie-confirm-outer, #notie-input-outer, #notie-select-outer, #notie-date-outer {
19 | position: fixed;
20 | top: 0;
21 | left: 0;
22 | z-index: 999999999;
23 | height: auto;
24 | width: 100%;
25 | display: none;
26 | text-align: center;
27 | cursor: pointer;
28 | font-size: 24px;
29 | -o-box-shadow: 0px 0px 10px 0px rgba(0, 0, 0, 0.5);
30 | -ms-box-shadow: 0px 0px 10px 0px rgba(0, 0, 0, 0.5);
31 | -moz-box-shadow: 0px 0px 10px 0px rgba(0, 0, 0, 0.5);
32 | -webkit-box-shadow: 0px 0px 10px 0px rgba(0, 0, 0, 0.5);
33 | box-shadow: 0px 0px 10px 0px rgba(0, 0, 0, 0.5); }
34 | @media (max-width: 600px) {
35 | #notie-alert-outer, #notie-confirm-outer, #notie-input-outer, #notie-select-outer, #notie-date-outer {
36 | font-size: 18px; } }
37 |
38 | #notie-alert-inner {
39 | padding: 20px;
40 | display: table-cell; }
41 |
42 | #notie-alert-content {
43 | max-width: 900px;
44 | margin: 0 auto; }
45 |
46 | #notie-alert-text {
47 | color: #FFFFFF; }
48 |
49 | #notie-confirm-outer {
50 | cursor: default; }
51 |
52 | #notie-confirm-inner, #notie-input-inner, #notie-select-inner {
53 | box-sizing: border-box;
54 | width: 100%;
55 | padding: 20px;
56 | display: block;
57 | cursor: default;
58 | background-color: #4D82D6; }
59 |
60 | #notie-confirm-text {
61 | color: #FFFFFF; }
62 |
63 | #notie-confirm-text-yes {
64 | color: #FFFFFF; }
65 |
66 | #notie-confirm-text-no {
67 | color: #FFFFFF; }
68 |
69 | #notie-confirm-yes, #notie-confirm-no, #notie-input-no, #notie-input-yes {
70 | float: left;
71 | height: 50px;
72 | line-height: 50px;
73 | width: 50%;
74 | cursor: pointer;
75 | background-color: #57BF57; }
76 |
77 | #notie-confirm-no, #notie-input-no {
78 | float: right;
79 | background-color: #E1715B; }
80 |
81 | #notie-confirm-background, #notie-input-background, #notie-select-background, #notie-date-background {
82 | position: fixed;
83 | top: 0;
84 | left: 0;
85 | z-index: 999999980;
86 | height: 100%;
87 | width: 100%;
88 | display: none;
89 | background-color: #FFFFFF;
90 | opacity: 0; }
91 |
92 | /* INPUT */
93 | #notie-input-outer {
94 | cursor: default; }
95 |
96 | #notie-input-field {
97 | display: block;
98 | box-sizing: border-box;
99 | height: 55px;
100 | width: 100%;
101 | text-align: center;
102 | outline: 0;
103 | border: 0;
104 | background-color: #FFFFFF;
105 | font-family: inherit;
106 | font-size: 24px; }
107 | @media (max-width: 600px) {
108 | #notie-input-field {
109 | font-size: 18px; } }
110 |
111 | #notie-input-text {
112 | color: #FFFFFF; }
113 |
114 | #notie-input-text-yes {
115 | color: #FFFFFF; }
116 |
117 | #notie-input-text-no {
118 | color: #FFFFFF; }
119 |
120 | #notie-select-outer {
121 | top: auto;
122 | bottom: 0;
123 | cursor: default; }
124 |
125 | #notie-select-text {
126 | color: #FFFFFF; }
127 |
128 | #notie-select-choices, .notie-select-choice {
129 | background-color: #57BF57; }
130 |
131 | .notie-select-choice {
132 | height: 50px;
133 | line-height: 50px;
134 | color: #FFFFFF;
135 | cursor: pointer; }
136 |
137 | #notie-select-cancel {
138 | height: 60px;
139 | line-height: 60px;
140 | color: #FFFFFF;
141 | cursor: pointer;
142 | background-color: #A0A0A0; }
143 |
144 | .notie-select-choice-bottom-border {
145 | border-bottom: 1px solid rgba(255, 255, 255, 0.2); }
146 |
147 | #notie-date-outer {
148 | background-color: #4D82D6;
149 | color: #FFFFFF;
150 | cursor: default; }
151 |
152 | #notie-date-selector {
153 | margin: 0 auto;
154 | max-width: 900px;
155 | padding-left: 10px;
156 | padding-right: 10px; }
157 |
158 | .notie-date-up, .notie-date-down {
159 | height: 50px;
160 | float: left;
161 | width: 33.333333%;
162 | cursor: pointer; }
163 |
164 | .notie-date-arrow, .notie-date-arrow-up, .notie-date-arrow-down {
165 | height: 40px;
166 | width: 40px;
167 | background-size: 40px 40px;
168 | margin: 0 auto;
169 | margin-top: 5px; }
170 |
171 | .notie-date-arrow-up {
172 | background-image: url(data:image/svg+xml;base64,PD94bWwgdmVyc2lvbj0iMS4wIiA/PjxzdmcgaGVpZ2h0PSI0OCIgdmlld0JveD0iMCAwIDQ4IDQ4IiB3aWR0aD0iNDgiIHhtbG5zPSJodHRwOi8vd3d3LnczLm9yZy8yMDAwL3N2ZyI+PHBhdGggZD0iTTE0IDI4bDEwLTEwIDEwIDEweiIgZmlsbD0id2hpdGUiLz48cGF0aCBkPSJNMCAwaDQ4djQ4aC00OHoiIGZpbGw9Im5vbmUiLz48L3N2Zz4=); }
173 |
174 | .notie-date-arrow-down {
175 | background-image: url("data:image/svg+xml;base64,PD94bWwgdmVyc2lvbj0iMS4wIiA/PjxzdmcgaGVpZ2h0PSI0OCIgdmlld0JveD0iMCAwIDQ4IDQ4IiB3aWR0aD0iNDgiIHhtbG5zPSJodHRwOi8vd3d3LnczLm9yZy8yMDAwL3N2ZyI+PHBhdGggZD0iTTE0IDIwbDEwIDEwIDEwLTEweiIgZmlsbD0id2hpdGUiLz48cGF0aCBkPSJNMCAwaDQ4djQ4aC00OHoiIGZpbGw9Im5vbmUiLz48L3N2Zz4="); }
176 |
177 | .notie-date-text {
178 | height: 50px;
179 | line-height: 50px;
180 | float: left;
181 | width: 33.333333%; }
182 |
183 | #notie-date-yes, #notie-date-no {
184 | float: left;
185 | width: 50%;
186 | height: 50px;
187 | line-height: 50px;
188 | color: #FFFFFF;
189 | background-color: #57BF57;
190 | cursor: pointer; }
191 |
192 | #notie-date-no {
193 | background-color: #E1715B; }
--------------------------------------------------------------------------------
/extension/js/lib/stopwords_res.js:
--------------------------------------------------------------------------------
1 | var stops = ['a', 'able', 'about', 'above', 'abst', 'accordance', 'according', 'accordingly', 'across', 'act', 'actually', 'added', 'adj', 'affected', 'affecting', 'affects', 'after', 'afterwards', 'again', 'against', 'ah', 'all', 'almost', 'alone', 'along', 'already', 'also', 'although', 'always', 'am', 'among', 'amongst', 'an', 'and', 'announce', 'another', 'any', 'anybody', 'anyhow', 'anymore', 'anyone', 'anything', 'anyway', 'anyways', 'anywhere', 'apparently', 'approximately', 'are', 'aren', 'arent', 'arise', 'around', 'as', 'aside', 'ask', 'asking', 'at', 'auth', 'available', 'away', 'awfully', 'b', 'back', 'be', 'became', 'because', 'become', 'becomes', 'becoming', 'been', 'before', 'beforehand', 'begin', 'beginning', 'beginnings', 'begins', 'behind', 'being', 'believe', 'below', 'beside', 'besides', 'between', 'beyond', 'biol', 'both', 'brief', 'briefly', 'but', 'by', 'c', 'ca', 'came', 'can', 'cannot', "can't", 'cause', 'causes', 'certain', 'certainly', 'co', 'com', 'come', 'comes', 'contain', 'containing', 'contains', 'could', 'couldnt', 'd', 'date', 'did', "didn't", 'different', 'do', 'does', "doesn't", 'doing', 'done', "don't", 'down', 'downwards', 'due', 'during', 'e', 'each', 'ed', 'edu', 'effect', 'eg', 'eight', 'eighty', 'either', 'else', 'elsewhere', 'end', 'ending', 'enough', 'especially', 'et', 'et-al', 'etc', 'even', 'ever', 'every', 'everybody', 'everyone', 'everything', 'everywhere', 'ex', 'except', 'f', 'far', 'few', 'ff', 'fifth', 'first', 'five', 'fix', 'followed', 'following', 'follows', 'for', 'former', 'formerly', 'forth', 'found', 'four', 'from', 'further', 'furthermore', 'g', 'gave', 'get', 'gets', 'getting', 'give', 'given', 'gives', 'giving', 'go', 'goes', 'gone', 'got', 'gotten', 'h', 'had', 'happens', 'hardly', 'has', "hasn't", 'have', "haven't", 'having', 'he', 'hed', 'hence', 'her', 'here', 'hereafter', 'hereby', 'herein', 'heres', 'hereupon', 'hers', 'herself', 'hes', 'hi', 'hid', 'him', 'himself', 'his', 'hither', 'home', 
'how', 'howbeit', 'however', 'hundred', 'i', 'id', 'ie', 'if', "i'll", 'im', 'immediate', 'immediately', 'importance', 'important', 'in', 'inc', 'indeed', 'index', 'information', 'instead', 'into', 'invention', 'inward', 'is', "isn't", 'it', 'itd', "it'll", 'its', 'itself', "i've", 'j', 'just', 'k', 'keepkeeps', 'kept', 'kg', 'km', 'know', 'known', 'knows', 'l', 'largely', 'last', 'lately', 'later', 'latter', 'latterly', 'least', 'less', 'lest', 'let', 'lets', 'like', 'liked', 'likely', 'line', 'little', "'ll", 'look', 'looking', 'looks', 'ltd', 'm', 'made', 'mainly', 'make', 'makes', 'many', 'may', 'maybe', 'me', 'mean', 'means', 'meantime', 'meanwhile', 'merely', 'mg', 'might', 'million', 'miss', 'ml', 'more', 'moreover', 'most', 'mostly', 'mr', 'mrs', 'much', 'mug', 'must', 'my', 'myself', 'n', 'na', 'name', 'namely', 'nay', 'nd', 'near', 'nearly', 'necessarily', 'necessary', 'need', 'needs', 'neither', 'never', 'nevertheless', 'new', 'next', 'nine', 'ninety', 'no', 'nobody', 'non', 'none', 'nonetheless', 'noone', 'nor', 'normally', 'nos', 'not', 'noted', 'nothing', 'now', 'nowhere', 'o', 'obtain', 'obtained', 'obviously', 'of', 'off', 'often', 'oh', 'ok', 'okay', 'old', 'omitted', 'on', 'once', 'one', 'ones', 'only', 'onto', 'or', 'ord', 'other', 'others', 'otherwise', 'ought', 'our', 'ours', 'ourselves', 'out', 'outside', 'over', 'overall', 'owing', 'own', 'p', 'page', 'pages', 'part', 'particular', 'particularly', 'past', 'per', 'perhaps', 'placed', 'please', 'plus', 'poorly', 'possible', 'possibly', 'potentially', 'pp', 'predominantly', 'present', 'previously', 'primarily', 'probably', 'promptly', 'proud', 'provides', 'put', 'q', 'que', 'quickly', 'quite', 'qv', 'r', 'ran', 'rather', 'rd', 're', 'readily', 'really', 'recent', 'recently', 'ref', 'refs', 'regarding', 'regardless', 'regards', 'related', 'relatively', 'research', 'respectively', 'resulted', 'resulting', 'results', 'right', 'run', 's', 'said', 'same', 'saw', 'say', 'saying', 'says', 'sec', 
'section', 'see', 'seeing', 'seem', 'seemed', 'seeming', 'seems', 'seen', 'self', 'selves', 'sent', 'seven', 'several', 'shall', 'she', 'shed', "she'll", 'shes', 'should', "shouldn't", 'show', 'showed', 'shown', 'showns', 'shows', 'significant', 'significantly', 'similar', 'similarly', 'since', 'six', 'slightly', 'so', 'some', 'somebody', 'somehow', 'someone', 'somethan', 'something', 'sometime', 'sometimes', 'somewhat', 'somewhere', 'soon', 'sorry', 'specifically', 'specified', 'specify', 'specifying', 'still', 'stop', 'strongly', 'sub', 'substantially', 'successfully', 'such', 'sufficiently', 'suggest', 'sup', 'suret', 'take', 'taken', 'taking', 'tell', 'tends', 'th', 'than', 'thank', 'thanks', 'thanx', 'that', "that'll", 'thats', "that've", 'the', 'their', 'theirs', 'them', 'themselves', 'then', 'thence', 'there', 'thereafter', 'thereby', 'thered', 'therefore', 'therein', "there'll", 'thereof', 'therere', 'theres', 'thereto', 'thereupon', "there've", 'these', 'they', 'theyd', "they'll", 'theyre', "they've", 'think', 'this', 'those', 'thou', 'though', 'thoughh', 'thousand', 'throug', 'through', 'throughout', 'thru', 'thus', 'til', 'tip', 'to', 'together', 'too', 'took', 'toward', 'towards', 'tried', 'tries', 'truly', 'try', 'trying', 'ts', 'twice', 'two', 'u', 'un', 'under', 'unfortunately', 'unless', 'unlike', 'unlikely', 'until', 'unto', 'up', 'upon', 'ups', 'us', 'use', 'used', 'useful', 'usefully', 'usefulness', 'uses', 'using', 'usually', 'v', 'value', 'various', "'ve", 'very', 'via', 'viz', 'vol', 'vols', 'vs', 'w', 'want', 'wants', 'was', 'wasnt', 'way', 'we', 'wed', 'welcome', "we'll", 'went', 'were', 'werent', "we've", 'what', 'whatever', "what'll", 'whats', 'when', 'whence', 'whenever', 'where', 'whereafter', 'whereas', 'whereby', 'wherein', 'wheres', 'whereupon', 'wherever', 'whether', 'which', 'while', 'whim', 'whither', 'who', 'whod', 'whoever', 'whole', "who'll", 'whom', 'whomever', 'whos', 'whose', 'why', 'widely', 'willing', 'wish', 'with', 
'within', 'without', 'wont', 'words', 'world', 'would', 'wouldnt', 'www', 'x', 'y', 'yes', 'yet', 'you', 'youd', "you'll", 'your', 'youre', 'yours', 'yourself', 'yourselves', "you've", 'z', 'zero', 'I', 'will', "aren't", "couldn't", "hadn't", "he'd", "he'll", "he's", "here's", "how's", "i'd", "i'm", "it's", "let's", "mustn't", 'oursourselves', "shan't", "she'd", "she's", "that's", "there's", "they'd", "they're", "wasn't", "we'd", "we're", "weren't", "what's", "when's", "where's", "who's", "why's", "won't", "wouldn't", "you'd", "you're", 'amoungst', 'amount', 'bill', 'bottom', 'call', 'cant', 'computer', 'con', 'cry', 'de', 'describe', 'detail', 'eleven', 'empty', 'fifteen', 'fify', 'fill', 'find', 'fire', 'forty', 'front', 'full', 'hasnt', 'herse"', 'himse"', 'interest', 'itse"', 'keep', 'mill', 'mine', 'move', 'myse"', 'serious', 'side', 'sincere', 'sixty', 'system', 'ten', 'thick', 'thin', 'third', 'three', 'top', 'twelve', 'twenty', 'well']
2 |
--------------------------------------------------------------------------------
/extension/js/background.js:
--------------------------------------------------------------------------------
var MILLIS_BEFORE_CLEAR = 60 * 1000; // 60 seconds
var CLEAR_DELAY = 20000;

// Comparators on plain values.
var LT = (a, b) => a < b;
var GT = (a, b) => a > b;

// Comparators on objects carrying a `time` field.
var LT_OBJ = (a, b) => a.time < b.time;
var GT_OBJ = (a, b) => a.time > b.time;

// Largest element of an array (-Infinity for an empty array).
Array.max = function(array) {
    return Math.max.apply(null, array);
};
16 |
/**
 * Reports whether `text` contains something shaped like a URL: an optional
 * http(s) scheme, optional "www." groups, a host of at least three
 * characters, and a dot-separated alphabetic suffix.
 *
 * The pattern is unanchored, so a match anywhere in the text counts.
 *
 * @param {string} text - candidate text
 * @returns {boolean} true when the pattern matches
 */
function ValidURL(text) {
    return /((https?):\/\/)?(([w|W]{3}\.)+)?[a-zA-Z0-9\-\.]{3,}\.[a-zA-Z]{2,}(\.[a-zA-Z]{2,})?/.test(text);
}
21 |
// Wire the omnibox and messaging events to their handlers.
chrome.omnibox.onInputChanged.addListener(omnibarHandler);
chrome.omnibox.onInputEntered.addListener(acceptInput);
chrome.runtime.onMessage.addListener(handleMessage);

// On install/update: open the project page once, then store a
// "shouldOpenTab" marker so it is not opened again.
chrome.runtime.onInstalled.addListener(function (details) {
    chrome.storage.local.get("shouldOpenTab", function(stored) {
        if (Object.keys(stored).length !== 0) {
            return;
        }
        chrome.tabs.create({url: "https://github.com/cennoxx/falcon"}, function (tab) {
        });
        chrome.storage.local.set({"shouldOpenTab": {"dontShow": true}});
    });
});
34 |
/**
 * Handles an accepted omnibox entry.
 *
 * Text that looks like a URL is opened directly; anything else is sent to the
 * preferences page as a `query` parameter. The tab used depends on the
 * disposition reported by the browser.
 *
 * @param {string} text - the accepted omnibox text or suggestion content
 * @param {string} disposition - "currentTab", "newForegroundTab", or
 *     "newBackgroundTab"; any other value does nothing
 */
function acceptInput(text, disposition) {
    // NOTE(review): the query text is appended without URL-encoding - confirm
    // how the preferences page reads it before changing this.
    var target = ValidURL(text) ? text : "assets/preferences.html?query=" + text;

    if (disposition === "currentTab") {
        chrome.tabs.update({url: target});
    } else if (disposition === "newForegroundTab") {
        chrome.tabs.create({url: target});
    } else if (disposition === "newBackgroundTab") {
        chrome.tabs.create({url: target, active: false});
    }
}
52 |
/**
 * Loads persisted state from chrome.storage.local into globals on `window`.
 *
 * - `window.preloaded` and `window.cache` start empty.
 * - `window.blacklist` is the stored blacklist, or a fresh empty one (which
 *   is then persisted) when none is stored or a list is missing from it.
 * - `window.preferences` is the stored preferences, or `{}` (persisted).
 * - `window.timeIndex` is the stored time index. When none is stored it is
 *   rebuilt from the stored page records, sorted ascending and persisted.
 *   Either way the index is then handed to makePreloaded.
 */
function init() {
    window.preloaded = [];
    window.cache = {};
    chrome.storage.local.get(['blacklist', 'preferences'], function(items) {
        var storedBlacklist = items['blacklist'];
        if (storedBlacklist === undefined || !('PAGE' in storedBlacklist && 'SITE' in storedBlacklist && 'REGEX' in storedBlacklist)) {
            window.blacklist = {'PAGE':[], 'REGEX':[], 'SITE':[]}; // show example in page
            chrome.storage.local.set({'blacklist':window.blacklist});
        } else {
            window.blacklist = storedBlacklist;
        }

        var storedPreferences = items['preferences'];
        if (storedPreferences === undefined) {
            window.preferences = {};
            chrome.storage.local.set({'preferences':window.preferences});
        } else {
            window.preferences = storedPreferences;
        }
    });

    chrome.storage.local.get('index', function(items) {
        var storedIndex = items['index'];
        if (storedIndex !== undefined) {
            window.timeIndex = storedIndex.index;
            makePreloaded(timeIndex);
            return;
        }

        window.timeIndex = [];
        chrome.storage.local.get(null, function(everything) {
            for (var key in everything) {
                var entry = everything[key];
                // Page records are stored under their numeric capture time.
                // Other keys (blacklist, preferences, shouldOpenTab, ...) have
                // no `time`, and reading it would throw and abort the rebuild.
                if (isNaN(key) || entry == null || entry.time == null) {
                    continue;
                }
                timeIndex.push(entry.time.toString());
            }

            timeIndex.sort(function(a,b) {return parseInt(a, 10) - parseInt(b, 10)}); // soonest last
            makePreloaded(timeIndex);
            chrome.storage.local.set({'index':{'index':timeIndex}});
        });
    });
}
96 |
/**
 * Loads the page records from the cut-off date onwards into
 * `window.preloaded`, sorted by ascending time.
 *
 * The starting position in `index` is found with binarySearch using
 * CUTOFF_DATE in milliseconds; every key from there to the end is fetched
 * from chrome.storage.local.
 *
 * @param {string[]} index - storage keys (capture times)
 */
function makePreloaded(index) {
    var cutoffMillis = +CUTOFF_DATE;
    var start = Math.floor(binarySearch(index, cutoffMillis, LT, GT, 0, index.length));

    var recentKeys = [];
    var pos = start;
    while (pos < index.length) {
        recentKeys.push(index[pos]);
        pos += 1;
    }

    chrome.storage.local.get(recentKeys, function(items) {
        var loaded = [];
        for (var key in items) {
            loaded.push(items[key]);
        }
        loaded.sort(function(a,b){return a.time-b.time});
        window.preloaded = loaded;
    });
}
114 |
/**
 * Throws when `condition` is falsy.
 *
 * @param {*} condition - value expected to be truthy
 * @param {*} [message] - value to throw; defaults to "Assertion failed"
 */
function assert(condition, message) {
    if (condition) {
        return;
    }
    throw message || "Assertion failed";
}
120 |
/**
 * Handles messages sent to the background page.
 *
 * - 'pageContent': archives the page unless shouldArchive() rejects it or an
 *   identical record (same url and same processed text) is already stored.
 * - 'setPreferences': replaces and persists the preferences.
 * - 'setBlacklist': replaces and persists the blacklist.
 *
 * @param {Object} data - the message; `data.msg` selects the action.
 * @param {*} sender - unused.
 * @param {*} sendResponse - unused.
 */
function handleMessage(data, sender, sendResponse) {
    if (data.msg === 'pageContent' && shouldArchive(data)) {
        delete data.msg;
        data.text = processPageText(data.text);
        var time = data.time;
        var keyValue = {};
        keyValue[time] = data;
        chrome.storage.local.get(function(results) {
            // Only numeric keys are page records; skip 'index', 'blacklist', ...
            var alreadyStored = Object.keys(results).some(function(key) {
                return !isNaN(key) && (results[key].url == data.url) && results[key].text == data.text;
            });
            if (alreadyStored) {
                return;
            }

            chrome.storage.local.set(keyValue, function() {
                console.log("Stored: " + data.title);
            });
            timeIndex.push(time.toString());
            preloaded.push(data);
            chrome.storage.local.set({'index':{'index':timeIndex}});
        });

    } else if (data.msg === 'setPreferences') {
        preferences = data.preferences;
        chrome.storage.local.set({'preferences':preferences});
    } else if (data.msg === 'setBlacklist') {
        // Normalise the payload so the blacklist always has the three lists
        // shouldArchive() reads, even if the sender passed something else
        // (for example an empty array).
        var incoming = data.blacklist || {};
        blacklist = {
            'PAGE': Array.isArray(incoming['PAGE']) ? incoming['PAGE'] : [],
            'REGEX': Array.isArray(incoming['REGEX']) ? incoming['REGEX'] : [],
            'SITE': Array.isArray(incoming['SITE']) ? incoming['SITE'] : []
        };
        chrome.storage.local.set({'blacklist':blacklist});
    }
}
151 |
/**
 * Omnibox input handler: forwards the typed text to dispatchSuggestions(),
 * with suggestionsComplete as the completion callback and `suggest` as the
 * callback that finally receives the suggestions.
 *
 * @param {string} text - what the user typed.
 * @param {Function} suggest - suggestion sink passed through unchanged.
 */
function omnibarHandler(text, suggest) {
    var onComplete = suggestionsComplete;
    dispatchSuggestions(text, onComplete, suggest);
}
155 |
/**
 * Turns matched page records into omnibox suggestions and delivers them.
 *
 * Each description is "<YYYY-MM-DD>[, <HH:MM>]: <escaped title>", built from
 * the ISO form of the record's `time`; the time-of-day part is included only
 * when `shouldDate` is truthy. The default suggestion is set to a usage tip
 * when there are results and to "No results found" otherwise. A clearCache
 * call is scheduled after CLEAR_DELAY.
 *
 * @param {Array} suggestions - records with `time`, `title` and `url`.
 * @param {*} shouldDate - truthy to include the time of day.
 * @param {Function} suggestCb - receives the array of {content, description}.
 */
function suggestionsComplete(suggestions, shouldDate, suggestCb) {
    var res = suggestions.map(function(elem) {
        var iso = new Date(elem.time).toISOString();
        var day = iso.slice(0,10);
        var clock = shouldDate ? ', ' + iso.slice(11,16) : '';
        return {content:elem.url, description: day + clock + ': ' + escape(elem.title)};
    });

    var defaultText = res.length > 0
        ? "Tip: Use time filters, example: before:\"2 weeks ago\""
        : "No results found";
    chrome.omnibox.setDefaultSuggestion({description: defaultText});

    suggestCb(res);
    window.setTimeout(clearCache, CLEAR_DELAY);
}
173 |
/**
 * Cache eviction hook; currently a deliberate no-op.
 *
 * The previous body returned unconditionally on its first line, leaving the
 * eviction loop behind it unreachable. That loop deleted every `cache` entry
 * whose key, parsed as a millisecond timestamp, was more than
 * MILLIS_BEFORE_CLEAR older than the current time. The function is kept so
 * that existing callers remain valid.
 */
function clearCache() {
    // Intentionally empty: cache eviction is disabled.
}
184 |
/**
 * Escapes regular-expression metacharacters in `str` by prefixing each with a
 * backslash, so the result can be embedded in a pattern and match literally.
 *
 * @param {string} str - text to escape.
 * @returns {string} the escaped text.
 */
function escapeRegExp(str) {
    var metaCharacters = /[\-\[\]\/\{\}\(\)\*\+\?\.\\\^\$\|]/g;
    return str.replace(metaCharacters, function(character) {
        return "\\" + character;
    });
}
188 |
/**
 * Decides whether a page may be archived, based on the blacklist.
 *
 * A page is rejected when
 *  - its URL contains the host part of any SITE rule or DEFAULT_BLACKLIST
 *    entry (the third '/'-separated segment, e.g. "example.com" in
 *    "https://example.com/"),
 *  - its cleaned URL equals a PAGE rule, or
 *  - its URL matches a REGEX rule.
 *
 * Missing or malformed blacklist lists are treated as empty, and a REGEX
 * rule that is not a valid pattern is skipped with a warning instead of
 * throwing.
 *
 * @param {Object} data - page message; only `data.url` is read.
 * @returns {boolean} true when the page should be archived.
 */
function shouldArchive(data) {
    var rules = (typeof blacklist !== 'undefined' && blacklist !== null &&
        typeof blacklist === 'object') ? blacklist : {};
    var site = (Array.isArray(rules["SITE"]) ? rules["SITE"] : []).concat(DEFAULT_BLACKLIST);
    var page = Array.isArray(rules["PAGE"]) ? rules["PAGE"] : [];
    var regex = Array.isArray(rules["REGEX"]) ? rules["REGEX"] : [];
    var url = data.url;

    for (var s = 0; s < site.length; s++) {
        var parts = site[s].split('/');
        // An empty host segment (e.g. the rule "https://") would match every URL.
        if (parts.length > 2 && parts[2] !== '' && url.indexOf(parts[2]) != -1) {
            return false;
        }
    }

    if (page.length > 0) {
        var cleaned = cleanURL(url);
        for (var p = 0; p < page.length; p++) {
            if (cleaned == page[p]) {
                return false;
            }
        }
    }

    for (var r = 0; r < regex.length; r++) {
        var matched;
        try {
            matched = url.match(regex[r]) != null;
        } catch (err) {
            // The rule text is user-entered; one bad pattern must not stop
            // archiving altogether.
            console.warn("Ignoring invalid blacklist regex: " + regex[r]);
            continue;
        }
        if (matched) {
            return false;
        }
    }

    return true;
}
216 |
/**
 * Filters candidate records against a parsed query and reports the matches.
 *
 * Candidates are scanned from the end of the array backwards. A candidate
 * matches when its text contains none of `query.negative` and all of
 * `query.keywords`. Only the first match per cleaned URL is kept, and
 * scanning stops once six matches have been collected.
 *
 * @param {Object} query - has `keywords`, `negative` and `shouldDate`.
 * @param {Array} candidates - records with `text` and `url`.
 * @param {Function} cb - called as cb(matches, query.shouldDate, suggestCb).
 * @param {Function} suggestCb - passed through to `cb`.
 */
function makeSuggestions(query, candidates, cb, suggestCb) {
    var required = query.keywords;
    var forbidden = query.negative;
    var seenUrls = {};
    var matches = [];

    for (var pos = candidates.length - 1; pos > -1 && matches.length < 6; pos--) {
        var candidate = candidates[pos];
        var text = candidate.text;

        var containsForbidden = forbidden.some(function(term) {
            return text.indexOf(term) > -1;
        });
        if (containsForbidden) {
            continue;
        }

        var containsAllRequired = required.every(function(term) {
            return text.indexOf(term) !== -1;
        });
        if (!containsAllRequired) {
            continue;
        }

        var cleanedURL = cleanURL(candidate.url);
        if (cleanedURL in seenUrls) {
            continue;
        }
        seenUrls[cleanedURL] = true;
        matches.push(candidate);
    }

    cb(matches, query.shouldDate, suggestCb);
}
258 |
/**
 * Normalises a URL for comparison: trims surrounding whitespace and removes
 * a trailing "#fragment" (a '#' followed by at least one character).
 *
 * @param {string} url - the URL to clean.
 * @returns {string} the cleaned URL.
 */
function cleanURL(url) {
    var trimmed = url.trim();
    var withoutFragment = trimmed.replace(/(#.+?)$/, '');
    return withoutFragment;
}
262 |
/**
 * Parses the omnibox text into a query, selects the candidate records that
 * fall inside the query's time window and hands them to makeSuggestions().
 *
 * Nothing is done when both bounds are set and `after` is not earlier than
 * `before`. When `query.after` is at or past CUTOFF_DATE the candidates are a
 * slice of the in-memory `preloaded` array; otherwise the window is located
 * in `timeIndex`, the keys before the cutoff position are fetched from
 * chrome.storage.local and merged with a leading slice of `preloaded` in the
 * global `sorted` array.
 *
 * @param {string} text - raw omnibox input.
 * @param {Function} cb - completion callback forwarded to makeSuggestions.
 * @param {Function} suggestCb - suggestion sink forwarded to makeSuggestions.
 */
function dispatchSuggestions(text, cb, suggestCb) {
    var query = makeQueryFromText(text);
    var hasBothBounds = query.before !== false && query.after !== false;
    if (hasBothBounds && query.after >= query.before) {
        return;
    }

    query.keywords.sort(function(first, second) {
        return second.length - first.length;
    });

    // Locates [from, to) for the query window inside `haystack`; `toProbe`
    // converts a bound into the value shape the comparators expect.
    function locateWindow(haystack, toProbe, lt, gt) {
        var from = Math.floor(binarySearch(haystack, toProbe(query.after), lt,
                                            gt, 0, haystack.length));
        var to;
        if (query.before) {
            to = Math.ceil(binarySearch(haystack, toProbe(query.before), lt,
                                         gt, 0, haystack.length));
        } else {
            to = haystack.length;
        }
        return {from: from, to: to};
    }

    if (query.after >= CUTOFF_DATE) {
        var recent = locateWindow(preloaded, function(bound) {
            return {'time': +bound};
        }, LT_OBJ, GT_OBJ);

        makeSuggestions(query, preloaded.slice(recent.from, recent.to), cb, suggestCb);
        return;
    }

    var span = locateWindow(timeIndex, function(bound) {
        return +bound;
    }, LT, GT);

    window.sorted = [];
    var keysInWindow = timeIndex.slice(span.from, span.to);
    var cutoffPosition = Math.ceil(binarySearch(keysInWindow, +CUTOFF_DATE, LT, GT, 0, keysInWindow.length));
    if (cutoffPosition < keysInWindow.length) {
        sorted = preloaded.slice(0, keysInWindow.length - cutoffPosition + 1);
    }
    var keysToFetch = keysInWindow.slice(0, cutoffPosition);

    chrome.storage.local.get(keysToFetch, function(items) {
        Object.keys(items).forEach(function(key) {
            sorted.push(items[key]);
        });
        sorted.sort(function(left, right) {
            return left.time - right.time;
        });
        makeSuggestions(query, sorted, cb, suggestCb);
    });
}
309 |
/**
 * Binary search over `arr` between positions `i` and `j`.
 *
 * Returns the index of an element for which neither `gt` nor `lt` holds
 * against `value`; if the bounds close to within one position first, returns
 * their midpoint (i + j) / 2, which may be fractional.
 *
 * @param {Array} arr - sorted haystack.
 * @param {*} value - value searched for.
 * @param {Function} lt - lt(element, value): true when element is smaller.
 * @param {Function} gt - gt(element, value): true when element is larger.
 * @param {number} i - lower bound.
 * @param {number} j - upper bound.
 * @returns {number} the matching index or the midpoint of the final bounds.
 */
function binarySearch(arr, value, lt, gt, i, j) {
    var low = i;
    var high = j;

    while (Math.abs(high - low) > 1) {
        var middle = Math.floor((low + high) / 2);
        var probe = arr[middle];

        if (gt(probe, value)) {
            high = middle;
        } else if (lt(probe, value)) {
            low = middle;
        } else {
            return middle;
        }
    }

    return (low + high) / 2;
}
326 |
327 | init();
328 |
--------------------------------------------------------------------------------
/extension/js/preferences.js:
--------------------------------------------------------------------------------
1 | (function() {
2 | var allPageDisplay = null;
3 |
/**
 * Appends an editable rule row to the blacklist table.
 *
 * The row holds an editable text cell, a rule-type select, an "enabled"
 * checkbox and a Delete button that removes the row.
 *
 * @param {string} type - 'PAGE', 'SITE' or 'REGEX'. Any other value (for
 *     example the click event passed when this function is used directly as
 *     an onclick handler) falls back to 'SITE', so the select always has a
 *     valid selection.
 * @param {string} [content] - initial rule text; empty when falsy.
 */
var add = function(type, content) {
    var RULE_TYPES = [
        ['PAGE', 'Specific Page'],
        ['SITE', 'Entire Website'],
        ['REGEX', 'Regex']
    ];
    var ruleType = 'SITE';
    for (var t = 0; t < RULE_TYPES.length; t++) {
        if (RULE_TYPES[t][0] === type) {
            ruleType = type;
        }
    }

    var table = document.getElementById('blacklist_tbl');
    var row = table.insertRow();

    var stringCell = row.insertCell();
    stringCell.textContent = content ? content : '';
    stringCell.contentEditable = true;
    stringCell.setAttribute('placeholder', 'Add a site \u2026');

    var typeCell = row.insertCell();
    var select = document.createElement('select');
    RULE_TYPES.forEach(function(entry) {
        var option = document.createElement('option');
        option.value = entry[0];
        option.textContent = entry[1];
        select.appendChild(option);
    });
    select.value = ruleType;
    typeCell.appendChild(select);

    var enabledCell = row.insertCell();
    var checkbox = document.createElement('input');
    checkbox.type = 'checkbox';
    checkbox.checked = true;
    enabledCell.appendChild(checkbox);

    var deleteCell = row.insertCell();
    deleteCell.setAttribute('class', 'right aligned');
    var deleteButton = document.createElement('a');
    deleteButton.setAttribute('class', 'mini ui button red');
    deleteButton.textContent = 'Delete';
    deleteButton.onclick = function() {
        row.parentNode.removeChild(row);
    };
    deleteCell.appendChild(deleteButton);
};
47 |
/**
 * Shortens a string for display.
 *
 * @param {string} stringToCut - text to shorten; may be null or undefined.
 * @returns {string} 'No title' for a missing or empty value, the value itself
 *     when it is at most 75 characters long, otherwise its first 75
 *     characters followed by an ellipsis.
 */
function cutString(stringToCut) {
    // Records without a usable title must not break rendering of the list.
    if (stringToCut === undefined || stringToCut === null || stringToCut.length == 0) {
        return 'No title';
    }
    if (stringToCut.length <= 75) {
        return stringToCut;
    }
    return stringToCut.slice(0, 75) + '\u2026';
}
55 |
/**
 * Appends one table row per page record to the history table.
 *
 * Each row shows the shortened title and URL, the record's time as
 * "YYYY-MM-DD HH:MM" (taken from its ISO form), an Export button that
 * downloads the stored record as JSON, and a Delete button that removes the
 * record from chrome.storage.local and the row from the table. The row's id
 * is the record's `time`, which is also its storage key.
 *
 * @param {Array} pages - records with `title`, `url` and `time`; a missing
 *     value is treated as an empty list.
 */
function addHistoricPages(pages) {
    const historyTable = document.getElementById('history_tbl');

    for (const page of pages || []) {
        const row = document.createElement('tr');

        const entryColumn = document.createElement('td');
        const title = document.createElement('b');
        title.textContent = cutString(page.title);
        const link = document.createElement('a');
        link.href = page.url;
        link.target = '_blank';
        link.textContent = cutString(page.url);
        entryColumn.appendChild(title);
        entryColumn.appendChild(document.createElement('br'));
        entryColumn.appendChild(link);

        const dateColumn = document.createElement('td');
        dateColumn.textContent = new Date(page.time).toISOString().replace('T','\u00A0').slice(0,16);

        const exportColumn = document.createElement('td');
        const exportButton = document.createElement('a');
        exportButton.setAttribute('class', 'ui button mini blue');
        exportButton.textContent = 'Export';
        exportButton.onclick = function() {
            const storageKey = String(row.id);
            chrome.storage.local.get([storageKey], (result) => {
                const record = result[storageKey];
                if (record === undefined) {
                    // The record was removed after this row was rendered.
                    return;
                }
                const download = document.createElement('a');
                const file = new Blob([JSON.stringify(record)], {type: 'application/json'});
                download.href = URL.createObjectURL(file);
                download.download = record.url.split('/')[2] + '_' + storageKey + '.json';
                download.click();
            });
        };
        exportColumn.appendChild(exportButton);

        const deleteColumn = document.createElement('td');
        const deleteButton = document.createElement('a');
        deleteButton.setAttribute('class', 'ui button mini red');
        deleteButton.textContent = 'Delete';
        deleteButton.onclick = function() {
            chrome.storage.local.remove(String(row.id));
            notie.alert(4, 'Page deleted.', 2);
            row.parentNode.removeChild(row);
        };
        deleteColumn.appendChild(deleteButton);

        row.appendChild(entryColumn);
        row.appendChild(dateColumn);
        row.appendChild(exportColumn);
        row.appendChild(deleteColumn);

        row.id = page.time;
        historyTable.appendChild(row);
    }
}
112 |
/**
 * Folds a string for accent- and case-insensitive comparison: decomposes it
 * (NFD), drops combining marks in U+0300..U+036F and lower-cases the rest.
 *
 * @param {string} str - text to fold.
 * @returns {string} the folded text.
 */
function normalize(str) {
    const decomposed = str.normalize('NFD');
    const withoutMarks = decomposed.replace(/[\u0300-\u036f]/g, '');
    return withoutMarks.toLowerCase();
}
116 |
/**
 * Rebuilds the history table with the stored pages matching `query`.
 *
 * A page matches when "<url>/<title>/<text>" contains the query verbatim, or
 * when its text contains every query part after accent/case folding. Parts
 * are whitespace-separated; double quotes group words into one part. A query
 * with no parts (empty or whitespace only) matches every page. Matches are
 * shown newest first, 20 at a time; `allPageDisplay` keeps the generator
 * that serves the following batches.
 *
 * @param {string} [query=''] - search text.
 */
function getHistory(query = '') {
    const historyTable = document.getElementById('history_tbl');
    const clearTable = () => {
        while (historyTable.hasChildNodes()) {
            historyTable.removeChild(historyTable.lastChild);
        }
    };

    clearTable();
    chrome.storage.local.get(function(results) {
        // Clear again: another search may have filled the table while this
        // lookup was in flight, which would otherwise leave duplicate rows.
        clearTable();

        // match() yields null when the query has no parts at all.
        const queryParts = (query.match(/(?:[^\s"]+|"[^"]*")+/g) || [])
            .map((part) => part.replace(/"/g, ''));
        const allPages = [];

        for (const key in results) {
            // Only numeric keys are page records.
            if (Number.isNaN(Number(key))) {
                continue;
            }
            const page = results[key];
            const haystack = page.url + '/' + page.title + '/' + page.text;
            if (haystack.indexOf(query) != -1) {
                allPages.push(page);
                continue;
            }
            const foldedText = normalize(page.text);
            if (queryParts.every((part) => foldedText.indexOf(normalize(part)) != -1)) {
                allPages.push(page);
            }
        }

        if (allPages.length == 0) {
            const emptyRow = document.createElement('tr');
            const emptyColumn = document.createElement('td');
            emptyColumn.setAttribute('class','center aligned');
            emptyColumn.textContent = 'No entry found';
            emptyRow.appendChild(emptyColumn);
            historyTable.appendChild(emptyRow);
        }

        allPages.reverse();
        allPageDisplay = nextPages(allPages);
        addHistoricPages(allPageDisplay.next().value);
    });
}
148 |
/**
 * Endless generator that hands out `allPages` in batches of up to 20.
 *
 * Each step removes the batch from the front of `allPages` itself; once the
 * array is exhausted every further step yields an empty array.
 *
 * @param {Array} allPages - records to page through (consumed in place).
 */
function* nextPages(allPages) {
    for (;;) {
        const batch = allPages.splice(0, 20);
        yield batch;
    }
}
153 |
// Populate the blacklist table from storage. Every stored SITE, PAGE and
// REGEX rule gets a row; when nothing is stored (or the stored value is
// missing, malformed or empty) save(false) is called instead, which pushes
// an empty blacklist and leaves one blank row to type into.
chrome.storage.local.get('blacklist', function(result) {
    const stored = result.blacklist;
    // The key may not exist yet, so never dereference it blindly.
    const bl = (stored !== undefined && stored !== null && typeof stored === 'object') ? stored : {};
    const fields = ['SITE', 'PAGE', 'REGEX'];

    let rowsAdded = 0;
    for (const field of fields) {
        const rules = Array.isArray(bl[field]) ? bl[field] : [];
        for (const rule of rules) {
            add(field, rule);
            rowsAdded += 1;
        }
    }

    if (rowsAdded === 0) {
        save(false);
    }
});
168 |
/**
 * Saves the blacklist table: removes rows whose rule text is empty, sends the
 * remaining rules to the background page as a 'setBlacklist' message and,
 * when no rule rows remain, adds one blank row.
 *
 * The blacklist is always sent as {SITE: [...], PAGE: [...], REGEX: [...]},
 * also when it is empty. Rule text is read with textContent, so characters
 * such as '&' or '<' are stored as typed rather than HTML-escaped.
 *
 * @param {*} [showAlert=true] - falsy to suppress the "Saved Preferences."
 *     notification. When used as an onclick handler the click event arrives
 *     here and, being truthy, shows the notification.
 */
function save(showAlert) {
    const shouldAlert = (typeof showAlert !== 'undefined') ? showAlert : true;
    if (shouldAlert) { notie.alert(4, 'Saved Preferences.', 2); }

    const tab = document.getElementById('blacklist_tbl');

    // Row 0 is skipped, as in the loops below. Walk bottom-up so that
    // deleting a row does not shift the rows still to be checked.
    for (let i = tab.rows.length - 1; i >= 1; i--) {
        if (tab.rows[i].cells[0].textContent.trim() === '') {
            tab.deleteRow(i);
        }
    }

    const b = {
        'SITE': [],
        'PAGE': [],
        'REGEX': []
    };
    for (let i = 1; i < tab.rows.length; i++) {
        const row = tab.rows[i];
        const type = row.cells[1].childNodes[0].value;
        // Skip rows whose select holds no known rule type.
        if (!Object.prototype.hasOwnProperty.call(b, type)) {
            continue;
        }
        b[type].push(row.cells[0].textContent.trim());
    }

    chrome.runtime.sendMessage({
        'msg': 'setBlacklist',
        'blacklist': b
    });

    if (tab.rows.length == 1) {
        add('SITE', '');
    }
}
207 |
/**
 * Appends the next batch of history rows to the table.
 *
 * Does nothing while `allPageDisplay` is still unset, i.e. before the first
 * history lookup has completed.
 */
function loadMore() {
    if (!allPageDisplay) {
        return;
    }
    addHistoricPages(allPageDisplay.next().value);
}
211 |
/**
 * Clears everything in chrome.storage.local, shows a notification and
 * reloads the extension two seconds later.
 */
function clearAllData() {
    const restartDelayMillis = 2000;

    chrome.storage.local.clear();
    notie.alert(1, 'Deleted All Data. Restarting Falcon …', 2);

    setTimeout(() => {
        chrome.runtime.reload();
    }, restartDelayMillis);
}
219 |
/**
 * Deletes all blacklist rules (SITE, PAGE and REGEX), shows a notification
 * and reloads the extension two seconds later.
 *
 * Previously only the SITE list was emptied, so PAGE and REGEX rules
 * survived a "Deleted Rules" confirmation. A missing or malformed stored
 * blacklist is replaced by a fresh empty one instead of throwing.
 */
function clearRules() {
    chrome.storage.local.get(['blacklist'], function(items) {
        const stored = items['blacklist'];
        const isUsable = stored !== undefined && stored !== null &&
            typeof stored === 'object' && !Array.isArray(stored);
        const blacklist = isUsable ? stored : {};
        blacklist['SITE'] = [];
        blacklist['PAGE'] = [];
        blacklist['REGEX'] = [];
        chrome.storage.local.set({'blacklist':blacklist});
    });
    notie.alert(1, 'Deleted Rules. Restarting Falcon …', 2);
    setTimeout(function() {
        chrome.runtime.reload();
    }, 2000);
}
231 |
/**
 * Deletes every archived page listed in the stored time index, resets the
 * index to empty, shows a notification and reloads the extension two seconds
 * later.
 *
 * Only the 'index' record is read, and a missing or malformed index is
 * treated as empty.
 */
function clearHistory() {
    chrome.storage.local.get('index', function(results) {
        const stored = results['index'];
        const hasList = stored !== undefined && stored !== null && Array.isArray(stored['index']);
        const timestamps = hasList ? stored['index'].map(String) : [];
        if (timestamps.length > 0) {
            // Index entries are the storage keys of the page records.
            chrome.storage.local.remove(timestamps);
        }
        chrome.storage.local.set({'index':{'index':[]}});
    });
    notie.alert(1, 'Deleted History. Restarting Falcon …', 2);
    setTimeout(function() {
        chrome.runtime.reload();
    }, 2000);
}
245 |
// Page start-up: seed the search box from the page URL's query string, show
// the matching history and wire up the buttons.
document.addEventListener('DOMContentLoaded', function(event){
    // Skip the first 7 characters of location.search and drop any
    // before:/after: filters; only the remaining text is searched here.
    var query = unescape(location.search?.substring(7).replace(/(before|after): ?([^" ]+|"[^"]+") ?/g,''));
    var searchBox = document.getElementById('search_history');
    searchBox.value = query;
    getHistory(query);

    // add() and save() take their own arguments; wrap them so the click
    // event is not passed in as the rule type / alert flag.
    document.getElementById('save').onclick = function () {
        save(true);
    };
    document.getElementById('add').onclick = function () {
        add('SITE', '');
    };
    document.getElementById('loadmore').onclick = loadMore;

    document.getElementById('clear').onclick = function () {
        notie.confirm('Are you sure you want to do that?', 'Yes', 'Cancel', function() {
            clearAllData();
        });
    };

    document.getElementById('clear-rules').onclick = function () {
        notie.confirm('Are you sure you want to do that?', 'Yes', 'Cancel', function() {
            clearRules();
        });
    };

    document.getElementById('clear-history').onclick = function () {
        notie.confirm('Are you sure you want to do that?', 'Yes', 'Cancel', function() {
            clearHistory();
        });
    };

    searchBox.onkeyup = function () {
        getHistory(document.getElementById('search_history').value);
    };
});
277 | })();
278 |
--------------------------------------------------------------------------------
/extension/js/lib/notie.min.js:
--------------------------------------------------------------------------------
1 | var notie=function(){function e(e){for(var t in e)L[t]=e[t]}function t(e,t,i){L.colorText.length>0&&(S.style.color=L.colorText),H(),J++,setTimeout(function(){J--},L.animationDelay+10),1===J&&(Y?n(function(){o(e,t,i)}):o(e,t,i))}function o(e,t,o){Y=!0;var i=0;switch(i="undefined"==typeof o||0===o?864e5:o>0&&1>o?1e3:1e3*o,D(N,"notie-background-success"),D(N,"notie-background-warning"),D(N,"notie-background-error"),D(N,"notie-background-info"),e){case 1:case"success":L.colorSuccess.length>0?N.style.backgroundColor=L.colorSuccess:x(N,"notie-background-success");break;case 2:case"warning":L.colorWarning.length>0?N.style.backgroundColor=L.colorWarning:x(N,"notie-background-warning");break;case 3:case"error":L.colorError.length>0?N.style.backgroundColor=L.colorError:x(N,"notie-background-error");break;case 4:case"info":L.colorInfo.length>0?N.style.backgroundColor=L.colorInfo:x(N,"notie-background-info")}S.innerHTML=t,N.style.top="-10000px",N.style.display="table",N.style.top="-"+N.offsetHeight-5+"px",F=setTimeout(function(){x(N,"notie-transition"),N.style.top=0,W=setTimeout(function(){n(function(){})},i)},20)}function n(e){clearTimeout(F),clearTimeout(W),N.style.top="-"+N.offsetHeight-5+"px",setTimeout(function(){D(N,"notie-transition"),N.style.top="-10000px",Y=!1,e&&e()},L.animationDelay+10)}function i(e,t,o,i,l){L.colorInfo.length>0&&(j.style.backgroundColor=L.colorInfo),L.colorSuccess.length>0&&(B.style.backgroundColor=L.colorSuccess),L.colorError.length>0&&(K.style.backgroundColor=L.colorError),L.colorText.length>0&&(z.style.color=L.colorText,R.style.color=L.colorText,V.style.color=L.colorText),H(),Y?n(function(){c(e,t,o,i,l)}):c(e,t,o,i,l)}function c(e,t,o,n,i){function 
c(){z.innerHTML=e,R.innerHTML=t,V.innerHTML=o,O.style.top="-10000px",O.style.display="table",O.style.top="-"+O.offsetHeight-5+"px",$.style.display="block",setTimeout(function(){x(O,"notie-transition"),O.style.top=0,$.style.opacity="0.75",setTimeout(function(){q=!0},L.animationDelay+10)},20)}M(),B.onclick=function(){l(),n&&setTimeout(function(){n()},L.animationDelay+10)},K.onclick=function(){l(),i&&setTimeout(function(){i()},L.animationDelay+10)},q?(l(),setTimeout(function(){c()},L.animationDelay+10)):c()}function l(){O.style.top="-"+O.offsetHeight-5+"px",$.style.opacity="0",setTimeout(function(){D(O,"notie-transition"),O.style.top="-10000px",$.style.display="none",w(),q=!1},L.animationDelay+10)}function a(e,t,o,i,c,l){L.colorInfo.length>0&&(Q.style.backgroundColor=L.colorInfo),L.colorSuccess.length>0&&(X.style.backgroundColor=L.colorSuccess),L.colorError.length>0&&(Z.style.backgroundColor=L.colorError),L.colorText.length>0&&(_.style.color=L.colorText,ee.style.color=L.colorText,te.style.color=L.colorText),H(),U.setAttribute("autocapitalize",e.autocapitalize||"none"),U.setAttribute("autocomplete",e.autocomplete||"off"),U.setAttribute("autocorrect",e.autocorrect||"off"),U.setAttribute("autofocus",e.autofocus||"true"),U.setAttribute("inputmode",e.inputmode||"verbatim"),U.setAttribute("max",e.max||""),U.setAttribute("maxlength",e.maxlength||""),U.setAttribute("min",e.min||""),U.setAttribute("minlength",e.minlength||""),U.setAttribute("placeholder",e.placeholder||""),U.setAttribute("spellcheck",e.spellcheck||"default"),U.setAttribute("step",e.step||"any"),U.setAttribute("type",e.type||"text"),U.value=e.prefilledValue||"",Y?n(function(){r(t,o,i,c,l)}):r(t,o,i,c,l)}function r(e,t,o,n,i){function 
c(){_.innerHTML=e,ee.innerHTML=t,te.innerHTML=o,G.style.top="-10000px",G.style.display="table",G.style.top="-"+G.offsetHeight-5+"px",P.style.display="block",setTimeout(function(){x(G,"notie-transition"),G.style.top=0,P.style.opacity="0.75",setTimeout(function(){oe=!0,U.focus()},L.animationDelay+10)},20)}M(),X.onclick=function(){d(),n&&setTimeout(function(){n(U.value)},L.animationDelay+10)},Z.onclick=function(){d(),i&&setTimeout(function(){i(U.value)},L.animationDelay+10)},oe?(d(),setTimeout(function(){c()},L.animationDelay+10)):c()}function d(){G.style.top="-"+G.offsetHeight-5+"px",P.style.opacity="0",setTimeout(function(){D(G,"notie-transition"),P.style.display="none",G.style.top="-10000px",w(),oe=!1},L.animationDelay+10)}function s(e,t,o){L.colorInfo.length>0&&(ie.style.backgroundColor=L.colorInfo),L.colorNeutral.length>0&&(re.style.backgroundColor=L.colorNeutral),L.colorText.length>0&&(ce.style.color=L.colorText,re.style.color=L.colorText),H(),Y?n(function(){u(e,t,o)}):u(e,t,o)}function u(e,t,o){function n(e){ce.innerHTML=e,ne.style.bottom="-10000px",ne.style.display="table",ne.style.bottom="-"+ne.offsetHeight-5+"px",le.style.display="block",setTimeout(function(){x(ne,"notie-transition"),ne.style.bottom=0,le.style.opacity="0.75",setTimeout(function(){de=!0},L.animationDelay+10)},20)}M(),document.getElementById("notie-select-choices").innerHTML="",re.innerHTML=t;for(var i,c=0;c
]*>[ \n\r\t]*){2,}/gi,
53 | replaceFonts: /<(\/?)font[^>]*>/gi,
54 | trim: /^\s+|\s+$/g,
55 | normalize: /\s{2,}/g,
56 | killBreaks: /(
(\s| ?)*){1,}/g,
57 | videos: /http:\/\/(www\.)?(youtube|vimeo)\.com/i,
58 | skipFootnoteLink: /^\s*(\[?[a-z0-9]{1,2}\]?|^|edit|citation needed)\s*$/i,
59 | nextLink: /(next|weiter|continue|>([^\|]|$)|»([^\|]|$))/i, // Match: next, continue, >, >>, » but not >|, »| as those usually mean last.
60 | prevLink: /(prev|earl|old|new|<|«)/i
61 | },
62 |
63 | /**
64 | * Run any post-process modifications to article content as necessary.
65 | *
66 | * @param Element
67 | * @return void
68 | **/
69 | postProcessContent: function(articleContent) {
70 | if(readability.convertLinksToFootnotes && !window.location.href.match(/wikipedia\.org/g)) {
71 | readability.addFootnotes(articleContent);
72 | }
73 |
74 | readability.fixImageFloats(articleContent);
75 | },
76 |
77 | /**
78 | * Some content ends up looking ugly if the image is too large to be floated.
79 | * If the image is wider than a threshold (currently 55%), no longer float it,
80 | * center it instead.
81 | *
82 | * @param Element
83 | * @return void
84 | **/
85 | fixImageFloats: function (articleContent) {
86 | var imageWidthThreshold = Math.min(articleContent.offsetWidth, 800) * 0.55,
87 | images = articleContent.getElementsByTagName('img');
88 |
89 | for(var i=0, il = images.length; i < il; i+=1) {
90 | var image = images[i];
91 |
92 | if(image.offsetWidth > imageWidthThreshold) {
93 | image.className += " blockImage";
94 | }
95 | }
96 | },
97 |
98 | /**
99 | * Get the article tools Element that has buttons like reload, print.
100 | *
101 | * @return void
102 | **/
103 | getArticleTools: function () {
104 | var articleTools = document.createElement("DIV");
105 |
106 | articleTools.id = "readTools";
107 | articleTools.innerHTML =
108 | "Reload Original Page" +
109 | "Print Page"
110 |
111 | return articleTools;
112 | },
113 |
114 | /**
115 | * Returns the suggested text direction of the string
116 | *
117 | * @return "rtl" || "ltr"
118 | **/
119 | getSuggestedDirection: function(text) {
120 | function sanitizeText() {
121 | return text.replace(/@\w+/, "");
122 | }
123 |
124 | function countMatches(match) {
125 | var matches = text.match(new RegExp(match, "g"));
126 | return matches !== null ? matches.length : 0;
127 | }
128 |
129 | function isRTL() {
130 | var count_heb = countMatches("[\\u05B0-\\u05F4\\uFB1D-\\uFBF4]");
131 | var count_arb = countMatches("[\\u060C-\\u06FE\\uFB50-\\uFEFC]");
132 |
133 | // if 20% of chars are Hebrew or Arbic then direction is rtl
134 | return (count_heb + count_arb) * 100 / text.length > 20;
135 | }
136 |
137 | text = sanitizeText(text);
138 | return isRTL() ? "rtl" : "ltr";
139 | },
140 |
141 |
142 | /**
143 | * Get the article title as an H1.
144 | *
145 | * @return void
146 | **/
147 | getArticleTitle: function (doc) {
148 | var curTitle = "",
149 | origTitle = "";
150 |
151 | try {
152 | curTitle = origTitle = doc.title;
153 |
154 | if(typeof curTitle !== "string") { /* If they had an element with id "title" in their HTML */
155 | curTitle = origTitle = readability.getInnerText(doc.getElementsByTagName('title')[0]);
156 | }
157 | }
158 | catch(e) {}
159 |
160 | if(curTitle.match(/ [\|\-] /))
161 | {
162 | curTitle = origTitle.replace(/(.*)[\|\-] .*/gi,'$1');
163 |
164 | if(curTitle.split(' ').length < 3) {
165 | curTitle = origTitle.replace(/[^\|\-]*[\|\-](.*)/gi,'$1');
166 | }
167 | }
168 | else if(curTitle.indexOf(': ') !== -1)
169 | {
170 | curTitle = origTitle.replace(/.*:(.*)/gi, '$1');
171 |
172 | if(curTitle.split(' ').length < 3) {
173 | curTitle = origTitle.replace(/[^:]*[:](.*)/gi,'$1');
174 | }
175 | }
176 | else if(curTitle.length > 150 || curTitle.length < 15)
177 | {
178 | var hOnes = doc.getElementsByTagName('h1');
179 | if(hOnes.length === 1)
180 | {
181 | curTitle = readability.getInnerText(hOnes[0]);
182 | }
183 | }
184 |
185 | curTitle = curTitle.replace( readability.regexps.trim, "" );
186 |
187 | if(curTitle.split(' ').length <= 4) {
188 | curTitle = origTitle;
189 | }
190 | return curTitle;
191 | },
192 |
193 | /**
194 | * Get the footer with the readability mark etc.
195 | *
196 | * @return void
197 | **/
198 | getArticleFooter: function () {
199 | var articleFooter = document.createElement("DIV");
200 | articleFooter.id = "readFooter";
201 | articleFooter.innerHTML = [
202 | "",
203 | ""].join('');
213 |
214 | return articleFooter;
215 | },
216 |
217 | /**
218 | * Prepare the HTML document for readability to scrape it.
219 | * This includes things like stripping javascript, CSS, and handling terrible markup.
220 | *
221 | * @return void
222 | **/
223 | prepDocument: function (doc) {
224 | /**
225 | * In some cases a body element can't be found (if the HTML is totally hosed for example)
226 | * so we create a new body node and append it to the document.
227 | */
228 | if(!doc.body)
229 | {
230 | doc.innerHTML = "" + doc.innerHTML + ""
231 | /*var body = doc.createElement("body");
232 | try {
233 | doc.body = body;
234 | }
235 | catch(e) {
236 | doc.documentElement.appendChild(body);
237 | dbg(e);
238 | }*/
239 | }
240 |
241 | doc.body.id = "readabilityBody";
242 |
243 | var frames = doc.getElementsByTagName('frame');
244 | if(frames.length > 0)
245 | {
246 | var bestFrame = null;
247 | var bestFrameSize = 0; /* The frame to try to run readability upon. Must be on same domain. */
248 | var biggestFrameSize = 0; /* Used for the error message. Can be on any domain. */
249 | for(var frameIndex = 0; frameIndex < frames.length; frameIndex+=1)
250 | {
251 | var frameSize = frames[frameIndex].offsetWidth + frames[frameIndex].offsetHeight;
252 | var canAccessFrame = false;
253 | try {
254 | var frameBody = frames[frameIndex].contentWindow.document.body;
255 | canAccessFrame = true;
256 | }
257 | catch(eFrames) {
258 | dbg(eFrames);
259 | }
260 |
261 | if(frameSize > biggestFrameSize) {
262 | biggestFrameSize = frameSize;
263 | readability.biggestFrame = frames[frameIndex];
264 | }
265 |
266 | if(canAccessFrame && frameSize > bestFrameSize)
267 | {
268 | readability.frameHack = true;
269 |
270 | bestFrame = frames[frameIndex];
271 | bestFrameSize = frameSize;
272 | }
273 | }
274 |
275 | if(bestFrame)
276 | {
277 | var newBody = doc.createElement('body');
278 | newBody.innerHTML = bestFrame.contentWindow.document.body.innerHTML;
279 | newBody.style.overflow = 'scroll';
280 | doc.body = newBody;
281 |
282 |
283 | var frameset = doc.getElementsByTagName('frameset')[0];
284 | if(frameset) {
285 | frameset.parentNode.removeChild(frameset); }
286 | }
287 | }
288 |
289 | /* Remove all stylesheets */
290 | for (var k=0;k < doc.styleSheets.length; k+=1) {
291 | if (doc.styleSheets[k].href !== null && doc.styleSheets[k].href.lastIndexOf("readability") === -1) {
292 | doc.styleSheets[k].disabled = true;
293 | }
294 | }
295 |
296 | /* Remove all style tags in head (not doing this on IE) - TODO: Why not? */
297 | var styleTags = doc.getElementsByTagName("style");
298 | for (var st=0;st < styleTags.length; st+=1) {
299 | styleTags[st].textContent = "";
300 | }
301 |
302 | /* Turn all double br's into p's */
303 | /* Note, this is pretty costly as far as processing goes. Maybe optimize later. */
304 | doc.body.innerHTML = doc.body.innerHTML.replace(readability.regexps.replaceBrs, '
').replace(readability.regexps.replaceFonts, '<$1span>'); 305 | }, 306 | 307 | /** 308 | * For easier reading, convert this document to have footnotes at the bottom rather than inline links. 309 | * @see http://www.roughtype.com/archives/2010/05/experiments_in.php 310 | * 311 | * @return void 312 | **/ 313 | addFootnotes: function(articleContent) { 314 | var footnotesWrapper = document.getElementById('readability-footnotes'), 315 | articleFootnotes = document.getElementById('readability-footnotes-list'); 316 | 317 | if(!footnotesWrapper) { 318 | footnotesWrapper = document.createElement("DIV"); 319 | footnotesWrapper.id = 'readability-footnotes'; 320 | footnotesWrapper.innerHTML = '
tags, etc.
386 | *
387 | * @param Element
388 | * @return void
389 | **/
390 | prepArticle: function (articleContent) {
391 | readability.cleanStyles(articleContent);
392 | readability.killBreaks(articleContent);
393 |
394 | /* Clean out junk from the article content */
395 | readability.cleanConditionally(articleContent, "form");
396 | readability.clean(articleContent, "object");
397 | readability.clean(articleContent, "h1");
398 |
399 | /**
400 | * If there is only one h2, they are probably using it
401 | * as a header and not a subheader, so remove it since we already have a header.
402 | ***/
403 | if(articleContent.getElementsByTagName('h2').length === 1) {
404 | readability.clean(articleContent, "h2");
405 | }
406 | readability.clean(articleContent, "iframe");
407 |
408 | readability.cleanHeaders(articleContent);
409 |
410 | /* Do these last as the previous stuff may have removed junk that will affect these */
411 | readability.cleanConditionally(articleContent, "table");
412 | readability.cleanConditionally(articleContent, "ul");
413 | readability.cleanConditionally(articleContent, "div");
414 |
415 | /* Remove extra paragraphs */
416 | var articleParagraphs = articleContent.getElementsByTagName('p');
417 | for(var i = articleParagraphs.length-1; i >= 0; i-=1) {
418 | var imgCount = articleParagraphs[i].getElementsByTagName('img').length;
419 | var embedCount = articleParagraphs[i].getElementsByTagName('embed').length;
420 | var objectCount = articleParagraphs[i].getElementsByTagName('object').length;
421 |
422 | if(imgCount === 0 && embedCount === 0 && objectCount === 0 && readability.getInnerText(articleParagraphs[i], false) === '') {
423 | articleParagraphs[i].parentNode.removeChild(articleParagraphs[i]);
424 | }
425 | }
426 |
427 | try {
428 | articleContent.innerHTML = articleContent.innerHTML.replace(/
]*>\s*
topCandidate.readability.contentScore) { 637 | topCandidate = candidates[c]; } 638 | } 639 | 640 | /** 641 | * If we still have no top candidate, just use the body as a last resort. 642 | * We also have to copy the body node so it is something we can modify. 643 | **/ 644 | if (topCandidate === null || topCandidate.tagName === "BODY") 645 | { 646 | topCandidate = doc.createElement("DIV"); 647 | topCandidate.innerHTML = page.innerHTML; 648 | page.innerHTML = ""; 649 | page.appendChild(topCandidate); 650 | readability.initializeNode(topCandidate); 651 | } 652 | 653 | /** 654 | * Now that we have the top candidate, look through its siblings for content that might also be related. 655 | * Things like preambles, content split by ads that we removed, etc. 656 | **/ 657 | var articleContent = doc.createElement("DIV"); 658 | if (isPaging) { 659 | articleContent.id = "readability-content"; 660 | } 661 | var siblingScoreThreshold = Math.max(10, topCandidate.readability.contentScore * 0.2); 662 | var siblingNodes = topCandidate.parentNode.childNodes; 663 | 664 | 665 | for(var s=0, sl=siblingNodes.length; s < sl; s+=1) { 666 | var siblingNode = siblingNodes[s]; 667 | var append = false; 668 | 669 | /** 670 | * Fix for odd IE7 Crash where siblingNode does not exist even though this should be a live nodeList. 671 | * Example of error visible here: http://www.esquire.com/features/honesty0707 672 | **/ 673 | if(!siblingNode) { 674 | continue; 675 | } 676 | 677 | dbg("Looking at sibling node: " + siblingNode + " (" + siblingNode.className + ":" + siblingNode.id + ")" + ((typeof siblingNode.readability !== 'undefined') ? (" with score " + siblingNode.readability.contentScore) : '')); 678 | dbg("Sibling has score " + (siblingNode.readability ? 
siblingNode.readability.contentScore : 'Unknown')); 679 | 680 | if(siblingNode === topCandidate) 681 | { 682 | append = true; 683 | } 684 | 685 | var contentBonus = 0; 686 | /* Give a bonus if sibling nodes and top candidates have the example same classname */ 687 | if(siblingNode.className === topCandidate.className && topCandidate.className !== "") { 688 | contentBonus += topCandidate.readability.contentScore * 0.2; 689 | } 690 | 691 | if(typeof siblingNode.readability !== 'undefined' && (siblingNode.readability.contentScore+contentBonus) >= siblingScoreThreshold) 692 | { 693 | append = true; 694 | } 695 | 696 | if(siblingNode.nodeName === "P") { 697 | var linkDensity = readability.getLinkDensity(siblingNode); 698 | var nodeContent = readability.getInnerText(siblingNode); 699 | var nodeLength = nodeContent.length; 700 | 701 | if(nodeLength > 80 && linkDensity < 0.25) 702 | { 703 | append = true; 704 | } 705 | else if(nodeLength < 80 && linkDensity === 0 && nodeContent.search(/\.( |$)/) !== -1) 706 | { 707 | append = true; 708 | } 709 | } 710 | 711 | if(append) { 712 | dbg("Appending node: " + siblingNode); 713 | 714 | var nodeToAppend = null; 715 | if(siblingNode.nodeName !== "DIV" && siblingNode.nodeName !== "P") { 716 | /* We have a node that isn't a common block level element, like a form or td tag. Turn it into a div so it doesn't get filtered out later by accident. 
*/ 717 | 718 | dbg("Altering siblingNode of " + siblingNode.nodeName + ' to div.'); 719 | nodeToAppend = doc.createElement("DIV"); 720 | try { 721 | nodeToAppend.id = siblingNode.id; 722 | nodeToAppend.innerHTML = siblingNode.innerHTML; 723 | } 724 | catch(er) { 725 | dbg("Could not alter siblingNode to div, probably an IE restriction, reverting back to original."); 726 | nodeToAppend = siblingNode; 727 | s-=1; 728 | sl-=1; 729 | } 730 | } else { 731 | nodeToAppend = siblingNode; 732 | s-=1; 733 | sl-=1; 734 | } 735 | 736 | /* To ensure a node does not interfere with readability styles, remove its classnames */ 737 | nodeToAppend.className = ""; 738 | 739 | /* Append sibling and subtract from our list because it removes the node when you append to another node */ 740 | articleContent.appendChild(nodeToAppend); 741 | } 742 | } 743 | 744 | /** 745 | * So we have all of the content that we need. Now we clean it up for presentation. 746 | **/ 747 | readability.prepArticle(articleContent); 748 | 749 | if (readability.curPageNum === 1) { 750 | articleContent.innerHTML = '
§
'; 1190 | 1191 | doc.getElementById("readability-content").appendChild(articlePage); 1192 | 1193 | if(readability.curPageNum > readability.maxPages) { 1194 | var nextPageMarkup = ""; 1195 | 1196 | articlePage.innerHTML = articlePage.innerHTML + nextPageMarkup; 1197 | return; 1198 | } 1199 | 1200 | /** 1201 | * Now that we've built the article page DOM element, get the page content 1202 | * asynchronously and load the cleaned content into the div we created for it. 1203 | **/ 1204 | (function(pageUrl, thisPage) { 1205 | readability.ajax(pageUrl, { 1206 | success: function(r) { 1207 | 1208 | /* First, check to see if we have a matching ETag in headers - if we do, this is a duplicate page. */ 1209 | var eTag = r.getResponseHeader('ETag'); 1210 | if(eTag) { 1211 | if(eTag in readability.pageETags) { 1212 | dbg("Exact duplicate page found via ETag. Aborting."); 1213 | articlePage.style.display = 'none'; 1214 | return; 1215 | } else { 1216 | readability.pageETags[eTag] = 1; 1217 | } 1218 | } 1219 | 1220 | // TODO: this ends up doubling up page numbers on NYTimes articles. Need to generically parse those away. 1221 | var page = doc.createElement("DIV"); 1222 | 1223 | /** 1224 | * Do some preprocessing to our HTML to make it ready for appending. 1225 | * • Remove any script tags. Swap and reswap newlines with a unicode character because multiline regex doesn't work in javascript. 1226 | * • Turn any noscript tags into divs so that we can parse them. This allows us to find any next page links hidden via javascript. 1227 | * • Turn all double br's into p's - was handled by prepDocument in the original view. 1228 | * Maybe in the future abstract out prepDocument to work for both the original document and AJAX-added pages. 1229 | **/ 1230 | var responseHtml = r.responseText.replace(/\n/g,'\uffff').replace(/');
1234 | responseHtml = responseHtml.replace(readability.regexps.replaceFonts, '<$1span>');
1235 |
1236 | page.innerHTML = responseHtml;
1237 |
1238 | /**
1239 | * Reset all flags for the next page, as they will search through it and disable as necessary at the end of grabArticle.
1240 | **/
1241 | readability.flags = 0x1 | 0x2 | 0x4;
1242 |
1243 | var nextPageLink = readability.findNextPageLink(page),
1244 | content = readability.grabArticle(page);
1245 |
1246 | if(!content) {
1247 | dbg("No content found in page to append. Aborting.");
1248 | return;
1249 | }
1250 |
1251 | /**
1252 | * Anti-duplicate mechanism. Essentially, get the first paragraph of our new page.
1253 | * Compare it against all of the the previous document's we've gotten. If the previous
1254 | * document contains exactly the innerHTML of this first paragraph, it's probably a duplicate.
1255 | **/
1256 | var firstP = content.getElementsByTagName("P").length ? content.getElementsByTagName("P")[0] : null;
1257 | if(firstP && firstP.innerHTML.length > 100) {
1258 | for(var i=1; i <= readability.curPageNum; i+=1) {
1259 | var rPage = doc.getElementById('readability-page-' + i);
1260 | if(rPage && rPage.innerHTML.indexOf(firstP.innerHTML) !== -1) {
1261 | dbg('Duplicate of page ' + i + ' - skipping.');
1262 | articlePage.style.display = 'none';
1263 | readability.parsedPages[pageUrl] = true;
1264 | return;
1265 | }
1266 | }
1267 | }
1268 |
1269 | readability.removeScripts(content);
1270 |
1271 | thisPage.innerHTML = thisPage.innerHTML + content.innerHTML;
1272 |
1273 | /**
1274 | * After the page has rendered, post process the content. This delay is necessary because,
1275 | * in webkit at least, offsetWidth is not set in time to determine image width. We have to
1276 | * wait a little bit for reflow to finish before we can fix floating images.
1277 | **/
1278 | window.setTimeout(
1279 | function() { readability.postProcessContent(thisPage); },
1280 | 500
1281 | );
1282 |
1283 | if(nextPageLink) {
1284 | readability.appendNextPage(nextPageLink);
1285 | }
1286 | }
1287 | });
1288 | }(nextPageLink, articlePage));
1289 | },
1290 |
1291 | /**
1292 | * Get an elements class/id weight. Uses regular expressions to tell if this
1293 | * element looks good or bad.
1294 | *
1295 | * @param Element
1296 | * @return number (Integer)
1297 | **/
1298 | getClassWeight: function (e) {
1299 | if(!readability.flagIsActive(readability.FLAG_WEIGHT_CLASSES)) {
1300 | return 0;
1301 | }
1302 |
1303 | var weight = 0;
1304 |
1305 | /* Look for a special classname */
1306 | if (typeof(e.className) === 'string' && e.className !== '')
1307 | {
1308 | if(e.className.search(readability.regexps.negative) !== -1) {
1309 | weight -= 25; }
1310 |
1311 | if(e.className.search(readability.regexps.positive) !== -1) {
1312 | weight += 25; }
1313 | }
1314 |
1315 | /* Look for a special ID */
1316 | if (typeof(e.id) === 'string' && e.id !== '')
1317 | {
1318 | if(e.id.search(readability.regexps.negative) !== -1) {
1319 | weight -= 25; }
1320 |
1321 | if(e.id.search(readability.regexps.positive) !== -1) {
1322 | weight += 25; }
1323 | }
1324 |
1325 | return weight;
1326 | },
1327 |
1328 | nodeIsVisible: function (node) {
1329 | return (node.offsetWidth !== 0 || node.offsetHeight !== 0) && node.style.display.toLowerCase() !== 'none';
1330 | },
1331 |
1332 | /**
1333 | * Remove extraneous break tags from a node.
1334 | *
1335 | * @param Element
1336 | * @return void
1337 | **/
1338 | killBreaks: function (e) {
1339 | try {
1340 | e.innerHTML = e.innerHTML.replace(readability.regexps.killBreaks,'
');
1341 | }
1342 | catch (eBreaks) {
1343 | dbg("KillBreaks failed - this is an IE bug. Ignoring.: " + eBreaks);
1344 | }
1345 | },
1346 |
1347 | /**
1348 | * Clean a node of all elements of type "tag".
1349 | * (Unless it's a youtube/vimeo video. People love movies.)
1350 | *
1351 | * @param Element
1352 | * @param string tag to clean
1353 | * @return void
1354 | **/
1355 | clean: function (e, tag) {
1356 | var targetList = e.getElementsByTagName( tag );
1357 | var isEmbed = (tag === 'object' || tag === 'embed');
1358 |
1359 | for (var y=targetList.length-1; y >= 0; y-=1) {
1360 | /* Allow youtube and vimeo videos through as people usually want to see those. */
1361 | if(isEmbed) {
1362 | var attributeValues = "";
1363 | for (var i=0, il=targetList[y].attributes.length; i < il; i+=1) {
1364 | attributeValues += targetList[y].attributes[i].value + '|';
1365 | }
1366 |
1367 | /* First, check the elements attributes to see if any of them contain youtube or vimeo */
1368 | if (attributeValues.search(readability.regexps.videos) !== -1) {
1369 | continue;
1370 | }
1371 |
1372 | /* Then check the elements inside this element for the same. */
1373 | if (targetList[y].innerHTML.search(readability.regexps.videos) !== -1) {
1374 | continue;
1375 | }
1376 |
1377 | }
1378 |
1379 | targetList[y].parentNode.removeChild(targetList[y]);
1380 | }
1381 | },
1382 |
1383 | /**
1384 | * Clean an element of all tags of type "tag" if they look fishy.
1385 | * "Fishy" is an algorithm based on content length, classnames, link density, number of images & embeds, etc.
1386 | *
1387 | * @return void
1388 | **/
1389 | cleanConditionally: function (e, tag) {
1390 |
1391 | if(!readability.flagIsActive(readability.FLAG_CLEAN_CONDITIONALLY)) {
1392 | return;
1393 | }
1394 |
1395 | var tagsList = e.getElementsByTagName(tag);
1396 | var curTagsLength = tagsList.length;
1397 |
1398 | /**
1399 | * Gather counts for other typical elements embedded within.
1400 | * Traverse backwards so we can remove nodes at the same time without effecting the traversal.
1401 | *
1402 | * TODO: Consider taking into account original contentScore here.
1403 | **/
1404 | for (var i=curTagsLength-1; i >= 0; i-=1) {
1405 | var weight = readability.getClassWeight(tagsList[i]);
1406 | var contentScore = (typeof tagsList[i].readability !== 'undefined') ? tagsList[i].readability.contentScore : 0;
1407 |
1408 | dbg("Cleaning Conditionally " + tagsList[i] + " (" + tagsList[i].className + ":" + tagsList[i].id + ")" + ((typeof tagsList[i].readability !== 'undefined') ? (" with score " + tagsList[i].readability.contentScore) : ''));
1409 |
1410 | if(weight+contentScore < 0)
1411 | {
1412 | tagsList[i].parentNode.removeChild(tagsList[i]);
1413 | }
1414 | else if ( readability.getCharCount(tagsList[i],',') < 10) {
1415 | /**
1416 | * If there are not very many commas, and the number of
1417 | * non-paragraph elements is more than paragraphs or other ominous signs, remove the element.
1418 | **/
1419 | var p = tagsList[i].getElementsByTagName("p").length;
1420 | var img = tagsList[i].getElementsByTagName("img").length;
1421 | var li = tagsList[i].getElementsByTagName("li").length-100;
1422 | var input = tagsList[i].getElementsByTagName("input").length;
1423 |
1424 | var embedCount = 0;
1425 | var embeds = tagsList[i].getElementsByTagName("embed");
1426 | for(var ei=0,il=embeds.length; ei < il; ei+=1) {
1427 | if (embeds[ei].src.search(readability.regexps.videos) === -1) {
1428 | embedCount+=1;
1429 | }
1430 | }
1431 |
1432 | var linkDensity = readability.getLinkDensity(tagsList[i]);
1433 | var contentLength = readability.getInnerText(tagsList[i]).length;
1434 | var toRemove = false;
1435 |
1436 | if ( img > p ) {
1437 | toRemove = true;
1438 | } else if(li > p && tag !== "ul" && tag !== "ol") {
1439 | toRemove = true;
1440 | } else if( input > Math.floor(p/3) ) {
1441 | toRemove = true;
1442 | } else if(contentLength < 25 && (img === 0 || img > 2) ) {
1443 | toRemove = true;
1444 | } else if(weight < 25 && linkDensity > 0.2) {
1445 | toRemove = true;
1446 | } else if(weight >= 25 && linkDensity > 0.5) {
1447 | toRemove = true;
1448 | } else if((embedCount === 1 && contentLength < 75) || embedCount > 1) {
1449 | toRemove = true;
1450 | }
1451 |
1452 | if(toRemove) {
1453 | tagsList[i].parentNode.removeChild(tagsList[i]);
1454 | }
1455 | }
1456 | }
1457 | },
1458 |
1459 | /**
1460 | * Clean out spurious headers from an Element. Checks things like classnames and link density.
1461 | *
1462 | * @param Element
1463 | * @return void
1464 | **/
1465 | cleanHeaders: function (e) {
1466 | for (var headerIndex = 1; headerIndex < 3; headerIndex+=1) {
1467 | var headers = e.getElementsByTagName('h' + headerIndex);
1468 | for (var i=headers.length-1; i >=0; i-=1) {
1469 | if (readability.getClassWeight(headers[i]) < 0 || readability.getLinkDensity(headers[i]) > 0.33) {
1470 | headers[i].parentNode.removeChild(headers[i]);
1471 | }
1472 | }
1473 | }
1474 | },
1475 |
1476 | /*** Smooth scrolling logic ***/
1477 |
1478 | /**
1479 | * easeInOut animation algorithm - returns an integer that says how far to move at this point in the animation.
1480 | * Borrowed from jQuery's easing library.
1481 | * @return integer
1482 | **/
1483 | easeInOut: function(start,end,totalSteps,actualStep) {
1484 | var delta = end - start;
1485 |
1486 | if ((actualStep/=totalSteps/2) < 1) {
1487 | return delta/2*actualStep*actualStep + start;
1488 | }
1489 | actualStep -=1;
1490 | return -delta/2 * ((actualStep)*(actualStep-2) - 1) + start;
1491 | },
1492 |
1493 | /**
1494 | * Helper function to, in a cross compatible way, get or set the current scroll offset of the document.
1495 | * @return mixed integer on get, the result of window.scrollTo on set
1496 | **/
1497 | scrollTop: function(scroll){
1498 | var setScroll = typeof scroll !== 'undefined';
1499 |
1500 | if(setScroll) {
1501 | return window.scrollTo(0, scroll);
1502 | }
1503 | if(typeof window.pageYOffset !== 'undefined') {
1504 | return window.pageYOffset;
1505 | }
1506 | else if(document.documentElement.clientHeight) {
1507 | return document.documentElement.scrollTop;
1508 | }
1509 | else {
1510 | return document.body.scrollTop;
1511 | }
1512 | },
1513 |
1514 | /**
1515 | * scrollTo - Smooth scroll to the point of scrollEnd in the document.
1516 | * @return void
1517 | **/
1518 | curScrollStep: 0,
1519 | scrollTo: function (scrollStart, scrollEnd, steps, interval) {
1520 | if(
1521 | (scrollStart < scrollEnd && readability.scrollTop() < scrollEnd) ||
1522 | (scrollStart > scrollEnd && readability.scrollTop() > scrollEnd)
1523 | ) {
1524 | readability.curScrollStep+=1;
1525 | if(readability.curScrollStep > steps) {
1526 | return;
1527 | }
1528 |
1529 | var oldScrollTop = readability.scrollTop();
1530 |
1531 | readability.scrollTop(readability.easeInOut(scrollStart, scrollEnd, steps, readability.curScrollStep));
1532 |
1533 | // We're at the end of the window.
1534 | if(oldScrollTop === readability.scrollTop()) {
1535 | return;
1536 | }
1537 |
1538 | window.setTimeout(function() {
1539 | readability.scrollTo(scrollStart, scrollEnd, steps, interval);
1540 | }, interval);
1541 | }
1542 | },
1543 |
1544 | htmlspecialchars: function (s) {
1545 | if (typeof(s) === "string") {
1546 | s = s.replace(/&/g, "&");
1547 | s = s.replace(/"/g, """);
1548 | s = s.replace(/'/g, "'");
1549 | s = s.replace(//g, ">");
1551 | }
1552 |
1553 | return s;
1554 | },
1555 |
1556 | flagIsActive: function(flag) {
1557 | return (readability.flags & flag) > 0;
1558 | },
1559 |
1560 | addFlag: function(flag) {
1561 | readability.flags = readability.flags | flag;
1562 | },
1563 |
1564 | removeFlag: function(flag) {
1565 | readability.flags = readability.flags & ~flag;
1566 | }
1567 |
1568 | };
1569 |
--------------------------------------------------------------------------------