├── .DS_Store ├── README ├── google_scraper.js ├── has_google_results.js ├── html_files └── fish_oil.html └── seomoz.js /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chrisle/seer.js-public/92587da39890016ee62c05c6edf00c917f4ebf22/.DS_Store -------------------------------------------------------------------------------- /README: -------------------------------------------------------------------------------- 1 | A public collection of Seer Interactive's Google Docs functions 2 | 3 | google_scraper.js: Google Scraper for Google Docs Spreadsheet. 4 | has_google_results.js: Determine if there are any results for a given keyword / site. 5 | seomoz.js: Wrapper for SEOmoz API 6 | -------------------------------------------------------------------------------- /google_scraper.js: -------------------------------------------------------------------------------- 1 | /* --------------------------------------------------------------------------- 2 | * google_scraper.js 3 | * More info: http://bit.ly/wDFMn5 4 | * 5 | * @desc Google Scraper for Google Docs Spreadsheet. 
/*
 * google_scraper.js header (continued)
 *
 * @author  Chris Le - @djchrisle - chrisl at seerinteractive.com
 * @license MIT (see: http://www.opensource.org/licenses/mit-license.php)
 * @version 1.2
 *
 * Change Log:
 * 1.2.0 - Added ability to offset results (thanks @johnsee)
 * 1.1.0 - Added ability to change the TLD
 * 1.0.1 - Fixed Non-URL universal searches getting returned (Thanks Gareth Brown)
 * 1.0.0 - Initial release
 * -------------------------------------------------------------------------*/

// NOTE(review): in the complete file the definitions below sit inside the
// module closure opened by
//     var SeerJs_GoogleScraper = (function() {
// The remainder of that closure is missing from this excerpt, so the opener is
// kept as a comment to leave this block syntactically complete.

// Set by fetch(): false when the last request succeeded, true when it threw.
var errorOccurred;

/**
 * Gets stuff inside two tags.
 *
 * The end tag is searched for only AFTER the start tag, so an earlier
 * occurrence of `end` in the haystack cannot truncate or empty the result.
 *
 * @param  {string} haystack String to look into
 * @param  {string} start    Starting tag
 * @param  {string} end      Ending tag
 * @return {string} Stuff inside the two tags, or '' when either tag is missing
 */
function getInside(haystack, start, end) {
  var startPos = haystack.indexOf(start);
  if (startPos === -1) {
    return '';
  }
  var startIndex = startPos + start.length;
  var endIndex = haystack.indexOf(end, startIndex);
  if (endIndex === -1) {
    return '';
  }
  return haystack.substring(startIndex, endIndex);
}

/**
 * Fetch keywords from Google. Returns the caught error if an error occurs
 * (and sets errorOccurred to true).
 *
 * The request always carries filter=0, which forces all results from Google;
 * this is important when using a large offset.
 *
 * @param  {string} kw         Keyword (plain text; it is URL-encoded here)
 * @param  {number} optResults (Optional) Number of results to return (defaults to 10)
 * @param  {string} optTld     (Optional) Top level domain (eg: ".co.uk". Defaults to ".com")
 * @param  {number} optStart   (Optional) Sets the starting offset for results (defaults to 0)
 * @return {string|Error} Response body text, or the caught error on failure
 */
function fetch(kw, optResults, optTld, optStart) {
  errorOccurred = false;
  optResults = optResults || 10;
  optStart = optStart || 0;
  optTld = optTld || '.com';
  try {
    // Encode the keyword so spaces, '&', '#', '+' etc. cannot break or
    // inject into the query string. Done inside the try so that a malformed
    // keyword is reported through the same error path as a failed request.
    var url = 'http://www.google' + optTld + '/search' +
        '?q=' + encodeURIComponent(kw) +
        '&start=' + optStart +
        '&num=' + optResults +
        '&filter=0';
    return UrlFetchApp.fetch(url).getContentText();
  } catch (e) {
    errorOccurred = true;
    return e;
  }
}

/**
 * Extracts the URL from an organic result. Returns false if nothing is found.
 *
 * Only results that link through "/url?q=" are considered; the URL is the
 * text between that marker and the next "&".
 *
 * @param  {string} result XML string of the result
 * @return {string|boolean} The extracted URL, or false
 */
function extractUrl(result) {
  if (!/\/url\?q=/.test(result)) {
    return false;
  }
  // Anchor on the same marker that was just tested for, so an unrelated
  // "?q=" earlier in the markup is not picked up instead.
  var url = getInside(result, '/url?q=', '&');
  return (url !== '') ? url : false;
}

/*
 * extractOrganic(html) is truncated in this excerpt (the source stops in the
 * middle of a regular expression). What is visible is kept here verbatim:
 *
 *   Extracts the organic results from the page and puts them into an array.
 *   One per element. Each element is an XMLElement.
 *
 *   function extractOrganic(html) {
 *     html = html.replace(/\n|\r/g, '');
 *     var allOrganic = html.match(/      <-- source ends here
 */
240 | |
244 |
245 | 246 | 272 | | 273 | 274 |275 | | |||||||||||||||
279 |
280 |
290 |
281 | Advanced
283 | search
284 |
285 |
286 |
287 | About 127,000,000 results
288 |
289 | |
291 |
292 | 293 | | ||||||||||||||||
297 |
298 | More
318 |
319 |
320 |
321 |
362 | Search Options322 | 323 |
|
363 |
364 |
365 |
366 |
882 |
883 |
368 |
436 |
437 | 370 | Ads371 | 372 |
438 |
834 | ***
835 |
836 |
439 |
833 |
837 |
881 |
838 | Searches related to: fish oil
839 |
840 |
841 |
884 |
982 |
935 |
971 |
972 |
936 |
962 |
963 |
964 |
970 |
974 | Google Home Advertising Programs
976 | Business Solutions New Privacy & Terms
979 | About Google
980 |
981 | |
983 |
984 |
985 |
987 |
1069 |
|
1070 |
379 | * 380 | * Original by {@link http://www.tomanthony.co.uk/blog/seomoz-linkscape-api-with-google-docs/} 381 | * Modified so that you can select a large range of URLs and it will get the 382 | * metrics in batches of 10.
383 | * 384 | * @param {string[]} urlRange One or more URLs to send to Linkscape 385 | * @param {boolean} optIncludeHeader Include the header? (Default is true) 386 | * @function getLinkscape 387 | * 388 | * @example 389 | * Cells: 390 | * A1: www.seerinteractive.com 391 | * A2: http://www.domain.com/blog 392 | * A3: http://www.anotherdomain.com/page.html 393 | * 394 | * // => Gets current data on www.seerinteractive.com 395 | * =getLinkscape("www.seerinteractive.com") 396 | * // => Gets current data on www.seerinteractive.com 397 | * =getLinkscape(A1) 398 | * // => Gets data for three URLS in a batch 399 | * =getLInkscape(A1:A3) 400 | * // => Gets data for three URLS in a batch and reomves the header row 401 | * =getLInkscape(A1:A3, false) 402 | * 403 | */ 404 | urlMetrics: function(urlRange, optIncludeHeader) { 405 | if (optIncludeHeader == undefined) optIncludeHeader = true; 406 | var expire = linkscapeExp_(); 407 | var retval = new Array; 408 | var first = true; 409 | var response; 410 | 411 | // POST in batches of 10 and merge results 412 | urlRange = SeerJs.Utils.strToArray(urlRange); 413 | var urlGroups = SeerJs.Utils.groupBy(linkscapePrepUrls_(urlRange), 414 | SEOMOZ_BATCH_SIZE); 415 | for (var g = 0; g < urlGroups.length; g++) { 416 | var payload = Utilities.jsonStringify(urlGroups[g]) 417 | response = linkscapeTranspose_(SeerJs.Http.fetchJson( 418 | "http://lsapi.seomoz.com/linkscape/url-metrics/", 419 | { 420 | "AccessID" : SEOMOZ_MEMBER_ID, 421 | "Expires" : expire, 422 | "Signature" : linkscapeSig_(expire), 423 | "Cols" : SEOMOZ_ALL_METRICS 424 | }, 425 | { 426 | "method" : "post", 427 | "payload" : payload 428 | } 429 | )); 430 | // merge results from batches together 431 | if (first == false) response.shift(); 432 | retval.push.apply(retval, response); 433 | first = false; 434 | if (OBEY_FREE_RATE_LIMIT) { Utilities.sleep(5000); } 435 | } 436 | // remove header if user requests. 
437 | if (!optIncludeHeader) retval.shift(); 438 | return retval; 439 | } 440 | }; 441 | })(); 442 | 443 | //---------------------------------------------------------------------------- 444 | // test 445 | 446 | function _cheap_test() { 447 | /*var response = getLinkscape("www.seerinteractive.com"); 448 | (response[1][1] == 'www.seerinteractive.com/') 449 | ? (Logger.log('passed.')) 450 | : (Logger.log('failed.'));*/ 451 | 452 | var response = getLinkscape([ ["www.seerinteractive.com"], 453 | ['www.seomoz.org'], 454 | ['www.google.com'] ]); 455 | return response; 456 | } 457 | --------------------------------------------------------------------------------