/*
 * Google Profile Information Fun
 *
 * Fetches the Google profiles sitemap, extracts every <loc> URL listed in it
 * and saves each referenced package, gunzipped, as ./data/segment_<n>.txt.
 *
 * Usage (from the project README):
 *  - Make sure you have the latest NodeJS installed
 *  - Clone the repo onto your server / host
 *  - Execute with `node mining.js`
 *  - Wait for about 1-3 hours
 *  - Once complete you will have thousands of .txt files containing
 *    thousands of profile links
 *
 * Repository layout: .gitignore (ignores the generated .txt files under
 * /data and /node_modules), README.md, data/.blank (placeholder so the
 * data directory exists) and this script, mining.js.
 */

/*
 * Require Libraries
 */
var http = require('http');
var url = require('url');
var fs = require('fs');
var zlib = require('zlib');

/*
 * Matches one <loc>...</loc> element and captures its text without the
 * surrounding whitespace. The pattern must never match an empty string:
 * with the "g" flag an empty match leaves lastIndex untouched, so an
 * exec() loop over it would spin forever.
 */
var locRegex = /<loc>\s*([\s\S]*?)\s*<\/loc>/gi;

/* Package URLs collected from the sitemap, in document order. */
var packages = [];

/*
 * Upper bound on simultaneous package downloads, so a sitemap listing
 * thousands of packages does not open thousands of sockets/files at once.
 */
var MAX_CONCURRENT_DOWNLOADS = 5;

/*
 * Starting Link
 */
var baseOptions = {
    host : 'www.gstatic.com',
    port : 80,
    path : '/s2/sitemaps/profiles-sitemap.xml'
};

/**
 * Log a request/stream error in the format used throughout this script.
 *
 * @param {Error} e - the error to report
 */
function logError(e) {
    console.log("Got error: " + e.message);
}

/**
 * Extract the text of every non-empty <loc> element from a sitemap document.
 *
 * @param {string} xml - raw sitemap XML
 * @returns {string[]} the URLs in document order (empty array if none)
 */
function extractLocations(xml) {
    var locations = [];
    var match;

    // locRegex is global and therefore stateful; always scan from the start.
    locRegex.lastIndex = 0;
    while ((match = locRegex.exec(xml)) !== null) {
        if (match[1] !== '') {
            locations.push(match[1]);
        }
    }
    return locations;
}

/**
 * Build the output path for the package at the given position.
 *
 * @param {number} index - zero-based position of the package in `packages`
 * @returns {string} path of the segment file
 */
function segmentPath(index) {
    return "./data/segment_" + index + ".txt";
}

/**
 * Build the progress line printed when a package starts downloading.
 *
 * @param {number} index - zero-based position of the package
 * @param {number} totalPackages - number of packages being processed
 * @returns {string} human readable progress message
 */
function progressMessage(index, totalPackages) {
    return "Piping " + index + " of " + totalPackages + " into " + segmentPath(index);
}

/**
 * Download one gzip package and write its decompressed content to the
 * matching segment file. Failures are logged and never thrown, so one bad
 * package cannot abort the rest of the run.
 *
 * @param {string} packageUrl - URL taken from the sitemap
 * @param {number} index - zero-based position, used for the file name
 * @param {number} totalPackages - total number of packages (for logging)
 * @param {http.Agent} agent - agent that bounds the number of open sockets
 */
function downloadPackage(packageUrl, index, totalPackages, agent) {
    var requestOptions = url.parse(packageUrl);
    var request;

    requestOptions.agent = agent;

    try {
        request = http.get(requestOptions, function (response) {
            if (response.statusCode !== 200) {
                console.log("failed to get package form google");
                // Drain the body so the socket is handed back to the agent.
                response.resume();
                return;
            }

            var gunzip = zlib.createGunzip();
            var output = fs.createWriteStream(segmentPath(index));

            // Without listeners a stream 'error' event is thrown and would
            // kill the whole process. On failure keep draining the response
            // so the bounded agent is not starved by a stalled socket.
            response.on('error', logError);
            gunzip.on('error', function (e) {
                logError(e);
                response.unpipe(gunzip);
                response.resume();
                output.end();
            });
            output.on('error', function (e) {
                logError(e);
                response.unpipe(gunzip);
                response.resume();
            });

            response.pipe(gunzip).pipe(output);
            console.log(progressMessage(index, totalPackages));
        });
    } catch (e) {
        // http.get throws synchronously on malformed or non-http URLs.
        logError(e);
        return;
    }

    request.on('error', logError);
}

/**
 * Start a download for every URL currently held in `packages`.
 */
function processPackages() {
    var totalPackages = packages.length;
    var agent = new http.Agent({ maxSockets: MAX_CONCURRENT_DOWNLOADS });

    packages.forEach(function (packageUrl, index) {
        downloadPackage(packageUrl, index, totalPackages, agent);
    });
}

/**
 * Fetch the sitemap, collect the package URLs it lists and download them.
 * Exits with a non-zero status when the sitemap itself is unavailable.
 */
function main() {
    http.get(baseOptions, function (response) {
        if (response.statusCode !== 200) {
            console.error("Initial stack was not found");
            process.exit(1);
        }

        // Decode as UTF-8 at the stream level so a multi-byte character
        // split across two chunks is not corrupted by string concatenation.
        response.setEncoding('utf8');

        var stack = '';
        response.on('data', function (chunk) {
            stack += chunk;
        });

        response.on('end', function () {
            packages = packages.concat(extractLocations(stack));
            processPackages();
        });
    }).on('error', logError);
}

// Start the crawl only when executed directly (`node mining.js`), so the
// helpers above can be loaded without triggering any network traffic.
if (typeof module !== 'undefined' && require.main === module) {
    main();
}