├── .gitignore
├── README.md
├── data
└── .blank
└── mining.js
/.gitignore:
--------------------------------------------------------------------------------
1 | /data/*.txt
2 | /node_modules
3 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | ### Usage
2 | - Make sure you have the latest Node.js installed
3 | - clone the repo onto your server / host
4 | - execute with `node mining.js`
5 | - wait for about 1-3 hours
6 | - Once complete you will have thousands of .txt files containing thousands of profile links
7 | - Simples.
--------------------------------------------------------------------------------
/data/.blank:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/robertpitt/plus-scraper/7f811f42ed9650114324b233baaa98921d14234b/data/.blank
--------------------------------------------------------------------------------
/mining.js:
--------------------------------------------------------------------------------
1 | /*
2 | * Google Profile Information Fun
3 | */
4 |
5 | /*
6 | * Require Libraries
7 | */
8 | var http = require('http');
9 | var url = require('url');
10 | var fs = require('fs');
11 | var zlib = require('zlib');
// Captures the text inside each <loc>...</loc> element of the downloaded XML.
// The pattern must contain the literal tags: a bare "(.*?)" matches the empty
// string, and a global regex never advances lastIndex on an empty match, so a
// `while (regex.exec(...))` loop over it would never terminate.
// The "g" flag lets exec() be called repeatedly to walk every match.
var locRegex = new RegExp("<loc>(.*?)</loc>", "gi");

// URLs captured by locRegex; filled after the initial download and then
// consumed by processPackages().
var packages = [];
14 |
15 | /*
16 | * Starting Link
17 | */
// Request options for the starting XML document.
var baseOptions = {
    host : 'www.gstatic.com',
    port : 80,
    path : '/s2/sitemaps/profiles-sitemap.xml'
};

/*
 * Download the starting document, push every URL captured by locRegex into
 * `packages`, then hand over to processPackages().
 */
http.get(baseOptions, function(response){
    if(response.statusCode !== 200)
    {
        console.error("Initial stack was not found");
        // Exit with a failure code so callers/scripts can detect the problem.
        process.exit(1);
    }

    var stack = '';

    // Decode as text up-front so a multi-byte character split across two
    // chunks is not corrupted by per-chunk string conversion.
    response.setEncoding('utf8');
    response.on('data', function(chunk){
        stack += chunk;
    });

    response.on('end', function(){
        var match;

        // The regex is global and therefore stateful; always start from 0.
        locRegex.lastIndex = 0;

        while((match = locRegex.exec(stack)) !== null)
        {
            if(match[0] === '')
            {
                // A zero-length match leaves lastIndex untouched; step past
                // it so the loop is guaranteed to terminate.
                locRegex.lastIndex++;
                continue;
            }

            packages.push(match[1]);
        }

        processPackages();
    });
}).on('error', function(e) {
    console.log("Got error: " + e.message);
});
49 |
/*
 * Download every URL listed in `packages`, gunzip the response and write it
 * to ./data/segment_<index>.txt, where <index> is the URL's position in
 * `packages`. All requests are started immediately; failures are logged and
 * do not stop the remaining downloads.
 */
var processPackages = function()
{
    var totalPackages = packages.length;

    packages.forEach(function(packageUrl, index){
        var target = "./data/segment_" + index + ".txt";

        http.get(url.parse(packageUrl), function(response){
            if(response.statusCode !== 200)
            {
                console.log("failed to get package form google");
                // Drain the unused body so the socket is released.
                response.resume();
                return;
            }

            var reportError = function(e){
                console.log("Got error: " + e.message);
            };

            // Streams without an 'error' listener throw on failure, which
            // would abort every other in-flight download as well.
            var gunzip = zlib.createGunzip();
            var output = fs.createWriteStream(target);
            gunzip.on('error', reportError);
            output.on('error', reportError);

            response.pipe(gunzip).pipe(output);

            // Progress is 1-based; the file name keeps the 0-based index.
            console.log("Piping " + (index + 1) + " of " + totalPackages + " into " + target);
        }).on('error', function(e) {
            console.log("Got error: " + e.message);
        });
    });
};
73 |
--------------------------------------------------------------------------------