├── .gitignore ├── LICENSE ├── README.md ├── index.js ├── package.json └── scrape-linkedin.js /.gitignore: -------------------------------------------------------------------------------- 1 | # Logs 2 | logs 3 | *.log 4 | npm-debug.log* 5 | 6 | # Runtime data 7 | pids 8 | *.pid 9 | *.seed 10 | 11 | # Directory for instrumented libs generated by jscoverage/JSCover 12 | lib-cov 13 | 14 | # Coverage directory used by tools like istanbul 15 | coverage 16 | 17 | # nyc test coverage 18 | .nyc_output 19 | 20 | # Grunt intermediate storage (http://gruntjs.com/creating-plugins#storing-task-files) 21 | .grunt 22 | 23 | # node-waf configuration 24 | .lock-wscript 25 | 26 | # Compiled binary addons (http://nodejs.org/api/addons.html) 27 | build/Release 28 | 29 | # Dependency directories 30 | node_modules 31 | jspm_packages 32 | 33 | # Optional npm cache directory 34 | .npm 35 | 36 | # Optional REPL history 37 | .node_repl_history 38 | 39 | #intelliJ 40 | .idea 41 | *.iml 42 | 43 | #MAC 44 | .DS_Store -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2016 Charly BERTHET 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | NO LONGER WORKING AND I HAVE NO TIME TO MAINTAIN IT - JUST EMAIL ME IF YOU WANT TO CO-WORK ON IT. 2 | 3 | # scrape-linkedin-with-nodejs 4 | Scraping LinkedIn profiles with Node.js has never been so simple :D ! 5 | 6 | 7 | * Npm page: [scrape-linkedin](https://www.npmjs.com/package/scrape-linkedin). 8 | 9 | ## Get started 10 | 11 | 1. Download the module 12 | ``` 13 | npm install scrape-linkedin 14 | ``` 15 | 2. Use the module 16 | ```javascript 17 | // Import the module in your file 18 | var ScrapeLinkedin = require("scrape-linkedin"); 19 | 20 | // Create the scraper object 21 | var scrapper = new ScrapeLinkedin(); 22 | 23 | // Fetch a profile 24 | scrapper.fetch("charlyberthet") 25 | // Handle the result 26 | .then(profile => console.log(profile)) 27 | // Handle an error 28 | .catch(err => console.log(err)); 29 | ``` 30 | 31 | ## Result 32 | ```javascript 33 | { 34 | curentCompany: ['Sopra Steria'], 35 | educations: [{ 36 | date: '2014 – 2017', 37 | degree: 'Engineer’s Degree, ', 38 | major: 'IT and Networking', 39 | title: 'Engineering school CPE, Lyon, France' 40 | }], 41 | experiences: [{ 42 | company: 'Sopra Steria', 43 | description: 'Project management (scrum). Conception, Development and Qualification of Android, Spring/AngularJS and Ionic applications. 
Use of Design patterns.', 44 | location: 'Région de Lyon, France', 45 | since: 'septembre 2014', 46 | title: 'IT Engineering, half studying at CPE Lyon, half working at Sopra Steria', 47 | until: '' 48 | }, { 49 | company: 'Culinarian', 50 | description: 'Joined an amazing american start up. Worked in a team of designers, data analysts and developers. I was charged to develop the web application using ReactJS, Redux and NodeJS.', 51 | location: 'Région de Greater Los Angeles, États-Unis', 52 | since: 'juillet 2016', 53 | title: 'IT Internship in USA, conception and development from scratch, ReactJS WebApp', 54 | until: 'septembre 2016' 55 | }, { 56 | company: 'Entrepreneur', 57 | description: 'Running a company and prospecting clients. It was not so easy !', 58 | location: 'France', 59 | since: 'janvier 2015', 60 | title: 'IT Entrepreneur in Web and Mobile Application development', 61 | until: 'décembre 2015' 62 | }], 63 | fullName: 'Charly Berthet', 64 | industry: 'Logiciels informatiques', 65 | languages: [{ 66 | language: 'Anglais', 67 | level: 'Capacité professionnelle complète' 68 | }, { 69 | language: 'Francais', 70 | level: 'Bilingue ou langue natale' 71 | }, { 72 | language: 'Espagnol', 73 | level: 'Compétence professionnelle limitée' 74 | }], 75 | locality: 'Région de Lyon, France', 76 | previousCompanies: ['Culinarian', 'Entrepreneur, ', 'NTN-SNR'], 77 | profilePicture: 'https://media.licdn.com/mpr/mpr/shrinknp_400_400/AAEAAQAAAAAAAAWiAAAAJGVmZWZhMjUzLTY5NDItNGNlNS04MjMxLTE3ODYzYWRhY2ViZA.jpg', 78 | skills: [{ 79 | name: 'Mobile app', 80 | recommendations: '10' 81 | }, { 82 | name: 'SDK Android', 83 | recommendations: '8' 84 | }, { 85 | name: 'Node.js', 86 | recommendations: '2' 87 | }, { 88 | name: 'Java', 89 | recommendations: '25' 90 | }, { 91 | name: 'JavaScript', 92 | recommendations: '13' 93 | }], 94 | title: 'IT Engineering student doing a dual learning course', 95 | volonteers: [{ 96 | association: '4L Trophy', 97 | description: 'I love helping 
people, in 2014 I participated in the 4L Trophy, a humanitarian cross-country drive in Africa to help poor children.', 98 | location: 'Aide humanitaire et secours en cas de catastrophes', 99 | major: '', 100 | since: 'février 2014', 101 | title: 'Copilot / Treasurer', 102 | until: '' 103 | }, { 104 | association: 'Red Bull', 105 | description: 'Athletes support during the Red Bull Element of 2013 and 2012.', 106 | location: '', 107 | major: '', 108 | since: 'septembre 2013', 109 | title: 'Athletes support', 110 | until: '' 111 | }] 112 | } 113 | ``` 114 | 115 | ## Customization 116 | 117 | 118 | You can customize the scraper by passing your configuration. 119 | ```javascript 120 | // Import the module in your file 121 | var ScrapeLinkedin = require("scrape-linkedin"); 122 | 123 | // Create the scraper object 124 | var scrapper = new ScrapeLinkedin({ 125 | debug : true, // optional, boolean 126 | token : "", // optional, string 127 | loginCsrf : "", // optional, string 128 | loginCookies : "", // optional, string 129 | loginEmail : "", // optional, string 130 | loginPassword : "" // optional, string 131 | }); 132 | 133 | ``` 134 | 135 | ## Thanks to 136 | 137 | * [phantom](https://github.com/amir20/phantomjs-node) 138 | * [request](https://github.com/request/request) 139 | 140 | 141 | And you ! 
<3 142 | -------------------------------------------------------------------------------- /index.js: -------------------------------------------------------------------------------- 1 | module.exports = require("./scrape-linkedin"); 2 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "scrape-linkedin", 3 | "version": "0.0.2", 4 | "description": "Scrape Linkedin profiles has never been so simple with nodejs :D !", 5 | "author" : "Charly Berthet ", 6 | "scripts": { 7 | "start": "node index.js" 8 | }, 9 | "repository" : { 10 | "type" : "git", 11 | "url" : "https://github.com/charlyBerthet/scrape-linkedin-with-nodejs" 12 | }, 13 | "bugs" : { 14 | "url" : "https://github.com/charlyBerthet/scrape-linkedin-with-nodejs/issues" 15 | }, 16 | "keywords" : [ 17 | "scraper", 18 | "linkedin", 19 | "nodejs", 20 | "crawler" 21 | ], 22 | "dependencies": { 23 | "phantom" : "latest", 24 | "request" : "latest" 25 | }, 26 | "license" : "MIT" 27 | } 28 | -------------------------------------------------------------------------------- /scrape-linkedin.js: -------------------------------------------------------------------------------- 1 | var phantom = require('phantom'); 2 | var request = require('request'); 3 | 4 | // 5 | // DEFAULT Config 6 | // 7 | const EMAIL = 'justforyou@kingkong.fr'; // login email > CAN BE CHANGED 8 | const PASSWORD = 'iamsecured'; // login pwd > CAN BE CHANGED 9 | const DEBUG = false; // set true to debug 10 | 11 | 12 | 13 | // 14 | // OBJECT 15 | // 16 | var ScrapeLinkedin = function(config){ 17 | config = config || {}; 18 | this.debug = config.debug || DEBUG; 19 | // Token used to fetch user profiles 20 | this.linkedinToken = { 21 | 'name' : 'li_at', 22 | 'value' : config.liAt || config.token || '', 23 | 'domain' : '.www.linkedin.com', 24 | 'path' : '/', 25 | 'httponly' : true, 26 | 'secure' : true, 27 | 'expires' 
: new Date("2217-11-11T18:24:17.000Z").getTime() 28 | }; 29 | // Used to login to linkedin 30 | this.loginCsrf = config.loginCsrf || null; 31 | this.loginCookies = config.loginCookies || null; 32 | this.loginEmail = config.loginEmail || EMAIL; 33 | this.loginPassword = config.loginPassword || PASSWORD; 34 | }; 35 | 36 | 37 | 38 | 39 | // 40 | // FETCH LINKEDIN PROFILE 41 | // 42 | ScrapeLinkedin.prototype.fetchLinkedinProfile = function(username){ 43 | const self = this; 44 | return new Promise((resolve, reject) => { 45 | phantom.create().then(function(ph){ 46 | ph.createPage().then( function(page){ 47 | 48 | 49 | page.setting('javascriptEnabled', true); 50 | page.setting('cookiesEnabled', true); 51 | page.addCookie(self.linkedinToken); 52 | 53 | 54 | if(self.debug) 55 | console.log("Fetch linkedin: " + username); 56 | return page.open("https://www.linkedin.com/in/" + username) 57 | .then(function(status){ 58 | 59 | return page.evaluate(function(){ 60 | // 61 | // Query helpers 62 | // 63 | function getText(selector){ 64 | return get(selector) ? get(selector).textContent : ""; 65 | }; 66 | 67 | function get(selector){ 68 | return document.querySelector(selector) ? document.querySelector(selector) : false; 69 | }; 70 | function getTextArray(mainSelector, textSelector){ 71 | var tmp = document.querySelectorAll(mainSelector); 72 | var array = []; 73 | if(tmp.length > 0){ 74 | for(var k = 0 ; k < tmp.length ; k++){ 75 | if(tmp[k].querySelector(textSelector)) 76 | array.push(tmp[k].querySelector(textSelector).textContent); 77 | else 78 | array.push(tmp[k].textContent); 79 | } 80 | } 81 | return array; 82 | }; 83 | function getJson(mainSelector, json){ 84 | var tmp = document.querySelectorAll(mainSelector); 85 | var array = []; 86 | if(tmp.length > 0){ 87 | for(var k = 0 ; k < tmp.length ; k++){ 88 | var jsonResult = {}; 89 | for(var j in json){ 90 | jsonResult[j] = tmp[k].querySelector(json[j]) ? 
tmp[k].querySelector(json[j]).textContent : ""; 91 | 92 | } 93 | array.push(jsonResult); 94 | } 95 | } 96 | return array; 97 | }; 98 | function getPicture(selector){ 99 | return document.querySelector(selector) ? document.querySelector(selector).getAttribute("src") : ""; 100 | }; 101 | 102 | // 103 | // Parsing -> can often change /!\ 104 | // 105 | var user = {}; 106 | user.fullName = getText(".full-name"); 107 | user.title = getText(".title"); 108 | user.locality = getText(".locality"); 109 | user.industry = getText(".industry"); 110 | user.profilePicture = getPicture(".profile-picture img"); 111 | user.curentCompany = getTextArray("#overview-summary-current li", "span"); 112 | user.previousCompanies = getTextArray("#overview-summary-past li", "span"); 113 | user.languages = getJson("#languages-view .section-item", { 114 | "language":"h4", 115 | "level":"div" 116 | }); 117 | user.experiences = getJson("#background-experience > div", { 118 | "title":"header h4", 119 | "company":"header h5:not(.experience-logo)", 120 | "since":".experience-date-locale time:nth-child(1)", 121 | "until":".experience-date-locale time:nth-child(2)", 122 | "location":".experience-date-locale .locality", 123 | "description":".description" 124 | }); 125 | user.skills = getJson("#profile-skills .skill-pill", { 126 | "recommendations":".endorse-count", 127 | "name":".endorse-item-name" 128 | }); 129 | user.educations = getJson("#background-education .education", { 130 | "title":"header h4", 131 | "degree":"header h5 .degree", 132 | "major":"header h5 .major", 133 | "date":".education-date" 134 | }); 135 | user.volonteers = getJson("#background-volunteering .experience", { 136 | "title":"h4", 137 | "association":"h5:not(.volunteering-logo)", 138 | "major":"header h5 .major", 139 | "since":".volunteering-date-cause time:nth-child(1)", 140 | "until":".volunteering-date-cause time:nth-child(2)", 141 | "location":".volunteering-date-cause .locality", 142 | "description":".description" 143 | }); 
144 | 145 | return user; 146 | 147 | }) 148 | .then(function(result){ 149 | resolve(result); 150 | return ph.exit(); 151 | }) 152 | .catch(err => reject(err)); 153 | }); 154 | }); 155 | }); 156 | }); 157 | }; 158 | 159 | 160 | 161 | // 162 | // LOG INTO LINKEDIN -> get token 163 | // 164 | ScrapeLinkedin.prototype.getToken = function(){ 165 | const self = this; 166 | var form = { 167 | 'loginCsrfParam':self.loginCsrf, 168 | 'session_key':self.loginEmail, 169 | 'session_password':self.loginPassword 170 | }; 171 | var tokenOptions = { 172 | method: 'POST', 173 | url: 'https://www.linkedin.com/uas/login-submit', 174 | headers: 175 | { 176 | 'cache-control': 'no-cache', 177 | 'cookie': self.loginCookies, 178 | 'accept-language': 'fr-FR,fr;q=0.8,en-US;q=0.6,en;q=0.4', 179 | 'accept-encoding': 'gzip, deflate', 180 | 'referer': 'https://www.linkedin.com/', 181 | 'accept': '*/*', 182 | 'content-type': 'application/x-www-form-urlencoded', 183 | 'user-agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.2357.130 Safari/537.36', 184 | 'x-requested-with': 'XMLHttpRequest', 185 | 'origin': 'https://www.linkedin.com', 186 | 'x-isajaxform': '1' 187 | }, 188 | 'form': form 189 | }; 190 | 191 | return new Promise(function(resolve, reject){ 192 | request(tokenOptions, function (error, response, body) { 193 | if (error) throw new Error(error); 194 | 195 | const cookies = response.headers["set-cookie"]; 196 | var liAt = ""; 197 | for(var i = 0; i{ 274 | if(/bcookie=/.test(v)) 275 | csrf = v.split('"')[1].split("&")[1]; 276 | strCookies += v.split(";")[0] +"; "; 277 | }); 278 | resolve({ 279 | cookies : strCookies, 280 | csrf : csrf 281 | }); 282 | }); 283 | }); 284 | }; 285 | 286 | 287 | 288 | // 289 | // Update cookie if needed 290 | // 291 | ScrapeLinkedin.prototype.updateCsrfAndCookiesIfNeeded = function(){ 292 | const self = this; 293 | return new Promise((resolve, reject)=>{ 294 | if(self.loginCsrf == null || self.loginCookies == null){ 
295 | if(self.debug) 296 | console.log("fetch default cookies and csrf"); 297 | self.fetchCsrfAndCookies().then(json =>{ 298 | self.loginCsrf = json.csrf; 299 | self.loginCookies = json.cookies; 300 | if(self.debug) 301 | console.log("default cookies and csrf are:", json); 302 | resolve(); 303 | }).catch(err => { 304 | reject(err); 305 | }); 306 | }else{ 307 | return resolve(); 308 | } 309 | }); 310 | }; 311 | 312 | 313 | 314 | 315 | // 316 | // Login to linkedin if needed, get the token if needed and THEN fetch profile 317 | // 318 | ScrapeLinkedin.prototype.bindIfNeededThenfetch = function(username){ 319 | const self = this; 320 | return new Promise((resolve, reject) => { 321 | self.updateCsrfAndCookiesIfNeeded().then() 322 | .then(()=>{ 323 | self.updateTokenIfNeeded() 324 | .then(()=>{ 325 | self.fetchLinkedinProfile(username).then(profile => resolve(profile)).catch(err => reject(err)); 326 | }) 327 | .catch(function(err){ 328 | reject(err); 329 | }); 330 | }) 331 | .catch(err => { 332 | reject(err); 333 | }); 334 | }); 335 | 336 | }; 337 | ScrapeLinkedin.prototype.now = ScrapeLinkedin.prototype.bindIfNeededThenfetch; 338 | ScrapeLinkedin.prototype.love = ScrapeLinkedin.prototype.bindIfNeededThenfetch; 339 | ScrapeLinkedin.prototype.get = ScrapeLinkedin.prototype.bindIfNeededThenfetch; 340 | ScrapeLinkedin.prototype.fetch = ScrapeLinkedin.prototype.bindIfNeededThenfetch; 341 | ScrapeLinkedin.prototype.scrape = ScrapeLinkedin.prototype.bindIfNeededThenfetch; 342 | ScrapeLinkedin.prototype.kingkong = ScrapeLinkedin.prototype.bindIfNeededThenfetch; 343 | ScrapeLinkedin.prototype.zidane = ScrapeLinkedin.prototype.bindIfNeededThenfetch; 344 | 345 | 346 | 347 | 348 | // 349 | // Example of use 350 | // 351 | /* 352 | var scrapper = new ScrapeLinkedin(); 353 | scrapper.fetch("charlyberthet").then(profile => console.log(profile)).catch(err => console.log(err)); 354 | */ 355 | 356 | 357 | // EXPORT 358 | module.exports = ScrapeLinkedin; 359 | 360 | 361 | 362 | 363 | 364 
| 365 | 366 | --------------------------------------------------------------------------------