├── .DS_Store ├── data-containers └── .DS_Store ├── lib ├── company.js ├── honors.js ├── language.js ├── projects.js ├── experience.js ├── education.js └── profile.js ├── .travis.yml ├── gulpfile.js ├── .gitignore ├── LICENSE ├── package.json ├── index.js ├── gulp ├── development.js └── test.js ├── test └── scraper-test.js └── README.md /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/danieljoppi/node-linkedin-scraper2/HEAD/.DS_Store -------------------------------------------------------------------------------- /data-containers/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/danieljoppi/node-linkedin-scraper2/HEAD/data-containers/.DS_Store -------------------------------------------------------------------------------- /lib/company.js: -------------------------------------------------------------------------------- 1 | 'use strict'; 2 | module.exports = function(company) { 3 | var name = company.text().trim(); 4 | if (name[name.length - 1] === ',') { 5 | name = name.substring(0, name.length - 1); 6 | } 7 | return name; 8 | }; 9 | -------------------------------------------------------------------------------- /lib/honors.js: -------------------------------------------------------------------------------- 1 | function Honors(awardName, organization, duration, description) { 2 | this.awardName = awardName; 3 | this.organization = organization; 4 | this.duration = duration; 5 | this.description = description; 6 | } 7 | 8 | module.exports = Honors; 9 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: node_js 2 | node_js: 3 | - '5.4.0' 4 | 5 | install: 6 | # - npm config set registry http://23.251.144.68 7 | # Disable the spinner, it looks bad on Travis 8 | - 'npm config set 
spin false' 9 | # Log HTTP requests 10 | - 'npm config set loglevel http' 11 | - 'time npm install' 12 | 13 | after_script: 14 | - 'npm run coveralls' 15 | 16 | script: 17 | - 'gulp test' -------------------------------------------------------------------------------- /lib/language.js: -------------------------------------------------------------------------------- 1 | 'use strict'; 2 | 3 | module.exports = function(lang) { 4 | return { 5 | name: lang.find('.name').text(), 6 | proficiency: (function(prof){ 7 | if (~prof.indexOf('elementary')) { 8 | return 'elementary'; 9 | } else if (~prof.indexOf('native')) { 10 | return 'native_or_bilingual'; 11 | } else if (~prof.indexOf('professional')) { 12 | if (~prof.indexOf('fulle')) 13 | return 'full_professional'; 14 | else return 'professional_working'; 15 | } else { 16 | return 'limited_working'; 17 | } 18 | })(lang.find('.proficiency').text().toLowerCase()) 19 | } 20 | }; 21 | -------------------------------------------------------------------------------- /gulpfile.js: -------------------------------------------------------------------------------- 1 | 'use strict'; 2 | 3 | var gulp = require('gulp'); 4 | 5 | var env = process.env.NODE_ENV || 'development'; 6 | /* 7 | var defaultTasks = ['clean', 'jshint', 'csslint','serve','watch']; // initialize with development settings 8 | if (env === 'production') { var defaultTasks = ['clean', 'cssmin', 'uglify', 'serve', 'watch'];} 9 | if (env === 'test') { var defaultTasks = ['env:test', 'karma:unit', 'mochaTest'];} 10 | */ 11 | // read gulp directory contents for the tasks... 
12 | require('require-dir')('./gulp'); 13 | console.log('Invoking gulp -', env); 14 | gulp.task('default', function (defaultTasks) { 15 | // run with paramater 16 | gulp.start(env); 17 | }); -------------------------------------------------------------------------------- /lib/projects.js: -------------------------------------------------------------------------------- 1 | function Project(name, dates, description, projectlink) { 2 | this.name = name; 3 | this.dates = (function (dates) { 4 | var dd = (~dates.indexOf('(') ? dates.substring(0, dates.indexOf('(')) : dates).split('–'), 5 | current = dd[1] && ~dd[1].indexOf('Present') ? true : false; 6 | 7 | return { 8 | start: dd[0] ? new Date(dd[0]).toJSON() : undefined, 9 | end: dd[1] && !current ? new Date(dd[1]).toJSON() : undefined, 10 | current: current 11 | }; 12 | })(dates); 13 | this.description = description; 14 | this.projectlink = projectlink; 15 | } 16 | 17 | module.exports = Project; 18 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | # Logs 3 | logs 4 | *.log 5 | 6 | # Runtime data 7 | pids 8 | *.pid 9 | *.seed 10 | 11 | # Directory for instrumented libs generated by jscoverage/JSCover 12 | lib-cov 13 | 14 | # Coverage directory used by tools like istanbul 15 | coverage 16 | 17 | # Grunt intermediate storage (http://gruntjs.com/creating-plugins#storing-task-files) 18 | .grunt 19 | 20 | # Compiled binary addons (http://nodejs.org/api/addons.html) 21 | build/Release 22 | 23 | # Dependency directory 24 | # Commenting this out is preferred by some people, see 25 | # https://www.npmjs.org/doc/misc/npm-faq.html#should-i-check-my-node_modules-folder-into-git- 26 | node_modules/ 27 | 28 | # Users Environment Variables 29 | .lock-wscript 30 | -------------------------------------------------------------------------------- /lib/experience.js: 
-------------------------------------------------------------------------------- 1 | 'use strict'; 2 | 3 | module.exports = function (experience) { 4 | return { 5 | title: experience.find('.item-title').text(), 6 | companyName: experience.find('.item-subtitle').text(), 7 | dates: (function (dates) { 8 | var dd = dates.map(function(idx) { 9 | if (typeof idx === 'number') { 10 | return dates[idx].innerHTML.trim(); 11 | } else { 12 | return undefined; 13 | } 14 | }); 15 | var current = !dd[1] || ~dd[1].toLowerCase().indexOf('present') ? true : false; 16 | return { 17 | start: dd[0] ? new Date(dd[0]).toJSON() : undefined, 18 | end: dd[1] && !current ? new Date(dd[1]).toJSON() : undefined, 19 | current: current 20 | }; 21 | })(experience.find('.date-range time')), 22 | locality: experience.find('.location').text(), 23 | description: experience.find('.description').html() 24 | }; 25 | }; 26 | -------------------------------------------------------------------------------- /lib/education.js: -------------------------------------------------------------------------------- 1 | 'use strict'; 2 | 3 | module.exports = function (education) { 4 | var subtitle = education.find('.item-subtitle').text().split(', '); 5 | 6 | return { 7 | name: education.find('.item-title').text(), 8 | link: education.find('.item-title a[href]').attr('href'), 9 | program: '', // TODO - get: 'High School', 'College', 'Others' 10 | degree: subtitle[0], 11 | major: subtitle[1], 12 | dates: (function (dates) { 13 | var dd = dates.map(function(idx) { 14 | if (typeof idx === 'number') { 15 | return dates[idx].innerHTML.trim(); 16 | } else { 17 | return undefined; 18 | } 19 | }); 20 | 21 | var current = !dd[1] || ~dd[1].toLowerCase().indexOf('present') ? true : false; 22 | return { 23 | start: dd[0] ? new Date(dd[0]).toJSON() : undefined, 24 | end: dd[1] && !current ? 
new Date(dd[1]).toJSON() : undefined, 25 | current: current 26 | }; 27 | })(education.find('.date-range time')) 28 | }; 29 | }; 30 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 Aaditya Sriram 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | 23 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "linkedin-scraper2", 3 | "version": "2.1.0-pre", 4 | "description": "A scraper for LinkedIn, give us a linkedin profile URL and we'll give you a nice data structure", 5 | "main": "index.js", 6 | "repository": { 7 | "type": "git", 8 | "url": "https://github.com/danieljoppi/node-linkedin-scraper2.git" 9 | }, 10 | "scripts": { 11 | "test": "gulp test", 12 | "coveralls": "cat ./coverage/lcov.info | ./node_modules/coveralls/bin/coveralls.js && rm -rf ./coverage" 13 | }, 14 | "author": "daniel.joppi@suiteplus.com", 15 | "license": "MIT", 16 | "bugs": { 17 | "url": "https://github.com/danieljoppi/node-linkedin-scraper2/issues" 18 | }, 19 | "homepage": "https://github.com/danieljoppi/node-linkedin-scraper2#readme", 20 | "dependencies": { 21 | "jquery": "^2.1.3", 22 | "jsdom": "danieljoppi/jsdom#5.6.2", 23 | "q": "^1.4.1" 24 | }, 25 | "devDependencies": { 26 | "coveralls": "^2.11.4", 27 | "eslint": "^2.1.0", 28 | "eslint-plugin-mocha": "^2.0.0", 29 | "gulp": "^3.9.1", 30 | "gulp-eslint": "^2.0.0", 31 | "gulp-istanbul": "^0.10.3", 32 | "gulp-load-plugins": "^1.2.0", 33 | "gulp-mocha": "^2.2.0", 34 | "gulp-plumber": "^1.1.0", 35 | "require-dir": "^0.3.0", 36 | "should": "^8.2.2" 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /index.js: -------------------------------------------------------------------------------- 1 | var jsdom = require('jsdom'), 2 | Q = require('q'); 3 | 4 | module.exports = function (originalUrl, callback) { 5 | var deferred = Q.defer(); 6 | 7 | 8 | var profileFactory = require('./lib/profile'); 9 | var linkedInURL = originalUrl.replace(/[a-z]*\.linkedin/, 'www.linkedin'); 10 | 11 | jsdom.env({ 12 | url: linkedInURL, 13 | scripts: ['http://code.jquery.com/jquery.js'], 14 | headers: { 15 | 'Accept': 
'text/html', 16 | //'Accept-Encoding': 'gzip', 17 | 'Accept-Language': 'en-US;q=0.6,en;q=0.4', 18 | 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.125 Safari/537.36' 19 | }, 20 | proxy: process.env.PROXY_HOST, 21 | tunnel: process.env.PROXY_HOST ? false : undefined, 22 | done: function (errors, window) { 23 | if (errors) { 24 | if (callback) { 25 | callback(errors); 26 | } else { 27 | deferred.reject(errors); 28 | } 29 | } else { 30 | var profile = profileFactory(window); 31 | profile.publicProfileUrl = originalUrl; 32 | 33 | if (callback) { 34 | callback(null, profile); 35 | } else { 36 | deferred.resolve(profile); 37 | } 38 | } 39 | } 40 | }); 41 | 42 | return deferred.promise; 43 | }; -------------------------------------------------------------------------------- /gulp/development.js: -------------------------------------------------------------------------------- 1 | 'use strict'; 2 | var file, msg = '**/*-test.js'; 3 | process.argv.forEach(function (val, index, array) { 4 | if (val === '-file' || val === '--f') { 5 | let env_val = array[index + 1]; 6 | msg = '**/*' + env_val+ '*-test.js'; 7 | file = env_val; 8 | } 9 | }); 10 | 11 | var gulp = require('gulp'), 12 | gulpLoadPlugins = require('gulp-load-plugins'), 13 | plugins = gulpLoadPlugins(), 14 | appRoot = process.cwd(), 15 | paths = { 16 | js: [ 17 | appRoot + '/index.js', 18 | appRoot + '/lib/**/*.js' 19 | ], 20 | jsTests: [appRoot + '/test/**/*-test.js'] 21 | }; 22 | 23 | var defaultTasks = ['env:development', 'dev:eslint', 'dev:mocha', 'watch']; 24 | 25 | gulp.task('env:development', function () { 26 | process.env.NODE_ENV = 'development'; 27 | console.log('use => load tests: ', msg); 28 | }); 29 | 30 | gulp.task('dev:eslint', function () { 31 | return gulp.src(paths.js.concat(paths.jsTests)) 32 | .pipe(plugins.plumber()) 33 | .pipe(plugins.eslint()) 34 | .pipe(plugins.eslint.format()) 35 | .pipe(plugins.eslint.failAfterError()); 
'use strict';
var linkedinScraper = require('../'),
    should = require('should');

// Integration tests: they fetch a live public profile, so the expected
// values below mirror that page.
describe('', function() {
    this.timeout(10000);

    var url = 'https://br.linkedin.com/in/danieljoppi';

    /**
     * Asserts that a scraped profile carries the expected values.
     *
     * @param {Object} profile - result produced by the scraper.
     */
    function validateProfile(profile) {
        should(profile).be.ok();
        should(profile).have.property('name', 'Daniel Henrique Joppi');
        should(profile).have.property('headline', 'Software Architect na Rospo GeoTech');
        should(profile).have.property('location', 'Florianópolis, Santa Catarina, Brazil');
        should(profile).have.property('summary').be.ok();
        should(profile).have.property('industry').be.equal('Program Development');
        should(profile).have.property('languages').length(5);
        should(profile).have.property('skills').length(45);
        should(profile).have.property('currentPositions').length(3);
        should(profile).have.property('pastPositions').length(3);
        should(profile).have.property('educations').length(1);
        should(profile).have.property('positions').length(8);
        //should(profile).have.property('honors').length(0);
        //should(profile).have.property('projects').length(0);
        should(profile).have.property('publicProfileUrl', url);
    }

    it('get profile with callback', function(done) {
        linkedinScraper(url, function (err, profile) {
            //console.log(JSON.stringify(profile, null, '  '));

            // Forward the scraper's error so the report shows the cause.
            if (err) {
                return done(err);
            }

            validateProfile(profile);
            return done();
        });
    });

    it('get profile with promise', function() {
        // Returning the promise lets mocha report a rejection or a failed
        // assertion directly; with `done` and no rejection handler such a
        // failure only surfaced as a timeout.
        return linkedinScraper(url).then(validateProfile);
    });
});
file + '*' : '') + '-test.js'; 41 | gulp.src([appRoot + path]) 42 | .pipe(plugins.mocha({ 43 | reporters: 'spec' 44 | })) 45 | .pipe(plugins.istanbul.writeReports({ 46 | reports: ['lcovonly'] 47 | })); // Creating the reports after tests runned 48 | }; 49 | 50 | // instrumentation 51 | gulp.src(paths.js) 52 | .pipe(plugins.istanbul({ 53 | includeUntested: true 54 | 55 | })) // Covering files 56 | .pipe(plugins.istanbul.hookRequire())// Force `require` to return covered files 57 | .on('finish', () => executeTests()); 58 | 59 | }); 60 | 61 | gulp.task('test', defaultTasks); -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # node-linkedin-scraper2 [![NPM version][npm-image]][npm-url] 2 | [![Build Status][travis-image]][travis-url] [![Coveralls Status][coveralls-image]][coveralls-url] 3 | 4 | A simple LinkedIn profile scraper for nodejs, based on original [linkedin-scraper](https://github.com/aadisriram/nodejs-linkedin-scraper). 
5 | 6 | ## Install [![Dependency Status][david-image]][david-url] [![devDependency Status][david-image-dev]][david-url-dev] 7 | ```bash 8 | npm install linkedin-scraper2 --save 9 | ``` 10 | 11 | ### Usage 12 | 13 | #### with callback function 14 | ```javascript 15 | // Scrape a linkedin profile for the public contents 16 | var linkedinScraper = require('linkedin-scraper2'); 17 | var url = 'https://www.linkedin.com/in/[user]'; 18 | 19 | linkedinScraper(url, function(err, profile) { 20 | if (err) { 21 | console.log(err); 22 | } else { 23 | console.log(profile); 24 | } 25 | }); 26 | ``` 27 | 28 | #### with promise 29 | ```javascript 30 | // Scrape a linkedin profile for the public contents 31 | var linkedinScraper = require('linkedin-scraper2'); 32 | var url = 'https://www.linkedin.com/in/[user]'; 33 | 34 | linkedinScraper(url) 35 | .then(function(profile) { 36 | console.log(profile); 37 | }) 38 | .catch(function(err) { 39 | console.log(err); 40 | }); 41 | ``` 42 | 43 | #### output 44 | 45 | http://pastebin.com/629RHwTa 46 | 47 | [npm-url]: https://npmjs.org/package/linkedin-scraper2 48 | [npm-image]: http://img.shields.io/npm/v/linkedin-scraper2.svg 49 | 50 | [travis-url]: https://travis-ci.org/danieljoppi/node-linkedin-scraper2 51 | [travis-image]: https://img.shields.io/travis/danieljoppi/node-linkedin-scraper2.svg 52 | 53 | [coveralls-url]: https://coveralls.io/r/danieljoppi/node-linkedin-scraper2 54 | [coveralls-image]: http://img.shields.io/coveralls/danieljoppi/node-linkedin-scraper2/master.svg 55 | 56 | [david-url]: https://david-dm.org/danieljoppi/node-linkedin-scraper2 57 | [david-image]: https://david-dm.org/danieljoppi/node-linkedin-scraper2.svg 58 | 59 | [david-url-dev]: https://david-dm.org/danieljoppi/node-linkedin-scraper2#info=devDependencies 60 | [david-image-dev]: https://david-dm.org/danieljoppi/node-linkedin-scraper2/dev-status.svg -------------------------------------------------------------------------------- /lib/profile.js: 
-------------------------------------------------------------------------------- 1 | 'use strict'; 2 | var companyFactory = require('./company'), 3 | experienceFactory = require('./experience'), 4 | //Honors = require('./honors'), 5 | //Project = require('./projects'), 6 | educationFactory = require('./education'), 7 | languageFactory = require('./language'); 8 | 9 | module.exports = function(window) { 10 | if (!window) { 11 | return {}; 12 | } 13 | var $ = window.$; 14 | var profile = { 15 | name: $('#name').text(), 16 | pictureUrl: (function(img) { 17 | return img.attr('src') || img.attr('data-delayed-url'); 18 | })($('.profile-picture a .photo')), 19 | headline: $('p[data-section=headline]').text(), 20 | location: $('span.locality').text(), 21 | summary: $('#summary .description').text(), 22 | industry: $('[class=descriptor]', '#demographics').text(), 23 | currentPositions: [], 24 | pastPositions: [], 25 | websites: [], 26 | positions: [], 27 | //honors: [], 28 | //projects: [], 29 | educations: [], 30 | skills: [], 31 | languages: [] 32 | }; 33 | 34 | $('[data-section=currentPositions] td ol li').each(function () { 35 | profile.currentPositions.push(companyFactory($(this))); 36 | }); 37 | $('[data-section=pastPositions] td ol li').each(function () { 38 | profile.pastPositions.push(companyFactory($(this))); 39 | }); 40 | 41 | $('#experience .positions .position').each(function () { 42 | profile.positions.push(experienceFactory($(this))); 43 | }); 44 | //$('#background-honors div div div').each(function () { 45 | // profile.honors.push(new Honors($(this).find('h4 span').text(), 46 | // $(this).find('h5 span').text(), 47 | // $(this).find('> span').text(), 48 | // $(this).find('p').text() 49 | // )); 50 | //}); 51 | // 52 | //$('#background-projects div div').each(function () { 53 | // profile.projects.push(new Project($(this).find('hgroup h4 a span:first').text(), 54 | // $(this).find('> span.projects-date').text(), 55 | // $(this).find('> p').text(), 56 | // 
$(this).find('hgroup h4 a[href]').attr('href') 57 | // )); 58 | //}); 59 | 60 | $('#education .schools .school').each(function () { 61 | profile.educations.push(educationFactory($(this))); 62 | }); 63 | 64 | $('#skills .skill a').each(function () { 65 | profile.skills.push($(this).text()); 66 | }); 67 | 68 | $('#languages .language .wrap').each(function () { 69 | profile.languages.push(languageFactory($(this))); 70 | }); 71 | 72 | return profile; 73 | }; 74 | --------------------------------------------------------------------------------