├── .gitignore ├── LICENSE ├── README.md ├── app.js ├── compiled ├── .gitkeep ├── ErnaniJoppert P Martins.docx.json ├── resume.html.json └── resume.txt.json ├── docs ├── instructions.doc └── result.png ├── gulpfile.js ├── package-lock.json ├── package.json ├── parsed └── .gitkeep ├── public ├── ErnaniJoppert P Martins.docx ├── resume.html └── resume.txt └── src ├── ParseBoy.js ├── Resume.js ├── SomeHR.js ├── Speakable.js ├── dictionary.js └── libs ├── parser.js └── processing.js /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | node_modules -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 Lizurchik Alexey 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # IMPORTANT NOTICE 2 | 3 | The library is currently not actively maintained, but I still read all the issues and try to give directions for solving them. 4 | 5 | I believe during that year I will find time to fix existing issues and make it more library-like rather than an application for fun (as it is for now). 6 | 7 | If you want to collaborate and solve some existing issues, there is a branch `ce` (community edition) where I merge community's PRs. 8 | 9 | Don't miss a chance to make `resume-parser` great again :) 10 | 11 | Also, there is a [project](https://github.com/perminder-klair/resume-parser) based on my resume-parser, which solved most of the issues and published its solution to npm. I believe it's more stable than the current version of `resume-parser`. 12 | 13 | # Resume parser 14 | Solution for [Code4Goal - Coding Contest](http://app.crowdsourcehire.com/code4goal/) 15 | 16 | Authored and maintained by Lizurchik Alexey, 2015 17 | 18 | # The Problem 19 | 20 | Often Companies have problems with sorting out large volumes of CVs / Resumes advertising for their job roles. In order to minimise their time in sorting out and have a benchmark way of comparing candidates, you've been tasked with the challenging task of assisting their problem. 21 | 22 | # Contest 23 | 24 | Develop a parser that is able to parse through CVs / Resumes in the word (.doc or .docx) / RTF / TXT / PDF / HTML format to extract the necessary information in a predefined JSON format. If the CVs / Resumes contain any social media profile links then the solution should also parse the public social profile web-pages and organize the data in JSON format (e.g. Linkedin public profile, Github, etc.) 
25 | 26 | # Solution 27 | 28 | This Resume parser can run through an unlimited number of Resumes and extract relevant info from them. With a full-featured installation it supports most of the commonly used formats, provided by [textract](https://github.com/dbashford/textract): 29 | 30 | - HTML 31 | - PDF 32 | - DOC 33 | - RTF 34 | - DOCX 35 | - XLS 36 | - PPTX 37 | - DXF 38 | - PNG 39 | - JPG 40 | - GIF 41 | - application/javascript 42 | - All text/* mime-types. 43 | 44 | # Pre-Requirements 45 | The current solution was tested on Windows 7 x64 Maximum (with [babun shell](http://babun.github.io/)), but it may also run on OSX, Linux. The application depends heavily on the text-extraction library [textract](https://github.com/dbashford/textract). 46 | 47 | # Fast install 48 | 49 | The project is a nodejs cli application with some dependencies. If you already have nodejs installed, you can just clone this repo and run `npm install`: 50 | 51 | git clone git@github.com:likerRr/code4goal-resume-parser.git 52 | npm install 53 | 54 | # Step-by-step fresh installation 55 | 56 | - First, go to the [nodejs](http://nodejs.org/) site, download it and set it up for your platform 57 | - Then, clone this repo `git clone git@github.com:likerRr/code4goal-resume-parser.git` 58 | - Run `npm install` in a terminal from the root folder of the project to set up dependencies 59 | - At this moment the application will work fine, but! By default it supports only `.TXT` and `.HTML` text formats. For better results you should install at least support for `.PDF` (and `.DOC`). Here are the instructions on how to do it, from the [textract README](https://github.com/dbashford/textract#requirements) file: 60 | - `PDF` extraction requires `pdftotext` be installed, [link](http://www.foolabs.com/xpdf/download.html) 61 | - `DOC` extraction requires `catdoc` be installed, [link](http://www.wagner.pp.ru/~vitus/software/catdoc/), unless on OSX in which case textutil (installed by default) is used. 62 | - `DOCX` extraction requires `unzip` be available (e.g. 
`sudo apt-get install unzip` for Ubuntu) 63 | 64 | > Please note that it's not necessary to install support for all formats, but it is preferable. As for me, I didn't manage to set up `catdoc` for `.DOC` files under Windows 7, so I played only with `.TXT`, `.HTML`, `.PDF` formats, but I know it will also work with the rest of the formats :) 65 | 66 | # Run 67 | 68 | When you finish the installation, it's time to run the application. Just put some Resume files into the `/public` directory (it already has 3 for tests) and run `node app.js` in a terminal from the project's root. Then you can access the JSONed results in the `/compiled` folder (each file there contains a JSON string of the parsed data). 69 | 70 | Execution is presented as a dialog between `HR manager`, who has a lot of Resumes to work with, and `ParseBoy`, who volunteered to help with it; I thought that it should have some fun. 71 | 72 | # How it works 73 | 74 | The parser works on the basis of a dictionary of rules that describe how to handle a Resume file. So we have the `/src/dictionary.js` file, where all the rules are placed. It represents a javascript object with the following structure: 75 | 76 | { 77 | titles: {}, 78 | profiles: [], 79 | inline: {}, 80 | regular: {} 81 | } 82 | 83 | All of these keys `titles`, `profiles`, `inline`, `regular` are converted to regular expressions, which are handled under specific conditions: 84 | 85 | - `titles` - fires on each row of the file. If a string matches a title, it will capture all text between the current title and the next title, excluding the current title itself. For example, suppose we have the following dictionary file: 86 | 87 | { 88 | titles: { 89 | // values are the signs of the key that may possibly appear in the Resume 90 | objective: ['objective', 'objectives'], 91 | summary: ['summary'], 92 | } 93 | } 94 | 95 | And the Resume text is: 96 | 97 | > OBJECTIVE 98 | > 99 | > Seeking a challenging position to use my software Web development and process optimization skills. 
100 | > 101 | > SUMMARY 102 | > 103 | > I worked on a wide range of products including building advanced dynamic multi language web sites, internal and external API's, well as creating new internal workflows. 104 | 105 | If we now run application it will go through next Application Loop (AL): 106 | 107 | - Remove unnecessary Resume file from any \n\r\t and trim all lines 108 | - Compile rules to regular expressions 109 | - Split file into lines, delimited by \n 110 | - Check each line for a match for each title rules 111 | - When match found, parse text between current title and next title into `titles` or until EOF 112 | - Save parsed text (if found) under title key (`objective` or (and) `summary`) 113 | 114 | So, according to this loop in the end we will have following JSON file: 115 | 116 | { 117 | objective: 'Seeking a challenging position to use my software Web development and process optimization skills.' 118 | summary: 'I worked on a wide range of products including building advanced dynamic multi language web sites, internal and external API's, well as creating new internal workflows.' 119 | } 120 | 121 | 122 | - `profiles` - fires on each row of file. If profile rule represent an array, so first key will be the name of key and second key will be an handler. If profile rule just a string, parser will try to found matched url without parsing it. 
Example: 123 | 124 | profiles: [ 125 | ['github.com', function(url, Resume, profilesWatcher) { 126 | download(url, function(data, err) { 127 | if (data) { 128 | var $ = cheerio.load(data), 129 | fullName = $('.vcard-fullname').text(), 130 | location = $('.octicon-location').parent().text(), 131 | mail = $('.octicon-mail').parent().text(), 132 | link = $('.octicon-link').parent().text(), 133 | clock = $('.octicon-clock').parent().text(), 134 | company = $('.octicon-organization').parent().text(); 135 | 136 | Resume.addObject('github', { 137 | name: fullName, 138 | location: location, 139 | email: mail, 140 | link: link, 141 | joined: clock, 142 | company: company 143 | }); 144 | } else { 145 | return console.log(err); 146 | } 147 | //profilesInProgress--; 148 | profilesWatcher.inProgress--; 149 | }); 150 | }], 151 | 'stackoverflow.com' 152 | ], 153 | 154 | It looks quite big, but it is very flexible. 155 | 156 | So here we can see that profiles contains two rules: `github.com` with a callback and `stackoverflow.com`. When a profile rule enters the Application Loop (AL) and it has a valid callback, it will try to request the profile page from the Internet and parse the data on the requested page, according to the rules in the callback. Then it places all data into the `Resume` object under the represented key (`github` in our case). If the rule is just a string and it finds a match in an AL row, it simply puts the profile link into the `profile` key in the `Resume` object. 157 | 158 | - `inline` - fires on each row of the file. It is converted to a regular expression that matches all data after it: 159 | 160 | `expr+":?[\\s]*(.*)"` 161 | 162 | Example: 163 | 164 | inline: { 165 | skype: 'skype' 166 | }, 167 | 168 | Text: 169 | 170 | > skype: sweet-liker 171 | 172 | The result will be a `skype` key with the `sweet-liker` value in the `Resume` object. So it can be extended with simple lines of data, e.g. `address` or `first name` or whatever. 173 | 174 | > Note that these rules are unreliable, because they can pick up unrelated data from context, e.g. 
"I don't have a skype, but I have IM". After parsing that string data in `Resume` will be as key `skype` and value `but I have IM`. So use on your own risk. 175 | 176 | - `regular` - fires on full data of file. It just search the first matches by regular expression, e.g: 177 | 178 | regular: { 179 | name: [ 180 | /([A-Z][a-z]*)(\s[A-Z][a-z]*)/ 181 | ], 182 | email: [ 183 | /([a-z0-9_\.-]+)@([\da-z\.-]+)\.([a-z\.]{2,6})/ 184 | ], 185 | phone: [ 186 | /((?:\+?\d{1,3}[\s-])?\(?\d{2,3}\)?[\s.-]?\d{3}[\s.-]\d{4,5})/ 187 | ] 188 | } 189 | 190 | Will try find `name`, `email`, `phone` by expression sign. 191 | 192 | # Generic format 193 | This solution hasn't generic output format of JSON string, cause it filled if rule in dictionary match the condition. So, the full possible data, that may be extracted from Resume may have such format: 194 | 195 | { 196 | objective: '', 197 | summary: '', 198 | technology: '', 199 | experience: '', 200 | education: '', 201 | skills: '', 202 | languages: '', 203 | cources: '', 204 | projects: '', 205 | links: '', 206 | contacts: '', 207 | positions: '', 208 | profiles: '', 209 | awards: '', 210 | honors: '', 211 | additional: '', 212 | certification: '', 213 | interests: '', 214 | github: { 215 | name: '', 216 | location: '', 217 | email: '', 218 | link: '', 219 | joined: '', 220 | company: '' 221 | }, 222 | linkedin: { 223 | summary: '', 224 | name: '', 225 | positions: [], 226 | languages: [], 227 | skills: [], 228 | educations: [], 229 | volunteering: [], 230 | volunteeringOpportunities: [] 231 | }, 232 | skype: '', 233 | name: '', 234 | email: '', 235 | phone: '' 236 | } 237 | 238 | # Extending 239 | All 'action' are by building `dictionary.js` file. For now it has only basics rules, that I met while develop this solution, but it's very flexible (although a bit complicated) and extensible. Just put your rule according to existing and following main principles and enjoy! 
/**
 * CLI entry point of the resume parser.
 *
 * Scans the `public` directory for resume files, lets `ParseBoy` parse each
 * of them and stores every result as JSON in the `compiled` directory. The
 * whole run is narrated on the console as a dialog between `SomeHR` and
 * `ParseBoy`.
 */
var path = require('path');
var SomeHR = require('./src/SomeHR')();
// Provides the `.bgRed`, `.bgBlue`, `.black`, `.blue`, `.gray` string
// properties used for the console output below.
require('colors');

/** Directory that is scanned for resume files. */
var RESUME_DIR = path.join(__dirname, 'public');
/** Directory that receives one JSON file per parsed resume. */
var OUTPUT_DIR = path.join(__dirname, 'compiled');
/** Horizontal rule drawn above and below each banner title. */
var BANNER_RULE = '------------------------------------------------------------------------------------';

console.log('Please, wait 2 sec to skip warnings'.bgRed.black);
setTimeout(main, 2000);

/**
 * Prints a three-line banner: rule, title, rule.
 *
 * @param {string} title - Text shown between the two rules.
 */
function printBanner(title) {
  console.log(BANNER_RULE.bgBlue.blue);
  console.log(title.bgBlue.gray);
  console.log(BANNER_RULE.bgBlue.blue);
}

/**
 * Builds a human-readable, comma-separated list of file names.
 *
 * @param {string[]} filePaths - Paths of the files to list.
 * @returns {string} Base names of the files joined with ', '.
 */
function getFileNames(filePaths) {
  return filePaths.map(function (file) {
    return path.basename(file);
  }).join(', ');
}

/**
 * Runs the whole parse-and-store dialog for every file in RESUME_DIR.
 */
function main() {
  printBanner(' Somewhere in big & cool company... ');
  console.log('');

  SomeHR.iHaveCVPack(RESUME_DIR, function (err, files) {
    // The callback reads its helper methods from `this`; presumably
    // iHaveCVPack invokes it bound to the HR instance - confirm in src/SomeHR.js.
    var Iam = this;
    var savedFiles = 0;
    /** @type {ParseBoy} */
    var ParseBoy;

    if (err) {
      return Iam.explainError(err);
    }
    if (!files.length) {
      return Iam.nothingToDo();
    }

    SomeHR.say('My stack for today are: ' + getFileNames(files));
    ParseBoy = Iam.needSomeoneToSortCV();

    ParseBoy.willHelpWithPleasure(files, function (PreparedFile) {
      ParseBoy.say('I\'m working with "' + PreparedFile.name + '" now');
      ParseBoy.workingHardOn(PreparedFile, function (Resume) {
        ParseBoy.say('I got Resume for ' + PreparedFile.name + ', now saving...');
        ParseBoy.storeResume(PreparedFile, Resume, OUTPUT_DIR, function (err) {
          if (err) {
            // NOTE(review): a failed save is never counted, so the closing
            // dialog below is not printed when any file fails to store.
            return ParseBoy.explainError(err);
          }

          ParseBoy.say('Resume ' + PreparedFile.name + ' saved');
          savedFiles += 1;

          // Saves complete asynchronously; only the last one closes the dialog.
          if (savedFiles === files.length) {
            ParseBoy.say('I finished! Please, check "/compiled" folder where you can find each parsed profile in JSON');
            SomeHR.say('Thank you a lot! I can\'t even imagine, what would I do without your help, ParseBoy!');
            ParseBoy.say('You are welcome, have a nice day!');

            console.log('');
            printBanner(' The End! ');
          }
        });
      });
    });
  });
}
Specialties: JEE (former J2EE) framework, Java, JSP, JSF, SOAP, Axis, PHP, XML, SAML, Rest, Websphere, Jboss, BEA Weblogic, Tomcat, Struts, Eclipse, SWT, Swing, Hibernate, AMF, Flex, Adobe AIR, ActionScript development as well as DB2, Oracle, MySQL, SQL Server and other development solutions to achieve customer needs.","experience":"Application Integration / Web Identity at IBM Global Services\nJanuary 2005 - Present (10 years 2 months)\nTechnical consultant for Web Identity project at IBM Global Services, support and manage application integration with SSO (Tivoli Access Manager/WebSEAL) and custom based SOA Profiling for internal and external adopters, Business Partners profiling, authentication and entitlement, SAAJ and Federation also involved.\n5 recommendations available upon request\nPHP Consultant at PHPSP - PHP User Group in São Paulo\n2001 - Present (14 years)\nFounded the php-sp list on yahoo groups by answering their questions, also contributed to the PHP group fixing a few bugs as well as the PHP documentation translation to Brazilian Portuguese, attended some events and tried to help the users to share their knowledge. Also done some free lances here and there related to PHP, such as Wordpress setup, a blog commenting system and OSCommerce personalization.\n1 recommendation available upon request\nOpen Source App Development (Community Developer) at Spare Time Open Source Developer\n2009 - 2011 (2 years)\nIndependant Sr. Java EE/J2SE Consultant at Java/JEE Architect / Consultant\n1999 - 2010 (11 years)\nProvided several development solutions using the Java2SE and JEE technologies, both as an Architect Role as well as a lead developer. 
The variations included Web Applications, j2me applications (mobile), desktop applications using SWT, Swing and AWT, Unit Testing and Batch Applications to run at scheduled periods of time to process loads of information with or without JDBC, JMS, RMI, IIOP, JBoss and MQ systems.\nBusiness Intelligence/ETL Developer at BI and DW Consultant\n1999 - 2009 (10 years)\nProvided solutions and adjustments on several DW projects, ensuring Data Quality through ETL and BI concepts both based on relational data models and OLAP, using the common Data Warehousing techniques such as SnowFlake and StarSchema, Ad Hoc Queries and Data Quality in small and big data sets.\nWebsphere specialist at Websphere/Java Consultant\n2004 - 2004 (less than a year)\nProvided Advices on Java Development, training, recommendations, advices, articles and suggestions to a wide variety of friends and co-workers alike\n1 recommendation available upon request\nSystem Architect and J2EE Developer at Savista Corporation (former NewPOS International)\n2002 - 2004 (2 years)\nI've adjusted an existing reporting tool that was responsible for providing a KPI reporting, Fraud detection and other features for Domino Pizza in US. After that, transitioned to the NewPOS team which I've assisted in the development of the Java/J2EE Version of the POS at the version 8.X, also worked in a TDD environment, multi-threading Java Development and Raw TCP/IP communication mechanism.\n1 recommendation available upon request\nJava Software Architect (Consultant) at Ericsson\n2002 - 2002 (less than a year)\nDeveloped a integration between Ericsson's EHTP Setler for Interconnection and Brazil's tax rules to provide a way to calculate interconnection fees based off on call types. Worked with BMP (Billing Mediation Platform) with TCL/TK and C/C++ for file processing of CDR (Call Data Records) Also worked on a Java2 solution to address fixes of CDR's. 
Also worked on a project where it had the ability to process measurement of usage and critical analysis and fraud detection of measured services on telephony.\nSenior Web Developer at Internet Group do Brasil\n2000 - 2001 (1 year)\n1 recommendation available upon request\nWeb Developer at iG\n2000 - 2001 (1 year)\nI was responsible for the Development and Improvement of Árvore da Vida website, and then transitioned to the Software Factory Team which began developing paid services, such as numerology reports, astrology maps, and other paid services alike.\n1 recommendation available upon request\nTechnical Support and Intranet Software Development at Sudameris Corretora de Cambio e Valores Mobiliários\n1996 - 2000 (4 years)\nProvided User Technical Support, Intranet web development, Home Broker web development with Java Applets, Ticket Emission System using MS Visual Basic, Lotus Notes and Network Administration.\n1 recommendation available upon request","languages":"English","skills":"& Expertise\nJSP\nEclipse Tomcat Hibernate Struts MySQL\nJava JSF SOAP\nWebsphere Flex\nJBoss XML PHP\nSwing AMFPHP SWT\nAxis EJB J2EE\nWeb Services UML\nScrum AJAX\nDesign Patterns JavaScript SAML\nSubversion Spring PL/SQL\nDelphi CVS\nServlets jQuery Ant Apache JSON\nLinux SOA\nMaven DB2 REST CSS\nSQL Server Weblogic C++\nGit J2MEJSP\nEclipse Tomcat Hibernate Struts MySQL\nJava JSF SOAP\nWebsphere Flex\nJBoss XML PHP\nSwing AMFPHP SWT\nAxis EJB J2EE\nWeb Services UML\nScrum AJAX\nDesign Patterns JavaScript SAML\nSubversion Spring PL/SQL\nDelphi CVS\nServlets jQuery Ant Apache JSON\nLinux SOA\nMaven DB2 REST CSS\nSQL Server Weblogic C++\nGit J2ME","objective":"-C\nJava Enterprise Edition","education":"Universidade Paulista\nBachelor of Business Administration (B.B.A.), Business Administration and Management, General, 1999 - 2001","interests":"Java, J2EE, JEE, technology, web development, multi tier software architecture, entrepreneurship, Hibernate, Adobe Air, Adobe Flex, MySQL, PHP, 
J2ME, C, C++, SQL, DB2, Oracle, MySQL, Gaming, XNA Framework\nErnani Joppert P Martins\nSoftware Engineer / Lead Architect\n11 people have recommended Ernani\n\"One of the greatest web developer I have had opportunity to work with at IBM. Learning is just you can get from him, excellent knowledge, responsible, excellent developer and excellent person. During several years we have worked together, Always friendly, focused and compromise describe him. Ernani is a great\nelement, any team or enterprise will be proud to have such a great developer, he talks by its own work but, he is strongly recommended for any challenges, leading teams. And as a great person and friend.\"\n— Gabriel G., IT SPECIALIST, IBM, worked directly with Ernani at IBM Global Services\n\"This is the greatest web developer I have had opportunity to manage among many at IBM. It is difficult to scope Ernani's qualities, só many are they só I will point some such as *deep knowledge *fast execution\n*innovative problem solving approach *personal commitment *drive to achieve. One day, one of my Java development teams were fighting to deliver a large piece of code they have worked 3 days and nights...\nThe four of them were almost giving up when I suggested Ernani to take a look... He sits, opens The code, inspects it... In 15min he was able to fix it and they met The deadline. This java team was composed of The very best except for Ernani. During The time We havê worked together, I saw many displays of his\ncompetence at global level. Always calm, always friendly, always fixing impossível codes. Ernani programs in every language and understands quickly The most intricate business situtations. 
I strongly recommend Ernani for ANY development challenges, leading teams.\"\nCarlos G., GBS Application Services - Information & Data Management Leader (DBA Center of Competency Mgr), IBM, managed Ernani at IBM Global Services\n\"Working together with Ernani 5 years ago made me only have good recommendations about him. Ernani is a committed professional that holds great technical skills and problem solving ability in whatever he works on. Meanwhile, his friendly nature makes it easy to work in a team. It would be a pleasure to work with him again.\"\nJuliana R., Technical Consultant, IBMWeb Identity AIT, worked directly with Ernani at IBM Global Services\n\"Ernani is one of the most talented technical people I have worked. He is focused and full of bright ideas. His abilities are not constrained only in the technical field but also to address business needs with revolutionary\nsolutions either technically enabling or improving processes. I strongly recommend him as an extremely creative and motivated professional.\"\nFábio H., Info & Data Mgmt CoC Manager, IBM, worked directly with Ernani at IBM Global Services\n\"Ernani is a very intelligent guy. I remember several discussions we had about a lot of issues, about architectures, about ideas, about implementing systems. He is very smart, and if you ask him something, he can give you very interesting ideas. I know I can count on him if I need. I recommend Ernani and I hope to work with him again in the future.\"\nFernando G., IT Specialist, IBM Brazil, worked directly with Ernani at IBM Global Services\n\"Ernani is a great developer and friend. Always striving to learn more about technology so he can be better prepared for greater technical challenges. It is always good to count with the collaboration and technical feedback of such a qualified developer like Ernani.\"\nManuel L., Owner, PHP Classes repository, worked directly with Ernani at PHPSP - PHP User Group in São Paulo\n\"Uma pessoa que admiro muito. 
Um profissional singular, completo e dedicado. Faltam adjetivos para poder mensurar sua competência. Recomendadíssimo!\"\nDani K., Social media manager, Garage Interactive Marketing, was with another company when working with Ernani at Websphere/Java Consultant\n\"Ernani well as a great friend is an excellent professional of Technology, always updated, intelligent and patient. I recommend this excellent professional.\"\nAnderson S., Programmer Analyst TCL/C/C++, NewPos International, was a consultant or contractor to Ernani at Savista Corporation (former NewPOS International)\n\"Ernani is a great computer analyst. He self learned some programing languages like Perl, PHP and Java. After iG he started alone some Internet projects like a virtual shopping and a payment gateway. He is serious and focused guy.\"\nSandro E., Developement Manager, iG - Internet Group do Brasil, worked directly with Ernani at Internet Group do Brasil\n\"Ernani is a focused developer, he love techonology and inspires any person to work. He have strong knowlogement in Java plataform, and know web services deeply. At Internet Group (iG) we made some website to support millions of views. I recommend Ernani, he is a great profissional.\"\nSandro E., Developement Manager, iG - Internet Group do Brasil, worked directly with Ernani at iG\n\"O Ernani é uma pessoa determinada e muito competente. 
Possui conhecimento técnico avançado, capacidade analítica e experiência para gerenciar o alcance eficiente de metas e resultados.\"\nJose Eduardo F., Controladoria e Consultoria, Banco Sudameris de Investimento, was with another company when working with Ernani at Sudameris Corretora de Cambio e Valores Mobiliários\nContact Ernani on LinkedIn\nPage1\nPage1"} -------------------------------------------------------------------------------- /compiled/resume.html.json: -------------------------------------------------------------------------------- 1 | {"name":"Alex Dubinchyk","email":"alexs.dbk@gmail.com","objective":"Seeking a challenging position to use my software Web development and process optimization skills.","summary":"I worked on a wide range of products including building advanced dynamic multi language web sites,\ninternal and external API's, well as creating new internal workflows. My goal is to work in a passionate team,\nthat loves their work and the products they are creating together, supporting, mentoring, optimizing workflows and\ncreating high quality software.\nI'm energenic, solution oriented team-player, constantly learning and growing as a team, and bringing high spirits\nalong with me.","technology":"Server side PHP programming, REST, OPP, MVC, Yii framework.\nSQL Database programming: MySQL, SQL, MSSQL.\nClient-side programming: JavaScript, AJAX, jQuery.\nSmarty Template Engine, HTML5, CSS3.\nWebserver installation and configuration: Apache, Nginx, IIS.\nSource control: SVN/Subversion, GIT.\nPlatforms: Linux, Mac, Windows.","skills":"Server side PHP programming, REST, OPP, MVC, Yii framework.\nSQL Database programming: MySQL, SQL, MSSQL.\nClient-side programming: JavaScript, AJAX, jQuery.\nSmarty Template Engine, HTML5, CSS3.\nWebserver installation and configuration: Apache, Nginx, IIS.\nSource control: SVN/Subversion, GIT.\nPlatforms: Linux, Mac, Windows.","experience":"Actuate http://www.actuate.com/\nFull stack PHP Developer\nSan Manteo, 
CA. November 2014 - current\nImplemented web service(API, MVC, Php)\nIntroduced Git to the team\nBloomSky http://www.bloomsky.com\nBackend Developer\nSunnyvale, CA. October 2014 - November 2014\nIntegrated APIs (PHP, Python, Django, Nginx)\nSet up PHPUnit and functional testing\nDeploy merge DB script (MSSQL>MySQL)\nRozumsoft LLC, / Telecontact LLC\nhttp://www.telecontact.ru/\nFull stack PHP Developer\nBelarus, Minsk. February 2012 - September 2014\nProgramming modules of dynamically building statistics for quality control assessment\nproject.\nDesigned and developed project quality control assessment that estimated effective work of operators in\ncallcenters from different regions. Includes modules separation mapping based on more 20 users roles(RBAC),\nonline editors(logs, statistical formulas, projects rules and etc) for more 10k clients.\nFinalization coding script of internal protection algorithm authorization and validation.\nProgramming API service for quality control assessment. Interact with user interface(AJAX) with fast load up\nJSON data, audio files and extract large data in excel.\nAPI data exchange integrated with data parse, merge, view in table linkage, send emails. 
Support more 100\nsource, near 1000 onlineusers, more 10 servers.\nCodes cross-browsers users interfaces in project quality control assessment, using Javascript, jQuery, JSON,\nBootstrap.\nDeveloped JavaScript audio player with individual custom design, hardware acceleration, deceleration and the\norder to play audio files.\nDesigned and developed the company website ( http://www.rozumsoft.com/ ).\nImplemented 3 domain zones(ru/by/com) algorithm.\nCodes contents editor for 3 languages\nFixed and support custom seo map logic.\nDeveloped scripts products to callcenters operators\nDevelopment of сomplex reports and statistical summaries by Cisco data telephony.\nRedesigned and reimplemented projects using MVC approach and strong OOP design\nDesigned and conversion of scripts database, extensive SQL query optimization.\nReal Estate Agency Assistant heals LLC, Full stack PHP Developer\nBelarus, Minsk, http://www.a-h.by ; May 2011 - January 2012\nDynamic website design and programming using PHP, MySQL, HTML, CSS. Setup and administration of web servers\nand server software.\nBusiness consulting of securing/ planning project.\nDevelopment to online marketing, search engine placement and promotion ( http://www.mogu.by ;\nhttp://www.a-h.by ).","education":"Belarusian University of Informatics and Radioelectronics,\nBS in Modeling and computer-aided design of radioelectronics devices."} -------------------------------------------------------------------------------- /compiled/resume.txt.json: -------------------------------------------------------------------------------- 1 | {"name":"Alex Dubinchyk","email":"alexs.dbk@gmail.com","objective":"Seeking a challenging position to use my software Web development and process optimization skills.","summary":"I worked on a wide range of products including building advanced dynamic multi language web sites, internal and external API's, well as creating new internal workflows. 
My goal is to work in a passionate team, that loves their work and the products they are creating together, supporting, mentoring, optimizing workflows and creating high quality software. I'm energenic, solution oriented team-player, constantly learning and growing as a team, and bringing high spirits along with me.","technology":"Server side PHP programming, REST, OPP, MVC, Yii framework.\nSQL Database programming: MySQL, SQL, MSSQL.\nClient-side programming: JavaScript, AJAX, jQuery.\nSmarty Template Engine, HTML5, CSS3.\nWebserver installation and configuration: Apache, Nginx, IIS.\nSource control: SVN/Subversion, GIT.\nPlatforms: Linux, Mac, Windows.","skills":"Server side PHP programming, REST, OPP, MVC, Yii framework.\nSQL Database programming: MySQL, SQL, MSSQL.\nClient-side programming: JavaScript, AJAX, jQuery.\nSmarty Template Engine, HTML5, CSS3.\nWebserver installation and configuration: Apache, Nginx, IIS.\nSource control: SVN/Subversion, GIT.\nPlatforms: Linux, Mac, Windows.","experience":"Actuate http://www.actuate.com/\nFull stack PHP Developer\nSan Manteo, CA. November 2014 - current\nImplemented web service(API, MVC, Php)\nIntroduced Git to the team\nBloomSky http://www.bloomsky.com\nBackend Developer\nSunnyvale, CA. October 2014 - November 2014\nIntegrated APIs (PHP, Python, Django, Nginx)\nSet up PHPUnit and functional testing\nDeploy merge DB script (MSSQL>MySQL)\nRozumsoft LLC, / Telecontact LLC http://www.telecontact.ru/\nFull stack PHP Developer\nBelarus, Minsk. February 2012 - September 2014\nProgramming modules of dynamically building statistics for quality control assessment project.\nDesigned and developed project quality control assessment that estimated effective work of operators in callcenters from different regions. 
Includes modules separation mapping based on more 20 users roles(RBAC), online editors(logs, statistical formulas, projects rules and etc) for more 10k clients.\nFinalization coding script of internal protection algorithm authorization and validation.\nProgramming API service for quality control assessment. Interact with user interface(AJAX) with fast load up JSON data, audio files and extract large data in excel.\nAPI data exchange integrated with data parse, merge, view in table linkage, send emails. Support more 100 source, near 1000 onlineusers, more 10 servers.\nCodes cross-browsers users interfaces in project quality control assessment, using Javascript, jQuery, JSON, Bootstrap.\nDeveloped JavaScript audio player with individual custom design, hardware acceleration, deceleration and the order to play audio files.\nDesigned and developed the company website (http://www.rozumsoft.com/).\nImplemented 3 domain zones(ru/by/com) algorithm.\nCodes contents editor for 3 languages\nFixed and support custom seo map logic.\nDeveloped scripts products to callcenters operators\nDevelopment of сomplex reports and statistical summaries by Cisco data telephony.\nRedesigned and reimplemented projects using MVC approach and strong OOP design\nDesigned and conversion of scripts database, extensive SQL query optimization.\nReal Estate Agency Assistant heals LLC, Full stack PHP Developer\nBelarus, Minsk, http://www.a-h.by; May 2011 - January 2012\nDynamic website design and programming using PHP, MySQL, HTML, CSS. 
Setup and administration of web servers and server software.\nBusiness consulting of securing/ planning project.\nDevelopment to online marketing, search engine placement and promotion (http://www.mogu.by; http://www.a-h.by).","education":"Belarusian University of Informatics and Radioelectronics,\nBS in Modeling and computer-aided design of radioelectronics devices.\nskype: cool-skype-id","skype":"cool-skype-id"} -------------------------------------------------------------------------------- /docs/instructions.doc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/likerRr/code4goal-resume-parser/55d532325feb4ab3f990fe320991346d40fb5109/docs/instructions.doc -------------------------------------------------------------------------------- /docs/result.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/likerRr/code4goal-resume-parser/55d532325feb4ab3f990fe320991346d40fb5109/docs/result.png -------------------------------------------------------------------------------- /gulpfile.js: -------------------------------------------------------------------------------- 1 | var gulp = require('gulp'), 2 | run = require('gulp-run'), 3 | gutil = require('gulp-util'), 4 | spawn = require('child_process').spawn, 5 | node; 6 | 7 | /** 8 | * $ gulp server 9 | * description: launch the server. If there's a server already running, kill it. 
/**
 * $ gulp server
 * description: launch the server. If there's a server already running, kill it.
 */
gulp.task('server', function () {
  // set DEBUG=*
  // set DEBUG=socket.io*
  if (node) node.kill();

  // spawn() does not run the command through a shell, so every array entry is
  // handed to the child process verbatim. Wrapping the path in literal double
  // quotes would make the quote characters part of the file name, so the path
  // is passed bare (spaces in __dirname are still safe for the same reason).
  node = spawn('node', ['app.js', '-f', __dirname + '/public/resume.txt'], {stdio: 'inherit'});

  node.on('close', function (code) {
    // the child's exit code 8 is reported as an error; gulp keeps running
    if (code === 8) {
      gutil.log('Error detected, waiting for changes...');
    }
  });
});
Martins.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/likerRr/code4goal-resume-parser/55d532325feb4ab3f990fe320991346d40fb5109/public/ErnaniJoppert P Martins.docx -------------------------------------------------------------------------------- /public/resume.html: -------------------------------------------------------------------------------- 1 |
2 |

3 | 4 | README.md 5 |

6 | 7 |

Alex Dubinchyk
8 | Pleasant Hill, CA
9 | alexs.dbk@gmail.com
10 | Status:Green Card Holder 11 |

12 | 13 |
14 | OBJECTIVE
18 | 19 |

Seeking a challenging position to use my software Web development and process optimization skills.

20 | 21 |
22 | SUMMARY
26 | 27 |

  I worked on a wide range of products including building advanced dynamic multi language web sites, 28 | internal and external API's, well as creating new internal workflows. My goal is to work in a passionate team, 29 | that loves their work and the products they are creating together, supporting, mentoring, optimizing workflows and 30 | creating high quality software. 31 | I'm energenic, solution oriented team-player, constantly learning and growing as a team, and bringing high spirits 32 | along with me.

33 | 34 |
35 | Technology
39 | 40 |
    41 |
  • Server side PHP programming, REST, OPP, MVC, Yii framework.
  • 42 |
  • SQL Database programming: MySQL, SQL, MSSQL.
  • 43 |
  • Client-side programming: JavaScript, AJAX, jQuery.
  • 44 |
  • Smarty Template Engine, HTML5, CSS3.
  • 45 |
  • Webserver installation and configuration: Apache, Nginx, IIS.
  • 46 |
  • Source control: SVN/Subversion, GIT.
  • 47 |
  • Platforms: Linux, Mac, Windows.
  • 48 |
49 | 50 |
51 | EXPERIENCE
55 | 56 |

Actuate http://www.actuate.com/
57 | Full stack PHP Developer
58 | San Manteo, CA. November 2014 - current

59 | 60 |
    61 |
  • Implemented web service(API, MVC, Php)
  • 62 |
  • Introduced Git to the team
  • 63 |
64 | 65 |

BloomSky http://www.bloomsky.com
66 | Backend Developer
67 | Sunnyvale, CA. October 2014 - November 2014

68 | 69 |
    70 |
  • Integrated APIs (PHP, Python, Django, Nginx)
  • 71 |
  • Set up PHPUnit and functional testing
  • 72 |
  • Deploy merge DB script (MSSQL>MySQL)
  • 73 |
74 | 75 |

Rozumsoft LLC, / Telecontact LLC 76 | http://www.telecontact.ru/
77 | Full stack PHP Developer
78 | Belarus, Minsk. February 2012 - September 2014

79 |   Programming modules of dynamically building statistics for quality control assessment 80 | project.

81 | 82 |
    83 |
  • Designed and developed project quality control assessment that estimated effective work of operators in 84 | callcenters from different regions. Includes modules separation mapping based on more 20 users roles(RBAC), 85 | online editors(logs, statistical formulas, projects rules and etc) for more 10k clients. 86 |
  • 87 |
  • Finalization coding script of internal protection algorithm authorization and validation.
  • 88 |
  • Programming API service for quality control assessment. Interact with user interface(AJAX) with fast load up 89 | JSON data, audio files and extract large data in excel. 90 |
  • 91 |
  • API data exchange integrated with data parse, merge, view in table linkage, send emails. Support more 100 92 | source, near 1000 onlineusers, more 10 servers. 93 |
  • 94 |
  • Codes cross-browsers users interfaces in project quality control assessment, using Javascript, jQuery, JSON, 95 | Bootstrap. 96 |
  • 97 |
  • Developed JavaScript audio player with individual custom design, hardware acceleration, deceleration and the 98 | order to play audio files. 99 |
  • 100 |
101 | 102 |

  Designed and developed the company website (http://www.rozumsoft.com/). 103 |

104 | 105 |
    106 |
  • Implemented 3 domain zones(ru/by/com) algorithm.
  • 107 |
  • Codes contents editor for 3 languages
  • 108 |
  • Fixed and support custom seo map logic.
  • 109 |
110 | 111 |

  Developed scripts products to callcenters operators

112 | 113 |
    114 |
  • Development of сomplex reports and statistical summaries by Cisco data telephony.
  • 115 |
  • Redesigned and reimplemented projects using MVC approach and strong OOP design
  • 116 |
  • Designed and conversion of scripts database, extensive SQL query optimization.
  • 117 |
118 | 119 |

Real Estate Agency Assistant heals LLC, Full stack PHP Developer
120 | Belarus, Minsk, http://www.a-h.by; May 2011 - January 2012

121 | 122 |
    123 |
  • Dynamic website design and programming using PHP, MySQL, HTML, CSS. Setup and administration of web servers 124 | and server software. 125 |
  • 126 |
  • Business consulting of securing/ planning project.
  • 127 |
  • Development to online marketing, search engine placement and promotion (http://www.mogu.by; 128 | http://www.a-h.by). 129 |
  • 130 |
131 | 132 |
133 | EDUCATION
137 | 138 |
    139 |
  • Belarusian University of Informatics and Radioelectronics,
  • 140 |
  • BS in Modeling and computer-aided design of radioelectronics devices.
  • 141 |
142 |
143 |
-------------------------------------------------------------------------------- /public/resume.txt: -------------------------------------------------------------------------------- 1 | Alex Dubinchyk 2 | Pleasant Hill, CA 3 | alexs.dbk@gmail.com 4 | Status:Green Card Holder 5 | 6 | OBJECTIVE 7 | 8 | Seeking a challenging position to use my software Web development and process optimization skills. 9 | SUMMARY 10 | 11 | I worked on a wide range of products including building advanced dynamic multi language web sites, internal and external API's, well as creating new internal workflows. My goal is to work in a passionate team, that loves their work and the products they are creating together, supporting, mentoring, optimizing workflows and creating high quality software. I'm energenic, solution oriented team-player, constantly learning and growing as a team, and bringing high spirits along with me. 12 | Technology 13 | 14 | Server side PHP programming, REST, OPP, MVC, Yii framework. 15 | SQL Database programming: MySQL, SQL, MSSQL. 16 | Client-side programming: JavaScript, AJAX, jQuery. 17 | Smarty Template Engine, HTML5, CSS3. 18 | Webserver installation and configuration: Apache, Nginx, IIS. 19 | Source control: SVN/Subversion, GIT. 20 | Platforms: Linux, Mac, Windows. 21 | 22 | EXPERIENCE 23 | 24 | Actuate http://www.actuate.com/ 25 | Full stack PHP Developer 26 | San Manteo, CA. November 2014 - current 27 | 28 | Implemented web service(API, MVC, Php) 29 | Introduced Git to the team 30 | 31 | BloomSky http://www.bloomsky.com 32 | Backend Developer 33 | Sunnyvale, CA. October 2014 - November 2014 34 | 35 | Integrated APIs (PHP, Python, Django, Nginx) 36 | Set up PHPUnit and functional testing 37 | Deploy merge DB script (MSSQL>MySQL) 38 | 39 | Rozumsoft LLC, / Telecontact LLC http://www.telecontact.ru/ 40 | Full stack PHP Developer 41 | Belarus, Minsk. 
February 2012 - September 2014 42 | 43 | Programming modules of dynamically building statistics for quality control assessment project. 44 | 45 | Designed and developed project quality control assessment that estimated effective work of operators in callcenters from different regions. Includes modules separation mapping based on more 20 users roles(RBAC), online editors(logs, statistical formulas, projects rules and etc) for more 10k clients. 46 | Finalization coding script of internal protection algorithm authorization and validation. 47 | Programming API service for quality control assessment. Interact with user interface(AJAX) with fast load up JSON data, audio files and extract large data in excel. 48 | API data exchange integrated with data parse, merge, view in table linkage, send emails. Support more 100 source, near 1000 onlineusers, more 10 servers. 49 | Codes cross-browsers users interfaces in project quality control assessment, using Javascript, jQuery, JSON, Bootstrap. 50 | Developed JavaScript audio player with individual custom design, hardware acceleration, deceleration and the order to play audio files. 51 | 52 | Designed and developed the company website (http://www.rozumsoft.com/). 53 | 54 | Implemented 3 domain zones(ru/by/com) algorithm. 55 | Codes contents editor for 3 languages 56 | Fixed and support custom seo map logic. 57 | 58 | Developed scripts products to callcenters operators 59 | 60 | Development of сomplex reports and statistical summaries by Cisco data telephony. 61 | Redesigned and reimplemented projects using MVC approach and strong OOP design 62 | Designed and conversion of scripts database, extensive SQL query optimization. 63 | 64 | Real Estate Agency Assistant heals LLC, Full stack PHP Developer 65 | Belarus, Minsk, http://www.a-h.by; May 2011 - January 2012 66 | 67 | Dynamic website design and programming using PHP, MySQL, HTML, CSS. Setup and administration of web servers and server software. 
/**
 * Announces the files that are about to be handled, then runs each of them
 * through `processing.run` and forwards every resulting PreparedFile to the
 * callback.
 *
 * @param {Array} files paths of the files to process
 * @param {Function} cbPreparedFile invoked once per file with its PreparedFile
 */
ParseBoy.prototype.willHelpWithPleasure = function(files, cbPreparedFile) {
  // Validate before doing any work (same order as storeResume): previously the
  // check ran inside the per-file completion handler, i.e. only after the file
  // had already been extracted, and once for every file.
  if (!_.isFunction(cbPreparedFile)) {
    return console.error('cbPreparedFile should be a function');
  }

  this.say('Hi, i can help you with ' + files.map(function(file) {return path.basename(file);}).join(', '));

  _.forEach(files, function(file) {
    // processing.run accepts (file, callback) only; the never-assigned `type`
    // argument that used to be passed here has been dropped.
    processing.run(file, function(PreparedFile) {
      cbPreparedFile(PreparedFile);
    });
  });
};
/**
 * Container for everything extracted from one resume.
 * `parts` maps a part name to either a string (see addKey) or a plain object
 * (see addObject).
 *
 * @constructor
 */
function Resume() {
  // generic resume format
  this.parts = {};
}

/**
 * Stores a text value under `key`, appending to whatever is already there.
 *
 * @param {String} key part name
 * @param {*} value text to store; falsy or whitespace-only values are ignored,
 *   other non-string values are converted with String()
 */
Resume.prototype.addKey = function(key, value) {
  // Coerce instead of calling .trim() directly: a truthy non-string
  // (number, array, ...) used to raise a TypeError here.
  value = value ? String(value).trim() : '';

  // reject empty values
  if (!value) {
    return;
  }

  if (Object.prototype.hasOwnProperty.call(this.parts, key)) {
    // trim() removes the trailing newline callers append (e.g. the profile
    // parser passes `url + "\n"`), so successive values used to be fused
    // together without any separator. Re-insert it when appending.
    value = this.parts[key] + '\n' + value;
  }

  this.parts[key] = value;
};

/**
 * Merges the truthy entries of `options` into the object stored under `key`,
 * creating that object first when the key is not present yet.
 *
 * @param {String} key part name
 * @param {Object} options values to merge; null/undefined is treated as empty
 */
Resume.prototype.addObject = function(key, options) {
  var section;

  if (!Object.prototype.hasOwnProperty.call(this.parts, key)) {
    this.parts[key] = {};
  }
  section = this.parts[key];

  Object.keys(options || {}).forEach(function(optionName) {
    var optionVal = options[optionName];

    // falsy values (empty strings, 0, null, ...) are skipped, as before
    if (optionVal) {
      section[optionName] = optionVal;
    }
  });
};

/**
 * Serializes the collected parts.
 *
 * @returns {String} JSON representation of `parts`
 */
Resume.prototype.jsoned = function() {
  return JSON.stringify(this.parts);
};
/**
 * Lists the files inside a directory of CVs and reports them, prefixed with
 * the directory path, to the callback. The callback is always invoked with
 * the SomeHR instance as `this`.
 *
 * @param {String} path directory that holds the CV files
 * @param {Function} cbAcceptedFiles called as (err, files); `err` is a message
 *   string when the directory does not exist, or the fs.readdir error
 */
SomeHR.prototype.iHaveCVPack = function(path, cbAcceptedFiles) {
  var self = this;

  if (!_.isFunction(cbAcceptedFiles)) {
    return console.error('cbAcceptedFiles should be a function');
  }

  if (!fs.existsSync(path)) {
    return cbAcceptedFiles.call(this, 'no one wants to work with us :(');
  }

  fs.readdir(path, function(err, files) {
    // On failure `files` is undefined; mapping it used to throw a TypeError
    // before the caller ever got to see the error.
    if (err) {
      return cbAcceptedFiles.call(self, err);
    }

    cbAcceptedFiles.call(self, err, files.map(function(file) {
      return path + '/' + file;
    }));
  });
};
this.speaker() + '\n' : ''; 37 | }; 38 | 39 | function coloredOutput(output) { 40 | return (output)[self.color]; 41 | } 42 | } 43 | 44 | /** 45 | * 46 | * @type {Speakable} 47 | */ 48 | module.exports = Speakable; -------------------------------------------------------------------------------- /src/dictionary.js: -------------------------------------------------------------------------------- 1 | var request = require("request"); 2 | var cheerio = require("cheerio"); 3 | var _ = require('underscore'); 4 | 5 | module.exports = { 6 | titles: { 7 | objective: ['objective', 'objectives'], 8 | summary: ['summary'], 9 | technology: ['technology', 'technologies'], 10 | experience: ['experience'], 11 | education: ['education'], 12 | skills: ['skills', 'Skills & Expertise', 'technology', 'technologies'], 13 | languages: ['languages'], 14 | courses: ['courses'], 15 | projects: ['projects', 'personal projects'], 16 | links: ['links'], 17 | contacts: ['contacts'], 18 | positions: ['positions', 'position'], 19 | profiles: ['profiles', 'social connect', 'social-profiles', 'social profiles'], 20 | awards: ['awards'], 21 | honors: ['honors'], 22 | additional: ['additional'], 23 | certification: ['certification', 'certifications'], 24 | interests: ['interests'] 25 | }, 26 | profiles: [ 27 | ['github.com', function(url, Resume, profilesWatcher) { 28 | download(url, function(data, err) { 29 | if (data) { 30 | var $ = cheerio.load(data), 31 | fullName = $('.vcard-fullname').text(), 32 | location = $('.octicon-location').parent().text(), 33 | mail = $('.octicon-mail').parent().text(), 34 | link = $('.octicon-link').parent().text(), 35 | clock = $('.octicon-clock').parent().text(), 36 | company = $('.octicon-organization').parent().text(); 37 | 38 | Resume.addObject('github', { 39 | name: fullName, 40 | location: location, 41 | email: mail, 42 | link: link, 43 | joined: clock, 44 | company: company 45 | }); 46 | } else { 47 | return console.log(err); 48 | } 49 | //profilesInProgress--; 
50 | profilesWatcher.inProgress--; 51 | }); 52 | }], 53 | ['linkedin.com', function(url, Resume, profilesWatcher) { 54 | download(url, function(data, err) { 55 | if (data) { 56 | var $ = cheerio.load(data), 57 | linkedData = { 58 | positions: { 59 | past: [], 60 | current: {} 61 | }, 62 | languages: [], 63 | skills: [], 64 | educations: [], 65 | volunteering: [], 66 | volunteeringOpportunities: [] 67 | }, 68 | $pastPositions = $('.past-position'), 69 | $currentPosition = $('.current-position'), 70 | $languages = $('#languages-view .section-item > h4 > span'), 71 | $skills = $('.skills-section .skill-pill .endorse-item-name-text'), 72 | $educations = $('.education'), 73 | $volunteeringListing = $('ul.volunteering-listing > li'), 74 | $volunteeringOpportunities = $('ul.volunteering-opportunities > li'); 75 | 76 | linkedData.summary = $('#summary-item .summary').text(); 77 | linkedData.name = $('.full-name').text(); 78 | // current position 79 | linkedData.positions.current = { 80 | title: $currentPosition.find('header > h4').text(), 81 | company: $currentPosition.find('header > h5').text(), 82 | description: $currentPosition.find('p.description').text(), 83 | period: $currentPosition.find('.experience-date-locale').text() 84 | }; 85 | // past positions 86 | _.forEach($pastPositions, function(pastPosition) { 87 | var $pastPosition = $(pastPosition); 88 | linkedData.positions.past.push({ 89 | title: $pastPosition.find('header > h4').text(), 90 | company: $pastPosition.find('header > h5').text(), 91 | description: $pastPosition.find('p.description').text(), 92 | period: $pastPosition.find('.experience-date-locale').text() 93 | }); 94 | }); 95 | _.forEach($languages, function(language) { 96 | linkedData.languages.push($(language).text()); 97 | }); 98 | _.forEach($skills, function(skill) { 99 | linkedData.skills.push($(skill).text()); 100 | }); 101 | _.forEach($educations, function(education) { 102 | var $education = $(education); 103 | linkedData.educations.push({ 104 | 
title: $education.find('header > h4').text(), 105 | major: $education.find('header > h5').text(), 106 | date: $education.find('.education-date').text() 107 | }); 108 | }); 109 | _.forEach($volunteeringListing, function(volunteering) { 110 | linkedData.volunteering.push($(volunteering).text()); 111 | }); 112 | _.forEach($volunteeringOpportunities, function(volunteering) { 113 | linkedData.volunteeringOpportunities.push($(volunteering).text()); 114 | }); 115 | 116 | Resume.addObject('linkedin', linkedData); 117 | } else { 118 | return console.log(err); 119 | } 120 | profilesWatcher.inProgress--; 121 | }); 122 | }], 123 | 'facebook.com', 124 | 'bitbucket.org', 125 | 'stackoverflow.com' 126 | ], 127 | inline: { 128 | //address: 'address', 129 | skype: 'skype' 130 | }, 131 | regular: { 132 | name: [ 133 | /([A-Z][a-z]*)(\s[A-Z][a-z]*)/ 134 | ], 135 | email: [ 136 | /([a-z0-9_\.-]+)@([\da-z\.-]+)\.([a-z\.]{2,6})/ 137 | ], 138 | phone: [ 139 | /((?:\+?\d{1,3}[\s-])?\(?\d{2,3}\)?[\s.-]?\d{3}[\s.-]\d{4,5})/ 140 | ] 141 | } 142 | }; 143 | 144 | // helper method 145 | function download(url, callback) { 146 | request(url, function (error, response, body) { 147 | if (!error && response.statusCode == 200) { 148 | callback(body); 149 | } else { 150 | callback(null, error) 151 | } 152 | }); 153 | } -------------------------------------------------------------------------------- /src/libs/parser.js: -------------------------------------------------------------------------------- 1 | var _ = require('underscore'); 2 | var resume = require('../Resume'); 3 | var fs = require('fs'); 4 | var dictionary = require('./../dictionary.js'); 5 | 6 | var profilesWatcher = { 7 | // for change value by reference 8 | inProgress: 0 9 | }; 10 | 11 | module.exports = { 12 | parse: parse 13 | }; 14 | 15 | function makeRegExpFromDictionary() { 16 | var regularRules = { 17 | titles: {}, 18 | profiles: [], 19 | inline: {} 20 | }; 21 | 22 | _.forEach(dictionary.titles, function(titles, key) { 23 | 
regularRules.titles[key] = []; 24 | _.forEach(titles, function(title) { 25 | regularRules.titles[key].push(title.toUpperCase()); 26 | regularRules.titles[key].push(title[0].toUpperCase() + title.substr(1, title.length)); 27 | }); 28 | }); 29 | 30 | _.forEach(dictionary.profiles, function(profile) { 31 | var profileHandler, 32 | profileExpr; 33 | 34 | if (_.isArray(profile)) { 35 | if (_.isFunction(profile[1])) { 36 | profileHandler = profile[1]; 37 | } 38 | profile = profile[0]; 39 | } 40 | profileExpr = "((?:https?:\/\/)?(?:www\\.)?"+profile.replace('.', "\\.")+"[\/\\w \\.-]*)"; 41 | if (_.isFunction(profileHandler)) { 42 | regularRules.profiles.push([profileExpr, profileHandler]); 43 | } else { 44 | regularRules.profiles.push(profileExpr); 45 | } 46 | }); 47 | 48 | _.forEach(dictionary.inline, function(expr, name) { 49 | regularRules.inline[name] = expr+":?[\\s]*(.*)"; 50 | }); 51 | 52 | return _.extend(dictionary, regularRules); 53 | } 54 | 55 | // dictionary is object, so it will be extended by reference 56 | makeRegExpFromDictionary(); 57 | 58 | function parse(PreparedFile, cbReturnResume) { 59 | var rawFileData = PreparedFile.raw, 60 | Resume = new resume(), 61 | rows = rawFileData.split("\n"), 62 | row; 63 | 64 | // save prepared file text (for debug) 65 | //fs.writeFileSync('./parsed/'+PreparedFile.name + '.txt', rawFileData); 66 | 67 | // 1 parse regulars 68 | parseDictionaryRegular(rawFileData, Resume); 69 | 70 | for (var i = 0; i < rows.length; i++) { 71 | row = rows[i]; 72 | 73 | // 2 parse profiles 74 | row = rows[i] = parseDictionaryProfiles(row, Resume); 75 | // 3 parse titles 76 | parseDictionaryTitles(Resume, rows, i); 77 | parseDictionaryInline(Resume, row); 78 | } 79 | 80 | if (_.isFunction(cbReturnResume)) { 81 | // wait until download and handle internet profile 82 | var checkTimer = setInterval(function() { 83 | if (profilesWatcher.inProgress === 0) { 84 | cbReturnResume(Resume); 85 | clearInterval(checkTimer); 86 | } 87 | }, 200); 88 | } else 
/**
 * Rebuilds the text that starts one row before @rowNum and runs to the end of
 * @allRows. The one-row lookback of the original implementation is kept, but
 * the start index is clamped to 0: for rowNum 0 the old loop read allRows[-1]
 * and produced a spurious leading empty row.
 *
 * @param {Number} rowNum index of the row of interest
 * @param {Array} allRows all rows of the document
 * @returns {string} the selected rows joined with "\n" ("" when none are left)
 */
function restoreTextByRows(rowNum, allRows) {
  var firstRow = Math.max(rowNum - 1, 0);

  return allRows.slice(firstRow).join("\n");
}

/**
 * Count words in string. Words are separated by any run of whitespace, so
 * repeated spaces or tabs no longer inflate the count, and an empty or
 * whitespace-only string has 0 words.
 *
 * @param {String} str
 * @returns {Number}
 */
function countWords(str) {
  var text = (str || '').trim();

  return text ? text.split(/\s+/).length : 0;
}
(countWords(row) <= 5) { 173 | _.forEach(expressions, function(expression) { 174 | ruleExpression = new RegExp(expression); 175 | isRuleFound = ruleExpression.test(row); 176 | 177 | if (isRuleFound) { 178 | allTitles = _.without(allTitles.split('|'), key).join('|'); 179 | searchExpression = '(?:' + expression + ')((.*\n)+?)(?:'+allTitles+'|{end})'; 180 | // restore remaining text to search in relevant part of text 181 | result = new RegExp(searchExpression, 'gm').exec(restoreTextByRows(rowIdx, rows)); 182 | 183 | if (result) { 184 | Resume.addKey(key, result[1]); 185 | } 186 | } 187 | }); 188 | } 189 | }); 190 | } 191 | 192 | /** 193 | * 194 | * @param row 195 | * @param Resume 196 | * @returns {*} 197 | */ 198 | function parseDictionaryProfiles(row, Resume) { 199 | var regularDictionary = dictionary.profiles, 200 | find, 201 | modifiedRow = row; 202 | 203 | _.forEach(regularDictionary, function(expression) { 204 | var expressionHandler; 205 | 206 | if (_.isArray(expression)) { 207 | if (_.isFunction(expression[1])) { 208 | expressionHandler = expression[1]; 209 | } 210 | expression = expression[0]; 211 | } 212 | find = new RegExp(expression).exec(row); 213 | if (find) { 214 | Resume.addKey('profiles', find[0] + "\n"); 215 | modifiedRow = row.replace(find[0], ''); 216 | if (_.isFunction(expressionHandler)) { 217 | profilesWatcher.inProgress++; 218 | expressionHandler(find[0], Resume, profilesWatcher); 219 | } 220 | } 221 | }); 222 | 223 | return modifiedRow; 224 | } -------------------------------------------------------------------------------- /src/libs/processing.js: -------------------------------------------------------------------------------- 1 | var path = require('path'); 2 | var _ = require('underscore'); 3 | var textract = require('textract'); 4 | var mime = require('mime'); 5 | var fs = require('fs'); 6 | 7 | module.exports.run = processFile; 8 | 9 | /** 10 | * 11 | * @param file 12 | * @param cbAfterProcessing 13 | */ 14 | function processFile(file, 
cbAfterProcessing) { 15 | extractText(file, function(PreparedFile) { 16 | if (_.isFunction(cbAfterProcessing)) { 17 | cbAfterProcessing(PreparedFile); 18 | } else { 19 | return console.error('cbAfterProcessing should be a function'); 20 | } 21 | }); 22 | } 23 | 24 | /** 25 | * 26 | * @param data 27 | * @returns {string} 28 | */ 29 | function cleanTextByRows(data) { 30 | var rows, 31 | clearRow, 32 | clearRows = []; 33 | 34 | rows = data.split("\n"); 35 | for (var i = 0; i < rows.length; i++) { 36 | clearRow = cleanStr(rows[i]); 37 | if (clearRow) { 38 | clearRows.push(clearRow); 39 | } 40 | } 41 | 42 | return clearRows.join("\n") + "\n{end}"; 43 | } 44 | 45 | /** 46 | * 47 | * @param file 48 | * @param cbAfterExtract 49 | */ 50 | function extractText(file, cbAfterExtract) { 51 | textract.fromFileWithPath(path.resolve(file), {preserveLineBreaks: true}, function(err, data) { 52 | if (err) { 53 | return console.log(err); 54 | } 55 | if (_.isFunction(cbAfterExtract)) { 56 | data = cleanTextByRows(data); 57 | var File = new PreparedFile(file, data.replace(/^\s/gm, '')); 58 | cbAfterExtract(File); 59 | } else { 60 | return console.error('cbAfterExtract should be a function'); 61 | } 62 | }); 63 | } 64 | 65 | /** 66 | * 67 | * @param str 68 | * @returns {string} 69 | */ 70 | function cleanStr(str) { 71 | return str.replace(/\r?\n|\r|\t|\n/g, '').trim(); 72 | } 73 | 74 | function PreparedFile(file, raw) { 75 | this.path = file; 76 | this.mime = mime.lookup(file); 77 | this.ext = mime.extension(this.mime); 78 | this.raw = raw; 79 | this.name = path.basename(file); 80 | } 81 | 82 | /** 83 | * 84 | * @param Resume 85 | */ 86 | PreparedFile.prototype.addResume = function(Resume) { 87 | this.resume = Resume; 88 | }; 89 | 90 | PreparedFile.prototype.saveResume = function(path, cbSavedResume) { 91 | path = path || __dirname; 92 | 93 | if (!_.isFunction(cbSavedResume)) { 94 | return console.error('cbSavedResume should be a function'); 95 | } 96 | 97 | if 
(fs.statSync(path).isDirectory() && this.resume) { 98 | fs.writeFile(path + '/' + this.name + '.json', this.resume.jsoned(), cbSavedResume); 99 | } 100 | }; --------------------------------------------------------------------------------