├── .npmignore
├── index.js
├── .jshintrc
├── .github
    └── workflows
    │   ├── nodejs.yml
    │   └── npmpublish.yml
├── .gitignore
├── Gruntfile.js
├── index.d.ts
├── LICENSE
├── package.json
├── lib
    ├── util.js
    └── parser.js
├── README.md
└── test
    ├── util_test.js
    └── parser_test.js


/.npmignore:
--------------------------------------------------------------------------------
1 | test/**
2 | 


--------------------------------------------------------------------------------
/index.js:
--------------------------------------------------------------------------------
1 | "use strict";
2 | 
3 | module.exports = require('./lib/parser');


--------------------------------------------------------------------------------
/.jshintrc:
--------------------------------------------------------------------------------
 1 | {
 2 |   "curly": true,
 3 |   "immed": true,
 4 |   "latedef": "nofunc",
 5 |   "newcap": true,
 6 |   "noarg": true,
 7 |   "sub": true,
 8 |   "boss": true,
 9 |   "eqnull": true,
10 |   "node": true,
11 |   "eqeqeq": true,
12 |   "undef": true,
13 |   "unused": true,
14 |   "indent": 2,
15 |   "maxlen": 100,
16 |   "strict": true,
17 |   "trailing": true,
18 |   "expr": true,
19 |   "predef": [
20 |     "it",
21 |     "describe",
22 |     "beforeEach",
23 |     "afterEach",
24 |     "before",
25 |     "after"
26 |   ]
27 | }
28 | 


--------------------------------------------------------------------------------
/.github/workflows/nodejs.yml:
--------------------------------------------------------------------------------
 1 | name: Node CI
 2 | 
 3 | on: [push]
 4 | 
 5 | jobs:
 6 |   build:
 7 | 
 8 |     runs-on: ubuntu-latest
 9 | 
10 |     strategy:
11 |       matrix:
12 |         node-version: [8.x, 10.x, 12.x]
13 | 
14 |     steps:
15 |     - uses: actions/checkout@v1
16 |     - name: Use Node.js ${{ matrix.node-version }}
17 |       uses: actions/setup-node@v1
18 |       with:
19 |         node-version: ${{ matrix.node-version }}
20 |     - name: npm install, build, and test
21 |       run: |
22 |         npm ci
23 |         npm run build --if-present
24 |         npm test
25 |       env:
26 |         CI: true
27 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Logs
 2 | logs
 3 | *.log
 4 | 
 5 | # Runtime data
 6 | pids
 7 | *.pid
 8 | *.seed
 9 | 
10 | # Directory for instrumented libs generated by jscoverage/JSCover
11 | lib-cov
12 | 
13 | # Coverage directory used by tools like istanbul
14 | coverage
15 | 
16 | # Grunt intermediate storage (http://gruntjs.com/creating-plugins#storing-task-files)
17 | .grunt
18 | 
19 | # node-waf configuration
20 | .lock-wscript
21 | 
22 | # Compiled binary addons (http://nodejs.org/api/addons.html)
23 | build/Release
24 | 
25 | # Dependency directory
26 | # https://www.npmjs.org/doc/misc/npm-faq.html#should-i-check-my-node_modules-folder-into-git
27 | node_modules
28 | 
29 | # WebStorm
30 | .idea


--------------------------------------------------------------------------------
/Gruntfile.js:
--------------------------------------------------------------------------------
 1 | "use strict";
 2 | 
 3 | /*global module:false*/
 4 | module.exports = function(grunt) {
 5 | 
 6 |   grunt.initConfig({
 7 |     jshint: {
 8 |       options: {
 9 |         jshintrc: '.jshintrc',
10 |         reporter: require('jshint-stylish')
11 |       },
12 |       gruntfile: {
13 |         src: 'Gruntfile.js'
14 |       },
15 |       lib_test: {
16 |         src: ['lib/**/*.js', 'test/**/*.js']
17 |       }
18 |     },
19 |     mochaTest: {
20 |       src: ['test/**/*_test.js']
21 |     }
22 |   });
23 | 
24 |   grunt.loadNpmTasks('grunt-mocha-test');
25 |   grunt.loadNpmTasks('grunt-contrib-jshint');
26 | 
27 |   // Default task.
28 |   grunt.registerTask('default', ['jshint', 'mochaTest']);
29 |   grunt.registerTask('test', ['mochaTest']);
30 | 
31 | };
32 | 


--------------------------------------------------------------------------------
/index.d.ts:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * A parsed personal name
 3 |  */
 4 | export type ParsedName = {
 5 |     /**
 6 |      * Name prefix or title
 7 |      */
 8 |     prefix: string | null;
 9 |     /**
10 |      * First name or given name
11 |      */
12 |     first: string | null;
13 |     /**
14 |      * Middle name or initial
15 |      */
16 |     middle: string | null;
17 |     /**
18 |      * Last name or family name or surname
19 |      */
20 |     last: string | null;
21 |     /**
22 |      * Suffix
23 |      */
24 |     suffix: string | null;
25 |     /**
26 |      * Original input
27 |      */
28 |     original: string;
29 | }
30 | /**
31 |  * Attempts to parse the given personal name into components parts
32 |  * @param name a full personal name to parse
33 |  */
34 | export default function parse(name: string): ParsedName;


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | The MIT License (MIT)
 2 | 
 3 | Copyright (c) 2015 Matt Klaber
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
23 | 


--------------------------------------------------------------------------------
/.github/workflows/npmpublish.yml:
--------------------------------------------------------------------------------
 1 | name: Node.js Package
 2 | 
 3 | on:
 4 |   release:
 5 |     types: [created]
 6 | 
 7 | jobs:
 8 |   build:
 9 |     runs-on: ubuntu-latest
10 |     steps:
11 |       - uses: actions/checkout@v1
12 |       - uses: actions/setup-node@v1
13 |         with:
14 |           node-version: 12
15 |       - run: npm ci
16 |       - run: npm test
17 | 
18 |   publish-npm:
19 |     needs: build
20 |     runs-on: ubuntu-latest
21 |     steps:
22 |       - uses: actions/checkout@v1
23 |       - uses: actions/setup-node@v1
24 |         with:
25 |           node-version: 12
26 |           registry-url: https://registry.npmjs.org/
27 |       - run: npm ci
28 |       - run: npm publish
29 |         env:
30 |           NODE_AUTH_TOKEN: ${{secrets.npm_token}}
31 | 
32 | #   publish-gpr:
33 | #     needs: build
34 | #     runs-on: ubuntu-latest
35 | #     steps:
36 | #       - uses: actions/checkout@v1
37 | #       - uses: actions/setup-node@v1
38 | #         with:
39 | #           node-version: 12
40 | #           registry-url: https://npm.pkg.github.com/
41 | #           scope: '@mklaber'
42 | #       - run: npm ci
43 | #       - run: npm publish
44 | #         env:
45 | #           NODE_AUTH_TOKEN: ${{secrets.GITHUB_TOKEN}}
46 | 


--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "another-name-parser",
 3 |   "version": "0.1.0",
 4 |   "description": "Yet another name parser (prefix, first, middle, last, and suffix)",
 5 |   "main": "index.js",
 6 |   "homepage": "https://github.com/mklaber/node-another-name-parser",
 7 |   "scripts": {
 8 |     "test": "mocha"
 9 |   },
10 |   "keywords": [
11 |     "personal",
12 |     "name",
13 |     "parser",
14 |     "parse",
15 |     "standardizer"
16 |   ],
17 |   "author": "Matt Klaber <klaber@gmail.com> (http://github.com/mklaber/)",
18 |   "license": "MIT",
19 |   "repository": {
20 |     "type": "git",
21 |     "url": "git://github.com/mklaber/node-another-name-parser.git"
22 |   },
23 |   "contributors": [
24 |     {
25 |       "name": "Matt Klaber",
26 |       "email": "klaber@gmail.com",
27 |       "web": "http://github.com/mklaber/"
28 |     }
29 |   ],
30 |   "bugs": {
31 |     "url": "http://github.com/mklaber/node-another-name-parser/issues/"
32 |   },
33 |   "engines": {
34 |     "node": ">=0.10.0"
35 |   },
36 |   "dependencies": {
37 |     "lodash": "^4.17.11"
38 |   },
39 |   "devDependencies": {
40 |     "chai": "^3.2.0",
41 |     "grunt": "^1.0.4",
42 |     "grunt-contrib-jshint": "^1.1.0",
43 |     "grunt-mocha-test": "^0.12.7",
44 |     "jshint-stylish": "^2.0.1",
45 |     "mocha": "^5.2.0"
46 |   },
47 |   "types": "./index.d.ts"
48 | }
49 | 


--------------------------------------------------------------------------------
/lib/util.js:
--------------------------------------------------------------------------------
 1 | "use strict";
 2 | 
 3 | var _ = require('lodash');
 4 | 
 5 | module.exports = {
 6 |   isBlank: function(s) {
 7 |     return s === null || s === undefined ? true : /^[\s\xa0]*$/.test(s);
 8 |   },
 9 | 	collapseWhitespace: function(s) {
10 | 		return s.replace(/[\s\xa0]+/g, ' ').replace(/^\s+|\s+$/g, '');
11 | 	},
12 |   indexOf: function(haystack, needle) {
13 |     if (_.isString(haystack)) {
14 |       return haystack.toUpperCase().indexOf(needle.toUpperCase());
15 |     } else {
16 |       // assume haystack is an array-ish thing of strings
17 |       return _.findIndex(haystack, function(val) {
18 |         return val.toUpperCase() === needle.toUpperCase();
19 |       });
20 |     }
21 |   },
22 | 	count: function(haystack, needle) {
23 |     var cnt = 0,
24 |       n = needle.toUpperCase();
25 | 
26 |     if (_.isString(haystack)) {
27 |       var hs = haystack.toUpperCase(),
28 |         idx = hs.indexOf(n);
29 | 
30 |       while (idx >= 0) {
31 |         cnt++;
32 |         idx = hs.indexOf(n, idx + 1);
33 |       }
34 |     } else {
35 |       cnt = _.filter(haystack, function(val) {
36 |         return val.toUpperCase() === n;
37 |       }).length;
38 |     }
39 | 
40 |     return cnt;
41 | 	},
42 |   contains: function(haystack, needle) {
43 |     return this.indexOf(haystack, needle) > -1;
44 |   },
45 |   compact: function(haystack) {
46 |     return _.compact(haystack);
47 |   }
48 | };
49 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # another-name-parser
 2 | 
 3 | Here's yet another name-parsing Node.js library. It takes a [personal name](https://en.wikipedia.org/wiki/Personal_name) 
 4 | and returns a `prefix` (title), `first` (or *given name*), `middle` (or initial), `last` (or *family name*), 
 5 | and `suffix`. It's designed for US English but could be useful elsewhere. It's inspired by a SQL script a 
 6 | colleague pulled together long ago combined with inspiration from a few other libraries.
 7 | 
 8 | 
 9 | [![NPM](https://nodei.co/npm/another-name-parser.png)](https://nodei.co/npm/another-name-parser/)
10 | 
11 | 
12 | ## Installation
13 | 
14 | Via [npm](https://www.npmjs.com/package/another-name-parser)
15 | 
16 | ```bash
17 | $ npm install another-name-parser
18 | ```
19 | 
20 | ## Usage
21 | 
22 | ```javascript
23 | 
24 | const parser = require('another-name-parser');
25 | 
26 | const name = parser('Commissioner James "Jim" W. Gordon, Sr.');
27 | // → { prefix: 'Commissioner',
28 | //     first: 'James',
29 | //     middle: 'W.',
30 | //     last: 'Gordon',
31 | //     suffix: 'Sr.',
32 | //     original: 'Commissioner James "Jim" W. Gordon, Sr.' }
33 | 
34 | ```
35 | 
36 | ## Features
37 | 
38 | * Handles many common prefixes/titles
39 | * Retains `.` that appear in the original name (*Dr.* &rarr; *Dr.*, *PhD* &rarr; *PhD*)
40 | * Recognizes common compound first names (*Mary Jo*, *Juan Carlos*, etc.)
41 | * Recognizes common compound last name prefixes (*St.*, *Mac*, *Bin*, etc.)
42 | * Recognizes *Last Name, First Name* order
43 | * Ignores quoted or parenthesized nicknames (*Catherine "Cathy" Smith* &rarr; *Catherine Smith*, *James (Jim) Von Trapp* &rarr; *James Von Trapp*)
44 | 
45 | 
46 | ## Tests
47 | 
48 | ```bash
49 | npm test
50 | ```
51 | 
52 | 
53 | ## TODO
54 | 
55 | * Handle multiple names, e.g., *Jim & Mark Anderson*
56 | * Handle likely company names (maybe)
57 | 
58 | ## Acknowledgements
59 | 
60 | * My former colleague and whomever he got his original script from
61 | * Some of the test cases and compound lastname prefixes came from the [humanname](https://www.npmjs.com/package/humanname) module
62 | 
63 | 
64 | ## License
65 | 
66 | [MIT License](http://www.opensource.org/licenses/mit-license.php)
67 | 
68 | ## Author
69 | 
70 | [Matt Klaber](https://github.com/mklaber)
71 | 


--------------------------------------------------------------------------------
/test/util_test.js:
--------------------------------------------------------------------------------
  1 | "use strict";
  2 | 
  3 | var sut = require('../lib/util'),
  4 |   expect = require('chai').expect;
  5 | 
  6 | describe('util', function () {
  7 | 
  8 |   describe('isBlank', function () {
  9 | 
 10 |     var blankTests = [null, undefined, '', '   ', ' \n '];
 11 | 
 12 |     blankTests.forEach(function(test) {
 13 |       it('should return true for: ' + test, function () {
 14 |         expect(sut.isBlank(test)).to.be.true;
 15 |       });
 16 |     });
 17 | 
 18 |     it('should return false for words', function () {
 19 |       expect(sut.isBlank('foo')).to.be.false;
 20 |     });
 21 | 
 22 |   });
 23 | 
 24 |   describe('collapseWhitespace', function () {
 25 | 
 26 |     it('should trim collapse some whitespace', function () {
 27 |       var word = '   blah \n asdf';
 28 |       expect(sut.collapseWhitespace(word)).to.eql('blah asdf');
 29 |     });
 30 | 
 31 |   });
 32 | 
 33 | 
 34 |   var tests = [
 35 |     {
 36 |       haystack: 'It was the best of times. It was the worst of times.',
 37 |       haystackType: 'string',
 38 |       index: {
 39 |         needle: 'Best',
 40 |         expected: 'It was the best of times'.indexOf('best')
 41 |       },
 42 |       count: {
 43 |         needle: 'i',
 44 |         expected: 4
 45 |       }
 46 |     },
 47 |     {
 48 |       haystack: ['ABC', 'ghi', 'GhI', 'jkm'],
 49 |       haystackType: 'array',
 50 |       index: {
 51 |         needle: 'GHI',
 52 |         expected: 1
 53 |       },
 54 |       count: {
 55 |         needle: 'GHI',
 56 |         expected: 2
 57 |       }
 58 |     },
 59 |     {
 60 |       haystack: 'It was the best of times. It was the worst of times.',
 61 |       haystackType: 'string',
 62 |       index: {
 63 |         needle: 'Awesome',
 64 |         expected: -1
 65 |       },
 66 |       count: {
 67 |         needle: 'needle',
 68 |         expected: 0
 69 |       }
 70 |     },
 71 |     {
 72 |       haystack: ['ABC', 'ghi', 'GhI', 'jkm'],
 73 |       haystackType: 'array',
 74 |       index: {
 75 |         needle: 'XYZ',
 76 |         expected: -1
 77 |       },
 78 |       count: {
 79 |         needle: 'needle',
 80 |         expected: 0
 81 |       }
 82 |     }
 83 |   ];
 84 | 
 85 |   describe('indexOf', function () {
 86 | 
 87 |     tests.forEach(function(test) {
 88 |       it('should return ' + test.index.expected + ' for ' + test.index.needle + ' in ' +
 89 |         test.haystackType, function () {
 90 |         expect(sut.indexOf(test.haystack, test.index.needle)).to.eql(test.index.expected);
 91 |       });
 92 |     });
 93 | 
 94 |   });
 95 | 
 96 |   describe('count', function () {
 97 | 
 98 |     tests.forEach(function(test) {
 99 |       it('should return ' + test.count.expected + ' for ' + test.count.needle + ' in ' +
100 |         test.haystackType, function () {
101 |         expect(sut.count(test.haystack, test.count.needle)).to.eql(test.count.expected);
102 |       });
103 |     });
104 | 
105 |   });
106 | 
107 |   describe('contains', function () {
108 | 
109 |     tests.forEach(function(test) {
110 |       it('should return ' + (test.count.expected !== 0) + ' for ' + test.count.needle + ' in ' +
111 |         test.haystackType, function () {
112 |         expect(sut.contains(test.haystack, test.count.needle)).to.eql(test.count.expected !== 0);
113 |       });
114 |     });
115 | 
116 |   });
117 | 
118 |   describe('compact', function () {
119 | 
120 |     it('should compact an array', function () {
121 |       var bloatedArr = [123, 0, null, undefined, false, true, -1];
122 |       var compactArr = [123, true, -1];
123 |       expect(sut.compact(bloatedArr)).to.eql(compactArr);
124 | 
125 |     });
126 | 
127 |   });
128 | 
129 | });
130 | 


--------------------------------------------------------------------------------
/lib/parser.js:
--------------------------------------------------------------------------------
  1 | "use strict";
  2 | 
  3 | var util = require('./util');
  4 | 
  5 | var suffixes = [
  6 |   'CCSP', 'CPA', 'DC', 'DDS', 'DMD', 'DO', 'DPM', 'DVM', 'ESQ', 'ESTATE',
  7 |   'FAM', 'FAMILY', 'II', 'III', 'IV', 'JR', 'LUTCF', 'MD', 'OC', 'OD', 'PA',
  8 |   'PE', 'PHD', 'SJ', 'SR', 'V', 'VI', 'VP', 'V', 'I', 'VI', 'VII', 'VIII',
  9 |   'CNP', 'CPA', 'DDS', 'DMin', 'DMA ', 'DMus', 'OD', 'DO', 'PharmD', 'PhD',
 10 |   'PsyD', 'DVM', 'EI', 'EIT', 'Esq', 'JD', 'LLS', 'LP', 'LPN',
 11 |   'MD', 'PE', 'RA', 'RLA', 'RLS', 'RN', 'SE', 'ESQUIRE', 'MSW', 'LCSW', 'ACSW',
 12 |   'ASW', 'PPSC', 'BSc', 'MBA', 'BSW', 'BSN', 'CHPN', 'MSN', 'CCRN', 'MSc'
 13 | ];
 14 | 
 15 | var prefixes = [
 16 |   'AB', 'AIRMAN', 'AN', 'AND', 'BG', 'BR', 'BRIG', 'BRIGADIER', 'CADET',
 17 |   'CAPT', 'CAPTAIN', 'CMDR', 'COL', 'COLONEL', 'COMMISSIONER', 'COMMANDER',
 18 |   'CORPORAL', 'CPL', 'CPT', 'DEP', 'DEPUTY', 'DOCTOR', 'DR', 'FATHER', 'FR',
 19 |   'GEN', 'GENERAL', 'HON', 'HONORABLE', 'JDGE', 'JUDGE', 'LIEUTENANT', 'LT',
 20 |   'LTCOL', 'LTGEN', 'MAJ', 'MAJGEN', 'MAJOR', 'MASTER', 'MISS', 'MISTER',
 21 |   'MR', 'MRMRS', 'MRS', 'MS', 'PASTOR', 'PFC', 'PRES', 'PRIVATE', 'PROF',
 22 |   'PROFESSOR', 'PVT', 'RABBI', 'REP', 'REPRESENTATIVE', 'REV', 'REVEREND',
 23 |   'SEN', 'SENATOR', 'SGT', 'SSGT', 'SHERIFF', 'SIR', 'SISTER', 'SM', 'SN',
 24 |   'SRA', 'SSGT'
 25 | ];
 26 | 
 27 | // based off of frequency > 1000 in US base file
 28 | var compoundFirstNames = [
 29 |   'ANA MARIA', 'ANN MARIE', 'ANNA MARIA', 'ANNA MARIE', 'ANNE MARIE',
 30 |   'BARBARA ANN', 'BETH ANN', 'BETTY ANN', 'BETTY JEAN', 'BETTY JO',
 31 |   'BILLIE JO', 'CAROL ANN', 'JO ANN', 'JO ANNA', 'JO ANNE', 'JO ELLEN',
 32 |   'JOHN PAUL', 'JOSE LUIS', 'JUAN CARLOS', 'JULIE ANN', 'LA DONNA', 'LA TOYA',
 33 |   'LA VERNE', 'LE ROY', 'LEE ANN', 'LEIGH ANN', 'LISA MARIE', 'LORI ANN',
 34 |   'LOU ANN', 'LU ANN', 'MARIA DE', 'MARIA DEL', 'MARIA ELENA', 'MARIA TERESA',
 35 |   'MARY ALICE', 'MARY ANN', 'MARY ANNE', 'MARY BETH', 'MARY ELIZABETH',
 36 |   'MARY ELLEN', 'MARY FRANCES', 'MARY GRACE', 'MARY JANE', 'MARY JEAN',
 37 |   'MARY JO', 'MARY KAY', 'MARY LEE', 'MARY LOU', 'MARY LOUISE', 'MARY LYNN',
 38 |   'PATRICIA ANN', 'ROSE ANN', 'ROSE MARIE', 'ROSE MARY', 'RUTH ANN',
 39 |   'SAN JUANA', 'SAN JUANITA', 'SUE ANN', 'WILLIE MAE'
 40 | ];
 41 | 
 42 | var compoundLastNamePrefixes = [
 43 |   'AL', 'BIN', 'DA', 'DE', 'DEL', 'DELLA', 'DI', 'DU', 'EL', 'IBN', 'LA',
 44 |   'LE', 'LO', 'MAC', 'MC', 'PIETRO', 'ST', 'TER', 'VAN', 'VANDEN',
 45 |   'VERE', 'VON'
 46 | ];
 47 | 
 48 | var isSuffix = function(s) {
 49 |   return util.contains(suffixes, s.replace(/\./g, ''));
 50 | };
 51 | 
 52 | var isPrefix = function(s) {
 53 |   return util.contains(prefixes, s.replace(/\./g, ''));
 54 | };
 55 | 
 56 | var isCompoundFirstName = function(s1, s2) {
 57 |   return util.contains(compoundFirstNames, s1 + ' ' + s2);
 58 | };
 59 | 
 60 | var isCompoundLastNamePrefix = function(s) {
 61 |   return util.contains(compoundLastNamePrefixes, s.replace(/\./g, ''));
 62 | };
 63 | 
 64 | var parser = function(name) {
 65 |   var originalName = name;
 66 |   var parsedName = {
 67 |     prefix: null,
 68 |     first: null,
 69 |     middle: null,
 70 |     last: null,
 71 |     suffix: null,
 72 |     original: originalName
 73 |   };
 74 |   if (util.isBlank(name)) {
 75 |     return parsedName;
 76 |   }
 77 | 
 78 | 
 79 |   // Ugh, probably shouldn't be stripping double quotes in the cleaner
 80 | 
 81 |   // strip out James (Jim) Gordon
 82 |   name = name.replace(/\s*\(.+\)\s*/g, ' ');
 83 |   // strip out James "Jim" Gordon
 84 |   name = name.replace(/\s*".+"\s*/g, ' ');
 85 | 
 86 |   // collapse whitespace
 87 |   name = util.collapseWhitespace(name);
 88 | 
 89 | 
 90 | 
 91 | 
 92 | 
 93 |   // TODO: may not want to limit this to "1" (though it makes splitting weird)
 94 |   if (util.count(name, ',') === 1) {
 95 |     var commaTokens = name.split(',');
 96 |     var tokenAfterComma = commaTokens[1].trim();
 97 |     // check if the name ends with a suffix (ignore the '.' in Ph.D., Jr. etc.)
 98 |     if (isSuffix(tokenAfterComma)) {
 99 |       // assume it's Jim Gordon, Esq.
100 |       parsedName.suffix = tokenAfterComma;
101 |       name = commaTokens[0].trim();
102 |     } else {
103 |       // assume it's Gordon, Jim
104 |       // reverse it
105 |       name = commaTokens[1].trim() + ' ' + commaTokens[0].trim();
106 |     }
107 |   }
108 | 
109 |   var tokens = name.split(/\s+/),
110 |     totalTokens = tokens.length,
111 |     loop = 0,
112 |     token;
113 | 
114 |   while(loop < totalTokens) {
115 | 
116 |     token = tokens[loop];
117 | 
118 |     if (isPrefix(token)) {
119 |       parsedName.prefix = util.isBlank(parsedName.prefix) ?
120 |         token : parsedName.prefix + ' ' + token;
121 |       // null it out because we've taken care of it
122 |       tokens[loop] = null;
123 |     } else if (isSuffix(token)) {
124 |       parsedName.suffix = util.isBlank(parsedName.suffix) ?
125 |         token : parsedName.suffix + ' ' + token;
126 |       // null it out because we've taken care of it
127 |       tokens[loop] = null;
128 |     }
129 | 
130 |     loop++;
131 | 
132 |   }
133 | 
134 |   tokens = util.compact(tokens), // clear out the nulls induced above
135 |     totalTokens = tokens.length,
136 |     loop = 0;
137 | 
138 |   var hasCompoundFirst = false,
139 |     hasCompoundLast = false;
140 | 
141 |   // if we only have "Mary Ann", this will split Mary --> First, Ann --> Last
142 |   hasCompoundFirst = totalTokens > 2 && isCompoundFirstName(tokens[0], tokens[1]);
143 |   hasCompoundLast = totalTokens > 2 && isCompoundLastNamePrefix(tokens[totalTokens - 2]);
144 | 
145 | 
146 |   while(loop < totalTokens) {
147 | 
148 |     token = tokens[loop];
149 | 
150 |     if (loop === 0 || (loop === 1 && hasCompoundFirst)) {
151 |       parsedName.first = loop === 0 ? token : parsedName.first + ' ' + token;
152 |     } else if (hasCompoundLast && (loop === (totalTokens - 1) || loop === (totalTokens - 2))) {
153 |       // has a compound last and we're on the last 2 tokens
154 |       parsedName.last = util.isBlank(parsedName.last) ?
155 |         token : parsedName.last + ' ' + token;
156 |     } else if (loop === 1 || (loop === 2 && hasCompoundFirst)) {
157 |       // we're on the 2nd word; or the 3rd word but the 1st 2 are compound
158 |       if (totalTokens >= 3 && !hasCompoundFirst && !isCompoundLastNamePrefix(token)) {
159 |         // we're on the second word of a three or more word name
160 |         // and the first two weren't a compound
161 |         parsedName.middle = token;
162 |       } else if(totalTokens >= 4 && hasCompoundFirst) {
163 |         // we're on the third word of a four or more word name
164 |         // and the first two were compound
165 |         parsedName.middle = token;
166 |       } else {
167 |         // we're on the second word of a two word name
168 |         parsedName.last = token;
169 |       }
170 |     } else { //if ( loop > 1) {
171 |       // we're on the 3rd word of a 3 or more name
172 |       parsedName.last = util.isBlank(parsedName.last) ?
173 |         token : parsedName.last + ' ' + token;
174 |     }
175 | 
176 |     loop++;
177 |   }
178 | 
179 |   return parsedName;
180 | };
181 | 
182 | module.exports = parser;
183 | 


--------------------------------------------------------------------------------
/test/parser_test.js:
--------------------------------------------------------------------------------
  1 | "use strict";
  2 | 
  3 | var sut = require('../lib/parser'),
  4 |   expect = require('chai').expect;
  5 | 
  6 | describe('parser', function () {
  7 | 
  8 |   var tests = [
  9 |     {
 10 |       "test": "last, first",
 11 |       "result": {
 12 |         "prefix": null,
 13 |         "first": "John",
 14 |         "middle": null,
 15 |         "last": "Doe",
 16 |         "suffix": null,
 17 |         "original": "Doe, John"
 18 |       }
 19 |     }, {
 20 |       "test": "last, first middle with excessive whitespace",
 21 |       "result": {
 22 |         "prefix": null,
 23 |         "first": "John",
 24 |         "middle": "P",
 25 |         "last": "Doe",
 26 |         "suffix": null,
 27 |         "original": "Doe, \nJohn    P\t\t  \r"
 28 |       }
 29 |     }, {
 30 |       "test": "last, first middle",
 31 |       "result": {
 32 |         "prefix": null,
 33 |         "first": "John",
 34 |         "middle": "P",
 35 |         "last": "Doe",
 36 |         "suffix": null,
 37 |         "original": "Doe, John P"
 38 |       }
 39 |     }, {
 40 |       "test": "last, title first initial",
 41 |       "result": {
 42 |         "prefix": "Dr.",
 43 |         "first": "John",
 44 |         "middle": "P",
 45 |         "last": "Doe",
 46 |         "suffix": null,
 47 |         "original": "Doe, Dr. John P"
 48 |       }
 49 |     }, {
 50 |       "test": "hyphenated last name",
 51 |       "result": {
 52 |         "prefix": null,
 53 |         "first": "John",
 54 |         "middle": "R",
 55 |         "last": "Doe-Smith",
 56 |         "suffix": null,
 57 |         "original": "John R Doe-Smith"
 58 |       }
 59 |     }, {
 60 |       "test": "first last",
 61 |       "result": {
 62 |         "prefix": null,
 63 |         "first": "John",
 64 |         "middle": null,
 65 |         "last": "Doe",
 66 |         "suffix": null,
 67 |         "original": "John Doe"
 68 |       }
 69 |     }, {
 70 |       "test": "title, suffix, and compound last name",
 71 |       "result": {
 72 |         "prefix": "Mr.",
 73 |         "first": "Anthony",
 74 |         "middle": "R",
 75 |         "last": "Von Fange",
 76 |         "suffix": "III",
 77 |         "original": "Mr. Anthony R Von Fange III"
 78 |       }
 79 |     }, {
 80 |       "test": "first middle last",
 81 |       "result": {
 82 |         "prefix": null,
 83 |         "first": "Sara",
 84 |         "middle": "Ann",
 85 |         "last": "Fraser",
 86 |         "suffix": null,
 87 |         "original": "Sara Ann Fraser"
 88 |       }
 89 |     }, {
 90 |       "test": "compound first and last",
 91 |       "result": {
 92 |         "prefix": null,
 93 |         "first": "Mary Ann",
 94 |         "middle": null,
 95 |         "last": "Fraser",
 96 |         "suffix": null,
 97 |         "original": "Mary Ann Fraser"
 98 |       }
 99 |     }, {
100 |       "test": "last, compound first",
101 |       "result": {
102 |         "prefix": null,
103 |         "first": "Mary Ann",
104 |         "middle": null,
105 |         "last": "Fraser",
106 |         "suffix": null,
107 |         "original": "Fraser, Mary Ann"
108 |       }
109 |     }, {
110 |       "test": "compound first and compound last and middle",
111 |       "result": {
112 |         "prefix": null,
113 |         "first": "Jo Ellen",
114 |         "middle": "Mary",
115 |         "last": "St. Louis",
116 |         "suffix": null,
117 |         "original": "Jo Ellen Mary St. Louis"
118 |       }
119 |     }, {
120 |       "test": "single name is just first",
121 |       "result": {
122 |         "prefix": null,
123 |         "first": "Adam",
124 |         "middle": null,
125 |         "last": null,
126 |         "suffix": null,
127 |         "original": "Adam"
128 |       }
129 |     }, {
130 |       "test": "ignore quoted names",
131 |       "result": {
132 |         "prefix": null,
133 |         "first": "Donald",
134 |         "middle": "Rex",
135 |         "last": "St. Louis",
136 |         "suffix": null,
137 |         "original": "Donald \"Don\" Rex St. Louis"
138 |       }
139 |     }, {
140 |       "test": "ignore parenthesized names",
141 |       "result": {
142 |         "prefix": null,
143 |         "first": "Donald",
144 |         "middle": "Rex",
145 |         "last": "St. Louis",
146 |         "suffix": null,
147 |         "original": "Donald (Don) Rex St. Louis"
148 |       }
149 |     }, {
150 |       "test": "split compound first name if it's the only name given",
151 |       "result": {
152 |         "prefix": null,
153 |         "first": "Mary",
154 |         "middle": null,
155 |         "last": "Ann",
156 |         "suffix": null,
157 |         "original": "Mary Ann"
158 |       }
159 |     }, {
160 |       "test": "first and last",
161 |       "result": {
162 |         "prefix": null,
163 |         "first": "Jonathan",
164 |         "middle": null,
165 |         "last": "Smith",
166 |         "suffix": null,
167 |         "original": "Jonathan Smith"
168 |       }
169 |     }, {
170 |       "test": "first, compound last, and suffix",
171 |       "result": {
172 |         "prefix": null,
173 |         "first": "Anthony",
174 |         "middle": null,
175 |         "last": "Von Fange",
176 |         "suffix": "III",
177 |         "original": "Anthony Von Fange III"
178 |       }
179 |     }, {
180 |       "test": "title first and last",
181 |       "result": {
182 |         "prefix": "Mr",
183 |         "first": "John",
184 |         "middle": null,
185 |         "last": "Doe",
186 |         "suffix": null,
187 |         "original": "Mr John Doe"
188 |       }
189 |     }, {
190 |       "test": "multiple titles",
191 |       "result": {
192 |         "prefix": "Mr. Dr.",
193 |         "first": "Jane",
194 |         "middle": null,
195 |         "last": "Smith",
196 |         "suffix": null,
197 |         "original": "Mr. Dr. Jane Smith"
198 |       }
199 |     }, {
200 |       "test": "title and multiple suffix",
201 |       "result": {
202 |         "prefix": "Mr",
203 |         "first": "John",
204 |         "middle": null,
205 |         "last": "Doe",
206 |         "suffix": "PhD Esq",
207 |         "original": "Mr John Doe PhD Esq"
208 |       }
209 |     }, {
210 |       "test": "title first and last keeps punctuation",
211 |       "result": {
212 |         "prefix": "Mrs.",
213 |         "first": "Jane",
214 |         "middle": null,
215 |         "last": "Doe",
216 |         "suffix": null,
217 |         "original": "Mrs. Jane Doe"
218 |       }
219 |     }, {
220 |       "test": "first last and suffix",
221 |       "result": {
222 |         "prefix": null,
223 |         "first": "Smarty",
224 |         "middle": null,
225 |         "last": "Pants",
226 |         "suffix": "PhD",
227 |         "original": "Smarty Pants PhD"
228 |       }
229 |     }, {
230 |       "test": "first last, suffix",
231 |       "result": {
232 |         "prefix": null,
233 |         "first": "Smarty",
234 |         "middle": null,
235 |         "last": "Pants",
236 |         "suffix": "Ph.D.",
237 |         "original": "Smarty Pants, Ph.D."
238 |       }
239 |     }, {
240 |       "test": "first middle initial last",
241 |       "result": {
242 |         "prefix": null,
243 |         "first": "Mark",
244 |         "middle": "P",
245 |         "last": "Williams",
246 |         "suffix": null,
247 |         "original": "Mark P Williams"
248 |       }
249 |     }, {
250 |       "test": "first compound last name",
251 |       "result": {
252 |         "prefix": null,
253 |         "first": "Aaron",
254 |         "middle": null,
255 |         "last": "bin Omar",
256 |         "suffix": null,
257 |         "original": "Aaron bin Omar"
258 |       }
259 |     }, {
260 |       "test": "Dutch compound last name",
261 |       "result": {
262 |         "prefix": null,
263 |         "first": "Richard",
264 |         "middle": null,
265 |         "last": "van der Dys",
266 |         "suffix": null,
267 |         "original": "Richard van der Dys"
268 |       }
269 |     }, {
270 |       "test": "two compound last name prefixes",
271 |       "result": {
272 |         "prefix": null,
273 |         "first": "Joe",
274 |         "middle": null,
275 |         "last": "de la Cruz",
276 |         "suffix": null,
277 |         "original": "Joe de la Cruz"
278 |       }
279 |     }, {
280 |       "test": "first last esquire",
281 |       "result": {
282 |         "prefix": null,
283 |         "first": "John",
284 |         "middle": null,
285 |         "last": "Doe",
286 |         "suffix": "Esquire",
287 |         "original": "John Doe Esquire"
288 |       }
289 |     }
290 |   ];
291 | 
292 |   tests.forEach(function (test) {
293 | 
294 |     it('should parse ' + test.test + ' correctly', function () {
295 |       var result = sut(test.result.original);
296 |       expect(result).to.have.property('prefix', test.result.prefix);
297 |       expect(result).to.have.property('first', test.result.first);
298 |       expect(result).to.have.property('middle', test.result.middle);
299 |       expect(result).to.have.property('last', test.result.last);
300 |       expect(result).to.have.property('suffix', test.result.suffix);
301 |       expect(result).to.have.property('original', test.result.original);
302 |     });
303 |   });
304 | 
305 | });
306 | 


--------------------------------------------------------------------------------