├── README.md
├── index.js
├── package.json
└── test.js
/README.md:
--------------------------------------------------------------------------------
1 | # ner-server
2 | JavaScript API wrapper for communicating with a stanford-ner server
3 |
4 | ##### SET UP
5 | 1. Have the Java JDK 1.8 installed and on your PATH; stanford-ner requires Java 1.8
6 | 2. Install dependencies
7 | a. manually download stanford-ner from the stanford.edu website and place it in the project directory
8 | https://nlp.stanford.edu/software/stanford-ner-2017-06-09.zip
9 | b. run `npm install`
10 | If there are no errors, then you have set everything up correctly
11 |
12 | ##### START UP
13 |
14 | run this command to start the Java server
15 |
16 | 1.
17 | ```
18 | java -Djava.ext.dirs=./lib -cp stanford-ner.jar edu.stanford.nlp.ie.NERServer -port 9191 -loadClassifier ./classifiers/english.muc.7class.distsim.crf.ser.gz -tokenizerFactory edu.stanford.nlp.process.WhitespaceTokenizer -tokenizerOptions tokenizeNLs=false -outputFormat slashTags
19 | ```
20 | change `-port 9191` to whatever port you want the stanford-ner server to listen on
21 |
22 |
23 |
24 | #### Example
25 | ```
26 | var ner = require('ner-server');
27 |
28 | var text = "The fate of Lehman Brothers, the beleaguered investment bank, \
29 | hung in the balance on Sunday as Federal Reserve officials and the leaders of \
30 | major financial institutions continued to gather in emergency meetings trying \
31 | to complete a plan to rescue the stricken bank. Several possible plans emerged \
32 | from the talks, held at the Federal Reserve Bank of New York and led by Timothy R. Geithner, \
33 | the president of the New York Fed, and Treasury Secretary Henry M. Paulson Jr."
34 |
35 |
36 | ner.cli(
37 | 9191, text,
38 | function(err, tags){
39 | console.log('cli tags: '+JSON.stringify(tags)+'\n');
40 | }
41 | );
42 |
43 | ner.post(
44 | 'localhost', 9191, text,
45 | function(err, res){
46 | console.log('post tags: '+JSON.stringify(res.tags)+'\n');
47 | }
48 | );
49 | ```
50 |
51 | #### Using ner-server
52 | Example return object
53 | 3class returns Person, Location, Organization
54 | 4class returns 3class + Misc
55 | 7class returns 3class + Money, Percent, Date, Time
56 |
57 | ```
58 | entities : {
59 | Person:'ALL',
60 | Location:'ALL',
61 | Organization:'ALL',
62 | Misc:'4class Only',
63 | Money:'7class Only',
64 | Percent:'7class Only',
65 | Date:'7class Only',
66 | Time:'7class Only'
67 | }
68 | ```
69 |
70 |
--------------------------------------------------------------------------------
/index.js:
--------------------------------------------------------------------------------
1 | // index.js
2 | var spawn = require('child_process').spawn;
3 | var _ = require('underscore');
4 | var net = require('net');
5 |
6 | var ner_port = 9191
7 | var ner_host = 'localhost'
8 |
9 | module.exports = {
10 | cli: cli,
11 | post: post
12 | };
13 |
/**
 * Tag `text` by talking directly to a running stanford-ner server over TCP.
 *
 * The text is flattened to a single line (newlines and tabs become spaces),
 * sent followed by a newline, and the complete reply is passed to parse().
 *
 * @param {string} host - Server host; a falsy value falls back to ner_host.
 * @param {number} port - Server port; a falsy value falls back to ner_port.
 * @param {string} text - Text to tag.
 * @param {function} callback - Invoked exactly once as callback(err, res).
 *     On success `err` is undefined and `res.tags` holds the parsed entities;
 *     on failure `err` is set and `res` is undefined.
 */
function post(host, port, text, callback) {
    // Reject bad input up front: calling text.replace() inside the connect
    // callback would otherwise throw where no caller can catch it.
    if (typeof text !== 'string') {
        process.nextTick(function () {
            callback(new TypeError('text must be a string'), undefined);
        });
        return;
    }

    var socket = new net.Socket();
    var chunks = [];
    var finished = false;
    port = port ? port : ner_port;
    host = host ? host : ner_host;

    // Single exit point so the callback can never fire twice
    // (e.g. 'end' followed by 'close', or 'error' followed by 'close').
    function finish(err) {
        if (finished) {
            return;
        }
        finished = true;
        socket.destroy();
        if (err) {
            callback(err, undefined);
            return;
        }
        // Concatenate the raw buffers before decoding so a multi-byte
        // character split across two chunks is decoded correctly.
        var reply = Buffer.concat(chunks).toString();
        callback(undefined, { tags: parse(reply) });
    }

    // A reply may arrive in several TCP chunks; collect all of them instead
    // of parsing the first one only.
    socket.on('data', function (data) {
        chunks.push(data);
    });

    // NOTE(review): relies on the server closing the connection once it has
    // answered the request — confirm against the NERServer version in use.
    socket.on('end', function () {
        finish();
    });

    socket.on('close', function () {
        finish();
    });

    socket.on('error', function (err) {
        finish(err);
    });

    socket.connect(port, host, function () {
        socket.setNoDelay(true);
        socket.write(text.replace(/\r?\n|\r|\t/g, ' ') + '\n');
    });
}
37 |
/**
 * Tag `body` by spawning the stanford-ner Java client (`NERServer -client`)
 * and feeding it the text on stdin once the client prints its input prompt.
 *
 * @param {number} port - Port of the running stanford-ner server; a falsy
 *     value falls back to ner_port.
 * @param {string} body - Text to tag; newlines and tabs are replaced with
 *     spaces before it is sent.
 * @param {function} callback - Invoked exactly once as callback(err, tags).
 *     On success `err` is undefined and `tags` is the result of parse();
 *     on failure `err` is an Error and `tags` is undefined.
 * @param {Object} [options] - Optional overrides.
 * @param {string} [options.javaPath='C:/java-1.8/bin/java'] - Java executable.
 * @param {string} [options.jarPath='../sources/stanford-ner-2017-06-09/stanford-ner.jar']
 *     - Path to stanford-ner.jar.
 */
function cli(port, body, callback, options) {
    var settings = options || {};
    var javaPath = settings.javaPath || 'C:/java-1.8/bin/java';
    var jarPath = settings.jarPath || '../sources/stanford-ner-2017-06-09/stanford-ner.jar';
    var parsed = '';
    var stderrText = '';
    var finished = false;
    var text = body.replace(/\r?\n|\r|\t/gm, ' ');

    port = port ? port : ner_port;

    // Single exit point: guarantees the callback fires once even though
    // 'error', stdin errors and 'close' can all occur for one failed run.
    function finish(err, tags) {
        if (finished) {
            return;
        }
        finished = true;
        callback(err, tags);
    }

    // Named `child` rather than `process` to avoid shadowing the Node global.
    var child = spawn(javaPath,
        ['-cp', jarPath,
            'edu.stanford.nlp.ie.NERServer',
            '-port', String(port), '-client'
        ]
    );

    // Without this handler a missing/unusable Java binary emits an unhandled
    // 'error' event and takes the whole host process down.
    child.on('error', function (err) {
        finish(err, undefined);
    });

    // The stdio streams can be missing when the process could not be spawned.
    if (child.stdout) {
        child.stdout.on('data', function (data) {
            var chunk = String(data);
            // The client prompt is the cue to send the text; it is not output.
            if (chunk.indexOf('Input some text and press RETURN to NER tag it, or just RETURN to finish.') === 0) {
                // Text, RETURN to submit it, then an empty RETURN to finish.
                child.stdin.write(text);
                child.stdin.write('\n');
                child.stdin.write('\n');
                return;
            }
            // Accumulate tagged output, skipping bare one-character chunks.
            if (chunk.length > 1) {
                parsed += chunk;
            }
        });
    }

    if (child.stderr) {
        child.stderr.on('data', function (err) {
            console.log('stderr: ' + err);
            // Kept for the error message; stderr output alone is not treated
            // as failure, the exit code decides.
            stderrText += String(err);
        });
    }

    if (child.stdin) {
        // e.g. EPIPE when the client exits before the text has been written.
        child.stdin.on('error', function (err) {
            finish(err, undefined);
        });
    }

    // When the process has ended and its streams are closed.
    child.on('close', function (code) {
        if (finished) {
            return;
        }
        if (code !== 0) {
            var detail = stderrText ? ': ' + stderrText : '';
            finish(new Error('stanford-ner client exited with code ' + code + detail), undefined);
            return;
        }
        finish(undefined, parse(parsed));
    });
}
83 |
/**
 * Convert stanford-ner "slashTags" output (whitespace-separated `word/TAG`
 * tokens) into a map of entity type to the list of phrases found.
 *
 * Consecutive tokens carrying the same tag are joined with a space into one
 * phrase. Tokens tagged `O` are not entities and only terminate the phrase
 * being collected. Tokens that do not look like `word/TAG` are ignored.
 *
 * @param {string} slashtags - Raw slashTags text.
 * @returns {Object} Map such as { PERSON: ['Timothy R. Geithner'], ... };
 *     an empty object when no entities are present.
 */
var parse = function(slashtags) {
    // Greedy first group: the tag is whatever follows the LAST matching slash,
    // so words that themselves contain '/' keep their slashes.
    var tokenPattern = /(.+)\/([A-Z]+)/;
    var entities = {};
    var prevEntity = false;
    var entityBuffer = [];

    // Store the phrase collected so far (if any) under the previous tag.
    function flush() {
        if (entityBuffer.length === 0) {
            return;
        }
        if (!Object.prototype.hasOwnProperty.call(entities, prevEntity)) {
            entities[prevEntity] = [];
        }
        entities[prevEntity].push(entityBuffer.join(' '));
        entityBuffer = [];
    }

    slashtags.split(/\s/).forEach(function (token) {
        var parts = tokenPattern.exec(token);
        if (!parts) {
            return;
        }
        var word = parts[1];
        var tag = parts[2];

        // A phrase ends when we leave entity territory or the tag changes.
        if (tag === 'O' || tag !== prevEntity) {
            flush();
        }
        if (tag !== 'O') {
            entityBuffer.push(word);
        }
        // Save the current tag for the next token's comparison.
        prevEntity = tag;
    });

    // An entity that runs to the very end of the input has no following token
    // to trigger a flush, so it must be saved here or it would be lost.
    flush();

    return entities;
};
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "ner-server",
3 | "version": "2.0.1",
4 | "description": "Server endpoint for communicating with stanford-ner server",
5 | "main": "index.js",
6 | "scripts": {
7 | "test": "node test.js"
8 | },
9 | "dependencies": {
10 | "body-parser": "1.12.2",
11 | "express": "4.12.3",
12 | "request": "*",
13 | "net": "*",
14 | "underscore": "1.8.3"
15 | },
16 | "author": "Pranav Herur",
17 | "license": "ISC",
18 | "directories": {
19 | "example": "example"
20 | },
21 | "repository": {
22 | "type": "git",
23 | "url": "https://github.com/PranavHerur/ner-server.git"
24 | },
25 | "keywords": [
26 | "ner",
27 | "keyword",
28 | "extraction"
29 | ],
30 | "bugs": {
31 | "url": "https://github.com/PranavHerur/ner-server/issues"
32 | },
33 | "homepage": "https://github.com/PranavHerur/ner-server"
34 | }
35 |
--------------------------------------------------------------------------------
/test.js:
--------------------------------------------------------------------------------
// Manual smoke test: sends a sample paragraph to the stanford-ner server at
// localhost:9191 through ner.post() and prints the tags it returns.
var ner = require('./index');

var text = "The fate of Lehman Brothers, the beleaguered investment bank, \
hung in the balance on Sunday as Federal Reserve officials and the leaders of \
major financial institutions continued to gather in emergency meetings trying \
to complete a plan to rescue the stricken bank. Several possible plans emerged \
from the talks, held at the Federal Reserve Bank of New York and led by Timothy R. Geithner, \
the president of the New York Fed, and Treasury Secretary Henry M. Paulson Jr.";

ner.post(
    'localhost', 9191, text,
    function(err, res){
        // On failure `res` is undefined, so report the error instead of
        // dereferencing res.tags, and make the test run exit non-zero.
        if (err) {
            console.error('post failed: ' + err);
            process.exitCode = 1;
            return;
        }
        console.log('post tags: '+JSON.stringify(res.tags)+'\n');
    }
);
--------------------------------------------------------------------------------