├── README.md ├── index.js ├── package.json └── test.js /README.md: -------------------------------------------------------------------------------- 1 | # ner-server 2 | JavaScript api endpoint wrapper for communicating with stanford-ner server 3 | 4 | ##### SET UP 5 | 1. Have Java jdk 1.8 installed and in your path, stanford-ner requires java 1.8 6 | 2. Install dependencies
7 | a. Manually download stanford-ner from the stanford.edu website and place it in the project directory:
8 | https://nlp.stanford.edu/software/stanford-ner-2017-06-09.zip
9 | b. Run `npm install`.
10 | If there are no errors, everything is set up correctly.
11 | 12 | ##### START UP 13 | 14 | run these commands to start java server 15 | 16 | 1. 17 | ``` 18 | java -Djava.ext.dirs=./lib -cp stanford-ner.jar edu.stanford.nlp.ie.NERServer -port 9191 -loadClassifier ./classifiers/english.muc.7class.distsim.crf.ser.gz -tokenizerFactory edu.stanford.nlp.process.WhitespaceTokenizer -tokenizerOptions tokenizeNLs=false -outputFormat slashTags 19 | ``` 20 | change `-port 9191` to whatever port you want the stanford-ner server to be listening to 21 | 22 | 23 | 24 | #### Example 25 | ``` 26 | var ner = require('ner-server'); 27 | 28 | var text = "The fate of Lehman Brothers, the beleaguered investment bank, \ 29 | hung in the balance on Sunday as Federal Reserve officials and the leaders of \ 30 | major financial institutions continued to gather in emergency meetings trying \ 31 | to complete a plan to rescue the stricken bank. Several possible plans emerged \ 32 | from the talks, held at the Federal Reserve Bank of New York and led by Timothy R. Geithner, \ 33 | the president of the New York Fed, and Treasury Secretary Henry M. Paulson Jr." 34 | 35 | 36 | ner.cli( 37 | 9191, text, 38 | function(err, tags){ 39 | console.log('cli tags: '+JSON.stringify(tags)+'\n'); 40 | } 41 | ); 42 | 43 | ner.post( 44 | 'localhost', 9191, text, 45 | function(err, res){ 46 | console.log('post tags: '+JSON.stringify(res.tags)+'\n'); 47 | } 48 | ); 49 | ``` 50 | 51 | #### Using ner-server 52 | Example return object
53 | 3class returns Person, Location, Organization
54 | 4class returns 3class + Misc
55 | 7class returns 3class + Money, Percent, Date, Time
56 | 57 | ``` 58 | entities : {
59 | Person:'ALL',
60 | Location:'ALL',
61 | Organization:'ALL',
62 | Misc:'4class Only',
63 | Money:'7class Only',
64 | Percent:'7class Only',
65 | Date:'7class Only',
66 | Time:'7class Only'
67 | } 68 | ``` 69 | 70 | -------------------------------------------------------------------------------- /index.js: -------------------------------------------------------------------------------- 1 | // index.js 2 | var spawn = require('child_process').spawn; 3 | var _ = require('underscore'); 4 | var net = require('net'); 5 | 6 | var ner_port = 9191 7 | var ner_host = 'localhost' 8 | 9 | module.exports = { 10 | cli: cli, 11 | post: post 12 | }; 13 | 14 | function post(host, port, text, callback) { 15 | var socket = new net.Socket(); 16 | port = port? port: ner_port; 17 | host = host? host: ner_host; 18 | 19 | socket.connect(port, host, function () { 20 | socket.setNoDelay(true); 21 | socket.write(text.replace(/\r?\n|\r|\t/g, ' ') + '\n'); 22 | }); 23 | 24 | socket.on('data', function (data) { 25 | var re = /<([A-Z]+?)>(.+?)<\/\1>/g; 26 | var str = data.toString(); 27 | var res = {}; 28 | res.tags = parse(str); 29 | socket.destroy(); 30 | callback(undefined, res); 31 | }); 32 | 33 | socket.on('error', function (err) { 34 | callback(err, undefined); 35 | }); 36 | } 37 | 38 | function cli(port, body, callback) { 39 | var parsed = ''; 40 | var text = body.replace(/\r?\n|\r|\t/gm, ' '); 41 | 42 | port = port? 
port: ner_port; 43 | var process = spawn('C:/java-1.8/bin/java', 44 | ['-cp', '../sources/stanford-ner-2017-06-09/stanford-ner.jar', 45 | 'edu.stanford.nlp.ie.NERServer', 46 | '-port' ,port ,'-client' 47 | ] 48 | ); 49 | 50 | //when java server returns data 51 | process.stdout.on('data', function (data) { 52 | //ignore if 'Input' write file text to stream 53 | if(String(data).indexOf('Input some text and press RETURN to NER tag it, or just RETURN to finish.')==0){ 54 | process.stdin.write(text); 55 | process.stdin.write('\n'); 56 | process.stdin.write('\n'); 57 | return; 58 | } 59 | //concat returned data 60 | else if(String(data).length > 1){ 61 | parsed += String(data); 62 | return; 63 | } 64 | }); 65 | 66 | process.stdin.on('endData',function (data){ 67 | console.log('endData: '+data); 68 | }); 69 | 70 | process.stderr.on('data', function (err) { 71 | console.log('stderr: ' + err); 72 | callback(err, undefined); 73 | }); 74 | 75 | //when process ends 76 | process.on('close', function (code) { 77 | //console.log('stanford-ner process exited with code ' + code); 78 | //return ner tags, after parsing 79 | callback(undefined, parse(parsed)); 80 | return; 81 | }); 82 | } 83 | 84 | var parse = function(slashtags) { 85 | var tokenized = slashtags.split(/\s/gmi); 86 | var splitRegex = new RegExp('(.+)/([A-Z]+)','g'); 87 | var tagged = _.map(tokenized, 88 | function(token) { 89 | var parts = new RegExp('(.+)/([A-Z]+)','g').exec(token); 90 | return (parts)? { w: parts[1], t: parts[2] }: null; 91 | } 92 | ); 93 | 94 | tagged = _.compact(tagged); 95 | // Now we extract the neighbors into one entity 96 | var entities = {}; 97 | var prevEntity = false; 98 | var entityBuffer = []; 99 | for (var i=0;i0) { 105 | // There was! 
We save the entity 106 | if (!entities.hasOwnProperty(prevEntity)) { 107 | entities[prevEntity] = []; 108 | } 109 | entities[prevEntity].push(entityBuffer.join(' ')); 110 | // Now we set the buffer 111 | entityBuffer = []; 112 | } 113 | // Push to the buffer 114 | entityBuffer.push(tagged[i].w); 115 | } else { 116 | // Prev entity is same a current one. We push to the buffer. 117 | entityBuffer.push(tagged[i].w); 118 | } 119 | } else { 120 | if (entityBuffer.length>0) { 121 | // There was! We save the entity 122 | if (!entities.hasOwnProperty(prevEntity)) { 123 | entities[prevEntity] = []; 124 | } 125 | entities[prevEntity].push(entityBuffer.join(' ')); 126 | // Now we set the buffer 127 | entityBuffer = []; 128 | } 129 | } 130 | // Save the current entity 131 | prevEntity = tagged[i].t; 132 | } 133 | return entities; 134 | } -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "ner-server", 3 | "version": "2.0.1", 4 | "description": "Server endpoint for communicating with stanford-ner server", 5 | "main": "index.js", 6 | "scripts": { 7 | "test": "node test.js" 8 | }, 9 | "dependencies": { 10 | "body-parser": "1.12.2", 11 | "express": "4.12.3", 12 | "request": "*", 13 | "net": "*", 14 | "underscore": "1.8.3" 15 | }, 16 | "author": "Pranav Herur", 17 | "license": "ISC", 18 | "directories": { 19 | "example": "example" 20 | }, 21 | "repository": { 22 | "type": "git", 23 | "url": "https://github.com/PranavHerur/ner-server.git" 24 | }, 25 | "keywords": [ 26 | "ner", 27 | "keyword", 28 | "extraction" 29 | ], 30 | "bugs": { 31 | "url": "https://github.com/PranavHerur/ner-server/issues" 32 | }, 33 | "homepage": "https://github.com/PranavHerur/ner-server" 34 | } 35 | -------------------------------------------------------------------------------- /test.js: 
// test.js
// Manual smoke test: sends a sample paragraph to a stanford-ner server that is
// expected to be listening on localhost:9191 and prints the tags it returns.
var ner = require('./index');

var text = "The fate of Lehman Brothers, the beleaguered investment bank, " +
    "hung in the balance on Sunday as Federal Reserve officials and the leaders of " +
    "major financial institutions continued to gather in emergency meetings trying " +
    "to complete a plan to rescue the stricken bank. Several possible plans emerged " +
    "from the talks, held at the Federal Reserve Bank of New York and led by Timothy R. Geithner, " +
    "the president of the New York Fed, and Treasury Secretary Henry M. Paulson Jr.";

ner.post(
    'localhost', 9191, text,
    function (err, res) {
        // `res` is undefined when the request failed, so check `err` before
        // touching it and make the script exit non-zero.
        if (err) {
            console.error('post failed: ' + (err.message || err));
            process.exitCode = 1;
            return;
        }
        console.log('post tags: ' + JSON.stringify(res.tags) + '\n');
    }
);