├── .gitignore
├── config
    └── index.js
├── index.js
├── lib
    ├── hanlp-1.3.2
    │   ├── gson-2.2.2.jar
    │   ├── hanlp-1.3.2.jar
    │   ├── hanlp-1.3.2-sources.jar
    │   └── src-java
    │   │   ├── node
    │   │       ├── VarArgs.class
    │   │       ├── CastingUtils.class
    │   │       ├── NodeJsException.class
    │   │       ├── NodeDynamicProxyClass.class
    │   │       ├── NodeJsException.java
    │   │       ├── CastingUtils.java
    │   │       ├── VarArgs.java
    │   │       └── NodeDynamicProxyClass.java
    │   │   └── hanLP.properties
    └── index.js
├── .dockerignore
├── Dockerfile
├── package.json
├── scripts
    └── build-docker-image.sh
├── app.js
├── examples
    ├── conversion.js
    ├── extract.js
    └── tokenizer.js
├── router.js
└── README.md


/.gitignore:
--------------------------------------------------------------------------------
1 | node_modules/
2 | data
3 | 


--------------------------------------------------------------------------------
/config/index.js:
--------------------------------------------------------------------------------
1 | module.exports = { // HTTP listen settings; app.js reads `host` and `port` from this object
2 | 	host : "0.0.0.0", // address handed to app.listen(); app.js substitutes "0.0.0.0" when this is falsy
3 | 	port : 3000 // port handed to app.listen(); app.js substitutes 3000 when this is falsy
4 | }


--------------------------------------------------------------------------------
/index.js:
--------------------------------------------------------------------------------
1 | /**
2 |  * Package entry point: re-exports whatever ./lib/index exports.
3 |  */
4 | 
5 | module.exports = require("./lib/index");


--------------------------------------------------------------------------------
/lib/hanlp-1.3.2/gson-2.2.2.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hailiang-wang/hanlp-api/HEAD/lib/hanlp-1.3.2/gson-2.2.2.jar


--------------------------------------------------------------------------------
/lib/hanlp-1.3.2/hanlp-1.3.2.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hailiang-wang/hanlp-api/HEAD/lib/hanlp-1.3.2/hanlp-1.3.2.jar


--------------------------------------------------------------------------------
/.dockerignore:
--------------------------------------------------------------------------------
1 | */sftp-config.json
2 | */*.sublime-*
3 | .*
4 | node_modules/
5 | logs/*
6 | scripts/
7 | .vscode
8 | */dist
9 | 


--------------------------------------------------------------------------------
/lib/hanlp-1.3.2/hanlp-1.3.2-sources.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hailiang-wang/hanlp-api/HEAD/lib/hanlp-1.3.2/hanlp-1.3.2-sources.jar


--------------------------------------------------------------------------------
/lib/hanlp-1.3.2/src-java/node/VarArgs.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hailiang-wang/hanlp-api/HEAD/lib/hanlp-1.3.2/src-java/node/VarArgs.class


--------------------------------------------------------------------------------
/lib/hanlp-1.3.2/src-java/node/CastingUtils.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hailiang-wang/hanlp-api/HEAD/lib/hanlp-1.3.2/src-java/node/CastingUtils.class


--------------------------------------------------------------------------------
/lib/hanlp-1.3.2/src-java/node/NodeJsException.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hailiang-wang/hanlp-api/HEAD/lib/hanlp-1.3.2/src-java/node/NodeJsException.class


--------------------------------------------------------------------------------
/lib/hanlp-1.3.2/src-java/node/NodeDynamicProxyClass.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hailiang-wang/hanlp-api/HEAD/lib/hanlp-1.3.2/src-java/node/NodeDynamicProxyClass.class


--------------------------------------------------------------------------------
/lib/hanlp-1.3.2/src-java/node/NodeJsException.java:
--------------------------------------------------------------------------------
1 | package node;
2 | /** Unchecked exception that carries only a detail message. Presumably raised by the Node.js bridge code — confirm against callers. */
3 | public class NodeJsException extends RuntimeException {
4 |     public NodeJsException(String message) { // message is passed unchanged to RuntimeException
5 |         super(message);
6 |     }
7 | }
8 | 


--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM node:7.9.0
 2 | LABEL maintainer="Hain Wang <hailiang.hl.wang@gmail.com>"
 3 | 
 4 | # Refresh the package index and install the JDK in one layer, so a cached and
 5 | # stale `apt-get update` layer is never combined with a newer install step.
 6 | RUN apt-get update \
 7 |     && apt-get install -y openjdk-7-jdk \
 8 |     && rm -rf /var/lib/apt/lists/*
 9 | 
10 | # cnpm is installed from the taobao mirror and used below for the project dependencies.
11 | RUN npm install -g cnpm --registry=https://registry.npm.taobao.org
12 | 
13 | # WORKDIR creates /hanlp-api when it does not exist yet.
14 | WORKDIR /hanlp-api
15 | COPY . /hanlp-api
16 | RUN cnpm install
17 | 
18 | # config/index.js makes app.js listen on port 3000, so that is the port to document.
19 | EXPOSE 3000
20 | 
21 | ENTRYPOINT ["node"]
22 | CMD ["app.js"]


--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "node-hanlp",
 3 |   "version": "1.0.2",
 4 |   "description": "HanLP for nodejs",
 5 |   "main": "index.js",
 6 |   "dependencies": {},
 7 |   "devDependencies": {
 8 |     "body-parser": "^1.17.1",
 9 |     "express": "^4.15.2",
10 |     "java": "^0.8.0",
11 |     "underscore": "^1.8.3"
12 |   },
13 |   "scripts": {
14 |     "test": "echo \"Error: no test specified\" && exit 1"
15 |   },
16 |   "keywords": [
17 |     "hanlp",
18 |     "node-hanlp"
19 |   ],
20 |   "author": "chanre",
21 |   "license": "ISC"
22 | }
23 | 


--------------------------------------------------------------------------------
/scripts/build-docker-image.sh:
--------------------------------------------------------------------------------
 1 | #! /bin/bash 
 2 | ###########################################
 3 | # Build Docker Image
 4 | ###########################################
 5 | 
 6 | # constants
 7 | baseDir=$(cd `dirname "$0"`;pwd)
 8 | # functions
 9 | 
10 | # main 
11 | [ -z "${BASH_SOURCE[0]}" -o "${BASH_SOURCE[0]}" = "$0" ] || return
12 | cd $baseDir/..
13 | 
14 | # Version key/value should be on his own line
15 | PACKAGE_VERSION=$(cat package.json \
16 |   | grep version \
17 |   | head -1 \
18 |   | awk -F: '{ print $2 }' \
19 |   | sed 's/[",]//g' | xargs)
20 | 
21 | echo $PACKAGE_VERSION
22 | docker build --force-rm=true --tag samurais/hanlp-api:$PACKAGE_VERSION .
23 | 


--------------------------------------------------------------------------------
/app.js:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * 
 3 |  * @authors chandre (chandre21cn@gmail.com)
 4 |  * @date    2017-04-09 21:01:20
 5 |  * @version $Id$
 6 |  */
 7 | 
 8 | const express 		= require('express');
 9 | const bodyParser 	= require('body-parser');
10 | const app 			= express();
11 | 
12 | let  router 	= require("./router");
13 | 
14 | // 收藏图标
15 | app.get("/favicon.ico", (req, res)  => {
16 | 	res.send("");
17 | });
18 | 
19 | app.use(bodyParser.json());
20 | app.use(bodyParser.urlencoded({ extended: true }));
21 | app.use("/" , router )
22 | 
23 | let config 	= require("./config")
24 | // 启动HTTP服务
25 | let server = app.listen({
26 | 	port : config.port || 3000,
27 | 	host : config.host || "0.0.0.0"
28 | },  () => {
29 | 	console.log('HanLP Service listening at http://%s:%s', server.address().address, server.address().port);
30 | });


--------------------------------------------------------------------------------
/examples/conversion.js:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * 文本信息提取
 3 |  * @authors chandre (chandre21cn@gmail.com)
 4 |  * @date    2017-04-08 18:24:04
 5 |  * @version 1.0.0
 6 |  */
 7 | 
 8 | 
 9 | const Hanlp = require("../lib/index");
10 | const HanLP = new Hanlp();
11 | 
12 | // [ConversionFont 简繁转换]
13 | console.log("\n============================= 繁体 =============================")
14 | var text  = "用笔记本电脑写程序";
15 | var txt = HanLP.ConversionFont( text , "ft" );
16 | console.log( `${text} >>>> ${txt}` )
17 | 
18 | console.log("\n============================= 简体 =============================")
19 | var text  = "「以後等妳當上皇后，就能買士多啤梨慶祝了」";
20 | var txt = HanLP.ConversionFont( text );
21 | console.log( `${text} >>>> ${txt}` )
22 | 
23 | 
24 | // [Pinyin 拼音转换]
25 | console.log("\n============================= 拼音转换 =============================")
26 | var text  = "用笔记本电脑写程序";
27 | ['num','tone','outtone','shengmu','yunmu','head'].forEach(function(item){
28 | 	var txt = HanLP.Pinyin( text , item );
29 | 	console.log( `${item} >>>> ${txt}` )
30 | })
31 | 


--------------------------------------------------------------------------------
/lib/hanlp-1.3.2/src-java/node/CastingUtils.java:
--------------------------------------------------------------------------------
 1 | package node;
 2 | 
 3 | import java.lang.reflect.Method;
 4 | /** Converts boxed Integer/Double arguments to the boxed type declared by a reflective call target. */
 5 | public class CastingUtils {
 6 |     public static void cast(Method method, Object[] args) throws Throwable { // rewrites args in place, one element per declared parameter
 7 |         Class[] methodParameterTypes = method.getParameterTypes();
 8 |         if (methodParameterTypes.length != args.length) { // NOTE(review): a null args array would fail here with a NullPointerException — confirm callers never pass null
 9 |             throw new Exception("Method argument length mismatch. Expecting " + methodParameterTypes.length + " found " + args.length);
10 |         }
11 |         for (int i = 0; i < methodParameterTypes.length; i++) {
12 |             args[i] = cast(args[i], methodParameterTypes[i]);
13 |         }
14 |     }
15 |     /** Returns o converted to t for Integer-to-Double and Double-to-Integer; every other value, including null, is returned unchanged. */
16 |     public static Object cast(Object o, Class t) {
17 |         if (o == null) {
18 |             return null;
19 |         }
20 | 
21 |         Class oClass = o.getClass();
22 |         if (oClass == Integer.class) {
23 |             Integer i = (Integer) o;
24 |             if (t == Double.class) { // NOTE(review): only the boxed class is matched; a primitive double parameter reports double.class, so it is not converted here
25 |                 return i.doubleValue();
26 |             }
27 |         } else if (oClass == Double.class) {
28 |             Double d = (Double) o;
29 |             if (t == Integer.class) { // NOTE(review): same as above for a primitive int parameter (int.class)
30 |                 return d.intValue(); // narrowing conversion: the fractional part is dropped
31 |             }
32 |         }
33 | 
34 |         return o;
35 |     }
36 | }
37 | 


--------------------------------------------------------------------------------
/lib/hanlp-1.3.2/src-java/hanLP.properties:
--------------------------------------------------------------------------------
 1 | #本配置文件中的路径的根目录，根目录+其他路径=绝对路径
 2 | #Windows用户请注意，路径分隔符统一使用/
 3 | root=/node-hanlp
 4 | #核心词典路径
 5 | CoreDictionaryPath=data/dictionary/CoreNatureDictionary.txt
 6 | #2元语法词典路径
 7 | BiGramDictionaryPath=data/dictionary/CoreNatureDictionary.ngram.txt
 8 | #停用词词典路径
 9 | CoreStopWordDictionaryPath=data/dictionary/stopwords.txt
10 | #同义词词典路径
11 | CoreSynonymDictionaryDictionaryPath=data/dictionary/synonym/CoreSynonym.txt
12 | #人名词典路径
13 | PersonDictionaryPath=data/dictionary/person/nr.txt
14 | #人名词典转移矩阵路径
15 | PersonDictionaryTrPath=data/dictionary/person/nr.tr.txt
16 | #繁简词典根目录
17 | tcDictionaryRoot=data/dictionary/tc
18 | #自定义词典路径，用;隔开多个自定义词典，空格开头表示在同一个目录，使用“文件名 词性”形式则表示这个词典的词性默认是该词性。优先级递减。
19 | #另外data/dictionary/custom/CustomDictionary.txt是个高质量的词库，请不要删除。所有词典统一使用UTF-8编码。
20 | CustomDictionaryPath=data/dictionary/custom/CustomDictionary.txt; 现代汉语补充词库.txt; 全国地名大全.txt ns; 人名词典.txt; 机构名词典.txt; 上海地名.txt ns;data/dictionary/person/nrf.txt nrf;
21 | #CRF分词模型路径
22 | CRFSegmentModelPath=data/model/segment/CRFSegmentModel.txt
23 | #HMM分词模型
24 | HMMSegmentModelPath=data/model/segment/HMMSegmentModel.bin
25 | #分词结果是否展示词性
26 | ShowTermNature=true
27 | #IO适配器，实现com.hankcs.hanlp.corpus.io.IIOAdapter接口以在不同的平台（Hadoop、Redis等）上运行HanLP
28 | #默认的IO适配器如下，该适配器是基于普通文件系统的。
29 | #IOAdapter=com.hankcs.hanlp.corpus.io.FileIOAdapter
30 | 


--------------------------------------------------------------------------------
/lib/hanlp-1.3.2/src-java/node/VarArgs.java:
--------------------------------------------------------------------------------
 1 | package node;
 2 | 
 3 | import java.lang.reflect.Constructor;
 4 | import java.lang.reflect.Method;
 5 | import java.lang.reflect.Array;
 6 | import java.util.Arrays;
 7 | /** Repacks a flat argument list so that the trailing arguments sit inside one array, as variable-arity methods and constructors declare them. */
 8 | public class VarArgs {
 9 |      public static Object[] getVarArgs(Method method, Object[] args) { // args is returned untouched unless method is variable-arity
10 |         if (method.isVarArgs()) {
11 |             Class<?>[] methodParameterTypes = method.getParameterTypes();
12 |             return getVarArgs(args, methodParameterTypes);
13 |         }
14 |         return args;
15 |      }
16 |      // Constructor counterpart of the Method overload: same rule, using the constructor's parameter types.
17 |      public static Object[] getVarArgs(Constructor constructor, Object[] args) {
18 |         if (constructor.isVarArgs()) {
19 |             Class<?>[] constructorParameterTypes = constructor.getParameterTypes();
20 |             return getVarArgs(args, constructorParameterTypes);
21 |         }
22 |         return args;
23 |      }
24 |      /** Returns args as-is when its last element already has the declared array type; otherwise builds a new array whose last slot holds the surplus arguments. */
25 |      public static Object[] getVarArgs(Object[] args, Class<?>[] methodParameterTypes) {
26 | 	if(args.length == methodParameterTypes.length // same count: the caller may already have supplied the array itself
27 |                 && args[args.length - 1].getClass().equals(methodParameterTypes[methodParameterTypes.length - 1])) { // NOTE(review): a null last argument would throw NullPointerException here
28 |             return args;
29 |         }
30 |         // Copy the fixed leading arguments, then collect everything after them into an array of the declared component type.
31 |         Object[] newArgs = new Object[methodParameterTypes.length];
32 |         System.arraycopy(args, 0, newArgs, 0, methodParameterTypes.length - 1);
33 |         Class<?> varArgComponentType = methodParameterTypes[methodParameterTypes.length - 1].getComponentType();
34 |         int varArgLength = args.length - methodParameterTypes.length + 1;
35 |         Object[] varArgsArray = (Object[])Array.newInstance(varArgComponentType, varArgLength); // NOTE(review): for a primitive component type (e.g. int...) this cast to Object[] would fail — confirm callers never reach it
36 | //         System.out.println("varArgComponentType: " + varArgComponentType);
37 | //         System.out.println("varArgsArray: " + Arrays.asList(varArgsArray).toString());
38 | //         System.out.println("args: " + Arrays.asList(args).toString());
39 |         System.arraycopy(args, methodParameterTypes.length - 1, varArgsArray, 0, varArgLength);
40 |         newArgs[methodParameterTypes.length - 1] = varArgsArray;
41 |         return newArgs;
42 |     }
43 | }
44 | 


--------------------------------------------------------------------------------
/lib/hanlp-1.3.2/src-java/node/NodeDynamicProxyClass.java:
--------------------------------------------------------------------------------
 1 | package node;
 2 | 
 3 | import java.lang.reflect.Method;
 4 | /** InvocationHandler that forwards every proxied call to the native callJs method, identified by the handle ptr. */
 5 | public class NodeDynamicProxyClass implements java.lang.reflect.InvocationHandler {
 6 |   private static final Method EQUALS; // Object.equals(Object), used by the fallback in invoke()
 7 |   private static final Method HASHCODE; // Object.hashCode(), used by the fallback in invoke()
 8 |   static {
 9 |     try {
10 |       EQUALS = Object.class.getMethod("equals", Object.class);
11 |       HASHCODE = Object.class.getMethod("hashCode");
12 |     } catch (NoSuchMethodException e) {
13 |       throw new ExceptionInInitializerError(e);
14 |     }
15 |   }
16 |   // Native methods; presumably implemented by the library that the constructor loads — confirm.
17 |   private native Object callJs(long ptr, java.lang.reflect.Method m, Object[] args) throws Throwable;
18 |   private native void unref(long ptr) throws Throwable;
19 |   public final long ptr;
20 |   /** Loads the native library at path and stores ptr, which is passed back to the native methods on every call. */
21 |   public NodeDynamicProxyClass(String path, long ptr) {
22 |     try{
23 |       Runtime.getRuntime().load(path);
24 |     }catch(Exception e){ // NOTE(review): Runtime.load reports link failures with UnsatisfiedLinkError, an Error, which this catch does not cover
25 |       System.out.println(e.toString());
26 |     }
27 |     this.ptr = ptr;
28 |   }
29 |   /** Delegates the call to callJs; on NoSuchMethodError, equals and hashCode are answered locally and anything else rethrows the error. */
30 |   @Override
31 |   public Object invoke(Object proxy, java.lang.reflect.Method m, Object[] args) throws Throwable
32 |   {
33 |     try {
34 |       Object result = callJs(this.ptr, m, args);
35 |       //if(result == null) {
36 |       //  System.out.println("invoke: null");
37 |       //} else {
38 |       //  System.out.println("invoke: " + result + " class: " + result.getClass() + " to string: " + result.toString());
39 |       //}
40 |       return result;
41 |     } catch (NoSuchMethodError e) {
42 |       // use 'vanilla' implementations otherwise - the object that persists between multiple invocations is
43 |       // 'this', not the 'proxy' argument, so we operate on this.
44 |       if (EQUALS.equals(m)) {
45 |         // need to check if the arg is a Proxy, and if so, if its invocation handler == this!
46 |         return args[0] == proxy;
47 |       } else if (HASHCODE.equals(m)) {
48 |         return System.identityHashCode(proxy);
49 |       } else if ("unref".equals(m.getName()) && m.getParameterTypes().length == 0 && m.getReturnType() == Void.TYPE) {
50 |         this.unref(); // NOTE(review): control then falls through to "throw e" below, so the caller still sees the error — confirm this is intended
51 |       }
52 |       throw e;
53 |     }
54 |   }
55 |   /** Calls the native unref with this handler's ptr. */
56 |   public void unref() throws Throwable {
57 |     unref(this.ptr);
58 |   }
59 | }
60 | 


--------------------------------------------------------------------------------
/examples/extract.js:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * 文本信息提取
 3 |  * @authors chandre (chandre21cn@gmail.com)
 4 |  * @date    2017-04-08 18:24:04
 5 |  * @version 1.0.0
 6 |  */
 7 | 
 8 | 
 9 | const Hanlp = require("../lib/index");
10 | const HanLP = new Hanlp();
11 | 
12 | var text  = [
13 |     '腾讯科技讯据外电报道美国电动车制造商特斯拉周日表示该公司第一季度电动车的交付总量同比增长69%，达到创纪录的2.5万辆，超出市场分析师此前的预期。',
14 | 	'特斯拉表示，该公司第一季度交付了1.345万辆轿车型Model S电动车，以及11,550辆SUV型Model X电动车，精确的交付量为2.5418万辆。特斯拉第一季度电动车交付量的提升，将有力的支撑公司此前制定的上半年交付5万辆电动车的目标。彭博社的统计数据显示，市场分析师此前平均预计，特斯拉第一季度电动车的交付总量为2.42万辆。',
15 | 	'投资公司Robert W. Baird & Co.分析师本·卡罗尔（Ben Kallo）就此表示，“市场目前把注意力都集中到了Model 3的量产问题，不过对特斯拉而言，第一季度电动车的交付量创出历史新高是一个非常好的消息。2.5万辆的交付量是我们能想到的最好数据，市场目前指望着Model 3推动特斯拉电动车销量的继续增长。”',
16 | 	'特斯拉此前曾表示，由于公司从去年10月底至12月初遇到制造方面的挑战，导致去年第四季度的电动车交付量未达市场预期。最终，大约2750辆电动车因为运输延迟或消费者未能实物提货，而未被计入到当季的交付量当中。特斯拉在周日的声明中还表示，大约有4650辆电动车在第一季度末转运给消费者，将被计入到第二季度的交付量当中。',
17 | 	'今年是特斯拉的关键一年，因为该公司首款量产、廉价版电动车Model 3将开始投产。受益于Model 3的市场前景和腾讯买入特斯拉5%股权的推动，特斯拉的股价在今年已累计上涨了30%。至本周五收盘时，特斯拉市值约为454亿美元，仅距年产数百万辆汽车的福特汽车相差8.7亿美元。',
18 | 	'作为美国最年轻的上市汽车制造商，特斯拉目前尚未向市场证明该公司有能力量产电动车。特斯拉首席执行官伊隆·马斯克（Elon Musk）曾表示，特斯拉明年要实现年产量50万辆的目标，不过这一雄心勃勃的计划将受制于该公司位于内华达州的超级电池工厂能否顺利投产。截至目前，特斯拉尚未给出今年全年的电动车出货量目标。',
19 | 	'特斯拉目前正在对产品线和价格结构进行调整，从而为今年7月量产Model 3进行准备。按照计划，特斯拉将从今年年底开始向美国市场销售Model 3。市场当前预计Model 3的起售价格将为3.5万美元左右。特斯拉在上月中旬决定停售售价为7.45万美元的低端版Model S轿车。4月16日之后，公司不再销售该款汽车。低端版Model S配备75kWh电池组，不过特斯拉用软件锁定电池，只提供60kWh的续航能力。特斯拉用户如果购买该汽车，可以通过升级软件解锁电池，让75kWh完全发挥作用，不过软件需要花钱购买。为什么停售该款汽车？原因是大多数客户最终会选择升级到75kWh，特斯拉想精简产品线。',
20 | 	'特斯拉目前公布的数据只是初步数据，可能会在该公司今年5月发布财报时出现略微变化。特斯拉会每个季度发布全球销售数据，而不是像传统汽车制造商那样每月发布一次汽车销售数据。交付数据只包括已转交给消费者、且所有文书工作都是正确的电动车。'
21 | ];
22 | 
23 | // [Keyword 关键词提取]
24 | console.log("\n============================= 关键词提取 =============================")
25 | var words = HanLP.Keyword( text.join("") , 3 );
26 | console.log(words)
27 | 
28 | // [Phrase 短语提取]
29 | console.log("\n============================= 短语提取 =============================")
30 | var words = HanLP.Phrase( text.join("") , 2 );
31 | console.log(words)
32 | 
33 | // [Summary 提取文章摘要]
34 | console.log("\n============================= 提取文章摘要 =============================")
35 | var words = HanLP.Summary( text.join("") , 3 );
36 | console.log(words)
37 | 
38 | 
39 | // [ Suggester 文本推荐(句子级别，从一系列句子中挑出与输入句子最相似的那一个)]
40 | console.log("\n============================= 文本推荐 =============================")
41 | var words = HanLP.Suggester( text , ["价格","业绩"] , 1 );
42 | console.log(words)


--------------------------------------------------------------------------------
/examples/tokenizer.js:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * 分词
 3 |  * @authors chandre (chandre21cn@gmail.com)
 4 |  * @date    2017-04-08 18:24:04
 5 |  * @version 1.0.0
 6 |  */
 7 | 
 8 | const Hanlp = require("../lib/index");
 9 | const HanLP = new Hanlp();
10 | 
11 | var text  = [
12 |     '腾讯科技讯据外电报道美国电动车制造商特斯拉周日表示该公司第一季度电动车的交付总量同比增长69%，达到创纪录的2.5万辆，超出市场分析师此前的预期。',
13 | 	'特斯拉表示，该公司第一季度交付了1.345万辆轿车型Model S电动车，以及11,550辆SUV型Model X电动车，精确的交付量为2.5418万辆。特斯拉第一季度电动车交付量的提升，将有力的支撑公司此前制定的上半年交付5万辆电动车的目标。彭博社的统计数据显示，市场分析师此前平均预计，特斯拉第一季度电动车的交付总量为2.42万辆。',
14 | 	'投资公司Robert W. Baird & Co.分析师本·卡罗尔（Ben Kallo）就此表示，“市场目前把注意力都集中到了Model 3的量产问题，不过对特斯拉而言，第一季度电动车的交付量创出历史新高是一个非常好的消息。2.5万辆的交付量是我们能想到的最好数据，市场目前指望着Model 3推动特斯拉电动车销量的继续增长。”',
15 | 	'特斯拉此前曾表示，由于公司从去年10月底至12月初遇到制造方面的挑战，导致去年第四季度的电动车交付量未达市场预期。最终，大约2750辆电动车因为运输延迟或消费者未能实物提货，而未被计入到当季的交付量当中。特斯拉在周日的声明中还表示，大约有4650辆电动车在第一季度末转运给消费者，将被计入到第二季度的交付量当中。',
16 | 	'今年是特斯拉的关键一年，因为该公司首款量产、廉价版电动车Model 3将开始投产。受益于Model 3的市场前景和腾讯买入特斯拉5%股权的推动，特斯拉的股价在今年已累计上涨了30%。至本周五收盘时，特斯拉市值约为454亿美元，仅距年产数百万辆汽车的福特汽车相差8.7亿美元。',
17 | 	'作为美国最年轻的上市汽车制造商，特斯拉目前尚未向市场证明该公司有能力量产电动车。特斯拉首席执行官伊隆·马斯克（Elon Musk）曾表示，特斯拉明年要实现年产量50万辆的目标，不过这一雄心勃勃的计划将受制于该公司位于内华达州的超级电池工厂能否顺利投产。截至目前，特斯拉尚未给出今年全年的电动车出货量目标。',
18 | 	'特斯拉目前正在对产品线和价格结构进行调整，从而为今年7月量产Model 3进行准备。按照计划，特斯拉将从今年年底开始向美国市场销售Model 3。市场当前预计Model 3的起售价格将为3.5万美元左右。特斯拉在上月中旬决定停售售价为7.45万美元的低端版Model S轿车。4月16日之后，公司不再销售该款汽车。低端版Model S配备75kWh电池组，不过特斯拉用软件锁定电池，只提供60kWh的续航能力。特斯拉用户如果购买该汽车，可以通过升级软件解锁电池，让75kWh完全发挥作用，不过软件需要花钱购买。为什么停售该款汽车？原因是大多数客户最终会选择升级到75kWh，特斯拉想精简产品线。',
19 | 	'特斯拉目前公布的数据只是初步数据，可能会在该公司今年5月发布财报时出现略微变化。特斯拉会每个季度发布全球销售数据，而不是像传统汽车制造商那样每月发布一次汽车销售数据。交付数据只包括已转交给消费者、且所有文书工作都是正确的电动车。'
20 | ];
21 | 
22 | // [Tokenizer 标准分词]
23 | console.log("\n============================= 标准分词 =============================")
24 | var words = HanLP.Tokenizer("商品和服务");
25 | console.log(words)
26 | 
27 | // [NLPTokenizer NLP分词]
28 | console.log("\n============================= NLP分词 =============================")
29 | var words = HanLP.NLPTokenizer("中国科学院计算技术研究所的宗成庆教授正在教授自然语言处理课程");
30 | console.log(words)
31 | 
32 | // [IndexTokenizer 索引分词]
33 | console.log("\n============================= 索引分词 =============================")
34 | var words = HanLP.IndexTokenizer("主副食品");
35 | console.log(words)
36 | 
37 | 
38 | // [ShortSegment 最短路分词]
39 | console.log("\n============================= 最短路分词 =============================")
40 | var words = HanLP.ShortSegment( "今天，刘志军案的关键人物,山西女商人丁书苗在市二中院出庭受审。" );
41 | console.log(words)
42 | 
43 | // [NShortSegment N-最短分词]
44 | console.log("\n============================= N-最短分词 =============================")
45 | var words = HanLP.NShortSegment( "刘喜杰石国祥会见吴亚琴先进事迹报告团成员" );
46 | console.log(words)
47 | 
48 | // [SpeedTokenizer 极速词典分词]
49 | console.log("\n============================= 极速词典分词 =============================")
50 | var words = HanLP.SpeedTokenizer( "江西鄱阳湖干枯，中国最大淡水湖变成大草原" );
51 | console.log(words)
52 | 
53 | 
54 | // [CRFTokenizer CRF分词]
55 | console.log("\n============================= CRF分词 =============================")
56 | text.forEach( ( item ) => {
57 | 	var words = HanLP.CRFTokenizer( item );
58 | 	console.log(words)
59 | })
60 | 
61 | 
62 | 
63 | 
64 | 
65 | 


--------------------------------------------------------------------------------
/router.js:
--------------------------------------------------------------------------------
  1 | const _ 		= require("underscore");
  2 | const express 	= require('express');
  3 | const Hanlp 	= require("./lib/index");
  4 | 
  5 | const router 	= express.Router();
  6 | const HanLP 	= new Hanlp();
  7 | 
  8 | 
  9 | 
 10 | // 路由中间件
 11 | router.use(function(req, res, next) {
 12 | 	if (req.method==="GET") return next();
 13 | 	let param = req.body;
 14 | 	if ( !_.has( param , "content" ) || _.isEmpty( param["content"] ) ) {
 15 | 		return res.status(500).send({
 16 | 			status : "error",
 17 | 			msg : "请求失败！"
 18 | 		})
 19 | 	};
 20 | 	next();
 21 | });
 22 | 
 23 | // 分词
 24 | router.post("/tokenizer", ( req , res ) => {
 25 | 	let param = req.body,
 26 | 		words = null,
 27 | 		type  = _.isEmpty(param.type) ? "standard" : param.type;
 28 | 
 29 | 	type = type.toLowerCase();
 30 | 	switch (type) {
 31 | 		case "crf" : //CRF分词
 32 | 			words = HanLP.CRFTokenizer( param.content );
 33 | 			break;
 34 | 		case "nostopword" : //去除停用词分词
 35 | 			words = HanLP.NoStopWord( param.content );
 36 | 			break;
 37 | 		case "nlp" : //NLP分词
 38 | 			words = HanLP.NLPTokenizer( param.content );
 39 | 			break;
 40 | 		case "index" : //索引分词
 41 | 			words = HanLP.IndexTokenizer( param.content );
 42 | 			break;
 43 | 		case "short" : //最短路分词
 44 | 			words = HanLP.ShortSegment( param.content );
 45 | 			break;
 46 | 		case "nshort" : //N-最短分词
 47 | 			words = HanLP.NShortSegment( param.content );
 48 | 			break;
 49 | 		case "speed" : //极速词典分词
 50 | 			words = HanLP.SpeedTokenizer( param.content );
 51 | 			break;
 52 | 		case "standard" : //标准分词
 53 | 		default:
 54 | 			words = HanLP.Tokenizer( param.content );
 55 | 	}
 56 | 	res.send({
 57 | 		status : "success",
 58 | 		data : words
 59 | 	});
 60 | })
 61 | 
 62 | // 关键词
 63 | router.post("/keyword", ( req , res ) => {
 64 | 	let param = req.body,
 65 | 		num = _.isEmpty(param.num) || _.isNaN( parseInt(param.num) ) ? 3 : parseInt(param.num),
 66 | 		words = HanLP.Keyword( param.content , num );
 67 | 	res.send({
 68 | 		status : "success",
 69 | 		data : words
 70 | 	});
 71 | })
 72 | 
 73 | // 摘要
 74 | router.post("/summary", ( req , res ) => {
 75 | 	let param = req.body,
 76 | 		num = _.isEmpty(param.num) || _.isNaN( parseInt(param.num) ) ? 3 : parseInt(param.num),
 77 | 		words = HanLP.Summary( param.content , num );
 78 | 	res.send({
 79 | 		status : "success",
 80 | 		data : words
 81 | 	});
 82 | })
 83 | 
 84 | // 短语提取
 85 | router.post("/phrase", ( req , res ) => {
 86 | 	let param = req.body,
 87 | 		num = _.isEmpty(param.num) || _.isNaN( parseInt(param.num) ) ? 3 : parseInt(param.num),
 88 | 		words = HanLP.Phrase( param.content , num );
 89 | 	res.send({
 90 | 		status : "success",
 91 | 		data : words
 92 | 	});
 93 | });
 94 | 
 95 | // 关键词、摘要
 96 | router.post("/query", ( req , res ) => {
 97 | 	let param = req.body;
 98 | 	let num = _.isEmpty(param.num) || _.isNaN( parseInt(param.num) ) ? 3 : parseInt(param.num) ;
 99 | 	res.send({
100 | 		status : "success",
101 | 		data : {
102 | 			keyword : HanLP.Keyword( param.content , num ),
103 | 			summary : HanLP.Summary( param.content , num ),
104 | 		}
105 | 	})
106 | })
107 | 
108 | // 简、繁、拼音转换
109 | router.post("/conversion" , (req , res ) => {
110 | 	let param = req.body,
111 | 		data = null,
112 | 		type  = _.isEmpty(param.type) ? "py" : param.type;
113 | 	type = type.toLowerCase();
114 | 	switch (type) {
115 | 		case "ft" : //繁体
116 | 			data = HanLP.ConversionFont( param.content , "ft" );
117 | 			break;
118 | 		case "jt" : //简体
119 | 			data = HanLP.ConversionFont( param.content , "jt"  );
120 | 			break;
121 | 		case "py" : //拼音转换
122 | 		default:
123 | 			data = HanLP.Pinyin( param.content , "outtone" );
124 | 	}
125 | 	res.send({
126 | 		status : "success",
127 | 		data : data
128 | 	})
129 | })
130 | 
131 | module.exports = router;


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | [![Docker Pulls](https://img.shields.io/docker/pulls/samurais/hanlp-api.svg?maxAge=2592000)](https://hub.docker.com/r/samurais/hanlp-api/) [![Docker Stars](https://img.shields.io/docker/stars/samurais/hanlp-api.svg?maxAge=2592000)](https://hub.docker.com/r/samurais/hanlp-api/) [![Docker Layers](https://images.microbadger.com/badges/image/samurais/hanlp-api.svg)](https://microbadger.com/#/images/samurais/hanlp-api) [![](https://images.microbadger.com/badges/version/samurais/hanlp-api.svg)](https://microbadger.com/images/samurais/hanlp-api "Get your own version badge on microbadger.com")
  2 | 
  3 | HanLP 自然语言处理 for nodejs 
  4 | =====
  5 | * 支持中文分词（N-最短路分词、CRF分词、索引分词、用户自定义词典、词性标注），命名实体识别（中国人名、音译人名、日本人名、地名、实体机构名识别），关键词提取，自动摘要，短语提取，拼音转换，简繁转换，文本推荐，依存句法分析（MaxEnt依存句法分析、CRF依存句法分析）
  6 | * 官方文档：http://www.hankcs.com/nlp/hanlp.html
  7 | 
  8 | ### 环境要求
  9 | 	java 1.8
 10 | 	nodejs >= 6
 11 | 
 12 | ### docker
 13 | 
 14 | * build image 
 15 | ```
 16 | cd node-hanlp
 17 | ./scripts/build-docker-image.sh
 18 | ```
 19 | 
 20 | Or pull image
 21 | ```
 22 | docker pull samurais/hanlp-api:1.0.0
 23 | ```
 24 | 
 25 | * start container
 26 | ```
 27 | docker run -it --rm -p 3002:3000 samurais/hanlp-api:1.0.0
 28 | ```
 29 | 
 30 | * access service
 31 | 
 32 | ```
 33 | POST /tokenizer HTTP/1.1
 34 | Host: localhost:3002
 35 | Content-Type: application/json
 36 | 
 37 | {
 38 | 	"type": "nlp",
 39 | 	"content": "刘德华和张学友创作了很多流行歌曲"
 40 | }
 41 | 
 42 | RESPONSE
 43 | {
 44 |   "status": "success",
 45 |   "data": [
 46 |     {
 47 |       "word": "刘德华",
 48 |       "nature": "nr",
 49 |       "offset": 0
 50 |     },
 51 |     {
 52 |       "word": "和",
 53 |       "nature": "cc",
 54 |       "offset": 0
 55 |     },
 56 |     {
 57 |       "word": "张学友",
 58 |       "nature": "nr",
 59 |       "offset": 0
 60 |     },
 61 |     {
 62 |       "word": "创作",
 63 |       "nature": "v",
 64 |       "offset": 0
 65 |     },
 66 |     {
 67 |       "word": "了",
 68 |       "nature": "ule",
 69 |       "offset": 0
 70 |     },
 71 |     {
 72 |       "word": "很多",
 73 |       "nature": "m",
 74 |       "offset": 0
 75 |     },
 76 |     {
 77 |       "word": "流行歌曲",
 78 |       "nature": "n",
 79 |       "offset": 0
 80 |     }
 81 |   ]
 82 | }
 83 | ```
 84 | 
 85 | * Other APIs
 86 | 
 87 |     - tokenizer 分词
 88 |     - keyword 关键词
 89 |     - summary 摘要
 90 | 	- phrase 短语提取
 91 | 	- query 关键词、摘要
 92 | 	- conversion 简、繁、拼音转换
 93 | 
 94 | [源码](/router.js)
 95 | 
 96 | ### node module
 97 | 
 98 | * Install
 99 | 
100 | ```
101 | npm install node-hanlp
102 | ```
103 | 
104 | * Config
105 |     - 配置文件路径 node_modules/node-hanlp/lib/hanlp-1.3.2/src-java/hanLP.properties
106 | 	- **请修改root为您的目录路径**
107 | 
108 | 	- 词典文件目录 ./data
109 | 	- 请下载词典 https://pan.baidu.com/s/1pKUVNYF 放入 ./data (约800MB文件) 目录下
110 | 
111 | * Usage
112 | 
113 | ```js
114 | const Hanlp = require("node-hanlp");
115 | //分词库初始化及配置
116 | const HanLP = new Hanlp({
117 | 	CustomDict : true, //使用自定义词典
118 | 	NameRecognize : true, //中国人名识别
119 | 	TranslatedNameRecognize : true , //音译人名识别
120 | 	JapaneseNameRecognize : true, //日本人名识别
121 | 	PlaceRecognize : true , //地名识别
122 | 	OrgRecognize : true //机构名识别
123 | });
124 | let words = HanLP.Tokenizer("商品和服务");
125 | ```
126 | 
127 | ### 标准分词 HanLP.Tokenizer( text )
128 | 	@param String text [文本]
129 | 	@return Object
130 | ```js
131 | let words = HanLP.Tokenizer("商品和服务");
132 | 
133 | [
134 |   { word: '商品', nature: 'n', offset: 0 },
135 |   { word: '和', nature: 'cc', offset: 0 },
136 |   { word: '服务', nature: 'vn', offset: 0 }
137 | ]
138 | ```
139 | ### NLP分词 HanLP.NLPTokenizer( text )
140 | 	@param String text [文本]
141 | 	@return Object
142 | ```js
143 | let words = HanLP.NLPTokenizer("中国科学院计算技术研究所的宗成庆教授正在教授自然语言处理课程");
144 | 
145 | [
146 |   { word: '中国科学院计算技术研究所', nature: 'nt', offset: 0 },
147 |   { word: '的', nature: 'ude1', offset: 0 },
148 |   { word: '宗成庆', nature: 'nr', offset: 0 },
149 |   { word: '教授', nature: 'nnt', offset: 0 },
150 |   ...
151 | ]
152 | ```
153 | ### 索引分词 HanLP.IndexTokenizer( text )
154 | 	@param String text [文本]
155 | 	@return Object
156 | ```js
157 | let words = HanLP.IndexTokenizer("主副食品");
158 | 
159 | [
160 |   { word: '主副食品', nature: 'n', offset: 0 },
161 |   { word: '主副食', nature: 'j', offset: 0 },
162 |   { word: '副食', nature: 'n', offset: 1 },
163 |   { word: '副食品', nature: 'n', offset: 1 },
164 |   { word: '食品', nature: 'n', offset: 2 }
165 | ]
166 | ```
167 | ### CRF分词 HanLP.CRFTokenizer( text )
168 | 	@param String text [文本]
169 | 	@return Object
170 | ```js
171 | let words = HanLP.CRFTokenizer("你好，欢迎使用HanLP汉语处理包！");
172 | 
173 | [
174 |   { word: '你好', nature: 'vl', offset: 0 },
175 |   { word: '，', nature: 'w', offset: 0 },
176 |   { word: '欢迎', nature: 'v', offset: 0 },
177 |   { word: '使用', nature: 'v', offset: 0 },
178 |   { word: 'HanLP', nature: 'nz', offset: 0 },
179 |   { word: '汉语', nature: 'gi', offset: 0 },
180 |   ...
181 | ]
182 | ```
183 | ### 去除停用词分词 HanLP.NoStopWord( text )
184 | 	@param String text [文本]
185 | 	@return Object
186 | ```js
187 | let words = HanLP.NoStopWord("你好，欢迎使用HanLP汉语处理包！");
188 | 
189 | [
190 |   { word: '你好', nature: 'vl', offset: 0 },
191 |   { word: '欢迎', nature: 'v', offset: 0 },
192 |   { word: '使用', nature: 'v', offset: 0 },
193 |   { word: 'HanLP', nature: 'nz', offset: 0 },
194 |   { word: '汉语', nature: 'gi', offset: 0 },
195 |   ...
196 | ]
197 | ```
198 | 
199 | ### 最短路分词 HanLP.ShortSegment( text )
200 | 	@param String text [文本]
201 | 	@return Object
202 | ```js
203 | let words = HanLP.ShortSegment("今天，刘志军案的关键人物,山西女商人丁书苗在市二中院出庭受审。");
204 | 
205 | [
206 |   { word: '今天', nature: 't', offset: 0 },
207 |   { word: '，', nature: 'w', offset: 0 },
208 |   { word: '刘志军', nature: 'nr', offset: 0 },
209 |   { word: '案', nature: 'ng', offset: 0 },
210 |   { word: '的', nature: 'ude1', offset: 0 },
211 |   { word: '关键', nature: 'n', offset: 0 },
212 |   ...
213 | ]
214 | ```
215 | ### N-最短分词 HanLP.NShortSegment( text )
216 | 	@param String text [文本]
217 | 	@return Object
218 | ```js
219 | let words = HanLP.NShortSegment("刘喜杰石国祥会见吴亚琴先进事迹报告团成员");
220 | 
221 | [
222 |   { word: '刘喜杰', nature: 'nr', offset: 0 },
223 |   { word: '石国祥', nature: 'nr', offset: 0 },
224 |   { word: '会见', nature: 'v', offset: 0 },
225 |   { word: '吴亚琴', nature: 'nr', offset: 0 },
226 |   { word: '先进', nature: 'a', offset: 0 },
227 |   ...
228 | ]
229 | ```
230 | ### 极速词典分词 HanLP.SpeedTokenizer( text )
231 | 	@param String text [文本]
232 | 	@return Object
233 | ```js
234 | let words = HanLP.SpeedTokenizer("江西鄱阳湖干枯，中国最大淡水湖变成大草原");
235 | 
236 | [
237 |   { word: '江西', offset: 0 },
238 |   { word: '鄱阳湖', offset: 2 },
239 |   { word: '干枯', offset: 5 },
240 |   { word: '，', offset: 7 },
241 |   { word: '中国', offset: 8 },
242 | ]
243 | ```
244 | ### 关键词提取 HanLP.Keyword( text , nTop )
245 | 	@param String text [文本]
246 | 	@param Number nTop [关键词个数，默认5个]
247 | 	@return Object
248 | ```js
249 | let words = HanLP.Keyword("江西鄱阳湖干枯，中国最大淡水湖变成大草原" , 3);
250 | 
251 | [ '中国', '最大', '淡水湖' ]
252 | ```
253 | 
254 | ### 短语提取 HanLP.Phrase( text , nTop )
255 | 	@param String text [文本]
256 | 	@param Number nTop [短语个数，默认3个]
257 | 	@return Object
258 | ```js
259 | let words = HanLP.Phrase("江西鄱阳湖干枯，中国最大淡水湖变成大草原" , 2 );
260 | 
261 | [ '中国最大', '变成草原' ]
262 | ```
263 | ### 提取文章摘要 HanLP.Summary( text , nTop )
264 | 	@param String text [文本]
265 | 	@param Number nTop [文章摘要条数，默认3条]
266 | 	@return Object
267 | ```js
268 | let text = "据美国福克斯新闻报道，俄罗斯黑海舰队一艘护卫舰格里戈罗维奇海军上将号，正在驶向美国军舰发射导弹攻击叙利亚的区域。该护卫舰是俄罗斯最先进的护卫舰，2016年才刚服役，除防空、反舰导弹外，也可以发射巡航导弹。格里戈罗维奇海军上将号原定于本周访问叙利亚的塔尔图斯港。"
269 | 
270 | let words = HanLP.Summary( text , 3);
271 | 
272 | [
273 |   '俄罗斯黑海舰队一艘护卫舰格里戈罗维奇海军上将号',
274 |   '格里戈罗维奇海军上将号原定于本周访问叙利亚的塔尔图斯港',
275 |   '正在驶向美国军舰发射导弹攻击叙利亚的区域'
276 | ]
277 | ```
278 | ### 文本推荐 HanLP.Suggester( list, words,  Ntop )
279 | 	@param Array list 	句子列表
280 | 	@param Array words 词语
281 | 	@param Number nTop 相似句子推荐个数，默认1个
282 | 	@return Object
283 | 
284 | 	句子级别，从一系列句子中挑出与输入句子最相似的那一个
285 | 
286 | ### 语义距离 HanLP.WordDistance( words )
287 | 	@param Array words 	词
288 | 	@return Object
289 | 
290 | ### 简繁转换 HanLP.ConversionFont( text , type )
291 | 	@param String text 文本
292 | 	@param String type 类型 jt简体|ft繁体，默认jt
293 | 	@return String
294 | 
295 | ### 拼音转换 HanLP.Pinyin( text , type )
296 | 	@param String text 文本
297 | 	@param String type 类型 num数字音调|tone符号音调|outtone无音调|shengmu声母|yunmu韵母|head输入法头，默认outtone
298 | 	@return Object
299 | 
300 | 


--------------------------------------------------------------------------------
/lib/index.js:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * HanLP node版汉语言处理包
  3 |  * @authors chandre (chandre21cn@gmail.com)
  4 |  * @date    2017-04-08 18:24:04
  5 |  * @version 1.0.0
  6 |  */
  7 | 
  8 | const java 		= require("java");
  9 | const _ 		= require("underscore");
 10 | const path 		= require("path");
 11 | 
 12 | const BaseDir 		= path.resolve( __dirname , "./hanlp-1.3.2" );
 13 | java.options 		= ['-Xms1024m','-Xmx1024m'];
 14 | java.classpath[1] 	= BaseDir + "/src-java";
 15 | java.asyncOptions 	= {
 16 | 	asyncSuffix: undefined,
 17 |   	syncSuffix: "Sync",  
 18 | 	ifReadOnlySuffix: "_alt",
 19 | };
 20 | 
 21 | java.classpath.push( BaseDir + "/hanlp-1.3.2.jar" );
 22 | java.classpath.push( BaseDir + "/gson-2.2.2.jar" );
 23 | 
 24 | const HanLP 		= java.import('com.hankcs.hanlp.HanLP');
 25 | const Gson 			= java.newInstanceSync('com.google.gson.Gson');
 26 | 
 27 | module.exports = class HANLP {
 28 | 
 29 | 	constructor( args ) {
 30 | 
 31 | 		let opts = this.options = _.extend({
 32 | 			CustomDict : true, //使用自定义词典
 33 | 			NameRecognize : true, //中国人名识别
 34 | 			TranslatedNameRecognize : true , //音译人名识别
 35 | 			JapaneseNameRecognize : true, //日本人名识别
 36 | 			PlaceRecognize : true , //地名识别
 37 | 			OrgRecognize : true //机构名识别
 38 | 		}, args );
 39 | 
 40 | 		let Tokenizer = java.import('com.hankcs.hanlp.tokenizer.StandardTokenizer');
 41 | 		
 42 | 		// 识别设置
 43 | 		Tokenizer.SEGMENT.enableCustomDictionarySync( opts.CustomDict ) //使用自定义词典
 44 | 			.enableNameRecognizeSync(opts.NameRecognize) //中国人名识别
 45 | 			.enableTranslatedNameRecognizeSync(opts.TranslatedNameRecognize) //音译人名识别
 46 | 			.enableJapaneseNameRecognizeSync(opts.JapaneseNameRecognize) //日本人名识别
 47 | 			.enablePlaceRecognizeSync( opts.PlaceRecognize ) //地名识别
 48 | 			.enableOrganizationRecognizeSync( opts.OrgRecognize ); //机构名识别
 49 | 	}
 50 | 
 51 | 	/**
 52 | 	 * [Tokenizer 标准分词]
 53 | 	 * @param {[String]} text [文本]
 54 | 	 * @return Object
 55 | 	 */
 56 | 	Tokenizer ( text ) {
 57 | 		let StandardTokenizer = java.import('com.hankcs.hanlp.tokenizer.StandardTokenizer');
 58 | 		let words = StandardTokenizer.segmentSync( text );
 59 | 		return  JSON.parse( Gson.toJsonSync(words) );
 60 | 	}
 61 | 
 62 | 	/**
 63 | 	 * [NLPTokenizer NLP分词]
 64 | 	 * @param {[String]} text [文本]
 65 | 	 * @return Object
 66 | 	 */
 67 | 	NLPTokenizer ( text ) {
 68 | 		let NLPTokenizer = java.import('com.hankcs.hanlp.tokenizer.NLPTokenizer');
 69 | 		let words = NLPTokenizer.segmentSync( text );
 70 | 		return  JSON.parse( Gson.toJsonSync(words) );
 71 | 	}
 72 | 
 73 | 	/**
 74 | 	 * [IndexTokenizer 索引分词]
 75 | 	 * @param {[String]} text [文本]
 76 | 	 * @return Object
 77 | 	 */
 78 | 	IndexTokenizer ( text ) {
 79 | 		let IndexTokenizer = java.import('com.hankcs.hanlp.tokenizer.IndexTokenizer');
 80 | 		let words = IndexTokenizer.segmentSync( text );
 81 | 		return  JSON.parse( Gson.toJsonSync(words) );
 82 | 	}
 83 | 
 84 | 	
 85 | 	/**
 86 | 	 * [CRFTokenizer CRF分词]
 87 | 	 * @param {[String]} text [文本]
 88 | 	 * @return Object
 89 | 	 */
 90 | 	CRFTokenizer ( text ) {
 91 | 		let CRFSegment = java.newInstanceSync('com.hankcs.hanlp.seg.CRF.CRFSegment');
 92 | 		let words = CRFSegment.segSync( text );
 93 | 		return  JSON.parse( Gson.toJsonSync(words) );
 94 | 	}
 95 | 
 96 | 	/**
 97 | 	 * [ShortSegment 最短路分词]
 98 | 	 * @param {[String]} text [文本]
 99 | 	 * @return Object
100 | 	 */
101 | 	ShortSegment ( text ) {
102 | 		let opts = this.options,
103 | 			ShortSegment = java.newInstanceSync('com.hankcs.hanlp.seg.Dijkstra.DijkstraSegment');
104 | 
105 | 		let words = ShortSegment.segSync( text );
106 | 		return  JSON.parse( Gson.toJsonSync(words) );
107 | 	}
108 | 
109 | 	/**
110 | 	 * [NShortSegment N-最短分词]
111 | 	 * @param {[String]} text [文本]
112 | 	 * @return Object
113 | 	 */
114 | 	NShortSegment ( text ) {
115 | 		let NShortSegment = java.newInstanceSync('com.hankcs.hanlp.seg.NShort.NShortSegment');
116 | 		let words = NShortSegment.segSync( text );
117 | 		return  JSON.parse( Gson.toJsonSync(words) );
118 | 	}
119 | 
120 | 	/**
121 | 	 * [SpeedTokenizer 极速词典分词]
122 | 	 * @param {[String]} text [文本]
123 | 	 * @return Object
124 | 	 */
125 | 	SpeedTokenizer ( text ) {
126 | 		let SpeedTokenizer = java.import('com.hankcs.hanlp.tokenizer.SpeedTokenizer');
127 | 		let words = SpeedTokenizer.segmentSync( text );
128 | 		return  JSON.parse( Gson.toJsonSync(words) );
129 | 	}
130 | 
131 | 	/**
132 | 	 * [NoStopWord 去除停用词分词]
133 | 	 * @param {[String]} text [文本]
134 | 	 * @return Object
135 | 	 */
136 | 	NoStopWord ( text ) {
137 | 		let opts = this.options,
138 | 			Tokenizer = java.import('com.hankcs.hanlp.tokenizer.StandardTokenizer'),
139 | 			StopWordDict = java.import('com.hankcs.hanlp.dictionary.stopword.CoreStopWordDictionary');
140 | 		let words = Tokenizer.segmentSync( text );
141 | 		StopWordDict.applySync( words );
142 | 		return  JSON.parse( Gson.toJsonSync( words ) );
143 | 	}
144 | 
145 | 	/**
146 | 	 * [Keyword 关键词提取]
147 | 	 * @param {[String]} text [文本]
148 | 	 * @param {[Number]} Ntop [关键词个数，默认5个]
149 | 	 * @return Object
150 | 	 */
151 | 	Keyword ( text , Ntop ) {
152 | 		let words = HanLP.extractKeywordSync( text , Ntop || 5 );
153 | 		return  JSON.parse( Gson.toJsonSync(words) );
154 | 	}
155 | 
156 | 	/**
157 | 	 * [Phrase 短语提取]
158 | 	 * @param {[String]} text [文本]
159 | 	 * @param {[Number]} Ntop [短语个数，默认3个]
160 | 	 * @return Object
161 | 	 */
162 | 	Phrase ( text , Ntop ) {
163 | 		let words = HanLP.extractPhraseSync( text , Ntop || 3 );
164 | 		return  JSON.parse( Gson.toJsonSync(words) );
165 | 	}
166 | 
167 | 	/**
168 | 	 * [Summary 提取文章摘要]
169 | 	 * @param {[String]} text [文本]
170 | 	 * @param {[Number]} Ntop [文章摘要个数，默认3个]
171 | 	 * @return Object
172 | 	 */
173 | 	Summary ( text , Ntop ) {
174 | 		let words = HanLP.extractSummarySync( text , Ntop || 3 );
175 | 		return  JSON.parse( Gson.toJsonSync(words) );
176 | 	}
177 | 
178 | 	/**
179 | 	 * [ Suggester 文本推荐(句子级别，从一系列句子中挑出与输入句子最相似的那一个)]
180 | 	 * @param {[Array]} list  [句子列表]
181 | 	 * @param {[Array]} words [词语]
182 | 	 * @param {[Number]} Ntop [相似句子个数，默认1个]
183 | 	 * @return Object
184 | 	 */
185 | 	Suggester ( list, words,  Ntop ) {
186 | 		let Suggester = java.newInstanceSync('com.hankcs.hanlp.suggest.Suggester');
187 | 		list = _.isArray( list ) ? list : new Array( list );
188 | 		words = _.isArray( words ) ? words : new Array( words );
189 | 
190 | 		list.forEach( ( item , i ) => {
191 | 			Suggester.addSentenceSync( item );
192 | 		});
193 | 
194 | 		let data = [];
195 | 		words.forEach( (item , i)=> {
196 | 			data[i] = {
197 | 				word : item,
198 | 				value : JSON.parse( Gson.toJsonSync( Suggester.suggestSync( item , Ntop || 1 ) ) ),
199 | 			};
200 | 		});
201 | 		return data;
202 | 	}
203 | 
204 | 	/**
205 | 	 * [WordDistance 语义距离]
206 | 	 * @param {[Array]} text [词]
207 | 	 * @return Object
208 | 	 */
209 | 	WordDistance ( words ) {
210 | 		words = _.isArray( words ) ? words : new Array( words );
211 | 		let data  = [],
212 | 			SynonymDictionary = java.newInstanceSync('com.hankcs.hanlp.dictionary.CoreSynonymDictionary');
213 | 
214 | 		words.forEach( ( wordA , i ) => {
215 | 			let tmp = [];
216 | 			words.forEach( ( wordB , index ) => {
217 | 				tmp[index] = JSON.parse( Gson.toJsonSync( SynonymDictionary.distanceSync( wordA , wordB ) ) )
218 | 			});
219 | 			data[i]= {
220 | 				word : wordA,
221 | 				value : tmp
222 | 			};
223 | 		});
224 | 
225 | 		return data
226 | 	}
227 | 
228 | 	/**
229 | 	 * [ConversionFont 简繁转换]
230 | 	 * @param {[type]} text [文本]
231 | 	 * @param {[type]} type [类型 jt简体|ft繁体  默认jt ]
232 | 	 * @return Object
233 | 	 */
234 | 	ConversionFont ( text , type ) {
235 | 		type = type || "jt"
236 | 		if ( type.toLowerCase() === "ft" ) {
237 | 			return HanLP.convertToTraditionalChineseSync( text );
238 | 		} else {
239 | 			return HanLP.convertToSimplifiedChineseSync( text );
240 | 		}
241 | 	}
242 | 
243 | 	/**
244 | 	 * [Pinyin 拼音转换]
245 | 	 * @param {[type]} text [文本]
246 | 	 * @param {[type]} type [类型 num数字音调|tone符号音调|outtone无音调|shengmu声母|yunmu韵母|head输入法头  默认outtone ]
247 | 	 * @return Object
248 | 	 */
249 | 	Pinyin ( text , type ) {
250 | 		type = type || "jt"
251 | 		let PinYin = HanLP.convertToPinyinListSync( text ).toArraySync();
252 | 		type = type.toLowerCase();
253 | 
254 | 		return PinYin.map( (item , i ) => {
255 | 			switch (type) {
256 | 				case "num":
257 | 					return item.toStringSync();
258 | 					break;
259 | 				case "shengmu":
260 | 					return item.getShengmuSync().toStringSync();
261 | 					break;
262 | 				case "yunmu":
263 | 					return item.getYunmuSync().toStringSync();
264 | 					break;
265 | 				case "head":
266 | 					return item.getHeadSync().toStringSync();
267 | 					break;
268 | 				case "tone":
269 | 					return item.getPinyinWithToneMarkSync();
270 | 				case "outtone":
271 | 				default : 
272 | 					return item.getPinyinWithoutToneSync();
273 | 			}
274 | 		})
275 | 	}
276 | 
277 | }


--------------------------------------------------------------------------------