├── .editorconfig ├── .gitignore ├── .npmignore ├── .travis.yml ├── LICENSE ├── README.md ├── bin └── install.js ├── index.js ├── package-lock.json ├── package.json ├── src ├── classes │ ├── abstract.ts │ ├── collections.ts │ ├── index.ts │ └── tokens.ts ├── index.ts └── processor │ ├── index.ts │ ├── processor.ts │ └── tokens.ts ├── test ├── processor.spec.js └── tokens.spec.js ├── tsconfig.json └── wallaby.js /.editorconfig: -------------------------------------------------------------------------------- 1 | root = true 2 | 3 | [*] 4 | indent_style = space 5 | indent_size = 2 6 | end_of_line = lf 7 | charset = utf-8 8 | insert_final_newline = true 9 | trim_trailing_whitespace = true 10 | 11 | [{*.json,.*rc,*.yml}] 12 | indent_style = space 13 | indent_size = 2 14 | 15 | [*.md] 16 | trim_trailing_whitespace = false 17 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Logs 2 | logs 3 | *.log 4 | 5 | # Runtime data 6 | pids 7 | *.pid 8 | *.seed 9 | 10 | # Dependency directory 11 | node_modules 12 | jar 13 | 14 | # Transpiled result 15 | lib -------------------------------------------------------------------------------- /.npmignore: -------------------------------------------------------------------------------- 1 | **/* 2 | !lib/**/* 3 | !bin/**/* 4 | !index.js 5 | !index.d.ts 6 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: java 2 | sudo: false 3 | jdk: 4 | - oraclejdk8 5 | env: 6 | - NODE_VERSION=7 CC=clang CXX=clang++ 7 | - NODE_VERSION=6 CC=clang CXX=clang++ 8 | - NODE_VERSION=4 CC=clang CXX=clang++ 9 | before_install: 10 | - nvm install $NODE_VERSION 11 | before_script: 12 | - echo $JAVA_OPTS 13 | - export JAVA_OPTS=-Xmx512m 14 | - npm install 15 | script: 16 | - npm test 17 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 
29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. 
Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # open-korean-text-node 2 | 3 | [![npm version](https://badge.fury.io/js/open-korean-text-node.svg)](https://badge.fury.io/js/open-korean-text-node) 4 | [![Build Status](https://travis-ci.org/open-korean-text/open-korean-text-wrapper-node-2.svg)](https://travis-ci.org/open-korean-text/open-korean-text-wrapper-node-2) 5 | 6 | A Node.js binding for [open-korean-text](https://github.com/open-korean-text/open-korean-text) via the [node-java](https://github.com/joeferner/node-java) interface. 7 | 8 | ## Dependency 9 | 10 | Currently wraps [open-korean-text 2.2.0](https://github.com/open-korean-text/open-korean-text/releases/tag/open-korean-text-2.2.0). 11 | 12 | 현재 이 프로젝트는 [open-korean-text 2.2.0](https://github.com/open-korean-text/open-korean-text/releases/tag/open-korean-text-2.2.0)을 사용중입니다. 13 | 14 | 15 | ## Requirements 16 | 17 | Since it uses Java code compiled with Java 8, make sure you have both the Java 8 JDK and JRE installed. 18 | For more details about installing the Java interface, see the installation notes at the links below. 19 | 20 | 이 프로젝트는 Java 8로 컴파일된 코드를 사용하기 때문에, Java 8 JDK/JRE가 설치되어 있어야 합니다. 21 | Java interface의 설치에 관련된 더 자세한 사항은 아래 링크에서 확인하세요. 22 | 23 | - [node-gyp#installation](https://github.com/nodejs/node-gyp#installation) 24 | - [node-java#installation](https://github.com/joeferner/node-java#installation) 25 | 26 | ## Installation 27 | 28 | ```bash 29 | npm install --save open-korean-text-node 30 | ``` 31 | 32 | ### Usage 33 | 34 | ```typescript 35 | import OpenKoreanText from 'open-korean-text-node'; 36 | // or 37 | const OpenKoreanText = require('open-korean-text-node').default; 38 | ``` 39 | 40 | - See the [API](#api) section for more information. 41 | 42 | 43 | ## Examples 44 | 45 | - [test/processor.spec.js](./test/processor.spec.js) 46 | - [test/tokens.spec.js](./test/tokens.spec.js) 47 | 48 | ## API 49 | 50 | ### OpenKoreanText 51 | 52 | #### Tokenizing 53 | 54 | ```typescript 55 | OpenKoreanText.tokenize(text: string): Promise<IntermediaryTokens>; 56 | OpenKoreanText.tokenizeSync(text: string): IntermediaryTokens; 57 | ``` 58 | 59 | - `text` a target string to tokenize 60 | 61 | #### Detokenizing 62 | 63 | ```typescript 64 | OpenKoreanText.detokenize(tokens: IntermediaryTokensObject): Promise<string>; 65 | OpenKoreanText.detokenize(words: string[]): Promise<string>; 66 | OpenKoreanText.detokenize(...words: string[]): Promise<string>; 67 | OpenKoreanText.detokenizeSync(tokens: IntermediaryTokensObject): string; 68 | OpenKoreanText.detokenizeSync(words: string[]): string; 69 | OpenKoreanText.detokenizeSync(...words: string[]): string; 70 | ``` 71 | 72 | - `tokens` an intermediary token object from `tokenize` 73 | - `words` an array of words to detokenize 74 | 75 | #### Phrase Extracting 76 | 77 | ```typescript 78 | OpenKoreanText.extractPhrases(tokens: IntermediaryTokens, options?: ExcludePhrasesOptions): Promise<KoreanToken[]>; 79 | OpenKoreanText.extractPhrasesSync(tokens: IntermediaryTokens, options?: ExcludePhrasesOptions): KoreanToken[]; 80 | ``` 81 | 82 | - `tokens` an intermediary token object from `tokenize` or `stem` 83 | - `options` an options object for phrase extraction, where 84 | - `filterSpam` - a flag to filter spam tokens; defaults to `true` 85 | - `includeHashtag` - a flag to include hashtag tokens;
defaults to `false` 86 | 87 | #### Normalizing 88 | 89 | ```typescript 90 | OpenKoreanText.normalize(text: string): Promise<string>; 91 | OpenKoreanText.normalizeSync(text: string): string; 92 | ``` 93 | 94 | - `text` a target string to normalize 95 | 96 | #### Sentence Splitting 97 | 98 | ```typescript 99 | OpenKoreanText.splitSentences(text: string): Promise<Sentence[]>; 100 | OpenKoreanText.splitSentencesSync(text: string): Sentence[]; 101 | ``` 102 | 103 | - `text` a target string to split into sentences 104 | * returns an array of `Sentence` objects, each of which includes: 105 | * `text`: string - the sentence's text 106 | * `start`: number - the sentence's start position in the original string 107 | * `end`: number - the sentence's end position in the original string 108 | 109 | #### Custom Dictionary 110 | 111 | ```typescript 112 | OpenKoreanText.addNounsToDictionary(...words: string[]): Promise<void>; 113 | OpenKoreanText.addNounsToDictionarySync(...words: string[]): void; 114 | ``` 115 | 116 | - `words` words to add to the dictionary 117 | 118 | #### toJSON 119 | 120 | ```typescript 121 | OpenKoreanText.tokensToJsonArray(tokens: IntermediaryTokensObject, keepSpace?: boolean): Promise<KoreanToken[]>; 122 | OpenKoreanText.tokensToJsonArraySync(tokens: IntermediaryTokensObject, keepSpace?: boolean): KoreanToken[]; 123 | ``` 124 | 125 | - `tokens` an intermediary token object from `tokenize` or `stem` 126 | - `keepSpace` a flag to keep 'Space' tokens in the output; defaults to `false` (Space tokens are omitted) 127 | 128 | ### **IntermediaryToken** object 129 | 130 | An intermediary token object used for internal processing. 131 | It provides convenience wrapper functions to process text without using the processor object. 132 | 133 | ```typescript 134 | tokens.extractPhrases(options?: ExcludePhrasesOptions): Promise<KoreanToken[]>; 135 | tokens.extractPhrasesSync(options?: ExcludePhrasesOptions): KoreanToken[]; 136 | tokens.detokenize(): Promise<string>; 137 | tokens.detokenizeSync(): string; 138 | tokens.toJSON(): KoreanToken[]; 139 | ``` 140 | 141 | - NOTE: the `tokens.toJSON()` method is equivalent to `OpenKoreanText.tokensToJsonArraySync(tokens, true)` 142 | 143 | ### **KoreanToken** object 144 | 145 | A JSON output object containing: 146 | 147 | - `text`: string - the token's text 148 | - `stem`: string - the token's stem, if any 149 | - `pos`: string - the type of the token. Possible entries are: 150 | - Word level POS: 151 | `Noun`, `Verb`, `Adjective`, 152 | `Adverb`, `Determiner`, `Exclamation`, 153 | `Josa`, `Eomi`, `PreEomi`, `Conjunction`, 154 | `NounPrefix`, `VerbPrefix`, `Suffix`, `Unknown` 155 | - Chunk level POS: 156 | `Korean`, `Foreign`, `Number`, `KoreanParticle`, `Alpha`, 157 | `Punctuation`, `Hashtag`, `ScreenName`, 158 | `Email`, `URL`, `CashTag` 159 | - Functional POS: 160 | `Space`, `Others` 161 | - `offset`: number - the token's position in the original string 162 | - `length`: number - the length of the token's text 163 | - `isUnknown`: boolean - whether the token is unknown to the dictionary 164 | -------------------------------------------------------------------------------- /bin/install.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Created by rokoroku on 2016-08-23.
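 *
 * Clears the local ./jar directory and downloads the jar files listed under
 * "mavenDependencies" in package.json into it via node-wget.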
3 | */ 4 | 5 | 'use strict'; 6 | 7 | const fs = require('fs'); 8 | const url = require("url"); 9 | const path = require("path"); 10 | const wget = require('node-wget'); 11 | const dependencies = require('../package.json').mavenDependencies; 12 | 13 | function clearPath(path) { 14 | if (fs.existsSync(path)) { 15 | fs.readdirSync(path).forEach(function (file, index) { 16 | const curPath = path + "/" + file; 17 | if (fs.lstatSync(curPath).isDirectory()) { 18 | clearPath(curPath); 19 | fs.rmdirSync(curPath); 20 | } else { 21 | fs.unlinkSync(curPath); 22 | } 23 | }); 24 | } else { 25 | fs.mkdirSync(path); 26 | } 27 | } 28 | 29 | function getDependencies(dependencies) { 30 | for (const key in dependencies) { 31 | const repository = dependencies[key]; 32 | const filename = path.basename(url.parse(repository).pathname); 33 | wget({ url: repository, dest: 'jar/' + filename }); 34 | } 35 | } 36 | 37 | clearPath('./jar'); 38 | getDependencies(dependencies); -------------------------------------------------------------------------------- /index.js: -------------------------------------------------------------------------------- 1 | const fs = require('fs'); 2 | const path = require('path'); 3 | const java = require('java'); 4 | 5 | // setup java interface 6 | java.asyncOptions = { 7 | syncSuffix: '', 8 | asyncSuffix: 'Async', 9 | promiseSuffix: 'Promise', 10 | promisify: require('es6-promisify').promisify 11 | }; 12 | 13 | // setup dependencies 14 | const baseDir = path.join(__dirname, 'jar'); 15 | const dependencies = fs.readdirSync(baseDir); 16 | dependencies.forEach((dependency) => { 17 | java.classpath.push(baseDir + "/" + dependency); 18 | }); 19 | 20 | // export 21 | module.exports = require('./lib'); 22 | -------------------------------------------------------------------------------- /package-lock.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "open-korean-text-node", 3 | "version": "2.2.0", 4 | "lockfileVersion": 1, 5 | "requires": true, 6 | "dependencies": { 7 | "@types/java": { 8 | "version": "0.7.32", 9 | "resolved": "https://registry.npmjs.org/@types/java/-/java-0.7.32.tgz", 10 | "integrity": "sha512-XXujvvigeT4Uwz7Sk+zbZjN/D+MDY2sGWJQWZj/sg4ITGkCPC6mqncZNlkcc36BZoALqWQ15dpzSc0nj+EDFwA==", 11 | "dev": true, 12 | "requires": { 13 | "@types/node": "9.4.7" 14 | } 15 | }, 16 | "@types/node": { 17 | "version": "9.4.7", 18 | "resolved": "http://registry.npmjs.org/@types/node/-/node-9.4.7.tgz", 19 | "integrity": "sha512-4Ba90mWNx8ddbafuyGGwjkZMigi+AWfYLSDCpovwsE63ia8w93r3oJ8PIAQc3y8U+XHcnMOHPIzNe3o438Ywcw==", 20 | "dev": true 21 | }, 22 | "asn1": { 23 | "version": "0.1.11", 24 | "resolved": "https://registry.npmjs.org/asn1/-/asn1-0.1.11.tgz", 25 | "integrity": "sha1-VZvhg3bQik7E2+gId9J4GGObLfc=" 26 | }, 27 | "assert-plus": { 28 | "version": "0.1.5", 29 | "resolved": "https://registry.npmjs.org/assert-plus/-/assert-plus-0.1.5.tgz", 30 | "integrity": "sha1-7nQAlBMALYTOxyGcasgRgS5yMWA=" 31 | }, 32 | "assertion-error": { 33 | "version": "1.1.0", 34 | "resolved": "https://registry.npmjs.org/assertion-error/-/assertion-error-1.1.0.tgz", 35 | "integrity": "sha512-jgsaNduz+ndvGyFt3uSuWqvy4lCnIJiovtouQN5JZHOKCS2QuhEdbcQHFhVksz2N2U9hXJo8odG7ETyWlEeuDw==", 36 | "dev": true 37 | }, 38 | "async": { 39 | "version": "2.5.0", 40 | "resolved": "https://registry.npmjs.org/async/-/async-2.5.0.tgz", 41 | "integrity": "sha512-e+lJAJeNWuPCNyxZKOBdaJGyLGHugXVQtrAwtuAe2vhxTYxFTKE73p8JuTmdH0qdQZtDvI4dhJwjZc5zsfIsYw==", 42 | "requires": { 43 | "lodash": "4.17.4" 
44 | } 45 | }, 46 | "aws-sign": { 47 | "version": "0.3.0", 48 | "resolved": "https://registry.npmjs.org/aws-sign/-/aws-sign-0.3.0.tgz", 49 | "integrity": "sha1-PYHKabR0seFlGHKLUcJP8Lvtxuk=" 50 | }, 51 | "balanced-match": { 52 | "version": "1.0.0", 53 | "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.0.tgz", 54 | "integrity": "sha1-ibTRmasr7kneFk6gK4nORi1xt2c=" 55 | }, 56 | "boom": { 57 | "version": "0.4.2", 58 | "resolved": "https://registry.npmjs.org/boom/-/boom-0.4.2.tgz", 59 | "integrity": "sha1-emNune1O/O+xnO9JR6PGffrukRs=", 60 | "requires": { 61 | "hoek": "0.9.1" 62 | } 63 | }, 64 | "brace-expansion": { 65 | "version": "1.1.11", 66 | "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.11.tgz", 67 | "integrity": "sha512-iCuPHDFgrHX7H2vEI/5xpz07zSHB00TpugqhmYtVmMO6518mCuRMoOYFldEBl0g187ufozdaHgWKcYFb61qGiA==", 68 | "requires": { 69 | "balanced-match": "1.0.0", 70 | "concat-map": "0.0.1" 71 | } 72 | }, 73 | "browser-stdout": { 74 | "version": "1.3.1", 75 | "resolved": "https://registry.npmjs.org/browser-stdout/-/browser-stdout-1.3.1.tgz", 76 | "integrity": "sha512-qhAVI1+Av2X7qelOfAIYwXONood6XlZE/fXaBSmW/T5SzLAmCgzi+eiWE7fUvbHaeNBQH13UftjpXxsfLkMpgw==", 77 | "dev": true 78 | }, 79 | "chai": { 80 | "version": "4.1.2", 81 | "resolved": "https://registry.npmjs.org/chai/-/chai-4.1.2.tgz", 82 | "integrity": "sha1-D2RYS6ZC8PKs4oBiefTwbKI61zw=", 83 | "dev": true, 84 | "requires": { 85 | "assertion-error": "1.1.0", 86 | "check-error": "1.0.2", 87 | "deep-eql": "3.0.1", 88 | "get-func-name": "2.0.0", 89 | "pathval": "1.1.0", 90 | "type-detect": "4.0.8" 91 | } 92 | }, 93 | "check-error": { 94 | "version": "1.0.2", 95 | "resolved": "https://registry.npmjs.org/check-error/-/check-error-1.0.2.tgz", 96 | "integrity": "sha1-V00xLt2Iu13YkS6Sht1sCu1KrII=", 97 | "dev": true 98 | }, 99 | "combined-stream": { 100 | "version": "0.0.7", 101 | "resolved": "https://registry.npmjs.org/combined-stream/-/combined-stream-0.0.7.tgz", 102 | "integrity": "sha1-ATfmV7qlp1QcV6w3rF/AfXO03B8=", 103 | "requires": { 104 | "delayed-stream": "0.0.5" 105 | } 106 | }, 107 | "commander": { 108 | "version": "2.11.0", 109 | "resolved": "https://registry.npmjs.org/commander/-/commander-2.11.0.tgz", 110 | "integrity": "sha512-b0553uYA5YAEGgyYIGYROzKQ7X5RAqedkfjiZxwi0kL1g3bOaBNNZfYkzt/CL0umgD5wc9Jec2FbB98CjkMRvQ==", 111 | "dev": true 112 | }, 113 | "concat-map": { 114 | "version": "0.0.1", 115 | "resolved": "https://registry.npmjs.org/concat-map/-/concat-map-0.0.1.tgz", 116 | "integrity": "sha1-2Klr13/Wjfd5OnMDajug1UBdR3s=" 117 | }, 118 | "cookie-jar": { 119 | "version": "0.3.0", 120 | "resolved": "https://registry.npmjs.org/cookie-jar/-/cookie-jar-0.3.0.tgz", 121 | "integrity": "sha1-vJon1OK5fhhs1XyeIGPLmfpozMw=" 122 | }, 123 | "cryptiles": { 124 | "version": "0.2.2", 125 | "resolved": "https://registry.npmjs.org/cryptiles/-/cryptiles-0.2.2.tgz", 126 | "integrity": "sha1-7ZH/HxetE9N0gohZT4pIoNJvMlw=", 127 | "requires": { 128 | "boom": "0.4.2" 129 | } 130 | }, 131 | "ctype": { 132 | "version": "0.5.3", 133 | "resolved": "https://registry.npmjs.org/ctype/-/ctype-0.5.3.tgz", 134 | "integrity": "sha1-gsGMJGH3QRTvFsE1IkrQuRRMoS8=" 135 | }, 136 | "debug": { 137 | "version": "3.1.0", 138 | "resolved": "https://registry.npmjs.org/debug/-/debug-3.1.0.tgz", 139 | "integrity": "sha512-OX8XqP7/1a9cqkxYw2yXss15f26NKWBpDXQd0/uK/KPqdQhxbPa994hnzjcE2VqQpDslf55723cKPUOGSmMY3g==", 140 | "dev": true, 141 | "requires": { 142 | "ms": "2.0.0" 143 | } 144 | }, 145 | "deep-eql": { 146 | "version": 
"3.0.1", 147 | "resolved": "https://registry.npmjs.org/deep-eql/-/deep-eql-3.0.1.tgz", 148 | "integrity": "sha512-+QeIQyN5ZuO+3Uk5DYh6/1eKO0m0YmJFGNmFHGACpf1ClL1nmlV/p4gNgbl2pJGxgXb4faqo6UE+M5ACEMyVcw==", 149 | "dev": true, 150 | "requires": { 151 | "type-detect": "4.0.8" 152 | } 153 | }, 154 | "delayed-stream": { 155 | "version": "0.0.5", 156 | "resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-0.0.5.tgz", 157 | "integrity": "sha1-1LH0OpPoKW3+AmlPRoC8N6MTxz8=" 158 | }, 159 | "diff": { 160 | "version": "3.5.0", 161 | "resolved": "https://registry.npmjs.org/diff/-/diff-3.5.0.tgz", 162 | "integrity": "sha512-A46qtFgd+g7pDZinpnwiRJtxbC1hpgf0uzP3iG89scHk0AUC7A1TGxf5OiiOUv/JMZR8GOt8hL900hV0bOy5xA==", 163 | "dev": true 164 | }, 165 | "es6-promisify": { 166 | "version": "6.0.0", 167 | "resolved": "https://registry.npmjs.org/es6-promisify/-/es6-promisify-6.0.0.tgz", 168 | "integrity": "sha512-8Tbqjrb8lC85dd81haajYwuRmiU2rkqNAFnlvQOJeeKqdUloIlI+JcUqeJruV4rCm5Y7oNU7jfs2FbmxhRR/2g==" 169 | }, 170 | "escape-string-regexp": { 171 | "version": "1.0.5", 172 | "resolved": "https://registry.npmjs.org/escape-string-regexp/-/escape-string-regexp-1.0.5.tgz", 173 | "integrity": "sha1-G2HAViGQqN/2rjuyzwIAyhMLhtQ=", 174 | "dev": true 175 | }, 176 | "find-java-home": { 177 | "version": "0.2.0", 178 | "resolved": "https://registry.npmjs.org/find-java-home/-/find-java-home-0.2.0.tgz", 179 | "integrity": "sha512-nq5PFOHxE1VSEbdDVkLoA2bAcRnG4ETqJO8ipFq3glIWA52hdWCXYX3emuUyMAQfaqFU4Ea85gqcgaPmOApEPA==", 180 | "requires": { 181 | "which": "1.0.9", 182 | "winreg": "1.2.4" 183 | } 184 | }, 185 | "forever-agent": { 186 | "version": "0.5.2", 187 | "resolved": "https://registry.npmjs.org/forever-agent/-/forever-agent-0.5.2.tgz", 188 | "integrity": "sha1-bQ4JxJIflKJ/Y9O0nF/v8epMUTA=" 189 | }, 190 | "form-data": { 191 | "version": "0.1.4", 192 | "resolved": "https://registry.npmjs.org/form-data/-/form-data-0.1.4.tgz", 193 | "integrity": "sha1-kavXiKupcCsaq/qLwBAxoqyeOxI=", 194 | "requires": { 195 | "async": "0.9.2", 196 | "combined-stream": "0.0.7", 197 | "mime": "1.2.11" 198 | }, 199 | "dependencies": { 200 | "async": { 201 | "version": "0.9.2", 202 | "resolved": "https://registry.npmjs.org/async/-/async-0.9.2.tgz", 203 | "integrity": "sha1-rqdNXmHB+JlhO/ZL2mbUx48v0X0=" 204 | } 205 | } 206 | }, 207 | "fs.realpath": { 208 | "version": "1.0.0", 209 | "resolved": "https://registry.npmjs.org/fs.realpath/-/fs.realpath-1.0.0.tgz", 210 | "integrity": "sha1-FQStJSMVjKpA20onh8sBQRmU6k8=" 211 | }, 212 | "get-func-name": { 213 | "version": "2.0.0", 214 | "resolved": "https://registry.npmjs.org/get-func-name/-/get-func-name-2.0.0.tgz", 215 | "integrity": "sha1-6td0q+5y4gQJQzoGY2YCPdaIekE=", 216 | "dev": true 217 | }, 218 | "glob": { 219 | "version": "7.1.2", 220 | "resolved": "https://registry.npmjs.org/glob/-/glob-7.1.2.tgz", 221 | "integrity": "sha512-MJTUg1kjuLeQCJ+ccE4Vpa6kKVXkPYJ2mOCQyUuKLcLQsdrMCpBPUi8qVE6+YuaJkozeA9NusTAw3hLr8Xe5EQ==", 222 | "requires": { 223 | "fs.realpath": "1.0.0", 224 | "inflight": "1.0.6", 225 | "inherits": "2.0.3", 226 | "minimatch": "3.0.4", 227 | "once": "1.4.0", 228 | "path-is-absolute": "1.0.1" 229 | } 230 | }, 231 | "growl": { 232 | "version": "1.10.3", 233 | "resolved": "https://registry.npmjs.org/growl/-/growl-1.10.3.tgz", 234 | "integrity": "sha512-hKlsbA5Vu3xsh1Cg3J7jSmX/WaW6A5oBeqzM88oNbCRQFz+zUaXm6yxS4RVytp1scBoJzSYl4YAEOQIt6O8V1Q==", 235 | "dev": true 236 | }, 237 | "has-flag": { 238 | "version": "2.0.0", 239 | "resolved": 
"https://registry.npmjs.org/has-flag/-/has-flag-2.0.0.tgz", 240 | "integrity": "sha1-6CB68cx7MNRGzHC3NLXovhj4jVE=", 241 | "dev": true 242 | }, 243 | "hawk": { 244 | "version": "1.0.0", 245 | "resolved": "https://registry.npmjs.org/hawk/-/hawk-1.0.0.tgz", 246 | "integrity": "sha1-uQuxaYByhUEdp//LjdJZhQLTtS0=", 247 | "requires": { 248 | "boom": "0.4.2", 249 | "cryptiles": "0.2.2", 250 | "hoek": "0.9.1", 251 | "sntp": "0.2.4" 252 | } 253 | }, 254 | "he": { 255 | "version": "1.1.1", 256 | "resolved": "https://registry.npmjs.org/he/-/he-1.1.1.tgz", 257 | "integrity": "sha1-k0EP0hsAlzUVH4howvJx80J+I/0=", 258 | "dev": true 259 | }, 260 | "hoek": { 261 | "version": "0.9.1", 262 | "resolved": "https://registry.npmjs.org/hoek/-/hoek-0.9.1.tgz", 263 | "integrity": "sha1-PTIkYrrfB3Fup+uFuviAec3c5QU=" 264 | }, 265 | "http-signature": { 266 | "version": "0.10.1", 267 | "resolved": "https://registry.npmjs.org/http-signature/-/http-signature-0.10.1.tgz", 268 | "integrity": "sha1-T72sEyVZqoMjEh5UB3nAoBKyfmY=", 269 | "requires": { 270 | "asn1": "0.1.11", 271 | "assert-plus": "0.1.5", 272 | "ctype": "0.5.3" 273 | } 274 | }, 275 | "inflight": { 276 | "version": "1.0.6", 277 | "resolved": "https://registry.npmjs.org/inflight/-/inflight-1.0.6.tgz", 278 | "integrity": "sha1-Sb1jMdfQLQwJvJEKEHW6gWW1bfk=", 279 | "requires": { 280 | "once": "1.4.0", 281 | "wrappy": "1.0.2" 282 | } 283 | }, 284 | "inherits": { 285 | "version": "2.0.3", 286 | "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.3.tgz", 287 | "integrity": "sha1-Yzwsg+PaQqUC9SRmAiSA9CCCYd4=" 288 | }, 289 | "java": { 290 | "version": "0.9.0", 291 | "resolved": "https://registry.npmjs.org/java/-/java-0.9.0.tgz", 292 | "integrity": "sha1-1J2iw6rV4stYmpaKgKM0/nMIoP4=", 293 | "requires": { 294 | "async": "2.5.0", 295 | "find-java-home": "0.2.0", 296 | "glob": "7.1.2", 297 | "lodash": "4.17.4", 298 | "nan": "2.7.0" 299 | } 300 | }, 301 | "json-stringify-safe": { 302 | "version": "5.0.1", 303 | "resolved": "https://registry.npmjs.org/json-stringify-safe/-/json-stringify-safe-5.0.1.tgz", 304 | "integrity": "sha1-Epai1Y/UXxmg9s4B1lcB4sc1tus=" 305 | }, 306 | "lodash": { 307 | "version": "4.17.4", 308 | "resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.4.tgz", 309 | "integrity": "sha1-eCA6TRwyiuHYbcpkYONptX9AVa4=" 310 | }, 311 | "mime": { 312 | "version": "1.2.11", 313 | "resolved": "https://registry.npmjs.org/mime/-/mime-1.2.11.tgz", 314 | "integrity": "sha1-WCA+7Ybjpe8XrtK32evUfwpg3RA=" 315 | }, 316 | "minimatch": { 317 | "version": "3.0.4", 318 | "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.0.4.tgz", 319 | "integrity": "sha512-yJHVQEhyqPLUTgt9B83PXu6W3rx4MvvHvSUvToogpwoGDOUQ+yDrR0HRot+yOCdCO7u4hX3pWft6kWBBcqh0UA==", 320 | "requires": { 321 | "brace-expansion": "1.1.11" 322 | } 323 | }, 324 | "minimist": { 325 | "version": "0.0.8", 326 | "resolved": "https://registry.npmjs.org/minimist/-/minimist-0.0.8.tgz", 327 | "integrity": "sha1-hX/Kv8M5fSYluCKCYuhqp6ARsF0=", 328 | "dev": true 329 | }, 330 | "mkdirp": { 331 | "version": "0.5.1", 332 | "resolved": "https://registry.npmjs.org/mkdirp/-/mkdirp-0.5.1.tgz", 333 | "integrity": "sha1-MAV0OOrGz3+MR2fzhkjWaX11yQM=", 334 | "dev": true, 335 | "requires": { 336 | "minimist": "0.0.8" 337 | } 338 | }, 339 | "mocha": { 340 | "version": "5.0.4", 341 | "resolved": "https://registry.npmjs.org/mocha/-/mocha-5.0.4.tgz", 342 | "integrity": "sha512-nMOpAPFosU1B4Ix1jdhx5e3q7XO55ic5a8cgYvW27CequcEY+BabS0kUVL1Cw1V5PuVHZWeNRWFLmEPexo79VA==", 343 | "dev": true, 344 | "requires": { 345 | 
"browser-stdout": "1.3.1", 346 | "commander": "2.11.0", 347 | "debug": "3.1.0", 348 | "diff": "3.5.0", 349 | "escape-string-regexp": "1.0.5", 350 | "glob": "7.1.2", 351 | "growl": "1.10.3", 352 | "he": "1.1.1", 353 | "mkdirp": "0.5.1", 354 | "supports-color": "4.4.0" 355 | } 356 | }, 357 | "ms": { 358 | "version": "2.0.0", 359 | "resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz", 360 | "integrity": "sha1-VgiurfwAvmwpAd9fmGF4jeDVl8g=", 361 | "dev": true 362 | }, 363 | "nan": { 364 | "version": "2.7.0", 365 | "resolved": "https://registry.npmjs.org/nan/-/nan-2.7.0.tgz", 366 | "integrity": "sha1-2Vv3IeyHfgjbJ27T/G63j5CDrUY=" 367 | }, 368 | "node-uuid": { 369 | "version": "1.4.8", 370 | "resolved": "https://registry.npmjs.org/node-uuid/-/node-uuid-1.4.8.tgz", 371 | "integrity": "sha1-sEDrCSOWivq/jTL7HxfxFn/auQc=" 372 | }, 373 | "node-wget": { 374 | "version": "0.4.2", 375 | "resolved": "https://registry.npmjs.org/node-wget/-/node-wget-0.4.2.tgz", 376 | "integrity": "sha1-w4TXh/H6xusXpdUguZ0jtzzGinM=", 377 | "requires": { 378 | "request": "2.27.0" 379 | } 380 | }, 381 | "oauth-sign": { 382 | "version": "0.3.0", 383 | "resolved": "https://registry.npmjs.org/oauth-sign/-/oauth-sign-0.3.0.tgz", 384 | "integrity": "sha1-y1QPk7srIqfVlBaRoojWDo6pOG4=" 385 | }, 386 | "once": { 387 | "version": "1.4.0", 388 | "resolved": "https://registry.npmjs.org/once/-/once-1.4.0.tgz", 389 | "integrity": "sha1-WDsap3WWHUsROsF9nFC6753Xa9E=", 390 | "requires": { 391 | "wrappy": "1.0.2" 392 | } 393 | }, 394 | "path-is-absolute": { 395 | "version": "1.0.1", 396 | "resolved": "https://registry.npmjs.org/path-is-absolute/-/path-is-absolute-1.0.1.tgz", 397 | "integrity": "sha1-F0uSaHNVNP+8es5r9TpanhtcX18=" 398 | }, 399 | "pathval": { 400 | "version": "1.1.0", 401 | "resolved": "https://registry.npmjs.org/pathval/-/pathval-1.1.0.tgz", 402 | "integrity": "sha1-uULm1L3mUwBe9rcTYd74cn0GReA=", 403 | "dev": true 404 | }, 405 | "qs": { 406 | "version": "0.6.6", 407 | "resolved": "https://registry.npmjs.org/qs/-/qs-0.6.6.tgz", 408 | "integrity": "sha1-bgFQmP9RlouKPIGQAdXyyJvEsQc=" 409 | }, 410 | "request": { 411 | "version": "2.27.0", 412 | "resolved": "https://registry.npmjs.org/request/-/request-2.27.0.tgz", 413 | "integrity": "sha1-37GiJN06Wput5DNwElA9cQ5Thmg=", 414 | "requires": { 415 | "aws-sign": "0.3.0", 416 | "cookie-jar": "0.3.0", 417 | "forever-agent": "0.5.2", 418 | "form-data": "0.1.4", 419 | "hawk": "1.0.0", 420 | "http-signature": "0.10.1", 421 | "json-stringify-safe": "5.0.1", 422 | "mime": "1.2.11", 423 | "node-uuid": "1.4.8", 424 | "oauth-sign": "0.3.0", 425 | "qs": "0.6.6", 426 | "tunnel-agent": "0.3.0" 427 | } 428 | }, 429 | "sntp": { 430 | "version": "0.2.4", 431 | "resolved": "https://registry.npmjs.org/sntp/-/sntp-0.2.4.tgz", 432 | "integrity": "sha1-+4hfGLDzqtGJ+CSGJTa87ux1CQA=", 433 | "requires": { 434 | "hoek": "0.9.1" 435 | } 436 | }, 437 | "supports-color": { 438 | "version": "4.4.0", 439 | "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-4.4.0.tgz", 440 | "integrity": "sha512-rKC3+DyXWgK0ZLKwmRsrkyHVZAjNkfzeehuFWdGGcqGDTZFH73+RH6S/RDAAxl9GusSjZSUWYLmT9N5pzXFOXQ==", 441 | "dev": true, 442 | "requires": { 443 | "has-flag": "2.0.0" 444 | } 445 | }, 446 | "tunnel-agent": { 447 | "version": "0.3.0", 448 | "resolved": "https://registry.npmjs.org/tunnel-agent/-/tunnel-agent-0.3.0.tgz", 449 | "integrity": "sha1-rWgbaPUyGtKCfEz7G31d8s/pQu4=" 450 | }, 451 | "type-detect": { 452 | "version": "4.0.8", 453 | "resolved": "https://registry.npmjs.org/type-detect/-/type-detect-4.0.8.tgz", 
454 | "integrity": "sha512-0fr/mIH1dlO+x7TlcMy+bIDqKPsw/70tVyeHW787goQjhmqaZe10uwLujubK9q9Lg6Fiho1KUKDYz0Z7k7g5/g==", 455 | "dev": true 456 | }, 457 | "typescript": { 458 | "version": "2.7.2", 459 | "resolved": "https://registry.npmjs.org/typescript/-/typescript-2.7.2.tgz", 460 | "integrity": "sha512-p5TCYZDAO0m4G344hD+wx/LATebLWZNkkh2asWUFqSsD2OrDNhbAHuSjobrmsUmdzjJjEeZVU9g1h3O6vpstnw==", 461 | "dev": true 462 | }, 463 | "which": { 464 | "version": "1.0.9", 465 | "resolved": "https://registry.npmjs.org/which/-/which-1.0.9.tgz", 466 | "integrity": "sha1-RgwdoPgQED0DIam2M6+eV15kSG8=" 467 | }, 468 | "winreg": { 469 | "version": "1.2.4", 470 | "resolved": "https://registry.npmjs.org/winreg/-/winreg-1.2.4.tgz", 471 | "integrity": "sha1-ugZWKbepJRMOFXeRCM9UCZDpjRs=" 472 | }, 473 | "wrappy": { 474 | "version": "1.0.2", 475 | "resolved": "https://registry.npmjs.org/wrappy/-/wrappy-1.0.2.tgz", 476 | "integrity": "sha1-tSQ9jz7BqjXxNkYFvA0QNuMKtp8=" 477 | } 478 | } 479 | } 480 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "open-korean-text-node", 3 | "version": "2.2.0", 4 | "description": "Nodejs binding for open-korean-text via java interface.", 5 | "main": "index.js", 6 | "types": "lib/index.d.ts", 7 | "scripts": { 8 | "pretest": "npm run build", 9 | "test": "mocha --ui tdd --timeout 10000", 10 | "build": "rm -rf lib && tsc", 11 | "prepare": "npm run build", 12 | "install": "node bin/install.js" 13 | }, 14 | "repository": { 15 | "type": "git", 16 | "url": "https://github.com/open-korean-text/open-korean-text-wrapper-node-2" 17 | }, 18 | "keywords": [ 19 | "nlp", 20 | "korean", 21 | "open-korean-text", 22 | "twitter-korean-text" 23 | ], 24 | "author": "Youngrok Kim ", 25 | "license": "Apache-2.0", 26 | "engines": { 27 | "node": ">=4.0.0" 28 | }, 29 | "devDependencies": { 30 | "@types/java": "^0.7.32", 31 | "@types/node": "^9.4.7", 32 | "mocha": "^5.0.4", 33 | "chai": "^4.1.2", 34 | "typescript": "^2.7.2" 35 | }, 36 | "dependencies": { 37 | "es6-promisify": "^6.0.0", 38 | "java": "^0.9.0", 39 | "node-wget": "^0.4.2" 40 | }, 41 | "mavenDependencies": { 42 | "scala-library": "http://central.maven.org/maven2/org/scala-lang/scala-library/2.12.4/scala-library-2.12.4.jar", 43 | "twitter-text": "https://repo1.maven.org/maven2/com/twitter/twitter-text/1.14.7/twitter-text-1.14.7.jar", 44 | "open-korean-text": "http://central.maven.org/maven2/org/openkoreantext/open-korean-text/2.2.0/open-korean-text-2.2.0.jar" 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /src/classes/abstract.ts: -------------------------------------------------------------------------------- 1 | import * as Java from 'java'; 2 | 3 | export type JavaClassInterface = any; 4 | export type JavaObjectInterface = any; 5 | export interface JavaClassConstructor { 6 | new (...args: any[]): T; 7 | } 8 | 9 | /** 10 | * Base object providing Java-interface bridge 11 | * 12 | * @export 13 | * @abstract 14 | * @class JavaObject 15 | */ 16 | export abstract class AbstractJavaClass { 17 | 18 | protected static className: string; 19 | 20 | protected static get class(): JavaClassInterface { 21 | const self = this as any; 22 | return self._class = self._class || Java.import(this.className); 23 | } 24 | 25 | protected static wrap(this: JavaClassConstructor, javaObject: any): T { 26 | return new this({ javaObject }); 27 | } 28 | 29 | constructor(...args: 
any[]); 30 | constructor() { 31 | const proto = this.constructor as typeof AbstractJavaClass; 32 | if (arguments[0] && arguments[0].javaObject) { 33 | // wrap existing java object 34 | this._interface = arguments[0].javaObject; 35 | } else { 36 | // create new java object 37 | this._interface = new (Function.prototype.bind.apply(proto.class, Array.from(arguments))); 38 | } 39 | } 40 | 41 | private _interface; 42 | 43 | get interface(): JavaObjectInterface { 44 | return this._interface; 45 | } 46 | 47 | toString() { 48 | return this.interface.toString(); 49 | } 50 | 51 | toJSON() { 52 | return this.toString(); 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /src/classes/collections.ts: -------------------------------------------------------------------------------- 1 | import { AbstractJavaClass } from './abstract'; 2 | 3 | export class Collections extends AbstractJavaClass { 4 | 5 | static className = 'java.util.Collections'; 6 | 7 | map(mapFunc: (item: T, index?: number) => R): R[] { 8 | let index = 0; 9 | const it = this.interface.iterator(); 10 | const res = []; 11 | 12 | while (it.hasNext()) { 13 | res.push(mapFunc(it.next(), index++)); 14 | } 15 | return res; 16 | } 17 | } 18 | 19 | /** 20 | * Partial wrapper for Java ArrayList 21 | * 22 | * @export 23 | * @class ArrayList 24 | * @extends {JavaObject} 25 | * @template T 26 | */ 27 | export class ArrayList extends Collections { 28 | 29 | static className = 'java.util.ArrayList'; 30 | 31 | constructor(values?: T[]) { 32 | super(); 33 | if (values && values.length) { 34 | this.addAllSync(values); 35 | } 36 | } 37 | 38 | add(item: T) { 39 | return this.interface.addPromise(item); 40 | } 41 | 42 | addSync(item: T) { 43 | return this.interface.add(item); 44 | } 45 | 46 | addAll(items: T[]) { 47 | return Promise.all(items.map(item => this.add(item))); 48 | } 49 | 50 | addAllSync(items: T[]) { 51 | items.forEach(item => this.addSync(item)); 52 | } 53 | } 54 | 55 | /** 56 | * Wrapper class for Scala Seq 57 | * 58 | * @export 59 | * @class Seq 60 | * @extends {AbstractCollections} 61 | * @template T 62 | */ 63 | export class Seq extends Collections { 64 | 65 | static className = 'scala.collection.Seq'; 66 | 67 | } 68 | -------------------------------------------------------------------------------- /src/classes/index.ts: -------------------------------------------------------------------------------- 1 | export * from './abstract'; 2 | export * from './collections'; 3 | export * from './tokens'; 4 | -------------------------------------------------------------------------------- /src/classes/tokens.ts: -------------------------------------------------------------------------------- 1 | import { AbstractJavaClass } from './abstract'; 2 | 3 | export interface KoreanToken { 4 | text?: string; 5 | stem?: string; 6 | pos?: KoreanPos; 7 | offset?: number; 8 | length?: number; 9 | isUnknown?: boolean; 10 | } 11 | 12 | export class KoreanTokenObject extends AbstractJavaClass implements KoreanToken { 13 | 14 | static className = 'org.openkoreantext.processor.KoreanTokenJava'; 15 | 16 | constructor( 17 | public text: string, 18 | public pos: KoreanPos, 19 | public offset: number, 20 | public length: number, 21 | public isUnknown: boolean, 22 | public stem?: string) { 23 | super(text, KoreanPosObject.valueOf(pos), offset, length, !!isUnknown, stem); 24 | } 25 | 26 | toJSON() { 27 | return { 28 | text: this.text, 29 | stem: this.stem, 30 | pos: this.pos, 31 | offset: this.offset, 32 | length: this.length, 
33 | isUnknown: this.isUnknown, 34 | } 35 | } 36 | } 37 | 38 | export type KoreanPos = "Noun" | "Verb" | "Adjective" | "Adverb" | "Determiner" | "Exclamation" 39 | | "Josa" | "Eomi" | "PreEomi" | "Conjunction" | "NounPrefix" | "VerbPrefix" | "Suffix" 40 | | "Unknown" 41 | | "Korean" | "Foreign" | "Number" | "KoreanParticle" | "Alpha" | "Punctuation" | "Hashtag" 42 | | "ScreenName" | "Email" | "URL" | "CashTag" 43 | | "Space" | "Others" | "ProperNoun"; 44 | 45 | export class KoreanPosObject extends AbstractJavaClass { 46 | 47 | static className = 'org.openkoreantext.processor.KoreanPosJava'; 48 | 49 | static valueOf(name: KoreanPos) { 50 | return this.class.valueOf(name); 51 | } 52 | 53 | static get Noun(): KoreanPosObject { return this.class.Noun } 54 | static get Verb(): KoreanPosObject { return this.class.Verb; } 55 | static get Adjective(): KoreanPosObject { return this.class.Adjective; } 56 | static get Adverb(): KoreanPosObject { return this.class.Adverb; } 57 | static get Determiner(): KoreanPosObject { return this.class.Determiner; } 58 | static get Exclamation(): KoreanPosObject { return this.class.Exclamation; } 59 | static get Josa(): KoreanPosObject { return this.class.Josa; } 60 | static get Eomi(): KoreanPosObject { return this.class.Eomi; } 61 | static get PreEomi(): KoreanPosObject { return this.class.PreEomi; } 62 | static get Conjunction(): KoreanPosObject { return this.class.Conjunction; } 63 | static get NounPrefix(): KoreanPosObject { return this.class.NounPrefix; } 64 | static get VerbPrefix(): KoreanPosObject { return this.class.VerbPrefix; } 65 | static get Suffix(): KoreanPosObject { return this.class.Suffix; } 66 | static get Unknown(): KoreanPosObject { return this.class.Unknown; } 67 | 68 | static get Korean(): KoreanPosObject { return this.class.Korean; } 69 | static get Foreign(): KoreanPosObject { return this.class.Foreign; } 70 | static get Number(): KoreanPosObject { return this.class.Number; } 71 | static get KoreanParticle(): KoreanPosObject { return this.class.KoreanParticle; } 72 | static get Alpha(): KoreanPosObject { return this.class.Alpha; } 73 | static get Punctuation(): KoreanPosObject { return this.class.Punctuation; } 74 | static get Hashtag(): KoreanPosObject { return this.class.Hashtag; } 75 | static get ScreenName(): KoreanPosObject { return this.class.ScreenName; } 76 | static get Email(): KoreanPosObject { return this.class.Email; } 77 | static get URL(): KoreanPosObject { return this.class.URL; } 78 | static get CashTag(): KoreanPosObject { return this.class.CashTag; } 79 | 80 | static get Space(): KoreanPosObject { return this.class.Space; } 81 | static get Others(): KoreanPosObject { return this.class.Others; } 82 | static get ProperNoun(): KoreanPosObject { return this.class.ProperNoun; } 83 | } 84 | 85 | export interface Sentence { 86 | text?: string; 87 | start?: number; 88 | end?: number; 89 | } 90 | -------------------------------------------------------------------------------- /src/index.ts: -------------------------------------------------------------------------------- 1 | export { 2 | KoreanToken, 3 | KoreanPos, 4 | Sentence, 5 | } from './classes'; 6 | 7 | export { 8 | OpenKoreanTextProcessor, 9 | IntermediaryTokensObject, 10 | OpenKoreanTextProcessor as default 11 | } from './processor'; 12 | -------------------------------------------------------------------------------- /src/processor/index.ts: -------------------------------------------------------------------------------- 1 | export * from './processor'; 2 | export * from 
'./tokens'; 3 | -------------------------------------------------------------------------------- /src/processor/processor.ts: -------------------------------------------------------------------------------- 1 | import * as Java from 'java'; 2 | import { AbstractJavaClass, Collections, ArrayList, Seq, KoreanToken, Sentence, JavaClassInterface } from '../classes'; 3 | import { IntermediaryTokensObject } from './tokens'; 4 | 5 | export interface ExcludePhrasesOptions { 6 | filterSpam?: boolean; 7 | includeHashtag?: boolean; 8 | } 9 | 10 | /** 11 | * Node-js Wrapper for OpenKoreanTextProcessor 12 | * 13 | * @export 14 | * @class OpenKoreanTextProcessor 15 | */ 16 | export class OpenKoreanTextProcessor extends AbstractJavaClass { 17 | static className = 'org.openkoreantext.processor.OpenKoreanTextProcessorJava'; 18 | 19 | /** 20 | * Ensure JVM has been created 21 | * 22 | * @param [done] done callback 23 | */ 24 | static ensureJvm(done: () => void): void; 25 | static ensureJvm(): Promise; 26 | static ensureJvm() { 27 | if (arguments[0] && typeof arguments[0] == 'function') { 28 | return Java.ensureJvm(arguments[0]); 29 | } else { 30 | return new Promise((resolve, reject) => Java.ensureJvm((err) => (err ? reject(err) : resolve()))); 31 | } 32 | } 33 | 34 | /** 35 | * Check whether the JVM is created or not 36 | * 37 | * @returns boolean 38 | */ 39 | static isJvmCreated(): boolean { 40 | return Java.isJvmCreated(); 41 | } 42 | 43 | /** 44 | * Normalize Korean text 45 | * 그랰ㅋㅋㅋㅋㅋㅋ -> 그래ㅋㅋ 46 | * 47 | * @param text Input text. 48 | * @return Normalized text. 49 | */ 50 | static normalize(text: string): Promise { 51 | return this.class.normalizePromise(text); 52 | } 53 | 54 | static normalizeSync(text: string): string { 55 | return this.class.normalize(text); 56 | } 57 | 58 | /** 59 | * Tokenize with the builder options. 60 | * 61 | * @param text Input text. 62 | * @return A list of Korean Tokens (run tokensToJsonArray to transform to Java List) 63 | */ 64 | static tokenize(text: string): Promise { 65 | return this.class.tokenizePromise(text).then((tokensSeq) => IntermediaryTokensObject.wrap(tokensSeq)); 66 | } 67 | 68 | static tokenizeSync(text: string): IntermediaryTokensObject { 69 | return IntermediaryTokensObject.wrap(this.class.tokenize(text)); 70 | } 71 | 72 | /** 73 | * Add user-defined words to the noun dictionary. Spaced words are ignored. 74 | * 75 | * @static 76 | * @param words List of user nouns 77 | * @returns 78 | */ 79 | static addNounsToDictionary(...words: string[]): Promise; 80 | static addNounsToDictionary(): Promise { 81 | const listObject = new ArrayList(Array.from(arguments)); 82 | return this.class.addNounsToDictionaryPromise(listObject.interface); 83 | } 84 | 85 | static addNounsToDictionarySync(...words: string[]): void; 86 | static addNounsToDictionarySync(): void { 87 | const listObject = new ArrayList(Array.from(arguments)); 88 | return this.class.addNounsToDictionary(listObject.interface); 89 | } 90 | 91 | /** 92 | * Tokenize with the builder options into a Javascript Object. 93 | * 94 | * @param tokens Korean tokens (output of tokenize(CharSequence text)). 95 | * @param [keepSpace=false] Keep spaces 96 | * @return JSON array of token objects. 
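 *
 * A minimal usage sketch (illustrative only, assuming an async context):
 *   const tokens = OpenKoreanTextProcessor.tokenizeSync('착한강아지상을 받은 루루');
 *   const json = await OpenKoreanTextProcessor.tokensToJsonArray(tokens, false); // drops 'Space' tokens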
97 | */ 98 | static tokensToJsonArray(tokens: IntermediaryTokensObject, keepSpace?: boolean): Promise { 99 | return new Promise((resolve, reject) => { 100 | try { 101 | resolve(this.tokensToJsonArraySync(tokens, keepSpace)); 102 | } catch (error) { 103 | reject(error); 104 | } 105 | }); 106 | } 107 | 108 | static tokensToJsonArraySync(tokens: IntermediaryTokensObject, keepSpace?: boolean): KoreanToken[] { 109 | const list = tokens.toJSON(); 110 | return keepSpace ? list : list.filter((token) => token.pos !== 'Space'); 111 | } 112 | 113 | /** 114 | * Split input text into sentences. 115 | * 116 | * @param text Input text. 117 | * @return Array of Sentence objects. 118 | */ 119 | static splitSentences(text: string): Promise { 120 | return this.class.splitSentencesPromise(text).then((sentences) => 121 | Collections.wrap(sentences).map((sentence: any) => ({ 122 | text: sentence.text(), 123 | start: sentence.start(), 124 | end: sentence.end() 125 | })) 126 | ); 127 | } 128 | 129 | static splitSentencesSync(text: string): Sentence[] { 130 | return Collections.wrap(this.class.splitSentences(text)).map((sentence: any) => ({ 131 | text: sentence.text(), 132 | start: sentence.start(), 133 | end: sentence.end() 134 | })); 135 | } 136 | 137 | /** 138 | * Extract phrases from Korean input text 139 | * 140 | * @param tokens Korean tokens (output of tokenize(CharSequence text)). 141 | * @param [options.filterSpam = true] 142 | * @param [optons.includeHashtags = false] 143 | * @return Array of phrase CharSequences. 144 | */ 145 | static extractPhrases(tokens: IntermediaryTokensObject, options?: ExcludePhrasesOptions): Promise { 146 | options = { filterSpam: true, includeHashtag: false, ...options }; 147 | return this.class 148 | .extractPhrasesPromise(tokens.interface, options.filterSpam, options.includeHashtag) 149 | .then((phrasesSeq) => 150 | Collections.wrap(phrasesSeq).map((phrase: any) => ({ 151 | text: phrase.text(), 152 | pos: phrase.pos().toString(), 153 | offset: phrase.offset(), 154 | length: phrase.length() 155 | })) 156 | ); 157 | } 158 | 159 | static extractPhrasesSync(tokens: IntermediaryTokensObject, options?: ExcludePhrasesOptions): KoreanToken { 160 | options = { filterSpam: true, includeHashtag: false, ...options }; 161 | const phrasesSeq = Collections.wrap( 162 | this.class.extractPhrases(tokens.interface, options.filterSpam, options.includeHashtag) 163 | ); 164 | return phrasesSeq.map((phrase: any) => ({ 165 | text: phrase.text(), 166 | pos: phrase.pos().toString(), 167 | offset: phrase.offset(), 168 | length: phrase.length() 169 | })); 170 | } 171 | 172 | /** 173 | * Detokenize the input list of words. 174 | * 175 | * @param words List of words. 176 | * @return String Detokenized string. 
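 *
 * A minimal usage sketch (illustrative only, assuming an async context; the example words are taken from the tests):
 *   const detokenized = await OpenKoreanTextProcessor.detokenize(['늘', '평온', '하게', '누워', '있', '는', '루루']);
 *   // => '늘 평온하게 누워있는 루루'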
177 | */ 178 | static detokenize(tokens: IntermediaryTokensObject): Promise; 179 | static detokenize(words: string[]): Promise; 180 | static detokenize(...words: string[]): Promise; 181 | static detokenize(): Promise { 182 | let words: string[]; 183 | if (arguments[0] instanceof IntermediaryTokensObject) { 184 | words = arguments[0] 185 | .toJSON() 186 | .filter((token) => token.pos !== 'Space') 187 | .map((token) => token.text); 188 | } else if (Array.isArray(arguments[0])) { 189 | words = arguments[0]; 190 | } else { 191 | words = Array.from(arguments); 192 | } 193 | const list = new ArrayList(words); 194 | return this.class.detokenizePromise(list.interface).then((detokenized) => detokenized.toString()); 195 | } 196 | 197 | static detokenizeSync(tokens: IntermediaryTokensObject): string; 198 | static detokenizeSync(words: string[]): string; 199 | static detokenizeSync(...words: string[]): string; 200 | static detokenizeSync(): string { 201 | let words: string[]; 202 | if (arguments[0] instanceof IntermediaryTokensObject) { 203 | words = arguments[0] 204 | .toJSON() 205 | .filter((token) => token.pos !== 'Space') 206 | .map((token) => token.text); 207 | } else if (Array.isArray(arguments[0])) { 208 | words = arguments[0]; 209 | } else { 210 | words = Array.from(arguments); 211 | } 212 | const list = new ArrayList(words); 213 | const detokenized = this.class.detokenize(list.interface); 214 | return detokenized.toString(); 215 | } 216 | } 217 | -------------------------------------------------------------------------------- /src/processor/tokens.ts: -------------------------------------------------------------------------------- 1 | import { KoreanTokenObject, KoreanToken, Seq } from '../classes'; 2 | import { OpenKoreanTextProcessor, ExcludePhrasesOptions } from './processor'; 3 | 4 | export class IntermediaryTokensObject extends Seq { 5 | // KoreanTokenObject is not required yet. 6 | // 7 | // toTokenObjectList(): KoreanTokenObject[] { 8 | // return this.map(token => new KoreanTokenObject( 9 | // token.text(), 10 | // token.pos().toString(), 11 | // token.offset(), 12 | // token.length(), 13 | // token.unknown !== undefined ? 
token.unknown() : undefined 14 | // )); 15 | // }; 16 | 17 | toJSON(): KoreanToken[] { 18 | return this.map((item) => { 19 | const token: KoreanToken = { 20 | text: item.text(), 21 | pos: item.pos().toString(), 22 | offset: item.offset(), 23 | length: item.length(), 24 | isUnknown: item.unknown() 25 | }; 26 | if (item.stem().nonEmpty()) { 27 | token.stem = item.stem().get(); 28 | } 29 | return token; 30 | }); 31 | } 32 | 33 | extractPhrases(options?: ExcludePhrasesOptions) { 34 | return OpenKoreanTextProcessor.extractPhrases(this, options); 35 | } 36 | 37 | extractPhrasesSync(options?: ExcludePhrasesOptions) { 38 | return OpenKoreanTextProcessor.extractPhrasesSync(this, options); 39 | } 40 | 41 | detokenize(): Promise { 42 | return OpenKoreanTextProcessor.detokenize(this); 43 | } 44 | 45 | detokenizeSync(): string { 46 | return OpenKoreanTextProcessor.detokenizeSync(this); 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /test/processor.spec.js: -------------------------------------------------------------------------------- 1 | 'use strict'; 2 | 3 | const expect = require('chai').expect; 4 | const OpenKoreanTextProcessor = require('../').default; 5 | 6 | suite('OpenKoreanTextProcessor', () => { 7 | 8 | before('ensure JVM', () => OpenKoreanTextProcessor.ensureJvm()); 9 | 10 | suite('sync', () => { 11 | 12 | test('normalize', (done) => { 13 | const text = '힘들겟씀다 그래욬ㅋㅋㅋ'; 14 | const result = OpenKoreanTextProcessor.normalizeSync(text); 15 | expect(result).to.eql('힘들겠습니다 그래요ㅋㅋㅋ'); 16 | done(); 17 | }); 18 | 19 | test('tokenize', (done) => { 20 | const text = '착한강아지상을 받은 루루'; 21 | const intermediaryTokens = OpenKoreanTextProcessor.tokenizeSync(text); 22 | expect(intermediaryTokens.toString()).to.eql( 23 | 'List(착한(Adjective(착하다): 0, 2), 강아지(Noun: 2, 3), 상(Suffix: 5, 1), 을(Josa: 6, 1), ' + 24 | ' (Space: 7, 1), 받은(Verb(받다): 8, 2), (Space: 10, 1), 루루(Noun: 11, 2))'); 25 | expect(intermediaryTokens.toJSON()).to.eql([ 26 | { text: '착한', pos: 'Adjective', stem: '착하다', offset: 0, length: 2, isUnknown: false }, 27 | { text: '강아지', pos: 'Noun', offset: 2, length: 3, isUnknown: false }, 28 | { text: '상', pos: 'Suffix', offset: 5, length: 1, isUnknown: false }, 29 | { text: '을', pos: 'Josa', offset: 6, length: 1, isUnknown: false }, 30 | { text: ' ', pos: 'Space', offset: 7, length: 1, isUnknown: false }, 31 | { text: '받은', pos: 'Verb', stem: '받다', offset: 8, length: 2, isUnknown: false }, 32 | { text: ' ', pos: 'Space', offset: 10, length: 1, isUnknown: false }, 33 | { text: '루루', pos: 'Noun', offset: 11, length: 2, isUnknown: false } 34 | ]); 35 | done(); 36 | }); 37 | 38 | test('tokens to json array', (done) => { 39 | const text = '착한강아지상을 받은 루루'; 40 | const tokens = OpenKoreanTextProcessor.tokenizeSync(text); 41 | expect(OpenKoreanTextProcessor.tokensToJsonArraySync(tokens, true)).to.eql([ 42 | { text: '착한', pos: 'Adjective', stem: '착하다', offset: 0, length: 2, isUnknown: false }, 43 | { text: '강아지', pos: 'Noun', offset: 2, length: 3, isUnknown: false }, 44 | { text: '상', pos: 'Suffix', offset: 5, length: 1, isUnknown: false }, 45 | { text: '을', pos: 'Josa', offset: 6, length: 1, isUnknown: false }, 46 | { text: ' ', pos: 'Space', offset: 7, length: 1, isUnknown: false }, 47 | { text: '받은', pos: 'Verb', stem: '받다', offset: 8, length: 2, isUnknown: false }, 48 | { text: ' ', pos: 'Space', offset: 10, length: 1, isUnknown: false }, 49 | { text: '루루', pos: 'Noun', offset: 11, length: 2, isUnknown: false } 50 | ]); 51 | 
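        // with keepSpace = false, 'Space' tokens are filtered out of the result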
expect(OpenKoreanTextProcessor.tokensToJsonArraySync(tokens, false)).to.eql([ 52 | { text: '착한', pos: 'Adjective', stem: '착하다', offset: 0, length: 2, isUnknown: false }, 53 | { text: '강아지', pos: 'Noun', offset: 2, length: 3, isUnknown: false }, 54 | { text: '상', pos: 'Suffix', offset: 5, length: 1, isUnknown: false }, 55 | { text: '을', pos: 'Josa', offset: 6, length: 1, isUnknown: false }, 56 | { text: '받은', pos: 'Verb', stem: '받다', offset: 8, length: 2, isUnknown: false }, 57 | { text: '루루', pos: 'Noun', offset: 11, length: 2, isUnknown: false } 58 | ]); 59 | done(); 60 | }); 61 | 62 | test('add to dictionary', (done) => { 63 | const text = '압뱌뱌어버벼부뷰'; 64 | const tokens = OpenKoreanTextProcessor.tokenizeSync(text); 65 | expect(OpenKoreanTextProcessor.tokensToJsonArraySync(tokens, false)).to.eql([ 66 | { text: '압뱌뱌어버벼부뷰', pos: 'Noun', offset: 0, length: 8, isUnknown: true } 67 | ]); 68 | 69 | OpenKoreanTextProcessor.addNounsToDictionarySync('압뱌뱌', '어버벼', '부뷰'); 70 | const tokensAfter = OpenKoreanTextProcessor.tokenizeSync(text); 71 | expect(OpenKoreanTextProcessor.tokensToJsonArraySync(tokensAfter, false)).to.eql([ 72 | { text: '압뱌뱌', pos: 'Noun', offset: 0, length: 3, isUnknown: false }, 73 | { text: '어버벼', pos: 'Noun', offset: 3, length: 3, isUnknown: false }, 74 | { text: '부뷰', pos: 'Noun', offset: 6, length: 2, isUnknown: false } 75 | ]); 76 | done(); 77 | }); 78 | 79 | test('phrase extractor', (done) => { 80 | const text = '아름다운 트위터를 만들어 보자. 시발 #욕하지_말자'; 81 | const tokens = OpenKoreanTextProcessor.tokenizeSync(text); 82 | expect(OpenKoreanTextProcessor.extractPhrasesSync(tokens, { filterSpam: true, includeHashtag: true })).to.eql([ 83 | { text: '아름다운 트위터', pos: 'Noun', offset: 0, length: 8 }, 84 | { text: '트위터', pos: 'Noun', offset: 5, length: 3 }, 85 | { text: '#욕하지_말자', pos: 'Hashtag', offset: 21, length: 7 } 86 | ]); 87 | expect(OpenKoreanTextProcessor.extractPhrasesSync(tokens, { filterSpam: true, includeHashtag: false })).to.eql([ 88 | { text: '아름다운 트위터', pos: 'Noun', offset: 0, length: 8 }, 89 | { text: '트위터', pos: 'Noun', offset: 5, length: 3 }, 90 | ]); 91 | expect(OpenKoreanTextProcessor.extractPhrasesSync(tokens, { filterSpam: false, includeHashtag: true })).to.eql([ 92 | { text: '아름다운 트위터', pos: 'Noun', offset: 0, length: 8 }, 93 | { text: '시발', pos: 'Noun', offset: 18, length: 2 }, 94 | { text: '트위터', pos: 'Noun', offset: 5, length: 3 }, 95 | { text: '#욕하지_말자', pos: 'Hashtag', offset: 21, length: 7 } 96 | ]); 97 | expect(OpenKoreanTextProcessor.extractPhrasesSync(tokens, { filterSpam: false, includeHashtag: false })).to.eql([ 98 | { text: '아름다운 트위터', pos: 'Noun', offset: 0, length: 8 }, 99 | { text: '시발', pos: 'Noun', offset: 18, length: 2 }, 100 | { text: '트위터', pos: 'Noun', offset: 5, length: 3 } 101 | ]); 102 | done(); 103 | }); 104 | 105 | 106 | test('phrase extractor 2', (done) => { 107 | const text = '시발 토토가의 인기폭발을 보니 미국에서 뉴키즈온더블럭 백스트릿보이스 조인트 컨서트'; 108 | const tokens = OpenKoreanTextProcessor.tokenizeSync(text); 109 | const phrases = OpenKoreanTextProcessor.extractPhrasesSync(tokens); 110 | expect(phrases).to.eql([ 111 | { text: '토토가', pos: 'Noun', offset: 3, length: 3 }, 112 | { text: '토토가의 인기폭발', pos: 'Noun', offset: 3, length: 9 }, 113 | { text: '미국', pos: 'Noun', offset: 17, length: 2 }, 114 | { text: '뉴키즈온더블럭', pos: 'Noun', offset: 22, length: 7 }, 115 | { text: '뉴키즈온더블럭 백스트릿보이스', pos: 'Noun', offset: 22, length: 15 }, 116 | { text: '뉴키즈온더블럭 백스트릿보이스 조인트', pos: 'Noun', offset: 22, length: 19 }, 117 | { text: '뉴키즈온더블럭 백스트릿보이스 조인트 컨서트', pos: 'Noun', offset: 22, 
length: 23 }, 118 | { text: '인기', pos: 'Noun', offset: 8, length: 2 }, 119 | { text: '폭발', pos: 'Noun', offset: 10, length: 2 }, 120 | { text: '백스트릿', pos: 'Noun', offset: 30, length: 4 }, 121 | { text: '보이스', pos: 'Noun', offset: 34, length: 3 }, 122 | { text: '조인트', pos: 'Noun', offset: 38, length: 3 }, 123 | { text: '컨서트', pos: 'Noun', offset: 42, length: 3 } 124 | ]); 125 | done(); 126 | }); 127 | 128 | 129 | test('sentence splitter', (done) => { 130 | const text = '가을이다! 남자는 가을을 탄다...... 그렇지? 루루야! 버버리코트 사러 가자!!!!'; 131 | const sentences = OpenKoreanTextProcessor.splitSentencesSync(text); 132 | expect(sentences).to.eql([ 133 | { text: '가을이다!', start: 0, end: 5 }, 134 | { text: '남자는 가을을 탄다......', start: 6, end: 22 }, 135 | { text: '그렇지?', start: 23, end: 27 }, 136 | { text: '루루야!', start: 28, end: 32 }, 137 | { text: '버버리코트 사러 가자!!!!', start: 33, end: 48 } 138 | ]); 139 | done(); 140 | }); 141 | 142 | test('detokenize with word', (done) => { 143 | const words = ['늘', '평온', '하게', '누워', '있', '는', '루루']; 144 | const detokenized = OpenKoreanTextProcessor.detokenizeSync(words); 145 | expect(detokenized).to.eql('늘 평온하게 누워있는 루루'); 146 | done(); 147 | }); 148 | 149 | test('detokenize with token object', (done) => { 150 | const tokens = OpenKoreanTextProcessor.tokenizeSync('늘평온하게 누워있는루루') 151 | const detokenized = OpenKoreanTextProcessor.detokenizeSync(tokens); 152 | expect(detokenized).to.eql('늘 평온하게 누워있는 루루'); 153 | done(); 154 | }); 155 | }); 156 | 157 | suite('async', () => { 158 | 159 | before((done) => require('java').ensureJvm(() => OpenKoreanTextProcessor.class && done())); 160 | 161 | test('normalize', () => { 162 | const text = '힘들겟씀다 그래욬ㅋㅋㅋ'; 163 | const expected = '힘들겠습니다 그래요ㅋㅋㅋ'; 164 | return OpenKoreanTextProcessor.normalize(text).then((result) => expect(result).to.eql(expected)); 165 | }); 166 | 167 | test('tokenize', () => { 168 | const text = '착한강아지상을 받은 루루'; 169 | return OpenKoreanTextProcessor.tokenize(text).then((result) => { 170 | expect(result.toString()).to.eql( 171 | 'List(착한(Adjective(착하다): 0, 2), 강아지(Noun: 2, 3), 상(Suffix: 5, 1), 을(Josa: 6, 1), ' + 172 | ' (Space: 7, 1), 받은(Verb(받다): 8, 2), (Space: 10, 1), 루루(Noun: 11, 2))'); 173 | expect(result.toJSON()).to.eql([ 174 | { text: '착한', pos: 'Adjective', stem: '착하다', offset: 0, length: 2, isUnknown: false }, 175 | { text: '강아지', pos: 'Noun', offset: 2, length: 3, isUnknown: false }, 176 | { text: '상', pos: 'Suffix', offset: 5, length: 1, isUnknown: false }, 177 | { text: '을', pos: 'Josa', offset: 6, length: 1, isUnknown: false }, 178 | { text: ' ', pos: 'Space', offset: 7, length: 1, isUnknown: false }, 179 | { text: '받은', pos: 'Verb', stem: '받다', offset: 8, length: 2, isUnknown: false }, 180 | { text: ' ', pos: 'Space', offset: 10, length: 1, isUnknown: false }, 181 | { text: '루루', pos: 'Noun', offset: 11, length: 2, isUnknown: false } 182 | ]); 183 | }); 184 | }); 185 | 186 | test('tokens to json array', () => { 187 | const text = '착한강아지상을 받은 루루'; 188 | return OpenKoreanTextProcessor.tokenize(text).then((tokens) => Promise.all([ 189 | OpenKoreanTextProcessor.tokensToJsonArray(tokens, true) // keeping space 190 | .then((result) => expect(result).to.eql([ 191 | { text: '착한', pos: 'Adjective', stem: '착하다', offset: 0, length: 2, isUnknown: false }, 192 | { text: '강아지', pos: 'Noun', offset: 2, length: 3, isUnknown: false }, 193 | { text: '상', pos: 'Suffix', offset: 5, length: 1, isUnknown: false }, 194 | { text: '을', pos: 'Josa', offset: 6, length: 1, isUnknown: false }, 195 | { text: ' ', pos: 'Space', offset: 7, length: 1, 
isUnknown: false }, 196 | { text: '받은', pos: 'Verb', stem: '받다', offset: 8, length: 2, isUnknown: false }, 197 | { text: ' ', pos: 'Space', offset: 10, length: 1, isUnknown: false }, 198 | { text: '루루', pos: 'Noun', offset: 11, length: 2, isUnknown: false } 199 | ])), 200 | OpenKoreanTextProcessor.tokensToJsonArray(tokens, false) // not keeping space 201 | .then((result) => expect(result).to.eql([ 202 | { text: '착한', pos: 'Adjective', stem: '착하다', offset: 0, length: 2, isUnknown: false }, 203 | { text: '강아지', pos: 'Noun', offset: 2, length: 3, isUnknown: false }, 204 | { text: '상', pos: 'Suffix', offset: 5, length: 1, isUnknown: false }, 205 | { text: '을', pos: 'Josa', offset: 6, length: 1, isUnknown: false }, 206 | { text: '받은', pos: 'Verb', stem: '받다', offset: 8, length: 2, isUnknown: false }, 207 | { text: '루루', pos: 'Noun', offset: 11, length: 2, isUnknown: false } 208 | ])) 209 | ])); 210 | }); 211 | 212 | test('add to dictionary', () => { 213 | const text = '우햡냡업갑녀아뎌'; 214 | return OpenKoreanTextProcessor.tokenize(text) 215 | .then((tokens) => OpenKoreanTextProcessor.tokensToJsonArray(tokens, false)) 216 | .then((result) => expect(result).to.eql([ 217 | { text: '우햡냡업갑녀아뎌', pos: 'Noun', offset: 0, length: 8, isUnknown: true } 218 | ])) 219 | .then((result) => OpenKoreanTextProcessor.addNounsToDictionary('우햡냡', '업갑녀', '아뎌')) 220 | .then((result) => OpenKoreanTextProcessor.tokenize(text)) 221 | .then((tokens) => expect(tokens.toJSON()).to.eql([ 222 | { text: '우햡냡', pos: 'Noun', offset: 0, length: 3, isUnknown: false }, 223 | { text: '업갑녀', pos: 'Noun', offset: 3, length: 3, isUnknown: false }, 224 | { text: '아뎌', pos: 'Noun', offset: 6, length: 2, isUnknown: false } 225 | ])); 226 | }); 227 | 228 | test('phrase extractor', () => { 229 | const text = '아름다운 트위터를 만들어 보자. 
시발 #욕하지_말자'; 230 | return OpenKoreanTextProcessor.tokenize(text).then((tokens) => Promise.all([ 231 | OpenKoreanTextProcessor.extractPhrases(tokens, { filterSpam: true, includeHashtag: true }).then((phrases) => 232 | expect(phrases).to.eql([ 233 | { text: '아름다운 트위터', pos: 'Noun', offset: 0, length: 8 }, 234 | { text: '트위터', pos: 'Noun', offset: 5, length: 3 }, 235 | { text: '#욕하지_말자', pos: 'Hashtag', offset: 21, length: 7 } 236 | ])), 237 | OpenKoreanTextProcessor.extractPhrases(tokens, { filterSpam: true, includeHashtag: false }).then((phrases) => 238 | expect(phrases).to.eql([ 239 | { text: '아름다운 트위터', pos: 'Noun', offset: 0, length: 8 }, 240 | { text: '트위터', pos: 'Noun', offset: 5, length: 3 }, 241 | ])), 242 | OpenKoreanTextProcessor.extractPhrases(tokens, { filterSpam: false, includeHashtag: true }).then((phrases) => 243 | expect(phrases).to.eql([ 244 | { text: '아름다운 트위터', pos: 'Noun', offset: 0, length: 8 }, 245 | { text: '시발', pos: 'Noun', offset: 18, length: 2 }, 246 | { text: '트위터', pos: 'Noun', offset: 5, length: 3 }, 247 | { text: '#욕하지_말자', pos: 'Hashtag', offset: 21, length: 7 } 248 | ])), 249 | OpenKoreanTextProcessor.extractPhrases(tokens, { filterSpam: false, includeHashtag: false }).then((phrases) => 250 | expect(phrases).to.eql([ 251 | { text: '아름다운 트위터', pos: 'Noun', offset: 0, length: 8 }, 252 | { text: '시발', pos: 'Noun', offset: 18, length: 2 }, 253 | { text: '트위터', pos: 'Noun', offset: 5, length: 3 } 254 | ])) 255 | ])); 256 | }); 257 | 258 | 259 | test('phrase extractor 2', () => { 260 | const text = '시발 토토가의 인기폭발을 보니 미국에서 뉴키즈온더블럭 백스트릿보이스 조인트 컨서트'; 261 | return OpenKoreanTextProcessor.tokenize(text) 262 | .then((tokens) => OpenKoreanTextProcessor.extractPhrases(tokens)) 263 | .then((phrases) => expect(phrases).to.eql([ 264 | { text: '토토가', pos: 'Noun', offset: 3, length: 3 }, 265 | { text: '토토가의 인기폭발', pos: 'Noun', offset: 3, length: 9 }, 266 | { text: '미국', pos: 'Noun', offset: 17, length: 2 }, 267 | { text: '뉴키즈온더블럭', pos: 'Noun', offset: 22, length: 7 }, 268 | { text: '뉴키즈온더블럭 백스트릿보이스', pos: 'Noun', offset: 22, length: 15 }, 269 | { text: '뉴키즈온더블럭 백스트릿보이스 조인트', pos: 'Noun', offset: 22, length: 19 }, 270 | { text: '뉴키즈온더블럭 백스트릿보이스 조인트 컨서트', pos: 'Noun', offset: 22, length: 23 }, 271 | { text: '인기', pos: 'Noun', offset: 8, length: 2 }, 272 | { text: '폭발', pos: 'Noun', offset: 10, length: 2 }, 273 | { text: '백스트릿', pos: 'Noun', offset: 30, length: 4 }, 274 | { text: '보이스', pos: 'Noun', offset: 34, length: 3 }, 275 | { text: '조인트', pos: 'Noun', offset: 38, length: 3 }, 276 | { text: '컨서트', pos: 'Noun', offset: 42, length: 3 } 277 | ])); 278 | }); 279 | 280 | 281 | test('sentence splitter', () => { 282 | const text = '가을이다! 남자는 가을을 탄다...... 그렇지? 루루야! 
버버리코트 사러 가자!!!!'; 283 | return OpenKoreanTextProcessor.splitSentences(text).then((result) => 284 | expect(result).to.eql([ 285 | { text: '가을이다!', start: 0, end: 5 }, 286 | { text: '남자는 가을을 탄다......', start: 6, end: 22 }, 287 | { text: '그렇지?', start: 23, end: 27 }, 288 | { text: '루루야!', start: 28, end: 32 }, 289 | { text: '버버리코트 사러 가자!!!!', start: 33, end: 48 } 290 | ])); 291 | }); 292 | 293 | test('detokenize with word', () => { 294 | const words = ['늘', '평온', '하게', '누워', '있', '는', '루루']; 295 | return OpenKoreanTextProcessor.detokenize(words).then((result) => 296 | expect(result).to.eql('늘 평온하게 누워있는 루루')); 297 | }); 298 | 299 | test('detokenize with token object', () => { 300 | return OpenKoreanTextProcessor.tokenize('늘평온하게 누워있는루루') 301 | .then((token) => OpenKoreanTextProcessor.detokenize(token)) 302 | .then((result) => expect(result).to.eql('늘 평온하게 누워있는 루루')); 303 | }); 304 | }); 305 | }); 306 | -------------------------------------------------------------------------------- /test/tokens.spec.js: -------------------------------------------------------------------------------- 1 | 'use strict'; 2 | 3 | const expect = require('chai').expect; 4 | const OpenKoreanTextProcessor = require('../').default; 5 | 6 | suite('IntermediaryTokens', () => { 7 | 8 | const shared = {}; 9 | 10 | beforeEach(() => OpenKoreanTextProcessor.ensureJvm().then(() => { 11 | shared.token = OpenKoreanTextProcessor.tokenizeSync('착한강아지상을 받은 루루'); 12 | })); 13 | 14 | afterEach((done) => { 15 | delete shared.token && done(); 16 | }) 17 | 18 | suite('sync', () => { 19 | test('extractPhrases', (done) => { 20 | expect(shared.token.extractPhrasesSync()).to.eql([ 21 | { text: '착한강아지상', pos: 'Noun', offset: 0, length: 6 }, 22 | { text: '루루', pos: 'Noun', offset: 11, length: 2 }, 23 | { text: '강아지상', pos: 'Noun', offset: 2, length: 4 } 24 | ]); 25 | done(); 26 | }); 27 | 28 | test('detokenize', (done) => { 29 | expect(shared.token.detokenizeSync()).to.eql('착한 강아지상을 받은 루루'); 30 | done(); 31 | }); 32 | }); 33 | 34 | suite('async', () => { 35 | test('extractPhrases', () => { 36 | return shared.token.extractPhrases().then(result => expect(result).to.eql([ 37 | { text: '착한강아지상', pos: 'Noun', offset: 0, length: 6 }, 38 | { text: '루루', pos: 'Noun', offset: 11, length: 2 }, 39 | { text: '강아지상', pos: 'Noun', offset: 2, length: 4 } 40 | ])); 41 | }); 42 | 43 | test('detokenize', () => { 44 | return shared.token.detokenize().then(result => expect(result).to.eql('착한 강아지상을 받은 루루')); 45 | }); 46 | }); 47 | 48 | suite('common', () => { 49 | test('toJSON', (done) => { 50 | expect(shared.token.toJSON()).to.eql([ 51 | { text: '착한', pos: 'Adjective', stem: '착하다', offset: 0, length: 2, isUnknown: false }, 52 | { text: '강아지', pos: 'Noun', offset: 2, length: 3, isUnknown: false }, 53 | { text: '상', pos: 'Suffix', offset: 5, length: 1, isUnknown: false }, 54 | { text: '을', pos: 'Josa', offset: 6, length: 1, isUnknown: false }, 55 | { text: ' ', pos: 'Space', offset: 7, length: 1, isUnknown: false }, 56 | { text: '받은', pos: 'Verb', stem: '받다', offset: 8, length: 2, isUnknown: false }, 57 | { text: ' ', pos: 'Space', offset: 10, length: 1, isUnknown: false }, 58 | { text: '루루', pos: 'Noun', offset: 11, length: 2, isUnknown: false } 59 | ]); 60 | done(); 61 | }); 62 | }) 63 | }); 64 | -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "module": "commonjs", 4 | "target": "es6", 5 | 
"moduleResolution": "node", 6 | "pretty": false, 7 | "noImplicitAny": false, 8 | "declaration": true, 9 | "skipDefaultLibCheck": true, 10 | "skipLibCheck": true, 11 | "removeComments": true, 12 | "sourceMap": false, 13 | "outDir": "lib" 14 | }, 15 | "include": [ 16 | "src/**/*.ts" 17 | ] 18 | } -------------------------------------------------------------------------------- /wallaby.js: -------------------------------------------------------------------------------- 1 | module.exports = function (wallaby) { 2 | return { 3 | files: [ 4 | 'index.js', 5 | 'jar/**/*', 6 | 'lib/**/*.js', 7 | '!test/**/*.spec.js', 8 | ], 9 | 10 | tests: [ 11 | 'test/**/*.spec.js', 12 | ], 13 | 14 | compilers: { 15 | '**/*.ts': wallaby.compilers.typeScript(), 16 | }, 17 | 18 | env: { 19 | type: 'node' 20 | } 21 | }; 22 | }; 23 | --------------------------------------------------------------------------------