├── .babelrc ├── .eslintrc ├── .gitignore ├── .travis.yml ├── LICENSE ├── README.md ├── author └── bench.es ├── build └── .gitkeep ├── index.es ├── package.json └── test └── test.js /.babelrc: -------------------------------------------------------------------------------- 1 | { 2 | "presets": [ 3 | "es2015", 4 | "stage-3" 5 | ], 6 | "plugins": [ 7 | ["transform-class-properties", { "spec": true }] 8 | ], 9 | "env": { 10 | "development": { 11 | "presets": [ 12 | "power-assert" 13 | ] 14 | } 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /.eslintrc: -------------------------------------------------------------------------------- 1 | { 2 | "parser": "babel-eslint", 3 | "extends": ["eslint:recommended"], 4 | "rules": { 5 | "semi": "error", 6 | "semi-spacing": "error", 7 | "arrow-body-style": ["error", "always"], 8 | "arrow-parens": "error", 9 | "arrow-spacing": "error", 10 | "generator-star-spacing": "error", 11 | "no-confusing-arrow": "error", 12 | "no-duplicate-imports": "error", 13 | "no-restricted-imports": "error", 14 | "no-useless-computed-key": "error", 15 | "no-useless-constructor": "error", 16 | "no-useless-rename": "error", 17 | "no-var": "error", 18 | "object-shorthand": "error", 19 | "prefer-arrow-callback": "error", 20 | "prefer-const": "error", 21 | "prefer-numeric-literals": "error", 22 | "prefer-rest-params": "error", 23 | "prefer-spread": "error", 24 | "prefer-template": "error", 25 | "rest-spread-spacing": "error", 26 | "sort-imports": "error", 27 | "symbol-description": "error", 28 | "template-curly-spacing": "error", 29 | "yield-star-spacing": "error", 30 | "no-constant-condition": ["error", {"checkLoops": false}] 31 | }, 32 | "parserOptions": { 33 | "sourceType": "module", 34 | "allowImportExportEverywhere": false 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /.gitignore: 
-------------------------------------------------------------------------------- 1 | /node_modules/ 2 | /build/* 3 | !/build/.gitkeep 4 | /author/bench.js 5 | 6 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: node_js 2 | script: npm run check 3 | node_js: 4 | - "6" 5 | - "6.1" 6 | sudo: false 7 | 8 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | https://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 
29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 
61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright (C) 2016 moznion 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | https://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | neologd-normalizer-js [![npm version](https://badge.fury.io/js/neologd-normalizer.svg)](https://badge.fury.io/js/neologd-normalizer) [![Build Status](https://travis-ci.org/moznion/neologd-normalizer-js.svg?branch=master)](https://travis-ci.org/moznion/neologd-normalizer-js) 2 | == 3 | 4 | A normalizer of neologd for JavaScript. 
 5 | 6 | Rule 7 | -- 8 | 9 | [https://github.com/neologd/mecab-ipadic-neologd/wiki/Regexp.ja](https://github.com/neologd/mecab-ipadic-neologd/wiki/Regexp.ja) 10 | 11 | Synopsis 12 | -- 13 | 14 | ```js 15 | import NeologdNormalizer from "neologd-normalizer"; 16 | 17 | NeologdNormalizer.normalize("   PRML  副 読 本   "); // => "PRML副読本" 18 | NeologdNormalizer.normalize("南アルプスの 天然水- Sparking* Lemon+ レモン一絞り"); // => "南アルプスの天然水-Sparking*Lemon+レモン一絞り" 19 | ``` 20 | 21 | How to build 22 | -- 23 | 24 | ```bash 25 | $ npm run build 26 | ``` 27 | 28 | How to run test 29 | -- 30 | 31 | ```bash 32 | $ npm test 33 | ``` 34 | 35 | Benchmark 36 | -- 37 | 38 | ### Run benchmark script 39 | 40 | ```bash 41 | $ npm run bench 42 | ``` 43 | 44 | ### Result 45 | 46 | Benchmark script is [here](/author/bench.es). 47 | 48 | ``` 49 | $ npm run bench 50 | Bench x 6,247 ops/sec ±1.03% (85 runs sampled) 51 | ``` 52 | 53 | (Node: v6.9.2, Machine: MacBook Pro Retina, 15-inch, Early 2013 2.7 GHz Intel Core i7) 54 | 55 | License 56 | -- 57 | 58 | Copyright 2016 moznion () 59 | 60 | Licensed under the Apache License, Version 2.0 (the "License"); 61 | you may not use this file except in compliance with the License. 62 | You may obtain a copy of the License at 63 | 64 | http://www.apache.org/licenses/LICENSE-2.0 65 | 66 | Unless required by applicable law or agreed to in writing, software 67 | distributed under the License is distributed on an "AS IS" BASIS, 68 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 69 | See the License for the specific language governing permissions and 70 | limitations under the License. 
// ---- /author/bench.es ----
// Micro-benchmark: runs NeologdNormalizer.normalize over a fixed set of sample
// strings and prints one summary line per completed benchmark cycle.
//
// NOTE(review): several sample literals below look width-normalized in this
// copy (full-width/half-width forms collapsed) — confirm against the
// repository before relying on the measured numbers.
import Benchmark from "benchmark";
import NeologdNormalizer from "../build/index.js";

// Inputs exercised on every benchmark iteration.
const samples = [
  "0123456789",
  "ABCDEFGHIJKLMNOPQRSTUVWXYZ",
  "abcdefghijklmnopqrstuvwxyz",
  "!”#$%&’()*+,-./:;<>?@[¥]^_`{|}",
  "=。、・「」",
  "ハンカク",
  "o₋o",
  "majika━_",
  "わ〰い",
  "スーパーーーー",
  "!#",
  "ゼンカク スペース",
  "お お",
  " おお",
  "おお ",
  "検索 エンジン 自作 入門 を 買い ました!!!",
  "アルゴリズム C",
  "   PRML  副 読 本   ",
  "Coding the Matrix",
  "南アルプスの 天然水 Sparking Lemon レモン一絞り",
  "南アルプスの 天然水- Sparking* Lemon+ レモン一絞り",
];

// `const`: the suite binding is never reassigned.
const suite = new Benchmark.Suite();

suite
  .add("Bench", () => {
    for (const sample of samples) {
      NeologdNormalizer.normalize(sample);
    }
  })
  .on("cycle", (event) => {
    // The handler only reads `event.target`, so an arrow function is sufficient.
    console.log(String(event.target));
  })
  .run();

// ---- /build/.gitkeep ----
// https://raw.githubusercontent.com/moznion/neologd-normalizer-js/c9c7c654b9e65b83c987d0e9c048033ffefb8334/build/.gitkeep
// ---- /index.es ----
// ref: https://github.com/neologd/mecab-ipadic-neologd/wiki/Regexp

/**
 * Text normalizer applying the mecab-ipadic-NEologd pre-processing rules
 * referenced above.
 *
 * Every width-sensitive character is written as a code-point escape or derived
 * arithmetically. Full-width and half-width glyphs are visually near-identical
 * and are silently merged by Unicode compatibility normalization, so literal
 * glyph tables are easy to corrupt without noticing.
 *
 * Static members reference the class by name rather than `this`, so the
 * methods keep working when passed around unbound
 * (e.g. `lines.map(NeologdNormalizer.normalize)`).
 */
export default class NeologdNormalizer {
  // Character-class fragments, meant to be embedded inside `[...]` of a RegExp.
  static _cjkUnifiedIdeographs = '\u{4E00}-\u{9FFF}';
  static _cjkSymbolsAndPunctuation = '\u{3001}-\u{303F}'; // exclude 'IDEOGRAPHIC SPACE' (U+3000)
  static _halfwidthAndFullwidthForms = '\u{FF00}-\u{FFEF}';
  static _basicLatin = '\u{0000}-\u{001F}\u{0021}-\u{007F}'; // exclude 'SPACE' (U+0020)
  static _hiragana = '\u{3040}-\u{309F}';
  static _zenkakuKatakana = '\u{30A0}-\u{30FF}';
  static _multiByte = [
    NeologdNormalizer._cjkUnifiedIdeographs,
    NeologdNormalizer._hiragana,
    NeologdNormalizer._zenkakuKatakana,
    NeologdNormalizer._cjkSymbolsAndPunctuation,
    NeologdNormalizer._halfwidthAndFullwidthForms,
  ].join('');

  // Matches a run of spaces that must be dropped; see _buildSpacesBetweenRe.
  static _spacesBetweenRe = NeologdNormalizer._buildSpacesBetweenRe();

  // Lookup tables (single character or kana+mark pair -> replacement).
  static _specialCharacterHankakuToZenkakuMap = NeologdNormalizer._buildSpecialHankakuToZenkakuMap();
  static _specialCharacterZenkakuToHankakuMap = NeologdNormalizer._buildSpecialZenkakuToHankakuMap();
  static _eisuuZenkakuToHankakuMap = NeologdNormalizer._buildEisuuZenkakuToHankakuMap();
  static _katakanaHankakuToZenkakuMap = NeologdNormalizer._buildKatakanaHankakuToZenkakuMap();

  /**
   * Normalizes a string.
   *
   * Steps, in order: full-width alphanumerics to ASCII, half-width katakana to
   * full-width, whitespace runs (incl. U+3000) to one space, hyphen look-alike
   * runs to `-`, long-vowel-mark look-alike runs to U+30FC, tildes removed,
   * leading/trailing spaces trimmed, and spaces removed wherever they touch a
   * Japanese/full-width character.
   *
   * Punctuation is widened before the space removal so that it counts as a
   * "multi-byte" neighbour, then narrowed again afterwards.
   *
   * @param {string} [str=''] Text to normalize.
   * @returns {string} The normalized text ('' for an empty or omitted input).
   */
  static normalize(str = '') {
    if (str === '') {
      return str;
    }

    const self = NeologdNormalizer;

    let norm = self._convertEisuuZenkakuToHankaku(str);
    norm = self._convertKatakanaHankakuToZenkaku(norm);
    norm = norm
      // IDEOGRAPHIC SPACE and SPACE runs -> a single SPACE
      .replace(/[\u3000 ]+/g, ' ')
      // hyphen look-alikes (runs collapse, as in the referenced rule) -> '-'
      .replace(/[\u02D7\u058A\u2010\u2011\u2012\u2013\u2043\u207B\u208B\u2212]+/g, '-')
      // long-vowel-mark look-alikes -> one KATAKANA-HIRAGANA PROLONGED SOUND MARK
      .replace(/[\u2014\u2015\u2500\u2501\u30FC\uFE63\uFF0D\uFF70]+/g, '\u30FC')
      // tildes and wave dashes are dropped
      .replace(/[~\u223C\u223E\u301C\u3030\uFF5E]/g, '');

    norm = self._convertSpecialCharToZenkaku(norm);

    norm = norm
      // Spaces were collapsed above, so only U+0020 can remain at the edges.
      .replace(/^ +| +$/g, '')
      // A group that did not participate expands to '', so '$1$2' keeps the
      // single captured left-hand character and drops the spaces.
      .replace(self._spacesBetweenRe, '$1$2');

    return self._convertSpecialCharToHankaku(norm);
  }

  /**
   * Builds the "space between" matcher: a space run is removed when it sits
   * between two multi-byte characters, or between a multi-byte character and a
   * Basic Latin character (either order). The right-hand neighbour is only
   * looked ahead at, never consumed, so it can serve as the left-hand
   * neighbour of the next match ("あ a い" -> "あaい").
   *
   * @returns {RegExp} Global, unicode-aware matcher with two capture groups.
   */
  static _buildSpacesBetweenRe() {
    const multiByte = NeologdNormalizer._multiByte;
    const basicLatin = NeologdNormalizer._basicLatin;
    return new RegExp(
      `([${multiByte}]) +(?=[${multiByte}${basicLatin}])|([${basicLatin}]) +(?=[${multiByte}])`,
      'gu'
    );
  }

  /**
   * Replaces every character that is an own key of `table` by its mapped
   * value and copies every other character through unchanged.
   *
   * @param {string} str Input text.
   * @param {Object<string, string>} table Character lookup table.
   * @returns {string} Translated text.
   */
  static _translate(str, table) {
    let out = '';
    for (const ch of str) {
      out += Object.prototype.hasOwnProperty.call(table, ch) ? table[ch] : ch;
    }
    return out;
  }

  /**
   * @returns {Object<string, string>} Half-width punctuation -> full-width.
   */
  static _buildSpecialHankakuToZenkakuMap() {
    const map = {};
    // ASCII punctuation whose full-width twin is exactly 0xFEE0 above it.
    for (const c of '!#$%&()*+,-./:;<=>?@[]^_`{|}') {
      map[c] = String.fromCharCode(c.charCodeAt(0) + 0xFEE0);
    }
    map['"'] = '\u201D'; // RIGHT DOUBLE QUOTATION MARK
    map['\''] = '\u2019'; // RIGHT SINGLE QUOTATION MARK
    map['~'] = '\u301C'; // WAVE DASH
    map['\u00A5'] = '\uFFE5'; // YEN SIGN -> FULLWIDTH YEN SIGN
    map['\uFF61'] = '\u3002'; // halfwidth ideographic full stop
    map['\uFF64'] = '\u3001'; // halfwidth ideographic comma
    map['\uFF65'] = '\u30FB'; // halfwidth katakana middle dot
    map['\uFF62'] = '\u300C'; // halfwidth left corner bracket
    map['\uFF63'] = '\u300D'; // halfwidth right corner bracket
    return map;
  }

  /**
   * @returns {Object<string, string>} Full-width punctuation -> half-width.
   *   FULLWIDTH EQUALS SIGN and the CJK punctuation map to themselves, i.e.
   *   they intentionally stay full-width.
   */
  static _buildSpecialZenkakuToHankakuMap() {
    const map = {};
    // Same list as the hankaku -> zenkaku table, minus '='.
    for (const c of '!#$%&()*+,-./:;<>?@[]^_`{|}') {
      map[String.fromCharCode(c.charCodeAt(0) + 0xFEE0)] = c;
    }
    map['\u201D'] = '"';
    map['\u2019'] = '\'';
    map['\u301C'] = '~';
    map['\uFFE5'] = '\u00A5';
    for (const kept of '\uFF1D\u3002\u3001\u30FB\u300C\u300D') {
      map[kept] = kept;
    }
    return map;
  }

  /**
   * @returns {Object<string, string>} Full-width 0-9, A-Z, a-z -> ASCII.
   */
  static _buildEisuuZenkakuToHankakuMap() {
    const map = {};
    const ranges = [[0xFF10, 0xFF19], [0xFF21, 0xFF3A], [0xFF41, 0xFF5A]];
    for (const [first, last] of ranges) {
      for (let code = first; code <= last; code++) {
        map[String.fromCharCode(code)] = String.fromCharCode(code - 0xFEE0);
      }
    }
    return map;
  }

  /**
   * @returns {Object<string, string>} Half-width katakana (U+FF66-U+FF9D,
   *   without U+FF70) -> full-width, plus every kana + (han)dakuten pair that
   *   NFKC composes into a single character.
   */
  static _buildKatakanaHankakuToZenkakuMap() {
    const map = {};
    for (let code = 0xFF66; code <= 0xFF9D; code++) {
      if (code === 0xFF70) {
        // The half-width prolonged sound mark is handled together with the
        // other long-vowel marks in normalize().
        continue;
      }
      const base = String.fromCharCode(code);
      map[base] = base.normalize('NFKC');
      for (const mark of ['\uFF9E', '\uFF9F']) {
        const pair = `${base}${mark}`;
        const composed = pair.normalize('NFKC');
        if (composed.length === 1) {
          map[pair] = composed;
        }
      }
    }
    return map;
  }

  /**
   * Widens ASCII punctuation, YEN SIGN and half-width CJK punctuation.
   *
   * @param {string} str Input text.
   * @returns {string} Text with the listed characters in full-width form.
   */
  static _convertSpecialCharToZenkaku(str) {
    return NeologdNormalizer._translate(str, NeologdNormalizer._specialCharacterHankakuToZenkakuMap);
  }

  /**
   * Narrows full-width punctuation back to ASCII, except the characters the
   * table maps to themselves.
   *
   * @param {string} str Input text.
   * @returns {string} Text with the listed characters in half-width form.
   */
  static _convertSpecialCharToHankaku(str) {
    return NeologdNormalizer._translate(str, NeologdNormalizer._specialCharacterZenkakuToHankakuMap);
  }

  /**
   * Converts full-width digits and Latin letters to ASCII.
   *
   * @param {string} str Input text.
   * @returns {string} Converted text.
   */
  static _convertEisuuZenkakuToHankaku(str) {
    return NeologdNormalizer._translate(str, NeologdNormalizer._eisuuZenkakuToHankakuMap);
  }

  /**
   * Converts half-width katakana to full-width, merging a following
   * half-width dakuten (U+FF9E) or handakuten (U+FF9F) into the kana.
   *
   * @param {string} str Input text.
   * @returns {string} Converted text.
   */
  static _convertKatakanaHankakuToZenkaku(str) {
    const table = NeologdNormalizer._katakanaHankakuToZenkakuMap;
    return str.replace(/[\uFF66-\uFF6F\uFF71-\uFF9D][\uFF9E\uFF9F]?/g, (seq) => {
      // Pairs with no precomposed form fall back to NFKC (kana + combining
      // mark) rather than yielding `undefined`.
      return Object.prototype.hasOwnProperty.call(table, seq) ? table[seq] : seq.normalize('NFKC');
    });
  }
}

// ---- /package.json (first part; it shares this dump line and continues on the next one) ----
// {
//   "name": "neologd-normalizer",
//   "version": "0.0.3",
//   "description": "neologd normalizing script for JavaScript",
//   "main": "build/index.js",
//   "scripts": {
//     "test": "mocha --compilers js:babel-register",
//     "lint": "eslint ./index.es",
//     "check": "npm test && npm run lint",
//     "clean": "rm -f build/*",
//     "build": "npm run clean && babel index.es -o build/index.js",
//     "prepublish": "npm run build",
//     "bench": "npm run build && babel ./author/bench.es -o ./author/bench.js && node ./author/bench.js"
//   },
//   "repository": {
//     "type": "git",
//     "url": "git+https://github.com/moznion/neologd-normalizer-js.git"
//   },
//   "keywords": [
//     "regexp",
//     "neologd"
//   ],
//   "devDependencies": {
//     "babel-cli": "^6.18.0",
//     "babel-eslint": "^6.1.2",
//     "babel-plugin-transform-class-properties": "^6.19.0",
//     "babel-preset-es2015": "^6.18.0",
//     "babel-preset-power-assert": "^1.0.0",
//     "babel-preset-stage-3": "^6.17.0",
//     "babel-register": "^6.18.0",
//     "benchmark": "^2.1.2",
//     "eslint": "^3.12.0",
//     "mocha": "^3.2.0",
//     "power-assert": "^1.4.2"
//   },
//   "author": "moznion",
//   "license": "Apache-2.0",
//   "bugs": {
//     "url":
// ---- /package.json (remaining part; it shares this dump line with the spec below) ----
//       "https://github.com/moznion/neologd-normalizer-js/issues"
//   },
//   "homepage": "https://github.com/moznion/neologd-normalizer-js"
// }
// ---- /test/test.js ----
// Mocha spec for NeologdNormalizer.normalize.
//
// NOTE(review): several input/expected literals below look width-normalized in
// this copy (full-width and half-width forms collapsed) — confirm them against
// the repository; they are reproduced here exactly as they appear.
import assert from "assert";
import NeologdNormalizer from "../index";

describe("NeologdNormalize", function () {
  it("Should return correct normalized string", function () {
    // strictEqual: the normalizer must return a string, so loose `==`
    // coercion would only hide a wrong return type.
    assert.strictEqual(NeologdNormalizer.normalize("0123456789"), "0123456789");
    assert.strictEqual(NeologdNormalizer.normalize("ABCDEFGHIJKLMNOPQRSTUVWXYZ"), "ABCDEFGHIJKLMNOPQRSTUVWXYZ");
    assert.strictEqual(NeologdNormalizer.normalize("abcdefghijklmnopqrstuvwxyz"), "abcdefghijklmnopqrstuvwxyz");
    assert.strictEqual(NeologdNormalizer.normalize("!”#$%&’()*+,⁃./:;<>?@[¥]^_`{|}"), "!\"#$%&'()*+,-./:;<>?@[¥]^_`{|}");
    assert.strictEqual(NeologdNormalizer.normalize("=。、・「」"), "=。、・「」");
    assert.strictEqual(NeologdNormalizer.normalize("ハンカクゼンカク"), "ハンカクゼンカク");
    assert.strictEqual(NeologdNormalizer.normalize("o₋o"), "o-o");
    assert.strictEqual(NeologdNormalizer.normalize("majika━"), "majikaー");
    assert.strictEqual(NeologdNormalizer.normalize("わ〰い"), "わい");
    assert.strictEqual(NeologdNormalizer.normalize("スーパーーーー"), "スーパー");
    assert.strictEqual(NeologdNormalizer.normalize("!#"), "!#");
    assert.strictEqual(NeologdNormalizer.normalize("ゼンカク スペース"), "ゼンカクスペース");
    assert.strictEqual(NeologdNormalizer.normalize("お お"), "おお");
    assert.strictEqual(NeologdNormalizer.normalize(" おお"), "おお");
    assert.strictEqual(NeologdNormalizer.normalize("おお "), "おお");
    assert.strictEqual(NeologdNormalizer.normalize("検索 エンジン 自作 入門 を 買い ました!!!"), "検索エンジン自作入門を買いました!!!");
    assert.strictEqual(NeologdNormalizer.normalize("アルゴリズム C"), "アルゴリズムC");
    assert.strictEqual(NeologdNormalizer.normalize("   PRML  副 読 本   "), "PRML副読本");
    assert.strictEqual(NeologdNormalizer.normalize("Coding the Matrix"), "Coding the Matrix");
    assert.strictEqual(NeologdNormalizer.normalize("南アルプスの 天然水 Sparking Lemon レモン一絞り"), "南アルプスの天然水Sparking Lemonレモン一絞り");
    assert.strictEqual(NeologdNormalizer.normalize("南アルプスの 天然水- Sparking* Lemon+ レモン一絞り"), "南アルプスの天然水-Sparking*Lemon+レモン一絞り");
    assert.strictEqual(NeologdNormalizer.normalize("南アルプスの 天然水 Sparking Lem on レモン一絞り"), "南アルプスの天然水Sparking Lem onレモン一絞り");
    assert.strictEqual(NeologdNormalizer.normalize("アルゴリズム C plus plus"), "アルゴリズムC plus plus");
  });
});