├── .eslintignore ├── versions.json ├── img ├── on.gif └── off.gif ├── src ├── cm5 │ ├── cm.d.ts │ └── index.ts ├── cm6 │ ├── range-for-click.ts │ ├── get-seg.ts │ ├── from-src.ts │ └── index.ts ├── chsp-main.ts ├── get-chs-seg.ts └── tiny_segmenter-0.2.js ├── .gitignore ├── manifest.json ├── .prettierrc ├── tsconfig.json ├── LICENSE ├── rollup.config.js ├── .eslintrc.js ├── README.md ├── NOTICE.md └── package.json /.eslintignore: -------------------------------------------------------------------------------- 1 | main.js -------------------------------------------------------------------------------- /versions.json: -------------------------------------------------------------------------------- 1 | { 2 | "1.0.0": "0.13.8" 3 | } 4 | -------------------------------------------------------------------------------- /img/on.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sonarAIT/cm-japanese-patch/HEAD/img/on.gif -------------------------------------------------------------------------------- /img/off.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sonarAIT/cm-japanese-patch/HEAD/img/off.gif -------------------------------------------------------------------------------- /src/cm5/cm.d.ts: -------------------------------------------------------------------------------- 1 | import CodeMirror from "codemirror"; 2 | 3 | declare global { 4 | interface Window { 5 | CodeMirror?: typeof CodeMirror; 6 | } 7 | } 8 | 9 | export {}; 10 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Intellij 2 | *.iml 3 | .idea 4 | 5 | # npm 6 | node_modules 7 | package-lock.json 8 | yarn.lock 9 | yarn-error.log 10 | 11 | # build 12 | main.js 13 | *.js.map 14 | styles.css 15 | /build/* 16 | !build/.hotreload 17 | 18 | # 
saved data 19 | data.json 20 | 21 | # macOS 22 | .DS_Store -------------------------------------------------------------------------------- /manifest.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": "japanese-word-splitter", 3 | "name": "Word Splitting for Japanese in Edit Mode", 4 | "version": "1.0.0", 5 | "minAppVersion": "0.13.8", 6 | "description": "A patch for Obsidian's built-in CodeMirror Editor to support Japanese word splitting", 7 | "author": "sonarAIT", 8 | "authorUrl": "https://github.com/sonarAIT", 9 | "isDesktopOnly": true 10 | } 11 | -------------------------------------------------------------------------------- /.prettierrc: -------------------------------------------------------------------------------- 1 | { 2 | "singleQuote": false, 3 | "tabWidth": 2, 4 | "trailingComma": "all", 5 | "overrides": [ 6 | { 7 | "files": ".prettierrc", 8 | "options": { 9 | "parser": "json" 10 | } 11 | }, 12 | { 13 | "files": "*.yml", 14 | "options": { 15 | "tabWidth": 2, 16 | "singleQuote": false 17 | } 18 | } 19 | ] 20 | } 21 | -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "baseUrl": "src", 4 | "outDir": "build", 5 | "inlineSourceMap": true, 6 | "inlineSources": true, 7 | "module": "ESNext", 8 | "target": "es6", 9 | "allowJs": true, 10 | "noImplicitAny": true, 11 | "moduleResolution": "node", 12 | "importHelpers": true, 13 | "strict": true, 14 | "allowSyntheticDefaultImports": true, 15 | "lib": ["dom", "es5", "scripthost", "es2015"] 16 | }, 17 | "include": ["src/**/*.ts", "node_modules/cm6-view-src/src/*.ts"], 18 | } 19 | -------------------------------------------------------------------------------- /src/cm6/range-for-click.ts: -------------------------------------------------------------------------------- 1 | import { SelectionRange } from 
/**
 * Plugin entry point. On load it creates one TinySegmenter instance and
 * hands it to both editor back-ends: the CodeMirror 5 `findWordAt` patch
 * and the CodeMirror 6 double-click extension plus `wordAt` patch.
 */
export default class CMJapanesePatch extends Plugin {
  async onload() {
    console.log("loading japanese-word-splitter");
    const segmenter: TinySegmenter = new TinySegmenter();

    // CodeMirror 5: the patch helper may yield a falsy value (nothing to
    // undo), in which case there is nothing to register.
    const undoCm5Patch = patchGetWordAt(segmenter);
    if (undoCm5Patch) {
      this.register(undoCm5Patch);
    }

    // CodeMirror 6: install the editor extension first, then patch `wordAt`
    // and register the value returned by the patch helper.
    const cm6Extension = getChsPatchExtension(segmenter);
    this.registerEditorExtension(cm6Extension);
    const undoWordAtPatch = getWordAtPatchUnloader(segmenter);
    this.register(undoWordAtPatch);
  }
}
permit persons to whom the Software is furnished to do so, subject to the following conditions: 4 | 5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 6 | 7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -------------------------------------------------------------------------------- /src/cm6/get-seg.ts: -------------------------------------------------------------------------------- 1 | import { 2 | EditorSelection, 3 | EditorState, 4 | SelectionRange, 5 | } from "@codemirror/state"; 6 | import type { TinySegmenter } from "tiny_segmenter-0.2"; 7 | 8 | import { getChsSegFromRange } from "../get-chs-seg"; 9 | 10 | export default function cm6GetChsSeg( 11 | pos: number, 12 | srcRange: SelectionRange, 13 | state: EditorState, 14 | seg: TinySegmenter, 15 | ): SelectionRange; 16 | export default function cm6GetChsSeg( 17 | pos: number, 18 | srcRange: SelectionRange | null, 19 | state: EditorState, 20 | seg: TinySegmenter, 21 | ): SelectionRange | null; 22 | // eslint-disable-next-line prefer-arrow/prefer-arrow-functions 23 | export default function cm6GetChsSeg( 24 | pos: number, 25 | srcRange: SelectionRange | null, 26 | state: EditorState, 27 | seg: TinySegmenter, 28 | ): SelectionRange | null { 29 | if (!srcRange) return null; 30 | const { from, to } = srcRange, 31 | text = state.doc.sliceString(from, to); 32 | 33 | const chsSegResult = getChsSegFromRange(pos, { from, to, text }, seg); 34 | if (chsSegResult) { 35 | return EditorSelection.range(chsSegResult.from, chsSegResult.to); 
/** Maximum number of UTF-16 code units kept on each side of the cursor before segmenting. */
const RANGE_LIMIT = 6;

/**
 * Matches text containing at least one Japanese character: the iteration
 * marks U+3005..U+3007, hiragana, katakana (including the prolonged sound
 * mark U+30FC), CJK unified ideographs, and half-width katakana.
 */
const JAPANESE_TEXT =
  /[\u3005-\u3007\u3041-\u3096\u30a1-\u30fa\u30fc\u4e00-\u9fff\uff66-\uff9f]/;

/**
 * Narrows a word range to the single segment that contains the cursor.
 *
 * @param cursor - absolute document offset of the cursor
 * @param range - absolute `from`/`to` offsets of the candidate word and its `text`
 * @param seg - segmenter whose `segment(text)` splits `text` into consecutive pieces
 * @returns absolute `{ from, to }` of the segment under the cursor, or `null`
 *   when `text` contains no Japanese character (the caller keeps its own range)
 */
export const getChsSegFromRange = (
  cursor: number,
  range: { from: number; to: number; text: string },
  seg: TinySegmenter,
) => {
  const { to } = range;
  let { from, text } = range;

  // Kana-only words must pass this gate too, not just words containing kanji.
  if (!JAPANESE_TEXT.test(text)) return null;

  // Trim long text so only RANGE_LIMIT characters remain on each side.
  if (cursor - from > RANGE_LIMIT) {
    const newFrom = cursor - RANGE_LIMIT;
    text = text.slice(newFrom - from);
    from = newFrom;
  }
  if (to - cursor > RANGE_LIMIT) {
    // Negative end index: drops the characters beyond cursor + RANGE_LIMIT.
    text = text.slice(0, cursor + RANGE_LIMIT - to);
  }

  const relativePos = cursor - from;
  let chunkStart = 0;
  let chunkEnd = 0;
  for (const word of seg.segment(text)) {
    chunkStart = chunkEnd;
    chunkEnd = chunkStart + word.length;
    if (relativePos >= chunkStart && relativePos < chunkEnd) break;
  }
  // If no segment contained the cursor (e.g. the cursor sits at the very end
  // of the word), the bounds still describe the last segment, so that one is
  // returned instead of an empty range.
  return { from: from + chunkStart, to: from + chunkEnd };
};
"src/chsp-main.ts", 16 | output: { 17 | file: "build/main.js", 18 | sourcemap: "inline", 19 | sourcemapExcludeSources: isProd, 20 | format: "cjs", 21 | exports: "default", 22 | banner, 23 | }, 24 | output: { 25 | file: "../main.js", 26 | sourcemap: "inline", 27 | sourcemapExcludeSources: isProd, 28 | format: "cjs", 29 | exports: "default", 30 | banner, 31 | }, 32 | external: ["obsidian", "@codemirror/state", "@codemirror/view"], 33 | plugins: [ 34 | typescript(), 35 | nodeResolve({ browser: true }), 36 | commonjs(), 37 | copy({ 38 | targets: [ 39 | { src: "manifest.json", dest: "build" }, 40 | { src: "manifest.json", dest: ".." }, 41 | ], 42 | }), 43 | ], 44 | }; 45 | -------------------------------------------------------------------------------- /.eslintrc.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | root: true, 3 | parser: "@typescript-eslint/parser", 4 | parserOptions: { 5 | ecmaVersion: 2020, 6 | sourceType: "module", 7 | }, 8 | extends: [ 9 | "prettier", 10 | "plugin:prettier/recommended", 11 | "plugin:import/typescript", 12 | ], 13 | env: { 14 | browser: true, 15 | node: true, 16 | }, 17 | plugins: [ 18 | "@typescript-eslint", 19 | "jsdoc", 20 | "prefer-arrow", 21 | "simple-import-sort", 22 | "import", 23 | ], 24 | settings: { 25 | "import/parsers": { 26 | "@typescript-eslint/parser": [".ts", ".tsx"], 27 | }, 28 | "import/resolver": { 29 | typescript: { 30 | alwaysTryTypes: true, // always try to resolve types under `@types` directory even it doesn't contain any source code, like `@types/unist` 31 | project: "tsconfig.json", 32 | }, 33 | }, 34 | }, 35 | rules: { 36 | "simple-import-sort/imports": "error", 37 | "simple-import-sort/exports": "error", 38 | "import/no-unresolved": "error", 39 | "prefer-arrow/prefer-arrow-functions": [ 40 | "warn", 41 | { 42 | disallowPrototype: true, 43 | singleReturnOnly: false, 44 | classPropertiesAllowed: false, 45 | }, 46 | ], 47 | }, 48 | }; 49 | 
/**
 * Wraps CodeMirror 5's `findWordAt` so that the word range it reports is
 * narrowed to the segment under the cursor whenever `getChsSegFromRange`
 * yields one.
 *
 * @param seg - segmenter forwarded to `getChsSegFromRange`
 * @returns whatever `around` returns for the installed patch, or `null` when
 *   `window.CodeMirror` (or its prototype) is not available
 */
const patchGetWordAt = (seg: TinySegmenter) => {
  const cm5 = window.CodeMirror;
  if (!cm5?.prototype) return null;

  return around(cm5.prototype as CodeMirror.Editor, {
    findWordAt: (next) =>
      // eslint-disable-next-line prefer-arrow/prefer-arrow-functions
      function (
        this: CodeMirror.Editor,
        pos: CodeMirror.Position,
      ): CodeMirror.Range {
        const wordRange = next.call(this, pos);
        const startPos = wordRange.from();
        const endPos = wordRange.to();

        // getChsSegFromRange works on absolute indices, not {line, ch} positions.
        const refined = getChsSegFromRange(
          this.indexFromPos(pos),
          {
            from: this.indexFromPos(startPos),
            to: this.indexFromPos(endPos),
            text: this.getRange(startPos, endPos),
          },
          seg,
        );
        if (!refined) return wordRange;

        wordRange.anchor = this.posFromIndex(refined.from);
        wordRange.head = this.posFromIndex(refined.to);
        return wordRange;
      },
  });
};
export default patchGetWordAt;
/** True when `y` lies within the vertical extent of `rect` (edges included). */
const insideY = (y: number, rect: Rect) => rect.top <= y && y <= rect.bottom;

/** True when the point (`x`, `y`) lies within `rect` (edges included). */
const inside = (x: number, y: number, rect: Rect) =>
  insideY(y, rect) && rect.left <= x && x <= rect.right;

/**
 * Decides whether the coordinates that produced `pos` belong to the element
 * before the position (-1) or the element after it (1).
 */
function findPositionSide(view: EditorView, pos: number, x: number, y: number) {
  const line = LineView.find((view as any).docView, pos);
  if (!line) return 1;

  // Line boundaries point into the line.
  const offset = pos - line.posAtStart;
  if (offset == 0) return 1;
  if (offset == line.length) return -1;

  // A position lying on top of an element points at that element.
  const rectBefore = line.coordsAt(offset, -1);
  if (rectBefore && inside(x, y, rectBefore)) return -1;
  const rectAfter = line.coordsAt(offset, 1);
  if (rectAfter && inside(x, y, rectAfter)) return 1;

  // Probably a line-wrap point: pick "before" when the point is beside it.
  return rectBefore && insideY(y, rectBefore) ? -1 : 1;
}

/**
 * Maps a mouse event to a document position plus the side of that position
 * the pointer is associated with.
 */
export function queryPos(
  view: EditorView,
  event: MouseEvent,
): { pos: number; bias: 1 | -1 } | null {
  const { clientX, clientY } = event;
  const pos = view.posAtCoords({ x: clientX, y: clientY }, false);
  const bias = findPositionSide(view, pos, clientX, clientY);
  return { pos, bias };
}
8 | For Japanese word segmentation, we used [tiny-segmenter](http://chasen.org/~taku/software/TinySegmenter/) created by [Taku Kudo](https://github.com/taku910). 9 | Special Thanks to [@linonetwo](https://github.com/linonetwo) and [Taku Kudo](https://github.com/taku910)! 10 | 11 | ## Demo 12 | 13 | | Obsidian's Default Word Splitting
パッチを適用していない場合 | Patched
パッチを適用した場合 | 14 | | ------------------ | ----------- | 15 | | ![ob-default-splitting](https://github.com/sonarAIT/cm-japanese-patch/blob/main/img/off.gif)|![ob-patched-splitting](https://github.com/sonarAIT/cm-japanese-patch/blob/main/img/on.gif)| 16 | 17 | ## Compatibility 18 | 19 | The required API feature is only available for Obsidian v0.13.8+. 20 | 必要なAPI機能は,Obsidian v0.13.8+でのみ利用可能です. 21 | 22 | ## Installation インストール方法 23 | 24 | ### From Obsidian 25 | 26 | 1. Open `Settings` > `Community plugins` 27 | 2. Make sure Safe mode is **off** 28 | 3. Click `Browse` in `Community plugins` 29 | 4. Search for this plugin 30 | 5. Click `Install` 31 | 6. Once installed, click `Enable` and the patch is ready to use. 32 | 33 | *** 34 | 35 | 1. `設定`から`コミュニティプラグイン`を開きます. 36 | 2. セーフモードが**オフ**であることを確認します. 37 | 3. `コミュニティプラグイン`の`閲覧`をクリックします. 38 | 4. このプラグインを検索します. 39 | 5. `インストール`をクリックします. 40 | 6. インストールが完了したら,`有効化`を押すことで,パッチは使用可能な状態になります. 41 | 42 | ### From GitHub 43 | 44 | 1. Download the Latest Release from the Releases section of the GitHub Repository 45 | 2. Put the files in your vault's plugins folder: `/.obsidian/plugins/japanese-word-splitter` 46 | 3. Reload Obsidian 47 | 4. If prompted about Safe Mode, you can disable safe mode and enable the plugin. 48 | Otherwise, head to `Settings` > `Community plugins`, make sure Safe mode is off, and 49 | enable the plugin from there. 50 | 51 | > Note: The `.obsidian` folder may be hidden. On macOS you should be able to press `Command+Shift+Dot` to show the folder in Finder. 52 | 53 | *** 54 | 55 | 1. GitHubリポジトリのReleasesからLatest Releaseをダウンロードします. 56 | 2. vaultのpluginsフォルダにファイルを配置します.`/.obsidian/plugins/japanese-word-splitter`にファイルを置きます. 57 | 3. Obsidianをリロードします. 58 | 4. セーフモードについてプロンプトが表示されたら,セーフモードを無効にしてプラグインを有効にすることができます.それ以外の場合は,`設定`から`コミュニティプラグイン`に移動し,セーフモードがオフになっていることを確認し,そこからプラグインを有効にしてください. 59 | 60 | > 注意: `.obsidian` フォルダは隠されている可能性があります.macOSでは,`Command+Shift+Dot`を押すと,Finderでフォルダを表示できるはずです. 
/**
 * Builds the CodeMirror 6 extension list that overrides double-click
 * selection: the selected range is computed with `rangeForClick`, which
 * receives the segmenter `seg`.
 *
 * @param seg - segmenter forwarded to `rangeForClick`
 * @returns an array containing the single mouse-selection-style extension
 */
export const getChsPatchExtension = (seg: TinySegmenter) => {
  const dblClickPatch = EditorView.mouseSelectionStyle.of((view, event) => {
    // Only handle double clicks with button 0; returning null for anything
    // else means this handler provides no selection style for that event.
    if (event.button !== 0 || event.detail !== 2) return null;

    // From https://github.com/codemirror/view/blob/0.19.30/src/input.ts#L464-L495
    let start = queryPos(view, event),
      type = event.detail; // not targeting ie, no need for polyfill
    // Selection as it was when the gesture started; kept in sync with
    // document changes in `update` below.
    let startSel = view.state.selection;
    // Cache of the most recently queried position and the event it came from.
    let last = start,
      lastEvent: MouseEvent | null = event;
    return {
      update(update) {
        if (update.docChanged) {
          // Map the remembered start position and selection through the
          // changes, and drop the cached event so `get` re-queries.
          if (start) start.pos = update.changes.mapPos(start.pos);
          startSel = startSel.map(update.changes);
          lastEvent = null;
        }
      },
      get(event, extend, multiple) {
        let cur;
        // Reuse the cached position while the pointer coordinates are unchanged.
        if (
          lastEvent &&
          event.clientX == lastEvent.clientX &&
          event.clientY == lastEvent.clientY
        )
          cur = last;
        else {
          cur = last = queryPos(view, event);
          lastEvent = event;
        }
        if (!cur || !start) return startSel;
        let range = rangeForClick(view, cur.pos, cur.bias, type, seg);
        if (start.pos != cur.pos && !extend) {
          // The pointer moved away from the start position: cover both the
          // range at the start position and the range at the current one.
          let startRange = rangeForClick(
            view,
            start.pos,
            start.bias,
            type,
            seg,
          );
          let from = Math.min(startRange.from, range.from),
            to = Math.max(startRange.to, range.to);
          // Anchor on the side where the gesture started: forward when the
          // start range lies before the current range, reversed otherwise.
          range =
            from < range.from
              ? EditorSelection.range(from, to)
              : EditorSelection.range(to, from);
        }
        if (extend)
          return startSel.replaceRange(
            startSel.main.extend(range.from, range.to),
          );
        else if (multiple) return startSel.addRange(range);
        else return EditorSelection.create([range]);
      },
    } as MouseSelectionStyle;
  });
  return [dblClickPatch];
};

/**
 * Wraps `EditorState.prototype.wordAt` so that its result is passed through
 * `cm6GetChsSeg` together with the position, the state and `seg`.
 *
 * @param seg - segmenter forwarded to `cm6GetChsSeg`
 * @returns the value returned by `around` (presumably the function that
 *   removes the patch, as the name suggests; confirm against monkey-around)
 */
export const getWordAtPatchUnloader = (seg: TinySegmenter) =>
  around(EditorState.prototype, {
    wordAt: (next) =>
      function (this: EditorState, pos: number) {
        return cm6GetChsSeg(pos, next.call(this, pos), this, seg);
      },
  });
12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 14 | ``` 15 | 16 | ## tiny-segmenterのライセンス通知 17 | このリポジトリには[tiny-segmenter](http://chasen.org/~taku/software/TinySegmenter/)からのソースプログラムが含まれています.以下に元のライセンスのテキストを示します. 18 | This repository contains the source program from [tiny-segmenter](http://chasen.org/~taku/software/TinySegmenter/). The text of the original license is shown below. 19 | 20 | ``` 21 | Copyright (c) 2008, Taku Kudo 22 | 23 | All rights reserved. 24 | 25 | Redistribution and use in source and binary forms, with or without 26 | modification, are permitted provided that the following conditions are met: 27 | 28 | * Redistributions of source code must retain the above copyright notice, 29 | this list of conditions and the following disclaimer. 30 | * Redistributions in binary form must reproduce the above copyright 31 | notice, this list of conditions and the following disclaimer in the 32 | documentation and/or other materials provided with the distribution. 33 | * Neither the name of the nor the names of its 34 | contributors may be used to endorse or promote products derived from this 35 | software without specific prior written permission. 36 | 37 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 38 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 39 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 40 | A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR 41 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 42 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 43 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 44 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 45 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 46 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 47 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 48 | ``` -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "@aidenlx/chs-patch", 3 | "version": "1.4.1", 4 | "description": "A patch for Obsidian's built-in CodeMirror Editor to support Simplified Chinese word splitting", 5 | "main": "", 6 | "types": "src/api.d.ts", 7 | "files": [ 8 | "src/*.d.ts" 9 | ], 10 | "scripts": { 11 | "dev": "rollup --config rollup.config.js -w", 12 | "build": "rollup --config rollup.config.js --environment BUILD:production", 13 | "prettier": "prettier --write 'src/**/*.+(ts|tsx|json|html|css)'", 14 | "eslint": "eslint . 
--ext .ts,.tsx --fix", 15 | "release": "release-it" 16 | }, 17 | "keywords": [], 18 | "author": "AidenLx", 19 | "license": "MIT", 20 | "dependencies": { 21 | "obsidian": "github:obsidianmd/obsidian-api", 22 | "tiny-segmenter": "^0.2.0" 23 | }, 24 | "devDependencies": { 25 | "@codemirror/view": "0.19.30", 26 | "@release-it/bumper": "^3.0.1", 27 | "@release-it/conventional-changelog": "^3.3.0", 28 | "@rollup/plugin-commonjs": "^21.0.1", 29 | "@rollup/plugin-node-resolve": "^13.0.6", 30 | "@rollup/plugin-typescript": "^8.3.0", 31 | "@types/node": "^16.11.12", 32 | "@typescript-eslint/eslint-plugin": "^5.6.0", 33 | "@typescript-eslint/parser": "^5.6.0", 34 | "cm6-view-src": "github:codemirror/view#0.19.30", 35 | "cz-conventional-changelog": "^3.3.0", 36 | "eslint": "^8.4.1", 37 | "eslint-config-prettier": "^8.3.0", 38 | "eslint-import-resolver-typescript": "^2.5.0", 39 | "eslint-plugin-import": "^2.25.3", 40 | "eslint-plugin-jsdoc": "^37.2.0", 41 | "eslint-plugin-prefer-arrow": "^1.2.3", 42 | "eslint-plugin-prettier": "^4.0.0", 43 | "eslint-plugin-simple-import-sort": "^7.0.0", 44 | "monkey-around": "^2.2.0", 45 | "pinyinlite": "^1.2.1", 46 | "prettier": "^2.5.1", 47 | "release-it": "^14.11.8", 48 | "rollup": "^2.61.1", 49 | "rollup-plugin-copy": "^3.4.0", 50 | "segmentit": "^2.0.3", 51 | "tslib": "^2.3.1", 52 | "typescript": "^4.5.3" 53 | }, 54 | "publishConfig": { 55 | "access": "public" 56 | }, 57 | "release-it": { 58 | "hooks": { 59 | "before:init": [ 60 | "npm run prettier", 61 | "npm run eslint" 62 | ], 63 | "after:bump": [ 64 | "json -I -f manifest.json -e \"this.version='${version}'\"", 65 | "json -I -f versions.json -e \"this['${version}']='$(cat manifest.json | json minAppVersion)'\"", 66 | "sed -i '' \"s/available for Obsidian v.*$/available for Obsidian v$(cat manifest.json | json minAppVersion)+./\" README.md", 67 | "git add .", 68 | "npm run build" 69 | ], 70 | "after:git:release": "echo After git push, before github release", 71 | "after:release": "echo 
Successfully released ${name} v${version} to ${repo.repository}." 72 | }, 73 | "git": { 74 | "commitMessage": "chore: release v${version}", 75 | "tagName": "${version}", 76 | "tagAnnotation": "Release v${version}" 77 | }, 78 | "npm": { 79 | "publish": true 80 | }, 81 | "github": { 82 | "release": true, 83 | "assets": [ 84 | "build/main.js", 85 | "build/manifest.json" 86 | ], 87 | "proxy": "http://127.0.0.1:7890", 88 | "releaseName": "${version}" 89 | }, 90 | "plugins": { 91 | "@release-it/bumper": { 92 | "out": "manifest.json" 93 | }, 94 | "@release-it/conventional-changelog": { 95 | "preset": "angular", 96 | "infile": "CHANGELOG.md" 97 | } 98 | } 99 | }, 100 | "config": { 101 | "commitizen": { 102 | "path": "./node_modules/cz-conventional-changelog" 103 | } 104 | } 105 | } 106 | -------------------------------------------------------------------------------- /src/tiny_segmenter-0.2.js: -------------------------------------------------------------------------------- 1 | // TinySegmenter 0.1 -- Super compact Japanese tokenizer in Javascript 2 | // (c) 2008 Taku Kudo 3 | // TinySegmenter is freely distributable under the terms of a new BSD licence. 
4 | // For details, see http://chasen.org/~taku/software/TinySegmenter/LICENCE.txt 5 | 6 | export function TinySegmenter() { 7 | var patterns = { 8 | "[一二三四五六七八九十百千万億兆]":"M", 9 | "[一-龠々〆ヵヶ]":"H", 10 | "[ぁ-ん]":"I", 11 | "[ァ-ヴーア-ン゙ー]":"K", 12 | "[a-zA-Za-zA-Z]":"A", 13 | "[0-90-9]":"N" 14 | } 15 | this.chartype_ = []; 16 | for (var i in patterns) { 17 | var regexp = new RegExp; 18 | regexp.compile(i) 19 | this.chartype_.push([regexp, patterns[i]]); 20 | } 21 | 22 | this.BIAS__ = -332 23 | this.BC1__ = {"HH":6,"II":2461,"KH":406,"OH":-1378}; 24 | this.BC2__ = {"AA":-3267,"AI":2744,"AN":-878,"HH":-4070,"HM":-1711,"HN":4012,"HO":3761,"IA":1327,"IH":-1184,"II":-1332,"IK":1721,"IO":5492,"KI":3831,"KK":-8741,"MH":-3132,"MK":3334,"OO":-2920}; 25 | this.BC3__ = {"HH":996,"HI":626,"HK":-721,"HN":-1307,"HO":-836,"IH":-301,"KK":2762,"MK":1079,"MM":4034,"OA":-1652,"OH":266}; 26 | this.BP1__ = {"BB":295,"OB":304,"OO":-125,"UB":352}; 27 | this.BP2__ = {"BO":60,"OO":-1762}; 28 | this.BQ1__ = {"BHH":1150,"BHM":1521,"BII":-1158,"BIM":886,"BMH":1208,"BNH":449,"BOH":-91,"BOO":-2597,"OHI":451,"OIH":-296,"OKA":1851,"OKH":-1020,"OKK":904,"OOO":2965}; 29 | this.BQ2__ = {"BHH":118,"BHI":-1159,"BHM":466,"BIH":-919,"BKK":-1720,"BKO":864,"OHH":-1139,"OHM":-181,"OIH":153,"UHI":-1146}; 30 | this.BQ3__ = {"BHH":-792,"BHI":2664,"BII":-299,"BKI":419,"BMH":937,"BMM":8335,"BNN":998,"BOH":775,"OHH":2174,"OHM":439,"OII":280,"OKH":1798,"OKI":-793,"OKO":-2242,"OMH":-2402,"OOO":11699}; 31 | this.BQ4__ = {"BHH":-3895,"BIH":3761,"BII":-4654,"BIK":1348,"BKK":-1806,"BMI":-3385,"BOO":-12396,"OAH":926,"OHH":266,"OHK":-2036,"ONN":-973}; 32 | this.BW1__ = 
{",と":660,",同":727,"B1あ":1404,"B1同":542,"、と":660,"、同":727,"」と":1682,"あっ":1505,"いう":1743,"いっ":-2055,"いる":672,"うし":-4817,"うん":665,"から":3472,"がら":600,"こう":-790,"こと":2083,"こん":-1262,"さら":-4143,"さん":4573,"した":2641,"して":1104,"すで":-3399,"そこ":1977,"それ":-871,"たち":1122,"ため":601,"った":3463,"つい":-802,"てい":805,"てき":1249,"でき":1127,"です":3445,"では":844,"とい":-4915,"とみ":1922,"どこ":3887,"ない":5713,"なっ":3015,"など":7379,"なん":-1113,"にし":2468,"には":1498,"にも":1671,"に対":-912,"の一":-501,"の中":741,"ませ":2448,"まで":1711,"まま":2600,"まる":-2155,"やむ":-1947,"よっ":-2565,"れた":2369,"れで":-913,"をし":1860,"を見":731,"亡く":-1886,"京都":2558,"取り":-2784,"大き":-2604,"大阪":1497,"平方":-2314,"引き":-1336,"日本":-195,"本当":-2423,"毎日":-2113,"目指":-724,"B1あ":1404,"B1同":542,"」と":1682}; 33 | this.BW2__ = {"..":-11822,"11":-669,"――":-5730,"−−":-13175,"いう":-1609,"うか":2490,"かし":-1350,"かも":-602,"から":-7194,"かれ":4612,"がい":853,"がら":-3198,"きた":1941,"くな":-1597,"こと":-8392,"この":-4193,"させ":4533,"され":13168,"さん":-3977,"しい":-1819,"しか":-545,"した":5078,"して":972,"しな":939,"その":-3744,"たい":-1253,"たた":-662,"ただ":-3857,"たち":-786,"たと":1224,"たは":-939,"った":4589,"って":1647,"っと":-2094,"てい":6144,"てき":3640,"てく":2551,"ては":-3110,"ても":-3065,"でい":2666,"でき":-1528,"でし":-3828,"です":-4761,"でも":-4203,"とい":1890,"とこ":-1746,"とと":-2279,"との":720,"とみ":5168,"とも":-3941,"ない":-2488,"なが":-1313,"など":-6509,"なの":2614,"なん":3099,"にお":-1615,"にし":2748,"にな":2454,"によ":-7236,"に対":-14943,"に従":-4688,"に関":-11388,"のか":2093,"ので":-7059,"のに":-6041,"のの":-6125,"はい":1073,"はが":-1033,"はず":-2532,"ばれ":1813,"まし":-1316,"まで":-6621,"まれ":5409,"めて":-3153,"もい":2230,"もの":-10713,"らか":-944,"らし":-1611,"らに":-1897,"りし":651,"りま":1620,"れた":4270,"れて":849,"れば":4114,"ろう":6067,"われ":7901,"を通":-11877,"んだ":728,"んな":-4115,"一人":602,"一方":-1375,"一日":970,"一部":-1051,"上が":-4479,"会社":-1116,"出て":2163,"分の":-7758,"同党":970,"同日":-913,"大阪":-2471,"委員":-1250,"少な":-1050,"年度":-8669,"年間":-1626,"府県":-2363,"手権":-1982,"新聞":-4066,"日新":-722,"日本":-7068,"日米":3372,"曜日":-601,"朝鮮":-2355,"本人":-2697,"東京":-1543,"然と":-1384,"社会":-1276,"立て":-990,"第に":-1612,"米国":-4268,"11":-6
69}; 34 | this.BW3__ = {"あた":-2194,"あり":719,"ある":3846,"い.":-1185,"い。":-1185,"いい":5308,"いえ":2079,"いく":3029,"いた":2056,"いっ":1883,"いる":5600,"いわ":1527,"うち":1117,"うと":4798,"えと":1454,"か.":2857,"か。":2857,"かけ":-743,"かっ":-4098,"かに":-669,"から":6520,"かり":-2670,"が,":1816,"が、":1816,"がき":-4855,"がけ":-1127,"がっ":-913,"がら":-4977,"がり":-2064,"きた":1645,"けど":1374,"こと":7397,"この":1542,"ころ":-2757,"さい":-714,"さを":976,"し,":1557,"し、":1557,"しい":-3714,"した":3562,"して":1449,"しな":2608,"しま":1200,"す.":-1310,"す。":-1310,"する":6521,"ず,":3426,"ず、":3426,"ずに":841,"そう":428,"た.":8875,"た。":8875,"たい":-594,"たの":812,"たり":-1183,"たる":-853,"だ.":4098,"だ。":4098,"だっ":1004,"った":-4748,"って":300,"てい":6240,"てお":855,"ても":302,"です":1437,"でに":-1482,"では":2295,"とう":-1387,"とし":2266,"との":541,"とも":-3543,"どう":4664,"ない":1796,"なく":-903,"など":2135,"に,":-1021,"に、":-1021,"にし":1771,"にな":1906,"には":2644,"の,":-724,"の、":-724,"の子":-1000,"は,":1337,"は、":1337,"べき":2181,"まし":1113,"ます":6943,"まっ":-1549,"まで":6154,"まれ":-793,"らし":1479,"られ":6820,"るる":3818,"れ,":854,"れ、":854,"れた":1850,"れて":1375,"れば":-3246,"れる":1091,"われ":-605,"んだ":606,"んで":798,"カ月":990,"会議":860,"入り":1232,"大会":2217,"始め":1681,"市":965,"新聞":-5055,"日,":974,"日、":974,"社会":2024,"カ月":990}; 35 | this.TC1__ = {"AAA":1093,"HHH":1029,"HHM":580,"HII":998,"HOH":-390,"HOM":-331,"IHI":1169,"IOH":-142,"IOI":-1015,"IOM":467,"MMH":187,"OOI":-1832}; 36 | this.TC2__ = {"HHO":2088,"HII":-1023,"HMM":-1154,"IHI":-1965,"KKH":703,"OII":-2649}; 37 | this.TC3__ = {"AAA":-294,"HHH":346,"HHI":-341,"HII":-1088,"HIK":731,"HOH":-1486,"IHH":128,"IHI":-3041,"IHO":-1935,"IIH":-825,"IIM":-1035,"IOI":-542,"KHH":-1216,"KKA":491,"KKH":-1217,"KOK":-1009,"MHH":-2694,"MHM":-457,"MHO":123,"MMH":-471,"NNH":-1689,"NNO":662,"OHO":-3393}; 38 | this.TC4__ = {"HHH":-203,"HHI":1344,"HHK":365,"HHM":-122,"HHN":182,"HHO":669,"HIH":804,"HII":679,"HOH":446,"IHH":695,"IHO":-2324,"IIH":321,"III":1497,"IIO":656,"IOO":54,"KAK":4845,"KKA":3386,"KKK":3065,"MHH":-405,"MHI":201,"MMH":-241,"MMM":661,"MOM":841}; 39 | this.TQ1__ = 
{"BHHH":-227,"BHHI":316,"BHIH":-132,"BIHH":60,"BIII":1595,"BNHH":-744,"BOHH":225,"BOOO":-908,"OAKK":482,"OHHH":281,"OHIH":249,"OIHI":200,"OIIH":-68}; 40 | this.TQ2__ = {"BIHH":-1401,"BIII":-1033,"BKAK":-543,"BOOO":-5591}; 41 | this.TQ3__ = {"BHHH":478,"BHHM":-1073,"BHIH":222,"BHII":-504,"BIIH":-116,"BIII":-105,"BMHI":-863,"BMHM":-464,"BOMH":620,"OHHH":346,"OHHI":1729,"OHII":997,"OHMH":481,"OIHH":623,"OIIH":1344,"OKAK":2792,"OKHH":587,"OKKA":679,"OOHH":110,"OOII":-685}; 42 | this.TQ4__ = {"BHHH":-721,"BHHM":-3604,"BHII":-966,"BIIH":-607,"BIII":-2181,"OAAA":-2763,"OAKK":180,"OHHH":-294,"OHHI":2446,"OHHO":480,"OHIH":-1573,"OIHH":1935,"OIHI":-493,"OIIH":626,"OIII":-4007,"OKAK":-8156}; 43 | this.TW1__ = {"につい":-4681,"東京都":2026}; 44 | this.TW2__ = {"ある程":-2049,"いった":-1256,"ころが":-2434,"しょう":3873,"その後":-4430,"だって":-1049,"ていた":1833,"として":-4657,"ともに":-4517,"もので":1882,"一気に":-792,"初めて":-1512,"同時に":-8097,"大きな":-1255,"対して":-2721,"社会党":-3216}; 45 | this.TW3__ = {"いただ":-1734,"してい":1314,"として":-4314,"につい":-5483,"にとっ":-5989,"に当た":-6247,"ので,":-727,"ので、":-727,"のもの":-600,"れから":-3752,"十二月":-2287}; 46 | this.TW4__ = {"いう.":8576,"いう。":8576,"からな":-2348,"してい":2958,"たが,":1516,"たが、":1516,"ている":1538,"という":1349,"ました":5543,"ません":1097,"ようと":-4258,"よると":5865}; 47 | this.UC1__ = {"A":484,"K":93,"M":645,"O":-505}; 48 | this.UC2__ = {"A":819,"H":1059,"I":409,"M":3987,"N":5775,"O":646}; 49 | this.UC3__ = {"A":-1370,"I":2311}; 50 | this.UC4__ = {"A":-2643,"H":1809,"I":-1032,"K":-3450,"M":3565,"N":3876,"O":6646}; 51 | this.UC5__ = {"H":313,"I":-1238,"K":-799,"M":539,"O":-831}; 52 | this.UC6__ = {"H":-506,"I":-253,"K":87,"M":247,"O":-387}; 53 | this.UP1__ = {"O":-214}; 54 | this.UP2__ = {"B":69,"O":935}; 55 | this.UP3__ = {"B":189}; 56 | this.UQ1__ = {"BH":21,"BI":-12,"BK":-99,"BN":142,"BO":-56,"OH":-95,"OI":477,"OK":410,"OO":-2422}; 57 | this.UQ2__ = {"BH":216,"BI":113,"OK":1759}; 58 | this.UQ3__ = {"BA":-479,"BH":42,"BI":1913,"BK":-7198,"BM":3160,"BN":6427,"BO":14761,"OI":-827,"ON":-3212}; 59 | 
this.UW1__ = {",":156,"、":156,"「":-463,"あ":-941,"う":-127,"が":-553,"き":121,"こ":505,"で":-201,"と":-547,"ど":-123,"に":-789,"の":-185,"は":-847,"も":-466,"や":-470,"よ":182,"ら":-292,"り":208,"れ":169,"を":-446,"ん":-137,"・":-135,"主":-402,"京":-268,"区":-912,"午":871,"国":-460,"大":561,"委":729,"市":-411,"日":-141,"理":361,"生":-408,"県":-386,"都":-718,"「":-463,"・":-135}; 60 | this.UW2__ = {",":-829,"、":-829,"〇":892,"「":-645,"」":3145,"あ":-538,"い":505,"う":134,"お":-502,"か":1454,"が":-856,"く":-412,"こ":1141,"さ":878,"ざ":540,"し":1529,"す":-675,"せ":300,"そ":-1011,"た":188,"だ":1837,"つ":-949,"て":-291,"で":-268,"と":-981,"ど":1273,"な":1063,"に":-1764,"の":130,"は":-409,"ひ":-1273,"べ":1261,"ま":600,"も":-1263,"や":-402,"よ":1639,"り":-579,"る":-694,"れ":571,"を":-2516,"ん":2095,"ア":-587,"カ":306,"キ":568,"ッ":831,"三":-758,"不":-2150,"世":-302,"中":-968,"主":-861,"事":492,"人":-123,"会":978,"保":362,"入":548,"初":-3025,"副":-1566,"北":-3414,"区":-422,"大":-1769,"天":-865,"太":-483,"子":-1519,"学":760,"実":1023,"小":-2009,"市":-813,"年":-1060,"強":1067,"手":-1519,"揺":-1033,"政":1522,"文":-1355,"新":-1682,"日":-1815,"明":-1462,"最":-630,"朝":-1843,"本":-1650,"東":-931,"果":-665,"次":-2378,"民":-180,"気":-1740,"理":752,"発":529,"目":-1584,"相":-242,"県":-1165,"立":-763,"第":810,"米":509,"自":-1353,"行":838,"西":-744,"見":-3874,"調":1010,"議":1198,"込":3041,"開":1758,"間":-1257,"「":-645,"」":3145,"ッ":831,"ア":-587,"カ":306,"キ":568}; 61 | this.UW3__ = 
{",":4889,"1":-800,"−":-1723,"、":4889,"々":-2311,"〇":5827,"」":2670,"〓":-3573,"あ":-2696,"い":1006,"う":2342,"え":1983,"お":-4864,"か":-1163,"が":3271,"く":1004,"け":388,"げ":401,"こ":-3552,"ご":-3116,"さ":-1058,"し":-395,"す":584,"せ":3685,"そ":-5228,"た":842,"ち":-521,"っ":-1444,"つ":-1081,"て":6167,"で":2318,"と":1691,"ど":-899,"な":-2788,"に":2745,"の":4056,"は":4555,"ひ":-2171,"ふ":-1798,"へ":1199,"ほ":-5516,"ま":-4384,"み":-120,"め":1205,"も":2323,"や":-788,"よ":-202,"ら":727,"り":649,"る":5905,"れ":2773,"わ":-1207,"を":6620,"ん":-518,"ア":551,"グ":1319,"ス":874,"ッ":-1350,"ト":521,"ム":1109,"ル":1591,"ロ":2201,"ン":278,"・":-3794,"一":-1619,"下":-1759,"世":-2087,"両":3815,"中":653,"主":-758,"予":-1193,"二":974,"人":2742,"今":792,"他":1889,"以":-1368,"低":811,"何":4265,"作":-361,"保":-2439,"元":4858,"党":3593,"全":1574,"公":-3030,"六":755,"共":-1880,"円":5807,"再":3095,"分":457,"初":2475,"別":1129,"前":2286,"副":4437,"力":365,"動":-949,"務":-1872,"化":1327,"北":-1038,"区":4646,"千":-2309,"午":-783,"協":-1006,"口":483,"右":1233,"各":3588,"合":-241,"同":3906,"和":-837,"員":4513,"国":642,"型":1389,"場":1219,"外":-241,"妻":2016,"学":-1356,"安":-423,"実":-1008,"家":1078,"小":-513,"少":-3102,"州":1155,"市":3197,"平":-1804,"年":2416,"広":-1030,"府":1605,"度":1452,"建":-2352,"当":-3885,"得":1905,"思":-1291,"性":1822,"戸":-488,"指":-3973,"政":-2013,"教":-1479,"数":3222,"文":-1489,"新":1764,"日":2099,"旧":5792,"昨":-661,"時":-1248,"曜":-951,"最":-937,"月":4125,"期":360,"李":3094,"村":364,"東":-805,"核":5156,"森":2438,"業":484,"氏":2613,"民":-1694,"決":-1073,"法":1868,"海":-495,"無":979,"物":461,"特":-3850,"生":-273,"用":914,"町":1215,"的":7313,"直":-1835,"省":792,"県":6293,"知":-1528,"私":4231,"税":401,"立":-960,"第":1201,"米":7767,"系":3066,"約":3663,"級":1384,"統":-4229,"総":1163,"線":1255,"者":6457,"能":725,"自":-2869,"英":785,"見":1044,"調":-562,"財":-733,"費":1777,"車":1835,"軍":1375,"込":-1504,"通":-1136,"選":-681,"郎":1026,"郡":4404,"部":1200,"金":2163,"長":421,"開":-1432,"間":1302,"関":-1282,"雨":2009,"電":-1045,"非":2066,"駅":1620,"1":-800,"」":2670,"・":-3794,"ッ":-1350,"ア":551,"グ":1319,"ス":874,"ト":521,"ム":1109,"ル":1591,"ロ":2201,"ン":278}; 62 | this.UW4__ = 
{",":3930,".":3508,"―":-4841,"、":3930,"。":3508,"〇":4999,"「":1895,"」":3798,"〓":-5156,"あ":4752,"い":-3435,"う":-640,"え":-2514,"お":2405,"か":530,"が":6006,"き":-4482,"ぎ":-3821,"く":-3788,"け":-4376,"げ":-4734,"こ":2255,"ご":1979,"さ":2864,"し":-843,"じ":-2506,"す":-731,"ず":1251,"せ":181,"そ":4091,"た":5034,"だ":5408,"ち":-3654,"っ":-5882,"つ":-1659,"て":3994,"で":7410,"と":4547,"な":5433,"に":6499,"ぬ":1853,"ね":1413,"の":7396,"は":8578,"ば":1940,"ひ":4249,"び":-4134,"ふ":1345,"へ":6665,"べ":-744,"ほ":1464,"ま":1051,"み":-2082,"む":-882,"め":-5046,"も":4169,"ゃ":-2666,"や":2795,"ょ":-1544,"よ":3351,"ら":-2922,"り":-9726,"る":-14896,"れ":-2613,"ろ":-4570,"わ":-1783,"を":13150,"ん":-2352,"カ":2145,"コ":1789,"セ":1287,"ッ":-724,"ト":-403,"メ":-1635,"ラ":-881,"リ":-541,"ル":-856,"ン":-3637,"・":-4371,"ー":-11870,"一":-2069,"中":2210,"予":782,"事":-190,"井":-1768,"人":1036,"以":544,"会":950,"体":-1286,"作":530,"側":4292,"先":601,"党":-2006,"共":-1212,"内":584,"円":788,"初":1347,"前":1623,"副":3879,"力":-302,"動":-740,"務":-2715,"化":776,"区":4517,"協":1013,"参":1555,"合":-1834,"和":-681,"員":-910,"器":-851,"回":1500,"国":-619,"園":-1200,"地":866,"場":-1410,"塁":-2094,"士":-1413,"多":1067,"大":571,"子":-4802,"学":-1397,"定":-1057,"寺":-809,"小":1910,"屋":-1328,"山":-1500,"島":-2056,"川":-2667,"市":2771,"年":374,"庁":-4556,"後":456,"性":553,"感":916,"所":-1566,"支":856,"改":787,"政":2182,"教":704,"文":522,"方":-856,"日":1798,"時":1829,"最":845,"月":-9066,"木":-485,"来":-442,"校":-360,"業":-1043,"氏":5388,"民":-2716,"気":-910,"沢":-939,"済":-543,"物":-735,"率":672,"球":-1267,"生":-1286,"産":-1101,"田":-2900,"町":1826,"的":2586,"目":922,"省":-3485,"県":2997,"空":-867,"立":-2112,"第":788,"米":2937,"系":786,"約":2171,"経":1146,"統":-1169,"総":940,"線":-994,"署":749,"者":2145,"能":-730,"般":-852,"行":-792,"規":792,"警":-1184,"議":-244,"谷":-1000,"賞":730,"車":-1481,"軍":1158,"輪":-1433,"込":-3370,"近":929,"道":-1291,"選":2596,"郎":-4866,"都":1192,"野":-1100,"銀":-2213,"長":357,"間":-2344,"院":-2297,"際":-2604,"電":-878,"領":-1659,"題":-792,"館":-1984,"首":1749,"高":2120,"「":1895,"」":3798,"・":-4371,"ッ":-724,"ー":-11870,"カ":2145,"コ":1789,"セ":1287,"ト":-403,"メ":-1635,"ラ":-8
81,"リ":-541,"ル":-856,"ン":-3637}; 63 | this.UW5__ = {",":465,".":-299,"1":-514,"E2":-32768,"]":-2762,"、":465,"。":-299,"「":363,"あ":1655,"い":331,"う":-503,"え":1199,"お":527,"か":647,"が":-421,"き":1624,"ぎ":1971,"く":312,"げ":-983,"さ":-1537,"し":-1371,"す":-852,"だ":-1186,"ち":1093,"っ":52,"つ":921,"て":-18,"で":-850,"と":-127,"ど":1682,"な":-787,"に":-1224,"の":-635,"は":-578,"べ":1001,"み":502,"め":865,"ゃ":3350,"ょ":854,"り":-208,"る":429,"れ":504,"わ":419,"を":-1264,"ん":327,"イ":241,"ル":451,"ン":-343,"中":-871,"京":722,"会":-1153,"党":-654,"務":3519,"区":-901,"告":848,"員":2104,"大":-1296,"学":-548,"定":1785,"嵐":-1304,"市":-2991,"席":921,"年":1763,"思":872,"所":-814,"挙":1618,"新":-1682,"日":218,"月":-4353,"査":932,"格":1356,"機":-1508,"氏":-1347,"田":240,"町":-3912,"的":-3149,"相":1319,"省":-1052,"県":-4003,"研":-997,"社":-278,"空":-813,"統":1955,"者":-2233,"表":663,"語":-1073,"議":1219,"選":-1018,"郎":-368,"長":786,"間":1191,"題":2368,"館":-689,"1":-514,"E2":-32768,"「":363,"イ":241,"ル":451,"ン":-343}; 64 | this.UW6__ = {",":227,".":808,"1":-270,"E1":306,"、":227,"。":808,"あ":-307,"う":189,"か":241,"が":-73,"く":-121,"こ":-200,"じ":1782,"す":383,"た":-428,"っ":573,"て":-1014,"で":101,"と":-105,"な":-253,"に":-149,"の":-417,"は":-236,"も":-206,"り":187,"る":-135,"を":195,"ル":-673,"ン":-496,"一":-277,"中":201,"件":-800,"会":624,"前":302,"区":1792,"員":-1212,"委":798,"学":-960,"市":887,"広":-695,"後":535,"業":-697,"相":753,"社":-507,"福":974,"空":-822,"者":1811,"連":463,"郎":1082,"1":-270,"E1":306,"ル":-673,"ン":-496}; 65 | 66 | return this; 67 | } 68 | 69 | /* ctype_(str): returns the label of the first this.chartype_ entry whose element [0] matches str (via str.match), taking the label from element [1]; returns "O" when no entry matches. NOTE(review): chartype_ is defined outside this excerpt, presumably as an array of [pattern, label] pairs - confirm. for..in would also visit any enumerable property added to that array or its prototype chain. */ TinySegmenter.prototype.ctype_ = function(str) { 70 | for (var i in this.chartype_) { 71 | if (str.match(this.chartype_[i][0])) { 72 | return this.chartype_[i][1]; 73 | } 74 | } 75 | return "O"; 76 | } 77 | 78 | /* ts_(v): returns v when it is truthy and 0 otherwise, so a feature that is missing from a weight table (an undefined lookup) adds nothing to the score. */ TinySegmenter.prototype.ts_ = function(v) { 79 | if (v) { return v; } 80 | return 0; 81 | } 82 | 83 | /* segment(input): splits input into an array of word strings. Returns [] when input is null, undefined or "" (the == undefined test is already covered by == null). The characters are padded with the sentinels "B3","B2","B1" in front and "E1","E2","E3" behind, each typed "O", so the look-behind and look-ahead windows never run off either end. For every position after the first character the score starts at this.BIAS__ and adds one ts_-defaulted table weight per feature built from the characters w1..w6, their types c1..c6 and the previous decisions p1..p3. A score greater than 0 closes the current word before that character and records "B" as the decision; otherwise "O" is recorded. The word still being built is pushed after the loop, so a non-empty input always yields at least one element. */ TinySegmenter.prototype.segment = function(input) { 84 | if (input == null || input == undefined || input == "") { 85 | return []; 86 | } 87 | var result = []; 88 | var seg = ["B3","B2","B1"]; 89 | var ctype = ["O","O","O"];
90 | /* split("") yields UTF-16 code units, so a surrogate pair becomes two separately typed units */ var o = input.split(""); 91 | /* i here is the function-scoped variable declared by the later "var i" loop (hoisting), not a global */ for (i = 0; i < o.length; ++i) { 92 | seg.push(o[i]); 93 | ctype.push(this.ctype_(o[i])) 94 | } 95 | seg.push("E1"); 96 | seg.push("E2"); 97 | seg.push("E3"); 98 | ctype.push("O"); 99 | ctype.push("O"); 100 | ctype.push("O"); 101 | /* seg[3] is the first input character; p1..p3 hold the three most recent boundary decisions and stay "U" until a decision has been made */ var word = seg[3]; 102 | var p1 = "U"; 103 | var p2 = "U"; 104 | var p3 = "U"; 105 | /* each iteration decides whether a word boundary falls immediately before seg[i] */ for (var i = 4; i < seg.length - 3; ++i) { 106 | var score = this.BIAS__; 107 | /* w1..w3 are the three characters before the candidate boundary, w4..w6 the three after it; c1..c6 are their ctype_ labels */ var w1 = seg[i-3]; 108 | var w2 = seg[i-2]; 109 | var w3 = seg[i-1]; 110 | var w4 = seg[i]; 111 | var w5 = seg[i+1]; 112 | var w6 = seg[i+2]; 113 | var c1 = ctype[i-3]; 114 | var c2 = ctype[i-2]; 115 | var c3 = ctype[i-1]; 116 | var c4 = ctype[i]; 117 | var c5 = ctype[i+1]; 118 | var c6 = ctype[i+2]; 119 | score += this.ts_(this.UP1__[p1]); 120 | score += this.ts_(this.UP2__[p2]); 121 | score += this.ts_(this.UP3__[p3]); 122 | score += this.ts_(this.BP1__[p1 + p2]); 123 | score += this.ts_(this.BP2__[p2 + p3]); 124 | score += this.ts_(this.UW1__[w1]); 125 | score += this.ts_(this.UW2__[w2]); 126 | score += this.ts_(this.UW3__[w3]); 127 | score += this.ts_(this.UW4__[w4]); 128 | score += this.ts_(this.UW5__[w5]); 129 | score += this.ts_(this.UW6__[w6]); 130 | score += this.ts_(this.BW1__[w2 + w3]); 131 | score += this.ts_(this.BW2__[w3 + w4]); 132 | score += this.ts_(this.BW3__[w4 + w5]); 133 | score += this.ts_(this.TW1__[w1 + w2 + w3]); 134 | score += this.ts_(this.TW2__[w2 + w3 + w4]); 135 | score += this.ts_(this.TW3__[w3 + w4 + w5]); 136 | score += this.ts_(this.TW4__[w4 + w5 + w6]); 137 | score += this.ts_(this.UC1__[c1]); 138 | score += this.ts_(this.UC2__[c2]); 139 | score += this.ts_(this.UC3__[c3]); 140 | score += this.ts_(this.UC4__[c4]); 141 | score += this.ts_(this.UC5__[c5]); 142 | score += this.ts_(this.UC6__[c6]); 143 | score += this.ts_(this.BC1__[c2 + c3]); 144 | score += this.ts_(this.BC2__[c3 + c4]); 145 | score += this.ts_(this.BC3__[c4 + c5]); 146 | score += this.ts_(this.TC1__[c1 + c2 + c3]); 147 | score += 
this.ts_(this.TC2__[c2 + c3 + c4]); 148 | score += this.ts_(this.TC3__[c3 + c4 + c5]); 149 | score += this.ts_(this.TC4__[c4 + c5 + c6]); 150 | // score += this.ts_(this.TC5__[c4 + c5 + c6]); 151 | score += this.ts_(this.UQ1__[p1 + c1]); 152 | score += this.ts_(this.UQ2__[p2 + c2]); 153 | score += this.ts_(this.UQ3__[p3 + c3]); 154 | score += this.ts_(this.BQ1__[p2 + c2 + c3]); 155 | score += this.ts_(this.BQ2__[p2 + c3 + c4]); 156 | score += this.ts_(this.BQ3__[p3 + c2 + c3]); 157 | score += this.ts_(this.BQ4__[p3 + c3 + c4]); 158 | score += this.ts_(this.TQ1__[p2 + c1 + c2 + c3]); 159 | score += this.ts_(this.TQ2__[p2 + c2 + c3 + c4]); 160 | score += this.ts_(this.TQ3__[p3 + c1 + c2 + c3]); 161 | score += this.ts_(this.TQ4__[p3 + c2 + c3 + c4]); 162 | var p = "O"; 163 | if (score > 0) { 164 | result.push(word); 165 | word = ""; 166 | p = "B"; 167 | } 168 | p1 = p2; 169 | p2 = p3; 170 | p3 = p; 171 | word += seg[i]; 172 | } 173 | result.push(word); 174 | 175 | return result; 176 | } --------------------------------------------------------------------------------