├── .devcontainer
    └── devcontainer.json
├── .github
    ├── FUNDING.yml
    ├── ISSUE_TEMPLATE
    │   ├── bug-report--不具合の報告-.md
    │   ├── feature-request--機能の提案-.md
    │   └── question--質問-.md
    ├── dependabot.yml
    └── workflows
    │   ├── swift-in-devcontainer.yml
    │   └── swift.yml
├── .gitignore
├── .gitmodules
├── Docs
    ├── Images
    │   ├── install-devcontainers-extension.png
    │   └── reopen-in-container.png
    ├── README.md
    ├── Visions
    │   ├── dictionary.md
    │   └── learning.md
    ├── about_windows_support.md
    ├── cli.md
    ├── composing_text.md
    ├── conversion_algorithms.md
    ├── converter_api.md
    ├── devcontainer.md
    ├── development_guide.md
    ├── dicdata_format.md
    ├── failures.md
    └── zenzai.md
├── LICENSE
├── Package.swift
├── README.md
├── Sources
    ├── CliTool
    │   ├── Anco.swift
    │   ├── DefaultStringInterpolation+CommandLineUtils.swift
    │   └── Subcommands
    │   │   ├── Commands.swift
    │   │   ├── DictCommands
    │   │       ├── BuildCommand.swift
    │   │       ├── DictCommand.swift
    │   │       └── ReadCommand.swift
    │   │   ├── EvaluateCommand.swift
    │   │   ├── ExperimentalPredict.swift
    │   │   ├── NGramCommands
    │   │       ├── InferenceCommand.swift
    │   │       ├── NGramCommand.swift
    │   │       └── TrainCommand.swift
    │   │   ├── RunCommand.swift
    │   │   ├── SessionCommand.swift
    │   │   └── ZenzEvaluateCommand.swift
    ├── EfficientNGram
    │   ├── Inference.swift
    │   ├── Tokenizer.swift
    │   ├── Trainer.swift
    │   └── tokenizer
    │   │   ├── README.md
    │   │   ├── config.json
    │   │   ├── merges.txt
    │   │   ├── special_tokens_map.json
    │   │   ├── tokenizer.json
    │   │   ├── tokenizer_config.json
    │   │   └── vocab.json
    ├── KanaKanjiConverterModule
    │   ├── CIDData.swift
    │   ├── Candidate.swift
    │   ├── ComposingText.swift
    │   ├── Converter
    │   │   ├── CalendarCandidate.swift
    │   │   ├── CommaSeparatedNumber.swift
    │   │   ├── ConverisonResult.swift
    │   │   ├── ConvertRequestOptions.swift
    │   │   ├── EmailAddress.swift
    │   │   ├── KanaKanjiConverter.swift
    │   │   ├── RomanTypographys.swift
    │   │   ├── SpecialCandidateProvider.swift
    │   │   ├── SpellChecker.swift
    │   │   ├── TimeExpression.swift
    │   │   ├── Unicode.swift
    │   │   └── VersionCandidate.swift
    │   ├── DicdataStore
    │   │   ├── DicdataElement.swift
    │   │   ├── DicdataStore.swift
    │   │   ├── JapaneseNumber.swift
    │   │   ├── LearningMemory.swift
    │   │   └── TypoCorrection.swift
    │   ├── Kana2Kanji
    │   │   ├── Kana2Kanji.swift
    │   │   ├── added_last_1_character.swift
    │   │   ├── added_last_n_character.swift
    │   │   ├── all.swift
    │   │   ├── all_with_prefix_constraint.swift
    │   │   ├── changed_last_n_character.swift
    │   │   ├── completed_first.swift
    │   │   ├── deleted_last_n_character.swift
    │   │   ├── mid_composition_prediction.swift
    │   │   ├── no_change.swift
    │   │   ├── post_composition_prediction.swift
    │   │   └── zenzai.swift
    │   ├── LOUDS
    │   │   ├── LOUDS.swift
    │   │   └── extension LOUDS.swift
    │   ├── LatticeNode.swift
    │   ├── MIDData.swift
    │   ├── PostCompositionPredictionCandidate.swift
    │   ├── RegisteredNodeProtocol.swift
    │   ├── Replacer
    │   │   └── TextReplacer.swift
    │   ├── Roman2Kana.swift
    │   ├── States.swift
    │   ├── TemplateData.swift
    │   └── Zenz
    │   │   ├── Zenz.swift
    │   │   ├── ZenzContext.swift
    │   │   └── llama-mock.swift
    ├── KanaKanjiConverterModuleWithDefaultDictionary
    │   └── KanaKanjiConverterModuleWithDefaultDictionary.swift
    ├── SwiftUtils
    │   ├── ArrayUtils.swift
    │   ├── CharacterUtils.swift
    │   ├── DataUtils.swift
    │   ├── Debug.swift
    │   ├── StringUtils.swift
    │   └── WithMutableValue.swift
    └── llama.cpp
    │   ├── ggml-alloc.h
    │   ├── ggml-backend.h
    │   ├── ggml-cpp.h
    │   ├── ggml-cpu.h
    │   ├── ggml-metal.h
    │   ├── ggml.h
    │   ├── llama.h
    │   └── module.modulemap
├── Tests
    ├── EfficientNGramTests
    │   └── EfficientNGramTests.swift
    ├── KanaKanjiConverterModuleTests
    │   ├── ClauseDataUnitTests.swift
    │   ├── ComposingTextTests.swift
    │   ├── ConverterTests
    │   │   ├── CandidateTests.swift
    │   │   ├── CommaSeparatedNumberTests.swift
    │   │   ├── ConvesionTests.swift
    │   │   ├── EmailAddressConversionTests.swift
    │   │   ├── TimeExpressionTests.swift
    │   │   ├── UnicodeConversionTests.swift
    │   │   └── WarekiConversionTests.swift
    │   ├── DictionaryMock
    │   │   ├── .gitkeep
    │   │   ├── cb
    │   │   │   ├── .gitkeep
    │   │   │   └── 1285.binary
    │   │   ├── louds
    │   │   │   ├── .gitkeep
    │   │   │   ├── charID.chid
    │   │   │   ├── シ.louds
    │   │   │   ├── シ.loudschars2
    │   │   │   ├── シ0.loudstxt3
    │   │   │   ├── シ1.loudstxt3
    │   │   │   ├── シ10.loudstxt3
    │   │   │   ├── シ11.loudstxt3
    │   │   │   ├── シ12.loudstxt3
    │   │   │   ├── シ13.loudstxt3
    │   │   │   ├── シ2.loudstxt3
    │   │   │   ├── シ3.loudstxt3
    │   │   │   ├── シ4.loudstxt3
    │   │   │   ├── シ5.loudstxt3
    │   │   │   ├── シ6.loudstxt3
    │   │   │   ├── シ7.loudstxt3
    │   │   │   ├── シ8.loudstxt3
    │   │   │   └── シ9.loudstxt3
    │   │   ├── mm.binary
    │   │   └── p
    │   │   │   ├── .gitkeep
    │   │   │   └── p_シ.csv
    │   ├── JapaneseNumberConversionTests.swift
    │   ├── KanaKanjiConverterModuleTests.swift
    │   ├── LOUDSTests.swift
    │   ├── LearningMemoryTests.swift
    │   ├── RegisteredNodeTests.swift
    │   └── TemporalLearningMemoryTrieTests.swift
    ├── KanaKanjiConverterModuleWithDefaultDictionaryTests
    │   ├── ConverterTests
    │   │   └── ConverterTests.swift
    │   └── DicdataStoreTests
    │   │   ├── DicdataStoreTests.swift
    │   │   └── TextReplacerTests.swift
    └── SwiftUtilsTests
    │   ├── CharacterUtilsTests.swift
    │   ├── StringExtensionTests.swift
    │   ├── StringUtilsTests.swift
    │   └── WithMutableValueTests.swift
└── install_cli.sh


/.devcontainer/devcontainer.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "name": "Swift",
 3 |     "image": "swift:6.1",
 4 |     "features": {
 5 |         "ghcr.io/devcontainers/features/common-utils:2": {
 6 |             "installZsh": "false",
 7 |             "username": "vscode",
 8 |             "userUid": "1001",
 9 |             "userGid": "1001",
10 |             "upgradePackages": "false"
11 |         },
12 |         "ghcr.io/devcontainers/features/git:1": {
13 |             "version": "os-provided",
14 |             "ppa": "false"
15 |         }
16 |     },
17 |     "runArgs": [
18 |         "--cap-add=SYS_PTRACE",
19 |         "--security-opt",
20 |         "seccomp=unconfined"
21 |     ],
22 |     // Configure tool-specific properties.
23 |     "customizations": {
24 |         // Configure properties specific to VS Code.
25 |         "vscode": {
26 |             // Set *default* container specific settings.json values on container create.
27 |             "settings": {
28 |                 "lldb.library": "/usr/lib/liblldb.so"
29 |             },
30 |             // Add the IDs of extensions you want installed when the container is created.
31 |             "extensions": [
32 |                 "sswg.swift-lang"
33 |             ]
34 |         }
35 |     },
36 |     // Use 'forwardPorts' to make a list of ports inside the container available locally.
37 |     // "forwardPorts": [],
38 | 
39 |     // Use 'postCreateCommand' to run commands after the container is created.
40 |     "postCreateCommand": "swift --version",
41 | 
42 |     // Set `remoteUser` to `root` to connect as root instead. More info: https://aka.ms/vscode-remote/containers/non-root.
43 |     "remoteUser": "vscode"
44 | }
45 | 


--------------------------------------------------------------------------------
/.github/FUNDING.yml:
--------------------------------------------------------------------------------
 1 | # These are supported funding model platforms
 2 | 
 3 | github: [ensan-hcl]
 4 | # patreon: # Replace with a single Patreon username
 5 | # open_collective: # Replace with a single Open Collective username
 6 | # ko_fi: # Replace with a single Ko-fi username
 7 | # tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel
 8 | # community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry
 9 | # liberapay: # Replace with a single Liberapay username
10 | # issuehunt: # Replace with a single IssueHunt username
11 | # otechie: # Replace with a single Otechie username
12 | # lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry
13 | # custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2']
14 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug-report--不具合の報告-.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: Bug report (不具合の報告)
 3 | about: 不具合の修正のための情報提供
 4 | title: "[BUG] Description of bug"
 5 | labels: bug
 6 | assignees: ''
 7 | 
 8 | ---
 9 | 
10 | **Describe the bug (不具合を説明してください)**
11 | A clear and concise description of what the bug is.
12 | 
13 | **To Reproduce (どのような手順で実行すると不具合が生じるのか、詳しく教えてください)**
14 | Steps to reproduce the behavior:
15 | 1. Go to '...'
16 | 2. Click on '....'
17 | 3. Scroll down to '....'
18 | 4. See error
19 | 
20 | **Expected behavior (どういう動作になるのが正しいと思いますか？)**
21 | A clear and concise description of what you expected to happen.
22 | 
23 | **Screenshots (スクリーンショットがあれば添付してください)**
24 | If applicable, add screenshots to help explain your problem.
25 | 
26 | **Environment (環境を詳しく教えてください):**
27 |  - Device: [e.g. iPhone12]
28 |  - iOS / iPadOS and its version: [e.g. iOS 16.2]
29 |  - App Running Keyboard [e.g. LINE, notes, Slack]
30 |  - App Version [e.g. 1.9]
31 | 
32 | **Is it possible to keep in touch with you on an ongoing basis to fix this bug? (バグの修正のため、継続的なやり取りは可能ですか？)**
33 | 
34 | **Additional context (その他共有したい事項があれば記述してください)**
35 | Add any other context about the problem here.
36 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature-request--機能の提案-.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: Feature request (機能の提案)
 3 | about: Suggest an idea for azooKey (azooKeyへの機能の提案)
 4 | title: "[Feature Request] Description of feature"
 5 | labels: enhancement
 6 | assignees: ''
 7 | 
 8 | ---
 9 | 
10 | **Is your feature request related to a problem? Please describe. (提案は利用上の問題に関連していますか？記述してください。)**
11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
12 | 
13 | **Describe the solution you'd like (あなたの考える解決策を記述してください)**
14 | A clear and concise description of what you want to happen.
15 | 
16 | **Describe alternatives you've considered (他の解決方法があれば記述してください)**
17 | A clear and concise description of any alternative solutions or features you've considered.
18 | 
19 | **If we were to implement this feature, will you help implement it? (もし提案する機能を実装する場合、実装を手伝う余裕はありますか？)**
20 | 
21 | **Additional context (その他共有したい事項があれば記述してください)**
22 | Add any other context or screenshots about the feature request here.
23 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/question--質問-.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: Question (質問)
 3 | about: Ask anything about azooKey
 4 | title: "[Question]"
 5 | labels: question
 6 | assignees: ensan-hcl
 7 | 
 8 | ---
 9 | 
10 | <!-- 質問の内容を書いてください -->
11 | <!-- Write your question here -->
12 | 
13 | 
14 | <!-- あなたがそれを知りたい理由があれば教えてください -->
15 | <!-- If possible, write why you want to know about that -->
16 | 


--------------------------------------------------------------------------------
/.github/dependabot.yml:
--------------------------------------------------------------------------------
 1 | # To get started with Dependabot version updates, you'll need to specify which
 2 | # package ecosystems to update and where the package manifests are located.
 3 | # Please see the documentation for all configuration options:
 4 | # https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates
 5 | 
 6 | version: 2
 7 | updates:
 8 |   - package-ecosystem: "github-actions" # See documentation for possible values
 9 |     directory: "/" # Location of package manifests
10 |     schedule:
11 |       interval: "daily"
12 | 
13 |   - package-ecosystem: "swift" # See documentation for possible values
14 |     directory: "/AzooKeyCore" # Location of package manifests
15 |     schedule:
16 |       interval: "daily"
17 | 
18 |   - package-ecosystem: "gitsubmodule" # See documentation for possible values
19 |     directory: "/" # Location of package manifests
20 |     schedule:
21 |       interval: "daily"
22 | 
23 |   - package-ecosystem: "devcontainers" # See documentation for possible values
24 |     directory: "/.devcontainer" # Location of package manifests
25 |     schedule:
26 |       interval: "daily"
27 | 


--------------------------------------------------------------------------------
/.github/workflows/swift-in-devcontainer.yml:
--------------------------------------------------------------------------------
 1 | name: Swift Build and Test in DevContainer
 2 | on:
 3 |   push:
 4 |   pull_request:
 5 | 
 6 | jobs:
 7 |   build:
 8 |     name: Swift on ubuntu-latest
 9 |     runs-on: ubuntu-latest
10 | 
11 |     steps:
12 |     - name: Checkout
13 |       uses: actions/checkout@v4
14 |       with:
15 |         submodules: true
16 | 
17 |     - name: Download pre-built llama.cpp binaries
18 |       run: |
19 |         wget -O llama-cpp-bin.zip "https://github.com/fkunn1326/llama.cpp/releases/download/b4846/llama-b4846-bin-ubuntu-x64.zip"
20 |         unzip llama-cpp-bin.zip -d llama-cpp-bin
21 | 
22 |     - name: Copy llama.cpp binaries
23 |       run: cp llama-cpp-bin/build/bin/lib*.so ./
24 | 
25 |     - name: Build and Test in DevContainer
26 |       uses: devcontainers/ci@v0.3
27 |       with:
28 |         push: never
29 |         runCmd: |
30 |           swift build -c release -Xswiftc -strict-concurrency=complete -Xlinker -L./ -v
31 |           cp llama-cpp-bin/build/bin/lib*.so .build/*/release/
32 |           swift test -c release -Xswiftc -strict-concurrency=complete -Xlinker -L./ -v
33 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | .build
 2 | DerivedData
 3 | /.previous-build
 4 | xcuserdata
 5 | .DS_Store
 6 | *~
 7 | \#*
 8 | .\#*
 9 | .*.sw[nop]
10 | *.xcscmblueprint
11 | /default.profraw
12 | *.xcodeproj
13 | Utilities/Docker/*.tar.gz
14 | .swiftpm
15 | Package.resolved
16 | /build
17 | *.pyc
18 | .docc-build
19 | .vscode
20 | *.gguf
21 | *.dll
22 | *.lib
23 | 


--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "Sources/KanaKanjiConverterModuleWithDefaultDictionary/azooKey_dictionary_storage"]
2 | 	path = Sources/KanaKanjiConverterModuleWithDefaultDictionary/azooKey_dictionary_storage
3 | 	url = https://github.com/ensan-hcl/azooKey_dictionary_storage
4 | [submodule "Sources/KanaKanjiConverterModuleWithDefaultDictionary/azooKey_emoji_dictionary_storage"]
5 | 	path = Sources/KanaKanjiConverterModuleWithDefaultDictionary/azooKey_emoji_dictionary_storage
6 | 	url = https://github.com/ensan-hcl/azooKey_emoji_dictionary_storage
7 | 


--------------------------------------------------------------------------------
/Docs/Images/install-devcontainers-extension.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/azooKey/AzooKeyKanaKanjiConverter/a30e8b27a8763df24665bfc460837d283fc74cfa/Docs/Images/install-devcontainers-extension.png


--------------------------------------------------------------------------------
/Docs/Images/reopen-in-container.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/azooKey/AzooKeyKanaKanjiConverter/a30e8b27a8763df24665bfc460837d283fc74cfa/Docs/Images/reopen-in-container.png


--------------------------------------------------------------------------------
/Docs/README.md:
--------------------------------------------------------------------------------
1 | # Documents
2 | 
3 | 本ディレクトリはAzooKeyKanaKanjiConverterModuleに関するドキュメントをまとめています。
4 | 
5 | azooKey本体については[azooKey/docs](https://github.com/ensan-hcl/azooKey/tree/develop/docs/overview.md)をご覧ください。
6 | 
7 | 開発ガイドは[development_guide.md](./development_guide.md)をご覧ください。
8 | 
9 | 


--------------------------------------------------------------------------------
/Docs/Visions/dictionary.md:
--------------------------------------------------------------------------------
 1 | # Dictionary Vision
 2 | 
 3 | ## System Dictionary
 4 | 
 5 | 最近のazooKeyの内部変更で、システム辞書を任意のディレクトリから読み出せるようになりました。この変更によって、将来的に以下のようなことが可能です。
 6 | 
 7 | * オプションで語彙数の大きな辞書を選べるようにする
 8 | * 辞書データをazooKeyのシステムに同梱せず、定期更新を可能にする（Background Assets APIを利用できます）
 9 | * 外部からDLした辞書（サードパーティ製辞書、ベータ版辞書など）を利用する
10 | 
11 | 辞書サーバの準備などが問題となります。
12 | 
13 | * 比較的サイズが大きい（~100MB）データを配布するので、費用がバカにならない
14 |   * 高く見積もると、S3の場合、月あたりに100MBの辞書データを1000回転送するとして、月あたり約2000円
15 |   * 課金プランにするという手はある（月100円としてユーザが20人いればペイする）
16 |   * ただし、開発の体制上、安定して辞書の更新ができるとは限らないので、今のところはここに責任を入れたくない
17 | 
18 | ## User Dictionary
19 | 
20 | ユーザ辞書は非常に有用な機能ですが、導入当初からあまり仕様を変化させていません。そこで、今後考えられる機能の変更について議論します。
21 | 
22 | ### More Configurable Entry
23 | 
24 | 現在のユーザ辞書は高度な設定が不可能です。品詞をマニュアルで設定するなどの方法でより高度な設定ができると良さそうです。
25 | 
26 | ### Import / Export
27 | 
28 | ユーザ辞書を外部からインポートしたり、逆に外部向けにエクスポートする機能が必要です。
29 | 
30 | ### Custom Action
31 | 
32 | 現在、カッコを入力すると自動でカーソルを中央に移動する機能があります。これをユーザ辞書でも実現できると良さそうです。
33 | 
34 | ### Dictionary Market
35 | 
36 | ユーザが作成したユーザ辞書データをアプリ内で配布・ダウンロードできるようにします。オフィシャルな方法があることによって、辞書データの作成が活性化すると良さそうです。


--------------------------------------------------------------------------------
/Docs/Visions/learning.md:
--------------------------------------------------------------------------------
 1 | # Learning Vision
 2 | 
 3 | 学習機能の性能向上は重要です。
 4 | 
 5 | ## 今後実現したい機能
 6 | 
 7 | ### プライバシー
 8 | 
 9 | キーボード上から「新たな学習を停止（プライベートモード）」「これまでの学習を利用しない（ゲストモード）」を有効化できることにより、ユーザがプライバシーを守りやすくなる可能性がある。
10 | 
11 | この機能の技術的な課題はキーボードからの設定の上書きである。上書きそのものは、設定の構造を次のようにすることで対応できる。
12 | 
13 | - アプリ側：App Groupの共有領域に「(設定内容, 更新日時)」のデータを保存
14 | - キーボード側：Private領域に「(設定内容, 更新日時)」のデータを保存
15 | - 読み出し：更新日時の新しい方を利用
16 | 
17 | しかし、キーボード側から上書きした場合にアプリ側の設定の表示を更新する方法が存在しない（フルアクセスがある場合は表示を更新できる）。これが実際の使用感にどれほど悪影響を与えるかは不明であり、おそらくあまり気にしなくて良いと思う。
18 | 
19 | また「ゲストモードの解除をキーボード上で可能にするか」という論点がある。ゲストがゲストモードを解除できたら意味がないかもしれないが、これは今後に回しても問題ないと思う。
20 | 
21 | ### バックアップ
22 | 
23 | 学習のバックアップを定期的に取り、そこから過去の学習を復元できるようにすることが考えられる。
24 | 
25 | この機能の課題は、「復元」をどのようなUIで実現するかと、実際に「定期的に」は取れないのをどう解決するかである。
26 | 
27 | ### 修正の自動検出
28 | 
29 | 誤変換をユーザが「修正」したことを自動的に検出し、その候補の学習を変更することができると良い。
30 | 
31 | ### 仕様の再検討
32 | 
33 | - 現在の学習は、特に長い候補において強すぎるため、弱くした方が良いかもしれない。


--------------------------------------------------------------------------------
/Docs/about_windows_support.md:
--------------------------------------------------------------------------------
 1 | # Windows対応について
 2 | 
 3 | Windows上でAzooKeyKanaKanjiConverterを使用するためには、`llama.cpp`をビルドして`llama.lib`と`llama.dll`を準備する必要があります。
 4 | 
 5 | ## 対応の背景
 6 | 
 7 | `llama.cpp`をCUDAに対応させるには`.cu` ファイルのビルドが必要ですが、Swiftが内部で使用している`clang`ではこれに対応していない[^1]ので、[cxx-interop](https://www.swift.org/documentation/cxx-interop/)を使うことができません。WindowsではCUDA対応を実現するために、外部のDLLに依存する形を取ることにしました。
 8 | 
 9 | ## 実行手順
10 | 
11 | Windows上で AzooKeyKanaKanjiConverter を動作させるためには、以下の手順で`llama.cpp`をビルドする必要があります。
12 | 
13 | ```cmd
14 | git clone -b ku-nlp/gpt2-japanese-char https://github.com/ensan-hcl/llama.cpp.git && cd llama.cpp
15 | cmake -B build -DBUILD_SHARED_LIBS=ON
16 | cmake --build build --config Release
17 | ```
18 | 
19 | > [!TIP]
20 | > CUDAに対応させてビルドする場合、`-DLLAMA_CUDA=ON`オプションを指定してビルドします。
21 | 
22 | 必要なファイルは以下のパスに存在します。
23 | ```
24 | build/bin/Release/llama.dll
25 | build/Release/llama.lib
26 | ```
27 | 
28 | ## 配置方法
29 | 
30 | AzooKeyKanaKanjiConverterを使って開発を行うとき、`llama.lib` はビルド時に必要になるので、プロジェクトのルートディレクトリ（`Package.swift`と同じフォルダ）に配置します。
31 | 
32 | また、`llama.dll` は実行時に必要となるため、[DLL検索パス](https://learn.microsoft.com/ja-jp/windows/win32/dlls/dynamic-link-library-search-order#standard-search-order-for-unpackaged-apps)に沿って配置する必要があります。プロジェクトがビルドされたファイルと同じディレクトリに配置するのが適切だと考えられます。
33 | 
34 | 
35 | [^1]: https://llvm.org/docs/CompileCudaWithLLVM.html#id3


--------------------------------------------------------------------------------
/Docs/cli.md:
--------------------------------------------------------------------------------
  1 | #  anco (azooKey CLI)
  2 | 
  3 | `anco`コマンドにより、AzooKeyKanaKanjiConverterをコマンドライン上で利用することができます。`anco`はデバッグ用ツールの位置付けです。
  4 | 
  5 | `anco`を利用するには、最初にinstallが必要です。`/usr/local/bin/`に`anco`が追加されます。
  6 | 
  7 | ```bash
  8 | ./install_cli.sh
  9 | ```
 10 | 
 11 | Zenzaiを利用する場合は、`--zenzai`オプションを付けてください。
 12 | 
 13 | ```bash
 14 | ./install_cli.sh --zenzai
 15 | ```
 16 | 
 17 | デフォルトでは、ほとんどの情報は出力されません。デバッグモードで実行するには`--debug`オプションを付けてください。
 18 | 
 19 | ```bash
 20 | ./install_cli.sh --debug
 21 | ```
 22 | 
 23 | 例えば以下のように利用できます。
 24 | 
 25 | ```bash
 26 | your@pc Desktop % anco にほんごにゅうりょく --disable_prediction -n 10
 27 | 日本語入力
 28 | にほんご入力
 29 | 2本ご入力
 30 | 2本後入力
 31 | 2本語入力
 32 | 日本語
 33 | 2本
 34 | 日本
 35 | にほんご
 36 | 2本後
 37 | ```
 38 | 
 39 | ## 変換API
 40 | 
 41 | `anco run`コマンドを利用して変換を行うことが出来ます。`run`はデフォルトコマンドなので、`anco`だけでも`run`相当の動作をします。
 42 | 
 43 | ## 評価API
 44 | 
 45 | `anco evaluate`コマンドを利用して変換器の評価を行うことが出来ます。
 46 | 
 47 | 以下のようなフォーマットの`.tsv`ファイルを用意します。
 48 | ```tsv
 49 | しかくとさんかく	四角と三角
 50 | かんたんなさんすう	簡単な算数
 51 | しけんにでないえいたんご	試験に出ない英単語
 52 | しごととごらくとべんきょう	仕事と娯楽と勉強
 53 | しかいをつとめる	司会を務める
 54 | ```
 55 | 
 56 | これを入力し、変換器を評価します。
 57 | 
 58 | ```bash
 59 | $ anco evaluate ./evaluation.tsv --config_n_best 1
 60 | ```
 61 | 
 62 | 出力はJSONフォーマットです。出力内容の安定が必要な場合`--stable`を指定することで比較的安定した出力を得られます。ただしスコアやエントロピーは辞書バージョンに依存します。
 63 | 
 64 | ## 対話的実行API
 65 | 
 66 | 少しずつ入力を進めるような実用的な場面を模した環境として`anco session`コマンドが用意されています。
 67 | 
 68 | ```bash
 69 | $ anco session --roman2kana -n 10 --disable_prediction
 70 | 
 71 | == Type :q to end session, type :d to delete character, type :c to stop composition. For other commands, type :h ==
 72 | ```
 73 | 
 74 | キーを入力してEnterを押すと変換候補が表示されます。`:`で始まる特殊コマンドを利用することで、削除、確定、文脈の設定などの諸操作を行うことが出来ます。
 75 | 
 76 | ### リプレイ
 77 | 
 78 | `--replay`を用いると、セッションの中での一連の動作を再現することができます。
 79 | 
 80 | ```bash
 81 | anco session --roman2kana -n 10 --disable_prediction --replay history.txt
 82 | ```
 83 | 
 84 | `history.txt`は例えば以下のような内容が含まれます。
 85 | 
 86 | ```
 87 | a
 88 | i
 89 | u
 90 | e
 91 | e
 92 | :del
 93 | o
 94 | :0
 95 | ```
 96 | 
 97 | 現在実行中のセッションから`history.txt`を作成するには`:dump history.txt`と入力します。
 98 | 
 99 | ### 学習機能のデバッグ
100 | 学習機能のデバッグのため、セッションコマンドには複数の機能が用意されています。`--enable_memory`の状態では、デフォルトで学習が有効になり、一時ディレクトリに学習データが蓄積されます。
101 | 
102 | ```bash
103 | $ anco session --roman2kana -n 10 --disable_prediction --enable_memory
104 | ```
105 | 
106 | セーブを実施するには以下のように`:save`を入力します。
107 | 
108 | ```txt
109 | rime
110 | :h
111 | :n
112 | :14
113 | :4
114 | :save
115 | ```
116 | 
117 | すでに存在する学習データをread onlyで読み込むこともできます。
118 | 
119 | ```bash
120 | $ anco session --roman2kana -n 10 --disable_prediction --readonly_memory ./memory
121 | ```
122 | 
123 | この場合、`:save`コマンドは何も行いません。
124 | 
125 | ## 辞書リーダ
126 | 
127 | `anco dict`コマンドを利用して辞書データを解析することが出来ます。
128 | 
129 | ```bash
130 | your@pc Desktop % anco dict read ア -d ./Sources/KanaKanjiConverterModuleWithDefaultDictionary/azooKey_dictionary_storage/Dictionary/                       
131 | === Summary for target ア ===
132 | - directory: ./Sources/KanaKanjiConverterModuleWithDefaultDictionary/azooKey_dictionary_storage/Dictionary/
133 | - target: ア
134 | - memory?: false
135 | - count of entry: 24189
136 | - time for execute: 0.0378040075302124
137 | ```
138 | 
139 | `--ruby`および`--word`オプションを利用して、正規表現でフィルターをかけることが出来ます。
140 | 
141 | ```bash
142 | your@pc Desktop % anco dict read ア -d ./Sources/KanaKanjiConverterModuleWithDefaultDictionary/azooKey_dictionary_storage/Dictionary/ --word ".*全"
143 | === Summary for target ア ===
144 | - directory: ./Sources/KanaKanjiConverterModuleWithDefaultDictionary/azooKey_dictionary_storage/Dictionary/
145 | - target: ア
146 | - memory?: false
147 | - count of entry: 24189
148 | - time for execute: 0.07062792778015137
149 | === Found Entries ===
150 | - count of found entry: 3
151 | Ruby: アキラ Word: 全 Value: -11.7107 CID: (1291, 1291) MID: 424
152 | Ruby: アンゼン Word: 安全 Value: -7.241 CID: (1287, 1287) MID: 169
153 | Ruby: アンシンアンゼン Word: 安心安全 Value: -11.7638 CID: (1283, 1287) MID: 17
154 | ```
155 | 
156 | `--sort`オプションを使うとエントリーの並び替えが可能です。
157 | 


--------------------------------------------------------------------------------
/Docs/composing_text.md:
--------------------------------------------------------------------------------
 1 | #  ComposingText API
 2 | 
 3 | AzooKeyKanaKanjiConverterにおいて、変換を要求するには`ComposingText`のAPIを使う必要があります。この`ComposingText` APIについて説明します。
 4 | 
 5 | ## 基本的なアイデア
 6 | 
 7 | `ComposingText`の基本的なアイデアは「入力操作との対応」です。ユーザが日本語IMEを操作するとき、「きょうはいいてんきですね」と一文字ずつ入力することもあれば、「kyouhaiitenkidesune」のようにローマ字入力を行うこともあります。azooKeyでは前者をダイレクト入力、後者をローマ字入力と呼んでいます。`ComposingText`はこのような入力操作をうまく扱いながら、変換を逐次的に実行するために役立ちます。
 8 | 
 9 | ## 基本的な使い方
10 | 
11 | `ComposingText`を使い始めるには、まず空の値を作ります。
12 | 
13 | ```swift
14 | var composingText = ComposingText()
15 | ```
16 | 
17 | 次に、末尾に文字を追加します。このために使うのが`insertAtCursorPosition`です。
18 | 
19 | ```swift
20 | composingText.insertAtCursorPosition("あ", inputStyle: .direct)
21 | ```
22 | 
23 | 
24 | このとき、`ComposingText`の内部状態は次のようになっています。
25 | 
26 | 
27 | ```swift
28 | print(composingText.input)                        // [InputElement("あ", .direct)]
29 | print(composingText.convertTargetCursorPosition)  // 1 (あ|)
30 | print(composingText.convertTarget)                // あ
31 | ```
32 | 
33 | 非常に自明です。ではローマ字入力の場合はどうなるでしょうか。
34 | 
35 | ```swift
36 | composingText.insertAtCursorPosition("o", inputStyle: .roman2kana)
37 | ```
38 | 
39 | この場合は少し異なることが起こります。`input`に`"o"`が正しく保存されるのです。
40 | 
41 | 
42 | ```swift
43 | print(composingText.input)                        // [InputElement("あ", .direct), InputElement("o", .roman2kana)]
44 | print(composingText.convertTargetCursorPosition)  // 2 (あお|)
45 | print(composingText.convertTarget)                // あお
46 | ```
47 | 
48 | 一方、`convertTarget`の方は正しくローマ字入力した仮名表記になっています。このように`convertTarget`の方はユーザに実際に見える「見かけの文字列」であり、実装側はこれが実際にユーザに見えているよう保証する必要があります。`convertTargetCursorPosition`についても同様で、実装側は`convertTargetCursorPosition`に示されたカーソル位置が実際にユーザに見えているカーソル位置と一致するよう配慮する必要があります。
49 | 
50 | ## 操作するAPI
51 | 
52 | ### 削除
53 | 
54 | `deleteForwardFromCursorPosition`および`deleteBackwardFromCursorPosition`が使えます。
55 | 
56 | 
57 | ### カーソル移動
58 | 
59 | `moveCursorFromCursorPosition`が使えます。
60 | 
61 | 
62 | ### 文頭の削除
63 | 
64 | `prefixComplete`が使えます。
65 | 
66 | 
67 | ### 置換
68 | 
69 | 専用のAPIはありません。削除と挿入で代用してください。
70 | 


--------------------------------------------------------------------------------
/Docs/converter_api.md:
--------------------------------------------------------------------------------
 1 | #  KanaKanjiConverter API
 2 | 
 3 | KanaKanjiConverterのインスタンスに対して利用できるいくつかのAPIを示します。
 4 | 
 5 | ## `setKeyboardLanguage`
 6 | 
 7 | これから入力しようとしている言語を設定します。このAPIを呼ぶのは必須ではありません。
 8 | 
 9 | 英語入力の場合、この関数を入力開始前に呼ぶことで事前に必要なデータをロードすることができるため、ユーザ体験が向上する可能性があります。
10 | 
11 | ## `sendToDicdataStore`
12 | 
13 | 辞書データに関する情報を追加します。
14 | 
15 | ### `importDynamicUserDict`
16 | 
17 | 動的ユーザ辞書を登録します。`DicdataElement`構造体の配列を直接渡します。
18 | 
19 | ```Swift
20 | converter.sendToDicdataStore(.importDynamicUserDict([
21 |     DicdataElement(word: "anco", ruby: "アンコ", cid: 1288, mid: 501, value: -5),
22 | ]))
23 | ```
24 | 
25 | `ruby`には読みを指定します。カタカナで指定してください。 `cid`はIPADIC品詞ID、`mid`は「501」としてください。`value`は`-5`から`-10`程度の範囲で設定してください。小さい値ほど変換されにくくなります。
26 | 
27 | ### `forgetMemory`
28 | 
29 | 特定の`Candidate`を渡すと、その`Candidate`に含まれている学習データを全てリセットします。
30 | 
31 | ## `setCompletedData`
32 | 
33 | prefixとして確定された候補を与えてください。
34 | 
35 | ## `updateLearningData`
36 | 
37 | 確定された候補を与えると、学習を更新します。
38 | 
39 | 


--------------------------------------------------------------------------------
/Docs/devcontainer.md:
--------------------------------------------------------------------------------
 1 | # Dev Container
 2 | 
 3 | このリポジトリーには、VS Code の Dev Container を使用して開発するための設定が含まれています。
 4 | 確実に動作する Swift の開発環境が自動的に構築され、すべて Docker コンテナー内で実行されます。
 5 | Docker コンテナーとのやりとりは VS Code が行ってくれます。
 6 | もちろん、Docker コンテナーの外に影響を与えることはありません。
 7 | 
 8 | ## 前提条件
 9 | 
10 | - Docker がインストールされていること
11 | - VS Code （または互換性のあるエディター、たとえば Cursor など）がインストールされていること
12 | 
13 | ## 開発環境の起動
14 | 
15 | こちらも参考にしてください：[開発ガイド](./development_guide.md)
16 | 
17 | 1. VS Code でこのリポジトリーを開きます
18 | 
19 | 2. もし、[Dev Containers](https://marketplace.visualstudio.com/items?itemName=ms-vscode-remote.remote-containers) 拡張機能がインストールされていない場合は、インストールします。
20 |    ただし、VS Code は Dev Container の設定ファイルを検出すると、自動的に拡張機能をインストールするように求めます。
21 | 
22 |    ![Dev Containers 拡張機能をインストールする。`ms-vscode-remote.remote-containers` を検索するか、右下の通知をクリックしてインストールします。](Images/install-devcontainers-extension.png)
23 | 
24 | 3. 左下の `><` アイコンをクリックし、`Reopen in Container` を選択します。
25 |    ただし、こちらも同様に、VS Code は Dev Container の設定ファイルを検出して、拡張機能がインストールされていれば、自動的に Dev Container を開くように求めます。
26 | 
27 |    ![Dev Container を開く。左下の `><` アイコンをクリックし、`Reopen in Container` を選択するか、右下の通知をクリックして開きます。](Images/reopen-in-container.png)
28 | 
29 | 4. しばらくすると、Dev Container が起動します。
30 |    初回の起動時には、Docker イメージをダウンロードする必要があるので、かなり時間がかかります。
31 |    次回以降は、Docker イメージがキャッシュされるため、起動時間は短縮されます。
32 | 
33 | 5. [Swift](https://marketplace.visualstudio.com/items?itemName=sswg.swift-lang) の拡張機能が自動的にインストールされるようになっていて、この拡張機能が依存関係の解決を行います。
34 | 
35 | 6. これで、Swift の開発環境が起動しました。
36 |    Docker コンテナー内でコマンドを実行したければ、VS Code のターミナルを使用するのがいいでしょう。
37 |    


--------------------------------------------------------------------------------
/Docs/development_guide.md:
--------------------------------------------------------------------------------
 1 | # 開発ガイド
 2 | 
 3 | 開発にはSwift 5.9以上が必要です。
 4 | 
 5 | 開発にはレポジトリをクローンしてください。サブモジュールを含むため、`--recursive`オプションが必要です。
 6 | 
 7 | ```bash
 8 | git clone https://github.com/ensan-hcl/AzooKeyKanaKanjiConverter --recursive
 9 | ```
10 | 
11 | ## Cliツール
12 | 
13 | デバッグ用のCliツールとして`anco`コマンドがあります。`install_cli.sh`を実行してインストールしてください。場合によっては、`sudo`が必要です。
14 | 
15 | ```bash
16 | sh install_cli.sh
17 | ```
18 | 
19 | 詳しくは[cli.md](./cli.md)をお読みください。
20 | 
21 | ## DevContainer
22 | 
23 | 開発にDevContainerを利用できます。詳しくは[devcontainer.md](./devcontainer.md)をお読みください。
24 | 
25 | ## コントリビュート
26 | 
27 | コントリビュートは歓迎です！
28 | 


--------------------------------------------------------------------------------
/Docs/dicdata_format.md:
--------------------------------------------------------------------------------
 1 | # Dicdata Format
 2 | 
 3 | azooKeyの辞書データは次のようなフォーマットになっています。
 4 | 
 5 | NOTE: LOUDSそのものに関する解説は行いません。
 6 | 
 7 | ## DicdataElement型
 8 | 
 9 | ```swift
10 | struct DicdataElement {
11 |     // 単語の表記
12 |     var word: String
13 |     // 単語のルビ（カタカナ）
14 |     var ruby: String
15 |     // 単語の左連接ID
16 |     var lcid: Int
17 |     // 単語の右連接ID
18 |     var rcid: Int
19 |     // 単語のMID
20 |     var mid: Int
21 |     // 単語の基礎コスト (PValue = Float16)
22 |     var baseValue: PValue
23 |     // コストの動的調整
24 |     var adjust: PValue
25 | }
26 | ```
27 | 
28 | 注意すべき点は次のとおりです。
29 | 
30 | * 連語などの場合、`lcid`と`rcid`が異なる値を取ることがあります。
31 | * 単語の基礎コストは、歴史的な事情によって負の小数です。大きいほど頻出する単語です。
32 | * コストの動的調整は、誤り訂正などのためにコストを調整したい場合に使います。例えば「大学生」の基礎コストを「-10」としたとき、「たいがくせい」と入力した誤り訂正の結果として「大学生」が得られている場合は、`adjust`を-3のような値として、合計コストが-13であるかのように振る舞わせます。
33 | 
34 | `DicdataElement`は`DicdataStore`で辞書データファイルから生成されます。
35 | 
36 | ## 辞書データファイル
37 | 
38 | 辞書データは次の4つの種類のファイルからなります。
39 | 
40 | * `.louds`
41 | * `.loudschars2`
42 | * `.charID`
43 | * `.loudstxt3`
44 | 
45 | まず、`.louds`のファイルがLOUDS Trieをバイナリ形式で保存したものです。
46 | 
47 | 次に、`.loudschars2`は各ノードに割り当てられた文字を記録するものです。ただしUnicode文字列の代わりに、1バイトのCharacter IDで表現されています。このため、`.loudschars2`は1バイトずつ処理できます。`.charID`がCharacterをIDに割り当てるためのデータを格納します。
48 | 
49 | 最後に、`.loudstxt3`に各ノードに割り当てられたエントリーのデータが記録されています。
50 | 
51 | azooKeyの辞書ルックアップは次のように進みます。
52 | 
53 | 1. 起動時に一度だけ`charID`を読み込みます。以降はこれを参照してクエリをID列に変換します。
54 | 1. クエリを受け取ったら、ID列に変換します。クエリの先頭の文字に対応する`louds`と`loudschars2`を読み込みます。Swift側ではこの2つをセットにして`LOUDS`構造体が作られ、キャッシュされます。
55 | 1. `LOUDS`を検索し、必要なノードの番号を列挙します。
56 | 1. クエリの先頭の文字に対応する`loudstxt3`を読み込み、必要な番号のノードに記録されたデータを読み出します。読み出したデータを`DicdataElement`形式に変換し、以降の処理で利用します。なお、`loudstxt3`の方はキャッシュしないので、必要になるたびにIOが走ります。
57 | 
58 | ### `.louds`の構造
59 | 
60 | `.louds`ファイルはLOUDSのbit列を保存したものです。
61 | 
62 | ### `.loudschars2`の構造
63 | 
64 | TBW
65 | 
66 | ### `.charID`の構造
67 | 
68 | TBW
69 | 
70 | ### `.loudstxt3`の構造
71 | 
72 | TBW
73 | 
74 | ## 重みデータ（CID）
75 | 
76 | 品詞バイグラムの重み行列が疎行列になることから、CIDの重みデータはフォーマットを工夫しています。
77 | 
78 | TBW
79 | 
80 | ## 重みデータ（MID）
81 | 
82 | こちらは疎行列ではないため、重み行列をそのままバイナリ化したものが`mm.binary`として保存されています。
83 | 
84 | TBW
85 | 


--------------------------------------------------------------------------------
/Docs/failures.md:
--------------------------------------------------------------------------------
 1 | # Failures
 2 | 
 3 | AzooKeyKanaKanjiConverterの開発上、明確に失敗だったと考えている実装や仕様をまとめます。これらは将来的に修正できるかもしれないし、できないかもしれないです。
 4 | 
 5 | このドキュメントの目的はAzooKeyKanaKanjiConverterの判断ミスを明確にして、今後AzooKeyKanaKanjiConverterのフォークを作成する方や、新たな日本語入力ソフトウェアを作ろうとする方に向けて知見を残しておくことです。
 6 | 
 7 | azooKey本体については[azooKeyをベースとするプロダクトの開発開始時に注意すべき点](https://github.com/ensan-hcl/azooKey/tree/develop/docs/advice_for_azooKey_based_development.md)もご覧ください。
 8 | 
 9 | ## コストの設計
10 | 
11 | 辞書データのコストはざっくり言って対数尤度です。azooKeyの開発初期、コストは対数尤度をそのまま使っていました。これを続けてしまったため、普通は`Int`などにするのですが、azooKeyのコストは`Float16`になっています。
12 | 
13 | `Float16`の現実的に不便な点は以下の通りです。
14 | 
15 | * 整数型に比べて(おそらく)和の計算が遅い
16 | * macOSが`Float16`に対応していないことに由来する困難がある
17 | 
18 | 今から作り直すなら、`UInt16`か`UInt8`にしています。サンプリングを工夫することで、`UInt8`でも十分な表現ができる可能性があります。
19 | 
20 | なお、今後のバージョンアップでこの変更を行う場合、ユーザ側でもマイグレーションが必要になります。
21 | 
22 | * 絵文字・顔文字・ユーザ辞書の再コンパイル
23 | * 学習データの再コンパイル
24 | 
25 | 逆にいうと、気合と正しいマイグレーションの実装ができれば今からでも修正可能ではあります。
26 | 
27 | ## CharIDの設計
28 | 
29 | `LOUDS`検索を高速化するため、`LOUDS`のラベルは文字コードそのものではなく、それに対応するCharIDにしています。
30 | 
31 | しかし、このCharIDをかなり雑に決めたところがあり、なぜか入っている文字やなぜか入っていない文字があります。
32 | 
33 | この辺りは一度再設計したいのですが、これもやはり上記のマイグレーションが必要になります。
34 | 
35 | ## 品詞IDの設計
36 | 
37 | 品詞IDは「ipadic+独自拡張」という形になっています。具体的には、かな漢字変換上分離したほうが良い3つの品詞を追加しています。
38 | 
39 | * 1316: EOS（文頭と文末は区別されるべきである）
40 | * 1317: ？（疑問助詞等との接続の可能性が高い）
41 | * 1318: ！（強意の助詞等との接続の可能性が高い）
42 | 
43 | しかし、ipadicはさまざまな点で難しい面のある品詞体系です（[参考](https://zenn.dev/azookey/articles/c201408af14ae0)）。もし最初から作り直すのであれば、品詞IDはUnidicのものを用いていたのではないかと思います。
44 | 
45 | 上の2つと違って、品詞IDの変更は現実的でないと考えています。というのも、辞書生成モジュールを含む無数のコードでipadicが前提となっており、作業量が膨大になりすぎるからです。もちろんマイグレーションも必要になります。
46 | 
47 | 


--------------------------------------------------------------------------------
/Docs/zenzai.md:
--------------------------------------------------------------------------------
 1 | # Zenzai
 2 | 
 3 | ニューラルかな漢字変換エンジン「Zenzai」を有効化することで、高精度な変換を提供できます。利用するには変換オプションの`zenzaiMode`を設定します。
 4 | 
 5 | ```swift
 6 | let options = ConvertRequestOptions.withDefaultDictionary(
 7 |     // ...
 8 |     zenzaiMode: .on(
 9 |         weight: url,
10 |         inferenceLimit: 1,
11 |         versionDependentMode: .v3(.init(profile: "三輪/azooKeyの開発者", leftSideContext: "私の名前は"))
12 |     )
13 |     // ...
14 | )
15 | ```
16 | 
17 | * `weight`には`gguf`形式の重みファイルを指定します。重みファイルは[Hugging Face](https://huggingface.co/Miwa-Keita/zenz-v3-small-gguf)からダウンロードできます。
18 | * `inferenceLimit`には推論回数の上限を指定します。通常`1`で十分ですが、低速でも高精度な変換を得たい場合は`5`程度の値にすることもできます。
19 | 
20 | ## 動作環境
21 | * M1以上のスペックのあるmacOS環境が望ましいです。GPUを利用します。
22 | * モデルサイズに依存しますが、現状150MB程度のメモリを必要とします。
23 | * Linux環境・Windows環境でもCUDAを用いて動作します。
24 | 
25 | ## 仕組み
26 | [Zennのブログ](https://zenn.dev/azookey/articles/ea15bacf81521e)をお読みいただくのが最もわかりやすい解説です。
27 | 
28 | ## 用語
29 | * Zenzai: ニューラルかな漢字変換システム
30 | * zenz-v1: Zenzaiで用いることのできるかな漢字変換モデル「zenz」の第1世代。`\uEE00<input_katakana>\uEE01<output></s>`というフォーマットでかな漢字変換タスクを行う機能に特化。
31 | * zenz-v2: Zenzaiで用いることのできるかな漢字変換モデル「zenz」の第2世代。第1世代の機能に加えて`\uEE00<input_katakana>\uEE02<context>\uEE01<output></s>`というフォーマットで、左文脈を読み込む機能を追加。
32 | * zenz-v3: Zenzaiで用いることのできるかな漢字変換モデル「zenz」の第3世代。第2世代と異なり、`\uEE02<context>\uEE00<input_katakana>\uEE01<output></s>`のようにコンテキストを前置する方式を推奨。また、`\uEE03`に続けて入力されたプロフィール情報を考慮する動作をネイティブに学習済み。このほか、実験的に`\uEE04`+トピック、`\uEE05`+スタイル、`\uEE06`+設定も考慮できるようになっています。
33 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2023 Miwa / Ensan
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # AzooKeyKanaKanjiConverter
  2 | 
  3 | AzooKeyKanaKanjiConverterは[azooKey](https://github.com/ensan-hcl/azooKey)のために開発したかな漢字変換エンジンです。数行のコードでかな漢字変換をiOS / macOS / visionOSのアプリケーションに組み込むことができます。
  4 | 
  5 | また、AzooKeyKanaKanjiConverterはニューラルかな漢字変換システム「Zenzai」を利用した高精度な変換もサポートしています。
  6 | 
  7 | ## 動作環境
  8 | iOS 16以降, macOS 13以降, visionOS 1以降, Ubuntu 22.04以降で動作を確認しています。Swift 6.1以上が必要です。
  9 | 
 10 | AzooKeyKanaKanjiConverterの開発については[開発ガイド](Docs/development_guide.md)をご覧ください。
 11 | 
 12 | ## KanaKanjiConverterModule
 13 | かな漢字変換を受け持つモジュールです。
 14 | 
 15 | ### セットアップ
 16 | * Xcodeprojの場合、XcodeでAdd Packageしてください。
 17 | 
 18 | * Swift Packageの場合、Package.swiftの`Package`の引数の`dependencies`に以下の記述を追加してください。
 19 |   ```swift
 20 |   dependencies: [
 21 |       .package(url: "https://github.com/azooKey/AzooKeyKanaKanjiConverter", .upToNextMinor(from: "0.8.0"))
 22 |   ],
 23 |   ```
 24 |   また、ターゲットの`dependencies`にも同様に追加してください。
 25 |   ```swift
 26 |   .target(
 27 |       name: "MyPackage",
 28 |       dependencies: [
 29 |           .product(name: "KanaKanjiConverterModuleWithDefaultDictionary", package: "AzooKeyKanaKanjiConverter")
 30 |       ],
 31 |   ),
 32 |   ```
 33 | 
 34 | > [!IMPORTANT]  
 35 | > AzooKeyKanaKanjiConverterはバージョン1.0のリリースまで開発版として運用するため、マイナーバージョンの変更で破壊的変更を実施する可能性があります。バージョンを指定する際にはマイナーバージョンが上がらないよう、`.upToNextMinor(from: "0.8.0")`のように指定することを推奨します。
 36 | 
 37 | 
 38 | ### 使い方
 39 | ```swift
 40 | // デフォルト辞書つきの変換モジュールをインポート
 41 | import KanaKanjiConverterModuleWithDefaultDictionary
 42 | 
 43 | // 変換器を初期化する
 44 | let converter = KanaKanjiConverter()
 45 | // 入力を初期化する
 46 | var c = ComposingText()
 47 | // 変換したい文章を追加する
 48 | c.insertAtCursorPosition("あずーきーはしんじだいのきーぼーどあぷりです", inputStyle: .direct)
 49 | // 変換のためのオプションを指定して、変換を要求
 50 | let results = converter.requestCandidates(c, options: .withDefaultDictionary(...))
 51 | // 結果の一番目を表示
 52 | print(results.mainResults.first!.text)  // azooKeyは新時代のキーボードアプリです
 53 | ```
 54 | `options: .withDefaultDictionary(...)`は、`ConvertRequestOptions`を生成し、変換リクエストに必要な情報を指定します。詳しくはコード内のドキュメントコメントを参照してください。
 55 | 
 56 | 
 57 | ### `ConvertRequestOptions`
 58 | `ConvertRequestOptions`は変換リクエストに必要な設定値です。例えば以下のように設定します。
 59 | 
 60 | ```swift
 61 | let options = ConvertRequestOptions.withDefaultDictionary(
 62 |     // 日本語予測変換
 63 |     requireJapanesePrediction: true,
 64 |     // 英語予測変換 
 65 |     requireEnglishPrediction: false,
 66 |     // 入力言語 
 67 |     keyboardLanguage: .ja_JP,
 68 |     // 学習タイプ 
 69 |     learningType: .nothing, 
 70 |     // 学習データを保存するディレクトリのURL（書類フォルダを指定）
 71 |     memoryDirectoryURL: .documentsDirectory, 
 72 |     // ユーザ辞書データのあるディレクトリのURL（書類フォルダを指定）
 73 |     sharedContainerURL: .documentsDirectory, 
 74 |     // メタデータ
 75 |     metadata: .init(versionString: "Your App Version X")
 76 | )
 77 | ```
 78 | 
 79 | 開く際に保存処理が中断された `.pause` ファイルが残っている場合は、変換器が自動的に復旧を試みてファイルを削除します。
 80 | 
 81 | ### `ComposingText`
 82 | `ComposingText`は入力管理を行いつつ変換をリクエストするためのAPIです。ローマ字入力などを適切にハンドルするために利用できます。詳しくは[ドキュメント](./Docs/composing_text.md)を参照してください。
 83 | 
 84 | ### Zenzaiを使う
 85 | ニューラルかな漢字変換システム「Zenzai」を利用するには、追加で[Swift Package Traits](https://github.com/swiftlang/swift-evolution/blob/main/proposals/0450-swiftpm-package-traits.md)の設定を行う必要があります。AzooKeyKanaKanjiConverterは「Zenzai」というTraitをサポートしているので、これを追加してください。
 86 | 
 87 | ```swift
 88 | dependencies: [
 89 |     .package(url: "https://github.com/azooKey/AzooKeyKanaKanjiConverter", .upToNextMinor(from: "0.8.0"), traits: ["Zenzai"])
 90 | ],
 91 | ```
 92 | 
 93 | `ConvertRequestOptions`の`zenzaiMode`を指定します。詳しい引数の情報については[ドキュメント](./Docs/zenzai.md)を参照してください。
 94 | 
 95 | ```swift
 96 | let options = ConvertRequestOptions.withDefaultDictionary(
 97 |     // ...
 98 |     zenzaiMode: .on(weight: url, inferenceLimit: 10)
 99 |     // ...
100 | )
101 | ```
102 | 
103 | ### 辞書データ
104 | 
105 | AzooKeyKanaKanjiConverterのデフォルト辞書として[azooKey_dictionary_storage](https://github.com/ensan-hcl/azooKey_dictionary_storage)がサブモジュールとして指定されています。過去のバージョンの辞書データは[Google Drive](https://drive.google.com/drive/folders/1Kh7fgMFIzkpg7YwP3GhWTxFkXI-yzT9E?usp=sharing)からもダウンロードすることができます。
106 | 
107 | また、以下のフォーマットであれば自前で用意した辞書データを利用することもできます。カスタム辞書データのサポートは限定的なので、ソースコードを確認の上ご利用ください。
108 | 
109 | ```
110 | - Dictionary/
111 |   - louds/
112 |     - charId.chid
113 |     - X.louds
114 |     - X.loudschars2
115 |     - X.loudstxt3
116 |     - ...
117 |   - p/
118 |     - X.csv
119 |   - cb/
120 |     - 0.binary
121 |     - 1.binary
122 |     - ...
123 |   - mm.binary
124 | ```
125 | 
126 | デフォルト以外の辞書データを利用する場合、ターゲットの`dependencies`に以下を追加してください。
127 | ```swift
128 | .target(
129 |   name: "MyPackage",
130 |   dependencies: [
131 |       .product(name: "KanaKanjiConverterModule", package: "AzooKeyKanaKanjiConverter")
132 |   ],
133 | ),
134 | ```
135 | 
136 | 利用時に、辞書データのディレクトリを明示的に指定する必要があります。
137 | ```swift
138 | // デフォルト辞書を含まない変換モジュールを指定
139 | import KanaKanjiConverterModule
140 | 
141 | let options = ConvertRequestOptions(
142 |     // 日本語予測変換
143 |     requireJapanesePrediction: true,
144 |     // 英語予測変換 
145 |     requireEnglishPrediction: false,
146 |     // 入力言語 
147 |     keyboardLanguage: .ja_JP,
148 |     // 学習タイプ 
149 |     learningType: .nothing, 
150 |     // ここが必要
151 |     // 辞書データのURL（先ほど追加した辞書リソースを指定）
152 |     dictionaryResourceURL: Bundle.main.bundleURL.appending(path: "Dictionary", directoryHint: .isDirectory),
153 |     // 学習データを保存するディレクトリのURL（書類フォルダを指定）
154 |     memoryDirectoryURL: .documentsDirectory, 
155 |     // ユーザ辞書データのあるディレクトリのURL（書類フォルダを指定）
156 |     sharedContainerURL: .documentsDirectory, 
157 |     // メタデータ
158 |     metadata: .init(versionString: "Your App Version X")
159 | )
160 | ```
161 | 
162 | `dictionaryResourceURL`のオプションは`KanaKanjiConverterModuleWithDefaultDictionary`モジュールでも利用できますが、バンドルに含まれる辞書リソースが利用されないため、アプリケーションサイズが不必要に大きくなります。デフォルトでない辞書データを利用する場合は`KanaKanjiConverterModule`を利用してください。
163 | 
164 | ## SwiftUtils
165 | Swift一般に利用できるユーティリティのモジュールです。
166 | 


--------------------------------------------------------------------------------
/Sources/CliTool/Anco.swift:
--------------------------------------------------------------------------------
 1 | import KanaKanjiConverterModuleWithDefaultDictionary
 2 | import ArgumentParser
 3 | 
 4 | /// Entry point of the `anco` command line tool.
 5 | ///
 6 | /// The type holds no logic of its own: it only registers the subcommands and
 7 | /// selects `Subcommands.Run` as the one used when no subcommand name is given.
 8 | @main
 9 | public struct Anco: AsyncParsableCommand {
10 |     public static let configuration: CommandConfiguration = .init(
11 |         abstract: "Anco is A(zooKey) Kana-Ka(n)ji (co)nverter",
12 |         subcommands: [
13 |             Subcommands.Run.self,
14 |             Subcommands.Dict.self,
15 |             Subcommands.Evaluate.self,
16 |             Subcommands.ZenzEvaluate.self,
17 |             Subcommands.Session.self,
18 |             Subcommands.ExperimentalPredict.self,
19 |             Subcommands.NGram.self
20 |         ],
21 |         defaultSubcommand: Subcommands.Run.self
22 |     )
23 | 
24 |     public init() {}
25 | }
22 | 


--------------------------------------------------------------------------------
/Sources/CliTool/DefaultStringInterpolation+CommandLineUtils.swift:
--------------------------------------------------------------------------------
 1 | //
 2 | //  DefaultStringInterpolation+CommandLineUtils.swift
 3 | //
 4 | //
 5 | //  Created by miwa on 2024/04/29.
 6 | //
 7 | 
 8 | import Foundation
 9 | 
10 | extension DefaultStringInterpolation {
11 |     /// Enables `"\(bold: text)"`: appends `value` wrapped in the escape
12 |     /// sequences `ESC[1m` (before) and `ESC[m` (after).
13 |     mutating func appendInterpolation(bold value: String) {
14 |         let boldOn = "\u{1B}[1m"
15 |         let reset = "\u{1B}[m"
16 |         self.appendLiteral(boldOn)
17 |         self.appendLiteral(value)
18 |         self.appendLiteral(reset)
19 |     }
20 | }
15 | 


--------------------------------------------------------------------------------
/Sources/CliTool/Subcommands/Commands.swift:
--------------------------------------------------------------------------------
1 | /// Namespace for the CLI subcommands.
2 | /// It is a case-less enum, so it cannot be instantiated; the individual commands are added to it through extensions.
3 | enum Subcommands {}
3 | 


--------------------------------------------------------------------------------
/Sources/CliTool/Subcommands/DictCommands/DictCommand.swift:
--------------------------------------------------------------------------------
 1 | import Foundation
 2 | import KanaKanjiConverterModuleWithDefaultDictionary
 3 | import ArgumentParser
 4 | 
 5 | extension Subcommands {
 6 |     /// `dict` command: has no behavior of its own and only groups the
 7 |     /// `Read` and `Build` subcommands.
 8 |     struct Dict: ParsableCommand {
 9 |         static let configuration: CommandConfiguration = .init(
10 |             commandName: "dict",
11 |             abstract: "Show dict information",
12 |             subcommands: [Read.self, Build.self]
13 |         )
14 |     }
15 | }
14 | 


--------------------------------------------------------------------------------
/Sources/CliTool/Subcommands/DictCommands/ReadCommand.swift:
--------------------------------------------------------------------------------
  1 | import Foundation
  2 | import KanaKanjiConverterModule
  3 | import ArgumentParser
  4 | 
  5 | extension Subcommands.Dict {
  6 |     /// `read` subcommand: loads one LOUDS dictionary, prints a summary and, when a
  7 |     /// filter option is given, lists the entries that match it.
  8 |     struct Read: ParsableCommand {
  9 |         /// Key used to order the listed entries (`--sort`).
 10 |         enum SortOrder: String, Codable, ExpressibleByArgument {
 11 |             case value
 12 |             case ruby
 13 |             case word
 14 | 
 15 |             init?(argument: String) {
 16 |                 self.init(rawValue: argument)
 17 |             }
 18 |         }
 19 | 
 20 |         @Argument(help: "辞書データのfilename")
 21 |         var target: String = ""
 22 | 
 23 |         @Option(name: [.customLong("dictionary_dir"), .customShort("d")], help: "The directory for dictionary data.")
 24 |         var dictionaryDirectory: String = "./"
 25 | 
 26 |         @Option(name: [.customLong("ruby")], help: "Regex for entry ruby filter")
 27 |         var rubyFilter: String = ""
 28 | 
 29 |         @Option(name: [.customLong("word")], help: "Regex for entry word filter")
 30 |         var wordFilter: String = ""
 31 | 
 32 |         @Option(name: [.customLong("sort")], help: "Sort order")
 33 |         var sortOrder: SortOrder = .ruby
 34 | 
 35 |         static let configuration = CommandConfiguration(
 36 |             commandName: "read",
 37 |             abstract: "Read dictionary data and extract informations"
 38 |         )
 39 | 
 40 |         /// Prints the summary for `target` and, if `--ruby` or `--word` was given,
 41 |         /// the matching entries ordered by `sortOrder`.
 42 |         ///
 43 |         /// - Throws: The error from `Regex.init` when a filter is not a valid pattern.
 44 |         mutating func run() throws {
 45 |             // The whole body is gated on macOS 13 (the `Regex` API used below requires it).
 46 |             guard #available(macOS 13, *) else {
 47 |                 return
 48 |             }
 49 |             let start = Date()
 50 |             let isMemory = self.target == "memory"
 51 |             guard let louds = LOUDS.load(self.target, option: self.requestOptions()) else {
 52 |                 print(
 53 |                     """
 54 |                     \(bold: "=== Summary for target \(self.target) ===")
 55 |                     - directory: \(self.dictionaryDirectory)
 56 |                     - target: \(self.target)
 57 |                     - memory?: \(isMemory)
 58 |                     - result: LOUDS data was not found
 59 |                     - time for execute: \(Date().timeIntervalSince(start))
 60 |                     """
 61 |                 )
 62 |                 return
 63 |             }
 64 |             // Fetch everything: empty prefix, no depth limit, no count limit.
 65 |             let nodeIndices = louds.prefixNodeIndices(chars: [], maxDepth: .max, maxCount: .max)
 66 |             let store = DicdataStore(convertRequestOptions: self.requestOptions())
 67 |             let result = store.getDicdataFromLoudstxt3(identifier: self.target, indices: nodeIndices)
 68 |             var filteredResult = result
 69 |             var hasFilter = false
 70 |             // Both filters use `wholeMatch`, so a pattern has to cover the entire ruby / word.
 71 |             if !rubyFilter.isEmpty {
 72 |                 let filter = try Regex(rubyFilter)
 73 |                 hasFilter = true
 74 |                 filteredResult = filteredResult.filter {
 75 |                     $0.ruby.wholeMatch(of: filter) != nil
 76 |                 }
 77 |             }
 78 |             if !wordFilter.isEmpty {
 79 |                 let filter = try Regex(wordFilter)
 80 |                 hasFilter = true
 81 |                 filteredResult = filteredResult.filter {
 82 |                     $0.word.wholeMatch(of: filter) != nil
 83 |                 }
 84 |             }
 85 | 
 86 |             // The summary reports the unfiltered entry count.
 87 |             print(
 88 |                 """
 89 |                 \(bold: "=== Summary for target \(self.target) ===")
 90 |                 - directory: \(self.dictionaryDirectory)
 91 |                 - target: \(self.target)
 92 |                 - memory?: \(isMemory)
 93 |                 - count of entry: \(result.count)
 94 |                 - time for execute: \(Date().timeIntervalSince(start))
 95 |                 """
 96 |             )
 97 | 
 98 |             // Entries are listed only when at least one filter was given.
 99 |             if hasFilter {
100 |                 let sortFunction: (DicdataElement, DicdataElement) -> Bool = switch self.sortOrder {
101 |                 // Shorter rubies first, ties broken lexicographically. The previous
102 |                 // `a < b || a.count < b.count` could hold in both directions
103 |                 // (e.g. "b" vs "aa"), so it was not the strict weak ordering that
104 |                 // `sorted(by:)` requires.
105 |                 case .ruby: { ($0.ruby.count, $0.ruby) < ($1.ruby.count, $1.ruby) }
106 |                 case .value: { $0.value() < $1.value() }
107 |                 case .word: { $0.word < $1.word }
108 |                 }
109 | 
110 |                 print("\(bold: "=== Found Entries ===")")
111 |                 print("- count of found entry: \(filteredResult.count)")
112 |                 for entry in filteredResult.sorted(by: sortFunction) {
113 |                     print("\(bold: "Ruby:") \(entry.ruby) \(bold: "Word:") \(entry.word) \(bold: "Value:") \(entry.value()) \(bold: "CID:") \((entry.lcid, entry.rcid)) \(bold: "MID:") \(entry.mid)")
114 |                 }
115 |             }
116 |         }
117 | 
118 |         /// Builds the options passed to `LOUDS.load` and `DicdataStore`: every
119 |         /// directory URL points at `dictionaryDirectory`, and prediction and
120 |         /// learning are turned off.
121 |         func requestOptions() -> ConvertRequestOptions {
122 |             .init(
123 |                 N_best: 0,
124 |                 requireJapanesePrediction: false,
125 |                 requireEnglishPrediction: false,
126 |                 keyboardLanguage: .ja_JP,
127 |                 englishCandidateInRoman2KanaInput: true,
128 |                 fullWidthRomanCandidate: false,
129 |                 halfWidthKanaCandidate: false,
130 |                 learningType: .nothing,
131 |                 maxMemoryCount: 0,
132 |                 dictionaryResourceURL: URL(fileURLWithPath: self.dictionaryDirectory),
133 |                 memoryDirectoryURL: URL(fileURLWithPath: self.dictionaryDirectory),
134 |                 sharedContainerURL: URL(fileURLWithPath: self.dictionaryDirectory),
135 |                 textReplacer: .empty,
136 |                 specialCandidateProviders: nil,
137 |                 metadata: .init(versionString: "anco for debugging")
138 |             )
139 |         }
140 |     }
141 | }
124 | 


--------------------------------------------------------------------------------
/Sources/CliTool/Subcommands/ExperimentalPredict.swift:
--------------------------------------------------------------------------------
 1 | import KanaKanjiConverterModuleWithDefaultDictionary
 2 | import ArgumentParser
 3 | import Foundation
 4 | 
 5 | extension Subcommands {
 6 |     /// `experimental_predict` subcommand: prints next-character predictions for
 7 |     /// a given left-side context.
 8 |     struct ExperimentalPredict: AsyncParsableCommand {
 9 |         @Argument(help: "通常の文字列")
10 |         var input: String = ""
11 | 
12 |         // NOTE(review): `displayTopN` is parsed but never read in this struct;
13 |         // `run()` always requests 10 predictions.
14 |         @Option(name: [.customShort("n"), .customLong("top_n")], help: "Display top n candidates.")
15 |         var displayTopN: Int = 1
16 |         @Option(name: [.customLong("zenz")], help: "gguf format model weight for zenz.")
17 |         var zenzWeightPath: String = ""
18 | 
19 |         // The abstract previously read "Show help for this utility.", which did not describe this command.
20 |         static let configuration = CommandConfiguration(commandName: "experimental_predict", abstract: "Predict the next characters for the given left-side context (experimental).")
21 | 
22 |         /// Requests 10 next-character predictions for `input` and prints each one
23 |         /// as `index. character: value`.
24 |         @MainActor mutating func run() async {
25 |             let converter = KanaKanjiConverter()
26 |             let result = converter.predictNextCharacter(leftSideContext: self.input, count: 10, options: requestOptions())
27 |             for (i, res) in result.indexed() {
28 |                 print("\(i). \(res.character): \(res.value)")
29 |             }
30 |         }
31 | 
32 |         /// Options for the prediction request. Zenzai is enabled only when
33 |         /// `--zenz` was given; learning is disabled.
34 |         func requestOptions() -> ConvertRequestOptions {
35 |             .withDefaultDictionary(
36 |                 N_best: 10,
37 |                 requireJapanesePrediction: true,
38 |                 requireEnglishPrediction: false,
39 |                 keyboardLanguage: .ja_JP,
40 |                 typographyLetterCandidate: false,
41 |                 unicodeCandidate: true,
42 |                 englishCandidateInRoman2KanaInput: true,
43 |                 fullWidthRomanCandidate: false,
44 |                 halfWidthKanaCandidate: false,
45 |                 learningType: .nothing,
46 |                 maxMemoryCount: 0,
47 |                 shouldResetMemory: false,
48 |                 memoryDirectoryURL: URL(fileURLWithPath: ""),
49 |                 sharedContainerURL: URL(fileURLWithPath: ""),
50 |                 // `--zenz` is a file-system path, so build a file URL from it.
51 |                 // `URL(string:)` returns nil for strings it cannot parse, and the
52 |                 // former force unwrap crashed on such paths.
53 |                 zenzaiMode: self.zenzWeightPath.isEmpty ? .off : .on(weight: URL(fileURLWithPath: self.zenzWeightPath), inferenceLimit: .max, personalizationMode: nil, versionDependentMode: .v3(.init())),
54 |                 metadata: .init(versionString: "anco for debugging")
55 |             )
56 |         }
57 |     }
58 | }
47 | 


--------------------------------------------------------------------------------
/Sources/CliTool/Subcommands/NGramCommands/InferenceCommand.swift:
--------------------------------------------------------------------------------
 1 | import Foundation
 2 | import EfficientNGram
 3 | import ArgumentParser
 4 | 
 5 | extension Subcommands.NGram {
 6 |     /// `inference` subcommand: generates text from a prompt with one n-gram LM,
 7 |     /// or with two LMs mixed by `alpha` when `--another_lm` is given.
 8 |     struct Inference: ParsableCommand {
 9 |         @Argument(help: "学習済みのLM")
10 |         var lmPattern: String = ""
11 | 
12 |         @Option(name: [.customLong("another_lm")], help: "Another lm for flavored decoding")
13 |         var anotherLMPattern: String?
14 | 
15 |         @Option(name: [.customLong("alpha")], help: "alpha for flavored decoding")
16 |         var alpha: Double = 0.5
17 | 
18 |         @Option(name: [.customLong("prompt"), .customShort("p")], help: "The prompt for inference.")
19 |         var prompt: String = "これは"
20 | 
21 |         @Option(name: [.customShort("n")], help: "n-gram's n")
22 |         var n: Int = 5
23 | 
24 |         @Option(name: [.customLong("length"), .customShort("l")], help: "token length for generation")
25 |         var length: Int = 100
26 | 
27 |         static let configuration: CommandConfiguration = .init(
28 |             commandName: "inference",
29 |             abstract: "Inference using ngram"
30 |         )
31 | 
32 |         /// Runs `block` once and returns its result together with the elapsed
33 |         /// time in milliseconds.
34 |         private func measureExecutionTime(block: () -> String) -> (String, Double) {
35 |             let startedAt = DispatchTime.now().uptimeNanoseconds
36 |             let output = block()
37 |             let elapsedNanoseconds = DispatchTime.now().uptimeNanoseconds - startedAt
38 |             // nanoseconds -> milliseconds
39 |             return (output, Double(elapsedNanoseconds) / 1_000_000)
40 |         }
41 | 
42 |         /// Loads the LM(s), generates text for `prompt`, and prints the text and
43 |         /// the time the generation took.
44 |         mutating func run() throws {
45 |             print("Loading LM base: \(self.lmPattern)")
46 |             let tokenizer = ZenzTokenizer()
47 |             let order = self.n
48 |             let lmBase = EfficientNGram(baseFilename: self.lmPattern, n: order, d: 0.75, tokenizer: tokenizer)
49 |             // Without `--another_lm`, the base LM is used for both roles.
50 |             let lmPerson = self.anotherLMPattern.map {
51 |                 EfficientNGram(baseFilename: $0, n: order, d: 0.75, tokenizer: tokenizer)
52 |             } ?? lmBase
53 |             // Only the generation itself is timed, not the loading above.
54 |             let timed = measureExecutionTime {
55 |                 generateText(
56 |                     inputText: self.prompt,
57 |                     mixAlpha: self.alpha,
58 |                     lmBase: lmBase,
59 |                     lmPerson: lmPerson,
60 |                     tokenizer: tokenizer,
61 |                     maxCount: self.length
62 |                 )
63 |             }
64 |             print("\(bold: "Generated"): \(timed.0)")
65 |             print("\(bold: "Execution Time"): \(timed.1) ms")
66 |         }
67 |     }
68 | }
63 | 


--------------------------------------------------------------------------------
/Sources/CliTool/Subcommands/NGramCommands/NGramCommand.swift:
--------------------------------------------------------------------------------
 1 | import Foundation
 2 | import ArgumentParser
 3 | 
 4 | extension Subcommands {
 5 |     /// `ngram` command: has no behavior of its own and only groups the
 6 |     /// `Train` and `Inference` subcommands.
 7 |     struct NGram: ParsableCommand {
 8 |         static let configuration: CommandConfiguration = .init(
 9 |             commandName: "ngram",
10 |             abstract: "Use EfficientNGram Implementation",
11 |             subcommands: [Train.self, Inference.self]
12 |         )
13 |     }
14 | }
13 | 


--------------------------------------------------------------------------------
/Sources/CliTool/Subcommands/NGramCommands/TrainCommand.swift:
--------------------------------------------------------------------------------
 1 | import Foundation
 2 | import EfficientNGram
 3 | import ArgumentParser
 4 | 
 5 | extension Subcommands.NGram {
 6 |     /// `train` subcommand: trains an n-gram LM from a text file and writes the
 7 |     /// result into `outputDirectory`.
 8 |     struct Train: ParsableCommand {
 9 |         @Argument(help: "学習テキストデータのfilename")
10 |         var target: String = ""
11 | 
12 |         @Option(name: [.customLong("output_dir"), .customShort("o")], help: "The directory for output lm data.")
13 |         var outputDirectory: String = "./"
14 | 
15 |         @Option(name: [.customShort("n")], help: "n-gram's n")
16 |         var n: Int = 5
17 | 
18 |         @Option(name: [.customLong("resume")], help: "Resume from these lm data")
19 |         var resumeFilePattern: String?
20 | 
21 |         static let configuration: CommandConfiguration = .init(
22 |             commandName: "train",
23 |             abstract: "Train ngram and write the data"
24 |         )
25 | 
26 |         /// Prints the output location and hands the actual work to
27 |         /// `trainNGramFromFile`.
28 |         mutating func run() throws {
29 |             // `pattern` is used only for this message; the trainer receives the
30 |             // directory and the base name "lm" separately.
31 |             let outputURL = URL(fileURLWithPath: self.outputDirectory)
32 |             let pattern = outputURL.path() + "lm_"
33 |             print("Saving for \(pattern)")
34 |             trainNGramFromFile(
35 |                 filePath: self.target,
36 |                 n: self.n,
37 |                 baseFilePattern: "lm",
38 |                 outputDir: self.outputDirectory,
39 |                 resumeFilePattern: self.resumeFilePattern
40 |             )
41 |         }
42 |     }
43 | }
37 | 


--------------------------------------------------------------------------------
/Sources/CliTool/Subcommands/RunCommand.swift:
--------------------------------------------------------------------------------
  1 | import KanaKanjiConverterModuleWithDefaultDictionary
  2 | import ArgumentParser
  3 | import Foundation
  4 | 
  5 | extension Subcommands {
  6 |     struct Run: AsyncParsableCommand {
  7 |         @Argument(help: "ひらがなで表記された入力")
  8 |         var input: String = ""
  9 | 
 10 |         @Option(name: [.customLong("config_n_best")], help: "The parameter n (n best parameter) for internal viterbi search.")
 11 |         var configNBest: Int = 10
 12 |         @Option(name: [.customShort("n"), .customLong("top_n")], help: "Display top n candidates.")
 13 |         var displayTopN: Int = 1
 14 |         @Option(name: [.customLong("zenz")], help: "gguf format model weight for zenz.")
 15 |         var zenzWeightPath: String = ""
 16 |         @Option(name: [.customLong("config_zenzai_inference_limit")], help: "inference limit for zenzai.")
 17 |         var configZenzaiInferenceLimit: Int = .max
 18 |         @Option(name: [.customLong("config_zenzai_base_lm")], help: "Marisa files for Base LM.")
 19 |         var configZenzaiBaseLM: String?
 20 |         @Option(name: [.customLong("config_zenzai_personal_lm")], help: "Marisa files for Personal LM.")
 21 |         var configZenzaiPersonalLM: String?
 22 |         @Option(name: [.customLong("config_zenzai_personalization_alpha")], help: "Strength of personalization (0.5 by default)")
 23 |         var configZenzaiPersonalizationAlpha: Float = 0.5
 24 | 
 25 |         @Flag(name: [.customLong("disable_prediction")], help: "Disable producing prediction candidates.")
 26 |         var disablePrediction = false
 27 | 
 28 |         @Flag(name: [.customLong("only_whole_conversion")], help: "Show only whole conversion (完全一致変換).")
 29 |         var onlyWholeConversion = false
 30 | 
 31 |         @Flag(name: [.customLong("report_score")], help: "Show internal score for the candidate.")
 32 |         var reportScore = false
 33 | 
 34 |         static let configuration = CommandConfiguration(commandName: "run", abstract: "Show help for this utility.")
 35 | 
 36 |         /// Converts `input` and prints up to `displayTopN` candidates, with
 37 |         /// their scores when `--report_score` is set. With
 38 |         /// `--only_whole_conversion` it also prints the entropy of the softmax
 39 |         /// distribution over the scores of the remaining candidates.
 40 |         @MainActor mutating func run() async {
 41 |             let converter = KanaKanjiConverter()
 42 |             var composingText = ComposingText()
 43 |             composingText.insertAtCursorPosition(input, inputStyle: .direct)
 44 |             let result = converter.requestCandidates(composingText, options: requestOptions())
 45 |             // With `onlyWholeConversion`, keep only candidates whose concatenated
 46 |             // ruby equals the katakana form of the input.
 47 |             let mainResults = result.mainResults.filter {
 48 |                 !self.onlyWholeConversion || $0.data.reduce(into: "", {$0.append(contentsOf: $1.ruby)}) == input.toKatakana()
 49 |             }
 50 |             for candidate in mainResults.prefix(self.displayTopN) {
 51 |                 if self.reportScore {
 52 |                     print("\(candidate.text) \(bold: "score:") \(candidate.value)")
 53 |                 } else {
 54 |                     print(candidate.text)
 55 |                 }
 56 |             }
 57 |             // The entropy needs at least one candidate: with none, the mean below
 58 |             // is 0/0 and "Entropy: nan" used to be printed.
 59 |             if self.onlyWholeConversion && !mainResults.isEmpty {
 60 |                 // Report the entropy
 61 |                 let mean = mainResults.reduce(into: 0) { $0 += Double($1.value) } / Double(mainResults.count)
 62 |                 let expValues = mainResults.map { exp(Double($0.value) - mean) }
 63 |                 let sumOfExpValues = expValues.reduce(into: 0, +=)
 64 |                 // Normalize to probabilities (reuses `expValues` instead of calling `exp` again)
 65 |                 let probs = expValues.map { $0 / sumOfExpValues }
 66 |                 // A probability that underflowed to 0 contributes 0 (the limit of p * log p), not NaN.
 67 |                 let entropy = -probs.reduce(into: 0) { $0 += $1 > 0 ? $1 * log($1) : 0 }
 68 |                 print("\(bold: "Entropy:") \(entropy)")
 69 |             }
 70 |         }
 62 | 
 63 |         /// Builds the `ConvertRequestOptions` for this run from the parsed command-line options.
 64 |         /// Terminates with `fatalError` when only one of the two n-gram language model options is given.
 65 |         func requestOptions() -> ConvertRequestOptions {
 66 |             let personalizationMode: ConvertRequestOptions.ZenzaiMode.PersonalizationMode?
 67 |             // Personalization needs both language models; exactly one of them being set is a usage error.
 68 |             switch (self.configZenzaiBaseLM, self.configZenzaiPersonalLM) {
 69 |             case let (base?, personal?):
 70 |                 personalizationMode = .init(baseNgramLanguageModel: base, personalNgramLanguageModel: personal, n: 5, d: 0.75, alpha: self.configZenzaiPersonalizationAlpha)
 71 |             case (nil, nil):
 72 |                 personalizationMode = nil
 73 |             default:
 74 |                 fatalError("Both --config_zenzai_base_lm and --config_zenzai_personal_lm must be set")
 75 |             }
 76 |             var option: ConvertRequestOptions = .withDefaultDictionary(
 77 |                 N_best: self.onlyWholeConversion ? max(self.configNBest, self.displayTopN) : self.configNBest,
 78 |                 requireJapanesePrediction: !self.onlyWholeConversion && !self.disablePrediction,
 79 |                 requireEnglishPrediction: false,
 80 |                 keyboardLanguage: .ja_JP,
 81 |                 typographyLetterCandidate: false,
 82 |                 unicodeCandidate: true,
 83 |                 englishCandidateInRoman2KanaInput: true,
 84 |                 fullWidthRomanCandidate: false,
 85 |                 halfWidthKanaCandidate: false,
 86 |                 learningType: .nothing,
 87 |                 maxMemoryCount: 0,
 88 |                 shouldResetMemory: false,
 89 |                 memoryDirectoryURL: URL(fileURLWithPath: ""),
 90 |                 sharedContainerURL: URL(fileURLWithPath: ""),
 91 |                 // The weight is a file-system path, so build a file URL. `URL(string:)` yields a scheme-less URL and can
 92 |                 // return nil for paths with spaces or non-ASCII characters, which made the former force-unwrap crash.
 93 |                 zenzaiMode: self.zenzWeightPath.isEmpty ? .off : .on(weight: URL(fileURLWithPath: self.zenzWeightPath), inferenceLimit: self.configZenzaiInferenceLimit, personalizationMode: personalizationMode),
 94 |                 metadata: .init(versionString: "anco for debugging")
 95 |             )
 96 |             if self.onlyWholeConversion {
 97 |                 option.requestQuery = .完全一致
 98 |             }
 99 |             return option
100 |         }
101 |     }
102 | }
103 | 


--------------------------------------------------------------------------------
/Sources/CliTool/Subcommands/ZenzEvaluateCommand.swift:
--------------------------------------------------------------------------------
 1 | import KanaKanjiConverterModuleWithDefaultDictionary
 2 | import ArgumentParser
 3 | import Foundation
 4 | 
 5 | extension Subcommands {
 6 |     /// `zenz_evaluate`: runs pure zenz greedy decoding over an evaluation set and reports the results as JSON.
 7 |     struct ZenzEvaluate: AsyncParsableCommand {
 8 |         @Argument(help: "query, answer, tagを備えたjsonファイルへのパス")
 9 |         var inputFile: String = ""
10 | 
11 |         @Option(name: [.customLong("output")], help: "Output file path.")
12 |         var outputFilePath: String? = nil
13 |         @Flag(name: [.customLong("stable")], help: "Report only stable properties; timestamps and values will not be reported.")
14 |         var stable: Bool = false
15 |         @Option(name: [.customLong("zenz")], help: "gguf format model weight for zenz.")
16 |         var zenzWeightPath: String = ""
17 | 
18 |         static let configuration = CommandConfiguration(commandName: "zenz_evaluate", abstract: "Evaluate quality of pure zenz's Conversion for input data.")
19 | 
20 |         /// Decodes the JSON array of evaluation items stored at `inputFile`.
21 |         private func parseInputFile() throws -> [EvaluationInputItem] {
22 |             let data = try Data(contentsOf: URL(fileURLWithPath: self.inputFile))
23 |             return try JSONDecoder().decode([EvaluationInputItem].self, from: data)
24 |         }
25 | 
26 |         /// Builds the prompt (U+EE00 + query, then U+EE02 + the last 40 characters of the left context if any, then U+EE01) and decodes it greedily.
27 |         private func greedyDecoding(query: String, leftContext: String?, zenz: Zenz, maxCount: Int) async -> String {
28 |             let contextPart = leftContext.map { "\u{EE02}" + String($0.suffix(40)) } ?? ""
29 |             return await zenz.pureGreedyDecoding(pureInput: "\u{EE00}\(query)\(contextPart)\u{EE01}", maxCount: maxCount)
30 |         }
31 | 
32 |         /// Evaluates every input item and writes the JSON report to `--output`, or prints it when no output path is given.
33 |         mutating func run() async throws {
34 |             let inputItems = try parseInputFile()
35 |             let converter = await KanaKanjiConverter()
36 |             var executionTime: Double = 0
37 |             var resultItems: [EvaluateItem] = []
38 | 
39 |             // `--zenz` defaults to "" and `URL(string: "")` is nil, so the former force-unwrap crashed; validate the path and build a file URL instead.
40 |             guard !self.zenzWeightPath.isEmpty,
41 |                   let zenz = await converter.getModel(modelURL: URL(fileURLWithPath: self.zenzWeightPath)) else {
42 |                 print("Failed to initialize zenz model")
43 |                 return
44 |             }
45 | 
46 |             for item in inputItems {
47 |                 let start = Date()
48 |                 if item.user_dictionary != nil {
49 |                     print("Warning: zenz_evaluate command does not support user dictionary. User Dictionary Contents are just ignored.")
50 |                 }
51 |                 // Conversion. The budget is the longest answer in UTF-8 bytes; an item without answers falls back to 0 instead of crashing on `max()!`.
52 |                 let maxCount = item.answer.map(\.utf8.count).max() ?? 0
53 |                 let result = await self.greedyDecoding(query: item.query, leftContext: item.left_context, zenz: zenz, maxCount: maxCount)
54 |                 print("Results:", result)
55 |                 resultItems.append(
56 |                     EvaluateItem(
57 |                         query: item.query,
58 |                         answers: item.answer,
59 |                         left_context: item.left_context,
60 |                         outputs: [
61 |                             EvaluateItemOutput(text: result, score: 0.0)
62 |                         ]
63 |                     )
64 |                 )
65 |                 executionTime += Date().timeIntervalSince(start)
66 |                 await zenz.endSession()
67 |             }
68 |             var result = EvaluateResult(n_best: 1, execution_time: executionTime, items: resultItems)
69 |             if stable {
70 |                 result.execution_time = 0
71 |                 result.timestamp = 0
72 |                 result.items.mutatingForeach {
73 |                     $0.outputs.mutatingForeach {
74 |                         $0.score = Double(Int($0.score))
75 |                     }
76 |                 }
77 |             }
78 |             let encoder = JSONEncoder()
79 |             encoder.outputFormatting = [.prettyPrinted, .sortedKeys]
80 |             let json = try encoder.encode(result)
81 | 
82 |             if let outputFilePath {
83 |                 try json.write(to: URL(fileURLWithPath: outputFilePath))
84 |             } else {
85 |                 print(String(data: json, encoding: .utf8)!)
86 |             }
87 |         }
88 |     }
89 | }
90 | 


--------------------------------------------------------------------------------
/Sources/EfficientNGram/Tokenizer.swift:
--------------------------------------------------------------------------------
 1 | import Tokenizers
 2 | import Hub
 3 | import Foundation
 4 | 
 5 | /// Wraps the tokenizer that is loaded from the `tokenizer` folder inside this module's resource bundle
 6 | /// and exposes encoding, decoding and the BOS/EOS token IDs.
 7 | public struct ZenzTokenizer {
 8 |     private let tokenizer: any Tokenizer
 9 |     /// Loads `tokenizer_config.json` and `tokenizer.json` from the bundled folder.
10 |     /// Traps (force-unwrap / `try!`) when the resources are missing or cannot be turned into a tokenizer.
11 |     public init() {
12 |         let resources = Bundle.module.resourceURL!.appendingPathComponent("tokenizer", isDirectory: true)
13 |         let api = HubApi.shared
14 |         let config = try! api.configuration(fileURL: resources.appending(path: "tokenizer_config.json"))
15 |         let data = try! api.configuration(fileURL: resources.appending(path: "tokenizer.json"))
16 |         self.tokenizer = try! AutoTokenizer.from(tokenizerConfig: config, tokenizerData: data)
17 |     }
18 |     /// Encodes `text` into token IDs with the wrapped tokenizer.
19 |     func encode(text: String) -> [Int] { self.tokenizer.encode(text: text) }
20 |     /// Decodes token IDs back into text with the wrapped tokenizer.
21 |     func decode(tokens: [Int]) -> String { self.tokenizer.decode(tokens: tokens) }
22 |     /// BOS token ID reported by the wrapped tokenizer; traps if it has none.
23 |     var startTokenID: Int { self.tokenizer.bosTokenId! }
24 |     /// EOS token ID reported by the wrapped tokenizer; traps if it has none.
25 |     var endTokenID: Int { self.tokenizer.eosTokenId! }
26 |     /// Vocabulary size, currently a fixed constant.
27 |     var vocabSize: Int {
28 |         // FIXME (original marker; presumably because the value is hard-coded rather than read from the tokenizer data — confirm)
29 |         6000
30 |     }
31 | }
32 | 


--------------------------------------------------------------------------------
/Sources/EfficientNGram/tokenizer/README.md:
--------------------------------------------------------------------------------
1 | # tokenizer
2 | 
3 | This tokenizer data is taken from [ku-nlp/gpt2-small-japanese-char](https://huggingface.co/ku-nlp/gpt2-small-japanese-char) and is distributed under the CC BY-SA 4.0 license.


--------------------------------------------------------------------------------
/Sources/EfficientNGram/tokenizer/config.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "_name_or_path": "ku-nlp/gpt2-small-japanese-char",
 3 |   "activation_function": "gelu_new",
 4 |   "architectures": [
 5 |     "GPT2LMHeadModel"
 6 |   ],
 7 |   "attn_pdrop": 0.1,
 8 |   "bos_token_id": 1,
 9 |   "embd_pdrop": 0.1,
10 |   "eos_token_id": 2,
11 |   "initializer_range": 0.02,
12 |   "layer_norm_epsilon": 1e-05,
13 |   "model_type": "gpt2",
14 |   "n_embd": 768,
15 |   "n_ctx": 1024,
16 |   "n_head": 12,
17 |   "n_inner": null,
18 |   "n_layer": 12,
19 |   "n_positions": 1024,
20 |   "pad_token_id": 1,
21 |   "reorder_and_upcast_attn": false,
22 |   "resid_pdrop": 0.1,
23 |   "scale_attn_by_inverse_layer_idx": false,
24 |   "scale_attn_weights": true,
25 |   "summary_activation": null,
26 |   "summary_first_dropout": 0.1,
27 |   "summary_proj_to_labels": true,
28 |   "summary_type": "cls_index",
29 |   "summary_use_proj": true,
30 |   "torch_dtype": "float32",
31 |   "transformers_version": "4.30.0",
32 |   "use_cache": true,
33 |   "vocab_size": 6000
34 | }
35 | 


--------------------------------------------------------------------------------
/Sources/EfficientNGram/tokenizer/special_tokens_map.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "bos_token": {
 3 |     "content": "<s>",
 4 |     "lstrip": false,
 5 |     "normalized": true,
 6 |     "rstrip": false,
 7 |     "single_word": false
 8 |   },
 9 |   "eos_token": {
10 |     "content": "</s>",
11 |     "lstrip": false,
12 |     "normalized": true,
13 |     "rstrip": false,
14 |     "single_word": false
15 |   },
16 |   "pad_token": {
17 |     "content": "[PAD]",
18 |     "lstrip": false,
19 |     "normalized": true,
20 |     "rstrip": false,
21 |     "single_word": false
22 |   },
23 |   "unk_token": {
24 |     "content": "[UNK]",
25 |     "lstrip": false,
26 |     "normalized": true,
27 |     "rstrip": false,
28 |     "single_word": false
29 |   }
30 | }
31 | 


--------------------------------------------------------------------------------
/Sources/EfficientNGram/tokenizer/tokenizer_config.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "add_bos_token": false,
 3 |   "add_prefix_space": false,
 4 |   "added_tokens_decoder": {
 5 |     "0": {
 6 |       "content": "[UNK]",
 7 |       "lstrip": false,
 8 |       "normalized": true,
 9 |       "rstrip": false,
10 |       "single_word": false,
11 |       "special": true
12 |     },
13 |     "1": {
14 |       "content": "[PAD]",
15 |       "lstrip": false,
16 |       "normalized": true,
17 |       "rstrip": false,
18 |       "single_word": false,
19 |       "special": true
20 |     },
21 |     "2": {
22 |       "content": "<s>",
23 |       "lstrip": false,
24 |       "normalized": true,
25 |       "rstrip": false,
26 |       "single_word": false,
27 |       "special": true
28 |     },
29 |     "3": {
30 |       "content": "</s>",
31 |       "lstrip": false,
32 |       "normalized": true,
33 |       "rstrip": false,
34 |       "single_word": false,
35 |       "special": true
36 |     }
37 |   },
38 |   "bos_token": "<s>",
39 |   "clean_up_tokenization_spaces": true,
40 |   "eos_token": "</s>",
41 |   "errors": "replace",
42 |   "extra_special_tokens": {},
43 |   "model_max_length": 1000000000000000019884624838656,
44 |   "pad_token": "[PAD]",
45 |   "tokenizer_class": "GPT2Tokenizer",
46 |   "unk_token": "[UNK]"
47 | }
48 | 


--------------------------------------------------------------------------------
/Sources/KanaKanjiConverterModule/CIDData.swift:
--------------------------------------------------------------------------------
 1 | //
 2 | //  CIDData.swift
 3 | //  azooKey
 4 | //
 5 | //  Created by ensan on 2022/05/05.
 6 | //  Copyright © 2022 ensan. All rights reserved.
 7 | //
 8 | 
 9 | import Foundation
10 | 
11 | /// Named class IDs (CIDs); `cid` yields the integer ID of each case.
12 | public enum CIDData: Sendable {
13 |     /// NOTE(review): presumably the total number of CIDs used by the dictionary data — confirm.
14 |     static var totalCount: Int { 1319 }
15 |     case BOS
16 |     case 記号
17 |     case 係助詞ハ
18 |     case 助動詞デス基本形
19 |     case 一般名詞
20 |     case 固有名詞
21 |     case 人名一般
22 |     case 人名姓
23 |     case 人名名
24 |     case 固有名詞組織
25 |     case 地名一般
26 |     case 数
27 |     case EOS
28 |     /// The integer class ID of this case.
29 |     public var cid: Int {
30 |         switch self {
31 |         case .BOS: return 0
32 |         case .記号: return 5
33 |         case .係助詞ハ: return 261
34 |         case .助動詞デス基本形: return 460
35 |         case .一般名詞: return 1285
36 |         case .固有名詞: return 1288
37 |         case .人名一般: return 1289
38 |         case .人名姓: return 1290
39 |         case .人名名: return 1291
40 |         case .固有名詞組織: return 1292
41 |         case .地名一般: return 1293
42 |         case .数: return 1295
43 |         case .EOS: return 1316
44 |         }
45 |     }
46 | 
47 |     /// Returns whether `cid` lies in the closed range 147...368, the IDs this function classifies as joshi.
48 |     public static func isJoshi(cid: Int) -> Bool { (147...368).contains(cid) }
49 | }
50 | 


--------------------------------------------------------------------------------
/Sources/KanaKanjiConverterModule/Candidate.swift:
--------------------------------------------------------------------------------
  1 | //
  2 | //  Candidate.swift
  3 | //  Keyboard
  4 | //
  5 | //  Created by ensan on 2020/10/26.
  6 | //  Copyright © 2020 ensan. All rights reserved.
  7 | //
  8 | 
  9 | import Foundation
 10 | 
 11 | /// Mutable data describing one clause.
 12 | final class ClauseDataUnit {
 13 |     /// MID of this clause.
 14 |     var mid: Int = MIDData.EOS.mid
 15 |     /// LCID of the clause that follows.
 16 |     var nextLcid = CIDData.EOS.cid
 17 |     /// Text of this unit.
 18 |     var text: String = ""
 19 |     /// Range this unit covers in the input text.
 20 |     var inputRange: Range<Int> = 0 ..< 0
 21 | 
 22 |     /// Appends `unit` to the end of this unit: the texts are concatenated, the input range is
 23 |     /// extended to the end of `unit`, and `nextLcid` is taken from `unit`. `mid` is left untouched.
 24 |     /// - Parameter unit: The unit to merge into this one.
 25 |     func merge(with unit: ClauseDataUnit) {
 26 |         self.nextLcid = unit.nextLcid
 27 |         self.inputRange = self.inputRange.lowerBound ..< unit.inputRange.upperBound
 28 |         self.text += unit.text
 29 |     }
 30 | }
 31 | 
 32 | extension ClauseDataUnit: Equatable { // Equality compares stored fields, not object identity.
 33 |     static func == (lhs: ClauseDataUnit, rhs: ClauseDataUnit) -> Bool { // True when mid, nextLcid, text and inputRange all match.
 34 |         lhs.mid == rhs.mid && lhs.nextLcid == rhs.nextLcid && lhs.text == rhs.text && lhs.inputRange == rhs.inputRange
 35 |     }
 36 | }
 37 | 
 38 | #if DEBUG
 39 | extension ClauseDataUnit: CustomDebugStringConvertible { // Only compiled in DEBUG builds.
 40 |     var debugDescription: String { // Lists all four stored fields.
 41 |         "ClauseDataUnit(mid: \(mid), nextLcid: \(nextLcid), text: \(text), inputRange: \(inputRange))"
 42 |     }
 43 | }
 44 | #endif
 45 | 
 46 | struct CandidateData { // Pairs a list of (clause, value) units with a list of dictionary elements.
 47 |     typealias ClausesUnit = (clause: ClauseDataUnit, value: PValue) // One clause together with a PValue.
 48 |     var clauses: [ClausesUnit]
 49 |     var data: [DicdataElement]
 50 | 
 51 |     init(clauses: [ClausesUnit], data: [DicdataElement]) { // Memberwise initializer.
 52 |         self.clauses = clauses
 53 |         self.data = data
 54 |     }
 55 | 
 56 |     var lastClause: ClauseDataUnit? { // Clause of the last unit, or nil when `clauses` is empty.
 57 |         self.clauses.last?.clause
 58 |     }
 59 | 
 60 |     var isEmpty: Bool { // Emptiness is decided by `clauses` only; `data` is not inspected.
 61 |         clauses.isEmpty
 62 |     }
 63 | }
 64 | 
 65 | public enum CompleteAction: Equatable, Sendable {
 66 |     /// Adjusts the cursor position
 67 |     case moveCursor(Int)
 68 | }
 69 | 
 70 | /// Data of a conversion candidate
 71 | public struct Candidate: Sendable {
 72 |     /// The text that will be input
 73 |     public var text: String
 74 |     /// Evaluation value
 75 |     public var value: PValue
 76 |     /// Number of characters this candidate corresponds to in composingText.input.
 77 |     public var correspondingCount: Int
 78 |     /// The last mid (used for prediction)
 79 |     public var lastMid: Int
 80 |     /// Sequence of DicdataElement
 81 |     public var data: [DicdataElement]
 82 |     /// The `action`s to execute when this candidate is selected as the conversion.
 83 |     /// - note: Variable added in order to move the cursor when brackets are input
 84 |     public var actions: [CompleteAction]
 85 |     /// Whether this candidate can be input
 86 |     /// - note: Flag added for displaying character counts
 87 |     public let inputable: Bool
 88 | 
 89 |     public init(text: String, value: PValue, correspondingCount: Int, lastMid: Int, data: [DicdataElement], actions: [CompleteAction] = [], inputable: Bool = true) {
 90 |         self.text = text
 91 |         self.value = value
 92 |         self.correspondingCount = correspondingCount
 93 |         self.lastMid = lastMid
 94 |         self.data = data
 95 |         self.actions = actions
 96 |         self.inputable = inputable
 97 |     }
 98 |     /// Replaces the `action`s of this candidate after creation (mutates in place).
 99 |     /// - parameters:
100 |     ///  - actions: The `action`s to execute
101 |     @inlinable public mutating func withActions(_ actions: [CompleteAction]) {
102 |         self.actions = actions
103 |     }
104 | 
105 |     private static let dateExpression = "<date format=\".*?\" type=\".*?\" language=\".*?\" delta=\".*?\" deltaunit=\".*?\">"
106 |     private static let randomExpression = "<random type=\".*?\" value=\".*?\">"
107 | 
108 |     /// Parses the templates and generates the text of the conversion candidate.
109 |     public static func parseTemplate(_ text: String) -> String {
110 |         var newText = text
111 |         while let range = newText.range(of: Self.dateExpression, options: .regularExpression) { // Replace every <date ...> template with its preview string.
112 |             let templateString = String(newText[range])
113 |             let template = DateTemplateLiteral.import(from: templateString)
114 |             let value = template.previewString()
115 |             newText.replaceSubrange(range, with: value)
116 |         }
117 |         while let range = newText.range(of: Self.randomExpression, options: .regularExpression) { // Then do the same for every <random ...> template.
118 |             let templateString = String(newText[range])
119 |             let template = RandomTemplateLiteral.import(from: templateString)
120 |             let value = template.previewString()
121 |             newText.replaceSubrange(range, with: value)
122 |         }
123 |         return newText
124 |     }
125 | 
126 |     /// Parses the templates, generates the text of the conversion candidate, and applies it to `text`.
127 |     @inlinable public mutating func parseTemplate() {
128 |         // Note that the consistency between Candidate.text and data.map(\.word).join("") is broken here
129 |         // However, modifying `data` instead is not the desired behavior.
130 |         self.text = Self.parseTemplate(text)
131 |     }
132 | 
133 |     /// Makes the Candidate corresponding to the clause that is the prefix when the input is regarded as a sentence
134 |     public static func makePrefixClauseCandidate(data: some Collection<DicdataElement>) -> Candidate {
135 |         var text = ""
136 |         var correspondingCount = 0
137 |         var lastRcid = CIDData.BOS.cid
138 |         var lastMid = 501 // NOTE(review): magic number; the meaning of mid 501 is not visible here — confirm against MIDData.
139 |         var candidateData: [DicdataElement] = []
140 |         for item in data {
141 |             // If this is a clause boundary
142 |             if DicdataStore.isClause(lastRcid, item.lcid) {
143 |                 break
144 |             }
145 |             text.append(item.word)
146 |             correspondingCount += item.ruby.count
147 |             lastRcid = item.rcid
148 |             // This assumes the case where it is the first one
149 |             if item.mid != 500 && DicdataStore.includeMMValueCalculation(item) { // NOTE(review): magic number 500 — confirm against MIDData.
150 |                 lastMid = item.mid
151 |             }
152 |             candidateData.append(item)
153 |         }
154 |         return Candidate(
155 |             text: text,
156 |             value: -5,
157 |             correspondingCount: correspondingCount,
158 |             lastMid: lastMid,
159 |             data: candidateData
160 |         )
161 |     }
162 | 
163 | }
164 | 


--------------------------------------------------------------------------------
/Sources/KanaKanjiConverterModule/Converter/CommaSeparatedNumber.swift:
--------------------------------------------------------------------------------
 1 | import Foundation
 2 | 
 3 | extension KanaKanjiConverter {
 4 |     /// Produces a candidate that groups the integer digits of the target in threes with commas (`1234567.89` → `1,234,567.89`).
 5 |     ///
 6 |     /// The target must be an optional leading `-`, ASCII digits, and optionally `.` followed by ASCII digits;
 7 |     /// nothing is returned unless the integer part has more than three digits.
 8 |     func commaSeparatedNumberCandidates(_ inputData: ComposingText) -> [Candidate] {
 9 |         let target = inputData.convertTarget
10 |         if target.isEmpty {
11 |             return []
12 |         }
13 | 
14 |         // Strip the sign; it is put back in front of the formatted number.
15 |         let isNegative = target.first == "-"
16 |         let unsigned = isNegative ? target.dropFirst() : Substring(target)
17 |         let parts = unsigned.split(separator: ".", omittingEmptySubsequences: false)
18 |         let hasInvalidPart = parts.contains { part in
19 |             part.isEmpty || part.contains { !($0.isNumber && $0.isASCII) }
20 |         }
21 |         if parts.count > 2 || hasInvalidPart {
22 |             return []
23 |         }
24 | 
25 |         // Peel three-digit groups off the right end of the integer part.
26 |         var remaining = parts[0]
27 |         guard remaining.count > 3 else { return [] }
28 |         var groups: [Substring] = []
29 |         while remaining.count > 3 {
30 |             groups.insert(remaining.suffix(3), at: 0)
31 |             remaining = remaining.dropLast(3)
32 |         }
33 |         groups.insert(remaining, at: 0)
34 |         var result = (isNegative ? "-" : "") + groups.joined(separator: ",")
35 |         if parts.count == 2 {
36 |             result += "." + parts[1]
37 |         }
38 | 
39 |         // The ruby is the katakana form of the original, unformatted target.
40 |         let ruby = inputData.convertTarget.toKatakana()
41 |         let element = DicdataElement(word: result, ruby: ruby, cid: CIDData.固有名詞.cid, mid: MIDData.一般.mid, value: -10)
42 |         return [
43 |             Candidate(text: result, value: -10, correspondingCount: inputData.input.count, lastMid: MIDData.一般.mid, data: [element])
44 |         ]
45 |     }
46 | }
47 | 


--------------------------------------------------------------------------------
/Sources/KanaKanjiConverterModule/Converter/ConverisonResult.swift:
--------------------------------------------------------------------------------
 1 | //
 2 | //  ConversionResult.swift
 3 | //
 4 | //
 5 | //  Created by miwa on 2023/08/31.
 6 | //
 7 | 
 8 | public struct ConversionResult: Sendable {
 9 |     /// Candidates that can be listed in the candidate view in exactly this order
10 |     public var mainResults: [Candidate]
11 |     /// Candidates obtained by converting only the first clause
12 |     public var firstClauseResults: [Candidate]
13 | }
14 | 


--------------------------------------------------------------------------------
/Sources/KanaKanjiConverterModule/Converter/EmailAddress.swift:
--------------------------------------------------------------------------------
 1 | import Foundation
 2 | 
 3 | extension KanaKanjiConverter {
 4 |     private static let domains = [ // Known mail domains; the position in this list lowers the candidate's value (see `baseValue - PValue(i)` below).
 5 |         "@gmail.com",
 6 |         "@icloud.com",
 7 |         "@yahoo.co.jp",
 8 |         "@au.com",
 9 |         "@docomo.ne.jp",
10 |         "@excite.co.jp",
11 |         "@ezweb.ne.jp",
12 |         "@googlemail.com",
13 |         "@hotmail.co.jp",
14 |         "@hotmail.com",
15 |         "@i.softbank.jp",
16 |         "@live.jp",
17 |         "@me.com",
18 |         "@mineo.jp",
19 |         "@nifty.com",
20 |         "@outlook.com",
21 |         "@outlook.jp",
22 |         "@softbank.ne.jp",
23 |         "@yahoo.ne.jp",
24 |         "@ybb.ne.jp",
25 |         "@ymobile.ne.jp"
26 |     ]
27 |     /// Generates e-mail-address candidates when the target contains `@`: the text before the last `@` is kept as the ID and completed with every known domain that starts with the text typed after that `@`.
28 |     /// - Parameter inputData: The composing text whose `convertTarget` is inspected.
29 |     func toEmailAddressCandidates(_ inputData: ComposingText) -> [Candidate] {
30 |         guard let atIndex = inputData.convertTarget.lastIndex(of: "@") else {
31 |             return []
32 |         }
33 |         let id = inputData.convertTarget[..<atIndex]
34 |         let domainPrefix = inputData.convertTarget[inputData.convertTarget.index(after: atIndex)...]
35 |         if !(id.isEnglishSentence || id.isEmpty) { // Only an empty ID or one accepted by `isEnglishSentence` is completed.
36 |             return []
37 |         }
38 |         let baseValue: PValue = id.isEmpty ? -20 : -13 // A bare "@..." without an ID starts from a lower value.
39 |         let string = inputData.convertTarget.toKatakana()
40 |         var results: [Candidate] = []
41 |         for (i, domain) in Self.domains.enumerated() {
42 |             if domain.hasPrefix("@\(domainPrefix)") { // With nothing typed after "@" every domain matches.
43 |                 let address = id.appending(domain)
44 |                 results.append(
45 |                     Candidate(
46 |                         text: address,
47 |                         value: baseValue - PValue(i),
48 |                         correspondingCount: inputData.input.count,
49 |                         lastMid: MIDData.一般.mid,
50 |                         data: [DicdataElement(word: address, ruby: string, cid: .zero, mid: MIDData.一般.mid, value: baseValue - PValue(i))]
51 |                     )
52 |                 )
53 |             }
54 |         }
55 |         return results
56 |     }
57 | }
58 | 


--------------------------------------------------------------------------------
/Sources/KanaKanjiConverterModule/Converter/SpecialCandidateProvider.swift:
--------------------------------------------------------------------------------
 1 | public protocol SpecialCandidateProvider: Sendable { // A source of extra candidates that is queried with the converter, the composing text and the request options.
 2 |     @MainActor
 3 |     func provideCandidates(converter: KanaKanjiConverter, inputData: ComposingText, options: ConvertRequestOptions) -> [Candidate]
 4 | }
 5 | 
 6 | public struct CalendarSpecialCandidateProvider: SpecialCandidateProvider { // Concatenates the converter's wareki and seireki candidates.
 7 |     public init() {}
 8 |     @MainActor public func provideCandidates(converter: KanaKanjiConverter, inputData: ComposingText, options _: ConvertRequestOptions) -> [Candidate] {
 9 |         converter.toWarekiCandidates(inputData) + converter.toSeirekiCandidates(inputData)
10 |     }
11 | }
12 | 
13 | public struct EmailAddressSpecialCandidateProvider: SpecialCandidateProvider { // Forwards to `toEmailAddressCandidates`; the options are ignored.
14 |     public init() {}
15 |     @MainActor public func provideCandidates(converter: KanaKanjiConverter, inputData: ComposingText, options _: ConvertRequestOptions) -> [Candidate] {
16 |         converter.toEmailAddressCandidates(inputData)
17 |     }
18 | }
19 | 
20 | public struct TypographySpecialCandidateProvider: SpecialCandidateProvider { // Forwards to `typographicalCandidates`; the options are ignored.
21 |     public init() {}
22 |     @MainActor public func provideCandidates(converter: KanaKanjiConverter, inputData: ComposingText, options _: ConvertRequestOptions) -> [Candidate] {
23 |         converter.typographicalCandidates(inputData)
24 |     }
25 | }
26 | 
27 | public struct UnicodeSpecialCandidateProvider: SpecialCandidateProvider { // Forwards to `unicodeCandidates`; the options are ignored.
28 |     public init() {}
29 |     @MainActor public func provideCandidates(converter: KanaKanjiConverter, inputData: ComposingText, options _: ConvertRequestOptions) -> [Candidate] {
30 |         converter.unicodeCandidates(inputData)
31 |     }
32 | }
33 | 
34 | public struct VersionSpecialCandidateProvider: SpecialCandidateProvider { // Forwards to `toVersionCandidate`; the only provider here that passes the options on.
35 |     public init() {}
36 |     @MainActor public func provideCandidates(converter: KanaKanjiConverter, inputData: ComposingText, options: ConvertRequestOptions) -> [Candidate] {
37 |         converter.toVersionCandidate(inputData, options: options)
38 |     }
39 | }
40 | 
41 | public struct TimeExpressionSpecialCandidateProvider: SpecialCandidateProvider { // Forwards to `convertToTimeExpression`; the options are ignored.
42 |     public init() {}
43 |     @MainActor public func provideCandidates(converter: KanaKanjiConverter, inputData: ComposingText, options _: ConvertRequestOptions) -> [Candidate] {
44 |         converter.convertToTimeExpression(inputData)
45 |     }
46 | }
47 | 
48 | public struct CommaSeparatedNumberSpecialCandidateProvider: SpecialCandidateProvider { // Forwards to `commaSeparatedNumberCandidates`; the options are ignored.
49 |     public init() {}
50 |     @MainActor public func provideCandidates(converter: KanaKanjiConverter, inputData: ComposingText, options _: ConvertRequestOptions) -> [Candidate] {
51 |         converter.commaSeparatedNumberCandidates(inputData)
52 |     }
53 | }
54 | 
55 | public extension SpecialCandidateProvider where Self == CalendarSpecialCandidateProvider { // This and the following constrained extensions add static shorthands (`.calendar`, `.emailAddress`, ...) that each create a fresh provider.
56 |     static var calendar: Self { .init() }
57 | }
58 | 
59 | public extension SpecialCandidateProvider where Self == EmailAddressSpecialCandidateProvider {
60 |     static var emailAddress: Self { .init() }
61 | }
62 | 
63 | public extension SpecialCandidateProvider where Self == TypographySpecialCandidateProvider {
64 |     static var typography: Self { .init() }
65 | }
66 | 
67 | public extension SpecialCandidateProvider where Self == UnicodeSpecialCandidateProvider {
68 |     static var unicode: Self { .init() }
69 | }
70 | 
71 | public extension SpecialCandidateProvider where Self == VersionSpecialCandidateProvider {
72 |     static var version: Self { .init() }
73 | }
74 | 
75 | public extension SpecialCandidateProvider where Self == TimeExpressionSpecialCandidateProvider {
76 |     static var timeExpression: Self { .init() }
77 | }
78 | 
79 | public extension SpecialCandidateProvider where Self == CommaSeparatedNumberSpecialCandidateProvider {
80 |     static var commaSeparatedNumber: Self { .init() }
81 | }
82 | 


--------------------------------------------------------------------------------
/Sources/KanaKanjiConverterModule/Converter/SpellChecker.swift:
--------------------------------------------------------------------------------
 1 | //
 2 | //  SpellChecker.swift
 3 | //
 4 | //
 5 | //  Created by ensan on 2023/05/20.
 6 | //
 7 | 
 8 | import Foundation
 9 | #if os(iOS) || os(tvOS) || os(visionOS)
10 | import UIKit
11 | #elseif os(macOS)
12 | import AppKit
13 | #endif
14 | 
15 | @MainActor final class SpellChecker { // Wrapper over the platform spell checker: UITextChecker on iOS/tvOS/visionOS, NSSpellChecker.shared on macOS, none elsewhere.
16 |     #if os(iOS) || os(tvOS) || os(visionOS)
17 |     private let checker = UITextChecker()
18 |     #elseif os(macOS)
19 |     private let checker = NSSpellChecker.shared
20 |     #endif
21 | 
22 |     func completions(forPartialWordRange range: NSRange, in string: String, language: String) -> [String]? { // Forwards to the platform checker; always nil on platforms without one.
23 |         #if os(iOS) || os(tvOS) || os(visionOS)
24 |         return checker.completions(forPartialWordRange: range, in: string, language: language)
25 |         #elseif os(macOS)
26 |         return checker.completions(forPartialWordRange: range, in: string, language: language, inSpellDocumentWithTag: 0) // Document tag is fixed to 0.
27 |         #else
28 |         return nil
29 |         #endif
30 |     }
31 | }
32 | 


--------------------------------------------------------------------------------
/Sources/KanaKanjiConverterModule/Converter/TimeExpression.swift:
--------------------------------------------------------------------------------
 1 | import Foundation
 2 | 
 3 | extension KanaKanjiConverter {
 4 |     /// Turns a 3- or 4-digit ASCII number into a clock-time candidate (`930` → `9:30`, `0930` → `09:30`).
 5 |     ///
 6 |     /// A 3-digit target is read as `H` + `MM`, a 4-digit target as `HH` + `MM` with `HH` in 0...24;
 7 |     /// minutes must be in 0...59. Any other target yields no candidate.
 8 |     func convertToTimeExpression(_ inputData: ComposingText) -> [Candidate] {
 9 |         let digits = inputData.convertTarget
10 |         // Reject anything that is not made of ASCII digits only.
11 |         guard digits.allSatisfy({ $0.isNumber && $0.isASCII }) else {
12 |             return []
13 |         }
14 | 
15 |         // Number of leading digits that form the hour part.
16 |         let hourDigitCount: Int
17 |         switch digits.count {
18 |         case 3: hourDigitCount = 1
19 |         case 4: hourDigitCount = 2
20 |         default: return []
21 |         }
22 |         let hour = Int(digits.prefix(hourDigitCount))!
23 |         let minute = Int(digits.suffix(2))!
24 |         let validHours = hourDigitCount == 1 ? 0...9 : 0...24
25 |         guard validHours.contains(hour), (0...59).contains(minute) else {
26 |             return []
27 |         }
28 | 
29 |         // A single-digit hour is printed as is; a two-digit hour keeps its zero padding.
30 |         let hourText = hourDigitCount == 1 ? "\(hour)" : String(format: "%02d", hour)
31 |         let timeExpression = "\(hourText):\(String(format: "%02d", minute))"
32 |         let element = DicdataElement(word: timeExpression, ruby: digits, cid: CIDData.固有名詞.cid, mid: MIDData.一般.mid, value: -10)
33 |         return [
34 |             Candidate(
35 |                 text: timeExpression,
36 |                 value: -10,
37 |                 correspondingCount: digits.count,
38 |                 lastMid: MIDData.一般.mid,
39 |                 data: [element]
40 |             )
41 |         ]
42 |     }
43 | }
44 | 


--------------------------------------------------------------------------------
/Sources/KanaKanjiConverterModule/Converter/Unicode.swift:
--------------------------------------------------------------------------------
 1 | //
 2 | //  Unicode.swift
 3 | //  Keyboard
 4 | //
 5 | //  Created by ensan on 2020/11/04.
 6 | //  Copyright © 2020 ensan. All rights reserved.
 7 | //
 8 | 
 9 | import Foundation
10 | 
extension KanaKanjiConverter {
    /// Converts a Unicode code-point notation (`uxxxx`, `Uxxxx`, `u+xxxx`, `U+xxxx`) into the
    /// character it denotes.
    ///
    /// The conversion target is first passed through `toKatakana()`. The markers are then tried in
    /// the order `u`, `U`, `u+`, `U+`, and the first one whose remainder parses as a hexadecimal
    /// integer forming a valid Unicode scalar wins.
    /// - Parameter inputData: The text being composed.
    /// - Returns: A single candidate holding the decoded character, or an empty array.
    func unicodeCandidates(_ inputData: ComposingText) -> [Candidate] {
        let value0: PValue = -10
        let target = inputData.convertTarget.toKatakana()
        for marker in ["u", "U", "u+", "U+"] {
            guard target.hasPrefix(marker) else {
                continue
            }
            guard let codePoint = Int(target.dropFirst(marker.count), radix: 16),
                  let scalar = UnicodeScalar(codePoint) else {
                continue
            }
            let character = String(scalar)
            let element = DicdataElement(word: character, ruby: target, cid: .zero, mid: MIDData.一般.mid, value: value0)
            return [
                Candidate(
                    text: character,
                    value: value0,
                    correspondingCount: inputData.input.count,
                    lastMid: MIDData.一般.mid,
                    data: [element]
                )
            ]
        }
        return []
    }
}
34 | 


--------------------------------------------------------------------------------
/Sources/KanaKanjiConverterModule/Converter/VersionCandidate.swift:
--------------------------------------------------------------------------------
 1 | //
 2 | //  VersionCandidate.swift
 3 | //  Keyboard
 4 | //
 5 | //  Created by N-i-ke on 2023/05/13.
 6 | //  Copyright © 2023 ensan All rights reserved.
 7 | //
 8 | 
 9 | import Foundation
10 | 
extension KanaKanjiConverter {

    /// Produces a candidate that shows the version string.
    ///
    /// Mozc reportedly shows its language-model version when the user types 「バージョン」; this
    /// mirrors that behaviour so that something like "azooKey 1.7.2" can be offered.
    /// - Parameters:
    ///   - inputData: The text being composed.
    ///   - options: Request options; `metadata?.versionString` supplies the text to show.
    /// - Returns: One candidate when the katakana reading of the input is 「バージョン」 and a
    ///   version string is available, otherwise an empty array.
    func toVersionCandidate(_ inputData: ComposingText, options: ConvertRequestOptions) -> [Candidate] {
        let reading = inputData.convertTarget.toKatakana()
        guard reading == "バージョン", let versionString = options.metadata?.versionString else {
            return []
        }
        let element = DicdataElement(
            word: versionString,
            ruby: reading,
            cid: CIDData.固有名詞.cid,
            mid: MIDData.一般.mid,
            value: -30
        )
        return [
            Candidate(
                text: versionString,
                value: -30,
                correspondingCount: inputData.input.count,
                lastMid: MIDData.一般.mid,
                data: [element]
            )
        ]
    }
}
30 | 


--------------------------------------------------------------------------------
/Sources/KanaKanjiConverterModule/DicdataStore/DicdataElement.swift:
--------------------------------------------------------------------------------
  1 | //
  2 | //  DicdataElement.swift
  3 | //  Keyboard
  4 | //
  5 | //  Created by ensan on 2020/09/10.
  6 | //  Copyright © 2020 ensan. All rights reserved.
  7 | //
  8 | 
  9 | import Foundation
 10 | 
 11 | public struct DicdataElement: Equatable, Hashable, Sendable {
 12 |     static let BOSData = Self(word: "", ruby: "", cid: CIDData.BOS.cid, mid: MIDData.BOS.mid, value: 0, adjust: 0)
 13 |     static let EOSData = Self(word: "", ruby: "", cid: CIDData.EOS.cid, mid: MIDData.EOS.mid, value: 0, adjust: 0)
 14 | 
 15 |     public init(word: String, ruby: String, lcid: Int, rcid: Int, mid: Int, value: PValue, adjust: PValue = .zero, metadata: DicdataElementMetadata = .empty) {
 16 |         self.word = word
 17 |         self.ruby = ruby
 18 |         self.lcid = lcid
 19 |         self.rcid = rcid
 20 |         self.mid = mid
 21 |         self.baseValue = value
 22 |         self.adjust = adjust
 23 |         self.metadata = metadata
 24 |     }
 25 | 
 26 |     public init(word: String, ruby: String, cid: Int, mid: Int, value: PValue, adjust: PValue = .zero, metadata: DicdataElementMetadata = .empty) {
 27 |         self.word = word
 28 |         self.ruby = ruby
 29 |         self.lcid = cid
 30 |         self.rcid = cid
 31 |         self.mid = mid
 32 |         self.baseValue = value
 33 |         self.adjust = adjust
 34 |         self.metadata = metadata
 35 |     }
 36 | 
 37 |     public init(ruby: String, cid: Int, mid: Int, value: PValue, adjust: PValue = .zero, metadata: DicdataElementMetadata = .empty) {
 38 |         self.word = ruby
 39 |         self.ruby = ruby
 40 |         self.lcid = cid
 41 |         self.rcid = cid
 42 |         self.mid = mid
 43 |         self.baseValue = value
 44 |         self.adjust = adjust
 45 |         self.metadata = metadata
 46 |     }
 47 | 
 48 |     public consuming func adjustedData(_ adjustValue: PValue) -> Self {
 49 |         self.adjust += adjustValue
 50 |         return self
 51 |     }
 52 | 
 53 |     public var word: String
 54 |     public var ruby: String
 55 |     public var lcid: Int
 56 |     public var rcid: Int
 57 |     public var mid: Int
 58 |     var baseValue: PValue
 59 |     public var adjust: PValue
 60 |     public var metadata: DicdataElementMetadata
 61 | 
 62 |     public func value() -> PValue {
 63 |         min(.zero, self.baseValue + self.adjust)
 64 |     }
 65 | 
 66 |     public static func == (lhs: Self, rhs: Self) -> Bool {
 67 |         lhs.word == rhs.word && lhs.ruby == rhs.ruby && lhs.lcid == rhs.lcid && lhs.mid == rhs.mid && lhs.rcid == rhs.rcid && lhs.metadata == rhs.metadata
 68 |     }
 69 | 
 70 |     public func hash(into hasher: inout Hasher) {
 71 |         hasher.combine(word)
 72 |         hasher.combine(ruby)
 73 |         hasher.combine(lcid)
 74 |         hasher.combine(rcid)
 75 |         hasher.combine(metadata)
 76 |     }
 77 | }
 78 | 
 79 | extension DicdataElement: CustomDebugStringConvertible {
 80 |     public var debugDescription: String {
 81 |         "("
 82 |         + "ruby: \(self.ruby), "
 83 |         + "word: \(self.word), "
 84 |         + "cid: (\(self.lcid), \(self.rcid)), "
 85 |         + "mid: \(self.mid), "
 86 |         + "value: \(self.baseValue)+\(self.adjust)=\(self.value()), "
 87 |         + "metadata: ("
 88 |         + "isLearned: \(self.metadata.contains(.isLearned)), "
 89 |         + "isFromUserDictionary: \(self.metadata.contains(.isFromUserDictionary))"
 90 |         + ")"
 91 |         + ")"
 92 |     }
 93 | }
 94 | 
 95 | public struct DicdataElementMetadata: OptionSet, Sendable, Hashable, Equatable {
 96 |     public let rawValue: UInt32
 97 |     public init(rawValue: UInt32) {
 98 |         self.rawValue = rawValue
 99 |     }
100 | 
101 |     public static let empty: Self = []
102 |     /// 学習データから得られた候補にはこのフラグを立てる
103 |     public static let isLearned = DicdataElementMetadata(rawValue: 1 << 0) // 1
104 |     /// ユーザ辞書から得られた候補にはこのフラグを立てる
105 |     public static let isFromUserDictionary = DicdataElementMetadata(rawValue: 1 << 1) // 2
106 | }
107 | 


--------------------------------------------------------------------------------
/Sources/KanaKanjiConverterModule/Kana2Kanji/Kana2Kanji.swift:
--------------------------------------------------------------------------------
 1 | //
 2 | //  kana2kanji.swift
 3 | //  Kana2KajiProject
 4 | //
 5 | //  Created by ensan on 2020/09/02.
 6 | //  Copyright © 2020 ensan. All rights reserved.
 7 | //
 8 | 
 9 | import Foundation
10 | 
/// Floating-point type used for scores throughout the converter:
/// `Float16` on iOS and tvOS, `Float32` on every other platform.
#if os(iOS) || os(tvOS)
public typealias PValue = Float16
#else
public typealias PValue = Float32
#endif

struct Kana2Kanji {
    var dicdataStore = DicdataStore()

    /// Turns a `CandidateData` into a `Candidate`.
    ///
    /// The purpose of this step is to take the semantic connection between clauses into account:
    /// walking the clauses in order, the `getMMValue` of each consecutive pair of `mid`s
    /// (starting from the EOS `mid`) is summed and added to the value of the last clause.
    /// - Parameter data: The candidate data. It must contain at least one clause; the last clause
    ///   is force-unwrapped.
    /// - Returns: The resulting candidate.
    func processClauseCandidate(_ data: CandidateData) -> Candidate {
        var previousMid: Int = MIDData.EOS.mid
        var mmTotal: PValue = .zero
        var text = ""
        var correspondingCount = 0
        for unit in data.clauses {
            let mid = unit.clause.mid
            mmTotal = mmTotal + self.dicdataStore.getMMValue(previousMid, mid)
            previousMid = mid
            text += unit.clause.text
            correspondingCount += unit.clause.inputRange.count
        }
        let lastUnit = data.clauses.last!
        return Candidate(
            text: text,
            value: lastUnit.value + mmTotal,
            correspondingCount: correspondingCount,
            lastMid: lastUnit.clause.mid,
            data: data.data
        )
    }
}
47 | 


--------------------------------------------------------------------------------
/Sources/KanaKanjiConverterModule/Kana2Kanji/added_last_1_character.swift:
--------------------------------------------------------------------------------
  1 | //
  2 | //  afterCharacterAdded.swift
  3 | //  Keyboard
  4 | //
  5 | //  Created by ensan on 2020/09/14.
  6 | //  Copyright © 2020 ensan. All rights reserved.
  7 | //
  8 | 
  9 | import Foundation
 10 | import SwiftUtils
 11 | 
 12 | extension Kana2Kanji {
 13 |     /// カナを漢字に変換する関数, 最後の一文字を追加した場合。
 14 |     /// - Parameters:
 15 |     ///   - addedCharacter: 追加された文字。
 16 |     ///   - N_best: N_best。
 17 |     ///   - previousResult: 追加される前のデータ。
 18 |     /// - Returns:
 19 |     ///   - 変換候補。
 20 |     /// ### 実装状況
 21 |     /// (0)多用する変数の宣言。
 22 |     ///
 23 |     /// (1)まず、追加された一文字に繋がるノードを列挙する。
 24 |     ///
 25 |     /// (2)次に、計算済みノードから、(1)で求めたノードにつながるようにregisterして、N_bestを求めていく。
 26 |     ///
 27 |     /// (3)(1)のregisterされた結果をresultノードに追加していく。この際EOSとの連接コストを計算しておく。
 28 |     ///
 29 |     /// (4)ノードをアップデートした上で返却する。
 30 |     func kana2lattice_addedLast(_ inputData: ComposingText, N_best: Int, previousResult: (inputData: ComposingText, nodes: Nodes), needTypoCorrection: Bool) -> (result: LatticeNode, nodes: Nodes) {
 31 |         debug("一文字追加。内部文字列は\(inputData.input).\(previousResult.nodes.map {($0.first?.data.ruby, $0.first?.inputRange)})")
 32 |         // (0)
 33 |         var nodes = previousResult.nodes
 34 |         let count = previousResult.inputData.input.count
 35 | 
 36 |         // (1)
 37 |         let addedNodes: [[LatticeNode]] = (0...count).map {(i: Int) in
 38 |             self.dicdataStore.getLOUDSData(inputData: inputData, from: i, to: count, needTypoCorrection: needTypoCorrection)
 39 |         }
 40 | 
 41 |         // ココが一番時間がかかっていた。
 42 |         // (2)
 43 |         for nodeArray in nodes {
 44 |             for node in nodeArray {
 45 |                 if node.prevs.isEmpty {
 46 |                     continue
 47 |                 }
 48 |                 if self.dicdataStore.shouldBeRemoved(data: node.data) {
 49 |                     continue
 50 |                 }
 51 |                 // 変換した文字数
 52 |                 let nextIndex = node.inputRange.endIndex
 53 |                 for nextnode in addedNodes[nextIndex] {
 54 |                     // この関数はこの時点で呼び出して、後のnode.registered.isEmptyで最終的に弾くのが良い。
 55 |                     if self.dicdataStore.shouldBeRemoved(data: nextnode.data) {
 56 |                         continue
 57 |                     }
 58 |                     // クラスの連続確率を計算する。
 59 |                     let ccValue: PValue = self.dicdataStore.getCCValue(node.data.rcid, nextnode.data.lcid)
 60 |                     // nodeの持っている全てのprevnodeに対して
 61 |                     for (index, value) in node.values.enumerated() {
 62 |                         let newValue: PValue = ccValue + value
 63 |                         // 追加すべきindexを取得する
 64 |                         let lastindex: Int = (nextnode.prevs.lastIndex(where: {$0.totalValue >= newValue}) ?? -1) + 1
 65 |                         if lastindex == N_best {
 66 |                             continue
 67 |                         }
 68 |                         let newnode: RegisteredNode = node.getRegisteredNode(index, value: newValue)
 69 |                         // カウントがオーバーしている場合は除去する
 70 |                         if nextnode.prevs.count >= N_best {
 71 |                             nextnode.prevs.removeLast()
 72 |                         }
 73 |                         // removeしてからinsertした方が速い (insertはO(N)なので)
 74 |                         nextnode.prevs.insert(newnode, at: lastindex)
 75 |                     }
 76 |                 }
 77 |             }
 78 |         }
 79 | 
 80 |         // (3)
 81 |         let result = LatticeNode.EOSNode
 82 |         for (i, nodeArray) in addedNodes.enumerated() {
 83 |             for node in nodeArray {
 84 |                 if node.prevs.isEmpty {
 85 |                     continue
 86 |                 }
 87 |                 // 生起確率を取得する。
 88 |                 let wValue = node.data.value()
 89 |                 if i == 0 {
 90 |                     // valuesを更新する
 91 |                     node.values = node.prevs.map {$0.totalValue + wValue + self.dicdataStore.getCCValue($0.data.rcid, node.data.lcid)}
 92 |                 } else {
 93 |                     // valuesを更新する
 94 |                     node.values = node.prevs.map {$0.totalValue + wValue}
 95 |                 }
 96 |                 // 最後に至るので
 97 |                 for index in node.prevs.indices {
 98 |                     let newnode = node.getRegisteredNode(index, value: node.values[index])
 99 |                     result.prevs.append(newnode)
100 |                 }
101 |             }
102 |         }
103 | 
104 |         // (4)
105 |         for (index, nodeArray) in addedNodes.enumerated() where index < nodes.endIndex {
106 |             nodes[index].append(contentsOf: nodeArray)
107 |         }
108 |         nodes.append(addedNodes.last ?? [])
109 |         return (result: result, nodes: nodes)
110 |     }
111 | }
112 | 


--------------------------------------------------------------------------------
/Sources/KanaKanjiConverterModule/Kana2Kanji/all.swift:
--------------------------------------------------------------------------------
 1 | //
 2 | //  all.swift
 3 | //  Keyboard
 4 | //
 5 | //  Created by ensan on 2020/09/14.
 6 | //  Copyright © 2020 ensan. All rights reserved.
 7 | //
 8 | 
 9 | import Foundation
10 | import SwiftUtils
11 | 
extension Kana2Kanji {
    /// Basic unit of the lattice: element `i` holds the nodes that start at input position `i`.
    typealias Nodes = [[LatticeNode]]

    /// Kana-to-kanji lattice construction from scratch, without any previous result.
    /// - Parameters:
    ///   - inputData: The input.
    ///   - N_best: Maximum number of predecessors kept per node.
    ///   - needTypoCorrection: Forwarded to the dictionary lookup.
    /// - Returns: The EOS node collecting every complete path, and the lattice.
    ///
    /// ### Steps
    /// (0) Declare frequently used values.
    ///
    /// (1) Enumerate the nodes for every start position.
    ///
    /// (2) Walk the nodes in order, registering each one as a predecessor of the nodes that
    ///     follow it, keeping at most `N_best` per node.
    ///
    /// (3) Nodes that reach the end of the input are appended to the result node.
    ///
    /// (4) Return the result together with the lattice.
    func kana2lattice_all(_ inputData: ComposingText, N_best: Int, needTypoCorrection: Bool) -> (result: LatticeNode, nodes: Nodes) {
        debug("新規に計算を行います。inputされた文字列は\(inputData.input.count)文字分の\(inputData.convertTarget)")
        let count: Int = inputData.input.count
        let result: LatticeNode = LatticeNode.EOSNode
        let nodes: [[LatticeNode]] = (.zero ..< count).map { start in
            dicdataStore.getLOUDSDataInRange(inputData: inputData, from: start, needTypoCorrection: needTypoCorrection)
        }
        // For the nodes that start at position i ...
        for (i, nodeArray) in nodes.enumerated() {
            // ... and for each of those nodes.
            for node in nodeArray {
                guard !node.prevs.isEmpty else {
                    continue
                }
                guard !self.dicdataStore.shouldBeRemoved(data: node.data) else {
                    continue
                }
                // Word value of the node itself.
                let wValue: PValue = node.data.value()
                // Refresh `values`; nodes starting at position 0 also add the connection value
                // to their predecessor.
                if i == 0 {
                    node.values = node.prevs.map { prev in
                        prev.totalValue + wValue + self.dicdataStore.getCCValue(prev.data.rcid, node.data.lcid)
                    }
                } else {
                    node.values = node.prevs.map { prev in
                        prev.totalValue + wValue
                    }
                }
                // Position at which this node ends.
                let nextIndex: Int = node.inputRange.endIndex
                guard nextIndex != count else {
                    // The node reaches the end of the input: register every path in the result.
                    for prevIndex in node.prevs.indices {
                        let registered: RegisteredNode = node.getRegisteredNode(prevIndex, value: node.values[prevIndex])
                        result.prevs.append(registered)
                    }
                    continue
                }
                // Otherwise connect to every node that can follow.
                for nextnode in nodes[nextIndex] {
                    // Filter here; nodes that end up without predecessors are skipped later on.
                    guard !self.dicdataStore.shouldBeRemoved(data: nextnode.data) else {
                        continue
                    }
                    // Class connection value between the two nodes.
                    let ccValue: PValue = self.dicdataStore.getCCValue(node.data.rcid, nextnode.data.lcid)
                    // For every predecessor path held by `node`.
                    for (prevIndex, accumulated) in node.values.enumerated() {
                        let newValue: PValue = ccValue + accumulated
                        // Insert right after the last entry whose value is not smaller than the new one.
                        let insertionIndex: Int = nextnode.prevs.lastIndex(where: { $0.totalValue >= newValue }).map { $0 + 1 } ?? 0
                        guard insertionIndex != N_best else {
                            continue
                        }
                        let registered: RegisteredNode = node.getRegisteredNode(prevIndex, value: newValue)
                        // Drop the last entry first when the list is already full;
                        // removing before inserting is faster because insert is O(N).
                        if nextnode.prevs.count >= N_best {
                            nextnode.prevs.removeLast()
                        }
                        nextnode.prevs.insert(registered, at: insertionIndex)
                    }
                }
            }
        }
        return (result: result, nodes: nodes)
    }

}
97 | 


--------------------------------------------------------------------------------
/Sources/KanaKanjiConverterModule/Kana2Kanji/all_with_prefix_constraint.swift:
--------------------------------------------------------------------------------
  1 | import Foundation
  2 | import SwiftUtils
  3 | 
  4 | extension Kana2Kanji {
  5 |     /// カナを漢字に変換する関数, 前提はなくかな列が与えられた場合。
  6 |     /// - Parameters:
  7 |     ///   - inputData: 入力データ。
  8 |     ///   - N_best: N_best。
  9 |     /// - Returns:
 10 |     ///   変換候補。
 11 |     /// ### 実装状況
 12 |     /// (0)多用する変数の宣言。
 13 |     ///
 14 |     /// (1)まず、追加された一文字に繋がるノードを列挙する。
 15 |     ///
 16 |     /// (2)次に、計算済みノードから、(1)で求めたノードにつながるようにregisterして、N_bestを求めていく。
 17 |     ///
 18 |     /// (3)(1)のregisterされた結果をresultノードに追加していく。この際EOSとの連接計算を行っておく。
 19 |     ///
 20 |     /// (4)ノードをアップデートした上で返却する。
 21 |     func kana2lattice_all_with_prefix_constraint(_ inputData: ComposingText, N_best: Int, constraint: PrefixConstraint) -> (result: LatticeNode, nodes: Nodes) {
 22 |         debug("新規に計算を行います。inputされた文字列は\(inputData.input.count)文字分の\(inputData.convertTarget)。制約は\(constraint)")
 23 |         let count: Int = inputData.input.count
 24 |         let result: LatticeNode = LatticeNode.EOSNode
 25 |         let nodes: [[LatticeNode]] = (.zero ..< count).map {dicdataStore.getLOUDSDataInRange(inputData: inputData, from: $0, needTypoCorrection: false)}
 26 |         // 「i文字目から始まるnodes」に対して
 27 |         for (i, nodeArray) in nodes.enumerated() {
 28 |             // それぞれのnodeに対して
 29 |             for node in nodeArray {
 30 |                 if node.prevs.isEmpty {
 31 |                     continue
 32 |                 }
 33 |                 // 生起確率を取得する。
 34 |                 let wValue: PValue = node.data.value()
 35 |                 if i == 0 {
 36 |                     // valuesを更新する
 37 |                     node.values = node.prevs.map {$0.totalValue + wValue + self.dicdataStore.getCCValue($0.data.rcid, node.data.lcid)}
 38 |                 } else {
 39 |                     // valuesを更新する
 40 |                     node.values = node.prevs.map {$0.totalValue + wValue}
 41 |                 }
 42 |                 // 変換した文字数
 43 |                 let nextIndex: Int = node.inputRange.endIndex
 44 |                 // 文字数がcountと等しい場合登録する
 45 |                 if nextIndex == count {
 46 |                     for index in node.prevs.indices {
 47 |                         let newnode: RegisteredNode = node.getRegisteredNode(index, value: node.values[index])
 48 |                         // 学習データやユーザ辞書由来の場合は素通しする
 49 |                         if node.data.metadata.isDisjoint(with: [.isLearned, .isFromUserDictionary]) {
 50 |                             let utf8Text = newnode.getCandidateData().data.reduce(into: []) { $0.append(contentsOf: $1.word.utf8)} + node.data.word.utf8
 51 |                             // 最終チェック
 52 |                             let condition = (!constraint.hasEOS && utf8Text.hasPrefix(constraint.constraint)) || (constraint.hasEOS && utf8Text == constraint.constraint)
 53 |                             guard condition else {
 54 |                                 continue
 55 |                             }
 56 |                         }
 57 |                         result.prevs.append(newnode)
 58 |                     }
 59 |                 } else {
 60 |                     let candidates: [[String.UTF8View.Element]] = node.getCandidateData().map {
 61 |                         Array(($0.data.reduce(into: "") { $0.append(contentsOf: $1.word)} + node.data.word).utf8)
 62 |                     }
 63 |                     // nodeの繋がる次にあり得る全てのnextnodeに対して
 64 |                     for nextnode in nodes[nextIndex] {
 65 |                         // クラスの連続確率を計算する。
 66 |                         let ccValue: PValue = self.dicdataStore.getCCValue(node.data.rcid, nextnode.data.lcid)
 67 |                         // nodeの持っている全てのprevnodeに対して
 68 |                         for (index, value) in node.values.enumerated() {
 69 |                             // 制約を少なくとも満たしている必要がある
 70 |                             // common prefixが単語か制約のどちらかに一致している必要
 71 |                             // 制約 AB 単語 ABC (OK)
 72 |                             // 制約 AB 単語 A   (OK)
 73 |                             // 制約 AB 単語 AC  (NG)
 74 |                             // ただし、学習データやユーザ辞書由来の場合は素通しする
 75 |                             if nextnode.data.metadata.isDisjoint(with: [.isLearned, .isFromUserDictionary]) {
 76 |                                 let utf8Text = candidates[index] + nextnode.data.word.utf8
 77 |                                 let condition = (!constraint.hasEOS && (utf8Text.hasPrefix(constraint.constraint) || constraint.constraint.hasPrefix(utf8Text))) || (constraint.hasEOS && utf8Text.count < constraint.constraint.count && constraint.constraint.hasPrefix(utf8Text))
 78 |                                 guard condition else {
 79 |                                     continue
 80 |                                 }
 81 |                             }
 82 |                             let newValue: PValue = ccValue + value
 83 |                             // 追加すべきindexを取得する
 84 |                             let lastindex: Int = (nextnode.prevs.lastIndex(where: {$0.totalValue >= newValue}) ?? -1) + 1
 85 |                             if lastindex == N_best {
 86 |                                 continue
 87 |                             }
 88 |                             let newnode: RegisteredNode = node.getRegisteredNode(index, value: newValue)
 89 |                             // カウントがオーバーしている場合は除去する
 90 |                             if nextnode.prevs.count >= N_best {
 91 |                                 nextnode.prevs.removeLast()
 92 |                             }
 93 |                             // removeしてからinsertした方が速い (insertはO(N)なので)
 94 |                             nextnode.prevs.insert(newnode, at: lastindex)
 95 |                         }
 96 |                     }
 97 |                 }
 98 |             }
 99 |         }
100 |         return (result: result, nodes: nodes)
101 |     }
102 | 
103 | }
104 | 


--------------------------------------------------------------------------------
/Sources/KanaKanjiConverterModule/Kana2Kanji/completed_first.swift:
--------------------------------------------------------------------------------
 1 | //
 2 | //  afterPartlyCompleted.swift
 3 | //  Keyboard
 4 | //
 5 | //  Created by ensan on 2020/09/14.
 6 | //  Copyright © 2020 ensan. All rights reserved.
 7 | //
 8 | 
 9 | import Foundation
10 | import SwiftUtils
11 | 
extension Kana2Kanji {
    /// Kana-to-kanji lattice construction right after a prefix of the input has been committed.
    ///
    /// ### Steps
    /// (1) Take the part of the previously computed lattice that follows the committed text,
    ///     shift every node's `inputRange` back by the committed length and make the nodes at the
    ///     new first position start from a node derived from `completedData`. The nodes of
    ///     `previousResult` are modified in place.
    ///
    /// (2) Recompute the paths over the remaining lattice to obtain good candidates.
    /// - Parameters:
    ///   - inputData: The input that remains after the commit.
    ///   - completedData: The candidate that was committed.
    ///   - N_best: Maximum number of predecessors kept per node.
    ///   - previousResult: Input and lattice from before the commit.
    ///   - needTypoCorrection: Not used by this implementation.
    /// - Returns: The EOS node collecting every complete path, and the remaining lattice.
    func kana2lattice_afterComplete(_ inputData: ComposingText, completedData: Candidate, N_best: Int, previousResult: (inputData: ComposingText, nodes: Nodes), needTypoCorrection: Bool) -> (result: LatticeNode, nodes: Nodes) {
        debug("確定直後の変換、前は：", previousResult.inputData, "後は：", inputData)
        let count = inputData.input.count
        // (1)
        let start = RegisteredNode.fromLastCandidate(completedData)
        let nodes: Nodes = previousResult.nodes.suffix(count)
        let shift = completedData.correspondingCount
        for (i, nodeArray) in nodes.enumerated() {
            for node in nodeArray {
                // Only the nodes at the new first position get the start node as predecessor.
                if i == .zero {
                    node.prevs = [start]
                } else {
                    node.prevs = []
                }
                // Shift the input range back by the committed length.
                node.inputRange = node.inputRange.startIndex - shift ..< node.inputRange.endIndex - shift
            }
        }
        // (2)
        let result = LatticeNode.EOSNode

        for (i, nodeArray) in nodes.enumerated() {
            for node in nodeArray {
                guard !node.prevs.isEmpty else {
                    continue
                }
                guard !self.dicdataStore.shouldBeRemoved(data: node.data) else {
                    continue
                }
                // Word value of the node itself.
                let wValue = node.data.value()
                // Refresh `values`; nodes at position 0 also add the connection value to their
                // predecessor.
                if i == 0 {
                    node.values = node.prevs.map { prev in
                        prev.totalValue + wValue + self.dicdataStore.getCCValue(prev.data.rcid, node.data.lcid)
                    }
                } else {
                    node.values = node.prevs.map { prev in
                        prev.totalValue + wValue
                    }
                }
                // Position at which this node ends.
                let nextIndex = node.inputRange.endIndex
                if nextIndex == count {
                    // The conversion is complete for this node: register every path in the result.
                    for prevIndex in node.prevs.indices {
                        result.prevs.append(node.getRegisteredNode(prevIndex, value: node.values[prevIndex]))
                    }
                } else {
                    // Not at the end yet: connect to every node that can follow.
                    for nextnode in nodes[nextIndex] {
                        guard !self.dicdataStore.shouldBeRemoved(data: nextnode.data) else {
                            continue
                        }
                        // Class connection value between the two nodes.
                        let ccValue = self.dicdataStore.getCCValue(node.data.rcid, nextnode.data.lcid)
                        // For every predecessor path held by `node`.
                        for (prevIndex, accumulated) in node.values.enumerated() {
                            let newValue = ccValue + accumulated
                            // Insert right after the last entry whose value is not smaller than the new one.
                            let insertionIndex = nextnode.prevs.lastIndex(where: { $0.totalValue >= newValue }).map { $0 + 1 } ?? 0
                            guard insertionIndex != N_best else {
                                continue
                            }
                            let registered = node.getRegisteredNode(prevIndex, value: newValue)
                            // Drop the last entry first when the list is already full;
                            // removing before inserting is faster because insert is O(N).
                            if nextnode.prevs.count >= N_best {
                                nextnode.prevs.removeLast()
                            }
                            nextnode.prevs.insert(registered, at: insertionIndex)
                        }
                    }
                }
            }

        }
        return (result: result, nodes: nodes)
    }
}
99 | 


--------------------------------------------------------------------------------
/Sources/KanaKanjiConverterModule/Kana2Kanji/deleted_last_n_character.swift:
--------------------------------------------------------------------------------
 1 | //
 2 | //  afterLastCharacterDeleted.swift
 3 | //  Keyboard
 4 | //
 5 | //  Created by ensan on 2020/09/14.
 6 | //  Copyright © 2020 ensan. All rights reserved.
 7 | //
 8 | 
 9 | import Foundation
10 | import SwiftUtils
11 | 
extension Kana2Kanji {

    /// Kana-to-kanji lattice construction for the case where trailing input was deleted.
    /// - Parameters:
    ///   - deletedCount: Number of deleted input elements.
    ///   - N_best: Not used here; the N-best selection was already done for the previous lattice.
    ///   - previousResult: The previous input and lattice, i.e. the state that still contained
    ///     the deleted part.
    /// - Returns: The EOS node collecting the paths that end at the new end of input, and the
    ///   trimmed lattice.
    ///
    /// ### Steps
    /// (1) Scan the already computed nodes and register those ending exactly at the new end of
    ///     input in the result.
    ///
    /// (2) Build the lattice to return: keep the first `count` positions and filter out nodes
    ///     that extend past the new end.
    func kana2lattice_deletedLast(deletedCount: Int, N_best: Int, previousResult: (inputData: ComposingText, nodes: Nodes)) -> (result: LatticeNode, nodes: Nodes) {
        debug("削除の連続性を利用した変換、元の文字は：", previousResult.inputData.convertTarget)
        let count = previousResult.inputData.input.count - deletedCount
        // (1)
        let result = LatticeNode.EOSNode

        for node in previousResult.nodes.joined() {
            guard !node.prevs.isEmpty else {
                continue
            }
            guard !self.dicdataStore.shouldBeRemoved(data: node.data) else {
                continue
            }
            // Only nodes ending exactly at the new end of input contribute.
            guard node.inputRange.endIndex == count else {
                continue
            }
            for (prevIndex, accumulated) in node.values.enumerated() {
                result.prevs.append(node.getRegisteredNode(prevIndex, value: accumulated))
            }
        }

        // (2)
        let updatedNodes = previousResult.nodes.prefix(count).map { (nodeArray: [LatticeNode]) in
            nodeArray.filter { candidate in
                candidate.inputRange.endIndex <= count
            }
        }
        return (result: result, nodes: updatedNodes)
    }

}
61 | 


--------------------------------------------------------------------------------
/Sources/KanaKanjiConverterModule/Kana2Kanji/mid_composition_prediction.swift:
--------------------------------------------------------------------------------
  1 | //
  2 | //  mid_composition_prediction.swift
  3 | //  AzooKeyKanaKanjiConverter
  4 | //
  5 | //  Created by ensan on 2020/12/09.
  6 | //  Copyright © 2020 ensan. All rights reserved.
  7 | //
  8 | 
  9 | import Foundation
 10 | import SwiftUtils
 11 | 
 12 | // 変換中の予測変換に関する実装
 13 | extension Kana2Kanji {
 14 |     /// Builds prediction candidates for the clause being typed, scored together with the part converted before it.
 15 |     /// - parameters:
 16 |     ///   - composingText: The text being composed. `lastClause.inputRange` selects the part of its `input` to predict from.
 17 |     ///   - prepart: The `CandidateData` preceding the predicted part. E.g. when predicting from 「き」 of 「これはき」, 「これは」 is the prepart.
 18 |     ///   - lastClause: The clause to predict from, i.e. the 「き」 part of 「これはき」.
 19 |     ///   - N_best: Maximum number of candidates to return.
 20 |     /// - returns:
 21 |     ///    Candidates such as 「これは今日」 for 「これはき」, ordered from the highest value to the lowest.
 22 |     /// - note:
 23 |     ///     The role of this function is to take the semantic connection with the preceding part into account.
 24 |     func getPredictionCandidates(composingText: ComposingText, prepart: CandidateData, lastClause: ClauseDataUnit, N_best: Int) -> [Candidate] {
 25 |         debug("getPredictionCandidates", composingText, lastClause.inputRange, lastClause.text)
 26 |         let lastRuby = ComposingText.getConvertTarget(for: composingText.input[lastClause.inputRange]).toKatakana()
 27 |         let lastRubyCount = lastClause.inputRange.count
 28 |         let datas: [DicdataElement]
 29 |         do {
 30 |             var _str = ""
 31 |             let prestring: String = prepart.clauses.reduce(into: "") {$0.append(contentsOf: $1.clause.text)}
 32 |             var count: Int = .zero
 33 |             // Take elements of `prepart.data` until their words spell out `prestring`.
 34 |             // `count` is bounded so that a `prestring` that never matches ends the scan at the
 35 |             // end of `prepart.data` (using all of it) instead of indexing out of range.
 36 |             while prestring != _str, count < prepart.data.count {
 37 |                 _str += prepart.data[count].word
 38 |                 count += 1
 39 |             }
 40 |             datas = Array(prepart.data.prefix(count))
 41 |         }
 42 | 
 43 |         let osuserdict: [DicdataElement] = dicdataStore.getPrefixMatchDynamicUserDict(lastRuby)
 44 | 
 45 |         let lastCandidate: Candidate = prepart.isEmpty ? Candidate(text: "", value: .zero, correspondingCount: 0, lastMid: MIDData.EOS.mid, data: []) : self.processClauseCandidate(prepart)
 46 |         let lastRcid: Int = lastCandidate.data.last?.rcid ?? CIDData.EOS.cid
 47 |         let nextLcid: Int = prepart.lastClause?.nextLcid ?? CIDData.EOS.cid
 48 |         let lastMid: Int = lastCandidate.lastMid
 49 |         let correspondingCount: Int = lastCandidate.correspondingCount + lastRubyCount
 50 |         let ignoreCCValue: PValue = self.dicdataStore.getCCValue(lastRcid, nextLcid)
 51 | 
 52 |         let inputStyle = composingText.input.last?.inputStyle ?? .direct
 53 |         let dicdata: [DicdataElement]
 54 |         switch inputStyle {
 55 |         case .direct:
 56 |             dicdata = self.dicdataStore.getPredictionLOUDSDicdata(key: lastRuby)
 57 |         case .roman2kana:
 58 |             let roman = lastRuby.suffix(while: {String($0).onlyRomanAlphabet})
 59 |             if !roman.isEmpty {
 60 |                 let ruby: Substring = lastRuby.dropLast(roman.count)
 61 |                 if ruby.isEmpty {
 62 |                     dicdata = []
 63 |                     break
 64 |                 }
 65 |                 let possibleNexts: [Substring] = DicdataStore.possibleNexts[String(roman), default: []].map {ruby + $0}
 66 |                 debug("getPredictionCandidates", lastRuby, ruby, roman, possibleNexts, prepart, lastRubyCount)
 67 |                 dicdata = possibleNexts.flatMap { self.dicdataStore.getPredictionLOUDSDicdata(key: $0) }
 68 |             } else {
 69 |                 debug("getPredicitonCandidates", lastRuby, roman)
 70 |                 dicdata = self.dicdataStore.getPredictionLOUDSDicdata(key: lastRuby)
 71 |             }
 72 |         }
 73 | 
 74 |         var result: [Candidate] = []
 75 | 
 76 |         result.reserveCapacity(N_best &+ 1)
 77 |         for data in (dicdata + osuserdict) {
 78 |             let includeMMValueCalculation = DicdataStore.includeMMValueCalculation(data)
 79 |             let mmValue: PValue = includeMMValueCalculation ? self.dicdataStore.getMMValue(lastMid, data.mid):.zero
 80 |             let ccValue: PValue = self.dicdataStore.getCCValue(lastRcid, data.lcid)
 81 |             let penalty: PValue = -PValue(data.ruby.count &- lastRuby.count) * 3.0   // penalize the difference in length
 82 |             let wValue: PValue = data.value()
 83 |             let newValue: PValue = lastCandidate.value + mmValue + ccValue + wValue + penalty - ignoreCCValue
 84 |             // Find the index at which the new candidate has to be inserted.
 85 |             let lastindex: Int = (result.lastIndex(where: {$0.value >= newValue}) ?? -1) + 1
 86 |             if lastindex >= N_best {
 87 |                 continue
 88 |             }
 89 |             var nodedata: [DicdataElement] = datas
 90 |             nodedata.append(data)
 91 |             let candidate: Candidate = Candidate(
 92 |                 text: lastCandidate.text + data.word,
 93 |                 value: newValue,
 94 |                 correspondingCount: correspondingCount,
 95 |                 lastMid: includeMMValueCalculation ? data.mid:lastMid,
 96 |                 data: nodedata
 97 |             )
 98 |             // Drop the last (lowest-valued) entry when the list would otherwise exceed N_best.
 99 |             if result.count >= N_best {
100 |                 result.removeLast()
101 |             }
102 |             // Removing before inserting is faster (insert is O(N)).
103 |             result.insert(candidate, at: lastindex)
104 |         }
105 | 
106 |         return result
107 |     }
108 | }
109 | 


--------------------------------------------------------------------------------
/Sources/KanaKanjiConverterModule/Kana2Kanji/no_change.swift:
--------------------------------------------------------------------------------
 1 | //
 2 | //  no_change.swift
 3 | //  Keyboard
 4 | //
 5 | //  Created by ensan on 2022/11/09.
 6 | //  Copyright © 2022 ensan. All rights reserved.
 7 | //
 8 | 
 9 | import Foundation
10 | import SwiftUtils
11 | 
12 | extension Kana2Kanji {
13 | 
14 |     /// Restores a lattice from the cached nodes of the previous conversion.
15 |     /// - Parameters:
16 |     ///   - N_best: The N-best width. Unused here because the cached nodes already hold
17 |     ///     their N-best `prevs` and `values`.
18 |     ///   - previousResult: The input and lattice nodes of the previous conversion.
19 |     /// - Returns:
20 |     ///   A new EOS node whose `prevs` are built from the cached nodes ending at the end of
21 |     ///   the previous input, together with the cached nodes, returned as they are.
22 |     ///
23 |     /// ### Implementation
24 |     /// (1) Scan the cached nodes and register those that reach the end of the input on the EOS node.
25 |     ///
26 |     /// (2) Return the cached nodes.
27 | 
28 |     func kana2lattice_no_change(N_best: Int, previousResult: (inputData: ComposingText, nodes: Nodes)) -> (result: LatticeNode, nodes: Nodes) {
29 |         debug("キャッシュから復元、元の文字は：", previousResult.inputData.convertTarget)
30 |         let inputCount = previousResult.inputData.input.count
31 | 
32 |         // (1)
33 |         let eosNode = LatticeNode.EOSNode
34 |         for column in previousResult.nodes {
35 |             for cached in column {
36 |                 // Skip nodes without registered predecessors, nodes for which the dictionary
37 |                 // store's `shouldBeRemoved` is true, and nodes that do not end at the end of input.
38 |                 guard !cached.prevs.isEmpty,
39 |                       !self.dicdataStore.shouldBeRemoved(data: cached.data),
40 |                       cached.inputRange.endIndex == inputCount else {
41 |                     continue
42 |                 }
43 |                 // Register one entry per stored score; `offset` selects the matching `prevs` element.
44 |                 for (offset, score) in cached.values.enumerated() {
45 |                     eosNode.prevs.append(cached.getRegisteredNode(offset, value: score))
46 |                 }
47 |             }
48 |         }
49 | 
50 |         // (2)
51 |         // The node arrays themselves are not modified here, so the cached ones are
52 |         // handed back as they are.
53 |         return (result: eosNode, nodes: previousResult.nodes)
54 |     }
55 | 
56 | }
57 | 


--------------------------------------------------------------------------------
/Sources/KanaKanjiConverterModule/Kana2Kanji/post_composition_prediction.swift:
--------------------------------------------------------------------------------
  1 | //
  2 | //  post_composition_prediction.swift
  3 | //
  4 | //
  5 | //  Created by miwa on 2023/09/19.
  6 | //
  7 | 
  8 | import Foundation
  9 | 
 10 | // 確定後の予測変換に関係する実装
 11 | extension Kana2Kanji {
 12 |     /// Joins `left` and `right` (in that order) into one candidate.
 13 |     /// When both sides have dictionary data, the value also includes the costs at the seam: the CC value
 14 |     /// between the last element of `left` and the first of `right`, and the MM value where it applies.
 15 |     func mergeCandidates(_ left: Candidate, _ right: Candidate) -> Candidate {
 16 |         let mergedValue: PValue
 17 |         if let leftLast = left.data.last, let rightFirst = right.data.first {
 18 |             // The connection is looked up from the preceding element's `rcid` to the following element's `lcid`.
 19 |             let ccValue = self.dicdataStore.getCCValue(leftLast.rcid, rightFirst.lcid)
 20 |             let includeMMValueCalculation = DicdataStore.includeMMValueCalculation(rightFirst)
 21 |             let mmValue = includeMMValueCalculation ? self.dicdataStore.getMMValue(left.lastMid, rightFirst.mid):.zero
 22 |             mergedValue = left.value + mmValue + ccValue + right.value
 23 |         } else {
 24 |             mergedValue = left.value + right.value
 25 |         }
 26 |         return Candidate(
 27 |             text: left.text + right.text,
 28 |             value: mergedValue,
 29 |             correspondingCount: left.correspondingCount + right.correspondingCount,
 30 |             lastMid: right.lastMid,
 31 |             data: left.data + right.data
 32 |         )
 33 |     }
 34 |     /// For the last one, two and three elements of `prepart.data`, looks up entries for their concatenated ruby whose word starts with their concatenated word, and returns up to `N_best` of them as `.replacement` candidates.
 35 |     func getPredictionCandidates(prepart: Candidate, N_best: Int) -> [PostCompositionPredictionCandidate] {
 36 |         var result: [PostCompositionPredictionCandidate] = []
 37 |         var count = 1
 38 |         var prefixCandidate = prepart
 39 |         prefixCandidate.actions = []
 40 |         var prefixCandidateData = prepart.data
 41 |         var totalWord = ""  // concatenated words of the elements popped so far
 42 |         var totalRuby = ""  // concatenated rubies of the elements popped so far
 43 |         var totalData: [DicdataElement] = []  // the popped elements, kept in their original order
 44 |         while count <= min(prepart.data.count, 3), let element = prefixCandidateData.popLast() {
 45 |             defer {
 46 |                 count += 1
 47 |             }
 48 |             // Update prefixCandidate: take the popped element's own value and its CC/MM values back out.
 49 |             do {
 50 |                 prefixCandidate.value -= element.value()
 51 |                 prefixCandidate.value -= self.dicdataStore.getCCValue(prefixCandidateData.last?.rcid ?? CIDData.BOS.cid, element.lcid)
 52 |                 if DicdataStore.includeMMValueCalculation(element) {
 53 |                     let previousMid = prefixCandidateData.last(where: DicdataStore.includeMMValueCalculation)?.mid ?? MIDData.BOS.mid
 54 |                     prefixCandidate.lastMid = previousMid
 55 |                     prefixCandidate.value -= self.dicdataStore.getMMValue(previousMid, element.mid)
 56 |                 }
 57 |                 prefixCandidate.data = prefixCandidateData
 58 | 
 59 |                 prefixCandidate.text = prefixCandidateData.reduce(into: "") { $0 += $1.word }
 60 |                 prefixCandidate.correspondingCount = prefixCandidateData.reduce(into: 0) { $0 += $1.ruby.count }
 61 |             }
 62 | 
 63 |             totalWord.insert(contentsOf: element.word, at: totalWord.startIndex)
 64 |             totalRuby.insert(contentsOf: element.ruby, at: totalRuby.startIndex)
 65 |             totalData.insert(element, at: 0)
 66 |             let dicdata = self.dicdataStore.getPredictionLOUDSDicdata(key: totalRuby).filter {
 67 |                 DicdataStore.predictionUsable[$0.rcid] && $0.word.hasPrefix(totalWord)
 68 |             }
 69 | 
 70 |             for data in dicdata {
 71 |                 let ccValue = self.dicdataStore.getCCValue(prefixCandidateData.last?.rcid ?? CIDData.BOS.cid, data.lcid)
 72 |                 let includeMMValueCalculation = DicdataStore.includeMMValueCalculation(data)
 73 |                 let mmValue = includeMMValueCalculation ? self.dicdataStore.getMMValue(prefixCandidate.lastMid, data.mid):.zero
 74 |                 let wValue = data.value()
 75 |                 let newValue = prefixCandidate.value + mmValue + ccValue + wValue
 76 |                 // Find the index at which the new candidate has to be inserted (keeps `result` in descending value order).
 77 |                 let lastindex: Int = (result.lastIndex(where: {$0.value >= newValue}) ?? -1) + 1
 78 |                 if lastindex == N_best {
 79 |                     continue
 80 |                 }
 81 |                 // Drop the last entry when the list has already reached N_best.
 82 |                 if result.count >= N_best {
 83 |                     result.removeLast()
 84 |                 }
 85 |                 // Cut off the prefix shared with the already-committed words.
 86 |                 let text = String(data.word.dropFirst(totalWord.count))
 87 |                 result.insert(.init(text: text, value: newValue, type: .replacement(targetData: totalData, replacementData: [data])), at: lastindex)
 88 |             }
 89 |         }
 90 |         return result
 91 |     }
 92 | 
 93 |     /// Looks for plausible candidates when nothing has been typed yet (zero-hint prediction).
 94 |     /// - parameters:
 95 |     ///   - preparts: A collection of `Candidate`s, e.g. previously committed candidates.
 96 |     ///   - N_best: Number of candidates to obtain.
 97 |     /// - returns:
 98 |     ///   The zero-hint prediction results, ordered from the highest value to the lowest.
 99 |     /// - note:
100 |     ///   Since this often returns silly candidates such as 「食べちゃ-てる」 or 「食べちゃ-いる」, everything other than learned entries is disabled.
101 |     func getZeroHintPredictionCandidates(preparts: some Collection<Candidate>, N_best: Int) -> [PostCompositionPredictionCandidate] {
102 |         var ranked: [PostCompositionPredictionCandidate] = []
103 |         for candidate in preparts {
104 |             // Candidates without dictionary data give nothing to predict from.
105 |             guard let tail = candidate.data.last else {
106 |                 continue
107 |             }
108 |             for entry in self.dicdataStore.getZeroHintPredictionDicdata(lastRcid: tail.rcid) {
109 |                 let ccValue = self.dicdataStore.getCCValue(tail.rcid, entry.lcid)
110 |                 let mmValue = DicdataStore.includeMMValueCalculation(entry) ? self.dicdataStore.getMMValue(candidate.lastMid, entry.mid):.zero
111 |                 let score = candidate.value + mmValue + ccValue + entry.value()
112 | 
113 |                 // Insertion point: right after the last element whose value is at least `score`.
114 |                 let position = ranked.lastIndex(where: {$0.value >= score}).map {$0 + 1} ?? 0
115 |                 // The list is full and no kept element is smaller than `score`: nothing to add.
116 |                 if position == N_best {
117 |                     continue
118 |                 }
119 |                 // Make room first when the list has already reached N_best.
120 |                 if ranked.count >= N_best {
121 |                     ranked.removeLast()
122 |                 }
123 |                 ranked.insert(.init(text: entry.word, value: score, type: .additional(data: [entry])), at: position)
124 |             }
125 |         }
126 |         return ranked
127 |     }
128 | 
129 | }
130 | 


--------------------------------------------------------------------------------
/Sources/KanaKanjiConverterModule/LatticeNode.swift:
--------------------------------------------------------------------------------
 1 | //
 2 | //  LatticeNode.swift
 3 | //  Keyboard
 4 | //
 5 | //  Created by ensan on 2020/09/11.
 6 | //  Copyright © 2020 ensan. All rights reserved.
 7 | //
 8 | 
 9 | import Foundation
10 | 
11 | /// A node of the conversion lattice; the conversion is computed over these nodes.
12 | public final class LatticeNode {
13 |     /// The dictionary entry held by this node.
14 |     public let data: DicdataElement
15 |     /// The nodes that come before this node. As many as `N_best` are stored.
16 |     var prevs: [RegisteredNode] = []
17 |     /// The score data corresponding to each element of `prevs`.
18 |     var values: [PValue] = []
19 |     /// The range within `inputData.input`.
20 |     var inputRange: Range<Int>
21 | 
22 |     /// A node corresponding to `EOS`. Every access builds a new instance.
23 |     static var EOSNode: LatticeNode {
24 |         return LatticeNode(data: DicdataElement.EOSData, inputRange: 0..<0)
25 |     }
26 | 
27 |     init(data: DicdataElement, inputRange: Range<Int>) {
28 |         self.inputRange = inputRange
29 |         self.data = data
30 |         self.values = [data.value()]
31 |     }
32 | 
33 |     /// Creates a `RegisteredNode` reflecting this node's data, input range and the `index`-th element of `prevs`.
34 |     /// A `LatticeNode` can hold several preceding nodes, whereas a `RegisteredNode` holds only one.
35 |     func getRegisteredNode(_ index: Int, value: PValue) -> RegisteredNode {
36 |         return RegisteredNode(data: data, registered: prevs[index], totalValue: value, inputRange: inputRange)
37 |     }
38 | 
39 |     /// Walks back through the nodes recursively and builds `CandidateData` values, one per element of `prevs`.
40 |     /// - Returns: A list of conversion candidate data carrying clause-level boundary information.
41 |     /// - Note: The API is designed to be called on the `EOS` node at the very end.
42 |     func getCandidateData() -> [CandidateData] {
43 |         return prevs.map { registered in registered.getCandidateData() }
44 |     }
45 | }
46 | 


--------------------------------------------------------------------------------
/Sources/KanaKanjiConverterModule/MIDData.swift:
--------------------------------------------------------------------------------
 1 | //
 2 | //  MIDData.swift
 3 | //  azooKey
 4 | //
 5 | //  Created by ensan on 2022/10/25.
 6 | //  Copyright © 2022 ensan. All rights reserved.
 7 | //
 8 | 
 9 | import Foundation
10 | 
11 | public enum MIDData: Sendable {
12 |     /// Always 503. NOTE(review): presumably the number of distinct MID values — confirm against the users of this property.
13 |     static var totalCount: Int {
14 |         return 503
15 |     }
16 |     case BOS
17 |     case EOS
18 |     case 一般
19 |     case 数
20 |     case 英単語
21 |     case 小さい数字
22 |     case 年
23 |     case 絵文字
24 |     case 人名姓
25 |     case 人名名
26 |     case 組織
27 | 
28 |     /// The integer MID assigned to the case. `BOS`/`EOS` share 500 and `一般`/`絵文字` share 501.
29 |     public var mid: Int {
30 |         switch self {
31 |         case .BOS, .EOS: return 500
32 |         case .一般, .絵文字: return 501  // note kept from the original for 絵文字: "add 502"
33 |         case .数: return 452
34 |         case .英単語: return 40
35 |         case .小さい数字: return 361
36 |         case .年: return 237
37 |         case .人名姓: return 344
38 |         case .人名名: return 370
39 |         case .組織: return 378
40 |         }
41 |     }
42 | }
43 | 


--------------------------------------------------------------------------------
/Sources/KanaKanjiConverterModule/PostCompositionPredictionCandidate.swift:
--------------------------------------------------------------------------------
 1 | //
 2 | //  PostCompositionPredictionCandidate.swift
 3 | //
 4 | //
 5 | //  Created by miwa on 2023/09/19.
 6 | //
 7 | 
 8 | import Foundation
 9 | 
10 | /// A prediction candidate offered after a conversion has been committed.
11 | public struct PostCompositionPredictionCandidate {
12 |     /// Creates a candidate. `isTerminal` is derived from `text`: it is `true` exactly when
13 |     /// `text` is one of 「。」, "." or 「．」.
14 |     public init(text: String, value: PValue, type: PostCompositionPredictionCandidate.PredictionType) {
15 |         self.text = text
16 |         self.value = value
17 |         self.type = type
18 |         self.isTerminal = ["。", ".", "．"].contains(text)
19 |     }
20 | 
21 |     public var text: String
22 |     public var value: PValue
23 |     public var type: PredictionType
24 | 
25 |     /// Whether post-composition prediction should end here. It ends at a full stop.
26 |     public var isTerminal: Bool
27 | 
28 |     /// Applies this prediction to `candidate` and returns the result.
29 |     /// The returned candidate's `value` is this prediction's `value`.
30 |     public func join(to candidate: Candidate) -> Candidate {
31 |         var candidate = candidate
32 |         switch self.type {
33 |         case .additional(let data):
34 |             for element in data {
35 |                 candidate.text.append(contentsOf: element.word)
36 |                 candidate.data.append(element)
37 |             }
38 |             candidate.value = self.value
39 |             candidate.correspondingCount = candidate.data.reduce(into: 0) { $0 += $1.ruby.count }
40 |             candidate.lastMid = data.last(where: DicdataStore.includeMMValueCalculation)?.mid ?? candidate.lastMid
41 |             return candidate
42 |         case .replacement(let targetData, let replacementData):
43 |             candidate.data.removeLast(min(targetData.count, candidate.data.count))  // clamped: `removeLast(_:)` traps when asked to remove more elements than exist
44 |             candidate.data.append(contentsOf: replacementData)
45 |             candidate.text = candidate.data.reduce(into: "") {$0 += $1.word}
46 |             candidate.value = self.value
47 |             candidate.lastMid = candidate.data.last(where: DicdataStore.includeMMValueCalculation)?.mid ?? MIDData.BOS.mid
48 |             candidate.correspondingCount = candidate.data.reduce(into: 0) { $0 += $1.ruby.count }
49 |             return candidate
50 |         }
51 |     }
52 | 
53 |     public enum PredictionType: Sendable, Hashable {
54 |         case additional(data: [DicdataElement])  // `join(to:)` appends `data` to the candidate
55 |         case replacement(targetData: [DicdataElement], replacementData: [DicdataElement])  // `join(to:)` swaps the trailing `targetData.count` elements for `replacementData`
56 |     }
57 | 
58 |     var lastData: DicdataElement? {  // the last dictionary element this prediction contributes, if any
59 |         switch self.type {
60 |         case .additional(let data):
61 |             return data.last
62 |         case .replacement(_, let replacementData):
63 |             return replacementData.last
64 |         }
65 |     }
66 | }
67 | 


--------------------------------------------------------------------------------
/Sources/KanaKanjiConverterModule/RegisteredNodeProtocol.swift:
--------------------------------------------------------------------------------
  1 | //
  2 | //  RegisteredNodeProtocol.swift
  3 | //  Keyboard
  4 | //
  5 | //  Created by ensan on 2020/09/16.
  6 | //  Copyright © 2020 ensan. All rights reserved.
  7 | //
  8 | 
  9 | import Foundation
 10 | 
 11 | /// Abstracts `RegisteredNode` behind an existential type so that the `struct` can hold itself recursively.
 12 | /// - Note: A comparison with `indirect enum` has not been made yet.
 13 | protocol RegisteredNodeProtocol {
 14 |     var data: DicdataElement {get}
 15 |     var prev: (any RegisteredNodeProtocol)? {get}
 16 |     var totalValue: PValue {get}
 17 |     var inputRange: Range<Int> {get}
 18 | }
 19 | 
 20 | struct RegisteredNode: RegisteredNodeProtocol {
 21 |     /// The dictionary entry held by this node.
 22 |     let data: DicdataElement
 23 |     /// The data of the node one step before this one.
 24 |     let prev: (any RegisteredNodeProtocol)?
 25 |     /// The cost from the start node up to this node.
 26 |     let totalValue: PValue
 27 |     /// The corresponding range in `input` of `composingText`.
 28 |     let inputRange: Range<Int>
 29 | 
 30 |     init(data: DicdataElement, registered: RegisteredNode?, totalValue: PValue, inputRange: Range<Int>) {
 31 |         self.inputRange = inputRange
 32 |         self.totalValue = totalValue
 33 |         self.prev = registered
 34 |         self.data = data
 35 |     }
 36 | 
 37 |     /// Creates the start node.
 38 |     /// - Returns: The data of the start node.
 39 |     static func BOSNode() -> RegisteredNode {
 40 |         return .init(data: DicdataElement.BOSData, registered: nil, totalValue: 0, inputRange: 0 ..< 0)
 41 |     }
 42 | 
 43 |     /// Creates a start node that takes the already-committed part into account while typing.
 44 |     /// The node has an empty word and ruby. Its `rcid` is that of the candidate's last
 45 |     /// element (the BOS CID when the candidate has no data) and its `mid` is the
 46 |     /// candidate's `lastMid`.
 47 |     /// - Returns: The data of the start node.
 48 |     static func fromLastCandidate(_ candidate: Candidate) -> RegisteredNode {
 49 |         let trailingRcid = candidate.data.last?.rcid ?? CIDData.BOS.cid
 50 |         let seed = DicdataElement(word: "", ruby: "", lcid: CIDData.BOS.cid, rcid: trailingRcid, mid: candidate.lastMid, value: 0)
 51 |         return RegisteredNode(data: seed, registered: nil, totalValue: 0, inputRange: 0 ..< 0)
 52 |     }
 53 | }
 54 | 
 55 | extension RegisteredNodeProtocol {
 56 |     /// Walks back through the chain of nodes recursively and builds a `CandidateData`.
 57 |     /// - Returns: Conversion candidate data that carries clause-level boundary information.
 58 |     func getCandidateData() -> CandidateData {
 59 |         guard let prev else {  // no predecessor: start with a single clause carrying this node's mid and range
 60 |             let unit = ClauseDataUnit()
 61 |             unit.mid = self.data.mid
 62 |             unit.inputRange = self.inputRange
 63 |             return CandidateData(clauses: [(clause: unit, value: .zero)], data: [])
 64 |         }
 65 |         var lastcandidate = prev.getCandidateData()    // first build the data for the chain leading up to this node
 66 | 
 67 |         if self.data.word.isEmpty {
 68 |             return lastcandidate
 69 |         }
 70 | 
 71 |         guard let lastClause = lastcandidate.lastClause else {
 72 |             return lastcandidate
 73 |         }
 74 | 
 75 |         if lastClause.text.isEmpty || !DicdataStore.isClause(prev.data.rcid, self.data.lcid) {
 76 |             // Not a clause boundary, so append to the last clause.
 77 |             lastClause.text.append(self.data.word)
 78 |             lastClause.inputRange = lastClause.inputRange.startIndex ..< self.inputRange.endIndex
 79 |             // Covers the case where the clause still carries the initial BOS mid.
 80 |             if (lastClause.mid == MIDData.BOS.mid && self.data.mid != MIDData.BOS.mid) || DicdataStore.includeMMValueCalculation(self.data) {
 81 |                 lastClause.mid = self.data.mid
 82 |             }
 83 |             lastcandidate.clauses[lastcandidate.clauses.count - 1].value = self.totalValue
 84 |             lastcandidate.data.append(self.data)
 85 |             return lastcandidate
 86 |         }
 87 |         // This node starts a new clause.
 88 |         else {
 89 |             let unit = ClauseDataUnit()
 90 |             unit.text = self.data.word
 91 |             unit.inputRange = self.inputRange
 92 |             if DicdataStore.includeMMValueCalculation(self.data) {
 93 |                 unit.mid = self.data.mid
 94 |             }
 95 |             // Update the preceding clause with the lcid it is followed by.
 96 |             lastClause.nextLcid = self.data.lcid
 97 |             lastcandidate.clauses.append((clause: unit, value: self.totalValue))
 98 |             lastcandidate.data.append(self.data)
 99 |             return lastcandidate
100 |         }
101 |     }
102 | }
103 | 


--------------------------------------------------------------------------------
/Sources/KanaKanjiConverterModule/States.swift:
--------------------------------------------------------------------------------
 1 | //
 2 | //  States.swift
 3 | //
 4 | //
 5 | //  Created by ensan on 2023/04/30.
 6 | //
 7 | 
 8 | public enum InputStyle: String, Sendable {
 9 |     /// Style in which the typed characters are entered as they are. Raw value: "direct".
10 |     case direct
11 |     /// Style for typing Japanese in romaji. Raw value: "roman".
12 |     case roman2kana = "roman"
13 | }
14 | 
15 | public enum KeyboardLanguage: String, Codable, Equatable, Sendable {
16 |     case en_US  // no explicit raw values are given, so each case's raw value is its own name
17 |     case ja_JP
18 |     case el_GR
19 |     case none
20 | }
21 | 
22 | public enum LearningType: Int, CaseIterable, Sendable {
23 |     /// Learned data is reflected in conversion results (output), and it is also updated (input).
24 |     case inputAndOutput
25 |     /// Learned data is only reflected in conversion results (output); it is not updated.
26 |     case onlyOutput
27 |     /// Learned data is not used at all.
28 |     case nothing
29 | 
30 |     package var needUpdateMemory: Bool {
31 |         if case .inputAndOutput = self { return true } else { return false }
32 |     }
33 | 
34 |     var needUsingMemory: Bool {
35 |         if case .nothing = self { return false } else { return true }
36 |     }
37 | }
38 | 
39 | public enum ConverterBehaviorSemantics: Sendable {
40 |     /// The pattern where the user picks a candidate to convert to, as in standard Japanese input.
41 |     case conversion
42 |     /// The pattern, as in English input on iOS, where no commit is needed but replacement candidates for the strings to the left and right are offered.
43 |     case replacement([ReplacementTarget])
44 | 
45 |     public enum ReplacementTarget: UInt8, Sendable {
46 |         case emoji
47 |     }
48 | }
49 | 


--------------------------------------------------------------------------------
/Sources/KanaKanjiConverterModule/Zenz/Zenz.swift:
--------------------------------------------------------------------------------
 1 | import Foundation
 2 | import SwiftUtils
 3 | import EfficientNGram
 4 | 
 5 | @MainActor package final class Zenz {
 6 |     /// The URL whose path was handed to `ZenzContext.createContext(path:)` in `init`.
 7 |     package var resourceURL: URL
 8 |     private var zenzContext: ZenzContext?
 9 | 
10 |     /// Creates the context from the file at `resourceURL`.
11 |     /// - Throws: Whatever `ZenzContext.createContext(path:)` throws; the error is passed on unchanged.
12 |     init(resourceURL: URL) throws {
13 |         self.resourceURL = resourceURL
14 | #if canImport(Darwin)
15 |         if #available(iOS 16, macOS 13, *) {
16 |             self.zenzContext = try ZenzContext.createContext(path: resourceURL.path(percentEncoded: false))
17 |         } else {
18 |             // this is not percent-encoded
19 |             self.zenzContext = try ZenzContext.createContext(path: resourceURL.path)
20 |         }
21 | #else
22 |         // this is not percent-encoded
23 |         self.zenzContext = try ZenzContext.createContext(path: resourceURL.path)
24 | #endif
25 |         debug("Loaded model \(resourceURL.lastPathComponent)")
26 |     }
27 | 
28 |     /// Ends the session by resetting the context. A failure to reset is ignored (`try?`).
29 |     package func endSession() {
30 |         try? self.zenzContext?.reset_context()
31 |     }
32 | 
33 |     /// Evaluates the first element of `candidates` for `convertTarget` (converted to katakana); later elements are never looked at.
34 |     /// - Returns: The result of `ZenzContext.evaluate_candidate`, or `.error` when there is no context or `candidates` is empty.
35 |     func candidateEvaluate(
36 |         convertTarget: String,
37 |         candidates: [Candidate],
38 |         requestRichCandidates: Bool,
39 |         personalizationMode: (mode: ConvertRequestOptions.ZenzaiMode.PersonalizationMode, base: EfficientNGram, personal: EfficientNGram)?,
40 |         versionDependentConfig: ConvertRequestOptions.ZenzaiVersionDependentMode
41 |     ) -> ZenzContext.CandidateEvaluationResult {
42 |         guard let zenzContext, let candidate = candidates.first else {
43 |             return .error
44 |         }
45 |         return zenzContext.evaluate_candidate(
46 |             input: convertTarget.toKatakana(),
47 |             candidate: candidate,
48 |             requestRichCandidates: requestRichCandidates,
49 |             personalizationMode: personalizationMode,
50 |             versionDependentConfig: versionDependentConfig
51 |         )
52 |     }
53 | 
54 |     /// Forwards to `ZenzContext.predict_next_character`; returns an empty array when there is no context.
55 |     func predictNextCharacter(leftSideContext: String, count: Int) -> [(character: Character, value: Float)] {
56 |         guard let zenzContext else {
57 |             return []
58 |         }
59 |         return zenzContext.predict_next_character(leftSideContext: leftSideContext, count: count)
60 |     }
61 | 
62 |     /// Forwards to `ZenzContext.pure_greedy_decoding`; returns an empty string when there is no context.
63 |     package func pureGreedyDecoding(pureInput: String, maxCount: Int = .max) -> String {
64 |         return self.zenzContext?.pure_greedy_decoding(leftSideContext: pureInput, maxCount: maxCount) ?? ""
65 |     }
66 | }
67 | 


--------------------------------------------------------------------------------
/Sources/KanaKanjiConverterModule/Zenz/llama-mock.swift:
--------------------------------------------------------------------------------
 1 | #if !Zenzai
 2 | // When Zenzai is not enabled, this mock implementation is compiled in instead
 3 | private func unimplemented<T>() -> T { // shared body for stubs that must return a value: always traps
 4 |     fatalError("unimplemented")
 5 | }
 6 | 
 7 | package typealias llama_token = Int32
 8 | package typealias llama_pos = Int32
 9 | package typealias llama_seq_id = Int32
10 | 
11 | package struct llama_context_params {
12 |     package var seed: Int
13 |     package var n_ctx: Int
14 |     package var n_threads: Int32
15 |     package var n_threads_batch: Int32
16 |     package var n_batch: Int
17 | }
18 | package func llama_context_default_params() -> llama_context_params { unimplemented() } // traps if called
19 | 
20 | package typealias llama_context = OpaquePointer
21 | package func llama_init_from_model(_ model: llama_model, _ ctx_params: llama_context_params) -> llama_context? { unimplemented() }
22 | package func llama_free(_ context: llama_context) {} // no-op in the mock
23 | 
24 | package typealias llama_model = OpaquePointer
25 | package typealias llama_vocab = OpaquePointer
26 | 
27 | package func llama_model_free(_ model: llama_model) {} // no-op in the mock
28 | 
29 | package func llama_backend_init() {} // no-op in the mock
30 | package func llama_backend_free() {} // no-op in the mock
31 | 
32 | package struct llama_model_params {
33 |     package var use_mmap: Bool
34 | }
35 | package func llama_model_default_params() -> llama_model_params { unimplemented() }
36 | 
37 | package func llama_model_get_vocab(_ model: llama_model) -> llama_vocab? { unimplemented() }
38 | 
39 | package func llama_model_load_from_file(_ path: String, _ model_params: llama_model_params) -> llama_model? { unimplemented() }
40 | 
41 | package func llama_kv_cache_seq_rm(_ ctx: llama_context, _ seq_id: llama_seq_id, _ p0: llama_pos, _ p1: llama_pos) {} // no-op in the mock
42 | package func llama_kv_cache_seq_pos_max(_ ctx: llama_context, _ seq_id: llama_seq_id) -> Int { unimplemented() }
43 | 
44 | package struct llama_batch {
45 |     package var token: [llama_token]
46 |     package var pos: [llama_pos]
47 |     package var n_seq_id: [llama_seq_id]
48 |     package var seq_id: [[llama_seq_id]?]
49 |     package var logits: UnsafeMutablePointer<Float>
50 |     package var n_tokens: Int
51 | 
52 | }
53 | package func llama_batch_init(_ n_tokens: Int, _ embd: Int, _ n_seq_max: Int) -> llama_batch { unimplemented() }
54 | 
55 | package func llama_n_ctx(_ ctx: llama_context) -> Int { unimplemented() }
56 | package func llama_vocab_n_tokens(_ vocab: llama_vocab) -> Int { unimplemented() }
57 | 
58 | package func llama_tokenize(_ model: llama_model, _ text: String, _ text_len: Int32, _ tokens: UnsafeMutablePointer<llama_token>, _ n_tokens_max: Int32, _ add_special: Bool, _ parse_special: Bool) -> Int { unimplemented() }
59 | package func llama_vocab_eos(_ vocab: llama_vocab) -> llama_token { unimplemented() }
60 | package func llama_vocab_bos(_ vocab: llama_vocab) -> llama_token { unimplemented() }
61 | package func llama_token_to_piece(_ vocab: llama_vocab, _ token: llama_token, _ buf: UnsafeMutablePointer<Int8>, _ length: Int32, _ lstrip: Int32, _ special: Bool) -> Int32 { unimplemented() }
62 | 
63 | package func llama_decode(_ ctx: llama_context, _ batch: llama_batch) -> Int { unimplemented() }
64 | package func llama_get_logits(_ ctx: llama_context) -> UnsafeMutablePointer<Float>? { unimplemented() }
65 | #endif
66 | 


--------------------------------------------------------------------------------
/Sources/KanaKanjiConverterModuleWithDefaultDictionary/KanaKanjiConverterModuleWithDefaultDictionary.swift:
--------------------------------------------------------------------------------
 1 | @_exported import KanaKanjiConverterModule
 2 | import Foundation
 3 | 
 4 | public extension ConvertRequestOptions {
 5 |     /// Builds request options whose `dictionaryResourceURL` is the "Dictionary" directory located through `Bundle.module`.
 6 |     static func withDefaultDictionary(
 7 |         N_best: Int = 10,
 8 |         requireJapanesePrediction: Bool,
 9 |         requireEnglishPrediction: Bool,
10 |         keyboardLanguage: KeyboardLanguage,
11 |         typographyLetterCandidate: Bool = false,
12 |         unicodeCandidate: Bool = true,
13 |         englishCandidateInRoman2KanaInput: Bool = false,
14 |         fullWidthRomanCandidate: Bool = false,
15 |         halfWidthKanaCandidate: Bool = false,
16 |         learningType: LearningType,
17 |         maxMemoryCount: Int = 65536,
18 |         shouldResetMemory: Bool = false,
19 |         memoryDirectoryURL: URL,
20 |         sharedContainerURL: URL,
21 |         zenzaiMode: ZenzaiMode = .off,
22 |         textReplacer: TextReplacer = .withDefaultEmojiDictionary(),
23 |         preloadDictionary: Bool = false,
24 |         metadata: ConvertRequestOptions.Metadata?
25 |     ) -> Self {
26 |         // Only the iOS-family platforms resolve the directory against `bundleURL`; every other platform uses `resourceURL`.
27 |         #if os(iOS) || os(watchOS) || os(tvOS) || os(visionOS)
28 |         let bundledDictionaryURL = Bundle.module.bundleURL.appendingPathComponent("Dictionary", isDirectory: true)
29 |         #else
30 |         let bundledDictionaryURL = Bundle.module.resourceURL!.appendingPathComponent("Dictionary", isDirectory: true)
31 |         #endif
32 |         return Self(
33 |             N_best: N_best,
34 |             requireJapanesePrediction: requireJapanesePrediction,
35 |             requireEnglishPrediction: requireEnglishPrediction,
36 |             keyboardLanguage: keyboardLanguage,
37 |             typographyLetterCandidate: typographyLetterCandidate,
38 |             unicodeCandidate: unicodeCandidate,
39 |             englishCandidateInRoman2KanaInput: englishCandidateInRoman2KanaInput,
40 |             fullWidthRomanCandidate: fullWidthRomanCandidate,
41 |             halfWidthKanaCandidate: halfWidthKanaCandidate,
42 |             learningType: learningType,
43 |             maxMemoryCount: maxMemoryCount,
44 |             shouldResetMemory: shouldResetMemory,
45 |             dictionaryResourceURL: bundledDictionaryURL,
46 |             memoryDirectoryURL: memoryDirectoryURL,
47 |             sharedContainerURL: sharedContainerURL,
48 |             textReplacer: textReplacer,
49 |             zenzaiMode: zenzaiMode,
50 |             preloadDictionary: preloadDictionary,
51 |             metadata: metadata
52 |         )
53 |     }
54 | }
55 | 
56 | 
57 | public extension TextReplacer {
58 |     static func withDefaultEmojiDictionary() -> Self { // builds a TextReplacer from a closure returning the URL of a bundled emoji list file
59 |         self.init {
60 |             let directoryName = "EmojiDictionary"
61 | #if os(iOS) || os(watchOS) || os(tvOS) || os(visionOS)
62 |             let directory = Bundle.module.bundleURL.appendingPathComponent(directoryName, isDirectory: true)
63 |             return if #available(iOS 18.4, *) { // NOTE(review): `*` makes this check true on watchOS/tvOS/visionOS builds, so they always take this branch — confirm intended
64 |                 directory.appendingPathComponent("emoji_all_E16.0.txt", isDirectory: false)
65 |             } else if #available(iOS 17.4, *) {
66 |                 directory.appendingPathComponent("emoji_all_E15.1.txt", isDirectory: false)
67 |             } else if #available(iOS 16.4, *) {
68 |                 directory.appendingPathComponent("emoji_all_E15.0.txt", isDirectory: false)
69 |             } else if #available(iOS 15.4, *) {
70 |                 directory.appendingPathComponent("emoji_all_E14.0.txt", isDirectory: false)
71 |             } else {
72 |                 directory.appendingPathComponent("emoji_all_E13.1.txt", isDirectory: false)
73 |             }
74 |             #elseif os(macOS)
75 |             let directory = Bundle.module.resourceURL!.appendingPathComponent(directoryName, isDirectory: true)
76 |             return if #available(macOS 15.3, *) { // newest list first; older macOS versions fall through to older lists
77 |                 directory.appendingPathComponent("emoji_all_E16.0.txt", isDirectory: false)
78 |             } else if #available(macOS 14.4, *) {
79 |                 directory.appendingPathComponent("emoji_all_E15.1.txt", isDirectory: false)
80 |             } else {
81 |                 directory.appendingPathComponent("emoji_all_E15.0.txt", isDirectory: false)
82 |             }
83 |             #else
84 |             return Bundle.module.resourceURL! // no OS version check on other platforms: always the E16.0 list
85 |                 .appendingPathComponent(directoryName, isDirectory: true)
86 |                 .appendingPathComponent("emoji_all_E16.0.txt", isDirectory: false)
87 |             #endif
88 |         }
89 |     }
90 | }
91 | 


--------------------------------------------------------------------------------
/Sources/SwiftUtils/ArrayUtils.swift:
--------------------------------------------------------------------------------
  1 | //
  2 | //  ArrayUtils.swift
  3 | //
  4 | //
  5 | //  Created by ensan on 2023/04/30.
  6 | //
  7 | 
  8 | import Algorithms
  9 | import Foundation
 10 | 
 11 | public extension Sequence {
 12 |     /// Returns a sequence that yields the elements of this sequence followed by the elements of the given sequence.
 13 |     /// - Parameters:
 14 |     ///   - sequence: The sequence whose elements come after this sequence's elements.
 15 |     /// - Returns: The `Chain2Sequence` produced by calling `chain(self, sequence)`.
 16 |     @inlinable func chained<S: Sequence<Element>>(_ sequence: S) -> Chain2Sequence<Self, S> {
 17 |         chain(self, sequence)
 18 |     }
 19 | }
 20 | 
 21 | public extension Collection {
 22 |     /// Transforms every element and gathers the results into a `Set`.
 23 |     /// - Parameters:
 24 |     ///   - transform: A closure producing a hashable value for each element.
 25 |     /// - Returns: A `Set` of the transformed values; equal values collapse into a single member.
 26 |     @inlinable func mapSet<T>(transform closure: (Element) throws -> T) rethrows -> Set<T> {
 27 |         var result = Set<T>(minimumCapacity: self.count)
 28 |         for element in self {
 29 |             let transformed = try closure(element)
 30 |             // `update(with:)` replaces an already-present equal member with the new value.
 31 |             result.update(with: transformed)
 32 |         }
 33 |         return result
 34 |     }
 35 | 
 36 |     /// Transforms every element into a sequence and gathers all produced values into one `Set`.
 37 |     /// - Parameters:
 38 |     ///   - transform: A closure producing a sequence of hashable values for each element.
 39 |     /// - Returns: The union of all sequences returned by `transform`.
 40 |     @inlinable func flatMapSet<T: Sequence>(transform closure: (Element) throws -> T) rethrows -> Set<T.Element> {
 41 |         try self.reduce(into: Set<T.Element>()) { union, element in
 42 |             union.formUnion(try closure(element))
 43 |         }
 44 |     }
 45 | 
 46 |     /// Transforms every element and gathers the non-nil results into a `Set`.
 47 |     /// - Parameters:
 48 |     ///   - transform: A closure producing an optional hashable value for each element.
 49 |     /// - Returns: A `Set` of the values for which `transform` returned non-nil.
 50 |     @inlinable func compactMapSet<T>(transform closure: (Element) throws -> T?) rethrows -> Set<T> {
 51 |         // Capacity is reserved for the case in which no element maps to nil.
 52 |         var result = Set<T>(minimumCapacity: self.count)
 53 |         for element in self {
 54 |             guard let transformed = try closure(element) else {
 55 |                 continue
 56 |             }
 57 |             result.update(with: transformed)
 58 |         }
 59 |         return result
 60 |     }
 61 | }
 62 | 
 63 | public extension MutableCollection {
 64 |     /// Calls `closure` (argument label `transform`) once per element, in order, passing each element `inout` so it can be modified in place; rethrows its errors.
 65 |     @inlinable mutating func mutatingForeach(transform closure: (inout Element) throws -> Void) rethrows {
 66 |         var index = self.startIndex // stepped by hand: looping over `indices` can retain `self` and force a copy on mutation
 67 |         while index != self.endIndex {
 68 |             try closure(&self[index])
 69 |             self.formIndex(after: &index)
 70 |         }
 71 |     }
 72 | }
 73 | 
 74 | public extension Collection {
 75 |     /// Returns the trailing run of elements that satisfy the given predicate, scanning backwards from the end.
 76 |     /// - Parameter condition: A closure that takes an element and returns whether it belongs to the suffix.
 77 |     /// - Returns: The SubSequence from just after the last element that fails `condition` up to `endIndex`.
 78 |     /// - Note: Steps backwards via `index(_:offsetBy: -1)`. NOTE(review): negative offsets are documented as valid only for bidirectional collections — confirm all callers qualify.
 79 |     @inlinable func suffix(while condition: (Element) -> Bool) -> SubSequence {
 80 |         var left = self.endIndex
 81 |         while left != self.startIndex, condition(self[self.index(left, offsetBy: -1)]) {
 82 |             left = self.index(left, offsetBy: -1)
 83 |         }
 84 |         return self[left ..< self.endIndex]
 85 |     }
 86 | }
 87 | 
 88 | public extension Collection where Self.Element: Equatable {
 89 |     /// Returns a Bool value indicating whether the collection has the given prefix.
 90 |     /// - Parameters:
 91 |     ///   - prefix: A collection to search for at the start of this collection.
 92 |     /// - Returns: `true` if the leading elements equal `prefix` element by element (always `true` for an empty `prefix`).
 93 |     @inlinable func hasPrefix(_ prefix: some Collection<Element>) -> Bool {
 94 |         // `starts(with:)` compares in a single pass and already answers `false` when this
 95 |         // collection ends before `prefix` does, so no separate `count` comparison is needed.
 96 |         self.starts(with: prefix)
 97 |     }
 98 | 
 99 |     /// Returns a Bool value indicating whether the collection has the given suffix.
100 |     /// - Parameters:
101 |     ///   - suffix: A collection to search for at the end of this collection.
102 |     /// - Returns: `true` if the trailing elements equal `suffix` element by element (always `true` for an empty `suffix`).
103 |     /// - Note: Only forward traversal is used, so this also works on collections that are not bidirectional.
104 |     @inlinable func hasSuffix(_ suffix: some Collection<Element>) -> Bool {
105 |         let selfCount = self.count
106 |         let suffixCount = suffix.count
107 |         // A suffix longer than the collection can never match.
108 |         guard selfCount >= suffixCount else {
109 |             return false
110 |         }
111 |         // Walk forward to where the suffix would begin. A negative offset from `endIndex`
112 |         // is only permitted on bidirectional collections and traps on any other collection.
113 |         let start = self.index(self.startIndex, offsetBy: selfCount - suffixCount)
114 |         // From `start` on, exactly `suffixCount` elements remain, so compare them pairwise.
115 |         return self[start...].elementsEqual(suffix)
116 |     }
117 | 
118 |     /// Returns an Array containing the common prefix of this collection and the given collection.
119 |     /// - Parameters:
120 |     ///   - collection: A collection to search for a common prefix with this collection.
121 |     /// - Returns: The leading elements shared by both collections, ending at the first mismatch
122 |     ///   or where the shorter collection ends; empty if the first elements already differ.
123 |     @inlinable func commonPrefix(with collection: some Collection<Element>) -> [Element] {
124 |         var prefix: [Element] = []
125 |         // `zip` stops at the shorter side, so the scan is a single linear pass that needs
126 |         // neither `count` nor index arithmetic on `collection`.
127 |         for (lhs, rhs) in zip(self, collection) {
128 |             guard lhs == rhs else {
129 |                 break
130 |             }
131 |             prefix.append(lhs)
132 |         }
133 |         return prefix
134 |     }
135 | }
136 | 


--------------------------------------------------------------------------------
/Sources/SwiftUtils/DataUtils.swift:
--------------------------------------------------------------------------------
 1 | //
 2 | //  extension Data.swift
 3 | //  azooKey
 4 | //
 5 | //  Created by ensan on 2022/10/22.
 6 | //  Copyright © 2022 ensan. All rights reserved.
 7 | //
 8 | 
 9 | import Foundation
10 | 
11 | extension Data {
12 |     /// Reinterprets the bytes of this data as an array of `T`.
13 |     /// - Parameter type: The element type; the bytes are assumed to already hold valid, suitably aligned `T` values.
14 |     /// - Returns: `count / MemoryLayout<T>.stride` elements (trailing bytes are ignored), or `[]` for empty data.
15 |     @inlinable public func toArray<T>(of type: T.Type) -> [T] {
16 |         self.withUnsafeBytes { pointer -> [T] in
17 |             // Empty data can expose a nil base address; force-unwrapping it would trap.
18 |             guard let base = pointer.baseAddress else {
19 |                 return []
20 |             }
21 |             // Buffer elements are spaced by stride, so dividing by size could read past the end.
22 |             let count = pointer.count / MemoryLayout<T>.stride
23 |             return Array(UnsafeBufferPointer(start: base.assumingMemoryBound(to: type), count: count))
24 |         }
25 |     }
26 | }
27 | 


--------------------------------------------------------------------------------
/Sources/SwiftUtils/Debug.swift:
--------------------------------------------------------------------------------
 1 | //
 2 | //  Debug.swift
 3 | //
 4 | //
 5 | //  Created by ensan on 2023/04/30.
 6 | //
 7 | 
 8 | import Foundation
 9 | 
10 | /// Prints the given items to the standard output if the build setting "DEBUG" is set.
11 | /// - Parameter:
12 | ///   - items: The items to print; they are written on one line, separated by single spaces.
13 | /// - Note: This function is always preferred over `print` in the codebase.
14 | /// - Note: Items are joined like `print` does with its default separator, including items whose description is empty.
15 | /// - Note: Without "DEBUG" the body is compiled out, but the arguments are still evaluated by the caller.
16 | @_disfavoredOverload
17 | @inlinable public func debug(_ items: Any...) {
18 |     #if DEBUG
19 |     // Join by position rather than by "is the text so far empty?": with the latter, the
20 |     // separator after a leading item whose description is "" was silently dropped,
21 |     // unlike `print(_:separator:)`.
22 |     // Build the whole line first so that it is emitted by a single `print` call.
23 |     let line = items
24 |         .map { "\($0)" }
25 |         .joined(separator: " ")
26 |     print(line)
27 |     #endif
28 | }
29 | 
30 | @inlinable public func debug(_ item1: @autoclosure () -> Any) { // autoclosure: the argument expression is evaluated only inside `#if DEBUG`
31 |     #if DEBUG
32 |     print(item1())
33 |     #endif
34 | }
35 | @inlinable public func debug(_ item1: @autoclosure () -> Any, _ item2: @autoclosure () -> Any) {
36 |     #if DEBUG
37 |     print(item1(), item2())
38 |     #endif
39 | }
40 | @inlinable public func debug(_ item1: @autoclosure () -> Any, _ item2: @autoclosure () -> Any, _ item3: @autoclosure () -> Any) {
41 |     #if DEBUG
42 |     print(item1(), item2(), item3())
43 |     #endif
44 | }
45 | @inlinable public func debug(_ item1: @autoclosure () -> Any, _ item2: @autoclosure () -> Any, _ item3: @autoclosure () -> Any, _ item4: @autoclosure () -> Any) {
46 |     #if DEBUG
47 |     print(item1(), item2(), item3(), item4())
48 |     #endif
49 | }
50 | @inlinable public func debug(_ item1: @autoclosure () -> Any, _ item2: @autoclosure () -> Any, _ item3: @autoclosure () -> Any, _ item4: @autoclosure () -> Any, _ item5: @autoclosure () -> Any) {
51 |     #if DEBUG
52 |     print(item1(), item2(), item3(), item4(), item5())
53 |     #endif
54 | }
55 | @inlinable public func debug(_ item1: @autoclosure () -> Any, _ item2: @autoclosure () -> Any, _ item3: @autoclosure () -> Any, _ item4: @autoclosure () -> Any, _ item5: @autoclosure () -> Any, _ item6: @autoclosure () -> Any) {
56 |     #if DEBUG
57 |     print(item1(), item2(), item3(), item4(), item5(), item6())
58 |     #endif
59 | }
60 | @inlinable public func debug(_ item1: @autoclosure () -> Any, _ item2: @autoclosure () -> Any, _ item3: @autoclosure () -> Any, _ item4: @autoclosure () -> Any, _ item5: @autoclosure () -> Any, _ item6: @autoclosure () -> Any, _ item7: @autoclosure () -> Any) {
61 |     #if DEBUG
62 |     print(item1(), item2(), item3(), item4(), item5(), item6(), item7())
63 |     #endif
64 | }
65 | @inlinable public func debug(_ item1: @autoclosure () -> Any, _ item2: @autoclosure () -> Any, _ item3: @autoclosure () -> Any, _ item4: @autoclosure () -> Any, _ item5: @autoclosure () -> Any, _ item6: @autoclosure () -> Any, _ item7: @autoclosure () -> Any, _ item8: @autoclosure () -> Any) {
66 |     #if DEBUG
67 |     print(item1(), item2(), item3(), item4(), item5(), item6(), item7(), item8())
68 |     #endif
69 | }
70 | @inlinable public func debug(_ item1: @autoclosure () -> Any, _ item2: @autoclosure () -> Any, _ item3: @autoclosure () -> Any, _ item4: @autoclosure () -> Any, _ item5: @autoclosure () -> Any, _ item6: @autoclosure () -> Any, _ item7: @autoclosure () -> Any, _ item8: @autoclosure () -> Any, _ item9: @autoclosure () -> Any) {
71 |     #if DEBUG
72 |     print(item1(), item2(), item3(), item4(), item5(), item6(), item7(), item8(), item9())
73 |     #endif
74 | }
75 | @inlinable public func debug(_ item1: @autoclosure () -> Any, _ item2: @autoclosure () -> Any, _ item3: @autoclosure () -> Any, _ item4: @autoclosure () -> Any, _ item5: @autoclosure () -> Any, _ item6: @autoclosure () -> Any, _ item7: @autoclosure () -> Any, _ item8: @autoclosure () -> Any, _ item9: @autoclosure () -> Any, _ item10: @autoclosure () -> Any) {
76 |     #if DEBUG
77 |     print(item1(), item2(), item3(), item4(), item5(), item6(), item7(), item8(), item9(), item10())
78 |     #endif
79 | }
80 | 


--------------------------------------------------------------------------------
/Sources/SwiftUtils/StringUtils.swift:
--------------------------------------------------------------------------------
  1 | //
  2 | //  extension StringProtocol.swift
  3 | //  Keyboard
  4 | //
  5 | //  Created by ensan on 2020/10/16.
  6 | //  Copyright © 2020 ensan. All rights reserved.
  7 | //
  8 | 
  9 | import Foundation
 10 | 
 11 | public extension StringProtocol {
 12 |     /// Whether the string consists only of Roman letters and digits.
 13 |     ///  - note: Returns `false` for an empty string.
 14 |     @inlinable
 15 |     var onlyRomanAlphabetOrNumber: Bool {
 16 |         !isEmpty && range(of: "[^a-zA-Z0-9]", options: .regularExpression) == nil
 17 |     }
 18 |     /// Whether the string consists only of Roman letters.
 19 |     ///  - note: Returns `false` for an empty string.
 20 |     @inlinable
 21 |     var onlyRomanAlphabet: Bool {
 22 |         !isEmpty && range(of: "[^a-zA-Z]", options: .regularExpression) == nil
 23 |     }
 24 |     /// Whether the string contains at least one Roman letter.
 25 |     ///  - note: Returns `false` for an empty string.
 26 |     /// This used to be regex-based, but that performed poorly; switching to the implementation below made it roughly 40x faster.
 27 |     @inlinable
 28 |     var containsRomanAlphabet: Bool {
 29 |         for value in self.utf8 {
 30 |             if (UInt8(ascii: "a") <= value && value <= UInt8(ascii: "z")) || (UInt8(ascii: "A") <= value && value <= UInt8(ascii: "Z")) {
 31 |                 return true
 32 |             }
 33 |         }
 34 |         return false
 35 |     }
 36 |     /// Whether the string consists only of characters acceptable in English text.
 37 |     ///  - note: Returns `false` for an empty string.
 38 |     @inlinable
 39 |     var isEnglishSentence: Bool {
 40 |         !isEmpty && range(of: "[^0-9a-zA-Z\n !'_<>\\[\\]{}*@`\\^|~=\"#$%&\\+\\(\\),\\-\\./:;?’\\\\]", options: .regularExpression) == nil
 41 |     }
 42 | 
 43 |     /// Whether the string consists only of kana (the ranges ぁ-ゖ and ァ-ヶ); `false` for an empty string.
 44 |     @inlinable
 45 |     var isKana: Bool {
 46 |         !isEmpty && range(of: "[^ぁ-ゖァ-ヶ]", options: .regularExpression) == nil
 47 |     }
 48 | 
 49 |     /// Returns a String value in which Hiraganas are all converted to Katakana.
 50 |     /// - Returns: A String value in which Hiraganas are all converted to Katakana.
 51 |     @inlinable func toKatakana() -> String {
 52 |         // Katakana always takes 2 bytes in UTF-16, so it is fine to process the string per UTF-16 code unit
 53 |         let result = self.utf16.map { scalar -> UInt16 in
 54 |             if 0x3041 <= scalar && scalar <= 0x3096 {
 55 |                 return scalar + 96
 56 |             } else {
 57 |                 return scalar
 58 |             }
 59 |         }
 60 |         return String(utf16CodeUnits: result, count: result.count)
 61 |     }
 62 | 
 63 |     /// Returns a String value in which Katakana are all converted to Hiragana.
 64 |     /// - Returns: A String value in which Katakana are all converted to Hiragana.
 65 |     @inlinable func toHiragana() -> String {
 66 |         // Hiragana always takes 2 bytes in UTF-16, so it is fine to process the string per UTF-16 code unit
 67 |         let result = self.utf16.map { scalar -> UInt16 in
 68 |             if 0x30A1 <= scalar && scalar <= 0x30F6 {
 69 |                 return scalar - 96
 70 |             } else {
 71 |                 return scalar
 72 |             }
 73 |         }
 74 |         return String(utf16CodeUnits: result, count: result.count)
 75 |     }
 76 | 
 77 |     // FIXME: This is a legacy implementation that should be dealt with somehow. Migrate...?
 78 |     // Escape table (each right-hand side is the literal two characters backslash + letter), used to avoid crashes caused by user-entered text:
 79 |     /*
 80 |      \ -> \b
 81 |      NUL -> \0
 82 |      newline -> \n
 83 |      tab -> \t
 84 |      , -> \c
 85 |      space -> \s
 86 |      " -> \d
 87 |      */
 88 |     func templateDataSpecificEscaped() -> String {
 89 |         var result = self.replacingOccurrences(of: "\\", with: "\\b") // backslash first, so the backslashes added by the later steps are not escaped again
 90 |         result = result.replacingOccurrences(of: "\0", with: "\\0")
 91 |         result = result.replacingOccurrences(of: "\n", with: "\\n")
 92 |         result = result.replacingOccurrences(of: "\t", with: "\\t")
 93 |         result = result.replacingOccurrences(of: ",", with: "\\c")
 94 |         result = result.replacingOccurrences(of: " ", with: "\\s")
 95 |         result = result.replacingOccurrences(of: "\"", with: "\\d")
 96 |         return result
 97 |     }
 98 | 
 99 |     func templateDataSpecificUnescaped() -> String {
100 |         var result = self.replacingOccurrences(of: "\\d", with: "\"") // undo the escapes in the reverse order of templateDataSpecificEscaped()
101 |         result = result.replacingOccurrences(of: "\\s", with: " ")
102 |         result = result.replacingOccurrences(of: "\\c", with: ",")
103 |         result = result.replacingOccurrences(of: "\\t", with: "\t")
104 |         result = result.replacingOccurrences(of: "\\n", with: "\n")
105 |         result = result.replacingOccurrences(of: "\\0", with: "\0")
106 |         result = result.replacingOccurrences(of: "\\b", with: "\\") // backslash last, so a restored backslash cannot pair with the next letter into another escape
107 |         return result
108 |     }
109 | }
110 | 


--------------------------------------------------------------------------------
/Sources/SwiftUtils/WithMutableValue.swift:
--------------------------------------------------------------------------------
 1 | //
 2 | //  WithMutableValue.swift
 3 | //  azooKey
 4 | //
 5 | //  Created by ensan on 2022/10/10.
 6 | //  Copyright © 2022 ensan. All rights reserved.
 7 | //
 8 | 
 9 | import Foundation
10 | 
11 | /// Passes the given value to `process` as an `inout` argument so that it can be modified in place; returns nothing.
12 | /// - Parameters:
13 | ///   - value: The value to modify.
14 | ///   - process: The closure that modifies the value; it is called exactly once.
15 | /// - Note: Intended for cases where a specific subscript setter would otherwise be called multiple times.
16 | @inlinable public func withMutableValue<T>(_ value: inout T, process: (inout T) -> Void) {
17 |     process(&value)
18 | }
19 | 


--------------------------------------------------------------------------------
/Sources/llama.cpp/ggml-alloc.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include "ggml.h"
 4 | 
 5 | #ifdef  __cplusplus
 6 | extern "C" {
 7 | #endif
 8 | 
 9 | typedef struct ggml_backend_buffer_type * ggml_backend_buffer_type_t;
10 | typedef struct      ggml_backend_buffer * ggml_backend_buffer_t;
11 | typedef struct             ggml_backend * ggml_backend_t;
12 | 
13 | // Tensor allocator: created by ggml_tallocr_new(buffer) and passed to ggml_tallocr_alloc() together with a tensor
14 | struct ggml_tallocr {
15 |     ggml_backend_buffer_t buffer;
16 |     void * base;
17 |     size_t alignment;
18 |     size_t offset;
19 | };
20 | 
21 | GGML_API struct ggml_tallocr ggml_tallocr_new(ggml_backend_buffer_t buffer);
22 | GGML_API void                ggml_tallocr_alloc(struct ggml_tallocr * talloc, struct ggml_tensor * tensor);
23 | 
24 | // Graph allocator
25 | /*
26 |   Example usage:
27 |     ggml_gallocr_t galloc = ggml_gallocr_new(ggml_backend_cpu_buffer_type());
28 | 
29 |     // optional: create a worst-case graph and reserve the buffers to avoid reallocations
30 |     ggml_gallocr_reserve(galloc, build_graph(max_batch));
31 | 
32 |     // allocate the graph
33 |     struct ggml_cgraph * graph = build_graph(batch);
34 |     ggml_gallocr_alloc_graph(galloc, graph);
35 | 
36 |     printf("compute buffer size: %zu bytes\n", ggml_gallocr_get_buffer_size(galloc, 0));
37 | 
38 |     // evaluate the graph
39 |     ggml_backend_graph_compute(backend, graph);
40 | */
41 | 
42 | // special tensor flags for use with the graph allocator:
43 | //   ggml_set_input(): all input tensors are allocated at the beginning of the graph in non-overlapping addresses
44 | //   ggml_set_output(): output tensors are never freed and never overwritten
45 | 
46 | typedef struct ggml_gallocr * ggml_gallocr_t;
47 | 
48 | GGML_API ggml_gallocr_t ggml_gallocr_new(ggml_backend_buffer_type_t buft);
49 | GGML_API ggml_gallocr_t ggml_gallocr_new_n(ggml_backend_buffer_type_t * bufts, int n_bufs);
50 | GGML_API void           ggml_gallocr_free(ggml_gallocr_t galloc);
51 | 
52 | // pre-allocate buffers from a measure graph - does not allocate or modify the graph
53 | // call with a worst-case graph to avoid buffer reallocations
54 | // not strictly required for single buffer usage: ggml_gallocr_alloc_graph will reallocate the buffers automatically if needed
55 | // returns false if the buffer allocation failed
56 | GGML_API bool ggml_gallocr_reserve(ggml_gallocr_t galloc, struct ggml_cgraph * graph);
57 | GGML_API bool ggml_gallocr_reserve_n(
58 |     ggml_gallocr_t galloc,
59 |     struct ggml_cgraph * graph,
60 |     const int * node_buffer_ids,
61 |     const int * leaf_buffer_ids);
62 | 
63 | // automatic reallocation if the topology changes when using a single buffer
64 | // returns false if using multiple buffers and a re-allocation is needed (call ggml_gallocr_reserve_n first to set the node buffers)
65 | GGML_API bool ggml_gallocr_alloc_graph(ggml_gallocr_t galloc, struct ggml_cgraph * graph);
66 | 
67 | GGML_API size_t ggml_gallocr_get_buffer_size(ggml_gallocr_t galloc, int buffer_id);
68 | 
69 | // Utils
70 | // Create a buffer and allocate all the tensors in a ggml_context
71 | GGML_API struct ggml_backend_buffer * ggml_backend_alloc_ctx_tensors_from_buft(struct ggml_context * ctx, ggml_backend_buffer_type_t buft);
72 | GGML_API struct ggml_backend_buffer * ggml_backend_alloc_ctx_tensors(struct ggml_context * ctx, ggml_backend_t backend);
73 | 
74 | #ifdef  __cplusplus
75 | }
76 | #endif
77 | 


--------------------------------------------------------------------------------
/Sources/llama.cpp/ggml-cpp.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #ifndef __cplusplus
 4 | #error "This header is for C++ only"
 5 | #endif
 6 | 
 7 | #include "ggml.h"
 8 | #include "ggml-alloc.h"
 9 | #include "ggml-backend.h"
10 | #include "gguf.h"
11 | #include <memory>
12 | 
13 | // Smart pointers for ggml types: each typedef pairs a ggml object with a deleter that calls the matching *_free function
14 | 
15 | // ggml
16 | 
17 | struct ggml_context_deleter { void operator()(ggml_context * ctx) { ggml_free(ctx); } };
18 | struct gguf_context_deleter { void operator()(gguf_context * ctx) { gguf_free(ctx); } };
19 | 
20 | typedef std::unique_ptr<ggml_context, ggml_context_deleter> ggml_context_ptr;
21 | typedef std::unique_ptr<gguf_context, gguf_context_deleter> gguf_context_ptr;
22 | 
23 | // ggml-alloc
24 | 
25 | struct ggml_gallocr_deleter { void operator()(ggml_gallocr_t galloc) { ggml_gallocr_free(galloc); } };
26 | // unique_ptr<T, D> stores a T*, so T must be the pointee struct (ggml_gallocr), not the pointer typedef ggml_gallocr_t
27 | typedef std::unique_ptr<ggml_gallocr, ggml_gallocr_deleter> ggml_gallocr_ptr;
28 | 
29 | // ggml-backend
30 | 
31 | struct ggml_backend_deleter        { void operator()(ggml_backend_t backend)       { ggml_backend_free(backend); } };
32 | struct ggml_backend_buffer_deleter { void operator()(ggml_backend_buffer_t buffer) { ggml_backend_buffer_free(buffer); } };
33 | struct ggml_backend_event_deleter  { void operator()(ggml_backend_event_t event)   { ggml_backend_event_free(event); } };
34 | struct ggml_backend_sched_deleter  { void operator()(ggml_backend_sched_t sched)   { ggml_backend_sched_free(sched); } };
35 | 
36 | typedef std::unique_ptr<ggml_backend,        ggml_backend_deleter>        ggml_backend_ptr;
37 | typedef std::unique_ptr<ggml_backend_buffer, ggml_backend_buffer_deleter> ggml_backend_buffer_ptr;
38 | typedef std::unique_ptr<ggml_backend_event,  ggml_backend_event_deleter>  ggml_backend_event_ptr;
39 | typedef std::unique_ptr<ggml_backend_sched,  ggml_backend_sched_deleter>  ggml_backend_sched_ptr;
40 | 


--------------------------------------------------------------------------------
/Sources/llama.cpp/ggml-metal.h:
--------------------------------------------------------------------------------
 1 | // Note: this description is outdated
 2 | //
 3 | // An interface allowing to compute ggml_cgraph with Metal
 4 | //
 5 | // This is a fully functional interface that extends ggml with GPU support for Apple devices.
 6 | // A similar interface can be created for other GPU backends (e.g. Vulkan, CUDA, etc.)
 7 | //
 8 | // How it works?
 9 | //
10 | // As long as your program can create and evaluate a ggml_cgraph on the CPU, you can use this
11 | // interface to evaluate the same graph on the GPU. Instead of using ggml_graph_compute(), you
12 | // use ggml_metal_graph_compute() (or ggml_vulkan_graph_compute(), etc.)
13 | //
14 | // You only need to make sure that all memory buffers that you used during the graph creation
15 | // are mapped to the device memory with the ggml_metal_add_buffer() function. This mapping is
16 | // used during the graph evaluation to determine the arguments of the compute kernels.
17 | //
18 | // Synchronization between device and host memory (for example for input and output tensors)
19 | // is done with the ggml_metal_set_tensor() and ggml_metal_get_tensor() functions.
20 | //
21 | 
22 | #pragma once
23 | 
24 | #include "ggml.h"
25 | #include "ggml-backend.h"
26 | 
27 | #include <stddef.h>
28 | #include <stdbool.h>
29 | 
30 | struct ggml_tensor;
31 | struct ggml_cgraph;
32 | 
33 | #ifdef __cplusplus
34 | extern "C" {
35 | #endif
36 | 
37 | //
38 | // backend API
39 | // user-code should use only these functions
40 | //
41 | 
42 | GGML_BACKEND_API ggml_backend_t ggml_backend_metal_init(void);
43 | 
44 | GGML_BACKEND_API bool ggml_backend_is_metal(ggml_backend_t backend);
45 | 
46 | GGML_DEPRECATED(
47 |         GGML_BACKEND_API ggml_backend_buffer_t ggml_backend_metal_buffer_from_ptr(void * data, size_t size, size_t max_size),
48 |         "obsoleted by the new device interface - https://github.com/ggml-org/llama.cpp/pull/9713");
49 | 
50 | GGML_BACKEND_API void ggml_backend_metal_set_abort_callback(ggml_backend_t backend, ggml_abort_callback abort_callback, void * user_data);
51 | 
52 | GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_metal_buffer_type(void);
53 | 
54 | // helper to check if the device supports a specific family
55 | // ideally, the user code should be doing these checks
56 | // ref: https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf
57 | GGML_BACKEND_API bool ggml_backend_metal_supports_family(ggml_backend_t backend, int family);
58 | 
59 | // capture all command buffers committed the next time `ggml_backend_graph_compute` is called
60 | GGML_BACKEND_API void ggml_backend_metal_capture_next_compute(ggml_backend_t backend);
61 | 
62 | GGML_BACKEND_API ggml_backend_reg_t ggml_backend_metal_reg(void);
63 | 
64 | #ifdef __cplusplus
65 | }
66 | #endif
67 | 


--------------------------------------------------------------------------------
/Sources/llama.cpp/module.modulemap:
--------------------------------------------------------------------------------
 1 | module llama [system] {
 2 |     header "llama.h"
 3 |     header "ggml.h"
 4 |     header "ggml-alloc.h"
 5 |     header "ggml-backend.h"
 6 | 
 7 |     link "llama"
 8 | 
 9 |     export *
10 | }


--------------------------------------------------------------------------------
/Tests/EfficientNGramTests/EfficientNGramTests.swift:
--------------------------------------------------------------------------------
 1 | import XCTest
 2 | @testable import EfficientNGram
 3 | import Tokenizers
 4 | 
 5 | /// Tokenizer checks for the EfficientNGram module.
 6 | class SwiftNGramTests: XCTestCase {
 7 | #if canImport(SwiftyMarisa)
 8 |     /// Encodes a fixed Japanese sentence, compares the ids against the expected
 9 |     /// sequence, then decodes those ids and expects the original sentence back.
10 |     func testTokenizers() throws {
11 |         let sentence = "これは日本語です"
12 |         let zenz = ZenzTokenizer()
13 |         let encoded = zenz.encode(text: sentence)
14 |         XCTAssertEqual(encoded, [268, 262, 253, 304, 358, 698, 246, 255])
15 |         let decoded = zenz.decode(tokens: encoded)
16 |         XCTAssertEqual(decoded, sentence)
17 |     }
18 | #endif
19 | }
20 | 


--------------------------------------------------------------------------------
/Tests/KanaKanjiConverterModuleTests/ClauseDataUnitTests.swift:
--------------------------------------------------------------------------------
 1 | //
 2 | //  ClauseDataUnitTests.swift
 3 | //  KanaKanjiConverterModuleTests
 4 | //
 5 | //  Created by ensan on 2022/12/30.
 6 | //  Copyright © 2022 ensan. All rights reserved.
 7 | //
 8 | 
 9 | @testable import KanaKanjiConverterModule
10 | import XCTest
11 | 
12 | /// Unit tests for `ClauseDataUnit`.
13 | final class ClauseDataUnitTests: XCTestCase {
14 |     /// After `merge(with:)`, the receiver is expected to hold the concatenated text,
15 |     /// an input range running to the end of the merged unit, and the merged unit's
16 |     /// `nextLcid`, while keeping its own `mid`.
17 |     func testMerge() throws {
18 |         // Case 1: "僕が" followed by "走る".
19 |         do {
20 |             let following = ClauseDataUnit()
21 |             following.text = "走る"
22 |             following.inputRange = 3 ..< 6
23 |             following.mid = 1
24 |             following.nextLcid = 1
25 | 
26 |             let leading = ClauseDataUnit()
27 |             leading.text = "僕が"
28 |             leading.inputRange = 0 ..< 3
29 |             leading.mid = 0
30 |             leading.nextLcid = 0
31 | 
32 |             leading.merge(with: following)
33 |             XCTAssertEqual(leading.text, "僕が走る")
34 |             XCTAssertEqual(leading.inputRange, 0 ..< 6)
35 |             XCTAssertEqual(leading.nextLcid, 1)
36 |             XCTAssertEqual(leading.mid, 0)
37 |         }
38 | 
39 |         // Case 2: "君は" followed by "笑った".
40 |         do {
41 |             let following = ClauseDataUnit()
42 |             following.text = "笑った"
43 |             following.inputRange = 3 ..< 7
44 |             following.mid = 3
45 |             following.nextLcid = 3
46 | 
47 |             let leading = ClauseDataUnit()
48 |             leading.text = "君は"
49 |             leading.inputRange = 0 ..< 3
50 |             leading.mid = 0
51 |             leading.nextLcid = 0
52 | 
53 |             leading.merge(with: following)
54 |             XCTAssertEqual(leading.text, "君は笑った")
55 |             XCTAssertEqual(leading.inputRange, 0 ..< 7)
56 |             XCTAssertEqual(leading.nextLcid, 3)
57 |             XCTAssertEqual(leading.mid, 0)
58 |         }
59 |     }
60 | }
61 | 


--------------------------------------------------------------------------------
/Tests/KanaKanjiConverterModuleTests/ConverterTests/CandidateTests.swift:
--------------------------------------------------------------------------------
 1 | //
 2 | //  CandidateTests.swift
 3 | //  
 4 | //
 5 | //  Created by miwa on 2023/08/16.
 6 | //
 7 | 
 8 | import XCTest
 9 | @testable import KanaKanjiConverterModule
10 | 
11 | /// Tests for `Candidate`.
12 | final class CandidateTests: XCTestCase {
13 |     /// Template parsing.
14 |     func testParseTemplate() throws {
15 |         // A random-integer template over 1...3 must expand to "1", "2" or "3"
16 |         // while every other compared field of the candidate stays equal.
17 |         do {
18 |             let template = #"<random type="int" value="1,3">"#
19 |             let element = DicdataElement(word: template, ruby: "サイコロ", cid: 0, mid: 5, value: -40)
20 |             let source = Candidate(
21 |                 text: template,
22 |                 value: -40,
23 |                 correspondingCount: 4,
24 |                 lastMid: 5,
25 |                 data: [element]
26 |             )
27 |             let allowedTexts: Set<String> = Set((1...3).map(String.init))
28 |             // The result is random, so run the check repeatedly.
29 |             for _ in 0 ..< 10 {
30 |                 var parsed = source
31 |                 parsed.parseTemplate()
32 |                 print(parsed.text)
33 |                 XCTAssertTrue(allowedTexts.contains(parsed.text))
34 |                 XCTAssertEqual(source.value, parsed.value)
35 |                 XCTAssertEqual(source.correspondingCount, parsed.correspondingCount)
36 |                 XCTAssertEqual(source.lastMid, parsed.lastMid)
37 |                 XCTAssertEqual(source.data, parsed.data)
38 |                 XCTAssertEqual(source.actions, parsed.actions)
39 |             }
40 |         }
41 |         // A backslash followed by "n" (two characters) must pass through unchanged.
42 |         do {
43 |             let literal = #"\n"#
44 |             let element = DicdataElement(word: literal, ruby: "", cid: 0, mid: 0, value: 0)
45 |             let source = Candidate(
46 |                 text: literal,
47 |                 value: 0,
48 |                 correspondingCount: 0,
49 |                 lastMid: 0,
50 |                 data: [element]
51 |             )
52 |             var parsed = source
53 |             parsed.parseTemplate()
54 |             XCTAssertEqual(source.text, parsed.text)
55 |         }
56 |     }
57 | }
58 | 


--------------------------------------------------------------------------------
/Tests/KanaKanjiConverterModuleTests/ConverterTests/CommaSeparatedNumberTests.swift:
--------------------------------------------------------------------------------
 1 | import XCTest
 2 | @testable import KanaKanjiConverterModule
 3 | 
 4 | /// Tests for the comma-separated number candidates of `KanaKanjiConverter`.
 5 | final class CommaSeparatedNumberTests: XCTestCase {
 6 |     /// Builds a `ComposingText` in which every character of `input` is a
 7 |     /// direct-style element and the cursor position equals the character count.
 8 |     private func makeDirectInput(direct input: String) -> ComposingText {
 9 |         return ComposingText(
10 |             convertTargetCursorPosition: input.count,
11 |             input: input.map { character in .init(character: character, inputStyle: .direct) },
12 |             convertTarget: input
13 |         )
14 |     }
15 | 
16 |     /// Checks the first candidate for inputs that should be grouped, and checks
17 |     /// that the remaining inputs yield no candidate at all.
18 |     func testCommaSeparatedNumberCandidates() async throws {
19 |         let converter = await KanaKanjiConverter()
20 | 
21 |         // Inputs whose first candidate must be the digit-grouped form.
22 |         let grouped: [(input: String, expected: String)] = [
23 |             ("49000", "49,000"),
24 |             ("109428081", "109,428,081"),
25 |             ("2129.49", "2,129.49"),
26 |             ("-13932", "-13,932")
27 |         ]
28 |         for testCase in grouped {
29 |             let candidates = await converter.commaSeparatedNumberCandidates(makeDirectInput(direct: testCase.input))
30 |             XCTAssertEqual(candidates.first?.text, testCase.expected)
31 |         }
32 | 
33 |         // Inputs for which no candidate is expected.
34 |         for input in ["12", "1A9B", "１２３"] {
35 |             let candidates = await converter.commaSeparatedNumberCandidates(makeDirectInput(direct: input))
36 |             XCTAssertTrue(candidates.isEmpty)
37 |         }
38 |     }
39 | }
40 | 


--------------------------------------------------------------------------------
/Tests/KanaKanjiConverterModuleTests/ConverterTests/ConvesionTests.swift:
--------------------------------------------------------------------------------
 1 | //
 2 | //  ConversionTests.swift
 3 | //  
 4 | //
 5 | //  Created by miwa on 2023/08/16.
 6 | //
 7 | 
 8 | import XCTest
 9 | @testable import KanaKanjiConverterModule
10 | 
11 | /// Conversion checks that run against the `DictionaryMock` resources.
12 | final class ConverterTests: XCTestCase {
13 |     /// Request options shared by the tests: learning is disabled and the
14 |     /// dictionary is read from the `DictionaryMock` directory of the test bundle.
15 |     func requestOptions() -> ConvertRequestOptions {
16 |         let mockDictionaryURL = Bundle(for: type(of: self)).bundleURL
17 |             .appendingPathComponent("DictionaryMock", isDirectory: true)
18 |         let emptyPathURL = URL(fileURLWithPath: "")
19 |         return ConvertRequestOptions(
20 |             N_best: 5,
21 |             requireJapanesePrediction: true,
22 |             requireEnglishPrediction: false,
23 |             keyboardLanguage: .ja_JP,
24 |             typographyLetterCandidate: false,
25 |             unicodeCandidate: true,
26 |             englishCandidateInRoman2KanaInput: true,
27 |             fullWidthRomanCandidate: false,
28 |             halfWidthKanaCandidate: false,
29 |             learningType: .nothing,
30 |             maxMemoryCount: 0,
31 |             shouldResetMemory: false,
32 |             dictionaryResourceURL: mockDictionaryURL,
33 |             memoryDirectoryURL: emptyPathURL,
34 |             sharedContainerURL: emptyPathURL,
35 |             metadata: nil
36 |         )
37 |     }
38 | 
39 |     // Cases that must never come out of a conversion.
40 |     func testMustNotCases() async throws {
41 |         // Typing a backslash followed by "n" must not produce a candidate that is a real newline.
42 |         let converter = await KanaKanjiConverter()
43 |         var composingText = ComposingText()
44 |         composingText.insertAtCursorPosition("\\n", inputStyle: .direct)
45 |         let results = await converter.requestCandidates(composingText, options: requestOptions())
46 |         let producedNewline = results.mainResults.contains { $0.text == "\n" }
47 |         XCTAssertFalse(producedNewline)
48 |     }
49 | 
50 | }
51 | 


--------------------------------------------------------------------------------
/Tests/KanaKanjiConverterModuleTests/ConverterTests/EmailAddressConversionTests.swift:
--------------------------------------------------------------------------------
 1 | import XCTest
 2 | @testable import KanaKanjiConverterModule
 3 | 
 4 | /// Tests for the e-mail address candidates of `KanaKanjiConverter`.
 5 | final class EmailAddressConversionTests: XCTestCase {
 6 |     /// Builds a `ComposingText` in which every character of `input` is a
 7 |     /// direct-style element and the cursor position equals the character count.
 8 |     func makeDirectInput(direct input: String) -> ComposingText {
 9 |         return ComposingText(
10 |             convertTargetCursorPosition: input.count,
11 |             input: input.map { character in .init(character: character, inputStyle: .direct) },
12 |             convertTarget: input
13 |         )
14 |     }
15 | 
16 |     /// For each input, the result must be non-empty, contain every address listed
17 |     /// under `included`, and contain none of the addresses listed under `excluded`.
18 |     func testtoEmailAddressCandidates() async throws {
19 |         let cases: [(input: String, included: [String], excluded: [String])] = [
20 |             // Inputs that end right after "@".
21 |             ("azooKey@", ["azooKey@gmail.com", "azooKey@icloud.com", "azooKey@yahoo.co.jp", "azooKey@i.softbank.jp"], []),
22 |             ("my.dev_az@", ["my.dev_az@gmail.com", "my.dev_az@icloud.com", "my.dev_az@yahoo.co.jp", "my.dev_az@i.softbank.jp"], []),
23 |             ("@", ["@gmail.com", "@icloud.com", "@yahoo.co.jp", "@i.softbank.jp"], []),
24 |             // Partial domain inputs.
25 |             ("azooKey@g", ["azooKey@gmail.com", "azooKey@googlemail.com"], ["azooKey@yahoo.co.jp"]),
26 |             ("azooKey@y", ["azooKey@yahoo.co.jp", "azooKey@yahoo.ne.jp", "azooKey@ybb.ne.jp", "azooKey@ymobile.ne.jp"], ["azooKey@gmail.com"])
27 |         ]
28 |         for testCase in cases {
29 |             // Every case gets its own converter instance.
30 |             let converter = await KanaKanjiConverter()
31 |             let result = await converter.toEmailAddressCandidates(makeDirectInput(direct: testCase.input))
32 |             XCTAssertFalse(result.isEmpty)
33 |             for address in testCase.included {
34 |                 XCTAssertTrue(result.contains(where: { $0.text == address }))
35 |             }
36 |             for address in testCase.excluded {
37 |                 XCTAssertFalse(result.contains(where: { $0.text == address }))
38 |             }
39 |         }
40 |     }
41 | }
42 | 


--------------------------------------------------------------------------------
/Tests/KanaKanjiConverterModuleTests/ConverterTests/TimeExpressionTests.swift:
--------------------------------------------------------------------------------
 1 | import XCTest
 2 | @testable import KanaKanjiConverterModule
 3 | 
 4 | /// Tests for the time-expression candidates of `KanaKanjiConverter`.
 5 | final class TimeExpressionTests: XCTestCase {
 6 |     /// Builds a `ComposingText` in which every character of `input` is a
 7 |     /// direct-style element and the cursor position equals the character count.
 8 |     private func makeDirectInput(direct input: String) -> ComposingText {
 9 |         return ComposingText(
10 |             convertTargetCursorPosition: input.count,
11 |             input: input.map { character in .init(character: character, inputStyle: .direct) },
12 |             convertTarget: input
13 |         )
14 |     }
15 | 
16 |     /// A non-nil expectation means exactly one candidate with that text;
17 |     /// `nil` means the input must yield no candidate.
18 |     func testConvertToTimeExpression() async throws {
19 |         let converter = await KanaKanjiConverter()
20 | 
21 |         let cases: [(input: String, expected: String?)] = [
22 |             ("123", "1:23"),
23 |             ("1234", "12:34"),
24 |             ("999", nil),
25 |             ("1260", nil),
26 |             ("2440", "24:40"),
27 |             ("", nil),
28 |             ("あいうえ", nil),
29 |             ("13122", nil)
30 |         ]
31 | 
32 |         for testCase in cases {
33 |             let candidates = await converter.convertToTimeExpression(makeDirectInput(direct: testCase.input))
34 |             if let expected = testCase.expected {
35 |                 XCTAssertEqual(candidates.count, 1)
36 |                 XCTAssertEqual(candidates.first?.text, expected)
37 |             } else {
38 |                 XCTAssertEqual(candidates.count, 0)
39 |             }
40 |         }
41 |     }
42 | }
43 | 


--------------------------------------------------------------------------------
/Tests/KanaKanjiConverterModuleTests/ConverterTests/UnicodeConversionTests.swift:
--------------------------------------------------------------------------------
 1 | //
 2 | //  UnicodeConversionTests.swift
 3 | //  azooKeyTests
 4 | //
 5 | //  Created by ensan on 2022/12/29.
 6 | //  Copyright © 2022 ensan. All rights reserved.
 7 | //
 8 | 
 9 | @testable import KanaKanjiConverterModule
10 | import XCTest
11 | 
12 | /// Tests for the Unicode code point candidates of `KanaKanjiConverter`.
13 | final class UnicodeConversionTests: XCTestCase {
14 |     /// Builds a `ComposingText` in which every character of `input` is a
15 |     /// direct-style element and the cursor position equals the character count.
16 |     func makeDirectInput(direct input: String) -> ComposingText {
17 |         return ComposingText(
18 |             convertTargetCursorPosition: input.count,
19 |             input: input.map { character in .init(character: character, inputStyle: .direct) },
20 |             convertTarget: input
21 |         )
22 |     }
23 | 
24 |     /// Each notation must yield exactly one candidate whose text is the expected character.
25 |     func testFromUnicode() async throws {
26 |         let cases: [(input: String, expected: String)] = [
27 |             ("U+3042", "あ"),
28 |             ("U+1F607", "😇"),
29 |             ("u+3042", "あ"),
30 |             ("U3042", "あ"),
31 |             ("u3042", "あ"),
32 |             ("U+61", "a"),
33 |             ("U+189", "Ɖ")
34 |         ]
35 |         for testCase in cases {
36 |             // Every case gets its own converter instance.
37 |             let converter = await KanaKanjiConverter()
38 |             let result = await converter.unicodeCandidates(makeDirectInput(direct: testCase.input))
39 |             XCTAssertEqual(result.count, 1)
40 |             XCTAssertEqual(result[0].text, testCase.expected)
41 |         }
42 |     }
43 | 
44 | }
45 | 


--------------------------------------------------------------------------------
/Tests/KanaKanjiConverterModuleTests/ConverterTests/WarekiConversionTests.swift:
--------------------------------------------------------------------------------
  1 | //
  2 | //  WarekiConversionTests.swift
  3 | //  azooKeyTests
  4 | //
  5 | //  Created by ensan on 2022/12/22.
  6 | //  Copyright © 2022 ensan. All rights reserved.
  7 | //
  8 | 
  9 | @testable import KanaKanjiConverterModule
 10 | import XCTest
 11 | 
 12 | /// Tests for conversion between Western calendar years and Japanese era years.
 13 | final class WarekiConversionTests: XCTestCase {
 14 |     /// Builds a `ComposingText` in which every character of `input` is a
 15 |     /// direct-style element and the cursor position equals the character count.
 16 |     func makeDirectInput(direct input: String) -> ComposingText {
 17 |         ComposingText(
 18 |             convertTargetCursorPosition: input.count,
 19 |             input: input.map {.init(character: $0, inputStyle: .direct)},
 20 |             convertTarget: input
 21 |         )
 22 |     }
 23 | 
 24 |     /// Western year -> era year. The candidate texts must match `expected`
 25 |     /// exactly and in order; an empty list marks an input that must be rejected.
 26 |     func testSeireki2Wareki() async throws {
 27 |         let cases: [(input: String, expected: [String])] = [
 28 |             ("2019ねん", ["令和元年", "平成31年"]),
 29 |             ("2020ねん", ["令和2年"]),
 30 |             ("2001ねん", ["平成13年"]),
 31 |             ("1945ねん", ["昭和20年"]),
 32 |             ("9999ねん", ["令和7981年"]),
 33 |             // invalid cases
 34 |             ("せいれき2001ねん", []),
 35 |             ("1582ねん", []),
 36 |             ("10000ねん", [])
 37 |         ]
 38 |         for testCase in cases {
 39 |             // Every case gets its own converter instance.
 40 |             let converter = await KanaKanjiConverter()
 41 |             let result = await converter.toWarekiCandidates(makeDirectInput(direct: testCase.input))
 42 |             XCTAssertEqual(result.map { $0.text }, testCase.expected, "input: \(testCase.input)")
 43 |         }
 44 |     }
 45 | 
 46 |     /// Era year -> Western year. The candidate texts must match `expected`
 47 |     /// exactly and in order; an empty list marks an input that must be rejected.
 48 |     func testWareki2Seireki() async throws {
 49 |         // All inputs go through `makeDirectInput` so that the cursor position is
 50 |         // derived from the text length instead of being a hard-coded constant
 51 |         // that can point past the end of shorter inputs.
 52 |         let cases: [(input: String, expected: [String])] = [
 53 |             ("れいわがんねん", ["2019年"]),
 54 |             ("れいわ1ねん", ["2019年"]),
 55 |             ("しょうわ25ねん", ["1950年"]),
 56 |             ("めいじ9ねん", ["1876年"]),
 57 |             // invalid cases
 58 |             ("れいわ100ねん", []),
 59 |             ("けいおう5ねん", [])
 60 |         ]
 61 |         for testCase in cases {
 62 |             // Every case gets its own converter instance.
 63 |             let converter = await KanaKanjiConverter()
 64 |             let result = await converter.toSeirekiCandidates(makeDirectInput(direct: testCase.input))
 65 |             XCTAssertEqual(result.map { $0.text }, testCase.expected, "input: \(testCase.input)")
 66 |         }
 67 |     }
 68 | }
 69 | 


--------------------------------------------------------------------------------
/Tests/KanaKanjiConverterModuleTests/DictionaryMock/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/azooKey/AzooKeyKanaKanjiConverter/a30e8b27a8763df24665bfc460837d283fc74cfa/Tests/KanaKanjiConverterModuleTests/DictionaryMock/.gitkeep


--------------------------------------------------------------------------------
/Tests/KanaKanjiConverterModuleTests/DictionaryMock/cb/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/azooKey/AzooKeyKanaKanjiConverter/a30e8b27a8763df24665bfc460837d283fc74cfa/Tests/KanaKanjiConverterModuleTests/DictionaryMock/cb/.gitkeep


--------------------------------------------------------------------------------
/Tests/KanaKanjiConverterModuleTests/DictionaryMock/cb/1285.binary:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/azooKey/AzooKeyKanaKanjiConverter/a30e8b27a8763df24665bfc460837d283fc74cfa/Tests/KanaKanjiConverterModuleTests/DictionaryMock/cb/1285.binary


--------------------------------------------------------------------------------
/Tests/KanaKanjiConverterModuleTests/DictionaryMock/louds/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/azooKey/AzooKeyKanaKanjiConverter/a30e8b27a8763df24665bfc460837d283fc74cfa/Tests/KanaKanjiConverterModuleTests/DictionaryMock/louds/.gitkeep


--------------------------------------------------------------------------------
/Tests/KanaKanjiConverterModuleTests/DictionaryMock/louds/charID.chid:
--------------------------------------------------------------------------------
1 |  　￣‐―〜・、…‥。‘’“”〈〉《》「」『』【】〔〕‖*′〃※´¨゛゜←→↑↓─■□▲△▼▽◆◇○◎●★☆々ゝヽゞヾー〇Qァアィイゥウヴェエォオヵカガキギクグヶケゲコゴサザシジ〆スズセゼソゾタダチヂッツヅテデトドナニヌネノハバパヒビピフブプヘベペホボポマミムメモャヤュユョヨラリルレロヮワヰヱヲン仝&A！0123456789？abcdefghijklmnopqrstuvwxyzBCDEFGHIJKLMNOPRSTUVWXYZ^_=ㇻ()#%'"+-ㇼ


--------------------------------------------------------------------------------
/Tests/KanaKanjiConverterModuleTests/DictionaryMock/louds/シ.louds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/azooKey/AzooKeyKanaKanjiConverter/a30e8b27a8763df24665bfc460837d283fc74cfa/Tests/KanaKanjiConverterModuleTests/DictionaryMock/louds/シ.louds


--------------------------------------------------------------------------------
/Tests/KanaKanjiConverterModuleTests/DictionaryMock/louds/シ.loudschars2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/azooKey/AzooKeyKanaKanjiConverter/a30e8b27a8763df24665bfc460837d283fc74cfa/Tests/KanaKanjiConverterModuleTests/DictionaryMock/louds/シ.loudschars2


--------------------------------------------------------------------------------
/Tests/KanaKanjiConverterModuleTests/DictionaryMock/louds/シ0.loudstxt3:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/azooKey/AzooKeyKanaKanjiConverter/a30e8b27a8763df24665bfc460837d283fc74cfa/Tests/KanaKanjiConverterModuleTests/DictionaryMock/louds/シ0.loudstxt3


--------------------------------------------------------------------------------
/Tests/KanaKanjiConverterModuleTests/DictionaryMock/louds/シ1.loudstxt3:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/azooKey/AzooKeyKanaKanjiConverter/a30e8b27a8763df24665bfc460837d283fc74cfa/Tests/KanaKanjiConverterModuleTests/DictionaryMock/louds/シ1.loudstxt3


--------------------------------------------------------------------------------
/Tests/KanaKanjiConverterModuleTests/DictionaryMock/louds/シ10.loudstxt3:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/azooKey/AzooKeyKanaKanjiConverter/a30e8b27a8763df24665bfc460837d283fc74cfa/Tests/KanaKanjiConverterModuleTests/DictionaryMock/louds/シ10.loudstxt3


--------------------------------------------------------------------------------
/Tests/KanaKanjiConverterModuleTests/DictionaryMock/louds/シ11.loudstxt3:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/azooKey/AzooKeyKanaKanjiConverter/a30e8b27a8763df24665bfc460837d283fc74cfa/Tests/KanaKanjiConverterModuleTests/DictionaryMock/louds/シ11.loudstxt3


--------------------------------------------------------------------------------
/Tests/KanaKanjiConverterModuleTests/DictionaryMock/louds/シ12.loudstxt3:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/azooKey/AzooKeyKanaKanjiConverter/a30e8b27a8763df24665bfc460837d283fc74cfa/Tests/KanaKanjiConverterModuleTests/DictionaryMock/louds/シ12.loudstxt3


--------------------------------------------------------------------------------
/Tests/KanaKanjiConverterModuleTests/DictionaryMock/louds/シ13.loudstxt3:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/azooKey/AzooKeyKanaKanjiConverter/a30e8b27a8763df24665bfc460837d283fc74cfa/Tests/KanaKanjiConverterModuleTests/DictionaryMock/louds/シ13.loudstxt3


--------------------------------------------------------------------------------
/Tests/KanaKanjiConverterModuleTests/DictionaryMock/louds/シ2.loudstxt3:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/azooKey/AzooKeyKanaKanjiConverter/a30e8b27a8763df24665bfc460837d283fc74cfa/Tests/KanaKanjiConverterModuleTests/DictionaryMock/louds/シ2.loudstxt3


--------------------------------------------------------------------------------
/Tests/KanaKanjiConverterModuleTests/DictionaryMock/louds/シ3.loudstxt3:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/azooKey/AzooKeyKanaKanjiConverter/a30e8b27a8763df24665bfc460837d283fc74cfa/Tests/KanaKanjiConverterModuleTests/DictionaryMock/louds/シ3.loudstxt3


--------------------------------------------------------------------------------
/Tests/KanaKanjiConverterModuleTests/DictionaryMock/louds/シ4.loudstxt3:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/azooKey/AzooKeyKanaKanjiConverter/a30e8b27a8763df24665bfc460837d283fc74cfa/Tests/KanaKanjiConverterModuleTests/DictionaryMock/louds/シ4.loudstxt3


--------------------------------------------------------------------------------
/Tests/KanaKanjiConverterModuleTests/DictionaryMock/louds/シ5.loudstxt3:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/azooKey/AzooKeyKanaKanjiConverter/a30e8b27a8763df24665bfc460837d283fc74cfa/Tests/KanaKanjiConverterModuleTests/DictionaryMock/louds/シ5.loudstxt3


--------------------------------------------------------------------------------
/Tests/KanaKanjiConverterModuleTests/DictionaryMock/louds/シ6.loudstxt3:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/azooKey/AzooKeyKanaKanjiConverter/a30e8b27a8763df24665bfc460837d283fc74cfa/Tests/KanaKanjiConverterModuleTests/DictionaryMock/louds/シ6.loudstxt3


--------------------------------------------------------------------------------
/Tests/KanaKanjiConverterModuleTests/DictionaryMock/louds/シ7.loudstxt3:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/azooKey/AzooKeyKanaKanjiConverter/a30e8b27a8763df24665bfc460837d283fc74cfa/Tests/KanaKanjiConverterModuleTests/DictionaryMock/louds/シ7.loudstxt3


--------------------------------------------------------------------------------
/Tests/KanaKanjiConverterModuleTests/DictionaryMock/louds/シ8.loudstxt3:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/azooKey/AzooKeyKanaKanjiConverter/a30e8b27a8763df24665bfc460837d283fc74cfa/Tests/KanaKanjiConverterModuleTests/DictionaryMock/louds/シ8.loudstxt3


--------------------------------------------------------------------------------
/Tests/KanaKanjiConverterModuleTests/DictionaryMock/louds/シ9.loudstxt3:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/azooKey/AzooKeyKanaKanjiConverter/a30e8b27a8763df24665bfc460837d283fc74cfa/Tests/KanaKanjiConverterModuleTests/DictionaryMock/louds/シ9.loudstxt3


--------------------------------------------------------------------------------
/Tests/KanaKanjiConverterModuleTests/DictionaryMock/mm.binary:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/azooKey/AzooKeyKanaKanjiConverter/a30e8b27a8763df24665bfc460837d283fc74cfa/Tests/KanaKanjiConverterModuleTests/DictionaryMock/mm.binary


--------------------------------------------------------------------------------
/Tests/KanaKanjiConverterModuleTests/DictionaryMock/p/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/azooKey/AzooKeyKanaKanjiConverter/a30e8b27a8763df24665bfc460837d283fc74cfa/Tests/KanaKanjiConverterModuleTests/DictionaryMock/p/.gitkeep


--------------------------------------------------------------------------------
/Tests/KanaKanjiConverterModuleTests/DictionaryMock/p/p_シ.csv:
--------------------------------------------------------------------------------
 1 | シカ,しか,257,257,268,-4.2795
 2 | シカシ,しかし,555,555,282,-5.4644
 3 | シカタ,仕方,1284,1284,468,-6.3727
 4 | シゴト,仕事,1283,1283,290,-6.7303
 5 | シタ,した,610,435,17,-5.8511
 6 | シッ,知っ,786,786,427,-6.2581
 7 | シテ,して,610,307,17,-5.1961
 8 | シテイル,している,610,919,17,-6.8068
 9 | シナ,死な,748,748,161,-4.9477
10 | シニ,死に,751,751,161,-4.9485
11 | シニャ,死にゃ,745,745,161,-4.9347
12 | シヌ,死ぬ,746,746,161,-4.9348
13 | シノ,死の,747,747,161,-4.9476
14 | シマイ,しまい,1273,1273,282,-4.1023
15 | シマウ,しまう,1253,1253,282,-4.0578
16 | シマエ,しまえ,1249,1249,47,-4.1475
17 | シマエ,しまえ,1265,1265,47,-4.1502
18 | シマオ,しまお,1257,1257,78,-4.1498
19 | シマッ,しまっ,1269,1269,282,-4.0663
20 | シマワ,しまわ,1261,1261,268,-4.1497
21 | シメサ,示さ,733,733,481,-6.6955
22 | シメシ,示し,735,735,481,-6.7511
23 | シメシャ,示しゃ,730,730,481,-6.6753
24 | シメス,示す,731,731,481,-6.6789
25 | シメセ,示せ,729,729,481,-6.6685
26 | シメセ,示せ,734,734,481,-6.6744
27 | シメソ,示そ,732,732,481,-6.6763
28 | シモ,しも,333,333,437,-6.1361
29 | シャ,しゃ,878,878,331,-6.2638
30 | ショ,しょ,603,603,331,-3.0303
31 | シヨ,しよ,603,603,78,-3.0235
32 | シヨウ,使用,1283,1283,375,-6.8956
33 | シラ,知ら,780,780,441,-6.1623
34 | シリ,知り,788,788,427,-6.3249
35 | シリャ,知りゃ,770,770,438,-6.3237
36 | シル,知る,772,772,438,-6.2017
37 | シレ,知れ,784,784,438,-6.3237
38 | シレ,知れ,768,768,438,-6.3238
39 | シレ,しれ,622,622,268,-6.6306
40 | シレ,しれ,625,625,268,-6.8119
41 | シレヨ,しれよ,624,624,501,-6.8937
42 | シレリャ,しれりゃ,618,618,501,-6.8749
43 | シレレ,しれれ,617,617,501,-6.8950
44 | シレロ,しれろ,623,623,501,-6.8939
45 | シレン,しれん,620,620,501,-6.8940
46 | シロ,しろ,608,608,437,-2.6815
47 | シロ,知ろ,778,778,438,-6.3241
48 | シン,死ん,750,750,292,-4.9321
49 | シン,知ん,782,782,438,-6.3238
50 | シン,知ん,774,774,438,-6.3238


--------------------------------------------------------------------------------
/Tests/KanaKanjiConverterModuleTests/JapaneseNumberConversionTests.swift:
--------------------------------------------------------------------------------
 1 | //
 2 | //  JapaneseNumberConversionTests.swift
 3 | //  KanaKanjiConverterModuleTests
 4 | //
 5 | //  Created by ensan on 2023/04/18.
 6 | //  Copyright © 2023 ensan. All rights reserved.
 7 | //
 8 | 
 9 | @testable import KanaKanjiConverterModule
10 | import XCTest
11 | 
12 | /// Checks `DicdataStore.getJapaneseNumberDicdata(head:)` with katakana readings of Japanese numerals.
13 | final class JapaneseNumberConversionTests: XCTestCase {
14 |     /// Numeral readings must produce exactly two entries (a kanji form and a digit form);
15 |     /// the other readings listed below must produce none.
16 |     func testJapaneseNumberConversion() throws {
17 |         let store = DicdataStore()
18 | 
19 |         // Readings that are expected to convert, together with both expected surface forms.
20 |         let convertible: [(head: String, kanji: String, digits: String)] = [
21 |             ("イチマン", "一万", "10000"),
22 |             ("ニオクロクセンヨンヒャクマンキュウ", "二億六千四百万九", "264000009")
23 |         ]
24 |         for (head, kanji, digits) in convertible {
25 |             let words = store.getJapaneseNumberDicdata(head: head).map { $0.word }
26 |             XCTAssertEqual(words.count, 2)
27 |             XCTAssertTrue(words.contains(kanji))
28 |             XCTAssertTrue(words.contains(digits))
29 |         }
30 | 
31 |         // Readings for which the test expects no entry at all.
32 |         let rejected = ["マルマン", "アマン", "イチリン", "ニムリョウタイスウサンガイ"]
33 |         for head in rejected {
34 |             XCTAssertEqual(store.getJapaneseNumberDicdata(head: head).count, 0)
35 |         }
36 |     }
37 | }
38 | 


--------------------------------------------------------------------------------
/Tests/KanaKanjiConverterModuleTests/KanaKanjiConverterModuleTests.swift:
--------------------------------------------------------------------------------
 1 | @testable import KanaKanjiConverterModule
 2 | import XCTest
 3 | 
 4 | /// Placeholder test case for the `KanaKanjiConverterModule` test target.
 5 | final class KanaKanjiConverterModuleTests: XCTestCase {
 6 |     /// Template test with an empty body: it performs no work and makes no assertions.
 7 |     func testExample() throws {
 8 |         // Example slot for a functional test case.
 9 |         // Add XCTAssert-family calls here to check that the code under test
10 |         // produces the expected results.
11 |     }
12 | }
13 | 


--------------------------------------------------------------------------------
/Tests/KanaKanjiConverterModuleTests/LOUDSTests.swift:
--------------------------------------------------------------------------------
 1 | //
 2 | //  LOUDSTests.swift
 3 | //  KanaKanjiConverterModuleTests
 4 | //
 5 | //  Created by ensan on 2023/02/02.
 6 | //  Copyright © 2023 ensan. All rights reserved.
 7 | //
 8 | 
 9 | @testable import KanaKanjiConverterModule
10 | import XCTest
11 | 
12 | /// Tests LOUDS lookups against the `DictionaryMock` resources bundled with the test target.
13 | final class LOUDSTests: XCTestCase {
14 |     /// Root directory of the mock dictionary inside the test bundle.
15 |     static let resourceURL = Bundle.module.resourceURL!.standardizedFileURL.appendingPathComponent("DictionaryMock", isDirectory: true)
16 | 
17 |     /// Returns the default request options with the dictionary location pointed at the mock resources.
18 |     func requestOptions() -> ConvertRequestOptions {
19 |         var options: ConvertRequestOptions = .default
20 |         options.dictionaryResourceURL = Self.resourceURL
21 |         return options
22 |     }
23 | 
24 |     /// Reads `louds/charID.chid` and maps every character to its offset within the file.
25 |     ///
26 |     /// When the file cannot be read, a test failure is recorded (in addition to the original
27 |     /// console message) and an empty map is returned, so a missing resource is reported
28 |     /// at its source instead of surfacing later as an unrelated lookup failure.
29 |     func loadCharIDs() -> [Character: UInt8] {
30 |         let chidURL = Self.resourceURL.appendingPathComponent("louds/charID.chid", isDirectory: false)
31 |         do {
32 |             let string = try String(contentsOf: chidURL, encoding: String.Encoding.utf8)
33 |             return [Character: UInt8](uniqueKeysWithValues: string.enumerated().map {($0.element, UInt8($0.offset))})
34 |         } catch {
35 |             print("ファイルが見つかりませんでした")
36 |             XCTFail("Could not read \(chidURL.path): \(error)")
37 |             return [:]
38 |         }
39 |     }
40 | 
41 |     /// Looks up the key "シカイ" in the "シ" LOUDS and checks the dictionary entries stored for that node.
42 |     func testSearchNodeIndex() throws {
43 |         // Point the loader at the mock dictionary resources.
44 |         let options = requestOptions()
45 |         print("Options: ", options)
46 | 
47 |         // XCTUnwrap records a failure and ends the test when the value is nil.
48 |         let louds = try XCTUnwrap(LOUDS.load("シ", option: options))
49 | 
50 |         // Characters missing from the map are replaced by `UInt8.max`.
51 |         let charIDs = loadCharIDs()
52 |         let key = "シカイ"
53 |         let chars = key.map {charIDs[$0, default: .max]}
54 |         let index = try XCTUnwrap(louds.searchNodeIndex(chars: chars))
55 | 
56 |         // The node index is split in two: `index >> 11` is appended to the file identifier
57 |         // and the low 11 bits (`index & 2047`) are passed as the index inside that file.
58 |         let dicdata: [DicdataElement] = LOUDS.getDataForLoudstxt3("シ" + "\(index >> 11)", indices: [index & 2047], option: options)
59 |         XCTAssertTrue(dicdata.contains {$0.word == "司会"})
60 |         XCTAssertTrue(dicdata.contains {$0.word == "視界"})
61 |         XCTAssertTrue(dicdata.contains {$0.word == "死界"})
62 |     }
63 | }
64 | 


--------------------------------------------------------------------------------
/Tests/KanaKanjiConverterModuleTests/RegisteredNodeTests.swift:
--------------------------------------------------------------------------------
 1 | //
 2 | //  RegisteredNodeTests.swift
 3 | //  KanaKanjiConverterModuleTests
 4 | //
 5 | //  Created by ensan on 2023/01/31.
 6 | //  Copyright © 2023 ensan. All rights reserved.
 7 | //
 8 | 
 9 | @testable import KanaKanjiConverterModule
10 | import XCTest
11 | 
12 | /// Tests for creating `RegisteredNode` values and deriving candidate data from a chain of nodes.
13 | final class RegisteredNodeTests: XCTestCase {
14 |     /// The BOS node has no predecessor, an empty input range, a total value of zero and the BOS cid.
15 |     func testBOSNode() throws {
16 |         let node = RegisteredNode.BOSNode()
17 |         XCTAssertNil(node.prev)
18 |         XCTAssertEqual(node.inputRange, 0..<0)
19 |         XCTAssertEqual(node.totalValue, 0)
20 |         XCTAssertEqual(node.data.rcid, CIDData.BOS.cid)
21 |     }
22 | 
23 |     /// A node built from a previous candidate starts a fresh chain (no predecessor, empty range,
24 |     /// zero total value) while carrying over the rcid and mid checked below.
25 |     func testFromLastCandidate() throws {
26 |         let lastElement = DicdataElement(word: "我輩は猫", ruby: "ワガハイハネコ", cid: CIDData.一般名詞.cid, mid: 100, value: -20)
27 |         let lastCandidate = Candidate(text: "我輩は猫", value: -20, correspondingCount: 7, lastMid: 100, data: [lastElement])
28 |         let node = RegisteredNode.fromLastCandidate(lastCandidate)
29 |         XCTAssertNil(node.prev)
30 |         XCTAssertEqual(node.inputRange, 0..<0)
31 |         XCTAssertEqual(node.totalValue, 0)
32 |         XCTAssertEqual(node.data.rcid, CIDData.一般名詞.cid)
33 |         XCTAssertEqual(node.data.mid, 100)
34 |     }
35 | 
36 |     /// Chains BOS → 我輩 → は → 猫 → です and checks the data and clauses returned for the last node.
37 |     func testGetCandidateData() throws {
38 |         // Build the chain; every node registers the previous one as its predecessor.
39 |         let head = RegisteredNode.BOSNode()
40 |         let wagahai = RegisteredNode(
41 |             data: DicdataElement(word: "我輩", ruby: "ワガハイ", cid: CIDData.一般名詞.cid, mid: 1, value: -5),
42 |             registered: head,
43 |             totalValue: -10,
44 |             inputRange: 0..<4
45 |         )
46 |         let ha = RegisteredNode(
47 |             data: DicdataElement(word: "は", ruby: "ハ", cid: CIDData.係助詞ハ.cid, mid: 2, value: -2),
48 |             registered: wagahai,
49 |             totalValue: -13,
50 |             inputRange: 4..<5
51 |         )
52 |         let neko = RegisteredNode(
53 |             data: DicdataElement(word: "猫", ruby: "ネコ", cid: CIDData.一般名詞.cid, mid: 3, value: -4),
54 |             registered: ha,
55 |             totalValue: -20,
56 |             inputRange: 5..<7
57 |         )
58 |         let desu = RegisteredNode(
59 |             data: DicdataElement(word: "です", ruby: "デス", cid: CIDData.助動詞デス基本形.cid, mid: 4, value: -3),
60 |             registered: neko,
61 |             totalValue: -25,
62 |             inputRange: 7..<9
63 |         )
64 | 
65 |         // Expected clauses: "我輩は" over 0..<5 and "猫です" over 5..<9.
66 |         let firstClause = ClauseDataUnit()
67 |         firstClause.mid = 1
68 |         firstClause.text = "我輩は"
69 |         firstClause.inputRange = 0..<5
70 |         firstClause.nextLcid = CIDData.一般名詞.cid
71 | 
72 |         let secondClause = ClauseDataUnit()
73 |         secondClause.mid = 3
74 |         secondClause.text = "猫です"
75 |         secondClause.inputRange = 5..<9
76 |         secondClause.nextLcid = CIDData.EOS.cid
77 | 
78 |         let expected: CandidateData = CandidateData(
79 |             clauses: [(firstClause, -13), (secondClause, -25)],
80 |             data: [wagahai.data, ha.data, neko.data, desu.data]
81 |         )
82 | 
83 |         // Compare the element list, then the clause values and the clause units separately.
84 |         let actual = desu.getCandidateData()
85 |         XCTAssertEqual(actual.data, expected.data)
86 |         XCTAssertEqual(actual.clauses.map {$0.value}, expected.clauses.map {$0.value})
87 |         XCTAssertEqual(actual.clauses.map {$0.clause}, expected.clauses.map {$0.clause})
88 |     }
89 | }
90 | 


--------------------------------------------------------------------------------
/Tests/KanaKanjiConverterModuleTests/TemporalLearningMemoryTrieTests.swift:
--------------------------------------------------------------------------------
 1 | @testable import KanaKanjiConverterModule
 2 | import XCTest
 3 | 
 4 | /// Exercises `TemporalLearningMemoryTrie` using the character map of the bundled `DictionaryMock`.
 5 | final class TemporalLearningMemoryTrieTests: XCTestCase {
 6 |     static let resourceURL = Bundle.module.resourceURL!.appendingPathComponent("DictionaryMock", isDirectory: true)
 7 | 
 8 |     /// Character map loaded once and shared by all tests, instead of re-reading the file per lookup.
 9 |     private static let charMap: [Character: UInt8] = loadCharMap()
10 | 
11 |     /// Reads `louds/charID.chid` and maps every character to its offset within the file.
12 |     static func loadCharMap() -> [Character: UInt8] {
13 |         let chidURL = resourceURL.appendingPathComponent("louds/charID.chid", isDirectory: false)
14 |         let string = try! String(contentsOf: chidURL, encoding: .utf8)
15 |         return Dictionary(uniqueKeysWithValues: string.enumerated().map { ($0.element, UInt8($0.offset)) })
16 |     }
17 | 
18 |     /// Converts `string` with `LearningManager.keyToChars` and force-unwraps the result.
19 |     func chars(for string: String) -> [UInt8] {
20 |         LearningManager.keyToChars(string, char2UInt8: Self.charMap)!
21 |     }
22 | 
23 |     /// Memorized elements are returned by perfect, through and prefix matching.
24 |     func testMemorizeAndMatch() throws {
25 |         var trie = TemporalLearningMemoryTrie()
26 |         let element1 = DicdataElement(word: "テスト", ruby: "テスト", cid: CIDData.一般名詞.cid, mid: MIDData.一般.mid, value: -10)
27 |         let element2 = DicdataElement(word: "テスター", ruby: "テスター", cid: CIDData.一般名詞.cid, mid: MIDData.一般.mid, value: -12)
28 | 
29 |         trie.memorize(dicdataElement: element1, chars: chars(for: element1.ruby))
30 |         trie.memorize(dicdataElement: element2, chars: chars(for: element2.ruby))
31 | 
32 |         let result1 = trie.perfectMatch(chars: chars(for: element1.ruby))
33 |         XCTAssertEqual(result1.count, 1)
34 |         XCTAssertEqual(result1.first?.word, element1.word)
35 |         XCTAssertTrue(result1.first?.metadata.contains(.isLearned) ?? false)
36 | 
37 |         let result2 = trie.throughMatch(chars: chars(for: element2.ruby), depth: (element2.ruby.count - 1)..<element2.ruby.count)
38 |         XCTAssertEqual(result2.map { $0.word }, [element2.word])
39 | 
40 |         let prefixResult = trie.prefixMatch(chars: chars(for: "テス"))
41 |         XCTAssertEqual(Set(prefixResult.map { $0.word }), Set([element1.word, element2.word]))
42 |     }
43 | 
44 |     /// Memorizing the same word again (with a different `adjust`) must not create a second entry.
45 |     func testMemorizeTwice() throws {
46 |         var trie = TemporalLearningMemoryTrie()
47 |         let element1 = DicdataElement(word: "テスト", ruby: "テスト", cid: CIDData.一般名詞.cid, mid: MIDData.一般.mid, value: -10)
48 |         trie.memorize(dicdataElement: element1, chars: chars(for: element1.ruby))
49 | 
50 |         let element2 = DicdataElement(word: "テスト", ruby: "テスト", cid: CIDData.一般名詞.cid, mid: MIDData.一般.mid, value: -10, adjust: 1.5)
51 |         trie.memorize(dicdataElement: element2, chars: chars(for: element2.ruby))
52 | 
53 |         let result1 = trie.perfectMatch(chars: chars(for: element1.ruby))
54 |         XCTAssertEqual(result1.count, 1)
55 |         XCTAssertEqual(result1.first?.word, element1.word)
56 |         XCTAssertTrue(result1.first?.metadata.contains(.isLearned) ?? false)
57 |     }
58 | 
59 |     /// A repeated memorize raises `adjust` on the single stored entry; `forget` then removes it.
60 |     func testMemorizeUpdateCountAndForget() throws {
61 |         var trie = TemporalLearningMemoryTrie()
62 |         let element = DicdataElement(word: "テスター", ruby: "テスター", cid: CIDData.一般名詞.cid, mid: MIDData.一般.mid, value: -10)
63 |         let charIDs = chars(for: element.ruby)
64 | 
65 |         trie.memorize(dicdataElement: element, chars: charIDs)
66 |         // XCTUnwrap turns a missing entry into a test failure instead of a crash of the test process.
67 |         let afterFirst = try XCTUnwrap(trie.perfectMatch(chars: charIDs).first)
68 | 
69 |         trie.memorize(dicdataElement: element, chars: charIDs)
70 |         let afterSecond = try XCTUnwrap(trie.perfectMatch(chars: charIDs).first)
71 | 
72 |         XCTAssertGreaterThan(afterSecond.adjust, afterFirst.adjust)
73 |         XCTAssertEqual(trie.perfectMatch(chars: charIDs).count, 1)
74 | 
75 |         XCTAssertTrue(trie.forget(dicdataElement: afterSecond, chars: charIDs))
76 |         XCTAssertTrue(trie.perfectMatch(chars: charIDs).isEmpty)
77 |     }
78 | 
79 |     /// Forgetting one of two entries that share a surface form removes both of them.
80 |     func testCoarseForget() throws {
81 |         var trie = TemporalLearningMemoryTrie()
82 |         let element1 = DicdataElement(word: "テスター", ruby: "テスター", cid: CIDData.一般名詞.cid, mid: MIDData.一般.mid, value: -10)
83 |         let element2 = DicdataElement(word: "テスター", ruby: "テスター", cid: CIDData.固有名詞.cid, mid: MIDData.一般.mid, value: -10)
84 |         let charIDs = chars(for: "テスター")
85 | 
86 |         trie.memorize(dicdataElement: element1, chars: charIDs)
87 |         trie.memorize(dicdataElement: element2, chars: charIDs)
88 | 
89 |         // The two elements are stored as two distinct entries.
90 |         XCTAssertEqual(trie.perfectMatch(chars: charIDs).count, 2)
91 | 
92 |         // Forgetting deletes both at once (matching is coarse: it is decided by the surface form).
93 |         XCTAssertTrue(trie.forget(dicdataElement: element1, chars: charIDs))
94 |         XCTAssertTrue(trie.perfectMatch(chars: charIDs).isEmpty)
95 |     }
96 | }
97 | 


--------------------------------------------------------------------------------
/Tests/KanaKanjiConverterModuleWithDefaultDictionaryTests/DicdataStoreTests/TextReplacerTests.swift:
--------------------------------------------------------------------------------
 1 | @testable import KanaKanjiConverterModule
 2 | @testable import KanaKanjiConverterModuleWithDefaultDictionary
 3 | import XCTest
 4 | 
 5 | /// Tests `TextReplacer` searches against the default emoji dictionary.
 6 | final class TextReplacerTests: XCTestCase {
 7 |     /// Each katakana query must return exactly one emoji entry whose query is the hiragana reading.
 8 |     func testEmojiTextReplacer() throws {
 9 |         let textReplacer = TextReplacer.withDefaultEmojiDictionary()
10 |         XCTAssertFalse(textReplacer.isEmpty)
11 |         do {
12 |             let searchResult = textReplacer.getSearchResult(query: "カニ", target: [.emoji])
13 |             XCTAssertEqual(searchResult.count, 1)
14 |             // XCTest keeps running after a failed assertion, so `searchResult[0]` would trap on an
15 |             // empty result. Unwrapping `first` reports a test failure instead.
16 |             let hit = try XCTUnwrap(searchResult.first)
17 |             XCTAssertEqual(hit, .init(query: "かに", text: "🦀️"))
18 |         }
19 |         // This case only runs where the availability check passes.
20 |         // NOTE(review): presumably the entry exists only in newer emoji data — confirm.
21 |         if #available(iOS 18.4, macOS 15.3, *) {
22 |             let searchResult = textReplacer.getSearchResult(query: "テツヤ", target: [.emoji])
23 |             XCTAssertEqual(searchResult.count, 1)
24 |             let hit = try XCTUnwrap(searchResult.first)
25 |             XCTAssertEqual(hit, .init(query: "てつや", text: "🫩️"))
26 |         }
27 |     }
28 | }
29 | 


--------------------------------------------------------------------------------
/Tests/SwiftUtilsTests/StringExtensionTests.swift:
--------------------------------------------------------------------------------
 1 | //
 2 | //  StringExtensionTests.swift
 3 | //  azooKeyTests
 4 | //
 5 | //  Created by ensan on 2022/12/23.
 6 | //  Copyright © 2022 ensan. All rights reserved.
 7 | //
 8 | 
 9 | @testable import SwiftUtils
10 | import XCTest
11 | 
12 | /// Tests for the kana conversion helpers `toKatakana()` and `toHiragana()`.
13 | final class StringExtensionTests: XCTestCase {
14 | 
15 |     /// Hiragana becomes katakana; digits, symbols, existing katakana and the empty string are unchanged.
16 |     func testToKatakana() throws {
17 |         let expectations: [(input: String, output: String)] = [
18 |             ("かゔぁあーんじょ123+++リスク", "カヴァアーンジョ123+++リスク"),
19 |             ("", ""),
20 |             ("コレハロン", "コレハロン")
21 |         ]
22 |         for (input, output) in expectations {
23 |             XCTAssertEqual(input.toKatakana(), output)
24 |         }
25 |     }
26 | 
27 |     /// Katakana becomes hiragana; digits, symbols, existing hiragana and the empty string are unchanged.
28 |     func testToHiragana() throws {
29 |         let expectations: [(input: String, output: String)] = [
30 |             ("カヴァアーンじょ123+++リスク", "かゔぁあーんじょ123+++りすく"),
31 |             ("", ""),
32 |             ("これはろん", "これはろん")
33 |         ]
34 |         for (input, output) in expectations {
35 |             XCTAssertEqual(input.toHiragana(), output)
36 |         }
37 |     }
38 | }
39 | 


--------------------------------------------------------------------------------
/Tests/SwiftUtilsTests/StringUtilsTests.swift:
--------------------------------------------------------------------------------
 1 | //
 2 | //  StringUtilsTests.swift
 3 | //  KanaKanjiConverterModuleTests
 4 | //
 5 | //  Created by ensan on 2022/12/18.
 6 | //  Copyright © 2022 ensan. All rights reserved.
 7 | //
 8 | 
 9 | @testable import SwiftUtils
10 | import XCTest
11 | 
12 | /// Tests for the string predicates and kana conversions provided by `SwiftUtils`.
13 | final class StringTests: XCTestCase {
14 | 
15 |     /// `isKana` is true for full-width hiragana and katakana, false for the other inputs below.
16 |     func testIsKana() throws {
17 |         // Hiragana and katakana, including small and voiced forms.
18 |         XCTAssertTrue("あ".isKana)
19 |         XCTAssertTrue("ぁ".isKana)
20 |         XCTAssertTrue("ン".isKana)
21 |         XCTAssertTrue("ァ".isKana)
22 |         XCTAssertTrue("が".isKana)
23 |         XCTAssertTrue("ゔ".isKana)
24 | 
25 |         // Latin letters and symbols are not kana.
26 |         XCTAssertFalse("k".isKana)
27 |         XCTAssertFalse("@".isKana)
28 |         // Half-width katakana is not treated as kana.
29 |         XCTAssertFalse("ｶ".isKana)
30 |     }
31 | 
32 |     /// `onlyRomanAlphabetOrNumber` requires a non-empty string of Roman letters and digits only.
33 |     func testOnlyRomanAlphabetOrNumber() throws {
34 |         let accepted = ["and13", "vmaoNFIU", "1332"]
35 |         for text in accepted {
36 |             XCTAssertTrue(text.onlyRomanAlphabetOrNumber)
37 |         }
38 | 
39 |         // The first entry is the empty string: no characters at all yields false.
40 |         let refused = ["", "and 13", "can't", "Mt."]
41 |         for text in refused {
42 |             XCTAssertFalse(text.onlyRomanAlphabetOrNumber)
43 |         }
44 |     }
45 | 
46 |     /// `onlyRomanAlphabet` requires a non-empty string of Roman letters only (digits are refused).
47 |     func testOnlyRomanAlphabet() throws {
48 |         let accepted = ["vmaoNFIU", "NAO"]
49 |         for text in accepted {
50 |             XCTAssertTrue(text.onlyRomanAlphabet)
51 |         }
52 | 
53 |         // The first entry is the empty string: no characters at all yields false.
54 |         let refused = ["", "and 13", "can't", "Mt.", "and13", "vmaoNFIU83942"]
55 |         for text in refused {
56 |             XCTAssertFalse(text.onlyRomanAlphabet)
57 |         }
58 |     }
59 | 
60 |     /// `containsRomanAlphabet` is true as soon as one Roman letter appears anywhere in the string.
61 |     func testContainsRomanAlphabet() throws {
62 |         let accepted = ["vmaoNFIU", "変数x", "and 13", "can't", "Mt.", "(^v^)"]
63 |         for text in accepted {
64 |             XCTAssertTrue(text.containsRomanAlphabet)
65 |         }
66 | 
67 |         // The first entry is the empty string: no characters at all yields false.
68 |         let refused = ["", "!?!?", "(^_^)", "問題ア"]
69 |         for text in refused {
70 |             XCTAssertFalse(text.containsRomanAlphabet)
71 |         }
72 |     }
73 | 
74 |     /// `isEnglishSentence` accepts English text with symbols and refuses text containing Japanese.
75 |     func testIsEnglishSentence() throws {
76 |         let accepted = [
77 |             "Is this an English sentence?",
78 |             "English sentences can include symbols like '!?/\\=-+^`{}()[]."
79 |         ]
80 |         for text in accepted {
81 |             XCTAssertTrue(text.isEnglishSentence)
82 |         }
83 | 
84 |         // The first entry is the empty string: no characters at all yields false.
85 |         let refused = [
86 |             "",
87 |             "The word '変数' is not an English word.",
88 |             "これは完全に日本語の文章です"
89 |         ]
90 |         for text in refused {
91 |             XCTAssertFalse(text.isEnglishSentence)
92 |         }
93 |     }
94 | 
95 |     /// `toKatakana()` converts hiragana only; kanji, katakana and emoji pass through.
96 |     func testToKatakana() throws {
97 |         let expectations: [(input: String, output: String)] = [
98 |             ("あいうえお", "アイウエオ"),
99 |             ("これは日本語の文章です", "コレハ日本語ノ文章デス"),
100 |             ("えモじ😇", "エモジ😇")
101 |         ]
102 |         for (input, output) in expectations {
103 |             XCTAssertEqual(input.toKatakana(), output)
104 |         }
105 |     }
106 | 
107 |     /// `toHiragana()` converts katakana only; kanji, hiragana and emoji pass through.
108 |     func testToHiragana() throws {
109 |         let expectations: [(input: String, output: String)] = [
110 |             ("アイウエオ", "あいうえお"),
111 |             ("僕はロボットです", "僕はろぼっとです"),
112 |             ("えモじ😇", "えもじ😇")
113 |         ]
114 |         for (input, output) in expectations {
115 |             XCTAssertEqual(input.toHiragana(), output)
116 |         }
117 |     }
118 | 
119 |     /// Empty placeholder; it performs no work and makes no assertions.
120 |     func testPerformanceExample() throws {
121 |     }
122 | }
123 | 


--------------------------------------------------------------------------------
/Tests/SwiftUtilsTests/WithMutableValueTests.swift:
--------------------------------------------------------------------------------
 1 | //
 2 | //  WithMutableValueTests.swift
 3 | //  azooKeyTests
 4 | //
 5 | //  Created by ensan on 2022/12/22.
 6 | //  Copyright © 2022 ensan. All rights reserved.
 7 | //
 8 | 
 9 | @testable import SwiftUtils
10 | import XCTest
11 | 
12 | /// Tests for the `withMutableValue` helper.
13 | final class WithMutableValueTests: XCTestCase {
14 |     /// Mutating one array element through the closure must change that element and no other.
15 |     func testWithMutableValue() throws {
16 |         var numbers = [0, 1, 2, 3]
17 |         // For the element at index 1 (value 1): 1 * 1 + 1 / 1 == 2.
18 |         withMutableValue(&numbers[1]) { element in
19 |             element = element * element + element / element
20 |         }
21 |         let expected = [0, 2, 2, 3]
22 |         XCTAssertEqual(numbers, expected)
23 |     }
24 | 
25 | }
26 | 


--------------------------------------------------------------------------------
/install_cli.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | set -e
 3 | 
 4 | USE_ZENZAI=0
 5 | USE_DEBUG=0
 6 | 
 7 | # 引数の解析
 8 | for arg in "$@"; do
 9 |   if [ "$arg" = "--zenzai" ]; then
10 |     USE_ZENZAI=1
11 |   fi
12 |   if [ "$arg" = "--debug" ]; then
13 |     echo "⚠️ Debug mode is enabled. This may cause performance issues."
14 |     USE_DEBUG=1
15 |   fi
16 | done
17 | 
18 | if [ "$USE_DEBUG" -eq 1 ]; then
19 |   CONFIGURATION="debug"
20 | else
21 |   CONFIGURATION="release"
22 | fi
23 | 
24 | if [ "$USE_ZENZAI" -eq 1 ]; then
25 |   echo "📦 Building with Zenzai support..."
26 |   swift build -c $CONFIGURATION -Xcxx -xobjective-c++ --traits Zenzai
27 | else
28 |   echo "📦 Building..."
29 |   swift build -c $CONFIGURATION -Xcxx -xobjective-c++
30 | fi
31 | 
32 | # Copy Required Resources
33 | sudo cp -R .build/${CONFIGURATION}/llama.framework /usr/local/lib/
34 | 
35 | # add rpath
36 | RPATH="/usr/local/lib/"
37 | BINARY_PATH=".build/${CONFIGURATION}/CliTool"
38 | 
39 | if ! otool -l "$BINARY_PATH" | grep -q "$RPATH"; then
40 |     install_name_tool -add_rpath "$RPATH" "$BINARY_PATH"
41 | else
42 |     echo "✅ RPATH $RPATH is already present in $BINARY_PATH"
43 | fi
44 | # if debug mode, codesign is required to execute
45 | if [ "$USE_DEBUG" -eq 1 ]; then
46 |   echo "🔒 Signing the binary for debug mode..."
47 |   codesign --force --sign - .build/${CONFIGURATION}/CliTool
48 | fi
49 | 
50 | # Install
51 | sudo cp -f .build/${CONFIGURATION}/CliTool /usr/local/bin/anco
52 | 


--------------------------------------------------------------------------------