├── .editorconfig ├── .gitattributes ├── .github └── workflows │ ├── build.yml │ ├── codeql-analysis.yml │ ├── lint.yml │ └── vulns.yml ├── .gitignore ├── .goreleaser.yml ├── .travis.yml ├── LICENSE ├── README.md ├── Taskfile.yml ├── b64 ├── base64.go └── example_test.go ├── bcrypt ├── bcrypt.go └── bcrypt_test.go ├── dependency.png ├── dot-config.toml ├── dump ├── dump.go ├── dump_test.go └── example_test.go ├── ecode └── ecode.go ├── enc ├── conversion.go ├── decode.go ├── enc_test.go ├── encode.go ├── encoding.go └── example_test.go ├── facade ├── base64.go ├── bcrypt.go ├── completion.go ├── dump.go ├── enc.go ├── facade.go ├── guess.go ├── hash.go ├── kana.go ├── newline.go ├── nrm.go ├── rbom.go ├── version.go ├── version_test.go └── width.go ├── go.mod ├── go.sum ├── guess ├── example_test.go ├── guess.go ├── guess_test.go └── testdata │ ├── hello-euc.txt │ ├── hello-sjis.txt │ └── hello-utf8.txt ├── hash ├── algorithm.go ├── check.go ├── checker.go ├── checker_test.go ├── hash.go ├── hash_test.go └── testdata │ └── null.dat ├── kana ├── example_test.go ├── form.go ├── kana.go └── kana_test.go ├── main.go ├── newline ├── example_test.go ├── form.go ├── newline.go └── newline_test.go ├── nrm ├── example_test.go ├── form.go ├── kangxi-radicals.go ├── norm.go ├── nrm_test.go └── radicals-sample │ ├── equivalent-unified-ideograph.csv │ └── main.go ├── rbom ├── rbom.go └── rbom_test.go └── width ├── example_test.go ├── form.go ├── width.go └── width_test.go /.editorconfig: -------------------------------------------------------------------------------- 1 | root = true 2 | 3 | [*] 4 | end_of_line = lf 5 | charset = utf-8 6 | indent_style = tab 7 | indent_size = 4 8 | trim_trailing_whitespace = false 9 | insert_final_newline = true 10 | 11 | [*.go] 12 | trim_trailing_whitespace = true 13 | 14 | [*.md] 15 | indent_style = space 16 | indent_size = 4 17 | 18 | [*.yml] 19 | indent_style = space 20 | indent_size = 2 21 | trim_trailing_whitespace = true 22 | 23 | 
[*.toml] 24 | indent_style = space 25 | indent_size = 2 26 | trim_trailing_whitespace = true 27 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | *.go text eol=lf 2 | -------------------------------------------------------------------------------- /.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | name: build 2 | 3 | on: 4 | push: 5 | tags: 6 | - v* 7 | jobs: 8 | goreleaser: 9 | runs-on: ubuntu-latest 10 | steps: 11 | - name: Checkout 12 | uses: actions/checkout@v4 13 | with: 14 | fetch-depth: 0 15 | - name: Set up Go 16 | uses: actions/setup-go@v5 17 | with: 18 | go-version-file: 'go.mod' 19 | - name: Run GoReleaser 20 | uses: goreleaser/goreleaser-action@v6 21 | with: 22 | # either 'goreleaser' (default) or 'goreleaser-pro' 23 | distribution: goreleaser 24 | version: latest 25 | args: release --clean 26 | env: 27 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 28 | # Your GoReleaser Pro key, if you are using the 'goreleaser-pro' distribution 29 | # GORELEASER_KEY: ${{ secrets.GORELEASER_KEY }} 30 | -------------------------------------------------------------------------------- /.github/workflows/codeql-analysis.yml: -------------------------------------------------------------------------------- 1 | # For most projects, this workflow file will not need changing; you simply need 2 | # to commit it to your repository. 3 | # 4 | # You may wish to alter this file to override the set of languages analyzed, 5 | # or to provide custom queries or build logic. 
6 | name: "CodeQL" 7 | 8 | on: 9 | push: 10 | branches: [master] 11 | pull_request: 12 | # The branches below must be a subset of the branches above 13 | branches: [master] 14 | schedule: 15 | - cron: '0 20 * * 0' 16 | 17 | jobs: 18 | CodeQL-Build: 19 | # CodeQL runs on ubuntu-latest, windows-latest, and macos-latest 20 | runs-on: ubuntu-latest 21 | 22 | permissions: 23 | # required for all workflows 24 | security-events: write 25 | 26 | # only required for workflows in private repositories 27 | actions: read 28 | contents: read 29 | 30 | steps: 31 | - name: Checkout repository 32 | uses: actions/checkout@v3 33 | 34 | # Initializes the CodeQL tools for scanning. 35 | - name: Initialize CodeQL 36 | uses: github/codeql-action/init@v2 37 | # Override language selection by uncommenting this and choosing your languages 38 | with: 39 | languages: go 40 | 41 | # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). 42 | # If this step fails, then you should remove it and run the build manually (see below). 43 | - name: Autobuild 44 | uses: github/codeql-action/autobuild@v2 45 | 46 | # ℹ️ Command-line programs to run using the OS shell. 47 | # 📚 https://git.io/JvXDl 48 | 49 | # ✏️ If the Autobuild fails above, remove it and uncomment the following 50 | # three lines and modify them (or add more) to build your code if your 51 | # project uses a compiled language 52 | 53 | #- run: | 54 | # make bootstrap 55 | # make release 56 | 57 | - name: Perform CodeQL Analysis 58 | uses: github/codeql-action/analyze@v2 59 | -------------------------------------------------------------------------------- /.github/workflows/lint.yml: -------------------------------------------------------------------------------- 1 | name: lint 2 | on: 3 | push: 4 | branches: 5 | - master 6 | pull_request: 7 | 8 | permissions: 9 | contents: read 10 | # Optional: allow read access to pull request. Use with `only-new-issues` option. 
11 | # pull-requests: read 12 | jobs: 13 | golangci: 14 | name: lint 15 | runs-on: ubuntu-latest 16 | steps: 17 | - uses: actions/checkout@v4 18 | - uses: actions/setup-go@v5 19 | with: 20 | go-version-file: 'go.mod' 21 | - name: golangci-lint 22 | uses: golangci/golangci-lint-action@v3 23 | with: 24 | # Optional: version of golangci-lint to use in form of v1.2 or v1.2.3 or `latest` to use the latest version 25 | version: latest 26 | 27 | # Optional: working directory, useful for monorepos 28 | # working-directory: somedir 29 | 30 | # Optional: golangci-lint command line arguments. 31 | args: --enable gosec --exclude "G501|G505" 32 | 33 | # Optional: show only new issues if it's a pull request. The default value is `false`. 34 | # only-new-issues: true 35 | 36 | # Optional: if set to true then the all caching functionality will be complete disabled, 37 | # takes precedence over all other caching options. 38 | # skip-cache: true 39 | 40 | # Optional: if set to true then the action don't cache or restore ~/go/pkg. 41 | # skip-pkg-cache: true 42 | 43 | # Optional: if set to true then the action don't cache or restore ~/.cache/go-build. 44 | # skip-build-cache: true 45 | - name: testing 46 | run: go test -shuffle on ./... 47 | - name: install govulncheck 48 | run: go install golang.org/x/vuln/cmd/govulncheck@latest 49 | - name: running govulncheck 50 | run: govulncheck ./... 
51 | -------------------------------------------------------------------------------- /.github/workflows/vulns.yml: -------------------------------------------------------------------------------- 1 | name: vulns 2 | on: 3 | push: 4 | branches: 5 | - master 6 | pull_request: 7 | jobs: 8 | vulns: 9 | name: Vulnerability scanner 10 | runs-on: ubuntu-latest 11 | steps: 12 | - uses: actions/checkout@v4 13 | - uses: actions/setup-go@v5 14 | with: 15 | go-version-file: 'go.mod' 16 | - name: install depm 17 | run: go install github.com/goark/depm@latest 18 | - name: WriteGoList 19 | run: depm list --json > go.list 20 | - name: Nancy 21 | uses: sonatype-nexus-community/nancy-github-action@main 22 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Binaries for programs and plugins 2 | *.exe 3 | *.exe~ 4 | *.dll 5 | *.so 6 | *.dylib 7 | 8 | # Test binary, built with `go test -c` 9 | *.test 10 | 11 | # Output of the go coverage tool, specifically when used with LiteIDE 12 | *.out 13 | 14 | # Dependency directories (remove the comment below to include it) 15 | # vendor/ 16 | 17 | # Other files and directories 18 | dist/ 19 | .task/ 20 | work/ 21 | *.bak 22 | -------------------------------------------------------------------------------- /.goreleaser.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | project_name: gnkf 4 | release: 5 | github: 6 | owner: goark 7 | name: gnkf 8 | 9 | builds: 10 | - 11 | env: 12 | - GO111MODULE=on 13 | - CGO_ENABLED=0 14 | goos: 15 | - linux 16 | - darwin 17 | - windows 18 | - freebsd 19 | goarch: 20 | - amd64 21 | - riscv64 22 | - arm64 23 | main: ./ 24 | flags: 25 | - -trimpath 26 | ldflags: -s -w -X github.com/goark/gnkf/facade.Version=v{{ .Version }} 27 | binary: gnkf 28 | 29 | archives: 30 | - 31 | format: tar.gz 32 | format_overrides: 33 | - goos: 
windows 34 | format: zip 35 | name_template: >- 36 | {{ .Binary }}_ 37 | {{- .Version }}_ 38 | {{- if eq .Os "freebsd" }}FreeBSD 39 | {{- else }}{{ title .Os }}{{ end }}_ 40 | {{- if eq .Arch "amd64" }}64bit 41 | {{- else if eq .Arch "386" }}32bit 42 | {{- else if eq .Arch "arm64" }}ARM64 43 | {{- else if eq .Arch "riscv64" }}RISCV 44 | {{- else }}{{ .Arch }}{{ if .Arm }}v{{ .Arm }}{{ end }}{{ end }} 45 | files: 46 | - LICENSE* 47 | - README* 48 | - dependency.png 49 | 50 | changelog: 51 | sort: asc 52 | filters: 53 | exclude: 54 | - docs 55 | - typo 56 | - Merge pull request 57 | - Merge branch 58 | 59 | snapshot: 60 | version_template: SNAPSHOT-{{ .Commit }} 61 | 62 | checksum: 63 | name_template: '{{ .ProjectName }}_{{ .Version }}_checksums.txt' 64 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: go 2 | 3 | go: 4 | - "1.15.x" 5 | 6 | env: 7 | global: 8 | - GO111MODULE=on 9 | 10 | install: 11 | - go mod download 12 | 13 | script: 14 | - go test ./... 15 | 16 | deploy: 17 | - provider: script 18 | skip_cleanup: true 19 | script: curl -sL https://git.io/goreleaser | bash 20 | on: 21 | tags: true 22 | condition: $TRAVIS_OS_NAME = linux 23 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 
14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # [gnkf] -- Network Kanji Filter by Golang 2 | 3 | [![check vulns](https://github.com/goark/gnkf/workflows/vulns/badge.svg)](https://github.com/goark/gnkf/actions) 4 | [![lint status](https://github.com/goark/gnkf/workflows/lint/badge.svg)](https://github.com/goark/gnkf/actions) 5 | [![GitHub license](https://img.shields.io/badge/license-Apache%202-blue.svg)](https://raw.githubusercontent.com/goark/gnkf/master/LICENSE) 6 | [![GitHub release](https://img.shields.io/github/release/goark/gnkf.svg)](https://github.com/goark/gnkf/releases/latest) 7 | 8 | This package requires Go 1.16 or later. 9 | 10 | **Migrated repository to [github.com/goark/gnkf][gnkf]** 11 | 12 | ## Build and Install 13 | 14 | ``` 15 | $ go install github.com/goark/gnkf@latest 16 | ``` 17 | 18 | ### Binaries 19 | 20 | See [latest release](https://github.com/goark/gnkf/releases/latest). 
21 | 22 | ## Usage 23 | 24 | ``` 25 | $ gnkf -h 26 | Network Kanji Filter by Golang 27 | 28 | Usage: 29 | gnkf [flags] 30 | gnkf [command] 31 | 32 | Available Commands: 33 | base64 Encode/Decode BASE64 34 | bcrypt Hash and compare by BCrypt 35 | completion Generate completion script 36 | dump Hexadecimal view of octet data stream 37 | enc Convert character encoding of the text 38 | guess Guess character encoding of the text 39 | hash Print or check hash value 40 | help Help about any command 41 | kana Convert kana characters in the text 42 | newline Convert newline form in the text 43 | norm Unicode normalization of the text 44 | remove-bom Remove BOM character in UTF-8 string 45 | version Print the version number 46 | width Convert character width in the text 47 | 48 | Flags: 49 | --debug for debug 50 | -h, --help help for gnkf 51 | 52 | Use "gnkf [command] --help" for more information about a command. 53 | ``` 54 | 55 | ### gnkf guess command 56 | 57 | ``` 58 | $ gnkf guess -h 59 | Guess character encoding of the text 60 | 61 | Usage: 62 | gnkf guess [flags] 63 | 64 | Aliases: 65 | guess, g 66 | 67 | Flags: 68 | --all print all guesses 69 | -f, --file string path of input text file 70 | -h, --help help for guess 71 | 72 | Global Flags: 73 | --debug for debug 74 | 75 | $ echo こんにちは,世界 | gnkf guess --all 76 | UTF-8 77 | windows-1255 78 | windows-1253 79 | Big5 80 | GB-18030 81 | Shift_JIS 82 | ``` 83 | 84 | ### gnkf enc command 85 | 86 | ``` 87 | $ gnkf enc -h 88 | Convert character encoding of the text. 89 | Using MIME and IANA name as the character encoding name. 
90 | Refer: http://www.iana.org/assignments/character-sets/character-sets.xhtml 91 | 92 | Usage: 93 | gnkf enc [flags] 94 | 95 | Aliases: 96 | enc, encoding, e 97 | 98 | Flags: 99 | -d, --dst-encoding string character encoding name of output text (default "utf-8") 100 | -f, --file string path of input text file 101 | -g, --guess guess character encoding of source text 102 | -h, --help help for enc 103 | -o, --output string path of output file 104 | -b, --remove-bom remove BOM character in source text (UTF-8 only) 105 | -s, --src-encoding string character encoding name of source text (default "utf-8") 106 | 107 | Global Flags: 108 | --debug for debug 109 | 110 | $ echo こんにちは,世界 | gnkf enc -g -d shift_jis | gnkf dump 111 | 0x82, 0xb1, 0x82, 0xf1, 0x82, 0xc9, 0x82, 0xbf, 0x82, 0xcd, 0x81, 0x43, 0x90, 0xa2, 0x8a, 0x45, 0x0a 112 | ``` 113 | 114 | ### gnkf newline command 115 | 116 | ``` 117 | $ gnkf newline -h 118 | Convert newline form in the text. 119 | 120 | Usage: 121 | gnkf newline [flags] 122 | 123 | Aliases: 124 | newline, nwln, nl 125 | 126 | Flags: 127 | -f, --file string path of input text file 128 | -h, --help help for newline 129 | -n, --newline-form string newline form: [lf|cr|crlf] (default "lf") 130 | -o, --output string path of output file 131 | 132 | Global Flags: 133 | --debug for debug 134 | 135 | $ echo こんにちは,世界 | gnkf newline -n crlf | gnkf dump --unicode 136 | 0x3053, 0x3093, 0x306b, 0x3061, 0x306f, 0xff0c, 0x4e16, 0x754c, 0x000d, 0x000a 137 | ``` 138 | 139 | ### gnkf norm command 140 | 141 | ``` 142 | $ gnkf norm -h 143 | Unicode normalization of the text (UTF-8 encoding only). 
144 | 145 | Usage: 146 | gnkf norm [flags] 147 | 148 | Aliases: 149 | norm, normalize, nrm, nm 150 | 151 | Flags: 152 | -f, --file string path of input text file 153 | -h, --help help for norm 154 | -k, --kangxi-radicals normalize kangxi radicals only (with nfkc or nfkd form) 155 | -n, --norm-form string Unicode normalization form: [nfc|nfd|nfkc|nfkd] (default "nfc") 156 | -o, --output string path of output file 157 | -b, --remove-bom remove BOM character 158 | 159 | Global Flags: 160 | --debug for debug 161 | 162 | $ echo ペンギン | gnkf norm -n nfkc | gnkf dump --unicode 163 | 0x30da, 0x30f3, 0x30ae, 0x30f3, 0x000a 164 | ``` 165 | 166 | #### Normalize Kangxi Radicals 167 | 168 | ``` 169 | $ echo ㈱埼⽟ | gnkf dump --unicode 170 | 0x3231, 0x57fc, 0x2f5f, 0x000a 171 | 172 | $ echo ㈱埼⽟ | gnkf norm -n nfkc -k | gnkf dump --unicode 173 | 0x3231, 0x57fc, 0x7389, 0x000a 174 | ``` 175 | 176 | ### gnkf width command 177 | 178 | ``` 179 | $ gnkf width -h 180 | Convert character width in the text (UTF-8 encoding only). 181 | 182 | Usage: 183 | gnkf width [flags] 184 | 185 | Aliases: 186 | width, wdth, w 187 | 188 | Flags: 189 | -c, --conversion-form string conversion form: [fold|narrow|widen] (default "fold") 190 | -f, --file string path of input text file 191 | -h, --help help for width 192 | -o, --output string path of output file 193 | -b, --remove-bom remove BOM character 194 | 195 | Global Flags: 196 | --debug for debug 197 | 198 | $ echo ペンギン | gnkf width -c narrow | gnkf dump --unicode 199 | 0xff8d, 0xff9f, 0xff9d, 0xff77, 0xff9e, 0xff9d, 0x000a 200 | ``` 201 | 202 | ### gnkf kana command 203 | 204 | ``` 205 | $ gnkf kana -h 206 | Convert kana characters in the text. 207 | UTF-8 encoding only. 208 | "hiragana" and "katakana" forms are valid only for full-width kana character. 
209 | 210 | Usage: 211 | gnkf kana [flags] 212 | 213 | Aliases: 214 | kana, k 215 | 216 | Flags: 217 | -c, --conversion-form string conversion form: [hiragana|katakana|chokuon] (default "katakana") 218 | -f, --file string path of input text file 219 | --fold convert character width by fold form 220 | -h, --help help for kana 221 | -o, --output string path of output file 222 | -b, --remove-bom remove BOM character 223 | 224 | Global Flags: 225 | --debug for debug 226 | 227 | $ echo こんにちは | gnkf kana -c katakana 228 | コンニチハ 229 | ``` 230 | 231 | #### Convert 直音 (Chokuon; Upper kana characters) 232 | 233 | ``` 234 | $ echo ニッポン | gnkf kana -c chokuon 235 | ニツポン 236 | ``` 237 | 238 | ### gnkf base64 command 239 | 240 | ``` 241 | $ gnkf base64 -h 242 | Encode/Decode BASE64. 243 | 244 | Usage: 245 | gnkf base64 [flags] 246 | 247 | Aliases: 248 | base64, b64 249 | 250 | Flags: 251 | -d, --decode decode BASE64 string 252 | -f, --file string path of input text file 253 | -u, --for-url encoding/decoding defined in RFC 4648 254 | -h, --help help for base64 255 | -p, --no-padding no padding 256 | -o, --output string path of output file 257 | 258 | Global Flags: 259 | --debug for debug 260 | 261 | $ echo Hello World | gnkf b64 262 | SGVsbG8gV29ybGQK 263 | 264 | $ echo SGVsbG8gV29ybGQK | gnkf b64 -d 265 | Hello World 266 | ``` 267 | 268 | ### gnkf bcrypt command 269 | 270 | ``` 271 | $ gnkf bcrypt -h 272 | Hash and compare by BCrypt. 273 | 274 | Usage: 275 | gnkf bcrypt [flags] string [string...] 
276 | 277 | Aliases: 278 | bcrypt, bc 279 | 280 | Flags: 281 | --compare string compare to BCrypt hashed string 282 | -c, --cost int BCrypt cost (4-31) (default 10) 283 | -h, --help help for bcrypt 284 | 285 | Global Flags: 286 | --debug for debug 287 | 288 | $ gnkf bc password 289 | $2a$10$vvbBuQoVR9AFis6J4xtZ0espSfe976pZ1Em669nhdg2loAm2Yjxl2 290 | 291 | $ gnkf bc --compare '$2a$10$vvbBuQoVR9AFis6J4xtZ0espSfe976pZ1Em669nhdg2loAm2Yjxl2' password 292 | compare BCrypt hashed string '$2a$10$vvbBuQoVR9AFis6J4xtZ0espSfe976pZ1Em669nhdg2loAm2Yjxl2' to... 293 | password : match! 294 | ``` 295 | 296 | ### gnkf hash command 297 | 298 | ``` 299 | $ gnkf hash -h 300 | Print or check hash value. 301 | Support algorithm: 302 | MD5, SHA-1, SHA-224, SHA-256, SHA-384, SHA-512, SHA-512/224, SHA-512/256 303 | 304 | Usage: 305 | gnkf hash [flags] [file] 306 | 307 | Aliases: 308 | hash, h 309 | 310 | Flags: 311 | -a, --algorithm string hash algorithm (default "SHA-256") 312 | -c, --check don't fail or report status for missing files 313 | -h, --help help for hash 314 | --ignore-missing don't fail or report status for missing files (with check option) 315 | --quiet don't print OK for each successfully verified file (with check option) 316 | 317 | Global Flags: 318 | --debug for debug 319 | 320 | $ echo Hello World | gnkf h 321 | d2a84f4b8b650937ec8f73cd8be2c74add5a911ba64df27458ed8229da804a26 - 322 | 323 | $ gnkf h hash/testdata/null.dat 324 | e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855 hash/testdata/null.dat 325 | 326 | $ gnkf h hash/testdata/null.dat | gnkf h -c 327 | hash/testdata/null.dat: OK 328 | ``` 329 | 330 | ### gnkf remove-bom command 331 | 332 | ``` 333 | $ gnkf remove-bom -h 334 | Remove BOM character in UTF-8 string. 
335 | 336 | Usage: 337 | gnkf remove-bom [flags] 338 | 339 | Aliases: 340 | remove-bom, rbom, rb 341 | 342 | Flags: 343 | -f, --file string path of input text file 344 | -h, --help help for remove-bom 345 | -o, --output string path of output file 346 | 347 | Global Flags: 348 | --debug for debug 349 | 350 | $ echo Hello | gnkf dump 351 | 0xef, 0xbb, 0xbf, 0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x0a 352 | 353 | $ echo Hello | gnkf remove-bom | gnkf dump 354 | 0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x0a 355 | ``` 356 | 357 | ### gnkf dump command 358 | 359 | ``` 360 | $ gnkf dump -h 361 | Hexadecimal view of octet data stream with C language array style. 362 | 363 | Usage: 364 | gnkf dump [flags] 365 | 366 | Aliases: 367 | dump, hexdump, d, hd 368 | 369 | Flags: 370 | -f, --file string path of input text file 371 | -h, --help help for dump 372 | -u, --unicode print by Unicode code point (UTF-8 only) 373 | 374 | Global Flags: 375 | --debug for debug 376 | 377 | $ echo ペンギン | gnkf dump 378 | 0xe3, 0x83, 0x9a, 0xe3, 0x83, 0xb3, 0xe3, 0x82, 0xae, 0xe3, 0x83, 0xb3, 0x0a 379 | 380 | $ echo ペンギン | gnkf dump --unicode 381 | 0x30da, 0x30f3, 0x30ae, 0x30f3, 0x000a 382 | ``` 383 | 384 | ## Modules Requirement Graph 385 | 386 | [![dependency.png](./dependency.png)](./dependency.png) 387 | 388 | [gnkf]: https://github.com/goark/gnkf "goark/gnkf: Network Kanji Filter by Golang" 389 | -------------------------------------------------------------------------------- /Taskfile.yml: -------------------------------------------------------------------------------- 1 | version: '3' 2 | 3 | tasks: 4 | default: 5 | cmds: 6 | - task: prepare 7 | - task: test 8 | - task: nancy 9 | - task: graph 10 | 11 | build-all: 12 | desc: Build executable binary with GoReleaser. 13 | cmds: 14 | - goreleaser release --snapshot --skip=publish --clean 15 | 16 | test: 17 | desc: Test and lint. 18 | cmds: 19 | - go mod verify 20 | - go test -shuffle on ./... 
21 | - golangci-lint run --enable gosec --exclude "G501|G505" --timeout 3m0s ./... 22 | sources: 23 | - ./go.mod 24 | - '**/*.go' 25 | 26 | nancy: 27 | desc: Check vulnerability of external packages with Nancy. 28 | cmds: 29 | - depm list -j | nancy sleuth -n 30 | sources: 31 | - ./go.mod 32 | - '**/*.go' 33 | 34 | prepare: 35 | - go mod tidy -v -go=1.24 36 | 37 | clean: 38 | desc: Initialize module and build cache, and remake go.sum file. 39 | cmds: 40 | - rm -f ./go.sum 41 | - go clean -cache 42 | - go clean -modcache 43 | 44 | graph: 45 | desc: Make grapth of dependency modules. 46 | cmds: 47 | - depm m --dot --dot-config dot-config.toml | dot -Tpng -o ./dependency.png 48 | sources: 49 | - ./go.mod 50 | - '**/*.go' 51 | generates: 52 | - ./dependency.png 53 | -------------------------------------------------------------------------------- /b64/base64.go: -------------------------------------------------------------------------------- 1 | package b64 2 | 3 | import ( 4 | "encoding/base64" 5 | "io" 6 | 7 | "github.com/goark/errs" 8 | ) 9 | 10 | //Encode outputs base64 encoding string from raw data. 11 | func Encode(forURL, noPadding bool, r io.Reader, w io.Writer) error { 12 | wc := base64.NewEncoder(encoder(forURL, noPadding), w) 13 | defer wc.Close() 14 | if _, err := io.Copy(wc, r); err != nil { 15 | return errs.Wrap(err) 16 | } 17 | return nil 18 | } 19 | 20 | //Decode outputs raw data from base64 encoding string. 
// encoder picks the base64 codec for the requested variant.
// forURL selects base64.URLEncoding instead of base64.StdEncoding, and
// noPadding returns a copy of that codec configured with base64.NoPadding.
func encoder(forURL, noPadding bool) *base64.Encoding {
	alphabet := base64.StdEncoding
	if forURL {
		alphabet = base64.URLEncoding
	}
	if !noPadding {
		return alphabet
	}
	return alphabet.WithPadding(base64.NoPadding)
}
// ExampleEncode encodes the text "Hello World\n" with both the forURL and
// noPadding options switched off and prints the resulting base64 string.
func ExampleEncode() {
	input := strings.NewReader("Hello World\n")
	output := &bytes.Buffer{}
	if err := b64.Encode(false, false, input, output); err != nil {
		fmt.Println(err)
		return
	}
	fmt.Println(output.String())
	// Output:
	// SGVsbG8gV29ybGQK
}
48 | */ 49 | -------------------------------------------------------------------------------- /bcrypt/bcrypt.go: -------------------------------------------------------------------------------- 1 | package bcrypt 2 | 3 | import ( 4 | "github.com/goark/errs" 5 | "golang.org/x/crypto/bcrypt" 6 | ) 7 | 8 | const ( 9 | MinCost int = bcrypt.MinCost 10 | MaxCost int = bcrypt.MaxCost 11 | DefaultCost int = bcrypt.DefaultCost 12 | ) 13 | 14 | //Hash function returns hashed string by BCrypt algorithm. 15 | func Hash(s string, cost int) (string, error) { 16 | b, err := bcrypt.GenerateFromPassword([]byte(s), cost) 17 | if err != nil { 18 | return "", errs.Wrap(err) 19 | } 20 | return string(b), nil 21 | } 22 | 23 | //Compare function compares a bcrypt hashed string with its possible plaintext equivalent. Returns nil on success, or an error on failure. 24 | func Compare(h, s string) error { 25 | if err := bcrypt.CompareHashAndPassword([]byte(h), []byte(s)); err != nil { 26 | return errs.Wrap(err) 27 | } 28 | return nil 29 | } 30 | 31 | /* Copyright 2021 Spiegel 32 | * 33 | * Licensed under the Apache License, Version 2.0 (the "License"); 34 | * you may not use this file except in compliance with the License. 35 | * You may obtain a copy of the License at 36 | * 37 | * http://www.apache.org/licenses/LICENSE-2.0 38 | * 39 | * Unless required by applicable law or agreed to in writing, software 40 | * distributed under the License is distributed on an "AS IS" BASIS, 41 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 42 | * See the License for the specific language governing permissions and 43 | * limitations under the License. 
44 | */ 45 | -------------------------------------------------------------------------------- /bcrypt/bcrypt_test.go: -------------------------------------------------------------------------------- 1 | package bcrypt_test 2 | 3 | import ( 4 | "fmt" 5 | "testing" 6 | 7 | "github.com/goark/gnkf/bcrypt" 8 | ) 9 | 10 | func TestBCrypt(t *testing.T) { 11 | s := "password" 12 | h, err := bcrypt.Hash(s, bcrypt.DefaultCost) 13 | if err != nil { 14 | t.Errorf("Hash() error = \"%+v\", want nil.", err) 15 | } 16 | fmt.Println(h) 17 | err = bcrypt.Compare(h, s) 18 | if err != nil { 19 | t.Errorf("Compare() is \"%+v\", want nil.", err) 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /dependency.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/goark/gnkf/3ccbb0e62c44011c6838b64d486f6f1f65d1ed43/dependency.png -------------------------------------------------------------------------------- /dot-config.toml: -------------------------------------------------------------------------------- 1 | [node] 2 | fontname = "Inconsolata" 3 | [edge] 4 | color = "red" 5 | -------------------------------------------------------------------------------- /dump/dump.go: -------------------------------------------------------------------------------- 1 | package dump 2 | 3 | import ( 4 | "bufio" 5 | "bytes" 6 | "fmt" 7 | "io" 8 | "unicode/utf8" 9 | 10 | "github.com/goark/errs" 11 | "github.com/goark/gnkf/ecode" 12 | ) 13 | 14 | //Octet output io.Writer hex-dump of byte stream. 15 | func Octet(w io.Writer, r io.Reader) error { 16 | sep := "" 17 | inp := bufio.NewReader(r) 18 | for { 19 | b, err := inp.ReadByte() 20 | if err != nil { 21 | if errs.Is(err, io.EOF) { 22 | break 23 | } 24 | return errs.Wrap(err) 25 | } 26 | fmt.Fprintf(w, "%s0x%02x", sep, b) 27 | sep = ", " 28 | } 29 | return nil 30 | } 31 | 32 | //OctetString output hex-dump string. 
33 | func OctetString(r io.Reader) string { 34 | buf := &bytes.Buffer{} 35 | if err := Octet(buf, r); err != nil { 36 | return "" 37 | } 38 | return buf.String() 39 | } 40 | 41 | //UnicodePoint output io.Writer hex-dump of Unicode code point (input text is UTF-8 only). 42 | func UnicodePoint(w io.Writer, r io.Reader) error { 43 | buf := &bytes.Buffer{} 44 | if _, err := buf.ReadFrom(r); err != nil { 45 | return errs.Wrap(err) 46 | } 47 | if !utf8.Valid(buf.Bytes()) { 48 | return errs.Wrap(ecode.ErrInvalidUTF8Text) 49 | } 50 | 51 | sep := "" 52 | for _, rn := range buf.String() { 53 | if (rn & 0x7fff0000) == 0 { 54 | fmt.Fprintf(w, "%s0x%04x", sep, rn) 55 | } else { 56 | fmt.Fprintf(w, "%s0x%08x", sep, rn) 57 | } 58 | sep = ", " 59 | } 60 | return nil 61 | } 62 | 63 | //UnicodePointString output hex-dump string of Unicode code point (input text is UTF-8 only). 64 | func UnicodePointString(r io.Reader) string { 65 | buf := &bytes.Buffer{} 66 | if err := UnicodePoint(buf, r); err != nil { 67 | return "" 68 | } 69 | return buf.String() 70 | } 71 | 72 | /* Copyright 2020 Spiegel 73 | * 74 | * Licensed under the Apache License, Version 2.0 (the "License"); 75 | * you may not use this file except in compliance with the License. 76 | * You may obtain a copy of the License at 77 | * 78 | * http://www.apache.org/licenses/LICENSE-2.0 79 | * 80 | * Unless required by applicable law or agreed to in writing, software 81 | * distributed under the License is distributed on an "AS IS" BASIS, 82 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 83 | * See the License for the specific language governing permissions and 84 | * limitations under the License. 
85 | */ 86 | -------------------------------------------------------------------------------- /dump/dump_test.go: -------------------------------------------------------------------------------- 1 | package dump 2 | 3 | import ( 4 | "bytes" 5 | "testing" 6 | 7 | "github.com/goark/gnkf/ecode" 8 | ) 9 | 10 | var ( 11 | textUTF8 = []byte("こんにちは,世界!\n私の名前は Spiegel です。") 12 | textSJIS = []byte{0x82, 0xb1, 0x82, 0xf1, 0x82, 0xc9, 0x82, 0xbf, 0x82, 0xcd, 0x81, 0x43, 0x90, 0xa2, 0x8a, 0x45, 0x81, 0x49, 0x0a, 0x8e, 0x84, 0x82, 0xcc, 0x96, 0xbc, 0x91, 0x4f, 0x82, 0xcd, 0x20, 0x53, 0x70, 0x69, 0x65, 0x67, 0x65, 0x6c, 0x20, 0x82, 0xc5, 0x82, 0xb7, 0x81, 0x42} 13 | textEUC = []byte{0xa4, 0xb3, 0xa4, 0xf3, 0xa4, 0xcb, 0xa4, 0xc1, 0xa4, 0xcf, 0xa1, 0xa4, 0xc0, 0xa4, 0xb3, 0xa6, 0xa1, 0xaa, 0x0a, 0xbb, 0xe4, 0xa4, 0xce, 0xcc, 0xbe, 0xc1, 0xb0, 0xa4, 0xcf, 0x20, 0x53, 0x70, 0x69, 0x65, 0x67, 0x65, 0x6c, 0x20, 0xa4, 0xc7, 0xa4, 0xb9, 0xa1, 0xa3} 14 | ) 15 | 16 | func TestOctet(t *testing.T) { 17 | testCases := []struct { 18 | text []byte 19 | res string 20 | err error 21 | }{ 22 | {text: textUTF8, res: "0xe3, 0x81, 0x93, 0xe3, 0x82, 0x93, 0xe3, 0x81, 0xab, 0xe3, 0x81, 0xa1, 0xe3, 0x81, 0xaf, 0xef, 0xbc, 0x8c, 0xe4, 0xb8, 0x96, 0xe7, 0x95, 0x8c, 0xef, 0xbc, 0x81, 0x0a, 0xe7, 0xa7, 0x81, 0xe3, 0x81, 0xae, 0xe5, 0x90, 0x8d, 0xe5, 0x89, 0x8d, 0xe3, 0x81, 0xaf, 0x20, 0x53, 0x70, 0x69, 0x65, 0x67, 0x65, 0x6c, 0x20, 0xe3, 0x81, 0xa7, 0xe3, 0x81, 0x99, 0xe3, 0x80, 0x82", err: nil}, 23 | {text: textSJIS, res: "0x82, 0xb1, 0x82, 0xf1, 0x82, 0xc9, 0x82, 0xbf, 0x82, 0xcd, 0x81, 0x43, 0x90, 0xa2, 0x8a, 0x45, 0x81, 0x49, 0x0a, 0x8e, 0x84, 0x82, 0xcc, 0x96, 0xbc, 0x91, 0x4f, 0x82, 0xcd, 0x20, 0x53, 0x70, 0x69, 0x65, 0x67, 0x65, 0x6c, 0x20, 0x82, 0xc5, 0x82, 0xb7, 0x81, 0x42", err: nil}, 24 | {text: textEUC, res: "0xa4, 0xb3, 0xa4, 0xf3, 0xa4, 0xcb, 0xa4, 0xc1, 0xa4, 0xcf, 0xa1, 0xa4, 0xc0, 0xa4, 0xb3, 0xa6, 0xa1, 0xaa, 0x0a, 0xbb, 0xe4, 0xa4, 0xce, 0xcc, 0xbe, 0xc1, 0xb0, 0xa4, 0xcf, 0x20, 0x53, 
0x70, 0x69, 0x65, 0x67, 0x65, 0x6c, 0x20, 0xa4, 0xc7, 0xa4, 0xb9, 0xa1, 0xa3", err: nil}, 25 | {text: []byte{0xff}, res: "0xff", err: nil}, 26 | {text: nil, res: "", err: nil}, 27 | } 28 | 29 | for _, tc := range testCases { 30 | str := OctetString(bytes.NewReader(tc.text)) 31 | if str != tc.res { 32 | t.Errorf("OctetString() = \"%v\", want \"%v\".", str, tc.res) 33 | } 34 | } 35 | } 36 | 37 | func TestUnicodePoint(t *testing.T) { 38 | testCases := []struct { 39 | text []byte 40 | res string 41 | err error 42 | }{ 43 | {text: textUTF8, res: "0x3053, 0x3093, 0x306b, 0x3061, 0x306f, 0xff0c, 0x4e16, 0x754c, 0xff01, 0x000a, 0x79c1, 0x306e, 0x540d, 0x524d, 0x306f, 0x0020, 0x0053, 0x0070, 0x0069, 0x0065, 0x0067, 0x0065, 0x006c, 0x0020, 0x3067, 0x3059, 0x3002", err: nil}, 44 | {text: textSJIS, res: "", err: ecode.ErrInvalidUTF8Text}, 45 | {text: textEUC, res: "", err: ecode.ErrInvalidUTF8Text}, 46 | {text: []byte{0xff}, res: "", err: ecode.ErrInvalidUTF8Text}, 47 | {text: nil, res: "", err: nil}, 48 | } 49 | 50 | for _, tc := range testCases { 51 | str := UnicodePointString(bytes.NewReader(tc.text)) 52 | if str != tc.res { 53 | t.Errorf("UnicodePointString() = \"%v\", want \"%v\".", str, tc.res) 54 | } 55 | } 56 | } 57 | 58 | /* Copyright 2020 Spiegel 59 | * 60 | * Licensed under the Apache License, Version 2.0 (the "License"); 61 | * you may not use this file except in compliance with the License. 62 | * You may obtain a copy of the License at 63 | * 64 | * http://www.apache.org/licenses/LICENSE-2.0 65 | * 66 | * Unless required by applicable law or agreed to in writing, software 67 | * distributed under the License is distributed on an "AS IS" BASIS, 68 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 69 | * See the License for the specific language governing permissions and 70 | * limitations under the License. 
// ExampleUnicodePoint dumps the Unicode code points of a short Japanese text
// to standard output; a failure is reported on standard error instead.
func ExampleUnicodePoint() {
	if err := dump.UnicodePoint(os.Stdout, strings.NewReader("こんにちは,世界!\n私の名前は Spiegel です。")); err != nil {
		fmt.Fprintln(os.Stderr, err)
		return
	}
	//Output:
	//0x3053, 0x3093, 0x306b, 0x3061, 0x306f, 0xff0c, 0x4e16, 0x754c, 0xff01, 0x000a, 0x79c1, 0x306e, 0x540d, 0x524d, 0x306f, 0x0020, 0x0053, 0x0070, 0x0069, 0x0065, 0x0067, 0x0065, 0x006c, 0x0020, 0x3067, 0x3059, 0x3002
}
// Sentinel errors used by the other packages of this module. Callers wrap
// them with additional context, so they should be matched with an
// errors.Is-style check rather than by direct comparison.
//
// NOTE(review): ErrNotSuppotEncoding and ErrInvalidChekerFormat contain typos
// in their identifiers; they are exported names and are kept as they are.
var (
	ErrNullPointer = errors.New("null reference instance")
	ErrNoCommand   = errors.New("no command")
	// ErrNoData is reported by the bcrypt sub-command when no string argument is given.
	ErrNoData       = errors.New("no data")
	ErrCannotDetect = errors.New("cannot detect character encoding")
	// ErrInvalidUTF8Text is returned by dump.UnicodePoint for input that is not valid UTF-8.
	ErrInvalidUTF8Text = errors.New("invalid UTF-8 text")
	// ErrNotSuppotEncoding is returned by enc.Encoding when a name cannot be resolved to an encoding.
	ErrNotSuppotEncoding = errors.New("not support IANA encoding name")
	// ErrInvalidEncoding is returned by the enc package when transforming the text fails.
	ErrInvalidEncoding      = errors.New("text is invalid encoding")
	ErrInvalidNormForm      = errors.New("invalid Unicode normalization form")
	ErrInvalidNewlineForm   = errors.New("invalid newline form")
	ErrInvalidWidthForm     = errors.New("invalid width form")
	ErrInvalidKanaForm      = errors.New("invalid kana form")
	ErrInvalidHashAlg       = errors.New("not support hash algorithm")
	ErrImproperlyHashFormat = errors.New("improperly formatted hash string")
	ErrUnmatchHashString    = errors.New("hash value did NOT match")
	ErrInvalidChekerFormat  = errors.New("invalid checker format")
)
36 | */ 37 | -------------------------------------------------------------------------------- /enc/conversion.go: -------------------------------------------------------------------------------- 1 | package enc 2 | 3 | import ( 4 | "io" 5 | 6 | "github.com/goark/errs" 7 | "github.com/goark/gnkf/ecode" 8 | "golang.org/x/text/encoding" 9 | "golang.org/x/text/encoding/unicode" 10 | ) 11 | 12 | //Convert function converts character encoding text stream. 13 | func Convert(toIanaName string, writer io.Writer, fromIanaName string, txt io.Reader) error { 14 | encoder, err := Encoding(toIanaName) 15 | if err != nil { 16 | return errs.Wrap(err, errs.WithContext("toIanaName", toIanaName)) 17 | } 18 | decoder, err := Encoding(fromIanaName) 19 | if err != nil { 20 | return errs.Wrap(err, errs.WithContext("fromIanaName", fromIanaName)) 21 | } 22 | if encoder == unicode.UTF8 { 23 | return decode(decoder, writer, txt) 24 | } 25 | if decoder == unicode.UTF8 { 26 | return encode(encoder, writer, txt) 27 | } 28 | return convert(encoder, decoder, writer, txt) 29 | } 30 | 31 | func convert(encoder, decoder encoding.Encoding, writer io.Writer, txt io.Reader) error { 32 | if encoder == decoder { 33 | return notConvert(writer, txt) 34 | } 35 | if _, err := io.Copy(encoder.NewEncoder().Writer(writer), decoder.NewDecoder().Reader(txt)); err != nil { 36 | return errs.Wrap(ecode.ErrInvalidEncoding, errs.WithCause(err)) 37 | } 38 | return nil 39 | } 40 | 41 | func notConvert(writer io.Writer, txt io.Reader) error { 42 | if _, err := io.Copy(writer, txt); err != nil { 43 | return errs.Wrap(err) 44 | } 45 | return nil 46 | } 47 | 48 | /* Copyright 2020 Spiegel 49 | * 50 | * Licensed under the Apache License, Version 2.0 (the "License"); 51 | * you may not use this file except in compliance with the License. 
// Decode converts txt from the character encoding named ianaName to UTF-8
// and writes the result to writer.
// It returns an error when ianaName cannot be resolved by Encoding, or when
// the conversion itself fails.
func Decode(writer io.Writer, ianaName string, txt io.Reader) error {
	decoder, err := Encoding(ianaName)
	if err != nil {
		return errs.Wrap(err, errs.WithContext("ianaName", ianaName))
	}
	return decode(decoder, writer, txt)
}
35 | * You may obtain a copy of the License at 36 | * 37 | * http://www.apache.org/licenses/LICENSE-2.0 38 | * 39 | * Unless required by applicable law or agreed to in writing, software 40 | * distributed under the License is distributed on an "AS IS" BASIS, 41 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 42 | * See the License for the specific language governing permissions and 43 | * limitations under the License. 44 | */ 45 | -------------------------------------------------------------------------------- /enc/enc_test.go: -------------------------------------------------------------------------------- 1 | package enc 2 | 3 | import ( 4 | "bytes" 5 | "testing" 6 | 7 | "github.com/goark/errs" 8 | "github.com/goark/gnkf/ecode" 9 | ) 10 | 11 | var ( 12 | textUTF8 = []byte("こんにちは,世界!\n私の名前は Spiegel です。") 13 | textSJIS = []byte{0x82, 0xb1, 0x82, 0xf1, 0x82, 0xc9, 0x82, 0xbf, 0x82, 0xcd, 0x81, 0x43, 0x90, 0xa2, 0x8a, 0x45, 0x81, 0x49, 0x0a, 0x8e, 0x84, 0x82, 0xcc, 0x96, 0xbc, 0x91, 0x4f, 0x82, 0xcd, 0x20, 0x53, 0x70, 0x69, 0x65, 0x67, 0x65, 0x6c, 0x20, 0x82, 0xc5, 0x82, 0xb7, 0x81, 0x42} 14 | textEUC = []byte{0xa4, 0xb3, 0xa4, 0xf3, 0xa4, 0xcb, 0xa4, 0xc1, 0xa4, 0xcf, 0xa1, 0xa4, 0xc0, 0xa4, 0xb3, 0xa6, 0xa1, 0xaa, 0x0a, 0xbb, 0xe4, 0xa4, 0xce, 0xcc, 0xbe, 0xc1, 0xb0, 0xa4, 0xcf, 0x20, 0x53, 0x70, 0x69, 0x65, 0x67, 0x65, 0x6c, 0x20, 0xa4, 0xc7, 0xa4, 0xb9, 0xa1, 0xa3} 15 | ) 16 | 17 | func TestEncode(t *testing.T) { 18 | testCases := []struct { 19 | inp, out []byte 20 | ianaName string 21 | err error 22 | }{ 23 | {inp: textUTF8, out: textSJIS, ianaName: "shift_jis", err: nil}, 24 | {inp: textUTF8, out: textEUC, ianaName: "euc-jp", err: nil}, 25 | {inp: textUTF8, out: textUTF8, ianaName: "utf-8", err: nil}, 26 | {inp: textUTF8, out: []byte{}, ianaName: "foo", err: ecode.ErrNotSuppotEncoding}, 27 | {inp: textUTF8, out: []byte{}, ianaName: "us-ascii", err: ecode.ErrInvalidEncoding}, 28 | {inp: textEUC, out: []byte{}, ianaName: "shift_jis", 
err: ecode.ErrInvalidEncoding}, 29 | {inp: textSJIS, out: []byte{}, ianaName: "euc-jp", err: ecode.ErrInvalidEncoding}, 30 | } 31 | for _, tc := range testCases { 32 | buf := &bytes.Buffer{} 33 | if err := Encode(tc.ianaName, buf, bytes.NewReader(tc.inp)); err != nil { 34 | if !errs.Is(err, tc.err) { 35 | t.Errorf("Encode() error = \"%+v\", want \"%+v\".", err, tc.err) 36 | } 37 | } else if !bytes.Equal(buf.Bytes(), tc.out) { 38 | t.Errorf("Encode(%s) result wrong translation.", tc.ianaName) 39 | } 40 | } 41 | } 42 | 43 | func TestDecode(t *testing.T) { 44 | testCases := []struct { 45 | inp, out []byte 46 | ianaName string 47 | err error 48 | }{ 49 | {inp: textSJIS, out: textUTF8, ianaName: "shift_jis", err: nil}, 50 | {inp: textEUC, out: textUTF8, ianaName: "euc-jp", err: nil}, 51 | {inp: textUTF8, out: textUTF8, ianaName: "utf-8", err: nil}, 52 | {inp: textUTF8, out: []byte{}, ianaName: "foo", err: ecode.ErrNotSuppotEncoding}, 53 | } 54 | for _, tc := range testCases { 55 | buf := &bytes.Buffer{} 56 | if err := Decode(buf, tc.ianaName, bytes.NewReader(tc.inp)); err != nil { 57 | if !errs.Is(err, tc.err) { 58 | t.Errorf("Decode() error = \"%+v\", want \"%+v\".", err, tc.err) 59 | } 60 | } else if !bytes.Equal(buf.Bytes(), tc.out) { 61 | t.Errorf("Decode(%s) result wrong translation.", tc.ianaName) 62 | } 63 | } 64 | } 65 | 66 | func TestTranslate(t *testing.T) { 67 | testCases := []struct { 68 | inp, out []byte 69 | from, to string 70 | err error 71 | }{ 72 | {inp: textUTF8, out: textSJIS, from: "utf-8", to: "shift_jis", err: nil}, 73 | {inp: textUTF8, out: textEUC, from: "utf-8", to: "euc-jp", err: nil}, 74 | {inp: textSJIS, out: textUTF8, from: "shift_jis", to: "utf-8", err: nil}, 75 | {inp: textEUC, out: textUTF8, from: "euc-jp", to: "utf-8", err: nil}, 76 | {inp: textSJIS, out: textEUC, from: "shift_jis", to: "euc-jp", err: nil}, 77 | {inp: textEUC, out: textSJIS, from: "euc-jp", to: "shift_jis", err: nil}, 78 | {inp: textSJIS, out: textSJIS, from: 
"shift_jis", to: "shift_jis", err: nil}, 79 | {inp: textUTF8, out: textUTF8, from: "utf-8", to: "utf-8", err: nil}, 80 | {inp: textUTF8, out: textUTF8, from: "foo", to: "utf-8", err: ecode.ErrNotSuppotEncoding}, 81 | {inp: textUTF8, out: textUTF8, from: "utf-8", to: "bar", err: ecode.ErrNotSuppotEncoding}, 82 | {inp: textSJIS, out: textEUC, from: "euc-jp", to: "shift_jis", err: ecode.ErrInvalidEncoding}, 83 | } 84 | for _, tc := range testCases { 85 | buf := &bytes.Buffer{} 86 | if err := Convert(tc.to, buf, tc.from, bytes.NewReader(tc.inp)); err != nil { 87 | if !errs.Is(err, tc.err) { 88 | t.Errorf("Encode() error = \"%+v\", want \"%+v\".", err, tc.err) 89 | } 90 | } else if !bytes.Equal(buf.Bytes(), tc.out) { 91 | t.Errorf("Encode(%s -> %s) result wrong translation.", tc.from, tc.to) 92 | } 93 | } 94 | } 95 | 96 | /* Copyright 2020 Spiegel 97 | * 98 | * Licensed under the Apache License, Version 2.0 (the "License"); 99 | * you may not use this file except in compliance with the License. 100 | * You may obtain a copy of the License at 101 | * 102 | * http://www.apache.org/licenses/LICENSE-2.0 103 | * 104 | * Unless required by applicable law or agreed to in writing, software 105 | * distributed under the License is distributed on an "AS IS" BASIS, 106 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 107 | * See the License for the specific language governing permissions and 108 | * limitations under the License. 109 | */ 110 | -------------------------------------------------------------------------------- /enc/encode.go: -------------------------------------------------------------------------------- 1 | package enc 2 | 3 | import ( 4 | "io" 5 | 6 | "github.com/goark/errs" 7 | "github.com/goark/gnkf/ecode" 8 | "golang.org/x/text/encoding" 9 | "golang.org/x/text/encoding/unicode" 10 | ) 11 | 12 | //Encode converts UTF-8 from other character encoding text. 
13 | func Encode(ianaName string, writer io.Writer, txt io.Reader) error { 14 | encoder, err := Encoding(ianaName) 15 | if err != nil { 16 | return errs.Wrap(err, errs.WithContext("ianaName", ianaName)) 17 | } 18 | return encode(encoder, writer, txt) 19 | } 20 | 21 | func encode(encoder encoding.Encoding, writer io.Writer, txt io.Reader) error { 22 | if encoder == unicode.UTF8 { 23 | return notConvert(writer, txt) 24 | } 25 | if _, err := io.Copy(encoder.NewEncoder().Writer(writer), txt); err != nil { 26 | return errs.Wrap(ecode.ErrInvalidEncoding, errs.WithCause(err)) 27 | } 28 | return nil 29 | } 30 | 31 | /* Copyright 2020 Spiegel 32 | * 33 | * Licensed under the Apache License, Version 2.0 (the "License"); 34 | * you may not use this file except in compliance with the License. 35 | * You may obtain a copy of the License at 36 | * 37 | * http://www.apache.org/licenses/LICENSE-2.0 38 | * 39 | * Unless required by applicable law or agreed to in writing, software 40 | * distributed under the License is distributed on an "AS IS" BASIS, 41 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 42 | * See the License for the specific language governing permissions and 43 | * limitations under the License. 
44 | */ 45 | -------------------------------------------------------------------------------- /enc/encoding.go: -------------------------------------------------------------------------------- 1 | package enc 2 | 3 | import ( 4 | "github.com/goark/errs" 5 | "github.com/goark/gnkf/ecode" 6 | "golang.org/x/text/encoding" 7 | "golang.org/x/text/encoding/ianaindex" 8 | ) 9 | 10 | //GetEncoding returns encoding.Encoding instance from MIME or IANA name 11 | func Encoding(ianaName string) (encoding.Encoding, error) { 12 | e, err := ianaindex.IANA.Encoding(ianaName) 13 | if err != nil { 14 | e, err = ianaindex.MIME.Encoding(ianaName) 15 | if err != nil { 16 | return nil, errs.Wrap(ecode.ErrNotSuppotEncoding, errs.WithCause(err), errs.WithContext("ianaName", ianaName)) 17 | } 18 | } 19 | if e == nil { 20 | return nil, errs.Wrap(ecode.ErrNotSuppotEncoding, errs.WithCause(err), errs.WithContext("ianaName", ianaName)) 21 | } 22 | return e, nil 23 | } 24 | 25 | /* Copyright 2020 Spiegel 26 | * 27 | * Licensed under the Apache License, Version 2.0 (the "License"); 28 | * you may not use this file except in compliance with the License. 29 | * You may obtain a copy of the License at 30 | * 31 | * http://www.apache.org/licenses/LICENSE-2.0 32 | * 33 | * Unless required by applicable law or agreed to in writing, software 34 | * distributed under the License is distributed on an "AS IS" BASIS, 35 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 36 | * See the License for the specific language governing permissions and 37 | * limitations under the License. 
// ExampleConvert converts a short UTF-8 text to Shift_JIS into a buffer and
// prints the converted bytes as a hex dump; failures go to standard error.
func ExampleConvert() {
	buf := &bytes.Buffer{}
	if err := enc.Convert("Shift_JIS", buf, "UTF-8", strings.NewReader("こんにちは,世界!\n私の名前は Spiegel です。")); err != nil {
		fmt.Fprintln(os.Stderr, err)
		return
	}
	// Dump the raw bytes, since Shift_JIS output is not printable as UTF-8 text.
	if err := dump.Octet(os.Stdout, buf); err != nil {
		fmt.Fprintln(os.Stderr, err)
		return
	}
	//Output:
	//0x82, 0xb1, 0x82, 0xf1, 0x82, 0xc9, 0x82, 0xbf, 0x82, 0xcd, 0x81, 0x43, 0x90, 0xa2, 0x8a, 0x45, 0x81, 0x49, 0x0a, 0x8e, 0x84, 0x82, 0xcc, 0x96, 0xbc, 0x91, 0x4f, 0x82, 0xcd, 0x20, 0x53, 0x70, 0x69, 0x65, 0x67, 0x65, 0x6c, 0x20, 0x82, 0xc5, 0x82, 0xb7, 0x81, 0x42
}
40 | */ 41 | -------------------------------------------------------------------------------- /facade/base64.go: -------------------------------------------------------------------------------- 1 | package facade 2 | 3 | import ( 4 | "os" 5 | 6 | "github.com/goark/errs" 7 | "github.com/goark/gnkf/b64" 8 | "github.com/goark/gocli/rwi" 9 | "github.com/spf13/cobra" 10 | ) 11 | 12 | //newNormCmd returns cobra.Command instance for show sub-command 13 | func newBase64Cmd(ui *rwi.RWI) *cobra.Command { 14 | base64Cmd := &cobra.Command{ 15 | Use: "base64 [flags] [file]", 16 | Aliases: []string{"b64"}, 17 | Short: "Encode/Decode BASE64", 18 | Long: "Encode/Decode BASE64.", 19 | RunE: func(cmd *cobra.Command, args []string) error { 20 | //Options 21 | out, err := cmd.Flags().GetString("output") 22 | if err != nil { 23 | return debugPrint(ui, errs.New("Error in --output option", errs.WithCause(err))) 24 | } 25 | decodeFlag, err := cmd.Flags().GetBool("decode") 26 | if err != nil { 27 | return debugPrint(ui, errs.New("Error in --decode option", errs.WithCause(err))) 28 | } 29 | noPadding, err := cmd.Flags().GetBool("no-padding") 30 | if err != nil { 31 | return debugPrint(ui, errs.New("Error in --no-padding option", errs.WithCause(err))) 32 | } 33 | forURL, err := cmd.Flags().GetBool("for-url") 34 | if err != nil { 35 | return debugPrint(ui, errs.New("Error in --for-url option", errs.WithCause(err))) 36 | } 37 | 38 | //Input stream 39 | r := ui.Reader() 40 | if len(args) > 0 { 41 | file, err := os.Open(args[0]) 42 | if err != nil { 43 | return debugPrint(ui, errs.Wrap(err, errs.WithContext("file", args[0]))) 44 | } 45 | defer file.Close() 46 | r = file 47 | } 48 | 49 | //Output stream 50 | w := ui.Writer() 51 | if len(out) > 0 { 52 | file, err := os.Create(out) 53 | if err != nil { 54 | return debugPrint(ui, errs.Wrap(err, errs.WithContext("output", out))) 55 | } 56 | defer file.Close() 57 | w = file 58 | } 59 | 60 | //Run command 61 | if decodeFlag { 62 | err = 
b64.Decode(forURL, noPadding, r, w) 63 | } else { 64 | err = b64.Encode(forURL, noPadding, r, w) 65 | } 66 | if err != nil { 67 | return debugPrint(ui, errs.Wrap(err, errs.WithContext("output", out))) 68 | } 69 | return nil 70 | }, 71 | } 72 | base64Cmd.Flags().StringP("output", "o", "", "path of output file") 73 | _ = base64Cmd.MarkFlagFilename("output") 74 | base64Cmd.Flags().BoolP("decode", "d", false, "decode BASE64 string") 75 | base64Cmd.Flags().BoolP("no-padding", "p", false, "no padding") 76 | base64Cmd.Flags().BoolP("for-url", "u", false, "encoding/decoding defined in RFC 4648") 77 | 78 | return base64Cmd 79 | } 80 | 81 | /* Copyright 2020-2021 Spiegel 82 | * 83 | * Licensed under the Apache License, Version 2.0 (the "License"); 84 | * you may not use this file except in compliance with the License. 85 | * You may obtain a copy of the License at 86 | * 87 | * http://www.apache.org/licenses/LICENSE-2.0 88 | * 89 | * Unless required by applicable law or agreed to in writing, software 90 | * distributed under the License is distributed on an "AS IS" BASIS, 91 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 92 | * See the License for the specific language governing permissions and 93 | * limitations under the License. 
94 | */ 95 | -------------------------------------------------------------------------------- /facade/bcrypt.go: -------------------------------------------------------------------------------- 1 | package facade 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/goark/errs" 7 | "github.com/goark/gnkf/bcrypt" 8 | "github.com/goark/gnkf/ecode" 9 | "github.com/goark/gocli/rwi" 10 | "github.com/spf13/cobra" 11 | ) 12 | 13 | //newNormCmd returns cobra.Command instance for show sub-command 14 | func newBCryptCmd(ui *rwi.RWI) *cobra.Command { 15 | bcryptCmd := &cobra.Command{ 16 | Use: "bcrypt [flags] string [string...]", 17 | Aliases: []string{"bc"}, 18 | Short: "Hash and compare by BCrypt", 19 | Long: "Hash and compare by BCrypt.", 20 | RunE: func(cmd *cobra.Command, args []string) error { 21 | //Options 22 | cost, err := cmd.Flags().GetInt("cost") 23 | if err != nil { 24 | return debugPrint(ui, errs.New("Error in --cost option", errs.WithCause(err))) 25 | } 26 | hashed, err := cmd.Flags().GetString("compare") 27 | if err != nil { 28 | return debugPrint(ui, errs.New("Error in --compare option", errs.WithCause(err))) 29 | } 30 | 31 | if len(args) == 0 { 32 | return debugPrint(ui, errs.Wrap(ecode.ErrNoData)) 33 | } 34 | if len(hashed) > 0 { 35 | _ = ui.OutputErrln(fmt.Sprintf("compare BCrypt hashed string '%s' to...", hashed)) 36 | } 37 | 38 | //Run command 39 | var lastErr error 40 | for _, s := range args { 41 | if len(hashed) > 0 { 42 | if err := bcrypt.Compare(hashed, s); err != nil { 43 | _ = ui.OutputErrln(s, ":", err) 44 | } else { 45 | _ = ui.OutputErrln(s, ":", "match!") 46 | } 47 | } else { 48 | if h, err := bcrypt.Hash(s, cost); err != nil { 49 | lastErr = errs.Wrap(err, errs.WithContext("string", s), errs.WithContext("cost", cost)) 50 | _ = ui.OutputErrln(err) 51 | } else { 52 | _ = ui.Outputln(h) 53 | } 54 | } 55 | } 56 | return debugPrint(ui, lastErr) 57 | }, 58 | } 59 | bcryptCmd.Flags().IntP("cost", "c", bcrypt.DefaultCost, fmt.Sprintf("BCrypt cost (%d-%d)", 
bcrypt.MinCost, bcrypt.MaxCost)) 60 | bcryptCmd.Flags().StringP("compare", "", "", "compare to BCrypt hashed string") 61 | 62 | return bcryptCmd 63 | } 64 | 65 | /* Copyright 2020-2021 Spiegel 66 | * 67 | * Licensed under the Apache License, Version 2.0 (the "License"); 68 | * you may not use this file except in compliance with the License. 69 | * You may obtain a copy of the License at 70 | * 71 | * http://www.apache.org/licenses/LICENSE-2.0 72 | * 73 | * Unless required by applicable law or agreed to in writing, software 74 | * distributed under the License is distributed on an "AS IS" BASIS, 75 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 76 | * See the License for the specific language governing permissions and 77 | * limitations under the License. 78 | */ 79 | -------------------------------------------------------------------------------- /facade/completion.go: -------------------------------------------------------------------------------- 1 | package facade 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | "strings" 7 | 8 | "github.com/goark/gocli/rwi" 9 | "github.com/spf13/cobra" 10 | ) 11 | 12 | var ( 13 | longDescription = `To load completions: 14 | 15 | Bash: 16 | 17 | $ source <(%[1]v completion bash) 18 | 19 | # To load completions for each session, execute once: 20 | Linux: 21 | $ %[1]v completion bash > /etc/bash_completion.d/%[1]v 22 | MacOS: 23 | $ %[1]v completion bash > /usr/local/etc/bash_completion.d/%[1]v 24 | 25 | Zsh: 26 | 27 | # If shell completion is not already enabled in your environment you will need 28 | # to enable it. You can execute the following once: 29 | 30 | $ echo "autoload -U compinit; compinit" >> ~/.zshrc 31 | 32 | # To load completions for each session, execute once: 33 | $ %[1]v completion zsh > "${fpath[1]}/_%[1]v" 34 | 35 | # You will need to start a new shell for this setup to take effect. 
36 | 37 | Fish: 38 | 39 | $ %[1]v completion fish | source 40 | 41 | # To load completions for each session, execute once: 42 | $ %[1]v completion fish > ~/.config/fish/completions/%[1]v.fish 43 | 44 | Powershell: 45 | 46 | PS> %[1]v completion powershell | Out-String | Invoke-Expression 47 | 48 | # To load completions for every new session, run: 49 | PS> %[1]v completion powershell > %[1]v.ps1 50 | # and source this file from your powershell profile. 51 | ` 52 | 53 | shells = []string{"bash", "zsh", "fish", "powershell"} 54 | ) 55 | 56 | //newCompletionCmd returns cobra.Command instance for show sub-command 57 | func newCompletionCmd(ui *rwi.RWI) *cobra.Command { 58 | completionCmd := &cobra.Command{ 59 | Use: "completion [" + strings.Join(shells, "|") + "]", 60 | Aliases: []string{"compl", "cmp"}, 61 | Short: "Generate completion script", 62 | Long: fmt.Sprintf(longDescription, Name), 63 | ValidArgs: shells, 64 | RunE: func(cmd *cobra.Command, args []string) error { 65 | if len(args) == 0 { 66 | return debugPrint(ui, cmd.Root().GenBashCompletion(ui.Writer())) 67 | } else if len(args) == 1 { 68 | switch { 69 | case strings.EqualFold(args[0], "bash"): 70 | return debugPrint(ui, cmd.Root().GenBashCompletion(ui.Writer())) 71 | case strings.EqualFold(args[0], "zsh"): 72 | return debugPrint(ui, cmd.Root().GenZshCompletion(ui.Writer())) 73 | case strings.EqualFold(args[0], "fish"): 74 | return debugPrint(ui, cmd.Root().GenFishCompletion(ui.Writer(), true)) 75 | case strings.EqualFold(args[0], "powershell"): 76 | return debugPrint(ui, cmd.Root().GenPowerShellCompletion(ui.Writer())) 77 | } 78 | } 79 | return debugPrint(ui, os.ErrInvalid) 80 | }, 81 | } 82 | return completionCmd 83 | } 84 | 85 | /* Copyright 2021 Spiegel 86 | * 87 | * Licensed under the Apache License, Version 2.0 (the "License"); 88 | * you may not use this file except in compliance with the License. 
89 | * You may obtain a copy of the License at 90 | * 91 | * http://www.apache.org/licenses/LICENSE-2.0 92 | * 93 | * Unless required by applicable law or agreed to in writing, software 94 | * distributed under the License is distributed on an "AS IS" BASIS, 95 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 96 | * See the License for the specific language governing permissions and 97 | * limitations under the License. 98 | */ 99 | -------------------------------------------------------------------------------- /facade/dump.go: -------------------------------------------------------------------------------- 1 | package facade 2 | 3 | import ( 4 | "os" 5 | 6 | "github.com/goark/errs" 7 | "github.com/goark/gnkf/dump" 8 | "github.com/goark/gocli/rwi" 9 | "github.com/spf13/cobra" 10 | ) 11 | 12 | //newDumpCmd returns cobra.Command instance for show sub-command 13 | func newDumpCmd(ui *rwi.RWI) *cobra.Command { 14 | dumpCmd := &cobra.Command{ 15 | Use: "dump", 16 | Aliases: []string{"hexdump", "d", "hd"}, 17 | Short: "Hexadecimal view of octet data stream", 18 | Long: "Hexadecimal view of octet data stream with C language array style.", 19 | RunE: func(cmd *cobra.Command, args []string) error { 20 | //Options 21 | path, err := cmd.Flags().GetString("file") 22 | if err != nil { 23 | return debugPrint(ui, errs.New("Error in --file option", errs.WithCause(err))) 24 | } 25 | flagUnicode, err := cmd.Flags().GetBool("unicode") 26 | if err != nil { 27 | return debugPrint(ui, errs.New("Error in --unicode option", errs.WithCause(err))) 28 | } 29 | 30 | //Input stream 31 | r := ui.Reader() 32 | if len(path) > 0 { 33 | file, err := os.Open(path) 34 | if err != nil { 35 | return debugPrint(ui, errs.Wrap(err, errs.WithContext("file", path))) 36 | } 37 | defer file.Close() 38 | r = file 39 | } 40 | 41 | //Run command 42 | if flagUnicode { 43 | err = dump.UnicodePoint(ui.Writer(), r) 44 | } else { 45 | err = dump.Octet(ui.Writer(), r) 46 | } 47 | if err != 
nil { 48 | return debugPrint(ui, errs.Wrap(err, errs.WithContext("file", path))) 49 | } 50 | return debugPrint(ui, errs.Wrap(ui.Outputln(), errs.WithContext("file", path))) 51 | }, 52 | } 53 | dumpCmd.Flags().StringP("file", "f", "", "path of input text file") 54 | _ = dumpCmd.MarkFlagFilename("file") 55 | dumpCmd.Flags().BoolP("unicode", "u", false, "print by Unicode code point (UTF-8 only)") 56 | 57 | return dumpCmd 58 | } 59 | 60 | /* Copyright 2020-2021 Spiegel 61 | * 62 | * Licensed under the Apache License, Version 2.0 (the "License"); 63 | * you may not use this file except in compliance with the License. 64 | * You may obtain a copy of the License at 65 | * 66 | * http://www.apache.org/licenses/LICENSE-2.0 67 | * 68 | * Unless required by applicable law or agreed to in writing, software 69 | * distributed under the License is distributed on an "AS IS" BASIS, 70 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 71 | * See the License for the specific language governing permissions and 72 | * limitations under the License. 73 | */ 74 | -------------------------------------------------------------------------------- /facade/enc.go: -------------------------------------------------------------------------------- 1 | package facade 2 | 3 | import ( 4 | "bytes" 5 | "io" 6 | "os" 7 | 8 | "github.com/goark/errs" 9 | "github.com/goark/gnkf/enc" 10 | "github.com/goark/gnkf/guess" 11 | "github.com/goark/gnkf/rbom" 12 | "github.com/goark/gocli/rwi" 13 | "github.com/spf13/cobra" 14 | "golang.org/x/text/encoding/unicode" 15 | ) 16 | 17 | var descriptionEnc = `Convert character encoding of the text. 18 | Using MIME and IANA name as the character encoding name. 
19 | Refer: http://www.iana.org/assignments/character-sets/character-sets.xhtml` 20 | 21 | //newEncCmd returns cobra.Command instance for show sub-command 22 | func newEncCmd(ui *rwi.RWI) *cobra.Command { 23 | encCmd := &cobra.Command{ 24 | Use: "enc", 25 | Aliases: []string{"encoding", "e"}, 26 | Short: "Convert character encoding of the text", 27 | Long: descriptionEnc, 28 | RunE: func(cmd *cobra.Command, args []string) error { 29 | //Options 30 | inp, err := cmd.Flags().GetString("file") 31 | if err != nil { 32 | return debugPrint(ui, errs.New("Error in --file option", errs.WithCause(err))) 33 | } 34 | out, err := cmd.Flags().GetString("output") 35 | if err != nil { 36 | return debugPrint(ui, errs.New("Error in --output option", errs.WithCause(err))) 37 | } 38 | from, err := cmd.Flags().GetString("src-encoding") 39 | if err != nil { 40 | return debugPrint(ui, errs.New("Error in --src-encoding option", errs.WithCause(err))) 41 | } 42 | to, err := cmd.Flags().GetString("dst-encoding") 43 | if err != nil { 44 | return debugPrint(ui, errs.New("Error in --dst-encoding option", errs.WithCause(err))) 45 | } 46 | flagGuess, err := cmd.Flags().GetBool("guess") 47 | if err != nil { 48 | return debugPrint(ui, errs.New("Error in --guess option", errs.WithCause(err))) 49 | } 50 | rbFlag, err := cmd.Flags().GetBool("remove-bom") 51 | if err != nil { 52 | return debugPrint(ui, errs.New("Error in --remove-bom option", errs.WithCause(err))) 53 | } 54 | 55 | //Input stream 56 | r := ui.Reader() 57 | if len(inp) > 0 { 58 | file, err := os.Open(inp) 59 | if err != nil { 60 | return debugPrint(ui, errs.Wrap(err, errs.WithContext("file", inp))) 61 | } 62 | defer file.Close() 63 | r = file 64 | } 65 | if flagGuess { 66 | dup := &bytes.Buffer{} 67 | ss, err := guess.Encoding(io.TeeReader(r, dup)) 68 | if err != nil { 69 | return debugPrint(ui, errs.Wrap(err, errs.WithContext("file", inp))) 70 | } 71 | if len(ss) > 0 { 72 | from = ss[0] 73 | } 74 | r = dup 75 | } 76 | 77 | //Output 
stream 78 | w := ui.Writer() 79 | if len(out) > 0 { 80 | file, err := os.Create(out) 81 | if err != nil { 82 | return debugPrint(ui, errs.Wrap(err, errs.WithContext("output", out))) 83 | } 84 | defer file.Close() 85 | w = file 86 | } 87 | 88 | //Remove BOM 89 | if rbFlag { 90 | e, err := enc.Encoding(from) 91 | if err != nil { 92 | return debugPrint(ui, errs.Wrap(err, errs.WithContext("file", inp), errs.WithContext("output", out))) 93 | } 94 | if e == unicode.UTF8 { 95 | b, err := rbom.RemoveBom(r) 96 | if err != nil { 97 | return debugPrint(ui, errs.Wrap(err, errs.WithContext("file", inp), errs.WithContext("output", out))) 98 | } 99 | r = bytes.NewReader(b) 100 | } 101 | } 102 | 103 | //Run command 104 | if err := enc.Convert(to, w, from, r); err != nil { 105 | return debugPrint(ui, errs.Wrap(err, errs.WithContext("file", inp), errs.WithContext("output", out))) 106 | } 107 | return nil 108 | }, 109 | } 110 | encCmd.Flags().StringP("file", "f", "", "path of input text file") 111 | _ = encCmd.MarkFlagFilename("file") 112 | encCmd.Flags().StringP("output", "o", "", "path of output file") 113 | _ = encCmd.MarkFlagFilename("output") 114 | encCmd.Flags().StringP("src-encoding", "s", "utf-8", "character encoding name of source text") 115 | encCmd.Flags().StringP("dst-encoding", "d", "utf-8", "character encoding name of output text") 116 | encCmd.Flags().BoolP("guess", "g", false, "guess character encoding of source text") 117 | encCmd.Flags().BoolP("remove-bom", "b", false, "remove BOM character in source text (UTF-8 only)") 118 | 119 | return encCmd 120 | } 121 | 122 | /* Copyright 2020-2021 Spiegel 123 | * 124 | * Licensed under the Apache License, Version 2.0 (the "License"); 125 | * you may not use this file except in compliance with the License. 
126 | * You may obtain a copy of the License at 127 | * 128 | * http://www.apache.org/licenses/LICENSE-2.0 129 | * 130 | * Unless required by applicable law or agreed to in writing, software 131 | * distributed under the License is distributed on an "AS IS" BASIS, 132 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 133 | * See the License for the specific language governing permissions and 134 | * limitations under the License. 135 | */ 136 | -------------------------------------------------------------------------------- /facade/facade.go: -------------------------------------------------------------------------------- 1 | package facade 2 | 3 | import ( 4 | "fmt" 5 | "runtime" 6 | 7 | "github.com/goark/errs" 8 | "github.com/goark/gnkf/ecode" 9 | "github.com/goark/gocli/exitcode" 10 | "github.com/goark/gocli/rwi" 11 | "github.com/spf13/cobra" 12 | ) 13 | 14 | var ( 15 | //Name is applicatin name 16 | Name = "gnkf" 17 | //Version is version for applicatin 18 | Version = "developer version" 19 | ) 20 | var ( 21 | debugFlag bool //debug flag 22 | ) 23 | 24 | //newRootCmd returns cobra.Command instance for root command 25 | func newRootCmd(ui *rwi.RWI, args []string) *cobra.Command { 26 | rootCmd := &cobra.Command{ 27 | Use: Name, 28 | Short: "Network Kanji Filter by Golang", 29 | Long: "Network Kanji Filter by Golang", 30 | RunE: func(cmd *cobra.Command, args []string) error { 31 | return debugPrint(ui, errs.Wrap(ecode.ErrNoCommand)) 32 | }, 33 | } 34 | rootCmd.SilenceUsage = true 35 | rootCmd.SetArgs(args) //arguments of command-line 36 | rootCmd.SetIn(ui.Reader()) //Stdin 37 | rootCmd.SetOut(ui.ErrorWriter()) //Stdout -> Stderr 38 | rootCmd.SetErr(ui.ErrorWriter()) //Stderr 39 | rootCmd.AddCommand( 40 | newVersionCmd(ui), 41 | newGuessCmd(ui), 42 | newDumpCmd(ui), 43 | newEncCmd(ui), 44 | newNormCmd(ui), 45 | newNwlnCmd(ui), 46 | newWidthCmd(ui), 47 | newKanaCmd(ui), 48 | newBase64Cmd(ui), 49 | newRemoveBomCmd(ui), 50 | 
newCompletionCmd(ui), 51 | newhashCmd(ui), 52 | newBCryptCmd(ui), 53 | ) 54 | 55 | //global options 56 | rootCmd.PersistentFlags().BoolVarP(&debugFlag, "debug", "", false, "for debug") 57 | 58 | return rootCmd 59 | } 60 | 61 | func debugPrint(ui *rwi.RWI, err error) error { 62 | if debugFlag && err != nil { 63 | fmt.Fprintf(ui.Writer(), "%+v\n", err) 64 | } 65 | return err 66 | } 67 | 68 | //Execute is called from main function 69 | func Execute(ui *rwi.RWI, args []string) (exit exitcode.ExitCode) { 70 | defer func() { 71 | //panic hundling 72 | if r := recover(); r != nil { 73 | _ = ui.OutputErrln("Panic:", r) 74 | for depth := 0; ; depth++ { 75 | pc, src, line, ok := runtime.Caller(depth) 76 | if !ok { 77 | break 78 | } 79 | _ = ui.OutputErrln(" ->", depth, ":", runtime.FuncForPC(pc).Name(), ":", src, ":", line) 80 | } 81 | exit = exitcode.Abnormal 82 | } 83 | }() 84 | 85 | //execution 86 | exit = exitcode.Normal 87 | if err := newRootCmd(ui, args).Execute(); err != nil { 88 | exit = exitcode.Abnormal 89 | } 90 | return 91 | } 92 | 93 | /* Copyright 2020-2021 Spiegel 94 | * 95 | * Licensed under the Apache License, Version 2.0 (the "License"); 96 | * you may not use this file except in compliance with the License. 97 | * You may obtain a copy of the License at 98 | * 99 | * http://www.apache.org/licenses/LICENSE-2.0 100 | * 101 | * Unless required by applicable law or agreed to in writing, software 102 | * distributed under the License is distributed on an "AS IS" BASIS, 103 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 104 | * See the License for the specific language governing permissions and 105 | * limitations under the License. 
106 | */ 107 | -------------------------------------------------------------------------------- /facade/guess.go: -------------------------------------------------------------------------------- 1 | package facade 2 | 3 | import ( 4 | "os" 5 | "strings" 6 | 7 | "github.com/goark/errs" 8 | "github.com/goark/gnkf/ecode" 9 | "github.com/goark/gnkf/guess" 10 | "github.com/goark/gocli/rwi" 11 | "github.com/spf13/cobra" 12 | ) 13 | 14 | //newGuessCmd returns cobra.Command instance for show sub-command 15 | func newGuessCmd(ui *rwi.RWI) *cobra.Command { 16 | guessCmd := &cobra.Command{ 17 | Use: "guess", 18 | Aliases: []string{"g"}, 19 | Short: "Guess character encoding of the text", 20 | Long: "Guess character encoding of the text", 21 | RunE: func(cmd *cobra.Command, args []string) error { 22 | //Options 23 | path, err := cmd.Flags().GetString("file") 24 | if err != nil { 25 | return debugPrint(ui, errs.New("Error in --file option", errs.WithCause(err))) 26 | } 27 | flagAll, err := cmd.Flags().GetBool("all") 28 | if err != nil { 29 | return debugPrint(ui, errs.New("Error in --all option", errs.WithCause(err))) 30 | } 31 | 32 | //Input stream 33 | r := ui.Reader() 34 | if len(path) > 0 { 35 | file, err := os.Open(path) 36 | if err != nil { 37 | return debugPrint(ui, errs.Wrap(err, errs.WithContext("file", path))) 38 | } 39 | defer file.Close() 40 | r = file 41 | } 42 | 43 | //Run command 44 | ss, err := guess.Encoding(r) 45 | if err != nil { 46 | return debugPrint(ui, errs.Wrap(err, errs.WithContext("file", path))) 47 | } 48 | if len(ss) == 0 { 49 | return debugPrint(ui, errs.Wrap(ecode.ErrNoData, errs.WithContext("file", path))) 50 | } 51 | if flagAll { 52 | err = ui.Outputln(strings.Join(ss, "\n")) 53 | } else { 54 | err = ui.Outputln(ss[0]) 55 | } 56 | return debugPrint(ui, errs.Wrap(err)) 57 | }, 58 | } 59 | guessCmd.Flags().StringP("file", "f", "", "path of input text file") 60 | _ = guessCmd.MarkFlagFilename("file") 61 | guessCmd.Flags().BoolP("all", "", false, 
"print all guesses") 62 | 63 | return guessCmd 64 | } 65 | 66 | /* Copyright 2020-2021 Spiegel 67 | * 68 | * Licensed under the Apache License, Version 2.0 (the "License"); 69 | * you may not use this file except in compliance with the License. 70 | * You may obtain a copy of the License at 71 | * 72 | * http://www.apache.org/licenses/LICENSE-2.0 73 | * 74 | * Unless required by applicable law or agreed to in writing, software 75 | * distributed under the License is distributed on an "AS IS" BASIS, 76 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 77 | * See the License for the specific language governing permissions and 78 | * limitations under the License. 79 | */ 80 | -------------------------------------------------------------------------------- /facade/hash.go: -------------------------------------------------------------------------------- 1 | package facade 2 | 3 | import ( 4 | "crypto" 5 | "fmt" 6 | "io" 7 | "os" 8 | "syscall" 9 | 10 | "github.com/goark/errs" 11 | "github.com/goark/gnkf/ecode" 12 | "github.com/goark/gnkf/hash" 13 | "github.com/goark/gocli/rwi" 14 | "github.com/spf13/cobra" 15 | ) 16 | 17 | // newhashCmd returns cobra.Command instance for show sub-command 18 | func newhashCmd(ui *rwi.RWI) *cobra.Command { 19 | hashCmd := &cobra.Command{ 20 | Use: "hash [flags] [file]", 21 | Aliases: []string{"h"}, 22 | Short: "Print or check hash value", 23 | Long: "Print or check hash value.\n Support algorithm:\n " + hash.AlgorithmList(", "), 24 | RunE: func(cmd *cobra.Command, args []string) error { 25 | //Options 26 | s, err := cmd.Flags().GetString("algorithm") 27 | if err != nil { 28 | return debugPrint(ui, errs.New("Error in --algorithm option", errs.WithCause(err))) 29 | } 30 | alg, err := hash.Algorithm(s) 31 | if err != nil { 32 | return debugPrint(ui, errs.Wrap(err, errs.WithContext("algorithm", s))) 33 | } 34 | checkerFlag, err := cmd.Flags().GetBool("check") 35 | if err != nil { 36 | return debugPrint(ui, 
errs.New("Error in --check option", errs.WithCause(err))) 37 | } 38 | ignoreMissingFlag, err := cmd.Flags().GetBool("ignore-missing") 39 | if err != nil { 40 | return debugPrint(ui, errs.New("Error in --ignore-missing option", errs.WithCause(err))) 41 | } 42 | quietFlag, err := cmd.Flags().GetBool("quiet") 43 | if err != nil { 44 | return debugPrint(ui, errs.New("Error in --quiet option", errs.WithCause(err))) 45 | } 46 | 47 | //Input stream 48 | inp := "-" 49 | r := ui.Reader() 50 | if len(args) > 0 && args[0] != inp { 51 | inp = args[0] 52 | file, err := os.Open(inp) 53 | if err != nil { 54 | return debugPrint(ui, errs.Wrap(err, errs.WithContext("file", inp))) 55 | } 56 | defer file.Close() 57 | r = file 58 | } 59 | 60 | //Run command 61 | var lastError error 62 | if checkerFlag { 63 | checkers, err := hash.NewCheckers(r, alg) 64 | if err != nil { 65 | return debugPrint(ui, errs.Wrap(lastError, errs.WithContext("algorithm", alg.String()), errs.WithContext("file", inp))) 66 | } 67 | lastError = hashChecks(checkers, ui, ignoreMissingFlag, quietFlag) 68 | if hashValidCount(checkers) == 0 { 69 | lastError = errs.New(fmt.Sprintf("%s: no file was verified", inp), errs.WithContext("algorithm", alg.String()), errs.WithContext("file", inp)) 70 | } 71 | } else { 72 | res, err := newHashValue(alg, r, inp) 73 | if err != nil { 74 | return debugPrint(ui, errs.Wrap(lastError, errs.WithContext("algorithm", alg.String()), errs.WithContext("file", inp))) 75 | } 76 | lastError = ui.Outputln(res.String()) 77 | } 78 | return debugPrint(ui, errs.Wrap(lastError, errs.WithContext("algorithm", alg.String()), errs.WithContext("file", inp))) 79 | }, 80 | } 81 | hashCmd.Flags().StringP("algorithm", "a", "SHA-256", "hash algorithm") 82 | hashCmd.Flags().BoolP("check", "c", false, "don't fail or report status for missing files") 83 | hashCmd.Flags().BoolP("ignore-missing", "", false, "don't fail or report status for missing files (with check option)") 84 | hashCmd.Flags().BoolP("quiet", "", 
false, "don't print OK for each successfully verified file (with check option)") 85 | 86 | return hashCmd 87 | } 88 | 89 | type hashValue struct { 90 | alg crypto.Hash 91 | path string 92 | value []byte 93 | } 94 | 95 | func newHashValue(alg crypto.Hash, r io.Reader, path string) (*hashValue, error) { 96 | value, err := hash.Value(alg, r) 97 | if err != nil { 98 | return nil, errs.Wrap(err, errs.WithContext("algorithm", alg.String())) 99 | } 100 | return &hashValue{alg: alg, path: path, value: value}, nil 101 | } 102 | 103 | func (hv *hashValue) hashString() string { 104 | if hv == nil { 105 | return "" 106 | } 107 | return fmt.Sprintf("%x", hv.value) 108 | } 109 | 110 | func (hv *hashValue) String() string { 111 | if hv == nil { 112 | return "" 113 | } 114 | return fmt.Sprintf("%v %s", hv.hashString(), hv.path) 115 | } 116 | 117 | type warn struct { 118 | count int 119 | err error 120 | } 121 | 122 | func (w warn) Error() string { 123 | if w.count > 1 { 124 | return fmt.Sprintf("WARNING in %d items: %v", w.count, w.err) 125 | } 126 | return fmt.Sprintf("Warning in %d item: %v", w.count, w.err) 127 | } 128 | 129 | func appendHashError(wlist []*warn, err error) []*warn { 130 | if len(wlist) == 0 { 131 | return append(wlist, &warn{count: 1, err: err}) 132 | } 133 | for i := 0; i < len(wlist); i++ { 134 | if errs.Is(err, wlist[i].err) { 135 | wlist[i].count++ 136 | return wlist 137 | } 138 | } 139 | return append(wlist, &warn{count: 1, err: err}) 140 | } 141 | 142 | func hashChecks(checkers []hash.Checker, ui *rwi.RWI, ignoreMissingFlag, quietFlag bool) error { 143 | wlist := []*warn{} 144 | var lastError error 145 | for _, chkr := range checkers { 146 | err := chkr.Check() 147 | if err != nil { 148 | switch true { 149 | case errs.Is(err, syscall.ENOENT): 150 | wlist = appendHashError(wlist, syscall.ENOENT) 151 | if !ignoreMissingFlag { 152 | lastError = ui.OutputErrln(err) 153 | } 154 | case errs.Is(err, ecode.ErrUnmatchHashString): 155 | wlist = 
appendHashError(wlist, ecode.ErrUnmatchHashString) 156 | lastError = ui.Outputln(fmt.Sprintf("%s: FAILED", chkr.Path())) 157 | default: 158 | wlist = appendHashError(wlist, err) 159 | lastError = ui.OutputErrln(err) 160 | } 161 | } else if !quietFlag { 162 | lastError = ui.Outputln(fmt.Sprintf("%s: OK", chkr.Path())) 163 | } 164 | if lastError != nil { 165 | return lastError 166 | } 167 | } 168 | for _, w := range wlist { 169 | if err := ui.Outputln(w); err != nil { 170 | return err 171 | } 172 | } 173 | return nil 174 | } 175 | 176 | func hashValidCount(checkers []hash.Checker) int { 177 | count := 0 178 | for _, chk := range checkers { 179 | if chk.Err() == nil { 180 | count++ 181 | } 182 | } 183 | return count 184 | } 185 | 186 | /* Copyright 2021 Spiegel 187 | * 188 | * Licensed under the Apache License, Version 2.0 (the "License"); 189 | * you may not use this file except in compliance with the License. 190 | * You may obtain a copy of the License at 191 | * 192 | * http://www.apache.org/licenses/LICENSE-2.0 193 | * 194 | * Unless required by applicable law or agreed to in writing, software 195 | * distributed under the License is distributed on an "AS IS" BASIS, 196 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 197 | * See the License for the specific language governing permissions and 198 | * limitations under the License. 199 | */ 200 | -------------------------------------------------------------------------------- /facade/kana.go: -------------------------------------------------------------------------------- 1 | package facade 2 | 3 | import ( 4 | "bytes" 5 | "fmt" 6 | "os" 7 | "strings" 8 | 9 | "github.com/goark/errs" 10 | "github.com/goark/gnkf/kana" 11 | "github.com/goark/gnkf/newline" 12 | "github.com/goark/gnkf/rbom" 13 | "github.com/goark/gocli/rwi" 14 | "github.com/spf13/cobra" 15 | ) 16 | 17 | var descriptionKana = `Convert kana characters in the text. 18 | UTF-8 encoding only. 
19 | "hiragana" and "katakana" forms are valid only for full-width kana character.` 20 | 21 | //newNormCmd returns cobra.Command instance for show sub-command 22 | func newKanaCmd(ui *rwi.RWI) *cobra.Command { 23 | kanaCmd := &cobra.Command{ 24 | Use: "kana", 25 | Aliases: []string{"k"}, 26 | Short: "Convert kana characters in the text", 27 | Long: descriptionKana, 28 | RunE: func(cmd *cobra.Command, args []string) error { 29 | //Options 30 | inp, err := cmd.Flags().GetString("file") 31 | if err != nil { 32 | return debugPrint(ui, errs.New("Error in --file option", errs.WithCause(err))) 33 | } 34 | out, err := cmd.Flags().GetString("output") 35 | if err != nil { 36 | return debugPrint(ui, errs.New("Error in --output option", errs.WithCause(err))) 37 | } 38 | formName, err := cmd.Flags().GetString("conversion-form") 39 | if err != nil { 40 | return debugPrint(ui, errs.New("Error in --conversion-form option", errs.WithCause(err))) 41 | } 42 | form, err := kana.FormOf(formName) 43 | if err != nil { 44 | return debugPrint(ui, err) 45 | } 46 | foldFlag, err := cmd.Flags().GetBool("fold") 47 | if err != nil { 48 | return debugPrint(ui, errs.New("Error in --fold option", errs.WithCause(err))) 49 | } 50 | rbFlag, err := cmd.Flags().GetBool("remove-bom") 51 | if err != nil { 52 | return debugPrint(ui, errs.New("Error in --remove-bom option", errs.WithCause(err))) 53 | } 54 | 55 | //Input stream 56 | r := ui.Reader() 57 | if len(inp) > 0 { 58 | file, err := os.Open(inp) 59 | if err != nil { 60 | return debugPrint(ui, errs.Wrap(err, errs.WithContext("file", inp))) 61 | } 62 | defer file.Close() 63 | r = file 64 | } 65 | 66 | //Output stream 67 | w := ui.Writer() 68 | if len(out) > 0 { 69 | file, err := os.Create(out) 70 | if err != nil { 71 | return debugPrint(ui, errs.Wrap(err, errs.WithContext("output", out))) 72 | } 73 | defer file.Close() 74 | w = file 75 | } 76 | 77 | //Remove BOM 78 | if rbFlag { 79 | b, err := rbom.RemoveBom(r) 80 | if err != nil { 81 | return 
debugPrint(ui, errs.Wrap(err, errs.WithContext("file", inp), errs.WithContext("output", out))) 82 | } 83 | r = bytes.NewReader(b) 84 | } 85 | 86 | //Run command 87 | if err := kana.Convert(form, w, r, foldFlag); err != nil { 88 | return debugPrint(ui, errs.Wrap(err, errs.WithContext("file", inp), errs.WithContext("output", out))) 89 | } 90 | return nil 91 | }, 92 | } 93 | kanaCmd.Flags().StringP("file", "f", "", "path of input text file") 94 | _ = kanaCmd.MarkFlagFilename("file") 95 | kanaCmd.Flags().StringP("output", "o", "", "path of output file") 96 | _ = kanaCmd.MarkFlagFilename("output") 97 | kanaCmd.Flags().StringP("conversion-form", "c", "katakana", fmt.Sprintf("conversion form: [%s]", strings.Join(kana.FormList(), "|"))) 98 | _ = kanaCmd.RegisterFlagCompletionFunc("conversion-form", func(cmd *cobra.Command, args []string, toComplete string) ([]string, cobra.ShellCompDirective) { 99 | return newline.FormList(), cobra.ShellCompDirectiveNoFileComp 100 | }) 101 | kanaCmd.Flags().BoolP("fold", "", false, "convert character width by fold form") 102 | kanaCmd.Flags().BoolP("remove-bom", "b", false, "remove BOM character") 103 | 104 | return kanaCmd 105 | } 106 | 107 | /* Copyright 2020-2021 Spiegel 108 | * 109 | * Licensed under the Apache License, Version 2.0 (the "License"); 110 | * you may not use this file except in compliance with the License. 111 | * You may obtain a copy of the License at 112 | * 113 | * http://www.apache.org/licenses/LICENSE-2.0 114 | * 115 | * Unless required by applicable law or agreed to in writing, software 116 | * distributed under the License is distributed on an "AS IS" BASIS, 117 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 118 | * See the License for the specific language governing permissions and 119 | * limitations under the License. 
120 | */ 121 | -------------------------------------------------------------------------------- /facade/newline.go: -------------------------------------------------------------------------------- 1 | package facade 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | "strings" 7 | 8 | "github.com/goark/errs" 9 | "github.com/goark/gnkf/newline" 10 | "github.com/goark/gocli/rwi" 11 | "github.com/spf13/cobra" 12 | ) 13 | 14 | //newNormCmd returns cobra.Command instance for show sub-command 15 | func newNwlnCmd(ui *rwi.RWI) *cobra.Command { 16 | nwlnCmd := &cobra.Command{ 17 | Use: "newline", 18 | Aliases: []string{"nwln", "nl"}, 19 | Short: "Convert newline form in the text", 20 | Long: "Convert newline form in the text.", 21 | RunE: func(cmd *cobra.Command, args []string) error { 22 | //Options 23 | inp, err := cmd.Flags().GetString("file") 24 | if err != nil { 25 | return debugPrint(ui, errs.New("Error in --file option", errs.WithCause(err))) 26 | } 27 | out, err := cmd.Flags().GetString("output") 28 | if err != nil { 29 | return debugPrint(ui, errs.New("Error in --output option", errs.WithCause(err))) 30 | } 31 | form, err := cmd.Flags().GetString("newline-form") 32 | if err != nil { 33 | return debugPrint(ui, errs.New("Error in --newline-form option", errs.WithCause(err))) 34 | } 35 | 36 | //Input stream 37 | r := ui.Reader() 38 | if len(inp) > 0 { 39 | file, err := os.Open(inp) 40 | if err != nil { 41 | return debugPrint(ui, errs.Wrap(err, errs.WithContext("file", inp))) 42 | } 43 | defer file.Close() 44 | r = file 45 | } 46 | 47 | //Output stream 48 | w := ui.Writer() 49 | if len(out) > 0 { 50 | file, err := os.Create(out) 51 | if err != nil { 52 | return debugPrint(ui, errs.Wrap(err, errs.WithContext("output", out))) 53 | } 54 | defer file.Close() 55 | w = file 56 | } 57 | 58 | //Run command 59 | if err := newline.Convert(form, w, r); err != nil { 60 | return debugPrint(ui, errs.Wrap(err, errs.WithContext("file", inp), errs.WithContext("output", out))) 61 | } 62 | return 
nil 63 | }, 64 | } 65 | nwlnCmd.Flags().StringP("file", "f", "", "path of input text file") 66 | _ = nwlnCmd.MarkFlagFilename("file") 67 | nwlnCmd.Flags().StringP("output", "o", "", "path of output file") 68 | _ = nwlnCmd.MarkFlagFilename("output") 69 | nwlnCmd.Flags().StringP("newline-form", "n", "lf", fmt.Sprintf("newline form: [%s]", strings.Join(newline.FormList(), "|"))) 70 | _ = nwlnCmd.RegisterFlagCompletionFunc("newline-form", func(cmd *cobra.Command, args []string, toComplete string) ([]string, cobra.ShellCompDirective) { 71 | return newline.FormList(), cobra.ShellCompDirectiveNoFileComp 72 | }) 73 | 74 | return nwlnCmd 75 | } 76 | 77 | /* Copyright 2020-2021 Spiegel 78 | * 79 | * Licensed under the Apache License, Version 2.0 (the "License"); 80 | * you may not use this file except in compliance with the License. 81 | * You may obtain a copy of the License at 82 | * 83 | * http://www.apache.org/licenses/LICENSE-2.0 84 | * 85 | * Unless required by applicable law or agreed to in writing, software 86 | * distributed under the License is distributed on an "AS IS" BASIS, 87 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 88 | * See the License for the specific language governing permissions and 89 | * limitations under the License. 
90 | */ 91 | -------------------------------------------------------------------------------- /facade/nrm.go: -------------------------------------------------------------------------------- 1 | package facade 2 | 3 | import ( 4 | "bytes" 5 | "fmt" 6 | "os" 7 | "strings" 8 | 9 | "github.com/goark/errs" 10 | "github.com/goark/gnkf/nrm" 11 | "github.com/goark/gnkf/rbom" 12 | "github.com/goark/gocli/rwi" 13 | "github.com/spf13/cobra" 14 | ) 15 | 16 | //newNormCmd returns cobra.Command instance for show sub-command 17 | func newNormCmd(ui *rwi.RWI) *cobra.Command { 18 | normCmd := &cobra.Command{ 19 | Use: "norm", 20 | Aliases: []string{"normalize", "nrm", "nm"}, 21 | Short: "Unicode normalization of the text", 22 | Long: "Unicode normalization of the text (UTF-8 encoding only).", 23 | RunE: func(cmd *cobra.Command, args []string) error { 24 | //Options 25 | inp, err := cmd.Flags().GetString("file") 26 | if err != nil { 27 | return debugPrint(ui, errs.New("Error in --file option", errs.WithCause(err))) 28 | } 29 | out, err := cmd.Flags().GetString("output") 30 | if err != nil { 31 | return debugPrint(ui, errs.New("Error in --output option", errs.WithCause(err))) 32 | } 33 | form, err := cmd.Flags().GetString("norm-form") 34 | if err != nil { 35 | return debugPrint(ui, errs.New("Error in --norm-form option", errs.WithCause(err))) 36 | } 37 | krFlag, err := cmd.Flags().GetBool("kangxi-radicals") 38 | if err != nil { 39 | return debugPrint(ui, errs.New("Error in --kangxi-radicals option", errs.WithCause(err))) 40 | } 41 | rbFlag, err := cmd.Flags().GetBool("remove-bom") 42 | if err != nil { 43 | return debugPrint(ui, errs.New("Error in --remove-bom option", errs.WithCause(err))) 44 | } 45 | 46 | //Input stream 47 | r := ui.Reader() 48 | if len(inp) > 0 { 49 | file, err := os.Open(inp) 50 | if err != nil { 51 | return debugPrint(ui, errs.Wrap(err, errs.WithContext("file", inp))) 52 | } 53 | defer file.Close() 54 | r = file 55 | } 56 | 57 | //Output stream 58 | w := 
ui.Writer() 59 | if len(out) > 0 { 60 | file, err := os.Create(out) 61 | if err != nil { 62 | return debugPrint(ui, errs.Wrap(err, errs.WithContext("output", out))) 63 | } 64 | defer file.Close() 65 | w = file 66 | } 67 | 68 | //Remove BOM 69 | if rbFlag { 70 | b, err := rbom.RemoveBom(r) 71 | if err != nil { 72 | return debugPrint(ui, errs.Wrap(err, errs.WithContext("file", inp), errs.WithContext("output", out))) 73 | } 74 | r = bytes.NewReader(b) 75 | } 76 | 77 | //Run command 78 | if err := nrm.Normalize(form, w, r, krFlag); err != nil { 79 | return debugPrint(ui, errs.Wrap(err, errs.WithContext("file", inp), errs.WithContext("output", out))) 80 | } 81 | return nil 82 | }, 83 | } 84 | normCmd.Flags().StringP("file", "f", "", "path of input text file") 85 | _ = normCmd.MarkFlagFilename("file") 86 | normCmd.Flags().StringP("output", "o", "", "path of output file") 87 | _ = normCmd.MarkFlagFilename("output") 88 | normCmd.Flags().StringP("norm-form", "n", "nfc", fmt.Sprintf("Unicode normalization form: [%s]", strings.Join(nrm.FormList(), "|"))) 89 | _ = normCmd.RegisterFlagCompletionFunc("norm-form", func(cmd *cobra.Command, args []string, toComplete string) ([]string, cobra.ShellCompDirective) { 90 | return nrm.FormList(), cobra.ShellCompDirectiveDefault 91 | }) 92 | normCmd.Flags().BoolP("kangxi-radicals", "k", false, "normalize kangxi radicals only (with nfkc or nfkd form)") 93 | normCmd.Flags().BoolP("remove-bom", "b", false, "remove BOM character") 94 | 95 | return normCmd 96 | } 97 | 98 | /* Copyright 2020-2021 Spiegel 99 | * 100 | * Licensed under the Apache License, Version 2.0 (the "License"); 101 | * you may not use this file except in compliance with the License. 
102 | * You may obtain a copy of the License at 103 | * 104 | * http://www.apache.org/licenses/LICENSE-2.0 105 | * 106 | * Unless required by applicable law or agreed to in writing, software 107 | * distributed under the License is distributed on an "AS IS" BASIS, 108 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 109 | * See the License for the specific language governing permissions and 110 | * limitations under the License. 111 | */ 112 | -------------------------------------------------------------------------------- /facade/rbom.go: -------------------------------------------------------------------------------- 1 | package facade 2 | 3 | import ( 4 | "bytes" 5 | "io" 6 | "os" 7 | 8 | "github.com/goark/errs" 9 | "github.com/goark/gnkf/rbom" 10 | "github.com/goark/gocli/rwi" 11 | "github.com/spf13/cobra" 12 | ) 13 | 14 | //newNormCmd returns cobra.Command instance for show sub-command 15 | func newRemoveBomCmd(ui *rwi.RWI) *cobra.Command { 16 | rbomCmd := &cobra.Command{ 17 | Use: "remove-bom", 18 | Aliases: []string{"rbom", "rb"}, 19 | Short: "Remove BOM character in UTF-8 string", 20 | Long: "Remove BOM character in UTF-8 string.", 21 | RunE: func(cmd *cobra.Command, args []string) error { 22 | //Options 23 | inp, err := cmd.Flags().GetString("file") 24 | if err != nil { 25 | return debugPrint(ui, errs.New("Error in --file option", errs.WithCause(err))) 26 | } 27 | out, err := cmd.Flags().GetString("output") 28 | if err != nil { 29 | return debugPrint(ui, errs.New("Error in --output option", errs.WithCause(err))) 30 | } 31 | 32 | //Input stream 33 | r := ui.Reader() 34 | if len(inp) > 0 { 35 | file, err := os.Open(inp) 36 | if err != nil { 37 | return debugPrint(ui, errs.Wrap(err, errs.WithContext("file", inp))) 38 | } 39 | defer file.Close() 40 | r = file 41 | } 42 | 43 | //Output stream 44 | w := ui.Writer() 45 | if len(out) > 0 { 46 | file, err := os.Create(out) 47 | if err != nil { 48 | return debugPrint(ui, errs.Wrap(err, 
errs.WithContext("output", out))) 49 | } 50 | defer file.Close() 51 | w = file 52 | } 53 | 54 | //Run command 55 | b, err := rbom.RemoveBom(r) 56 | if err != nil { 57 | return debugPrint(ui, errs.Wrap(err, errs.WithContext("file", inp), errs.WithContext("output", out))) 58 | } 59 | if _, err := io.Copy(w, bytes.NewReader(b)); err != nil { 60 | return debugPrint(ui, errs.Wrap(err, errs.WithContext("file", inp), errs.WithContext("output", out))) 61 | } 62 | return nil 63 | }, 64 | } 65 | rbomCmd.Flags().StringP("file", "f", "", "path of input text file") 66 | _ = rbomCmd.MarkFlagFilename("file") 67 | rbomCmd.Flags().StringP("output", "o", "", "path of output file") 68 | _ = rbomCmd.MarkFlagFilename("output") 69 | 70 | return rbomCmd 71 | } 72 | 73 | /* Copyright 2020-2021 Spiegel 74 | * 75 | * Licensed under the Apache License, Version 2.0 (the "License"); 76 | * you may not use this file except in compliance with the License. 77 | * You may obtain a copy of the License at 78 | * 79 | * http://www.apache.org/licenses/LICENSE-2.0 80 | * 81 | * Unless required by applicable law or agreed to in writing, software 82 | * distributed under the License is distributed on an "AS IS" BASIS, 83 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 84 | * See the License for the specific language governing permissions and 85 | * limitations under the License. 
86 | */ 87 | -------------------------------------------------------------------------------- /facade/version.go: -------------------------------------------------------------------------------- 1 | package facade 2 | 3 | import ( 4 | "strings" 5 | 6 | "github.com/goark/gocli/rwi" 7 | "github.com/spf13/cobra" 8 | ) 9 | 10 | var ( 11 | usage = []string{ //output message of version 12 | Name + " " + Version, 13 | "repository: https://github.com/goark/gnkf", 14 | } 15 | ) 16 | 17 | //newVersionCmd returns cobra.Command instance for show sub-command 18 | func newVersionCmd(ui *rwi.RWI) *cobra.Command { 19 | versionCmd := &cobra.Command{ 20 | Use: "version", 21 | Short: "Print the version number", 22 | Long: "Print the version number of " + Name, 23 | RunE: func(cmd *cobra.Command, args []string) error { 24 | return ui.OutputErrln(strings.Join(usage, "\n")) 25 | }, 26 | } 27 | 28 | return versionCmd 29 | } 30 | 31 | /* Copyright 2020 Spiegel 32 | * 33 | * Licensed under the Apache License, Version 2.0 (the "License"); 34 | * you may not use this file except in compliance with the License. 35 | * You may obtain a copy of the License at 36 | * 37 | * http://www.apache.org/licenses/LICENSE-2.0 38 | * 39 | * Unless required by applicable law or agreed to in writing, software 40 | * distributed under the License is distributed on an "AS IS" BASIS, 41 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 42 | * See the License for the specific language governing permissions and 43 | * limitations under the License. 
44 | */ 45 | -------------------------------------------------------------------------------- /facade/version_test.go: -------------------------------------------------------------------------------- 1 | package facade 2 | 3 | import ( 4 | "bytes" 5 | "testing" 6 | 7 | "github.com/goark/gocli/exitcode" 8 | "github.com/goark/gocli/rwi" 9 | ) 10 | 11 | func TestVersionNormal(t *testing.T) { 12 | testCases := []struct { 13 | args []string 14 | out string 15 | outErr string 16 | }{ 17 | {args: []string{"version"}, out: "", outErr: "gnkf developer version\nrepository: https://github.com/goark/gnkf\n"}, 18 | } 19 | 20 | for _, tc := range testCases { 21 | out := new(bytes.Buffer) 22 | errOut := new(bytes.Buffer) 23 | ui := rwi.New( 24 | rwi.WithWriter(out), 25 | rwi.WithErrorWriter(errOut), 26 | ) 27 | exit := Execute(ui, tc.args) 28 | if exit != exitcode.Normal { 29 | t.Errorf("Execute() err = \"%v\", want \"%v\".", exit, exitcode.Normal) 30 | } 31 | if out.String() != tc.out { 32 | t.Errorf("Execute() Stdout = \"%v\", want \"%v\".", out.String(), tc.out) 33 | } 34 | if errOut.String() != tc.outErr { 35 | t.Errorf("Execute() Stderr = \"%v\", want \"%v\".", errOut.String(), tc.outErr) 36 | } 37 | } 38 | } 39 | 40 | /* Copyright 2019 Spiegel 41 | * 42 | * Licensed under the Apache License, Version 2.0 (the "License"); 43 | * you may not use this file except in compliance with the License. 44 | * You may obtain a copy of the License at 45 | * 46 | * http://www.apache.org/licenses/LICENSE-2.0 47 | * 48 | * Unless required by applicable law or agreed to in writing, software 49 | * distributed under the License is distributed on an "AS IS" BASIS, 50 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 51 | * See the License for the specific language governing permissions and 52 | * limitations under the License. 
53 | */ 54 | -------------------------------------------------------------------------------- /facade/width.go: -------------------------------------------------------------------------------- 1 | package facade 2 | 3 | import ( 4 | "bytes" 5 | "fmt" 6 | "os" 7 | "strings" 8 | 9 | "github.com/goark/errs" 10 | "github.com/goark/gnkf/rbom" 11 | "github.com/goark/gnkf/width" 12 | "github.com/goark/gocli/rwi" 13 | "github.com/spf13/cobra" 14 | ) 15 | 16 | //newNormCmd returns cobra.Command instance for show sub-command 17 | func newWidthCmd(ui *rwi.RWI) *cobra.Command { 18 | widthCmd := &cobra.Command{ 19 | Use: "width", 20 | Aliases: []string{"wdth", "w"}, 21 | Short: "Convert character width in the text", 22 | Long: "Convert character width in the text (UTF-8 encoding only).", 23 | RunE: func(cmd *cobra.Command, args []string) error { 24 | //Options 25 | inp, err := cmd.Flags().GetString("file") 26 | if err != nil { 27 | return debugPrint(ui, errs.New("Error in --file option", errs.WithCause(err))) 28 | } 29 | out, err := cmd.Flags().GetString("output") 30 | if err != nil { 31 | return debugPrint(ui, errs.New("Error in --output option", errs.WithCause(err))) 32 | } 33 | form, err := cmd.Flags().GetString("conversion-form") 34 | if err != nil { 35 | return debugPrint(ui, errs.New("Error in --conversion-form option", errs.WithCause(err))) 36 | } 37 | rbFlag, err := cmd.Flags().GetBool("remove-bom") 38 | if err != nil { 39 | return debugPrint(ui, errs.New("Error in --remove-bom option", errs.WithCause(err))) 40 | } 41 | 42 | //Input stream 43 | r := ui.Reader() 44 | if len(inp) > 0 { 45 | file, err := os.Open(inp) 46 | if err != nil { 47 | return debugPrint(ui, errs.Wrap(err, errs.WithContext("file", inp))) 48 | } 49 | defer file.Close() 50 | r = file 51 | } 52 | 53 | //Output stream 54 | w := ui.Writer() 55 | if len(out) > 0 { 56 | file, err := os.Create(out) 57 | if err != nil { 58 | return debugPrint(ui, errs.Wrap(err, errs.WithContext("output", out))) 59 | } 60 | 
defer file.Close() 61 | w = file 62 | } 63 | 64 | //Remove BOM 65 | if rbFlag { 66 | b, err := rbom.RemoveBom(r) 67 | if err != nil { 68 | return debugPrint(ui, errs.Wrap(err, errs.WithContext("file", inp), errs.WithContext("output", out))) 69 | } 70 | r = bytes.NewReader(b) 71 | } 72 | 73 | //Run command 74 | if err := width.Convert(form, w, r); err != nil { 75 | return debugPrint(ui, errs.Wrap(err, errs.WithContext("file", inp), errs.WithContext("output", out))) 76 | } 77 | return nil 78 | }, 79 | } 80 | widthCmd.Flags().StringP("file", "f", "", "path of input text file") 81 | _ = widthCmd.MarkFlagFilename("file") 82 | widthCmd.Flags().StringP("output", "o", "", "path of output file") 83 | _ = widthCmd.MarkFlagFilename("output") 84 | widthCmd.Flags().StringP("conversion-form", "c", "fold", fmt.Sprintf("conversion form: [%s]", strings.Join(width.FormList(), "|"))) 85 | _ = widthCmd.RegisterFlagCompletionFunc("conversion-form", func(cmd *cobra.Command, args []string, toComplete string) ([]string, cobra.ShellCompDirective) { 86 | return width.FormList(), cobra.ShellCompDirectiveDefault 87 | }) 88 | widthCmd.Flags().BoolP("remove-bom", "b", false, "remove BOM character") 89 | 90 | return widthCmd 91 | } 92 | 93 | /* Copyright 2020-2021 Spiegel 94 | * 95 | * Licensed under the Apache License, Version 2.0 (the "License"); 96 | * you may not use this file except in compliance with the License. 97 | * You may obtain a copy of the License at 98 | * 99 | * http://www.apache.org/licenses/LICENSE-2.0 100 | * 101 | * Unless required by applicable law or agreed to in writing, software 102 | * distributed under the License is distributed on an "AS IS" BASIS, 103 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 104 | * See the License for the specific language governing permissions and 105 | * limitations under the License. 
106 | */ 107 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/goark/gnkf 2 | 3 | go 1.24 4 | 5 | toolchain go1.24.2 6 | 7 | require ( 8 | github.com/goark/csvdata v0.7.3 9 | github.com/goark/errs v1.3.2 10 | github.com/goark/gocli v0.13.0 11 | github.com/goark/kkconv v0.3.3 12 | github.com/saintfish/chardet v0.0.0-20230101081208-5e3ef4b5456d 13 | github.com/spf13/cobra v1.9.1 14 | golang.org/x/crypto v0.37.0 15 | golang.org/x/text v0.24.0 16 | ) 17 | 18 | require ( 19 | github.com/inconshreveable/mousetrap v1.1.0 // indirect 20 | github.com/spf13/pflag v1.0.6 // indirect 21 | ) 22 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/cpuguy83/go-md2man/v2 v2.0.6/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g= 2 | github.com/goark/csvdata v0.7.3 h1:IkWPbaeIEVH6jEw0G18OTEPaE4s8RQN8a2k2ooDFx2g= 3 | github.com/goark/csvdata v0.7.3/go.mod h1:vhn11zhff8ORS4ZkiTJv/EHXOQqk29jKGo7MYL8Rp0I= 4 | github.com/goark/errs v1.3.2 h1:ifccNe1aK7Xezt4XVYwHUqalmnfhuphnEvh3FshCReQ= 5 | github.com/goark/errs v1.3.2/go.mod h1:ZsQucxaDFVfSB8I99j4bxkDRfNOrlKINwg72QMuRWKw= 6 | github.com/goark/gocli v0.13.0 h1:hR/5E4JGMEcbQxkSqR7K/0XnYY2Hd6GDpuazXGC3jn4= 7 | github.com/goark/gocli v0.13.0/go.mod h1:pFYWXAXZ5G4QqPcXsDTSFbCuVg0qO40NYkp2XKthc18= 8 | github.com/goark/kkconv v0.3.3 h1:Cv9XBLbsqMEYRP2RKHOj9nxDxzLuEPgPn0/SJbi7N1U= 9 | github.com/goark/kkconv v0.3.3/go.mod h1:L16PcMzduVmw1ywyqFcpRhqadX75MT/af4RE2CSI210= 10 | github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= 11 | github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= 12 | github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= 
13 | github.com/saintfish/chardet v0.0.0-20230101081208-5e3ef4b5456d h1:hrujxIzL1woJ7AwssoOcM/tq5JjjG2yYOc8odClEiXA= 14 | github.com/saintfish/chardet v0.0.0-20230101081208-5e3ef4b5456d/go.mod h1:uugorj2VCxiV1x+LzaIdVa9b4S4qGAcH6cbhh4qVxOU= 15 | github.com/spf13/cobra v1.9.1 h1:CXSaggrXdbHK9CF+8ywj8Amf7PBRmPCOJugH954Nnlo= 16 | github.com/spf13/cobra v1.9.1/go.mod h1:nDyEzZ8ogv936Cinf6g1RU9MRY64Ir93oCnqb9wxYW0= 17 | github.com/spf13/pflag v1.0.6 h1:jFzHGLGAlb3ruxLB8MhbI6A8+AQX/2eW4qeyNZXNp2o= 18 | github.com/spf13/pflag v1.0.6/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= 19 | golang.org/x/crypto v0.37.0 h1:kJNSjF/Xp7kU0iB2Z+9viTPMW4EqqsrywMXLJOOsXSE= 20 | golang.org/x/crypto v0.37.0/go.mod h1:vg+k43peMZ0pUMhYmVAWysMK35e6ioLh3wB8ZCAfbVc= 21 | golang.org/x/text v0.24.0 h1:dd5Bzh4yt5KYA8f9CJHCP4FB4D51c2c6JvN37xJJkJ0= 22 | golang.org/x/text v0.24.0/go.mod h1:L8rBsPeo2pSS+xqN0d5u2ikmjtmoJbDBT1b7nHvFCdU= 23 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 24 | gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 25 | -------------------------------------------------------------------------------- /guess/example_test.go: -------------------------------------------------------------------------------- 1 | package guess_test 2 | 3 | import ( 4 | "fmt" 5 | "strings" 6 | 7 | "github.com/goark/gnkf/guess" 8 | ) 9 | 10 | func ExampleEncoding() { 11 | elist, err := guess.Encoding(strings.NewReader("こんにちは,世界!\n私の名前は Spiegel です。")) 12 | if err != nil { 13 | fmt.Println(err) 14 | return 15 | } 16 | fmt.Println(strings.Join(elist, ",")) 17 | //Output: 18 | //UTF-8,windows-1252,windows-1253,Shift_JIS,windows-1255 19 | } 20 | 21 | func ExampleEncodingBytes() { 22 | elist, err := guess.EncodingBytes([]byte("こんにちは,世界!\n私の名前は Spiegel です。")) 23 | if err != nil { 24 | fmt.Println(err) 25 | return 26 | } 27 | fmt.Println(strings.Join(elist, ",")) 28 | //Output: 29 | 
//UTF-8,windows-1252,windows-1253,Shift_JIS,windows-1255 30 | } 31 | 32 | /* Copyright 2020 Spiegel 33 | * 34 | * Licensed under the Apache License, Version 2.0 (the "License"); 35 | * you may not use this file except in compliance with the License. 36 | * You may obtain a copy of the License at 37 | * 38 | * http://www.apache.org/licenses/LICENSE-2.0 39 | * 40 | * Unless required by applicable law or agreed to in writing, software 41 | * distributed under the License is distributed on an "AS IS" BASIS, 42 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 43 | * See the License for the specific language governing permissions and 44 | * limitations under the License. 45 | */ 46 | -------------------------------------------------------------------------------- /guess/guess.go: -------------------------------------------------------------------------------- 1 | package guess 2 | 3 | import ( 4 | "bytes" 5 | "io" 6 | "sort" 7 | 8 | "github.com/goark/errs" 9 | "github.com/goark/gnkf/ecode" 10 | "github.com/saintfish/chardet" 11 | ) 12 | 13 | //Encoding detects guesses of character encoding name from byte stream 14 | func Encoding(txt io.Reader) ([]string, error) { 15 | if txt == nil { 16 | return nil, errs.Wrap(ecode.ErrNullPointer) 17 | } 18 | buf := &bytes.Buffer{} 19 | if _, err := buf.ReadFrom(txt); err != nil { 20 | return nil, errs.Wrap(err) 21 | } 22 | return EncodingBytes(buf.Bytes()) 23 | } 24 | 25 | //EncodingBytes detects guesses of character encoding name from byte array 26 | func EncodingBytes(b []byte) ([]string, error) { 27 | all, err := chardet.NewTextDetector().DetectAll(b) 28 | if err != nil { 29 | return nil, errs.Wrap(ecode.ErrCannotDetect, errs.WithCause(err)) 30 | } 31 | sort.SliceStable(all, func(i, j int) bool { 32 | if all[i].Confidence != all[j].Confidence { 33 | return all[i].Confidence > all[j].Confidence 34 | } 35 | return all[i].Charset < all[j].Charset 36 | }) 37 | ss := []string{} 38 | for _, r := range all { 39 
| ss = append(ss, r.Charset) 40 | } 41 | return ss, nil 42 | } 43 | 44 | /* Copyright 2020 Spiegel 45 | * 46 | * Licensed under the Apache License, Version 2.0 (the "License"); 47 | * you may not use this file except in compliance with the License. 48 | * You may obtain a copy of the License at 49 | * 50 | * http://www.apache.org/licenses/LICENSE-2.0 51 | * 52 | * Unless required by applicable law or agreed to in writing, software 53 | * distributed under the License is distributed on an "AS IS" BASIS, 54 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 55 | * See the License for the specific language governing permissions and 56 | * limitations under the License. 57 | */ 58 | -------------------------------------------------------------------------------- /guess/guess_test.go: -------------------------------------------------------------------------------- 1 | package guess 2 | 3 | import ( 4 | "bytes" 5 | "io" 6 | "strings" 7 | "testing" 8 | 9 | "github.com/goark/errs" 10 | "github.com/goark/gnkf/ecode" 11 | ) 12 | 13 | var ( 14 | textUTF8 = []byte("こんにちは,世界!\n私の名前は Spiegel です。") 15 | textSJIS = []byte{0x82, 0xb1, 0x82, 0xf1, 0x82, 0xc9, 0x82, 0xbf, 0x82, 0xcd, 0x81, 0x43, 0x90, 0xa2, 0x8a, 0x45, 0x81, 0x49, 0x0a, 0x8e, 0x84, 0x82, 0xcc, 0x96, 0xbc, 0x91, 0x4f, 0x82, 0xcd, 0x20, 0x53, 0x70, 0x69, 0x65, 0x67, 0x65, 0x6c, 0x20, 0x82, 0xc5, 0x82, 0xb7, 0x81, 0x42} 16 | textEUC = []byte{0xa4, 0xb3, 0xa4, 0xf3, 0xa4, 0xcb, 0xa4, 0xc1, 0xa4, 0xcf, 0xa1, 0xa4, 0xc0, 0xa4, 0xb3, 0xa6, 0xa1, 0xaa, 0x0a, 0xbb, 0xe4, 0xa4, 0xce, 0xcc, 0xbe, 0xc1, 0xb0, 0xa4, 0xcf, 0x20, 0x53, 0x70, 0x69, 0x65, 0x67, 0x65, 0x6c, 0x20, 0xa4, 0xc7, 0xa4, 0xb9, 0xa1, 0xa3} 17 | testCases = []struct { 18 | text []byte 19 | res string 20 | err error 21 | }{ 22 | {text: textUTF8, res: "UTF-8,windows-1252,windows-1253,Shift_JIS,windows-1255", err: nil}, 23 | {text: textSJIS, res: "Shift_JIS,windows-1252,Big5,GB-18030,KOI8-R", err: nil}, 24 | {text: textEUC, res: 
"EUC-JP,Big5,GB-18030,ISO-8859-7,EUC-KR,Shift_JIS,ISO-8859-1", err: nil}, 25 | {text: []byte{0xff}, res: "", err: ecode.ErrCannotDetect}, 26 | {text: nil, res: "UTF-8", err: nil}, 27 | } 28 | ) 29 | 30 | func TestEncodingBytes(t *testing.T) { 31 | for _, tc := range testCases { 32 | res, err := EncodingBytes(tc.text) 33 | if !errs.Is(err, tc.err) { 34 | t.Errorf("EncodingBytes() error = \"%+v\", want \"%+v\".", err, tc.err) 35 | } 36 | str := strings.Join(res, ",") 37 | if str != tc.res { 38 | t.Errorf("EncodingBytes() = \"%v\", want \"%v\".", str, tc.res) 39 | } 40 | } 41 | } 42 | 43 | func TestEncoding(t *testing.T) { 44 | for _, tc := range testCases { 45 | res, err := Encoding(bytes.NewReader(tc.text)) 46 | if !errs.Is(err, tc.err) { 47 | t.Errorf("Encoding() error = \"%+v\", want \"%+v\".", err, tc.err) 48 | } 49 | str := strings.Join(res, ",") 50 | if str != tc.res { 51 | t.Errorf("Encoding() = \"%v\", want \"%v\".", str, tc.res) 52 | } 53 | } 54 | } 55 | 56 | func TestEncodingNil(t *testing.T) { 57 | _, err := Encoding(io.Reader(nil)) 58 | if !errs.Is(err, ecode.ErrNullPointer) { 59 | t.Errorf("Encoding() error = \"%+v\", want \"%+v\".", err, ecode.ErrNullPointer) 60 | } 61 | } 62 | 63 | /* Copyright 2020 Spiegel 64 | * 65 | * Licensed under the Apache License, Version 2.0 (the "License"); 66 | * you may not use this file except in compliance with the License. 67 | * You may obtain a copy of the License at 68 | * 69 | * http://www.apache.org/licenses/LICENSE-2.0 70 | * 71 | * Unless required by applicable law or agreed to in writing, software 72 | * distributed under the License is distributed on an "AS IS" BASIS, 73 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 74 | * See the License for the specific language governing permissions and 75 | * limitations under the License. 
// algOrder lists the hash algorithms this package iterates over, in the order
// used by AlgorithmList and Algorithm. The trailing comments name the standard
// library package each entry requires.
var algOrder = []crypto.Hash{
	crypto.MD5,        // requires the "crypto/md5" package
	crypto.SHA1,       // requires the "crypto/sha1" package
	crypto.SHA224,     // requires the "crypto/sha256" package
	crypto.SHA256,     // requires the "crypto/sha256" package
	crypto.SHA384,     // requires the "crypto/sha512" package
	crypto.SHA512,     // requires the "crypto/sha512" package
	crypto.SHA512_224, // requires the "crypto/sha512" package
	crypto.SHA512_256, // requires the "crypto/sha512" package
}
23 | func AlgorithmList(sep string) string { 24 | ss := []string{} 25 | for _, alg := range algOrder { 26 | if s := AlgoString(alg); len(s) > 0 { 27 | ss = append(ss, s) 28 | } 29 | } 30 | return strings.Join(ss, sep) 31 | } 32 | 33 | //Algorithm returns crypto.Hash from string. 34 | func Algorithm(s string) (crypto.Hash, error) { 35 | if len(s) == 0 { 36 | return crypto.Hash(0), errs.Wrap(ecode.ErrInvalidHashAlg, errs.WithContext("algorithm", s)) 37 | } 38 | for _, alg := range algOrder { 39 | if strings.EqualFold(AlgoString(alg), s) { 40 | return alg, nil 41 | } 42 | } 43 | return crypto.Hash(0), errs.Wrap(ecode.ErrInvalidHashAlg, errs.WithContext("algorithm", s)) 44 | } 45 | 46 | //AlgoString returns string of hash algorithm. 47 | func AlgoString(alg crypto.Hash) string { 48 | if alg.Available() { 49 | return alg.String() 50 | } 51 | return "" 52 | } 53 | 54 | /* Copyright 2021 Spiegel 55 | * 56 | * Licensed under the Apache License, Version 2.0 (the "License"); 57 | * you may not use this file except in compliance with the License. 58 | * You may obtain a copy of the License at 59 | * 60 | * http://www.apache.org/licenses/LICENSE-2.0 61 | * 62 | * Unless required by applicable law or agreed to in writing, software 63 | * distributed under the License is distributed on an "AS IS" BASIS, 64 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 65 | * See the License for the specific language governing permissions and 66 | * limitations under the License. 67 | */ 68 | -------------------------------------------------------------------------------- /hash/check.go: -------------------------------------------------------------------------------- 1 | package hash 2 | 3 | import ( 4 | "crypto" 5 | "fmt" 6 | "io" 7 | "os" 8 | "strings" 9 | 10 | "github.com/goark/errs" 11 | "github.com/goark/gnkf/ecode" 12 | ) 13 | 14 | //Check function returns true if computed hash value is match. 
15 | func Check(alg crypto.Hash, r io.Reader, hashStr string) (bool, error) { 16 | v, err := Value(alg, r) 17 | if err != nil { 18 | return false, errs.Wrap(ecode.ErrInvalidHashAlg, errs.WithContext("algorithm", AlgoString(alg)), errs.WithContext("hash", hashStr)) 19 | } 20 | str := fmt.Sprintf("%x", v) 21 | if len(str) != len(hashStr) { 22 | return false, errs.Wrap(ecode.ErrImproperlyHashFormat, errs.WithContext("algorithm", AlgoString(alg)), errs.WithContext("hash", hashStr)) 23 | } 24 | if !strings.EqualFold(str, hashStr) { 25 | return false, nil 26 | } 27 | return true, nil 28 | } 29 | 30 | //Check function returns true if computed hash value is match. 31 | func CheckFile(alg crypto.Hash, path string, hashStr string) (bool, error) { 32 | file, err := os.Open(path) 33 | if err != nil { 34 | return false, errs.Wrap(err, errs.WithContext("algorithm", AlgoString(alg)), errs.WithContext("path", path), errs.WithContext("hash", hashStr)) 35 | } 36 | defer file.Close() 37 | return Check(alg, file, hashStr) 38 | } 39 | 40 | /* Copyright 2021 Spiegel 41 | * 42 | * Licensed under the Apache License, Version 2.0 (the "License"); 43 | * you may not use this file except in compliance with the License. 44 | * You may obtain a copy of the License at 45 | * 46 | * http://www.apache.org/licenses/LICENSE-2.0 47 | * 48 | * Unless required by applicable law or agreed to in writing, software 49 | * distributed under the License is distributed on an "AS IS" BASIS, 50 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 51 | * See the License for the specific language governing permissions and 52 | * limitations under the License. 
53 | */ 54 | -------------------------------------------------------------------------------- /hash/checker.go: -------------------------------------------------------------------------------- 1 | package hash 2 | 3 | import ( 4 | "bufio" 5 | "crypto" 6 | "io" 7 | "strings" 8 | 9 | "github.com/goark/errs" 10 | "github.com/goark/gnkf/ecode" 11 | ) 12 | 13 | type Checker interface { 14 | Path() string 15 | Err() error 16 | Check() error 17 | } 18 | 19 | //NewCheckers returns list of Checker instances from io.Reader. 20 | func NewCheckers(r io.Reader, alg crypto.Hash) ([]Checker, error) { 21 | scanner := bufio.NewScanner(r) 22 | chks := []Checker{} 23 | for scanner.Scan() { 24 | line := strings.Fields(scanner.Text()) 25 | if len(line) < 2 { 26 | return chks, errs.Wrap(ecode.ErrInvalidChekerFormat) 27 | } 28 | if line[len(line)-1] != "-" { 29 | chks = append(chks, newChecker(alg, line[len(line)-1], line[0])) 30 | } 31 | } 32 | if err := scanner.Err(); err != nil { 33 | return chks, errs.Wrap(err) 34 | } 35 | return chks, nil 36 | } 37 | 38 | // checker is hash checker class. 39 | type checker struct { 40 | alg crypto.Hash 41 | path string 42 | hashStr string 43 | err error 44 | } 45 | 46 | func newChecker(alg crypto.Hash, path string, hashStr string) Checker { 47 | return &checker{alg: alg, path: path, hashStr: hashStr, err: nil} 48 | } 49 | 50 | //Path method returns path element in checker. 51 | func (c *checker) Path() string { return c.path } 52 | 53 | //Err method returns error element in checker. 54 | func (c *checker) Err() error { return c.err } 55 | 56 | //Check method checks hash code with checker info. 
57 | func (c *checker) Check() error { 58 | if c == nil { 59 | return nil 60 | } 61 | if ok, err := CheckFile(c.alg, c.path, c.hashStr); err != nil { 62 | c.err = errs.Wrap(err, errs.WithContext("alg", c.alg.String()), errs.WithContext("path", c.path), errs.WithContext("hashStr", c.hashStr)) 63 | } else if !ok { 64 | c.err = errs.Wrap(ecode.ErrUnmatchHashString, errs.WithContext("alg", c.alg.String()), errs.WithContext("path", c.path), errs.WithContext("hashStr", c.hashStr)) 65 | } else { 66 | c.err = nil 67 | } 68 | return c.err 69 | } 70 | 71 | /* Copyright 2021 Spiegel 72 | * 73 | * Licensed under the Apache License, Version 2.0 (the "License"); 74 | * you may not use this file except in compliance with the License. 75 | * You may obtain a copy of the License at 76 | * 77 | * http://www.apache.org/licenses/LICENSE-2.0 78 | * 79 | * Unless required by applicable law or agreed to in writing, software 80 | * distributed under the License is distributed on an "AS IS" BASIS, 81 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 82 | * See the License for the specific language governing permissions and 83 | * limitations under the License. 
84 | */ 85 | -------------------------------------------------------------------------------- /hash/checker_test.go: -------------------------------------------------------------------------------- 1 | package hash 2 | 3 | import ( 4 | "bytes" 5 | "crypto" 6 | "errors" 7 | "syscall" 8 | "testing" 9 | 10 | "github.com/goark/gnkf/ecode" 11 | ) 12 | 13 | const ( 14 | checkerFile0 = `e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b85a` 15 | checkerFile1 = `e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b85a not-exist.dat` 16 | checkerFile2 = `e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855 testdata/null.dat` 17 | checkerFile3 = `e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b85a testdata/null.dat` 18 | checkerFile4 = `e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b85aa testdata/null.dat` 19 | ) 20 | 21 | func TestCheckerFile(t *testing.T) { 22 | testCases := []struct { 23 | alg crypto.Hash 24 | inp string 25 | err error 26 | }{ 27 | {alg: crypto.SHA256, inp: checkerFile0, err: ecode.ErrInvalidChekerFormat}, 28 | {alg: crypto.SHA256, inp: checkerFile1, err: syscall.ENOENT}, 29 | {alg: crypto.SHA256, inp: checkerFile2, err: nil}, 30 | {alg: crypto.SHA256, inp: checkerFile3, err: ecode.ErrUnmatchHashString}, 31 | {alg: crypto.SHA256, inp: checkerFile4, err: ecode.ErrImproperlyHashFormat}, 32 | } 33 | for _, tc := range testCases { 34 | checkers, err := NewCheckers(bytes.NewReader([]byte(tc.inp)), tc.alg) 35 | if err != nil { 36 | if !errors.Is(err, tc.err) { 37 | t.Errorf("NewCheckers() error = \"%+v\", want \"%+v\".", err, tc.err) 38 | } 39 | } else if len(checkers) < 1 { 40 | t.Errorf("count ofNewCheckers() are %d.", len(checkers)) 41 | } else { 42 | err := checkers[0].Check() 43 | if !errors.Is(err, tc.err) { 44 | t.Errorf("Checkers.Check() error = \"%+v\", want \"%+v\".", err, tc.err) 45 | } 46 | } 47 | } 48 | } 49 | 50 | /* Copyright 2021 Spiegel 51 | * 52 | * Licensed under the Apache License, 
Version 2.0 (the "License"); 53 | * you may not use this file except in compliance with the License. 54 | * You may obtain a copy of the License at 55 | * 56 | * http://www.apache.org/licenses/LICENSE-2.0 57 | * 58 | * Unless required by applicable law or agreed to in writing, software 59 | * distributed under the License is distributed on an "AS IS" BASIS, 60 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 61 | * See the License for the specific language governing permissions and 62 | * limitations under the License. 63 | */ 64 | -------------------------------------------------------------------------------- /hash/hash.go: -------------------------------------------------------------------------------- 1 | package hash 2 | 3 | import ( 4 | "crypto" 5 | "io" 6 | 7 | "github.com/goark/errs" 8 | "github.com/goark/gnkf/ecode" 9 | ) 10 | 11 | //Value returns hash value string from io.Reader 12 | func Value(alg crypto.Hash, r io.Reader) ([]byte, error) { 13 | if !alg.Available() { 14 | return nil, errs.Wrap(ecode.ErrInvalidHashAlg, errs.WithContext("algorithm", AlgoString(alg))) 15 | } 16 | h := alg.New() 17 | if _, err := io.Copy(h, r); err != nil { 18 | return nil, errs.Wrap(err, errs.WithContext("algorithm", AlgoString(alg))) 19 | } 20 | return h.Sum(nil), nil 21 | } 22 | 23 | //ValueFromBytes returns hash value string from []byte 24 | func ValueFromBytes(alg crypto.Hash, b []byte) ([]byte, error) { 25 | if !alg.Available() { 26 | return nil, errs.Wrap(ecode.ErrInvalidHashAlg, errs.WithContext("algorithm", AlgoString(alg))) 27 | } 28 | return alg.New().Sum(b), nil 29 | } 30 | 31 | /* Copyright 2021 Spiegel 32 | * 33 | * Licensed under the Apache License, Version 2.0 (the "License"); 34 | * you may not use this file except in compliance with the License. 
35 | * You may obtain a copy of the License at 36 | * 37 | * http://www.apache.org/licenses/LICENSE-2.0 38 | * 39 | * Unless required by applicable law or agreed to in writing, software 40 | * distributed under the License is distributed on an "AS IS" BASIS, 41 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 42 | * See the License for the specific language governing permissions and 43 | * limitations under the License. 44 | */ 45 | -------------------------------------------------------------------------------- /hash/hash_test.go: -------------------------------------------------------------------------------- 1 | package hash 2 | 3 | import ( 4 | "crypto" 5 | _ "crypto/md5" 6 | _ "crypto/sha1" 7 | _ "crypto/sha256" 8 | _ "crypto/sha512" 9 | "errors" 10 | "fmt" 11 | "strings" 12 | "syscall" 13 | 14 | "testing" 15 | 16 | "github.com/goark/gnkf/ecode" 17 | ) 18 | 19 | func TestAlgorithmList(t *testing.T) { 20 | res := "MD5|SHA-1|SHA-224|SHA-256|SHA-384|SHA-512|SHA-512/224|SHA-512/256" 21 | str := AlgorithmList("|") 22 | if str != res { 23 | t.Errorf("AlgorithmList() = \"%+v\", want \"%+v\".", str, res) 24 | } 25 | } 26 | 27 | func TestAlgorithm(t *testing.T) { 28 | testCases := []struct { 29 | name string 30 | alg crypto.Hash 31 | err error 32 | }{ 33 | {name: "", alg: crypto.Hash(0), err: ecode.ErrInvalidHashAlg}, 34 | {name: "foo", alg: crypto.Hash(0), err: ecode.ErrInvalidHashAlg}, 35 | {name: "md5", alg: crypto.MD5, err: nil}, 36 | {name: "SHA-1", alg: crypto.SHA1, err: nil}, 37 | {name: "SHA-224", alg: crypto.SHA224, err: nil}, 38 | {name: "SHA-256", alg: crypto.SHA256, err: nil}, 39 | {name: "SHA-384", alg: crypto.SHA384, err: nil}, 40 | {name: "SHA-512", alg: crypto.SHA512, err: nil}, 41 | {name: "SHA-512/224", alg: crypto.SHA512_224, err: nil}, 42 | {name: "SHA-512/256", alg: crypto.SHA512_256, err: nil}, 43 | } 44 | for _, tc := range testCases { 45 | alg, err := Algorithm(tc.name) 46 | if !errors.Is(err, tc.err) { 47 | 
t.Errorf("Algorithm(%v) error = \"%+v\", want \"%+v\".", tc.name, err, tc.err) 48 | } 49 | if alg != tc.alg { 50 | t.Errorf("Algorithm(%v) = \"%+v\", want \"%+v\".", tc.name, alg.String(), tc.alg.String()) 51 | } 52 | } 53 | } 54 | 55 | func TestCheck(t *testing.T) { 56 | testCases := []struct { 57 | algName string 58 | hashStr string 59 | res bool 60 | err error 61 | }{ 62 | {algName: "", hashStr: "", res: false, err: ecode.ErrInvalidHashAlg}, 63 | {algName: "foo", hashStr: "", res: false, err: ecode.ErrInvalidHashAlg}, 64 | {algName: "md5", hashStr: "aa", res: false, err: ecode.ErrImproperlyHashFormat}, 65 | {algName: "md5", hashStr: "d41d8cd98f00b204e9800998ecf8427a", res: false, err: nil}, 66 | {algName: "md5", hashStr: "d41d8cd98f00b204e9800998ecf8427e", res: true, err: nil}, //see https://en.wikipedia.org/wiki/MD5 67 | {algName: "SHA-1", hashStr: "da39a3ee5e6b4b0d3255bfef95601890afd80709", res: true, err: nil}, //see https://en.wikipedia.org/wiki/SHA-1 68 | {algName: "SHA-224", hashStr: "d14a028c2a3a2bc9476102bb288234c415a2b01f828ea62ac5b3e42f", res: true, err: nil}, //see https://en.wikipedia.org/wiki/SHA-2 69 | {algName: "SHA-256", hashStr: "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855", res: true, err: nil}, //see https://en.wikipedia.org/wiki/SHA-2 70 | {algName: "SHA-384", hashStr: "38b060a751ac96384cd9327eb1b1e36a21fdb71114be07434c0cc7bf63f6e1da274edebfe76f65fbd51ad2f14898b95b", res: true, err: nil}, //see https://en.wikipedia.org/wiki/SHA-2 71 | {algName: "SHA-512", hashStr: "cf83e1357eefb8bdf1542850d66d8007d620e4050b5715dc83f4a921d36ce9ce47d0d13c5d85f2b0ff8318d2877eec2f63b931bd47417a81a538327af927da3e", res: true, err: nil}, //see https://en.wikipedia.org/wiki/SHA-2 72 | {algName: "SHA-512/224", hashStr: "6ed0dd02806fa89e25de060c19d3ac86cabb87d6a0ddd05c333b84f4", res: true, err: nil}, //see https://en.wikipedia.org/wiki/SHA-2 73 | {algName: "SHA-512/256", hashStr: "c672b8d1ef56ed28ab87c3622c5114069bdd3ad7b8f9737498d0c01ecef0967a", 
res: true, err: nil}, //see https://en.wikipedia.org/wiki/SHA-2 74 | } 75 | for _, tc := range testCases { 76 | alg, err := Algorithm(tc.algName) 77 | if err != nil { 78 | if !errors.Is(err, tc.err) { 79 | t.Errorf("Algorithm(%v) error = \"%+v\", want \"%+v\".", tc.algName, err, tc.err) 80 | } 81 | } else { 82 | res, err := Check(alg, strings.NewReader(""), tc.hashStr) 83 | if res != tc.res { 84 | t.Errorf("Check(%v) \"%v\", want \"%v\".", tc.algName, res, tc.res) 85 | } 86 | if !errors.Is(err, tc.err) { 87 | t.Errorf("Value(%v) error = \"%+v\", want \"%+v\".", tc.algName, err, tc.err) 88 | } 89 | } 90 | } 91 | } 92 | 93 | func TestCheckFile(t *testing.T) { 94 | testCases := []struct { 95 | algName string 96 | path string 97 | hashStr string 98 | res bool 99 | err error 100 | }{ 101 | {algName: "md5", path: "not-exist.dat", hashStr: "d41d8cd98f00b204e9800998ecf8427e", res: false, err: syscall.ENOENT}, 102 | {algName: "md5", path: "testdata/null.dat", hashStr: "d41d8cd98f00b204e9800998ecf8427e", res: true, err: nil}, //see https://en.wikipedia.org/wiki/MD5 103 | } 104 | for _, tc := range testCases { 105 | alg, err := Algorithm(tc.algName) 106 | if err != nil { 107 | if !errors.Is(err, tc.err) { 108 | t.Errorf("Algorithm(%v) error = \"%+v\", want \"%+v\".", tc.algName, err, tc.err) 109 | } 110 | } else { 111 | res, err := CheckFile(alg, tc.path, tc.hashStr) 112 | if res != tc.res { 113 | t.Errorf("Check(%v) \"%v\", want \"%v\".", tc.algName, res, tc.res) 114 | } 115 | if !errors.Is(err, tc.err) { 116 | t.Errorf("Value(%v) error = \"%+v\", want \"%+v\".", tc.algName, err, tc.err) 117 | } 118 | } 119 | } 120 | } 121 | 122 | func TestValueFromBytes(t *testing.T) { 123 | testCases := []struct { 124 | algName string 125 | hashStr string 126 | err error 127 | }{ 128 | {algName: "", hashStr: "", err: ecode.ErrInvalidHashAlg}, 129 | {algName: "foo", hashStr: "", err: ecode.ErrInvalidHashAlg}, 130 | {algName: "md5", hashStr: "d41d8cd98f00b204e9800998ecf8427e", err: nil}, 
//see https://en.wikipedia.org/wiki/MD5 131 | {algName: "SHA-1", hashStr: "da39a3ee5e6b4b0d3255bfef95601890afd80709", err: nil}, //see https://en.wikipedia.org/wiki/SHA-1 132 | {algName: "SHA-224", hashStr: "d14a028c2a3a2bc9476102bb288234c415a2b01f828ea62ac5b3e42f", err: nil}, //see https://en.wikipedia.org/wiki/SHA-2 133 | {algName: "SHA-256", hashStr: "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855", err: nil}, //see https://en.wikipedia.org/wiki/SHA-2 134 | {algName: "SHA-384", hashStr: "38b060a751ac96384cd9327eb1b1e36a21fdb71114be07434c0cc7bf63f6e1da274edebfe76f65fbd51ad2f14898b95b", err: nil}, //see https://en.wikipedia.org/wiki/SHA-2 135 | {algName: "SHA-512", hashStr: "cf83e1357eefb8bdf1542850d66d8007d620e4050b5715dc83f4a921d36ce9ce47d0d13c5d85f2b0ff8318d2877eec2f63b931bd47417a81a538327af927da3e", err: nil}, //see https://en.wikipedia.org/wiki/SHA-2 136 | {algName: "SHA-512/224", hashStr: "6ed0dd02806fa89e25de060c19d3ac86cabb87d6a0ddd05c333b84f4", err: nil}, //see https://en.wikipedia.org/wiki/SHA-2 137 | {algName: "SHA-512/256", hashStr: "c672b8d1ef56ed28ab87c3622c5114069bdd3ad7b8f9737498d0c01ecef0967a", err: nil}, //see https://en.wikipedia.org/wiki/SHA-2 138 | } 139 | for _, tc := range testCases { 140 | alg, err := Algorithm(tc.algName) 141 | if !errors.Is(err, tc.err) { 142 | t.Errorf("Algorithm(%v) error = \"%+v\", want \"%+v\".", tc.algName, err, tc.err) 143 | } else { 144 | 145 | if v, err := ValueFromBytes(alg, []byte("")); !errors.Is(err, tc.err) { 146 | t.Errorf("Value(%v) error = \"%+v\", want \"%+v\".", tc.algName, err, tc.err) 147 | } else { 148 | str := fmt.Sprintf("%x", v) 149 | if str != tc.hashStr { 150 | t.Errorf("Value(%v) \"%+v\", want \"%+v\".", tc.algName, str, tc.hashStr) 151 | } 152 | } 153 | 154 | } 155 | } 156 | } 157 | 158 | /* Copyright 2021 Spiegel 159 | * 160 | * Licensed under the Apache License, Version 2.0 (the "License"); 161 | * you may not use this file except in compliance with the License. 
162 | * You may obtain a copy of the License at 163 | * 164 | * http://www.apache.org/licenses/LICENSE-2.0 165 | * 166 | * Unless required by applicable law or agreed to in writing, software 167 | * distributed under the License is distributed on an "AS IS" BASIS, 168 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 169 | * See the License for the specific language governing permissions and 170 | * limitations under the License. 171 | */ 172 | -------------------------------------------------------------------------------- /hash/testdata/null.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/goark/gnkf/3ccbb0e62c44011c6838b64d486f6f1f65d1ed43/hash/testdata/null.dat -------------------------------------------------------------------------------- /kana/example_test.go: -------------------------------------------------------------------------------- 1 | package kana_test 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/goark/gnkf/kana" 7 | ) 8 | 9 | func ExampleConvertString() { 10 | txt := "あいうえおわゐゑをんゔゕゖゝゞアイウエオワヰヱヲンヴヵヶヽヾ" 11 | fmt.Println(kana.ConvertString(kana.Hiragana, txt, false)) 12 | fmt.Println(kana.ConvertString(kana.Katakana, txt, false)) 13 | fmt.Println(kana.ConvertString(kana.Chokuon, txt, false)) 14 | //Output: 15 | //あいうえおわゐゑをんゔゕゖゝゞあいうえおわゐゑをんゔゕゖゝゞ 16 | //アイウエオワヰヱヲンヴヵヶヽヾアイウエオワヰヱヲンヴヵヶヽヾ 17 | //あいうえおわゐゑをんゔかけゝゞアイウエオワヰヱヲンヴカケヽヾ 18 | } 19 | 20 | /* Copyright 2020-2021 Spiegel 21 | * 22 | * Licensed under the Apache License, Version 2.0 (the "License"); 23 | * you may not use this file except in compliance with the License. 24 | * You may obtain a copy of the License at 25 | * 26 | * http://www.apache.org/licenses/LICENSE-2.0 27 | * 28 | * Unless required by applicable law or agreed to in writing, software 29 | * distributed under the License is distributed on an "AS IS" BASIS, 30 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
31 | * See the License for the specific language governing permissions and 32 | * limitations under the License. 33 | */ 34 | -------------------------------------------------------------------------------- /kana/form.go: -------------------------------------------------------------------------------- 1 | package kana 2 | 3 | import ( 4 | "strings" 5 | 6 | "github.com/goark/errs" 7 | "github.com/goark/gnkf/ecode" 8 | ) 9 | 10 | //Form is type of newline form 11 | type Form int 12 | 13 | const ( 14 | Hiragana Form = iota //Hiragana form 15 | Katakana //Katakana form 16 | Chokuon //Chokuon (Upper kana) form 17 | ) 18 | 19 | var formNamesMap = map[string]Form{ 20 | "hiragana": Hiragana, 21 | "katakana": Katakana, 22 | "chokuon": Chokuon, 23 | } 24 | 25 | func (f Form) String() string { 26 | return formName(f) 27 | } 28 | 29 | func formName(f Form) string { 30 | for key, value := range formNamesMap { 31 | if value == f { 32 | return key 33 | } 34 | } 35 | return "" 36 | } 37 | 38 | //FormList returns list of newline form 39 | func FormList() []string { 40 | return []string{ 41 | formName(Hiragana), 42 | formName(Katakana), 43 | formName(Chokuon), 44 | } 45 | } 46 | 47 | //FormOf returns newline form name string 48 | func FormOf(name string) (Form, error) { 49 | if f, ok := formNamesMap[strings.ToLower(name)]; ok { 50 | return f, nil 51 | } 52 | return Form(0), errs.Wrap(ecode.ErrInvalidKanaForm, errs.WithContext("name", name)) 53 | } 54 | 55 | /* Copyright 2020-2021 Spiegel 56 | * 57 | * Licensed under the Apache License, Version 2.0 (the "License"); 58 | * you may not use this file except in compliance with the License. 59 | * You may obtain a copy of the License at 60 | * 61 | * http://www.apache.org/licenses/LICENSE-2.0 62 | * 63 | * Unless required by applicable law or agreed to in writing, software 64 | * distributed under the License is distributed on an "AS IS" BASIS, 65 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
66 | * See the License for the specific language governing permissions and 67 | * limitations under the License. 68 | */ 69 | -------------------------------------------------------------------------------- /kana/kana.go: -------------------------------------------------------------------------------- 1 | package kana 2 | 3 | import ( 4 | "bytes" 5 | "io" 6 | "strings" 7 | 8 | "github.com/goark/errs" 9 | "github.com/goark/kkconv" 10 | ) 11 | 12 | //Convert function converts kana character in text stream. 13 | func Convert(f Form, writer io.Writer, txt io.Reader, foldFlag bool) error { 14 | buf := &bytes.Buffer{} 15 | if _, err := buf.ReadFrom(txt); err != nil { 16 | return errs.Wrap(err) 17 | } 18 | if _, err := strings.NewReader(ConvertString(f, buf.String(), foldFlag)).WriteTo(writer); err != nil { 19 | return errs.Wrap(err) 20 | } 21 | return nil 22 | } 23 | 24 | //ConvertString function converts kana character in text string. 25 | func ConvertString(f Form, txt string, foldFlag bool) string { 26 | switch f { 27 | case Hiragana: 28 | return kkconv.Hiragana(txt, foldFlag) 29 | case Katakana: 30 | return kkconv.Katakana(txt, foldFlag) 31 | case Chokuon: 32 | return kkconv.Chokuon(txt, foldFlag) 33 | default: 34 | return txt 35 | } 36 | } 37 | 38 | /* Copyright 2020-2021 Spiegel 39 | * 40 | * Licensed under the Apache License, Version 2.0 (the "License"); 41 | * you may not use this file except in compliance with the License. 42 | * You may obtain a copy of the License at 43 | * 44 | * http://www.apache.org/licenses/LICENSE-2.0 45 | * 46 | * Unless required by applicable law or agreed to in writing, software 47 | * distributed under the License is distributed on an "AS IS" BASIS, 48 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 49 | * See the License for the specific language governing permissions and 50 | * limitations under the License. 
51 | */ 52 | -------------------------------------------------------------------------------- /kana/kana_test.go: -------------------------------------------------------------------------------- 1 | package kana 2 | 3 | import ( 4 | "bytes" 5 | "errors" 6 | "fmt" 7 | "strings" 8 | "testing" 9 | 10 | "github.com/goark/gnkf/dump" 11 | "github.com/goark/gnkf/ecode" 12 | ) 13 | 14 | func TestFormList(t *testing.T) { 15 | res := "hiragana|katakana|chokuon" 16 | str := strings.Join(FormList(), "|") 17 | if str != res { 18 | t.Errorf("FormList() = \"%+v\", want \"%+v\".", str, res) 19 | } 20 | } 21 | 22 | func TestTranslate(t *testing.T) { 23 | testCases := []struct { 24 | inp, out []byte 25 | formName string 26 | err error 27 | }{ 28 | { 29 | //"ァアィイゥウェエォオカガキギクグケゲコゴサザシジスズセゼソゾタダチヂッツヅテデトドナニヌネノハバパヒビピフブプヘベペホボポマミムメモャヤュユョヨラリルレロヮワヰヱヲンヴヵヶヷヸヹヺヽヾヿㇰㇱㇲㇳㇴㇵㇶㇷㇸㇹㇺㇻㇼㇽㇾㇿヲァィゥェォャュョッアイウエオカキクケコサシスセソタチツテトナニヌネノハヒフヘホマミムメモヤユヨラリルレロワン" + string([]rune{0x1b164, 0x1b165, 0x1b166, 0x1b167})) 30 | inp: []byte(string([]rune{0x30a1, 0x30a2, 0x30a3, 0x30a4, 0x30a5, 0x30a6, 0x30a7, 0x30a8, 0x30a9, 0x30aa, 0x30ab, 0x30ac, 0x30ad, 0x30ae, 0x30af, 0x30b0, 0x30b1, 0x30b2, 0x30b3, 0x30b4, 0x30b5, 0x30b6, 0x30b7, 0x30b8, 0x30b9, 0x30ba, 0x30bb, 0x30bc, 0x30bd, 0x30be, 0x30bf, 0x30c0, 0x30c1, 0x30c2, 0x30c3, 0x30c4, 0x30c5, 0x30c6, 0x30c7, 0x30c8, 0x30c9, 0x30ca, 0x30cb, 0x30cc, 0x30cd, 0x30ce, 0x30cf, 0x30d0, 0x30d1, 0x30d2, 0x30d3, 0x30d4, 0x30d5, 0x30d6, 0x30d7, 0x30d8, 0x30d9, 0x30da, 0x30db, 0x30dc, 0x30dd, 0x30de, 0x30df, 0x30e0, 0x30e1, 0x30e2, 0x30e3, 0x30e4, 0x30e5, 0x30e6, 0x30e7, 0x30e8, 0x30e9, 0x30ea, 0x30eb, 0x30ec, 0x30ed, 0x30ee, 0x30ef, 0x30f0, 0x30f1, 0x30f2, 0x30f3, 0x30f4, 0x30f5, 0x30f6, 0x30f7, 0x30f8, 0x30f9, 0x30fa, 0x30fd, 0x30fe, 0x30ff, 0x31f0, 0x31f1, 0x31f2, 0x31f3, 0x31f4, 0x31f5, 0x31f6, 0x31f7, 0x31f8, 0x31f9, 0x31fa, 0x31fb, 0x31fc, 0x31fd, 0x31fe, 0x31ff, 0xff66, 0xff67, 0xff68, 0xff69, 0xff6a, 0xff6b, 0xff6c, 0xff6d, 0xff6e, 0xff6f, 0xff71, 0xff72, 0xff73, 0xff74, 0xff75, 
0xff76, 0xff77, 0xff78, 0xff79, 0xff7a, 0xff7b, 0xff7c, 0xff7d, 0xff7e, 0xff7f, 0xff80, 0xff81, 0xff82, 0xff83, 0xff84, 0xff85, 0xff86, 0xff87, 0xff88, 0xff89, 0xff8a, 0xff8b, 0xff8c, 0xff8d, 0xff8e, 0xff8f, 0xff90, 0xff91, 0xff92, 0xff93, 0xff94, 0xff95, 0xff96, 0xff97, 0xff98, 0xff99, 0xff9a, 0xff9b, 0xff9c, 0xff9d, 0x1b164, 0x1b165, 0x1b166, 0x1b167})), 31 | out: []byte("ぁあぃいぅうぇえぉおかがきぎくぐけげこごさざしじすずせぜそぞただちぢっつづてでとどなにぬねのはばぱひびぴふぶぷへべぺほぼぽまみむめもゃやゅゆょよらりるれろゎわゐゑをんゔゕゖわ゙ゐ゙ゑ゙を゙ゝゞヿくしすとぬはひふへほむらりるれろヲァィゥェォャュョッアイウエオカキクケコサシスセソタチツテトナニヌネノハヒフヘホマミムメモヤユヨラリルレロワン" + string([]rune{0x1b150, 0x1b151, 0x1b152}) + "ん"), 32 | formName: "hiragana", 33 | err: nil, 34 | }, 35 | { 36 | //"ぁあぃいぅうぇえぉおかがきぎくぐけげこごさざしじすずせぜそぞただちぢっつづてでとどなにぬねのはばぱひびぴふぶぷへべぺほぼぽまみむめもゃやゅゆょよらりるれろゎわゐゑをんゔゕゖゝゞゟ" + string([]rune{0x1b150, 0x1b151, 0x1b152})) 37 | inp: []byte(string([]rune{0x3041, 0x3042, 0x3043, 0x3044, 0x3045, 0x3046, 0x3047, 0x3048, 0x3049, 0x304a, 0x304b, 0x304c, 0x304d, 0x304e, 0x304f, 0x3050, 0x3051, 0x3052, 0x3053, 0x3054, 0x3055, 0x3056, 0x3057, 0x3058, 0x3059, 0x305a, 0x305b, 0x305c, 0x305d, 0x305e, 0x305f, 0x3060, 0x3061, 0x3062, 0x3063, 0x3064, 0x3065, 0x3066, 0x3067, 0x3068, 0x3069, 0x306a, 0x306b, 0x306c, 0x306d, 0x306e, 0x306f, 0x3070, 0x3071, 0x3072, 0x3073, 0x3074, 0x3075, 0x3076, 0x3077, 0x3078, 0x3079, 0x307a, 0x307b, 0x307c, 0x307d, 0x307e, 0x307f, 0x3080, 0x3081, 0x3082, 0x3083, 0x3084, 0x3085, 0x3086, 0x3087, 0x3088, 0x3089, 0x308a, 0x308b, 0x308c, 0x308d, 0x308e, 0x308f, 0x3090, 0x3091, 0x3092, 0x3093, 0x3094, 0x3095, 0x3096, 0x309d, 0x309e, 0x309f, 0x1b150, 0x1b151, 0x1b152})), 38 | out: []byte("ぁあぃいぅうぇえぉおかがきぎくぐけげこごさざしじすずせぜそぞただちぢっつづてでとどなにぬねのはばぱひびぴふぶぷへべぺほぼぽまみむめもゃやゅゆょよらりるれろゎわゐゑをんゔゕゖゝゞゟ" + string([]rune{0x1b150, 0x1b151, 0x1b152})), 39 | formName: "hiragana", 40 | err: nil, 41 | }, 42 | { 43 | //"ァアィイゥウェエォオカガキギクグケゲコゴサザシジスズセゼソゾタダチヂッツヅテデトドナニヌネノハバパヒビピフブプヘベペホボポマミムメモャヤュユョヨラリルレロヮワヰヱヲンヴヵヶヷヸヹヺヽヾヿㇰㇱㇲㇳㇴㇵㇶㇷㇸㇹㇺㇻㇼㇽㇾㇿヲァィゥェォャュョッアイウエオカキクケコサシスセソタチツテトナニヌネノハヒフヘホマミムメモヤユヨラリルレロワン" + string([]rune{0x1b164, 0x1b165, 
0x1b166, 0x1b167})) 44 | inp: []byte(string([]rune{0x30a1, 0x30a2, 0x30a3, 0x30a4, 0x30a5, 0x30a6, 0x30a7, 0x30a8, 0x30a9, 0x30aa, 0x30ab, 0x30ac, 0x30ad, 0x30ae, 0x30af, 0x30b0, 0x30b1, 0x30b2, 0x30b3, 0x30b4, 0x30b5, 0x30b6, 0x30b7, 0x30b8, 0x30b9, 0x30ba, 0x30bb, 0x30bc, 0x30bd, 0x30be, 0x30bf, 0x30c0, 0x30c1, 0x30c2, 0x30c3, 0x30c4, 0x30c5, 0x30c6, 0x30c7, 0x30c8, 0x30c9, 0x30ca, 0x30cb, 0x30cc, 0x30cd, 0x30ce, 0x30cf, 0x30d0, 0x30d1, 0x30d2, 0x30d3, 0x30d4, 0x30d5, 0x30d6, 0x30d7, 0x30d8, 0x30d9, 0x30da, 0x30db, 0x30dc, 0x30dd, 0x30de, 0x30df, 0x30e0, 0x30e1, 0x30e2, 0x30e3, 0x30e4, 0x30e5, 0x30e6, 0x30e7, 0x30e8, 0x30e9, 0x30ea, 0x30eb, 0x30ec, 0x30ed, 0x30ee, 0x30ef, 0x30f0, 0x30f1, 0x30f2, 0x30f3, 0x30f4, 0x30f5, 0x30f6, 0x30f7, 0x30f8, 0x30f9, 0x30fa, 0x30fd, 0x30fe, 0x30ff, 0x31f0, 0x31f1, 0x31f2, 0x31f3, 0x31f4, 0x31f5, 0x31f6, 0x31f7, 0x31f8, 0x31f9, 0x31fa, 0x31fb, 0x31fc, 0x31fd, 0x31fe, 0x31ff, 0xff66, 0xff67, 0xff68, 0xff69, 0xff6a, 0xff6b, 0xff6c, 0xff6d, 0xff6e, 0xff6f, 0xff71, 0xff72, 0xff73, 0xff74, 0xff75, 0xff76, 0xff77, 0xff78, 0xff79, 0xff7a, 0xff7b, 0xff7c, 0xff7d, 0xff7e, 0xff7f, 0xff80, 0xff81, 0xff82, 0xff83, 0xff84, 0xff85, 0xff86, 0xff87, 0xff88, 0xff89, 0xff8a, 0xff8b, 0xff8c, 0xff8d, 0xff8e, 0xff8f, 0xff90, 0xff91, 0xff92, 0xff93, 0xff94, 0xff95, 0xff96, 0xff97, 0xff98, 0xff99, 0xff9a, 0xff9b, 0xff9c, 0xff9d, 0x1b164, 0x1b165, 0x1b166, 0x1b167})), 45 | out: []byte("ァアィイゥウェエォオカガキギクグケゲコゴサザシジスズセゼソゾタダチヂッツヅテデトドナニヌネノハバパヒビピフブプヘベペホボポマミムメモャヤュユョヨラリルレロヮワヰヱヲンヴヵヶヷヸヹヺヽヾヿㇰㇱㇲㇳㇴㇵㇶㇷㇸㇹㇺㇻㇼㇽㇾㇿヲァィゥェォャュョッアイウエオカキクケコサシスセソタチツテトナニヌネノハヒフヘホマミムメモヤユヨラリルレロワン" + string([]rune{0x1b164, 0x1b165, 0x1b166, 0x1b167})), 46 | formName: "katakana", 47 | err: nil, 48 | }, 49 | { 50 | //"ぁあぃいぅうぇえぉおかがきぎくぐけげこごさざしじすずせぜそぞただちぢっつづてでとどなにぬねのはばぱひびぴふぶぷへべぺほぼぽまみむめもゃやゅゆょよらりるれろゎわゐゑをんゔゕゖゝゞゟ" + string([]rune{0x1b150, 0x1b151, 0x1b152})) 51 | inp: []byte(string([]rune{0x3041, 0x3042, 0x3043, 0x3044, 0x3045, 0x3046, 0x3047, 0x3048, 0x3049, 0x304a, 0x304b, 0x304c, 0x304d, 0x304e, 0x304f, 
0x3050, 0x3051, 0x3052, 0x3053, 0x3054, 0x3055, 0x3056, 0x3057, 0x3058, 0x3059, 0x305a, 0x305b, 0x305c, 0x305d, 0x305e, 0x305f, 0x3060, 0x3061, 0x3062, 0x3063, 0x3064, 0x3065, 0x3066, 0x3067, 0x3068, 0x3069, 0x306a, 0x306b, 0x306c, 0x306d, 0x306e, 0x306f, 0x3070, 0x3071, 0x3072, 0x3073, 0x3074, 0x3075, 0x3076, 0x3077, 0x3078, 0x3079, 0x307a, 0x307b, 0x307c, 0x307d, 0x307e, 0x307f, 0x3080, 0x3081, 0x3082, 0x3083, 0x3084, 0x3085, 0x3086, 0x3087, 0x3088, 0x3089, 0x308a, 0x308b, 0x308c, 0x308d, 0x308e, 0x308f, 0x3090, 0x3091, 0x3092, 0x3093, 0x3094, 0x3095, 0x3096, 0x309d, 0x309e, 0x309f, 0x1b150, 0x1b151, 0x1b152})), 52 | out: []byte("ァアィイゥウェエォオカガキギクグケゲコゴサザシジスズセゼソゾタダチヂッツヅテデトドナニヌネノハバパヒビピフブプヘベペホボポマミムメモャヤュユョヨラリルレロヮワヰヱヲンヴヵヶヽヾゟ" + string([]rune{0x1b164, 0x1b165, 0x1b166})), 53 | formName: "katakana", 54 | err: nil, 55 | }, 56 | { 57 | //"ァアィイゥウェエォオカガキギクグケゲコゴサザシジスズセゼソゾタダチヂッツヅテデトドナニヌネノハバパヒビピフブプヘベペホボポマミムメモャヤュユョヨラリルレロヮワヰヱヲンヴヵヶヷヸヹヺヽヾヿㇰㇱㇲㇳㇴㇵㇶㇷㇸㇹㇺㇻㇼㇽㇾㇿヲァィゥェォャュョッアイウエオカキクケコサシスセソタチツテトナニヌネノハヒフヘホマミムメモヤユヨラリルレロワン" + string([]rune{0x1b164, 0x1b165, 0x1b166, 0x1b167})) 58 | inp: []byte(string([]rune{0x30a1, 0x30a2, 0x30a3, 0x30a4, 0x30a5, 0x30a6, 0x30a7, 0x30a8, 0x30a9, 0x30aa, 0x30ab, 0x30ac, 0x30ad, 0x30ae, 0x30af, 0x30b0, 0x30b1, 0x30b2, 0x30b3, 0x30b4, 0x30b5, 0x30b6, 0x30b7, 0x30b8, 0x30b9, 0x30ba, 0x30bb, 0x30bc, 0x30bd, 0x30be, 0x30bf, 0x30c0, 0x30c1, 0x30c2, 0x30c3, 0x30c4, 0x30c5, 0x30c6, 0x30c7, 0x30c8, 0x30c9, 0x30ca, 0x30cb, 0x30cc, 0x30cd, 0x30ce, 0x30cf, 0x30d0, 0x30d1, 0x30d2, 0x30d3, 0x30d4, 0x30d5, 0x30d6, 0x30d7, 0x30d8, 0x30d9, 0x30da, 0x30db, 0x30dc, 0x30dd, 0x30de, 0x30df, 0x30e0, 0x30e1, 0x30e2, 0x30e3, 0x30e4, 0x30e5, 0x30e6, 0x30e7, 0x30e8, 0x30e9, 0x30ea, 0x30eb, 0x30ec, 0x30ed, 0x30ee, 0x30ef, 0x30f0, 0x30f1, 0x30f2, 0x30f3, 0x30f4, 0x30f5, 0x30f6, 0x30f7, 0x30f8, 0x30f9, 0x30fa, 0x30fd, 0x30fe, 0x30ff, 0x31f0, 0x31f1, 0x31f2, 0x31f3, 0x31f4, 0x31f5, 0x31f6, 0x31f7, 0x31f8, 0x31f9, 0x31fa, 0x31fb, 0x31fc, 0x31fd, 0x31fe, 0x31ff, 0xff66, 0xff67, 0xff68, 0xff69, 
0xff6a, 0xff6b, 0xff6c, 0xff6d, 0xff6e, 0xff6f, 0xff71, 0xff72, 0xff73, 0xff74, 0xff75, 0xff76, 0xff77, 0xff78, 0xff79, 0xff7a, 0xff7b, 0xff7c, 0xff7d, 0xff7e, 0xff7f, 0xff80, 0xff81, 0xff82, 0xff83, 0xff84, 0xff85, 0xff86, 0xff87, 0xff88, 0xff89, 0xff8a, 0xff8b, 0xff8c, 0xff8d, 0xff8e, 0xff8f, 0xff90, 0xff91, 0xff92, 0xff93, 0xff94, 0xff95, 0xff96, 0xff97, 0xff98, 0xff99, 0xff9a, 0xff9b, 0xff9c, 0xff9d, 0x1b164, 0x1b165, 0x1b166, 0x1b167})), 59 | out: []byte("アアイイウウエエオオカガキギクグケゲコゴサザシジスズセゼソゾタダチヂツツヅテデトドナニヌネノハバパヒビピフブプヘベペホボポマミムメモヤヤユユヨヨラリルレロワワヰヱヲンヴカケヷヸヹヺヽヾヿクシストヌハヒフヘホムラリルレロヲアイウエオヤユヨツアイウエオカキクケコサシスセソタチツテトナニヌネノハヒフヘホマミムメモヤユヨラリルレロワンヰヱヲン"), 60 | formName: "chokuon", 61 | err: nil, 62 | }, 63 | { 64 | //"ぁあぃいぅうぇえぉおかがきぎくぐけげこごさざしじすずせぜそぞただちぢっつづてでとどなにぬねのはばぱひびぴふぶぷへべぺほぼぽまみむめもゃやゅゆょよらりるれろゎわゐゑをんゔゕゖゝゞゟ" + string([]rune{0x1b150, 0x1b151, 0x1b152})) 65 | inp: []byte(string([]rune{0x3041, 0x3042, 0x3043, 0x3044, 0x3045, 0x3046, 0x3047, 0x3048, 0x3049, 0x304a, 0x304b, 0x304c, 0x304d, 0x304e, 0x304f, 0x3050, 0x3051, 0x3052, 0x3053, 0x3054, 0x3055, 0x3056, 0x3057, 0x3058, 0x3059, 0x305a, 0x305b, 0x305c, 0x305d, 0x305e, 0x305f, 0x3060, 0x3061, 0x3062, 0x3063, 0x3064, 0x3065, 0x3066, 0x3067, 0x3068, 0x3069, 0x306a, 0x306b, 0x306c, 0x306d, 0x306e, 0x306f, 0x3070, 0x3071, 0x3072, 0x3073, 0x3074, 0x3075, 0x3076, 0x3077, 0x3078, 0x3079, 0x307a, 0x307b, 0x307c, 0x307d, 0x307e, 0x307f, 0x3080, 0x3081, 0x3082, 0x3083, 0x3084, 0x3085, 0x3086, 0x3087, 0x3088, 0x3089, 0x308a, 0x308b, 0x308c, 0x308d, 0x308e, 0x308f, 0x3090, 0x3091, 0x3092, 0x3093, 0x3094, 0x3095, 0x3096, 0x309d, 0x309e, 0x309f, 0x1b150, 0x1b151, 0x1b152})), 66 | out: []byte("ああいいううええおおかがきぎくぐけげこごさざしじすずせぜそぞただちぢつつづてでとどなにぬねのはばぱひびぴふぶぷへべぺほぼぽまみむめもややゆゆよよらりるれろわわゐゑをんゔかけゝゞゟゐゑを"), 67 | formName: "chokuon", 68 | err: nil, 69 | }, 70 | { 71 | inp: []byte("あいうえおわゐゑをんゔゕゖゝゞアイウエオワヰヱヲンヴヵヶヽヾアイウエオツヤユヨァィゥェォッャュョ"), 72 | out: []byte{}, 73 | formName: "foo", 74 | err: ecode.ErrInvalidKanaForm, 75 | }, 76 | } 77 | for _, tc := range testCases { 78 | f, err := 
FormOf(tc.formName) 79 | if !errors.Is(err, tc.err) { 80 | t.Errorf("FormOf(%v) error = \"%+v\", want \"%+v\".", tc.formName, err, tc.err) 81 | } 82 | if err == nil { 83 | buf := &bytes.Buffer{} 84 | if err := Convert(f, buf, bytes.NewReader(tc.inp), false); err != nil { 85 | if err != nil { 86 | t.Errorf("Convert() error = \"%+v\", want nil.", err) 87 | } 88 | } else if !bytes.Equal(buf.Bytes(), tc.out) { 89 | fmt.Println(buf.String()) 90 | t.Errorf("Convert(%s) -> %s, want %s", tc.formName, dump.OctetString(bytes.NewReader(tc.inp)), dump.OctetString(buf)) 91 | } 92 | } 93 | } 94 | } 95 | 96 | func TestTranslateFold(t *testing.T) { 97 | testCases := []struct { 98 | inp, out []byte 99 | formName string 100 | err error 101 | }{ 102 | { 103 | //"ァアィイゥウェエォオカガキギクグケゲコゴサザシジスズセゼソゾタダチヂッツヅテデトドナニヌネノハバパヒビピフブプヘベペホボポマミムメモャヤュユョヨラリルレロヮワヰヱヲンヴヵヶヷヸヹヺヽヾヿㇰㇱㇲㇳㇴㇵㇶㇷㇸㇹㇺㇻㇼㇽㇾㇿヲァィゥェォャュョッアイウエオカキクケコサシスセソタチツテトナニヌネノハヒフヘホマミムメモヤユヨラリルレロワン" + string([]rune{0x1b164, 0x1b165, 0x1b166, 0x1b167})) 104 | inp: []byte(string([]rune{0x30a1, 0x30a2, 0x30a3, 0x30a4, 0x30a5, 0x30a6, 0x30a7, 0x30a8, 0x30a9, 0x30aa, 0x30ab, 0x30ac, 0x30ad, 0x30ae, 0x30af, 0x30b0, 0x30b1, 0x30b2, 0x30b3, 0x30b4, 0x30b5, 0x30b6, 0x30b7, 0x30b8, 0x30b9, 0x30ba, 0x30bb, 0x30bc, 0x30bd, 0x30be, 0x30bf, 0x30c0, 0x30c1, 0x30c2, 0x30c3, 0x30c4, 0x30c5, 0x30c6, 0x30c7, 0x30c8, 0x30c9, 0x30ca, 0x30cb, 0x30cc, 0x30cd, 0x30ce, 0x30cf, 0x30d0, 0x30d1, 0x30d2, 0x30d3, 0x30d4, 0x30d5, 0x30d6, 0x30d7, 0x30d8, 0x30d9, 0x30da, 0x30db, 0x30dc, 0x30dd, 0x30de, 0x30df, 0x30e0, 0x30e1, 0x30e2, 0x30e3, 0x30e4, 0x30e5, 0x30e6, 0x30e7, 0x30e8, 0x30e9, 0x30ea, 0x30eb, 0x30ec, 0x30ed, 0x30ee, 0x30ef, 0x30f0, 0x30f1, 0x30f2, 0x30f3, 0x30f4, 0x30f5, 0x30f6, 0x30f7, 0x30f8, 0x30f9, 0x30fa, 0x30fd, 0x30fe, 0x30ff, 0x31f0, 0x31f1, 0x31f2, 0x31f3, 0x31f4, 0x31f5, 0x31f6, 0x31f7, 0x31f8, 0x31f9, 0x31fa, 0x31fb, 0x31fc, 0x31fd, 0x31fe, 0x31ff, 0xff66, 0xff67, 0xff68, 0xff69, 0xff6a, 0xff6b, 0xff6c, 0xff6d, 0xff6e, 0xff6f, 0xff71, 0xff72, 0xff73, 0xff74, 0xff75, 
0xff76, 0xff77, 0xff78, 0xff79, 0xff7a, 0xff7b, 0xff7c, 0xff7d, 0xff7e, 0xff7f, 0xff80, 0xff81, 0xff82, 0xff83, 0xff84, 0xff85, 0xff86, 0xff87, 0xff88, 0xff89, 0xff8a, 0xff8b, 0xff8c, 0xff8d, 0xff8e, 0xff8f, 0xff90, 0xff91, 0xff92, 0xff93, 0xff94, 0xff95, 0xff96, 0xff97, 0xff98, 0xff99, 0xff9a, 0xff9b, 0xff9c, 0xff9d, 0x1b164, 0x1b165, 0x1b166, 0x1b167})), 105 | out: []byte("ぁあぃいぅうぇえぉおかがきぎくぐけげこごさざしじすずせぜそぞただちぢっつづてでとどなにぬねのはばぱひびぴふぶぷへべぺほぼぽまみむめもゃやゅゆょよらりるれろゎわゐゑをんゔゕゖわ゙ゐ゙ゑ゙を゙ゝゞヿくしすとぬはひふへほむらりるれろをぁぃぅぇぉゃゅょっあいうえおかきくけこさしすせそたちつてとなにぬねのはひふへほまみむめもやゆよらりるれろわん" + string([]rune{0x1b150, 0x1b151, 0x1b152}) + "ん"), 106 | formName: "hiragana", 107 | err: nil, 108 | }, 109 | { 110 | //"ぁあぃいぅうぇえぉおかがきぎくぐけげこごさざしじすずせぜそぞただちぢっつづてでとどなにぬねのはばぱひびぴふぶぷへべぺほぼぽまみむめもゃやゅゆょよらりるれろゎわゐゑをんゔゕゖゝゞゟ" + string([]rune{0x1b150, 0x1b151, 0x1b152})) 111 | inp: []byte(string([]rune{0x3041, 0x3042, 0x3043, 0x3044, 0x3045, 0x3046, 0x3047, 0x3048, 0x3049, 0x304a, 0x304b, 0x304c, 0x304d, 0x304e, 0x304f, 0x3050, 0x3051, 0x3052, 0x3053, 0x3054, 0x3055, 0x3056, 0x3057, 0x3058, 0x3059, 0x305a, 0x305b, 0x305c, 0x305d, 0x305e, 0x305f, 0x3060, 0x3061, 0x3062, 0x3063, 0x3064, 0x3065, 0x3066, 0x3067, 0x3068, 0x3069, 0x306a, 0x306b, 0x306c, 0x306d, 0x306e, 0x306f, 0x3070, 0x3071, 0x3072, 0x3073, 0x3074, 0x3075, 0x3076, 0x3077, 0x3078, 0x3079, 0x307a, 0x307b, 0x307c, 0x307d, 0x307e, 0x307f, 0x3080, 0x3081, 0x3082, 0x3083, 0x3084, 0x3085, 0x3086, 0x3087, 0x3088, 0x3089, 0x308a, 0x308b, 0x308c, 0x308d, 0x308e, 0x308f, 0x3090, 0x3091, 0x3092, 0x3093, 0x3094, 0x3095, 0x3096, 0x309d, 0x309e, 0x309f, 0x1b150, 0x1b151, 0x1b152})), 112 | out: []byte("ぁあぃいぅうぇえぉおかがきぎくぐけげこごさざしじすずせぜそぞただちぢっつづてでとどなにぬねのはばぱひびぴふぶぷへべぺほぼぽまみむめもゃやゅゆょよらりるれろゎわゐゑをんゔゕゖゝゞゟ" + string([]rune{0x1b150, 0x1b151, 0x1b152})), 113 | formName: "hiragana", 114 | err: nil, 115 | }, 116 | { 117 | //"ァアィイゥウェエォオカガキギクグケゲコゴサザシジスズセゼソゾタダチヂッツヅテデトドナニヌネノハバパヒビピフブプヘベペホボポマミムメモャヤュユョヨラリルレロヮワヰヱヲンヴヵヶヷヸヹヺヽヾヿㇰㇱㇲㇳㇴㇵㇶㇷㇸㇹㇺㇻㇼㇽㇾㇿヲァィゥェォャュョッアイウエオカキクケコサシスセソタチツテトナニヌネノハヒフヘホマミムメモヤユヨラリルレロワン" + string([]rune{0x1b164, 
0x1b165, 0x1b166, 0x1b167})) 118 | inp: []byte(string([]rune{0x30a1, 0x30a2, 0x30a3, 0x30a4, 0x30a5, 0x30a6, 0x30a7, 0x30a8, 0x30a9, 0x30aa, 0x30ab, 0x30ac, 0x30ad, 0x30ae, 0x30af, 0x30b0, 0x30b1, 0x30b2, 0x30b3, 0x30b4, 0x30b5, 0x30b6, 0x30b7, 0x30b8, 0x30b9, 0x30ba, 0x30bb, 0x30bc, 0x30bd, 0x30be, 0x30bf, 0x30c0, 0x30c1, 0x30c2, 0x30c3, 0x30c4, 0x30c5, 0x30c6, 0x30c7, 0x30c8, 0x30c9, 0x30ca, 0x30cb, 0x30cc, 0x30cd, 0x30ce, 0x30cf, 0x30d0, 0x30d1, 0x30d2, 0x30d3, 0x30d4, 0x30d5, 0x30d6, 0x30d7, 0x30d8, 0x30d9, 0x30da, 0x30db, 0x30dc, 0x30dd, 0x30de, 0x30df, 0x30e0, 0x30e1, 0x30e2, 0x30e3, 0x30e4, 0x30e5, 0x30e6, 0x30e7, 0x30e8, 0x30e9, 0x30ea, 0x30eb, 0x30ec, 0x30ed, 0x30ee, 0x30ef, 0x30f0, 0x30f1, 0x30f2, 0x30f3, 0x30f4, 0x30f5, 0x30f6, 0x30f7, 0x30f8, 0x30f9, 0x30fa, 0x30fd, 0x30fe, 0x30ff, 0x31f0, 0x31f1, 0x31f2, 0x31f3, 0x31f4, 0x31f5, 0x31f6, 0x31f7, 0x31f8, 0x31f9, 0x31fa, 0x31fb, 0x31fc, 0x31fd, 0x31fe, 0x31ff, 0xff66, 0xff67, 0xff68, 0xff69, 0xff6a, 0xff6b, 0xff6c, 0xff6d, 0xff6e, 0xff6f, 0xff71, 0xff72, 0xff73, 0xff74, 0xff75, 0xff76, 0xff77, 0xff78, 0xff79, 0xff7a, 0xff7b, 0xff7c, 0xff7d, 0xff7e, 0xff7f, 0xff80, 0xff81, 0xff82, 0xff83, 0xff84, 0xff85, 0xff86, 0xff87, 0xff88, 0xff89, 0xff8a, 0xff8b, 0xff8c, 0xff8d, 0xff8e, 0xff8f, 0xff90, 0xff91, 0xff92, 0xff93, 0xff94, 0xff95, 0xff96, 0xff97, 0xff98, 0xff99, 0xff9a, 0xff9b, 0xff9c, 0xff9d, 0x1b164, 0x1b165, 0x1b166, 0x1b167})), 119 | out: []byte("ァアィイゥウェエォオカガキギクグケゲコゴサザシジスズセゼソゾタダチヂッツヅテデトドナニヌネノハバパヒビピフブプヘベペホボポマミムメモャヤュユョヨラリルレロヮワヰヱヲンヴヵヶヷヸヹヺヽヾヿㇰㇱㇲㇳㇴㇵㇶㇷㇸㇹㇺㇻㇼㇽㇾㇿヲァィゥェォャュョッアイウエオカキクケコサシスセソタチツテトナニヌネノハヒフヘホマミムメモヤユヨラリルレロワン" + string([]rune{0x1b164, 0x1b165, 0x1b166, 0x1b167})), 120 | formName: "katakana", 121 | err: nil, 122 | }, 123 | { 124 | //"ぁあぃいぅうぇえぉおかがきぎくぐけげこごさざしじすずせぜそぞただちぢっつづてでとどなにぬねのはばぱひびぴふぶぷへべぺほぼぽまみむめもゃやゅゆょよらりるれろゎわゐゑをんゔゕゖゝゞゟ" + string([]rune{0x1b150, 0x1b151, 0x1b152})) 125 | inp: []byte(string([]rune{0x3041, 0x3042, 0x3043, 0x3044, 0x3045, 0x3046, 0x3047, 0x3048, 0x3049, 0x304a, 0x304b, 0x304c, 0x304d, 
0x304e, 0x304f, 0x3050, 0x3051, 0x3052, 0x3053, 0x3054, 0x3055, 0x3056, 0x3057, 0x3058, 0x3059, 0x305a, 0x305b, 0x305c, 0x305d, 0x305e, 0x305f, 0x3060, 0x3061, 0x3062, 0x3063, 0x3064, 0x3065, 0x3066, 0x3067, 0x3068, 0x3069, 0x306a, 0x306b, 0x306c, 0x306d, 0x306e, 0x306f, 0x3070, 0x3071, 0x3072, 0x3073, 0x3074, 0x3075, 0x3076, 0x3077, 0x3078, 0x3079, 0x307a, 0x307b, 0x307c, 0x307d, 0x307e, 0x307f, 0x3080, 0x3081, 0x3082, 0x3083, 0x3084, 0x3085, 0x3086, 0x3087, 0x3088, 0x3089, 0x308a, 0x308b, 0x308c, 0x308d, 0x308e, 0x308f, 0x3090, 0x3091, 0x3092, 0x3093, 0x3094, 0x3095, 0x3096, 0x309d, 0x309e, 0x309f, 0x1b150, 0x1b151, 0x1b152})), 126 | out: []byte("ァアィイゥウェエォオカガキギクグケゲコゴサザシジスズセゼソゾタダチヂッツヅテデトドナニヌネノハバパヒビピフブプヘベペホボポマミムメモャヤュユョヨラリルレロヮワヰヱヲンヴヵヶヽヾゟ" + string([]rune{0x1b164, 0x1b165, 0x1b166})), 127 | formName: "katakana", 128 | err: nil, 129 | }, 130 | { 131 | //"ァアィイゥウェエォオカガキギクグケゲコゴサザシジスズセゼソゾタダチヂッツヅテデトドナニヌネノハバパヒビピフブプヘベペホボポマミムメモャヤュユョヨラリルレロヮワヰヱヲンヴヵヶヷヸヹヺヽヾヿㇰㇱㇲㇳㇴㇵㇶㇷㇸㇹㇺㇻㇼㇽㇾㇿヲァィゥェォャュョッアイウエオカキクケコサシスセソタチツテトナニヌネノハヒフヘホマミムメモヤユヨラリルレロワン" + string([]rune{0x1b164, 0x1b165, 0x1b166, 0x1b167})) 132 | inp: []byte(string([]rune{0x30a1, 0x30a2, 0x30a3, 0x30a4, 0x30a5, 0x30a6, 0x30a7, 0x30a8, 0x30a9, 0x30aa, 0x30ab, 0x30ac, 0x30ad, 0x30ae, 0x30af, 0x30b0, 0x30b1, 0x30b2, 0x30b3, 0x30b4, 0x30b5, 0x30b6, 0x30b7, 0x30b8, 0x30b9, 0x30ba, 0x30bb, 0x30bc, 0x30bd, 0x30be, 0x30bf, 0x30c0, 0x30c1, 0x30c2, 0x30c3, 0x30c4, 0x30c5, 0x30c6, 0x30c7, 0x30c8, 0x30c9, 0x30ca, 0x30cb, 0x30cc, 0x30cd, 0x30ce, 0x30cf, 0x30d0, 0x30d1, 0x30d2, 0x30d3, 0x30d4, 0x30d5, 0x30d6, 0x30d7, 0x30d8, 0x30d9, 0x30da, 0x30db, 0x30dc, 0x30dd, 0x30de, 0x30df, 0x30e0, 0x30e1, 0x30e2, 0x30e3, 0x30e4, 0x30e5, 0x30e6, 0x30e7, 0x30e8, 0x30e9, 0x30ea, 0x30eb, 0x30ec, 0x30ed, 0x30ee, 0x30ef, 0x30f0, 0x30f1, 0x30f2, 0x30f3, 0x30f4, 0x30f5, 0x30f6, 0x30f7, 0x30f8, 0x30f9, 0x30fa, 0x30fd, 0x30fe, 0x30ff, 0x31f0, 0x31f1, 0x31f2, 0x31f3, 0x31f4, 0x31f5, 0x31f6, 0x31f7, 0x31f8, 0x31f9, 0x31fa, 0x31fb, 0x31fc, 0x31fd, 0x31fe, 0x31ff, 0xff66, 
0xff67, 0xff68, 0xff69, 0xff6a, 0xff6b, 0xff6c, 0xff6d, 0xff6e, 0xff6f, 0xff71, 0xff72, 0xff73, 0xff74, 0xff75, 0xff76, 0xff77, 0xff78, 0xff79, 0xff7a, 0xff7b, 0xff7c, 0xff7d, 0xff7e, 0xff7f, 0xff80, 0xff81, 0xff82, 0xff83, 0xff84, 0xff85, 0xff86, 0xff87, 0xff88, 0xff89, 0xff8a, 0xff8b, 0xff8c, 0xff8d, 0xff8e, 0xff8f, 0xff90, 0xff91, 0xff92, 0xff93, 0xff94, 0xff95, 0xff96, 0xff97, 0xff98, 0xff99, 0xff9a, 0xff9b, 0xff9c, 0xff9d, 0x1b164, 0x1b165, 0x1b166, 0x1b167})), 133 | out: []byte("アアイイウウエエオオカガキギクグケゲコゴサザシジスズセゼソゾタダチヂツツヅテデトドナニヌネノハバパヒビピフブプヘベペホボポマミムメモヤヤユユヨヨラリルレロワワヰヱヲンヴカケヷヸヹヺヽヾヿクシストヌハヒフヘホムラリルレロヲアイウエオヤユヨツアイウエオカキクケコサシスセソタチツテトナニヌネノハヒフヘホマミムメモヤユヨラリルレロワンヰヱヲン"), 134 | formName: "chokuon", 135 | err: nil, 136 | }, 137 | { 138 | //"ぁあぃいぅうぇえぉおかがきぎくぐけげこごさざしじすずせぜそぞただちぢっつづてでとどなにぬねのはばぱひびぴふぶぷへべぺほぼぽまみむめもゃやゅゆょよらりるれろゎわゐゑをんゔゕゖゝゞゟ" + string([]rune{0x1b150, 0x1b151, 0x1b152})) 139 | inp: []byte(string([]rune{0x3041, 0x3042, 0x3043, 0x3044, 0x3045, 0x3046, 0x3047, 0x3048, 0x3049, 0x304a, 0x304b, 0x304c, 0x304d, 0x304e, 0x304f, 0x3050, 0x3051, 0x3052, 0x3053, 0x3054, 0x3055, 0x3056, 0x3057, 0x3058, 0x3059, 0x305a, 0x305b, 0x305c, 0x305d, 0x305e, 0x305f, 0x3060, 0x3061, 0x3062, 0x3063, 0x3064, 0x3065, 0x3066, 0x3067, 0x3068, 0x3069, 0x306a, 0x306b, 0x306c, 0x306d, 0x306e, 0x306f, 0x3070, 0x3071, 0x3072, 0x3073, 0x3074, 0x3075, 0x3076, 0x3077, 0x3078, 0x3079, 0x307a, 0x307b, 0x307c, 0x307d, 0x307e, 0x307f, 0x3080, 0x3081, 0x3082, 0x3083, 0x3084, 0x3085, 0x3086, 0x3087, 0x3088, 0x3089, 0x308a, 0x308b, 0x308c, 0x308d, 0x308e, 0x308f, 0x3090, 0x3091, 0x3092, 0x3093, 0x3094, 0x3095, 0x3096, 0x309d, 0x309e, 0x309f, 0x1b150, 0x1b151, 0x1b152})), 140 | out: []byte("ああいいううええおおかがきぎくぐけげこごさざしじすずせぜそぞただちぢつつづてでとどなにぬねのはばぱひびぴふぶぷへべぺほぼぽまみむめもややゆゆよよらりるれろわわゐゑをんゔかけゝゞゟゐゑを"), 141 | formName: "chokuon", 142 | err: nil, 143 | }, 144 | { 145 | inp: []byte("あいうえおわゐゑをんゔゕゖゝゞアイウエオワヰヱヲンヴヵヶヽヾアイウエオツヤユヨァィゥェォッャュョ"), 146 | out: []byte{}, 147 | formName: "foo", 148 | err: ecode.ErrInvalidKanaForm, 149 | }, 150 | } 151 | for _, 
tc := range testCases { 152 | f, err := FormOf(tc.formName) 153 | if !errors.Is(err, tc.err) { 154 | t.Errorf("FormOf(%v) error = \"%+v\", want \"%+v\".", tc.formName, err, tc.err) 155 | } 156 | if err == nil { 157 | buf := &bytes.Buffer{} 158 | if err := Convert(f, buf, bytes.NewReader(tc.inp), true); err != nil { 159 | if err != nil { 160 | t.Errorf("Convert() error = \"%+v\", want nil.", err) 161 | } 162 | } else if !bytes.Equal(buf.Bytes(), tc.out) { 163 | fmt.Println(buf.String()) 164 | t.Errorf("Translate(%s) -> %s, want %s", tc.formName, dump.OctetString(bytes.NewReader(tc.inp)), dump.OctetString(buf)) 165 | } 166 | } 167 | } 168 | } 169 | 170 | /* Copyright 2020-2021 Spiegel 171 | * 172 | * Licensed under the Apache License, Version 2.0 (the "License"); 173 | * you may not use this file except in compliance with the License. 174 | * You may obtain a copy of the License at 175 | * 176 | * http://www.apache.org/licenses/LICENSE-2.0 177 | * 178 | * Unless required by applicable law or agreed to in writing, software 179 | * distributed under the License is distributed on an "AS IS" BASIS, 180 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 181 | * See the License for the specific language governing permissions and 182 | * limitations under the License. 
183 | */ 184 | -------------------------------------------------------------------------------- /main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | _ "crypto/md5" 5 | _ "crypto/sha1" 6 | _ "crypto/sha256" 7 | _ "crypto/sha512" 8 | "os" 9 | 10 | "github.com/goark/gnkf/facade" 11 | "github.com/goark/gocli/rwi" 12 | ) 13 | 14 | func main() { 15 | facade.Execute( 16 | rwi.New( 17 | rwi.WithReader(os.Stdin), 18 | rwi.WithWriter(os.Stdout), 19 | rwi.WithErrorWriter(os.Stderr), 20 | ), 21 | os.Args[1:], 22 | ).Exit() 23 | } 24 | 25 | /* Copyright 2020-2021 Spiegel 26 | * 27 | * Licensed under the Apache License, Version 2.0 (the "License"); 28 | * you may not use this file except in compliance with the License. 29 | * You may obtain a copy of the License at 30 | * 31 | * http://www.apache.org/licenses/LICENSE-2.0 32 | * 33 | * Unless required by applicable law or agreed to in writing, software 34 | * distributed under the License is distributed on an "AS IS" BASIS, 35 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 36 | * See the License for the specific language governing permissions and 37 | * limitations under the License. 
38 | */ 39 | -------------------------------------------------------------------------------- /newline/example_test.go: -------------------------------------------------------------------------------- 1 | package newline_test 2 | 3 | import ( 4 | "bytes" 5 | "fmt" 6 | "os" 7 | "strings" 8 | 9 | "github.com/goark/gnkf/dump" 10 | "github.com/goark/gnkf/newline" 11 | ) 12 | 13 | var text = `こんにちは 14 | 世界!` 15 | 16 | func ExampleConvert() { 17 | buf := &bytes.Buffer{} 18 | if err := newline.Convert("crlf", buf, strings.NewReader(text)); err != nil { 19 | fmt.Fprintln(os.Stderr, err) 20 | return 21 | } 22 | if err := dump.UnicodePoint(os.Stdout, buf); err != nil { 23 | fmt.Fprintln(os.Stderr, err) 24 | return 25 | } 26 | //Output: 27 | //0x3053, 0x3093, 0x306b, 0x3061, 0x306f, 0x000d, 0x000a, 0x4e16, 0x754c, 0xff01 28 | } 29 | 30 | /* Copyright 2020 Spiegel 31 | * 32 | * Licensed under the Apache License, Version 2.0 (the "License"); 33 | * you may not use this file except in compliance with the License. 34 | * You may obtain a copy of the License at 35 | * 36 | * http://www.apache.org/licenses/LICENSE-2.0 37 | * 38 | * Unless required by applicable law or agreed to in writing, software 39 | * distributed under the License is distributed on an "AS IS" BASIS, 40 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 41 | * See the License for the specific language governing permissions and 42 | * limitations under the License. 
43 | */ 44 | -------------------------------------------------------------------------------- /newline/form.go: -------------------------------------------------------------------------------- 1 | package newline 2 | 3 | import ( 4 | "strings" 5 | 6 | "github.com/goark/errs" 7 | "github.com/goark/gnkf/ecode" 8 | ) 9 | 10 | //Form is type of newline form 11 | type Form int 12 | 13 | const ( 14 | LF Form = iota //newline is '\n' only 15 | CR //newline is '\r' only 16 | CRLF //newline is '\r'+'\n' 17 | ) 18 | 19 | var ( 20 | formNamesMap = map[string]Form{ 21 | "lf": LF, 22 | "cr": CR, 23 | "crlf": CRLF, 24 | } 25 | newlineCodeMap = map[Form]string{ 26 | LF: "\n", 27 | CR: "\r", 28 | CRLF: "\r\n", 29 | } 30 | ) 31 | 32 | func formName(f Form) string { 33 | for key, value := range formNamesMap { 34 | if value == f { 35 | return key 36 | } 37 | } 38 | return "" 39 | } 40 | 41 | //FormList returns list of newline form 42 | func FormList() []string { 43 | return []string{ 44 | formName(LF), 45 | formName(CR), 46 | formName(CRLF), 47 | } 48 | } 49 | 50 | //FormOf returns newline form type from name string 51 | func FormOf(name string) (Form, error) { 52 | if f, ok := formNamesMap[strings.ToLower(name)]; ok { 53 | return f, nil 54 | } 55 | return Form(0), errs.Wrap(ecode.ErrInvalidNewlineForm, errs.WithContext("name", name)) 56 | } 57 | 58 | //Code returns newline code string 59 | func (f Form) Code() string { 60 | if c, ok := newlineCodeMap[f]; ok { 61 | return c 62 | } 63 | return "" 64 | } 65 | 66 | //NewReplacer returns strings.Replacer instance for translating newline 67 | func NewReplacer(frm Form) *strings.Replacer { 68 | return strings.NewReplacer( 69 | CRLF.Code(), frm.Code(), 70 | LF.Code(), frm.Code(), 71 | CR.Code(), frm.Code(), 72 | ) 73 | } 74 | 75 | /* Copyright 2020 Spiegel 76 | * 77 | * Licensed under the Apache License, Version 2.0 (the "License"); 78 | * you may not use this file except in compliance with the License.
79 | * You may obtain a copy of the License at 80 | * 81 | * http://www.apache.org/licenses/LICENSE-2.0 82 | * 83 | * Unless required by applicable law or agreed to in writing, software 84 | * distributed under the License is distributed on an "AS IS" BASIS, 85 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 86 | * See the License for the specific language governing permissions and 87 | * limitations under the License. 88 | */ 89 | -------------------------------------------------------------------------------- /newline/newline.go: -------------------------------------------------------------------------------- 1 | package newline 2 | 3 | import ( 4 | "bytes" 5 | "io" 6 | 7 | "github.com/goark/errs" 8 | ) 9 | 10 | //Convert function converts newlines in the text stream. 11 | func Convert(formName string, writer io.Writer, txt io.Reader) error { 12 | f, err := FormOf(formName) 13 | if err != nil { 14 | return errs.Wrap(err, errs.WithContext("formName", formName)) 15 | } 16 | 17 | buf := &bytes.Buffer{} 18 | if _, err := buf.ReadFrom(txt); err != nil { 19 | return errs.Wrap(err) 20 | } 21 | if _, err := NewReplacer(f).WriteString(writer, buf.String()); err != nil { 22 | return errs.Wrap(err) 23 | } 24 | return nil 25 | } 26 | 27 | /* Copyright 2020 Spiegel 28 | * 29 | * Licensed under the Apache License, Version 2.0 (the "License"); 30 | * you may not use this file except in compliance with the License. 31 | * You may obtain a copy of the License at 32 | * 33 | * http://www.apache.org/licenses/LICENSE-2.0 34 | * 35 | * Unless required by applicable law or agreed to in writing, software 36 | * distributed under the License is distributed on an "AS IS" BASIS, 37 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 38 | * See the License for the specific language governing permissions and 39 | * limitations under the License.
40 | */ 41 | -------------------------------------------------------------------------------- /newline/newline_test.go: -------------------------------------------------------------------------------- 1 | package newline 2 | 3 | import ( 4 | "bytes" 5 | "strings" 6 | "testing" 7 | 8 | "github.com/goark/errs" 9 | "github.com/goark/gnkf/dump" 10 | "github.com/goark/gnkf/ecode" 11 | ) 12 | 13 | func TestFormList(t *testing.T) { 14 | res := "lf|cr|crlf" 15 | str := strings.Join(FormList(), "|") 16 | if str != res { 17 | t.Errorf("FormList() = \"%+v\", want \"%+v\".", str, res) 18 | } 19 | } 20 | 21 | func TestTranslate(t *testing.T) { 22 | testCases := []struct { 23 | inp, out []byte 24 | formName string 25 | err error 26 | }{ 27 | { 28 | inp: []byte("abc\ndef\rghi\r\njkl"), 29 | out: []byte("abc\ndef\nghi\njkl"), 30 | formName: "lf", 31 | err: nil, 32 | }, 33 | { 34 | inp: []byte("abc\ndef\rghi\r\njkl"), 35 | out: []byte("abc\rdef\rghi\rjkl"), 36 | formName: "cr", 37 | err: nil, 38 | }, 39 | { 40 | inp: []byte("abc\ndef\rghi\r\njkl"), 41 | out: []byte("abc\r\ndef\r\nghi\r\njkl"), 42 | formName: "crlf", 43 | err: nil, 44 | }, 45 | { 46 | inp: []byte("abc\ndef\rghi\r\njkl"), 47 | out: []byte{}, 48 | formName: "foo", 49 | err: ecode.ErrInvalidNewlineForm, 50 | }, 51 | } 52 | for _, tc := range testCases { 53 | buf := &bytes.Buffer{} 54 | if err := Convert(tc.formName, buf, bytes.NewReader(tc.inp)); err != nil { 55 | if !errs.Is(err, tc.err) { 56 | t.Errorf("Translate() error = \"%+v\", want \"%+v\".", err, tc.err) 57 | } 58 | } else if !bytes.Equal(buf.Bytes(), tc.out) { 59 | t.Errorf("Translate(%s) -> %s, want %s", tc.formName, dump.OctetString(bytes.NewReader(tc.inp)), dump.OctetString(buf)) 60 | } 61 | } 62 | } 63 | 64 | /* Copyright 2020 Spiegel 65 | * 66 | * Licensed under the Apache License, Version 2.0 (the "License"); 67 | * you may not use this file except in compliance with the License. 
68 | * You may obtain a copy of the License at 69 | * 70 | * http://www.apache.org/licenses/LICENSE-2.0 71 | * 72 | * Unless required by applicable law or agreed to in writing, software 73 | * distributed under the License is distributed on an "AS IS" BASIS, 74 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 75 | * See the License for the specific language governing permissions and 76 | * limitations under the License. 77 | */ 78 | -------------------------------------------------------------------------------- /nrm/example_test.go: -------------------------------------------------------------------------------- 1 | package nrm_test 2 | 3 | import ( 4 | "bytes" 5 | "fmt" 6 | "os" 7 | "strings" 8 | 9 | "github.com/goark/gnkf/dump" 10 | "github.com/goark/gnkf/nrm" 11 | ) 12 | 13 | func ExampleNormalize() { 14 | buf := &bytes.Buffer{} 15 | if err := nrm.Normalize("nfkc", buf, strings.NewReader("ペンギン"), false); err != nil { 16 | fmt.Fprintln(os.Stderr, err) 17 | } 18 | if err := dump.UnicodePoint(os.Stdout, buf); err != nil { 19 | fmt.Fprintln(os.Stderr, err) 20 | } 21 | //Output: 22 | //0x30da, 0x30f3, 0x30ae, 0x30f3 23 | } 24 | 25 | /* Copyright 2020 Spiegel 26 | * 27 | * Licensed under the Apache License, Version 2.0 (the "License"); 28 | * you may not use this file except in compliance with the License. 29 | * You may obtain a copy of the License at 30 | * 31 | * http://www.apache.org/licenses/LICENSE-2.0 32 | * 33 | * Unless required by applicable law or agreed to in writing, software 34 | * distributed under the License is distributed on an "AS IS" BASIS, 35 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 36 | * See the License for the specific language governing permissions and 37 | * limitations under the License. 
38 | */ 39 | -------------------------------------------------------------------------------- /nrm/form.go: -------------------------------------------------------------------------------- 1 | package nrm 2 | 3 | import ( 4 | "strings" 5 | 6 | "github.com/goark/errs" 7 | "github.com/goark/gnkf/ecode" 8 | "golang.org/x/text/unicode/norm" 9 | ) 10 | 11 | var formNamesMap = map[string]norm.Form{ 12 | "nfc": norm.NFC, 13 | "nfd": norm.NFD, 14 | "nfkc": norm.NFKC, 15 | "nfkd": norm.NFKD, 16 | } 17 | 18 | func formName(f norm.Form) string { 19 | for key, value := range formNamesMap { 20 | if value == f { 21 | return key 22 | } 23 | } 24 | return "" 25 | } 26 | 27 | //FormList returns list of Unicode normalization form 28 | func FormList() []string { 29 | return []string{ 30 | formName(norm.NFC), 31 | formName(norm.NFD), 32 | formName(norm.NFKC), 33 | formName(norm.NFKD), 34 | } 35 | } 36 | 37 | //FormOf returns Unicode normalization form type from name string 38 | func FormOf(name string) (norm.Form, error) { 39 | if f, ok := formNamesMap[strings.ToLower(name)]; ok { 40 | return f, nil 41 | } 42 | return norm.Form(0), errs.Wrap(ecode.ErrInvalidNormForm, errs.WithContext("name", name)) 43 | } 44 | 45 | /* Copyright 2020 Spiegel 46 | * 47 | * Licensed under the Apache License, Version 2.0 (the "License"); 48 | * you may not use this file except in compliance with the License. 49 | * You may obtain a copy of the License at 50 | * 51 | * http://www.apache.org/licenses/LICENSE-2.0 52 | * 53 | * Unless required by applicable law or agreed to in writing, software 54 | * distributed under the License is distributed on an "AS IS" BASIS, 55 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 56 | * See the License for the specific language governing permissions and 57 | * limitations under the License. 
58 | */ 59 | -------------------------------------------------------------------------------- /nrm/norm.go: -------------------------------------------------------------------------------- 1 | package nrm 2 | 3 | import ( 4 | "io" 5 | 6 | "github.com/goark/errs" 7 | "golang.org/x/text/unicode/norm" 8 | ) 9 | 10 | //Normalize function normalizes Unicode text 11 | func Normalize(formName string, writer io.Writer, txt io.Reader, krFlag bool) error { 12 | f, err := FormOf(formName) 13 | if err != nil { 14 | return errs.Wrap(err, errs.WithContext("formName", formName)) 15 | } 16 | if (f == norm.NFKC || f == norm.NFKD) && krFlag { 17 | return NormKangxiRadicals(writer, txt) 18 | } 19 | 20 | if _, err := io.Copy(writer, f.Reader(txt)); err != nil { 21 | return errs.Wrap(err, errs.WithContext("formName", formName)) 22 | } 23 | return nil 24 | } 25 | 26 | /* Copyright 2020 Spiegel 27 | * 28 | * Licensed under the Apache License, Version 2.0 (the "License"); 29 | * you may not use this file except in compliance with the License. 30 | * You may obtain a copy of the License at 31 | * 32 | * http://www.apache.org/licenses/LICENSE-2.0 33 | * 34 | * Unless required by applicable law or agreed to in writing, software 35 | * distributed under the License is distributed on an "AS IS" BASIS, 36 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 37 | * See the License for the specific language governing permissions and 38 | * limitations under the License.
39 | */ 40 | -------------------------------------------------------------------------------- /nrm/nrm_test.go: -------------------------------------------------------------------------------- 1 | package nrm 2 | 3 | import ( 4 | "bytes" 5 | "os" 6 | "strings" 7 | "testing" 8 | 9 | "github.com/goark/errs" 10 | "github.com/goark/gnkf/dump" 11 | "github.com/goark/gnkf/ecode" 12 | ) 13 | 14 | func TestFormList(t *testing.T) { 15 | res := "nfc|nfd|nfkc|nfkd" 16 | str := strings.Join(FormList(), "|") 17 | if str != res { 18 | t.Errorf("FormList() = \"%+v\", want \"%+v\".", str, res) 19 | } 20 | } 21 | 22 | func TestNormalize(t *testing.T) { 23 | testCases := []struct { 24 | inp, out []byte 25 | formName string 26 | err error 27 | }{ 28 | { 29 | inp: []byte{0xe3, 0x83, 0x98, 0xe3, 0x82, 0x9a, 0xe3, 0x83, 0xb3, 0xe3, 0x82, 0xad, 0xe3, 0x82, 0x99, 0xe3, 0x83, 0xb3}, 30 | out: []byte("ペンギン"), 31 | formName: "nfc", 32 | err: nil, 33 | }, 34 | { 35 | inp: []byte("ペンギン"), 36 | out: []byte{0xe3, 0x83, 0x98, 0xe3, 0x82, 0x9a, 0xe3, 0x83, 0xb3, 0xe3, 0x82, 0xad, 0xe3, 0x82, 0x99, 0xe3, 0x83, 0xb3}, 37 | formName: "nfd", 38 | err: nil, 39 | }, 40 | { 41 | inp: []byte("ペンギン"), 42 | out: []byte("ペンギン"), 43 | formName: "nfkc", 44 | err: nil, 45 | }, 46 | { 47 | inp: []byte("ペンギン"), 48 | out: []byte{0xe3, 0x83, 0x98, 0xe3, 0x82, 0x9a, 0xe3, 0x83, 0xb3, 0xe3, 0x82, 0xad, 0xe3, 0x82, 0x99, 0xe3, 0x83, 0xb3}, 49 | formName: "nfkd", 50 | err: nil, 51 | }, 52 | { 53 | inp: []byte("ペンギン"), 54 | out: []byte{}, 55 | formName: "foo", 56 | err: ecode.ErrInvalidNormForm, 57 | }, 58 | } 59 | for _, tc := range testCases { 60 | buf := &bytes.Buffer{} 61 | if err := Normalize(tc.formName, buf, bytes.NewReader(tc.inp), false); err != nil { 62 | if !errs.Is(err, tc.err) { 63 | t.Errorf("Normalize() error = \"%+v\", want \"%+v\".", err, tc.err) 64 | } 65 | } else if !bytes.Equal(buf.Bytes(), tc.out) { 66 | t.Errorf("Normalize(%s) result wrong translation: ", tc.formName) 67 | _ = dump.Octet(os.Stdout, 
buf) 68 | } 69 | } 70 | } 71 | 72 | func TestNormKangxiRadicals(t *testing.T) { 73 | testCases := []struct { 74 | inp, out []byte 75 | formName string 76 | err error 77 | }{ 78 | { 79 | inp: []byte("㈱埼⽟"), //U+3231, U+57FC, U+2F5F 80 | out: []byte("㈱埼⽟"), //U+3231, U+57FC, U+2F5F (not translate) 81 | formName: "nfc", 82 | err: nil, 83 | }, 84 | { 85 | inp: []byte("㈱埼⽟"), //U+3231, U+57FC, U+2F5F 86 | out: []byte("㈱埼⽟"), //U+3231, U+57FC, U+2F5F (not translate) 87 | formName: "nfd", 88 | err: nil, 89 | }, 90 | { 91 | inp: []byte("㈱埼⽟"), //U+3231, U+57FC, U+2F5F 92 | out: []byte("㈱埼玉"), //U+3231, U+57FC, U+7389 93 | formName: "nfkc", 94 | err: nil, 95 | }, 96 | { 97 | inp: []byte("㈱埼⽟"), //U+3231, U+57FC, U+2F5F 98 | out: []byte("㈱埼玉"), //U+3231, U+57FC, U+7389 99 | formName: "nfkd", 100 | err: nil, 101 | }, 102 | { 103 | inp: []byte{0x82, 0xb1, 0x82, 0xf1, 0x82, 0xc9, 0x82, 0xbf, 0x82, 0xcd, 0x81, 0x43, 0x90, 0xa2, 0x8a, 0x45, 0x81, 0x49}, //"こんにちは,世界!" by Shift_JIS encoding 104 | out: []byte{}, 105 | formName: "nfkc", 106 | err: ecode.ErrInvalidUTF8Text, 107 | }, 108 | } 109 | for _, tc := range testCases { 110 | buf := &bytes.Buffer{} 111 | if err := Normalize(tc.formName, buf, bytes.NewReader(tc.inp), true); err != nil { 112 | if !errs.Is(err, tc.err) { 113 | t.Errorf("NormKangxiRadicals() error = \"%+v\", want \"%+v\".", err, tc.err) 114 | } 115 | } else if !bytes.Equal(buf.Bytes(), tc.out) { 116 | t.Error("NormKangxiRadicals() result wrong translation: ") 117 | _ = dump.Octet(os.Stdout, buf) 118 | } 119 | } 120 | } 121 | 122 | /* Copyright 2020 Spiegel 123 | * 124 | * Licensed under the Apache License, Version 2.0 (the "License"); 125 | * you may not use this file except in compliance with the License. 
126 | * You may obtain a copy of the License at 127 | * 128 | * http://www.apache.org/licenses/LICENSE-2.0 129 | * 130 | * Unless required by applicable law or agreed to in writing, software 131 | * distributed under the License is distributed on an "AS IS" BASIS, 132 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 133 | * See the License for the specific language governing permissions and 134 | * limitations under the License. 135 | */ 136 | -------------------------------------------------------------------------------- /nrm/radicals-sample/equivalent-unified-ideograph.csv: -------------------------------------------------------------------------------- 1 | radicals , normalize,note 2 | 2E81 , 5382 ,# CJK RADICAL CLIFF 3 | 2E82 , 4E5B ,# CJK RADICAL SECOND ONE 4 | 2E83 , 4E5A ,# CJK RADICAL SECOND TWO 5 | 2E84 , 4E59 ,# CJK RADICAL SECOND THREE 6 | 2E85 , 4EBB ,# CJK RADICAL PERSON 7 | 2E86 , 5182 ,# CJK RADICAL BOX 8 | 2E87 , 20628 ,# CJK RADICAL TABLE 9 | 2E88 , 5200 ,# CJK RADICAL KNIFE ONE 10 | 2E89 , 5202 ,# CJK RADICAL KNIFE TWO 11 | 2E8A , 535C ,# CJK RADICAL DIVINATION 12 | 2E8B , 353E ,# CJK RADICAL SEAL 13 | 2E8C , 5C0F ,# [2] CJK RADICAL SMALL ONE..CJK RADICAL SMALL TWO 14 | 2E8D , 5C0F ,# [2] CJK RADICAL SMALL ONE..CJK RADICAL SMALL TWO 15 | 2E8E , 5140 ,# CJK RADICAL LAME ONE 16 | 2E8F , 5C23 ,# CJK RADICAL LAME TWO 17 | 2E90 , 5C22 ,# CJK RADICAL LAME THREE 18 | 2E91 , 21BC2 ,# CJK RADICAL LAME FOUR 19 | 2E92 , 5DF3 ,# CJK RADICAL SNAKE 20 | 2E93 , 5E7A ,# CJK RADICAL THREAD 21 | 2E94 , 5F51 ,# CJK RADICAL SNOUT ONE 22 | 2E95 , 5F50 ,# CJK RADICAL SNOUT TWO 23 | 2E96 , 5FC4 ,# CJK RADICAL HEART ONE 24 | 2E97 , 5FC3 ,# CJK RADICAL HEART TWO 25 | 2E98 , 624C ,# CJK RADICAL HAND 26 | 2E99 , 6535 ,# CJK RADICAL RAP 27 | 2E9B , 65E1 ,# CJK RADICAL CHOKE 28 | 2E9C , 65E5 ,# CJK RADICAL SUN 29 | 2E9D , 6708 ,# CJK RADICAL MOON 30 | 2E9E , 6B7A ,# CJK RADICAL DEATH 31 | 2E9F , 6BCD ,# CJK RADICAL MOTHER 32 | 2EA0 , 6C11 ,# CJK 
RADICAL CIVILIAN 33 | 2EA1 , 6C35 ,# CJK RADICAL WATER ONE 34 | 2EA2 , 6C3A ,# CJK RADICAL WATER TWO 35 | 2EA3 , 706C ,# CJK RADICAL FIRE 36 | 2EA4 , 722B ,# [2] CJK RADICAL PAW ONE..CJK RADICAL PAW TWO 37 | 2EA5 , 722B ,# [2] CJK RADICAL PAW ONE..CJK RADICAL PAW TWO 38 | 2EA6 , 4E2C ,# CJK RADICAL SIMPLIFIED HALF TREE TRUNK 39 | 2EA7 , 725B ,# CJK RADICAL COW 40 | 2EA8 , 72AD ,# CJK RADICAL DOG 41 | 2EA9 , 738B ,# CJK RADICAL JADE 42 | 2EAA , 24D14 ,# CJK RADICAL BOLT OF CLOTH 43 | 2EAB , 76EE ,# CJK RADICAL EYE 44 | 2EAC , 793A ,# CJK RADICAL SPIRIT ONE 45 | 2EAD , 793B ,# CJK RADICAL SPIRIT TWO 46 | 2EAE , 25AD7 ,# CJK RADICAL BAMBOO 47 | 2EAF , 7CF9 ,# CJK RADICAL SILK 48 | 2EB0 , 7E9F ,# CJK RADICAL C-SIMPLIFIED SILK 49 | 2EB1 , 7F53 ,# CJK RADICAL NET ONE 50 | 2EB2 , 7F52 ,# CJK RADICAL NET TWO 51 | 2EB3 , 34C1 ,# CJK RADICAL NET THREE 52 | 2EB4 , 5197 ,# CJK RADICAL NET FOUR 53 | 2EB5 , 2626B ,# CJK RADICAL MESH 54 | 2EB6 , 7F8A ,# CJK RADICAL SHEEP 55 | 2EB7 , 2634C ,# CJK RADICAL RAM 56 | 2EB8 , 2634B ,# CJK RADICAL EWE 57 | 2EB9 , 8002 ,# CJK RADICAL OLD 58 | 2EBA , 8080 ,# CJK RADICAL BRUSH ONE 59 | 2EBB , 807F ,# CJK RADICAL BRUSH TWO 60 | 2EBC , 8089 ,# CJK RADICAL MEAT 61 | 2EBD , 26951 ,# CJK RADICAL MORTAR 62 | 2EBE , 8279 ,# [3] CJK RADICAL GRASS ONE..CJK RADICAL GRASS THREE 63 | 2EC0 , 8279 ,# [3] CJK RADICAL GRASS ONE..CJK RADICAL GRASS THREE 64 | 2EC1 , 864E ,# CJK RADICAL TIGER 65 | 2EC2 , 8864 ,# CJK RADICAL CLOTHES 66 | 2EC3 , 8980 ,# CJK RADICAL WEST ONE 67 | 2EC4 , 897F ,# CJK RADICAL WEST TWO 68 | 2EC5 , 89C1 ,# CJK RADICAL C-SIMPLIFIED SEE 69 | 2EC6 , 89D2 ,# CJK RADICAL SIMPLIFIED HORN 70 | 2EC7 , 278B2 ,# CJK RADICAL HORN 71 | 2EC8 , 8BA0 ,# CJK RADICAL C-SIMPLIFIED SPEECH 72 | 2EC9 , 8D1D ,# CJK RADICAL C-SIMPLIFIED SHELL 73 | 2ECA , 27FB7 ,# CJK RADICAL FOOT 74 | 2ECB , 8F66 ,# CJK RADICAL C-SIMPLIFIED CART 75 | 2ECC , 8FB6 ,# [3] CJK RADICAL SIMPLIFIED WALK..CJK RADICAL WALK TWO 76 | 2ECE , 8FB6 ,# [3] CJK RADICAL SIMPLIFIED 
WALK..CJK RADICAL WALK TWO 77 | 2ECF , 9091 ,# CJK RADICAL CITY 78 | 2ED0 , 9485 ,# CJK RADICAL C-SIMPLIFIED GOLD 79 | 2ED1 , 9577 ,# CJK RADICAL LONG ONE 80 | 2ED2 , 9578 ,# CJK RADICAL LONG TWO 81 | 2ED3 , 957F ,# CJK RADICAL C-SIMPLIFIED LONG 82 | 2ED4 , 95E8 ,# CJK RADICAL C-SIMPLIFIED GATE 83 | 2ED5 , 28E0F ,# CJK RADICAL MOUND ONE 84 | 2ED6 , 961D ,# CJK RADICAL MOUND TWO 85 | 2ED7 , 96E8 ,# CJK RADICAL RAIN 86 | 2ED8 , 9752 ,# CJK RADICAL BLUE 87 | 2ED9 , 97E6 ,# CJK RADICAL C-SIMPLIFIED TANNED LEATHER 88 | 2EDA , 9875 ,# CJK RADICAL C-SIMPLIFIED LEAF 89 | 2EDB , 98CE ,# CJK RADICAL C-SIMPLIFIED WIND 90 | 2EDC , 98DE ,# CJK RADICAL C-SIMPLIFIED FLY 91 | 2EDD , 98DF ,# CJK RADICAL EAT ONE 92 | 2EDE , 2967F ,# CJK RADICAL EAT TWO 93 | 2EDF , 98E0 ,# CJK RADICAL EAT THREE 94 | 2EE0 , 9963 ,# CJK RADICAL C-SIMPLIFIED EAT 95 | 2EE1 , 29810 ,# CJK RADICAL HEAD 96 | 2EE2 , 9A6C ,# CJK RADICAL C-SIMPLIFIED HORSE 97 | 2EE3 , 9AA8 ,# CJK RADICAL BONE 98 | 2EE4 , 9B3C ,# CJK RADICAL GHOST 99 | 2EE5 , 9C7C ,# CJK RADICAL C-SIMPLIFIED FISH 100 | 2EE6 , 9E1F ,# CJK RADICAL C-SIMPLIFIED BIRD 101 | 2EE7 , 5364 ,# CJK RADICAL C-SIMPLIFIED SALT 102 | 2EE8 , 9EA6 ,# CJK RADICAL SIMPLIFIED WHEAT 103 | 2EE9 , 9EC4 ,# CJK RADICAL SIMPLIFIED YELLOW 104 | 2EEA , 9EFE ,# CJK RADICAL C-SIMPLIFIED FROG 105 | 2EEB , 6589 ,# CJK RADICAL J-SIMPLIFIED EVEN 106 | 2EEC , 9F50 ,# CJK RADICAL C-SIMPLIFIED EVEN 107 | 2EED , 6B6F ,# CJK RADICAL J-SIMPLIFIED TOOTH 108 | 2EEE , 9F7F ,# CJK RADICAL C-SIMPLIFIED TOOTH 109 | 2EEF , 7ADC ,# CJK RADICAL J-SIMPLIFIED DRAGON 110 | 2EF0 , 9F99 ,# CJK RADICAL C-SIMPLIFIED DRAGON 111 | 2EF1 , 9F9C ,# CJK RADICAL TURTLE 112 | 2EF2 , 4E80 ,# CJK RADICAL J-SIMPLIFIED TURTLE 113 | 2EF3 , 9F9F ,# CJK RADICAL C-SIMPLIFIED TURTLE 114 | 2F00 , 4E00 ,# KANGXI RADICAL ONE 115 | 2F01 , 4E28 ,# KANGXI RADICAL LINE 116 | 2F02 , 4E36 ,# KANGXI RADICAL DOT 117 | 2F03 , 4E3F ,# KANGXI RADICAL SLASH 118 | 2F04 , 4E59 ,# KANGXI RADICAL SECOND 119 | 2F05 , 4E85 ,# KANGXI 
RADICAL HOOK 120 | 2F06 , 4E8C ,# KANGXI RADICAL TWO 121 | 2F07 , 4EA0 ,# KANGXI RADICAL LID 122 | 2F08 , 4EBA ,# KANGXI RADICAL MAN 123 | 2F09 , 513F ,# KANGXI RADICAL LEGS 124 | 2F0A , 5165 ,# KANGXI RADICAL ENTER 125 | 2F0B , 516B ,# KANGXI RADICAL EIGHT 126 | 2F0C , 5182 ,# KANGXI RADICAL DOWN BOX 127 | 2F0D , 5196 ,# KANGXI RADICAL COVER 128 | 2F0E , 51AB ,# KANGXI RADICAL ICE 129 | 2F0F , 51E0 ,# KANGXI RADICAL TABLE 130 | 2F10 , 51F5 ,# KANGXI RADICAL OPEN BOX 131 | 2F11 , 5200 ,# KANGXI RADICAL KNIFE 132 | 2F12 , 529B ,# KANGXI RADICAL POWER 133 | 2F13 , 52F9 ,# KANGXI RADICAL WRAP 134 | 2F14 , 5315 ,# KANGXI RADICAL SPOON 135 | 2F15 , 531A ,# KANGXI RADICAL RIGHT OPEN BOX 136 | 2F16 , 5338 ,# KANGXI RADICAL HIDING ENCLOSURE 137 | 2F17 , 5341 ,# KANGXI RADICAL TEN 138 | 2F18 , 535C ,# KANGXI RADICAL DIVINATION 139 | 2F19 , 5369 ,# KANGXI RADICAL SEAL 140 | 2F1A , 5382 ,# KANGXI RADICAL CLIFF 141 | 2F1B , 53B6 ,# KANGXI RADICAL PRIVATE 142 | 2F1C , 53C8 ,# KANGXI RADICAL AGAIN 143 | 2F1D , 53E3 ,# KANGXI RADICAL MOUTH 144 | 2F1E , 56D7 ,# KANGXI RADICAL ENCLOSURE 145 | 2F1F , 571F ,# KANGXI RADICAL EARTH 146 | 2F20 , 58EB ,# KANGXI RADICAL SCHOLAR 147 | 2F21 , 5902 ,# KANGXI RADICAL GO 148 | 2F22 , 590A ,# KANGXI RADICAL GO SLOWLY 149 | 2F23 , 5915 ,# KANGXI RADICAL EVENING 150 | 2F24 , 5927 ,# KANGXI RADICAL BIG 151 | 2F25 , 5973 ,# KANGXI RADICAL WOMAN 152 | 2F26 , 5B50 ,# KANGXI RADICAL CHILD 153 | 2F27 , 5B80 ,# KANGXI RADICAL ROOF 154 | 2F28 , 5BF8 ,# KANGXI RADICAL INCH 155 | 2F29 , 5C0F ,# KANGXI RADICAL SMALL 156 | 2F2A , 5C22 ,# KANGXI RADICAL LAME 157 | 2F2B , 5C38 ,# KANGXI RADICAL CORPSE 158 | 2F2C , 5C6E ,# KANGXI RADICAL SPROUT 159 | 2F2D , 5C71 ,# KANGXI RADICAL MOUNTAIN 160 | 2F2E , 5DDB ,# KANGXI RADICAL RIVER 161 | 2F2F , 5DE5 ,# KANGXI RADICAL WORK 162 | 2F30 , 5DF1 ,# KANGXI RADICAL ONESELF 163 | 2F31 , 5DFE ,# KANGXI RADICAL TURBAN 164 | 2F32 , 5E72 ,# KANGXI RADICAL DRY 165 | 2F33 , 5E7A ,# KANGXI RADICAL SHORT THREAD 166 | 2F34 , 5E7F 
,# KANGXI RADICAL DOTTED CLIFF 167 | 2F35 , 5EF4 ,# KANGXI RADICAL LONG STRIDE 168 | 2F36 , 5EFE ,# KANGXI RADICAL TWO HANDS 169 | 2F37 , 5F0B ,# KANGXI RADICAL SHOOT 170 | 2F38 , 5F13 ,# KANGXI RADICAL BOW 171 | 2F39 , 5F50 ,# KANGXI RADICAL SNOUT 172 | 2F3A , 5F61 ,# KANGXI RADICAL BRISTLE 173 | 2F3B , 5F73 ,# KANGXI RADICAL STEP 174 | 2F3C , 5FC3 ,# KANGXI RADICAL HEART 175 | 2F3D , 6208 ,# KANGXI RADICAL HALBERD 176 | 2F3E , 6236 ,# KANGXI RADICAL DOOR 177 | 2F3F , 624B ,# KANGXI RADICAL HAND 178 | 2F40 , 652F ,# KANGXI RADICAL BRANCH 179 | 2F41 , 6534 ,# KANGXI RADICAL RAP 180 | 2F42 , 6587 ,# KANGXI RADICAL SCRIPT 181 | 2F43 , 6597 ,# KANGXI RADICAL DIPPER 182 | 2F44 , 65A4 ,# KANGXI RADICAL AXE 183 | 2F45 , 65B9 ,# KANGXI RADICAL SQUARE 184 | 2F46 , 65E0 ,# KANGXI RADICAL NOT 185 | 2F47 , 65E5 ,# KANGXI RADICAL SUN 186 | 2F48 , 66F0 ,# KANGXI RADICAL SAY 187 | 2F49 , 6708 ,# KANGXI RADICAL MOON 188 | 2F4A , 6728 ,# KANGXI RADICAL TREE 189 | 2F4B , 6B20 ,# KANGXI RADICAL LACK 190 | 2F4C , 6B62 ,# KANGXI RADICAL STOP 191 | 2F4D , 6B79 ,# KANGXI RADICAL DEATH 192 | 2F4E , 6BB3 ,# KANGXI RADICAL WEAPON 193 | 2F4F , 6BCB ,# KANGXI RADICAL DO NOT 194 | 2F50 , 6BD4 ,# KANGXI RADICAL COMPARE 195 | 2F51 , 6BDB ,# KANGXI RADICAL FUR 196 | 2F52 , 6C0F ,# KANGXI RADICAL CLAN 197 | 2F53 , 6C14 ,# KANGXI RADICAL STEAM 198 | 2F54 , 6C34 ,# KANGXI RADICAL WATER 199 | 2F55 , 706B ,# KANGXI RADICAL FIRE 200 | 2F56 , 722A ,# KANGXI RADICAL CLAW 201 | 2F57 , 7236 ,# KANGXI RADICAL FATHER 202 | 2F58 , 723B ,# KANGXI RADICAL DOUBLE X 203 | 2F59 , 723F ,# KANGXI RADICAL HALF TREE TRUNK 204 | 2F5A , 7247 ,# KANGXI RADICAL SLICE 205 | 2F5B , 7259 ,# KANGXI RADICAL FANG 206 | 2F5C , 725B ,# KANGXI RADICAL COW 207 | 2F5D , 72AC ,# KANGXI RADICAL DOG 208 | 2F5E , 7384 ,# KANGXI RADICAL PROFOUND 209 | 2F5F , 7389 ,# KANGXI RADICAL JADE 210 | 2F60 , 74DC ,# KANGXI RADICAL MELON 211 | 2F61 , 74E6 ,# KANGXI RADICAL TILE 212 | 2F62 , 7518 ,# KANGXI RADICAL SWEET 213 | 2F63 , 751F ,# KANGXI 
RADICAL LIFE 214 | 2F64 , 7528 ,# KANGXI RADICAL USE 215 | 2F65 , 7530 ,# KANGXI RADICAL FIELD 216 | 2F66 , 758B ,# KANGXI RADICAL BOLT OF CLOTH 217 | 2F67 , 7592 ,# KANGXI RADICAL SICKNESS 218 | 2F68 , 7676 ,# KANGXI RADICAL DOTTED TENT 219 | 2F69 , 767D ,# KANGXI RADICAL WHITE 220 | 2F6A , 76AE ,# KANGXI RADICAL SKIN 221 | 2F6B , 76BF ,# KANGXI RADICAL DISH 222 | 2F6C , 76EE ,# KANGXI RADICAL EYE 223 | 2F6D , 77DB ,# KANGXI RADICAL SPEAR 224 | 2F6E , 77E2 ,# KANGXI RADICAL ARROW 225 | 2F6F , 77F3 ,# KANGXI RADICAL STONE 226 | 2F70 , 793A ,# KANGXI RADICAL SPIRIT 227 | 2F71 , 79B8 ,# KANGXI RADICAL TRACK 228 | 2F72 , 79BE ,# KANGXI RADICAL GRAIN 229 | 2F73 , 7A74 ,# KANGXI RADICAL CAVE 230 | 2F74 , 7ACB ,# KANGXI RADICAL STAND 231 | 2F75 , 7AF9 ,# KANGXI RADICAL BAMBOO 232 | 2F76 , 7C73 ,# KANGXI RADICAL RICE 233 | 2F77 , 7CF8 ,# KANGXI RADICAL SILK 234 | 2F78 , 7F36 ,# KANGXI RADICAL JAR 235 | 2F79 , 7F51 ,# KANGXI RADICAL NET 236 | 2F7A , 7F8A ,# KANGXI RADICAL SHEEP 237 | 2F7B , 7FBD ,# KANGXI RADICAL FEATHER 238 | 2F7C , 8001 ,# KANGXI RADICAL OLD 239 | 2F7D , 800C ,# KANGXI RADICAL AND 240 | 2F7E , 8012 ,# KANGXI RADICAL PLOW 241 | 2F7F , 8033 ,# KANGXI RADICAL EAR 242 | 2F80 , 807F ,# KANGXI RADICAL BRUSH 243 | 2F81 , 8089 ,# KANGXI RADICAL MEAT 244 | 2F82 , 81E3 ,# KANGXI RADICAL MINISTER 245 | 2F83 , 81EA ,# KANGXI RADICAL SELF 246 | 2F84 , 81F3 ,# KANGXI RADICAL ARRIVE 247 | 2F85 , 81FC ,# KANGXI RADICAL MORTAR 248 | 2F86 , 820C ,# KANGXI RADICAL TONGUE 249 | 2F87 , 821B ,# KANGXI RADICAL OPPOSE 250 | 2F88 , 821F ,# KANGXI RADICAL BOAT 251 | 2F89 , 826E ,# KANGXI RADICAL STOPPING 252 | 2F8A , 8272 ,# KANGXI RADICAL COLOR 253 | 2F8B , 8278 ,# KANGXI RADICAL GRASS 254 | 2F8C , 864D ,# KANGXI RADICAL TIGER 255 | 2F8D , 866B ,# KANGXI RADICAL INSECT 256 | 2F8E , 8840 ,# KANGXI RADICAL BLOOD 257 | 2F8F , 884C ,# KANGXI RADICAL WALK ENCLOSURE 258 | 2F90 , 8863 ,# KANGXI RADICAL CLOTHES 259 | 2F91 , 897E ,# KANGXI RADICAL WEST 260 | 2F92 , 898B ,# KANGXI RADICAL 
SEE 261 | 2F93 , 89D2 ,# KANGXI RADICAL HORN 262 | 2F94 , 8A00 ,# KANGXI RADICAL SPEECH 263 | 2F95 , 8C37 ,# KANGXI RADICAL VALLEY 264 | 2F96 , 8C46 ,# KANGXI RADICAL BEAN 265 | 2F97 , 8C55 ,# KANGXI RADICAL PIG 266 | 2F98 , 8C78 ,# KANGXI RADICAL BADGER 267 | 2F99 , 8C9D ,# KANGXI RADICAL SHELL 268 | 2F9A , 8D64 ,# KANGXI RADICAL RED 269 | 2F9B , 8D70 ,# KANGXI RADICAL RUN 270 | 2F9C , 8DB3 ,# KANGXI RADICAL FOOT 271 | 2F9D , 8EAB ,# KANGXI RADICAL BODY 272 | 2F9E , 8ECA ,# KANGXI RADICAL CART 273 | 2F9F , 8F9B ,# KANGXI RADICAL BITTER 274 | 2FA0 , 8FB0 ,# KANGXI RADICAL MORNING 275 | 2FA1 , 8FB5 ,# KANGXI RADICAL WALK 276 | 2FA2 , 9091 ,# KANGXI RADICAL CITY 277 | 2FA3 , 9149 ,# KANGXI RADICAL WINE 278 | 2FA4 , 91C6 ,# KANGXI RADICAL DISTINGUISH 279 | 2FA5 , 91CC ,# KANGXI RADICAL VILLAGE 280 | 2FA6 , 91D1 ,# KANGXI RADICAL GOLD 281 | 2FA7 , 9577 ,# KANGXI RADICAL LONG 282 | 2FA8 , 9580 ,# KANGXI RADICAL GATE 283 | 2FA9 , 961C ,# KANGXI RADICAL MOUND 284 | 2FAA , 96B6 ,# KANGXI RADICAL SLAVE 285 | 2FAB , 96B9 ,# KANGXI RADICAL SHORT TAILED BIRD 286 | 2FAC , 96E8 ,# KANGXI RADICAL RAIN 287 | 2FAD , 9751 ,# KANGXI RADICAL BLUE 288 | 2FAE , 975E ,# KANGXI RADICAL WRONG 289 | 2FAF , 9762 ,# KANGXI RADICAL FACE 290 | 2FB0 , 9769 ,# KANGXI RADICAL LEATHER 291 | 2FB1 , 97CB ,# KANGXI RADICAL TANNED LEATHER 292 | 2FB2 , 97ED ,# KANGXI RADICAL LEEK 293 | 2FB3 , 97F3 ,# KANGXI RADICAL SOUND 294 | 2FB4 , 9801 ,# KANGXI RADICAL LEAF 295 | 2FB5 , 98A8 ,# KANGXI RADICAL WIND 296 | 2FB6 , 98DB ,# KANGXI RADICAL FLY 297 | 2FB7 , 98DF ,# KANGXI RADICAL EAT 298 | 2FB8 , 9996 ,# KANGXI RADICAL HEAD 299 | 2FB9 , 9999 ,# KANGXI RADICAL FRAGRANT 300 | 2FBA , 99AC ,# KANGXI RADICAL HORSE 301 | 2FBB , 9AA8 ,# KANGXI RADICAL BONE 302 | 2FBC , 9AD8 ,# KANGXI RADICAL TALL 303 | 2FBD , 9ADF ,# KANGXI RADICAL HAIR 304 | 2FBE , 9B25 ,# KANGXI RADICAL FIGHT 305 | 2FBF , 9B2F ,# KANGXI RADICAL SACRIFICIAL WINE 306 | 2FC0 , 9B32 ,# KANGXI RADICAL CAULDRON 307 | 2FC1 , 9B3C ,# KANGXI RADICAL 
GHOST 308 | 2FC2 , 9B5A ,# KANGXI RADICAL FISH 309 | 2FC3 , 9CE5 ,# KANGXI RADICAL BIRD 310 | 2FC4 , 9E75 ,# KANGXI RADICAL SALT 311 | 2FC5 , 9E7F ,# KANGXI RADICAL DEER 312 | 2FC6 , 9EA5 ,# KANGXI RADICAL WHEAT 313 | 2FC7 , 9EBB ,# KANGXI RADICAL HEMP 314 | 2FC8 , 9EC3 ,# KANGXI RADICAL YELLOW 315 | 2FC9 , 9ECD ,# KANGXI RADICAL MILLET 316 | 2FCA , 9ED1 ,# KANGXI RADICAL BLACK 317 | 2FCB , 9EF9 ,# KANGXI RADICAL EMBROIDERY 318 | 2FCC , 9EFD ,# KANGXI RADICAL FROG 319 | 2FCD , 9F0E ,# KANGXI RADICAL TRIPOD 320 | 2FCE , 9F13 ,# KANGXI RADICAL DRUM 321 | 2FCF , 9F20 ,# KANGXI RADICAL RAT 322 | 2FD0 , 9F3B ,# KANGXI RADICAL NOSE 323 | 2FD1 , 9F4A ,# KANGXI RADICAL EVEN 324 | 2FD2 , 9F52 ,# KANGXI RADICAL TOOTH 325 | 2FD3 , 9F8D ,# KANGXI RADICAL DRAGON 326 | 2FD4 , 9F9C ,# KANGXI RADICAL TURTLE 327 | 2FD5 , 9FA0 ,# KANGXI RADICAL FLUTE 328 | -------------------------------------------------------------------------------- /nrm/radicals-sample/main.go: -------------------------------------------------------------------------------- 1 | //go:build run 2 | // +build run 3 | 4 | package main 5 | 6 | import ( 7 | _ "embed" 8 | "errors" 9 | "fmt" 10 | "io" 11 | "strings" 12 | 13 | "github.com/goark/csvdata" 14 | "golang.org/x/text/unicode/norm" 15 | ) 16 | 17 | //go:embed equivalent-unified-ideograph.csv 18 | var kanjiList string 19 | 20 | func readData() (map[rune]rune, error) { 21 | kanjiMap := map[rune]rune{} 22 | cr := csvdata.New(strings.NewReader(kanjiList), true).WithFieldsPerRecord(3) 23 | for { 24 | if err := cr.Next(); err != nil { 25 | if errors.Is(err, io.EOF) { 26 | break 27 | } 28 | return nil, err 29 | } 30 | key, err := cr.ColumnInt64("radicals", 16) 31 | if err != nil { 32 | return nil, err 33 | } 34 | value, err := cr.ColumnInt64("normalize", 16) 35 | if err != nil { 36 | return nil, err 37 | } 38 | kanjiMap[rune(key)] = rune(value) 39 | } 40 | return kanjiMap, nil 41 | } 42 | 43 | func main() { 44 | kanjiMap, err := readData() 45 | if err != nil { 46 | 
fmt.Println(err) 47 | return 48 | } 49 | fmt.Println("var kangxiRadicals = unicode.SpecialCase{") 50 | for kr := rune(0x2e80); kr <= 0x2fdf; kr++ { 51 | ki, ok := kanjiMap[kr] 52 | if !ok { 53 | kis := []rune(norm.NFKC.String(string([]rune{kr}))) 54 | ki = kis[0] 55 | } 56 | if kr != ki { 57 | fmt.Printf("\tunicode.CaseRange{Lo: %#[1]x, Hi: %#[1]x, Delta: [unicode.MaxCase]rune{%#[2]x - %#[1]x, 0, 0}}, // %#[1]U -> %#[2]U\n", kr, ki) 58 | } 59 | } 60 | fmt.Println("}") 61 | } 62 | 63 | /* Copyright 2021 Spiegel 64 | * 65 | * Licensed under the Apache License, Version 2.0 (the "License"); 66 | * you may not use this file except in compliance with the License. 67 | * You may obtain a copy of the License at 68 | * 69 | * http://www.apache.org/licenses/LICENSE-2.0 70 | * 71 | * Unless required by applicable law or agreed to in writing, software 72 | * distributed under the License is distributed on an "AS IS" BASIS, 73 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 74 | * See the License for the specific language governing permissions and 75 | * limitations under the License. 
76 | */ 77 | -------------------------------------------------------------------------------- /rbom/rbom.go: -------------------------------------------------------------------------------- 1 | package rbom 2 | 3 | import ( 4 | "bytes" 5 | "io" 6 | "strings" 7 | "unicode/utf8" 8 | 9 | "github.com/goark/errs" 10 | "github.com/goark/gnkf/ecode" 11 | ) 12 | 13 | var bom = []byte{0xef, 0xbb, 0xbf} 14 | 15 | //RemoveBom reads r to the end and returns its content with every UTF-8 BOM sequence (0xEF 0xBB 0xBF) removed; read errors are returned wrapped, and the checks of RemoveBomBytes apply to the data. 16 | func RemoveBom(r io.Reader) ([]byte, error) { 17 | buf := bytes.Buffer{} 18 | if _, err := buf.ReadFrom(r); err != nil { 19 | return nil, errs.Wrap(err) 20 | } 21 | return RemoveBomBytes(buf.Bytes()) 22 | } 23 | 24 | //RemoveBomBytes returns a copy of b with every UTF-8 BOM sequence removed, not only a leading one. Empty or nil input yields an empty non-nil slice; input that is not valid UTF-8 yields ecode.ErrInvalidUTF8Text (wrapped). 25 | func RemoveBomBytes(b []byte) ([]byte, error) { 26 | if len(b) == 0 { 27 | return []byte{}, nil 28 | } 29 | if !utf8.Valid(b) { 30 | return nil, errs.Wrap(ecode.ErrInvalidUTF8Text) 31 | } 32 | return bytes.ReplaceAll(b, bom, []byte{}), nil 33 | } 34 | 35 | //RemoveBomString returns s with every UTF-8 BOM sequence removed. Unlike RemoveBomBytes it performs no UTF-8 validation and cannot fail. 36 | func RemoveBomString(s string) string { 37 | if len(s) == 0 { 38 | return "" 39 | } 40 | return strings.ReplaceAll(s, string(bom), "") 41 | } 42 | 43 | /* Copyright 2020 Spiegel 44 | * 45 | * Licensed under the Apache License, Version 2.0 (the "License"); 46 | * you may not use this file except in compliance with the License. 47 | * You may obtain a copy of the License at 48 | * 49 | * http://www.apache.org/licenses/LICENSE-2.0 50 | * 51 | * Unless required by applicable law or agreed to in writing, software 52 | * distributed under the License is distributed on an "AS IS" BASIS, 53 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 54 | * See the License for the specific language governing permissions and 55 | * limitations under the License. 
56 | */ 57 | -------------------------------------------------------------------------------- /rbom/rbom_test.go: -------------------------------------------------------------------------------- 1 | package rbom_test 2 | 3 | import ( 4 | "bytes" 5 | "testing" 6 | 7 | "github.com/goark/gnkf/rbom" 8 | ) 9 | 10 | func TestRemoveBomNil(t *testing.T) { 11 | testCases := []struct { 12 | inp []byte 13 | outp []byte 14 | }{ 15 | {inp: nil, outp: []byte{}}, 16 | } 17 | for _, tc := range testCases { 18 | if b, err := rbom.RemoveBomBytes(tc.inp); err != nil { 19 | t.Errorf("RemoveBomBytes() = \"%v\", want nil.", err) 20 | } else if !bytes.Equal(b, tc.outp) { 21 | t.Errorf("RemoveBomBytes() = %v, want %v.", b, tc.outp) 22 | } 23 | } 24 | } 25 | 26 | func TestRemoveBom(t *testing.T) { 27 | testCases := []struct { 28 | inp []byte 29 | outp []byte 30 | }{ 31 | {inp: nil, outp: []byte{}}, 32 | {inp: []byte("Hello"), outp: []byte("Hello")}, 33 | {inp: []byte{0xef, 0xbb, 0xbf, 0x48, 0x65, 0x6c, 0x6c, 0xef, 0xbb, 0xbf, 0x6f}, outp: []byte("Hello")}, 34 | } 35 | for _, tc := range testCases { 36 | if b, err := rbom.RemoveBom(bytes.NewReader(tc.inp)); err != nil { 37 | t.Errorf("RemoveBom() = \"%v\", want nil.", err) 38 | } else if !bytes.Equal(b, tc.outp) { 39 | t.Errorf("RemoveBom() = %v, want %v.", b, tc.outp) 40 | } 41 | } 42 | } 43 | 44 | func TestRemoveBomString(t *testing.T) { 45 | testCases := []struct { 46 | inp string 47 | outp string 48 | }{ 49 | {inp: "", outp: ""}, 50 | {inp: "Hello", outp: "Hello"}, 51 | {inp: string([]byte{0xef, 0xbb, 0xbf, 0x48, 0x65, 0x6c, 0x6c, 0xef, 0xbb, 0xbf, 0x6f}), outp: "Hello"}, 52 | } 53 | for _, tc := range testCases { 54 | s := rbom.RemoveBomString(tc.inp) 55 | if s != tc.outp { 56 | t.Errorf("RemoveBomString() = %v, want %v.", s, tc.outp) 57 | } 58 | } 59 | } 60 | 61 | /* Copyright 2020 Spiegel 62 | * 63 | * Licensed under the Apache License, Version 2.0 (the "License"); 64 | * you may not use this file except in compliance with the License. 
65 | * You may obtain a copy of the License at 66 | * 67 | * http://www.apache.org/licenses/LICENSE-2.0 68 | * 69 | * Unless required by applicable law or agreed to in writing, software 70 | * distributed under the License is distributed on an "AS IS" BASIS, 71 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 72 | * See the License for the specific language governing permissions and 73 | * limitations under the License. 74 | */ 75 | -------------------------------------------------------------------------------- /width/example_test.go: -------------------------------------------------------------------------------- 1 | package width_test 2 | 3 | import ( 4 | "bytes" 5 | "fmt" 6 | "os" 7 | 8 | "github.com/goark/gnkf/dump" 9 | "github.com/goark/gnkf/width" 10 | ) 11 | 12 | func ExampleConvertString() { 13 | txt := "12345 コンバンハ、セカイ 67890 コンバンハ、ニッポン" 14 | str, err := width.ConvertString("narrow", txt) 15 | if err != nil { 16 | return 17 | } 18 | if err := dump.UnicodePoint(os.Stdout, bytes.NewBufferString(str)); err != nil { 19 | fmt.Fprintln(os.Stderr, err) 20 | return 21 | } 22 | fmt.Println() 23 | str, err = width.ConvertString("widen", txt) 24 | if err != nil { 25 | fmt.Fprintln(os.Stderr, err) 26 | return 27 | } 28 | if err := dump.UnicodePoint(os.Stdout, bytes.NewBufferString(str)); err != nil { 29 | fmt.Fprintln(os.Stderr, err) 30 | return 31 | } 32 | fmt.Println() 33 | str, err = width.ConvertString("fold", txt) 34 | if err != nil { 35 | fmt.Fprintln(os.Stderr, err) 36 | return 37 | } 38 | if err := dump.UnicodePoint(os.Stdout, bytes.NewBufferString(str)); err != nil { 39 | fmt.Fprintln(os.Stderr, err) 40 | return 41 | } 42 | fmt.Println() 43 | //Output: 44 | //0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0020, 0xff7a, 0xff9d, 0xff8a, 0xff9e, 0xff9d, 0xff8a, 0xff64, 0xff7e, 0xff76, 0xff72, 0x0020, 0x0036, 0x0037, 0x0038, 0x0039, 0x0030, 0x0020, 0xff7a, 0xff9d, 0xff8a, 0xff9e, 0xff9d, 0xff8a, 0xff64, 0xff86, 0xff6f, 0xff8e, 0xff9f, 
0xff9d 45 | //0xff11, 0xff12, 0xff13, 0xff14, 0xff15, 0x3000, 0x30b3, 0x30f3, 0x30d0, 0x30f3, 0x30cf, 0x3001, 0x30bb, 0x30ab, 0x30a4, 0x3000, 0xff16, 0xff17, 0xff18, 0xff19, 0xff10, 0x3000, 0x30b3, 0x30f3, 0x30d0, 0x30f3, 0x30cf, 0x3001, 0x30cb, 0x30c3, 0x30dd, 0x30f3 46 | //0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0020, 0x30b3, 0x30f3, 0x30d0, 0x30f3, 0x30cf, 0x3001, 0x30bb, 0x30ab, 0x30a4, 0x0020, 0x0036, 0x0037, 0x0038, 0x0039, 0x0030, 0x0020, 0x30b3, 0x30f3, 0x30d0, 0x30f3, 0x30cf, 0x3001, 0x30cb, 0x30c3, 0x30dd, 0x30f3 47 | } 48 | 49 | /* Copyright 2020 Spiegel 50 | * 51 | * Licensed under the Apache License, Version 2.0 (the "License"); 52 | * you may not use this file except in compliance with the License. 53 | * You may obtain a copy of the License at 54 | * 55 | * http://www.apache.org/licenses/LICENSE-2.0 56 | * 57 | * Unless required by applicable law or agreed to in writing, software 58 | * distributed under the License is distributed on an "AS IS" BASIS, 59 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 60 | * See the License for the specific language governing permissions and 61 | * limitations under the License. 
62 | */ 63 | -------------------------------------------------------------------------------- /width/form.go: -------------------------------------------------------------------------------- 1 | package width 2 | 3 | import ( 4 | "strings" 5 | 6 | "github.com/goark/errs" 7 | "github.com/goark/gnkf/ecode" 8 | wdth "golang.org/x/text/width" 9 | ) 10 | 11 | var formNamesMap = map[string]wdth.Transformer{ 12 | "fold": wdth.Fold, 13 | "narrow": wdth.Narrow, 14 | "widen": wdth.Widen, 15 | } 16 | 17 | func formName(f wdth.Transformer) string { 18 | for key, value := range formNamesMap { 19 | if value == f { 20 | return key 21 | } 22 | } 23 | return "" 24 | } 25 | 26 | //FormList returns the names of the supported width forms, in the order fold, narrow, widen. 27 | func FormList() []string { 28 | return []string{ 29 | formName(wdth.Fold), 30 | formName(wdth.Narrow), 31 | formName(wdth.Widen), 32 | } 33 | } 34 | 35 | //FormOf returns the width transformer for the form name (matched case-insensitively). An unknown name yields wdth.Fold together with a wrapped ecode.ErrInvalidWidthForm carrying the name as context. 36 | func FormOf(name string) (wdth.Transformer, error) { 37 | if f, ok := formNamesMap[strings.ToLower(name)]; ok { 38 | return f, nil 39 | } 40 | return wdth.Fold, errs.Wrap(ecode.ErrInvalidWidthForm, errs.WithContext("name", name)) 41 | } 42 | 43 | /* Copyright 2020 Spiegel 44 | * 45 | * Licensed under the Apache License, Version 2.0 (the "License"); 46 | * you may not use this file except in compliance with the License. 47 | * You may obtain a copy of the License at 48 | * 49 | * http://www.apache.org/licenses/LICENSE-2.0 50 | * 51 | * Unless required by applicable law or agreed to in writing, software 52 | * distributed under the License is distributed on an "AS IS" BASIS, 53 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 54 | * See the License for the specific language governing permissions and 55 | * limitations under the License. 
56 | */ 57 | -------------------------------------------------------------------------------- /width/width.go: -------------------------------------------------------------------------------- 1 | package width 2 | 3 | import ( 4 | "bytes" 5 | "io" 6 | "strings" 7 | 8 | "github.com/goark/errs" 9 | "github.com/goark/kkconv/fold" 10 | wdth "golang.org/x/text/width" 11 | ) 12 | 13 | //Convert reads all of txt, converts its character width according to formName (see ConvertString) and writes the result to writer. Any read, conversion or write error is returned wrapped. 14 | func Convert(formName string, writer io.Writer, txt io.Reader) error { 15 | buf := &bytes.Buffer{} 16 | if _, err := buf.ReadFrom(txt); err != nil { 17 | return errs.Wrap(err) 18 | } 19 | str, err := ConvertString(formName, buf.String()) 20 | if err != nil { 21 | return errs.Wrap(err) 22 | } 23 | if _, err := strings.NewReader(str).WriteTo(writer); err != nil { 24 | return errs.Wrap(err) 25 | } 26 | return nil 27 | } 28 | 29 | //ConvertString converts the character width of txt using the form named formName: fold, widen and narrow map to fold.Convert, fold.ConvertWiden and fold.ConvertNarrow. An unknown form name returns an empty string and the wrapped FormOf error. 30 | func ConvertString(formName, txt string) (string, error) { 31 | f, err := FormOf(formName) 32 | if err != nil { 33 | return "", errs.Wrap(err, errs.WithContext("formName", formName)) 34 | } 35 | switch f { 36 | case wdth.Fold: 37 | return fold.Convert(txt), nil 38 | case wdth.Widen: 39 | return fold.ConvertWiden(txt), nil 40 | case wdth.Narrow: 41 | return fold.ConvertNarrow(txt), nil 42 | } 43 | return txt, nil 44 | } 45 | 46 | /* Copyright 2020-2021 Spiegel 47 | * 48 | * Licensed under the Apache License, Version 2.0 (the "License"); 49 | * you may not use this file except in compliance with the License. 50 | * You may obtain a copy of the License at 51 | * 52 | * http://www.apache.org/licenses/LICENSE-2.0 53 | * 54 | * Unless required by applicable law or agreed to in writing, software 55 | * distributed under the License is distributed on an "AS IS" BASIS, 56 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
57 | * See the License for the specific language governing permissions and 58 | * limitations under the License. 59 | */ 60 | -------------------------------------------------------------------------------- /width/width_test.go: -------------------------------------------------------------------------------- 1 | package width 2 | 3 | import ( 4 | "bytes" 5 | "fmt" 6 | "strings" 7 | "testing" 8 | 9 | "github.com/goark/errs" 10 | "github.com/goark/gnkf/dump" 11 | "github.com/goark/gnkf/ecode" 12 | ) 13 | 14 | func TestFormList(t *testing.T) { 15 | res := "fold|narrow|widen" 16 | str := strings.Join(FormList(), "|") 17 | if str != res { 18 | t.Errorf("FormList() = \"%+v\", want \"%+v\".", str, res) 19 | } 20 | } 21 | 22 | func TestTranslate(t *testing.T) { 23 | testCases := []struct { 24 | inp, out []byte 25 | formName string 26 | err error 27 | }{ 28 | { 29 | inp: []byte("ペンギン 12345 ヸヹヺ"), 30 | out: []byte("ペンギン 12345 イ゙エ゙ヺ"), 31 | formName: "narrow", 32 | err: nil, 33 | }, 34 | { 35 | inp: []byte("ペンギン 12345 ヸヹヺ"), 36 | out: []byte("ペンギン 12345 ヸヹヺ"), 37 | formName: "widen", 38 | err: nil, 39 | }, 40 | { 41 | inp: []byte("ペンギン 12345 ヸヹヺ"), 42 | out: []byte("ペンギン 12345 ヸヹヺ"), 43 | formName: "fold", 44 | err: nil, 45 | }, 46 | { 47 | inp: []byte("ペンギン 12345"), 48 | out: []byte{}, 49 | formName: "foo", 50 | err: ecode.ErrInvalidWidthForm, 51 | }, 52 | { 53 | //"ァアィイゥウェエォオカガキギクグケゲコゴサザシジスズセゼソゾタダチヂッツヅテデトドナニヌネノハバパヒビピフブプヘベペホボポマミムメモャヤュユョヨラリルレロヮワヰヱヲンヴヵヶヷヸヹヺヽヾヿㇰㇱㇲㇳㇴㇵㇶㇷㇸㇹㇺㇻㇼㇽㇾㇿヲァィゥェォャュョッアイウエオカキクケコサシスセソタチツテトナニヌネノハヒフヘホマミムメモヤユヨラリルレロワン" + string([]rune{0x1b164, 0x1b165, 0x1b166, 0x1b167})) 54 | inp: []byte(string([]rune{0x30a1, 0x30a2, 0x30a3, 0x30a4, 0x30a5, 0x30a6, 0x30a7, 0x30a8, 0x30a9, 0x30aa, 0x30ab, 0x30ac, 0x30ad, 0x30ae, 0x30af, 0x30b0, 0x30b1, 0x30b2, 0x30b3, 0x30b4, 0x30b5, 0x30b6, 0x30b7, 0x30b8, 0x30b9, 0x30ba, 0x30bb, 0x30bc, 0x30bd, 0x30be, 0x30bf, 0x30c0, 0x30c1, 0x30c2, 0x30c3, 0x30c4, 0x30c5, 0x30c6, 0x30c7, 0x30c8, 0x30c9, 0x30ca, 0x30cb, 0x30cc, 0x30cd, 0x30ce, 
0x30cf, 0x30d0, 0x30d1, 0x30d2, 0x30d3, 0x30d4, 0x30d5, 0x30d6, 0x30d7, 0x30d8, 0x30d9, 0x30da, 0x30db, 0x30dc, 0x30dd, 0x30de, 0x30df, 0x30e0, 0x30e1, 0x30e2, 0x30e3, 0x30e4, 0x30e5, 0x30e6, 0x30e7, 0x30e8, 0x30e9, 0x30ea, 0x30eb, 0x30ec, 0x30ed, 0x30ee, 0x30ef, 0x30f0, 0x30f1, 0x30f2, 0x30f3, 0x30f4, 0x30f5, 0x30f6, 0x30f7, 0x30f8, 0x30f9, 0x30fa, 0x30fd, 0x30fe, 0x30ff, 0x31f0, 0x31f1, 0x31f2, 0x31f3, 0x31f4, 0x31f5, 0x31f6, 0x31f7, 0x31f8, 0x31f9, 0x31fa, 0x31fb, 0x31fc, 0x31fd, 0x31fe, 0x31ff, 0xff66, 0xff67, 0xff68, 0xff69, 0xff6a, 0xff6b, 0xff6c, 0xff6d, 0xff6e, 0xff6f, 0xff71, 0xff72, 0xff73, 0xff74, 0xff75, 0xff76, 0xff77, 0xff78, 0xff79, 0xff7a, 0xff7b, 0xff7c, 0xff7d, 0xff7e, 0xff7f, 0xff80, 0xff81, 0xff82, 0xff83, 0xff84, 0xff85, 0xff86, 0xff87, 0xff88, 0xff89, 0xff8a, 0xff8b, 0xff8c, 0xff8d, 0xff8e, 0xff8f, 0xff90, 0xff91, 0xff92, 0xff93, 0xff94, 0xff95, 0xff96, 0xff97, 0xff98, 0xff99, 0xff9a, 0xff9b, 0xff9c, 0xff9d, 0x1b164, 0x1b165, 0x1b166, 0x1b167})), 55 | out: []byte("ァアィイゥウェエォオカガキギクグケゲコゴサザシジスズセゼソゾタダチヂッツヅテデトドナニヌネノハバパヒビピフブプヘベペホボポマミムメモャヤュユョヨラリルレロヮワヰヱヲンヴヵヶヷヸヹヺヽヾヿㇰㇱㇲㇳㇴㇵㇶㇷㇸㇹㇺㇻㇼㇽㇾㇿヲァィゥェォャュョッアイウエオカキクケコサシスセソタチツテトナニヌネノハヒフヘホマミムメモヤユヨラリルレロワン" + string([]rune{0x1b164, 0x1b165, 0x1b166, 0x1b167})), 56 | formName: "widen", 57 | err: nil, 58 | }, 59 | { 60 | //"ァアィイゥウェエォオカガキギクグケゲコゴサザシジスズセゼソゾタダチヂッツヅテデトドナニヌネノハバパヒビピフブプヘベペホボポマミムメモャヤュユョヨラリルレロヮワヰヱヲンヴヵヶヷヸヹヺヽヾヿㇰㇱㇲㇳㇴㇵㇶㇷㇸㇹㇺㇻㇼㇽㇾㇿヲァィゥェォャュョッアイウエオカキクケコサシスセソタチツテトナニヌネノハヒフヘホマミムメモヤユヨラリルレロワン" + string([]rune{0x1b164, 0x1b165, 0x1b166, 0x1b167})) 61 | inp: []byte(string([]rune{0x30a1, 0x30a2, 0x30a3, 0x30a4, 0x30a5, 0x30a6, 0x30a7, 0x30a8, 0x30a9, 0x30aa, 0x30ab, 0x30ac, 0x30ad, 0x30ae, 0x30af, 0x30b0, 0x30b1, 0x30b2, 0x30b3, 0x30b4, 0x30b5, 0x30b6, 0x30b7, 0x30b8, 0x30b9, 0x30ba, 0x30bb, 0x30bc, 0x30bd, 0x30be, 0x30bf, 0x30c0, 0x30c1, 0x30c2, 0x30c3, 0x30c4, 0x30c5, 0x30c6, 0x30c7, 0x30c8, 0x30c9, 0x30ca, 0x30cb, 0x30cc, 0x30cd, 0x30ce, 0x30cf, 0x30d0, 0x30d1, 0x30d2, 0x30d3, 0x30d4, 0x30d5, 0x30d6, 0x30d7, 0x30d8, 0x30d9, 0x30da, 
0x30db, 0x30dc, 0x30dd, 0x30de, 0x30df, 0x30e0, 0x30e1, 0x30e2, 0x30e3, 0x30e4, 0x30e5, 0x30e6, 0x30e7, 0x30e8, 0x30e9, 0x30ea, 0x30eb, 0x30ec, 0x30ed, 0x30ee, 0x30ef, 0x30f0, 0x30f1, 0x30f2, 0x30f3, 0x30f4, 0x30f5, 0x30f6, 0x30f7, 0x30f8, 0x30f9, 0x30fa, 0x30fd, 0x30fe, 0x30ff, 0x31f0, 0x31f1, 0x31f2, 0x31f3, 0x31f4, 0x31f5, 0x31f6, 0x31f7, 0x31f8, 0x31f9, 0x31fa, 0x31fb, 0x31fc, 0x31fd, 0x31fe, 0x31ff, 0xff66, 0xff67, 0xff68, 0xff69, 0xff6a, 0xff6b, 0xff6c, 0xff6d, 0xff6e, 0xff6f, 0xff71, 0xff72, 0xff73, 0xff74, 0xff75, 0xff76, 0xff77, 0xff78, 0xff79, 0xff7a, 0xff7b, 0xff7c, 0xff7d, 0xff7e, 0xff7f, 0xff80, 0xff81, 0xff82, 0xff83, 0xff84, 0xff85, 0xff86, 0xff87, 0xff88, 0xff89, 0xff8a, 0xff8b, 0xff8c, 0xff8d, 0xff8e, 0xff8f, 0xff90, 0xff91, 0xff92, 0xff93, 0xff94, 0xff95, 0xff96, 0xff97, 0xff98, 0xff99, 0xff9a, 0xff9b, 0xff9c, 0xff9d, 0x1b164, 0x1b165, 0x1b166, 0x1b167})), 62 | out: []byte("ァアィイゥウェエォオカガキギクグケゲコゴサザシジスズセゼソゾタダチヂッツヅテデトドナニヌネノハバパヒビピフブプヘベペホボポマミムメモャヤュユョヨラリルレロワワイエヲンヴカケヷイ゙エ゙ヺヽヾヿクシストヌハヒフヘホムラリルレロヲァィゥェォャュョッアイウエオカキクケコサシスセソタチツテトナニヌネノハヒフヘホマミムメモヤユヨラリルレロワンイエヲン"), 63 | formName: "narrow", 64 | err: nil, 65 | }, 66 | } 67 | for _, tc := range testCases { 68 | buf := &bytes.Buffer{} 69 | if err := Convert(tc.formName, buf, bytes.NewReader(tc.inp)); err != nil { 70 | if !errs.Is(err, tc.err) { 71 | t.Errorf("Translate() error = \"%+v\", want \"%+v\".", err, tc.err) 72 | } 73 | } else if !bytes.Equal(buf.Bytes(), tc.out) { 74 | fmt.Println(buf.String()) 75 | t.Errorf("Translate(%s) -> %s, want %s", tc.formName, dump.OctetString(buf), dump.OctetString(bytes.NewReader(tc.out))) 76 | } 77 | } 78 | } 79 | 80 | /* Copyright 2020 Spiegel 81 | * 82 | * Licensed under the Apache License, Version 2.0 (the "License"); 83 | * you may not use this file except in compliance with the License. 
84 | * You may obtain a copy of the License at 85 | * 86 | * http://www.apache.org/licenses/LICENSE-2.0 87 | * 88 | * Unless required by applicable law or agreed to in writing, software 89 | * distributed under the License is distributed on an "AS IS" BASIS, 90 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 91 | * See the License for the specific language governing permissions and 92 | * limitations under the License. 93 | */ 94 | --------------------------------------------------------------------------------