├── .github └── workflows │ └── ci.yml ├── CHANGELOG.md ├── LICENSE-APACHE-2.0.txt ├── LICENSE.txt ├── README.md ├── convert.go ├── convert_canonical_test.go ├── convert_compat_test.go ├── go.mod ├── go.sum ├── nkf ├── convert.go ├── convert_test.go ├── optparse.go └── optparse_test.go ├── options.go ├── options_test.go └── stream.go /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: Test 2 | 3 | on: 4 | push: 5 | pull_request: 6 | 7 | jobs: 8 | test: 9 | runs-on: ubuntu-latest 10 | strategy: 11 | matrix: 12 | go-version: ['1.13', '1.23.x'] 13 | steps: 14 | - uses: actions/checkout@v4 15 | - name: Setup Go ${{ matrix.go-version }} 16 | uses: actions/setup-go@v5 17 | with: 18 | go-version: ${{ matrix.go-version }} 19 | - name: Install nkf for comparison 20 | run: sudo apt-get install nkf 21 | - name: Test 22 | run: go test ./... 23 | - name: Check formatting 24 | run: | 25 | go fmt ./... 26 | git diff --exit-code 27 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | ## v0.1.0 2 | 3 | Initial release. 4 | -------------------------------------------------------------------------------- /LICENSE-APACHE-2.0.txt: -------------------------------------------------------------------------------- 1 | Copyright 2024 Masaki Hara 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright 2024 Masaki Hara 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 4 | 5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 6 | 7 | THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 8 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Kana 2 | 3 | [![Go Reference](https://pkg.go.dev/badge/github.com/wantedly/kana-go.svg)](https://pkg.go.dev/github.com/wantedly/kana-go) 4 | 5 | Module kana provides transformation between: 6 | 7 | - Fullwidth and halfwidth characters 8 | - Katakana and hiragana 9 | 10 | It also provides an NKF-compatible wrapper.
11 | 12 | ## Example 13 | 14 | ```go 15 | package main 16 | 17 | import ( 18 | "fmt" 19 | 20 | "github.com/wantedly/kana-go" 21 | ) 22 | 23 | func main() { 24 | str := kana.Convert("ＡＢＣ　ＤＥＦ", kana.FullwidthToNarrow) 25 | fmt.Println(str) // Output: ABC DEF 26 | } 27 | ``` 28 | 29 | ## NKF-compatible example 30 | 31 | ```go 32 | package main 33 | 34 | import ( 35 | "fmt" 36 | 37 | "github.com/wantedly/kana-go/nkf" 38 | ) 39 | 40 | func main() { 41 | str, err := nkf.Convert("ＡＢＣ　ＤＥＦ", "-w -W -m0 -Z1") 42 | if err != nil { 43 | panic(err) 44 | } 45 | fmt.Println(str) // Output: ABC DEF 46 | } 47 | ``` 48 | -------------------------------------------------------------------------------- /convert.go: -------------------------------------------------------------------------------- 1 | // Package kana provides transformation between: 2 | // 3 | // - Fullwidth and halfwidth characters 4 | // - Katakana and hiragana 5 | // 6 | // # Example 7 | // 8 | // package main 9 | // 10 | // import ( 11 | // "fmt" 12 | // 13 | // "github.com/wantedly/kana-go" 14 | // ) 15 | // 16 | // func main() { 17 | // str := kana.Convert("ＡＢＣ　ＤＥＦ", kana.FullwidthToNarrow) 18 | // fmt.Println(str) // Output: ABC DEF 19 | // } 20 | package kana 21 | 22 | // Convert converts a string with the given options.
23 | func Convert(input string, opts ConvertOptions) string { 24 | opts = opts.Normalize() 25 | strm := stringStream(input) 26 | 27 | strm = convertUnconditionalCompat(strm, opts) 28 | // Full <-> Half conversion 29 | strm = doWidthNormalization(strm, opts) 30 | strm = doKanaConversion(strm, opts) 31 | 32 | return strm.readAll() 33 | } 34 | 35 | func convertUnconditionalCompat(strm *stream, opts ConvertOptions) *stream { 36 | if opts&(CompatMinus|CompatOverline|CompatCurrency|CompatOtherSymbols) == 0 { 37 | return strm 38 | } 39 | return mapStream(strm, func(ch rune) rune { 40 | if opts&CompatMinus != 0 { 41 | switch ch { 42 | case '\u2015': 43 | return '\u2014' 44 | case '\uFF0D': 45 | return '\u2212' 46 | } 47 | } 48 | if opts&CompatOverline != 0 { 49 | switch ch { 50 | case '\uFFE3': 51 | return '\u203E' 52 | } 53 | } 54 | if opts&CompatCurrency != 0 { 55 | switch ch { 56 | case '\uFFE0': 57 | return '\u00A2' 58 | case '\uFFE1': 59 | return '\u00A3' 60 | case '\uFFE5': 61 | return '\u00A5' 62 | } 63 | } 64 | if opts&CompatOtherSymbols != 0 { 65 | switch ch { 66 | case '\u2225': 67 | return '\u2016' 68 | case '\uFFE2': 69 | return '\u00AC' 70 | case '\uFFE4': 71 | return '\u00A6' 72 | } 73 | } 74 | return ch 75 | }) 76 | } 77 | 78 | func doWidthNormalization(strm *stream, opts ConvertOptions) *stream { 79 | if opts&(FullwidthToNarrow|CompatWideKatakanaToHalfwidth|HalfwidthToWide) == 0 { 80 | return strm 81 | } 82 | return newStream(func(buf *[]rune) { 83 | ch, ok := strm.readOne() 84 | if !ok { 85 | return 86 | } 87 | 88 | if ok := convertFullwidthToNarrow(ch, buf, opts); ok { 89 | // Do nothing 90 | } else if ok := convertWideKatakanaToHalfwidth(ch, buf, opts); ok { 91 | // Do nothing 92 | } else if ok := convertHalfwidthToWide(ch, strm, buf, opts); ok { 93 | // Do nothing 94 | } else { 95 | *buf = append(*buf, ch) 96 | } 97 | }) 98 | } 99 | 100 | func convertFullwidthToNarrow(ch rune, buf *[]rune, opts ConvertOptions) bool { 101 | if opts&FullwidthToNarrow == 0 
{ 102 | return false 103 | } 104 | if opts&CompatQuotes != 0 { 105 | switch ch { 106 | case '\u00B4', '\u2019': 107 | *buf = append(*buf, '\'') 108 | return true 109 | case '\u2018': 110 | *buf = append(*buf, '`') 111 | return true 112 | case '\u201C', '\u201D': 113 | *buf = append(*buf, '"') 114 | return true 115 | case '\uFF02', '\uFF07': 116 | return false 117 | } 118 | } 119 | if opts&CompatMinus != 0 { 120 | switch ch { 121 | case '\u2014', '\u2212': 122 | // '\u2015' also falls here because it is converted to '\u2014' in convertUnconditionalCompat 123 | // '\uFF0D' also falls here because it is converted to '\u2212' in convertUnconditionalCompat 124 | *buf = append(*buf, '-') 125 | return true 126 | } 127 | } 128 | if opts&CompatOverline != 0 { 129 | switch ch { 130 | case '\uFF5E': 131 | return false 132 | } 133 | } 134 | if opts&CompatCurrency != 0 { 135 | switch ch { 136 | case '\uFFE6': 137 | return false 138 | } 139 | } 140 | if opts&CompatBrackets != 0 { 141 | switch ch { 142 | case '\u3008': 143 | *buf = append(*buf, '<') 144 | return true 145 | case '\u3009': 146 | *buf = append(*buf, '>') 147 | return true 148 | case '\uFF5F', '\uFF60': 149 | return false 150 | } 151 | } 152 | if opts&CompatKeepSpaces != 0 && ch == '\u3000' { 153 | return false 154 | } else if opts&CompatDoubleSpaces != 0 && ch == '\u3000' { 155 | *buf = append(*buf, ' ', ' ') 156 | return true 157 | } 158 | if ch >= '\uFF01' && ch <= '\uFF5E' { 159 | *buf = append(*buf, ch-'\uFF00'+' ') 160 | return true 161 | } else if ch == '\u3000' { 162 | *buf = append(*buf, ' ') 163 | return true 164 | } else if mapped, ok := fullwidthMap[ch]; ok { 165 | *buf = append(*buf, mapped) 166 | return true 167 | } 168 | return false 169 | } 170 | 171 | var fullwidthMap = map[rune]rune{ 172 | '\uFF5F': '\u2985', 173 | '\uFF60': '\u2986', 174 | '\uFFE0': '\u00A2', 175 | '\uFFE1': '\u00A3', 176 | '\uFFE2': '\u00AC', 177 | // NOTE: this is different from how it normalizes to in NFKC, which is the sequence
U+0020 U+0304 178 | '\uFFE3': '\u00AF', 179 | '\uFFE4': '\u00A6', 180 | '\uFFE5': '\u00A5', 181 | '\uFFE6': '\u20A9', 182 | } 183 | 184 | func convertWideKatakanaToHalfwidth(ch rune, buf *[]rune, opts ConvertOptions) bool { 185 | if opts&CompatWideKatakanaToHalfwidth == 0 { 186 | return false 187 | } 188 | if mapped, ok := fullwidthKatakanaTable[ch]; ok { 189 | for _, mappedCh := range mapped { 190 | *buf = append(*buf, mappedCh) 191 | } 192 | return true 193 | } 194 | return false 195 | } 196 | 197 | var fullwidthKatakanaTable = map[rune]string{ 198 | '\u3001': "\uFF64", 199 | '\u3002': "\uFF61", 200 | '\u300C': "\uFF62", 201 | '\u300D': "\uFF63", 202 | '\u3099': "\uFF9E", 203 | '\u309A': "\uFF9F", 204 | '\u309B': "\uFF9E", 205 | '\u309C': "\uFF9F", 206 | '\u30A1': "\uFF67", 207 | '\u30A2': "\uFF71", 208 | '\u30A3': "\uFF68", 209 | '\u30A4': "\uFF72", 210 | '\u30A5': "\uFF69", 211 | '\u30A6': "\uFF73", 212 | '\u30A7': "\uFF6A", 213 | '\u30A8': "\uFF74", 214 | '\u30A9': "\uFF6B", 215 | '\u30AA': "\uFF75", 216 | '\u30AB': "\uFF76", 217 | '\u30AC': "\uFF76\uFF9E", 218 | '\u30AD': "\uFF77", 219 | '\u30AE': "\uFF77\uFF9E", 220 | '\u30AF': "\uFF78", 221 | '\u30B0': "\uFF78\uFF9E", 222 | '\u30B1': "\uFF79", 223 | '\u30B2': "\uFF79\uFF9E", 224 | '\u30B3': "\uFF7A", 225 | '\u30B4': "\uFF7A\uFF9E", 226 | '\u30B5': "\uFF7B", 227 | '\u30B6': "\uFF7B\uFF9E", 228 | '\u30B7': "\uFF7C", 229 | '\u30B8': "\uFF7C\uFF9E", 230 | '\u30B9': "\uFF7D", 231 | '\u30BA': "\uFF7D\uFF9E", 232 | '\u30BB': "\uFF7E", 233 | '\u30BC': "\uFF7E\uFF9E", 234 | '\u30BD': "\uFF7F", 235 | '\u30BE': "\uFF7F\uFF9E", 236 | '\u30BF': "\uFF80", 237 | '\u30C0': "\uFF80\uFF9E", 238 | '\u30C1': "\uFF81", 239 | '\u30C2': "\uFF81\uFF9E", 240 | '\u30C3': "\uFF6F", 241 | '\u30C4': "\uFF82", 242 | '\u30C5': "\uFF82\uFF9E", 243 | '\u30C6': "\uFF83", 244 | '\u30C7': "\uFF83\uFF9E", 245 | '\u30C8': "\uFF84", 246 | '\u30C9': "\uFF84\uFF9E", 247 | '\u30CA': "\uFF85", 248 | '\u30CB': "\uFF86", 249 | '\u30CC': "\uFF87", 250 
| '\u30CD': "\uFF88", 251 | '\u30CE': "\uFF89", 252 | '\u30CF': "\uFF8A", 253 | '\u30D0': "\uFF8A\uFF9E", 254 | '\u30D1': "\uFF8A\uFF9F", 255 | '\u30D2': "\uFF8B", 256 | '\u30D3': "\uFF8B\uFF9E", 257 | '\u30D4': "\uFF8B\uFF9F", 258 | '\u30D5': "\uFF8C", 259 | '\u30D6': "\uFF8C\uFF9E", 260 | '\u30D7': "\uFF8C\uFF9F", 261 | '\u30D8': "\uFF8D", 262 | '\u30D9': "\uFF8D\uFF9E", 263 | '\u30DA': "\uFF8D\uFF9F", 264 | '\u30DB': "\uFF8E", 265 | '\u30DC': "\uFF8E\uFF9E", 266 | '\u30DD': "\uFF8E\uFF9F", 267 | '\u30DE': "\uFF8F", 268 | '\u30DF': "\uFF90", 269 | '\u30E0': "\uFF91", 270 | '\u30E1': "\uFF92", 271 | '\u30E2': "\uFF93", 272 | '\u30E3': "\uFF6C", 273 | '\u30E4': "\uFF94", 274 | '\u30E5': "\uFF6D", 275 | '\u30E6': "\uFF95", 276 | '\u30E7': "\uFF6E", 277 | '\u30E8': "\uFF96", 278 | '\u30E9': "\uFF97", 279 | '\u30EA': "\uFF98", 280 | '\u30EB': "\uFF99", 281 | '\u30EC': "\uFF9A", 282 | '\u30ED': "\uFF9B", 283 | '\u30EF': "\uFF9C", 284 | '\u30F2': "\uFF66", 285 | '\u30F3': "\uFF9D", 286 | '\u30F4': "\uFF73\uFF9E", 287 | '\u30FB': "\uFF65", 288 | '\u30FC': "\uFF70", 289 | } 290 | 291 | func convertHalfwidthToWide(ch rune, strm *stream, buf *[]rune, opts ConvertOptions) bool { 292 | if opts&HalfwidthToWide == 0 { 293 | return false 294 | } 295 | if opts&CompatVoicedSoundMarks != 0 { 296 | // Use a non-combining version 297 | switch ch { 298 | case '\uFF9E': 299 | *buf = append(*buf, '\u309B') 300 | return true 301 | case '\uFF9F': 302 | *buf = append(*buf, '\u309C') 303 | return true 304 | } 305 | } 306 | if opts&CompatKeepHalfwidthHangul != 0 && ('\uFFA0' <= ch && ch <= '\uFFDC') { 307 | return false 308 | } 309 | if opts&CompatKeepHalfwidthSymbols != 0 && ('\uFFE0' <= ch && ch <= '\uFFEF') { 310 | return false 311 | } 312 | if ch >= '\uFF61' && ch <= '\uFFEF' { 313 | mapped, ok := halfwidthMap[ch] 314 | if ok { 315 | nextCh, _ := strm.peekOne() 316 | if nextCh == '\uFF9E' { 317 | if opts&CompatVoicedKanaRestriction != 0 && (ch == '\uFF66' || ch == '\uFF9C') { 318 | *buf 
= append(*buf, mapped) 319 | return true 320 | } 321 | if voiced, ok := halfwidthVoicedKatakanaTable[ch]; ok { 322 | strm.consume(1) 323 | *buf = append(*buf, voiced) 324 | return true 325 | } 326 | } else if nextCh == '\uFF9F' { 327 | if semiVoiced, ok := halfwidthSemiVoicedKatakanaTable[ch]; ok { 328 | strm.consume(1) 329 | *buf = append(*buf, semiVoiced) 330 | return true 331 | } 332 | } 333 | 334 | *buf = append(*buf, mapped) 335 | return true 336 | } 337 | } 338 | return false 339 | } 340 | 341 | var halfwidthMap = map[rune]rune{ 342 | // Katakana and relevant punctuations 343 | '\uFF61': '\u3002', 344 | '\uFF62': '\u300C', 345 | '\uFF63': '\u300D', 346 | '\uFF64': '\u3001', 347 | '\uFF65': '\u30FB', 348 | '\uFF66': '\u30F2', 349 | '\uFF67': '\u30A1', 350 | '\uFF68': '\u30A3', 351 | '\uFF69': '\u30A5', 352 | '\uFF6A': '\u30A7', 353 | '\uFF6B': '\u30A9', 354 | '\uFF6C': '\u30E3', 355 | '\uFF6D': '\u30E5', 356 | '\uFF6E': '\u30E7', 357 | '\uFF6F': '\u30C3', 358 | '\uFF70': '\u30FC', 359 | '\uFF71': '\u30A2', 360 | '\uFF72': '\u30A4', 361 | '\uFF73': '\u30A6', 362 | '\uFF74': '\u30A8', 363 | '\uFF75': '\u30AA', 364 | '\uFF76': '\u30AB', 365 | '\uFF77': '\u30AD', 366 | '\uFF78': '\u30AF', 367 | '\uFF79': '\u30B1', 368 | '\uFF7A': '\u30B3', 369 | '\uFF7B': '\u30B5', 370 | '\uFF7C': '\u30B7', 371 | '\uFF7D': '\u30B9', 372 | '\uFF7E': '\u30BB', 373 | '\uFF7F': '\u30BD', 374 | '\uFF80': '\u30BF', 375 | '\uFF81': '\u30C1', 376 | '\uFF82': '\u30C4', 377 | '\uFF83': '\u30C6', 378 | '\uFF84': '\u30C8', 379 | '\uFF85': '\u30CA', 380 | '\uFF86': '\u30CB', 381 | '\uFF87': '\u30CC', 382 | '\uFF88': '\u30CD', 383 | '\uFF89': '\u30CE', 384 | '\uFF8A': '\u30CF', 385 | '\uFF8B': '\u30D2', 386 | '\uFF8C': '\u30D5', 387 | '\uFF8D': '\u30D8', 388 | '\uFF8E': '\u30DB', 389 | '\uFF8F': '\u30DE', 390 | '\uFF90': '\u30DF', 391 | '\uFF91': '\u30E0', 392 | '\uFF92': '\u30E1', 393 | '\uFF93': '\u30E2', 394 | '\uFF94': '\u30E4', 395 | '\uFF95': '\u30E6', 396 | '\uFF96': '\u30E8', 397 | 
'\uFF97': '\u30E9', 398 | '\uFF98': '\u30EA', 399 | '\uFF99': '\u30EB', 400 | '\uFF9A': '\u30EC', 401 | '\uFF9B': '\u30ED', 402 | '\uFF9C': '\u30EF', 403 | '\uFF9D': '\u30F3', 404 | // Use combining version for the voiced and semi-voiced marks, 405 | // although the halfwidth forms are non-combining. 406 | // It aligns with NFKC behavior and is justified by how 407 | // one would expect the mark to behave. 408 | // In NKF compat mode, the non-combining version is used, 409 | // but it tries to use the precomposed form, if it is available. 410 | '\uFF9E': '\u3099', 411 | '\uFF9F': '\u309A', 412 | // These are halfwidth versions of Hangul **Compatibility** Jamos 413 | // rather than the Unicode proper Hangul Jamos. 414 | // They lack distinction between L (leading consonant; choseong) 415 | // and T (trailing consonant; jongseong) and therefore it is difficult to 416 | // determine the syllable boundary 417 | // the way Unicode does for the proper Hangul Jamos. 418 | // They are merely for round-trip compatibility with legacy encodings. 419 | // To align with how Unicode handles these characters, we do not try 420 | // to determine the consonant type or compose them into a precomposed syllable.
421 | '\uFFA0': '\u3164', 422 | '\uFFA1': '\u3131', 423 | '\uFFA2': '\u3132', 424 | '\uFFA3': '\u3133', 425 | '\uFFA4': '\u3134', 426 | '\uFFA5': '\u3135', 427 | '\uFFA6': '\u3136', 428 | '\uFFA7': '\u3137', 429 | '\uFFA8': '\u3138', 430 | '\uFFA9': '\u3139', 431 | '\uFFAA': '\u313A', 432 | '\uFFAB': '\u313B', 433 | '\uFFAC': '\u313C', 434 | '\uFFAD': '\u313D', 435 | '\uFFAE': '\u313E', 436 | '\uFFAF': '\u313F', 437 | '\uFFB0': '\u3140', 438 | '\uFFB1': '\u3141', 439 | '\uFFB2': '\u3142', 440 | '\uFFB3': '\u3143', 441 | '\uFFB4': '\u3144', 442 | '\uFFB5': '\u3145', 443 | '\uFFB6': '\u3146', 444 | '\uFFB7': '\u3147', 445 | '\uFFB8': '\u3148', 446 | '\uFFB9': '\u3149', 447 | '\uFFBA': '\u314A', 448 | '\uFFBB': '\u314B', 449 | '\uFFBC': '\u314C', 450 | '\uFFBD': '\u314D', 451 | '\uFFBE': '\u314E', 452 | '\uFFC2': '\u314F', 453 | '\uFFC3': '\u3150', 454 | '\uFFC4': '\u3151', 455 | '\uFFC5': '\u3152', 456 | '\uFFC6': '\u3153', 457 | '\uFFC7': '\u3154', 458 | '\uFFCA': '\u3155', 459 | '\uFFCB': '\u3156', 460 | '\uFFCC': '\u3157', 461 | '\uFFCD': '\u3158', 462 | '\uFFCE': '\u3159', 463 | '\uFFCF': '\u315A', 464 | '\uFFD2': '\u315B', 465 | '\uFFD3': '\u315C', 466 | '\uFFD4': '\u315D', 467 | '\uFFD5': '\u315E', 468 | '\uFFD6': '\u315F', 469 | '\uFFD7': '\u3160', 470 | '\uFFDA': '\u3161', 471 | '\uFFDB': '\u3162', 472 | '\uFFDC': '\u3163', 473 | // Halfwidth forms of symbols 474 | // U+FFE8 is HALFWIDTH FORMS LIGHT VERTICAL while U+2502 is BOX DRAWINGS LIGHT VERTICAL. 475 | // Unicode gives U+FFE8 the compatibility decomposition <narrow> U+2502, so this is the intended mapping.
476 | '\uFFE8': '\u2502', 477 | '\uFFE9': '\u2190', 478 | '\uFFEA': '\u2191', 479 | '\uFFEB': '\u2192', 480 | '\uFFEC': '\u2193', 481 | '\uFFED': '\u25A0', 482 | '\uFFEE': '\u25CB', 483 | } 484 | 485 | var halfwidthVoicedKatakanaTable = map[rune]rune{ 486 | '\uFF66': '\u30FA', 487 | '\uFF73': '\u30F4', 488 | '\uFF76': '\u30AC', 489 | '\uFF77': '\u30AE', 490 | '\uFF78': '\u30B0', 491 | '\uFF79': '\u30B2', 492 | '\uFF7A': '\u30B4', 493 | '\uFF7B': '\u30B6', 494 | '\uFF7C': '\u30B8', 495 | '\uFF7D': '\u30BA', 496 | '\uFF7E': '\u30BC', 497 | '\uFF7F': '\u30BE', 498 | '\uFF80': '\u30C0', 499 | '\uFF81': '\u30C2', 500 | '\uFF82': '\u30C5', 501 | '\uFF83': '\u30C7', 502 | '\uFF84': '\u30C9', 503 | '\uFF8A': '\u30D0', 504 | '\uFF8B': '\u30D3', 505 | '\uFF8C': '\u30D6', 506 | '\uFF8D': '\u30D9', 507 | '\uFF8E': '\u30DC', 508 | '\uFF9C': '\u30F7', 509 | } 510 | 511 | var halfwidthSemiVoicedKatakanaTable = map[rune]rune{ 512 | '\uFF8A': '\u30D1', 513 | '\uFF8B': '\u30D4', 514 | '\uFF8C': '\u30D7', 515 | '\uFF8D': '\u30DA', 516 | '\uFF8E': '\u30DD', 517 | } 518 | 519 | func doKanaConversion(strm *stream, opts ConvertOptions) *stream { 520 | if opts&(KatakanaToHiragana|HiraganaToKatakana) == 0 { 521 | return strm 522 | } 523 | return newStream(func(buf *[]rune) { 524 | ch, ok := strm.readOne() 525 | if !ok { 526 | return 527 | } 528 | 529 | if ok := convertKatakanaToHiragana(ch, buf, opts); ok { 530 | // Do nothing 531 | } else if ok := convertHiraganaToKatakana(ch, buf, opts); ok { 532 | // Do nothing 533 | } else { 534 | *buf = append(*buf, ch) 535 | } 536 | }) 537 | } 538 | 539 | func convertKatakanaToHiragana(ch rune, buf *[]rune, opts ConvertOptions) bool { 540 | if opts&KatakanaToHiragana == 0 { 541 | return false 542 | } 543 | if opts&CompatKanaRestriction != 0 && !(ch >= '\u30A1' && ch <= '\u30F4' || ch >= '\u30FD' && ch <= '\u30FE') { 544 | return false 545 | } 546 | 547 | if ch >= '\u30A1' && ch <= '\u30F4' || ch >= '\u30F5' && ch <= '\u30F6' || ch >= '\u30FD' && ch 
<= '\u30FE' { 548 | *buf = append(*buf, ch-'\u30A0'+'\u3040') 549 | return true 550 | } 551 | switch ch { 552 | case '\u30F7': 553 | *buf = append(*buf, '\u308F', '\u3099') 554 | return true 555 | case '\u30F8': 556 | *buf = append(*buf, '\u3090', '\u3099') 557 | return true 558 | case '\u30F9': 559 | *buf = append(*buf, '\u3091', '\u3099') 560 | return true 561 | case '\u30FA': 562 | *buf = append(*buf, '\u3092', '\u3099') 563 | return true 564 | case '\U0001B155': 565 | *buf = append(*buf, '\U0001B132') 566 | return true 567 | case '\U0001B164': 568 | *buf = append(*buf, '\U0001B150') 569 | return true 570 | case '\U0001B165': 571 | *buf = append(*buf, '\U0001B151') 572 | return true 573 | case '\U0001B166': 574 | *buf = append(*buf, '\U0001B152') 575 | return true 576 | } 577 | return false 578 | } 579 | 580 | func convertHiraganaToKatakana(ch rune, buf *[]rune, opts ConvertOptions) bool { 581 | if opts&HiraganaToKatakana == 0 { 582 | return false 583 | } 584 | if opts&CompatKanaRestriction != 0 && !(ch >= '\u3041' && ch <= '\u3094' || ch >= '\u309D' && ch <= '\u309E') { 585 | return false 586 | } 587 | 588 | if ch >= '\u3041' && ch <= '\u3094' || ch >= '\u3095' && ch <= '\u3096' || ch >= '\u309D' && ch <= '\u309E' { 589 | *buf = append(*buf, ch-'\u3040'+'\u30A0') 590 | return true 591 | } 592 | switch ch { 593 | case '\U0001B132': 594 | *buf = append(*buf, '\U0001B155') 595 | return true 596 | case '\U0001B150': 597 | *buf = append(*buf, '\U0001B164') 598 | return true 599 | case '\U0001B151': 600 | *buf = append(*buf, '\U0001B165') 601 | return true 602 | case '\U0001B152': 603 | *buf = append(*buf, '\U0001B166') 604 | return true 605 | } 606 | return false 607 | } 608 | -------------------------------------------------------------------------------- /convert_canonical_test.go: -------------------------------------------------------------------------------- 1 | package kana_test 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/google/go-cmp/cmp" 7 | 
"github.com/wantedly/kana-go" 8 | ) 9 | 10 | func TestCanonicalConvert(t *testing.T) { 11 | var testcases = []struct { 12 | name string 13 | input string 14 | options kana.ConvertOptions 15 | expect string 16 | }{ 17 | { 18 | name: "Base case ASCII Printable", 19 | input: " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~", 20 | options: 0, 21 | expect: " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~", 22 | }, 23 | { 24 | name: "Base case Latin-1 Printable", 25 | input: "¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖרÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ", 26 | options: 0, 27 | expect: "¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖרÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ", 28 | }, 29 | { 30 | name: "Base case Latin-1 Formatting", 31 | input: "\u00A0\u00AD", 32 | options: 0, 33 | expect: "\u00A0\u00AD", 34 | }, 35 | { 36 | name: "Base case General Punctuation Printable", 37 | input: "‐‒–—―‖‗‘’‚‛“”„‟†‡•‣․‥…‧‰‱′″‴‵‶‷‸‹›※‼‽‾‿⁀⁁⁂⁃⁄⁅⁆⁇⁈⁉⁊⁋⁌⁍⁎⁏⁐⁑⁒⁓⁔⁕⁖⁗⁘⁙⁚⁛⁜⁝⁞", 38 | options: 0, 39 | expect: "‐‒–—―‖‗‘’‚‛“”„‟†‡•‣․‥…‧‰‱′″‴‵‶‷‸‹›※‼‽‾‿⁀⁁⁂⁃⁄⁅⁆⁇⁈⁉⁊⁋⁌⁍⁎⁏⁐⁑⁒⁓⁔⁕⁖⁗⁘⁙⁚⁛⁜⁝⁞", 40 | }, 41 | { 42 | name: "Base case Some of Mathematical Operators", 43 | input: "−∥", 44 | options: 0, 45 | expect: "−∥", 46 | }, 47 | { 48 | name: "Base case CJK Symbols and Punctuation", 49 | input: " 、。〃〄々〆〇〈〉《》「」『』【】〒〓〔〕〖〗〘〙〚〛〜〝〞〟〠〡〢〣〤〥〦〧〨〩〪〭〫〬\u302E\u302F〰〱〲〳〴〵〶〷〸〹〺〻〼〽\u303E\u303F", 50 | options: 0, 51 | expect: " 、。〃〄々〆〇〈〉《》「」『』【】〒〓〔〕〖〗〘〙〚〛〜〝〞〟〠〡〢〣〤〥〦〧〨〩〪〭〫〬\u302E\u302F〰〱〲〳〴〵〶〷〸〹〺〻〼〽\u303E\u303F", 52 | }, 53 | { 54 | name: "Base case Hiragana", 55 | input: "ぁあぃいぅうぇえぉおかがきぎくぐけげこごさざしじすずせぜそぞただちぢっつづてでとどなにぬねのはばぱひびぴふぶぷへべぺほぼぽまみむめもゃやゅゆょよらりるれろゎわゐゑをんゔゕゖ\u3099\u309A゛゜ゝゞゟ", 56 | options: 0, 57 | expect: "ぁあぃいぅうぇえぉおかがきぎくぐけげこごさざしじすずせぜそぞただちぢっつづてでとどなにぬねのはばぱひびぴふぶぷへべぺほぼぽまみむめもゃやゅゆょよらりるれろゎわゐゑをんゔゕゖ\u3099\u309A゛゜ゝゞゟ", 58 | }, 59 | { 60 | name: "Base case Katakana", 61 | input: 
"゠ァアィイゥウェエォオカガキギクグケゲコゴサザシジスズセゼソゾタダチヂッツヅテデトドナニヌネノハバパヒビピフブプヘベペホボポマミムメモャヤュユョヨラリルレロヮワヰヱヲンヴヵヶヷヸヹヺ・ーヽヾヿ", 62 | options: 0, 63 | expect: "゠ァアィイゥウェエォオカガキギクグケゲコゴサザシジスズセゼソゾタダチヂッツヅテデトドナニヌネノハバパヒビピフブプヘベペホボポマミムメモャヤュユョヨラリルレロヮワヰヱヲンヴヵヶヷヸヹヺ・ーヽヾヿ", 64 | }, 65 | { 66 | name: "Base case Katakana Phonetic Extensions", 67 | input: "ㇰㇱㇲㇳㇴㇵㇶㇷㇸㇹㇺㇻㇼㇽㇾㇿ", 68 | options: 0, 69 | expect: "ㇰㇱㇲㇳㇴㇵㇶㇷㇸㇹㇺㇻㇼㇽㇾㇿ", 70 | }, 71 | { 72 | name: "Base case Fullwidth forms", 73 | input: "!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~⦅⦆¢£¬ ̄¦¥₩", 74 | options: 0, 75 | expect: "!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~⦅⦆¢£¬ ̄¦¥₩", 76 | }, 77 | { 78 | name: "Base case Halfwidth forms", 79 | input: "。「」、・ヲァィゥェォャュョッーアイウエオカキクケコサシスセソタチツテトナニヌネノハヒフヘホマミムメモヤユヨラリルレロワン\uFF9E\uFF9F\uFFA0ᄀᄁᆪᄂᆬᆭᄃᄄᄅᆰᆱᆲᆳᆴᆵᄚᄆᄇᄈᄡᄉᄊᄋᄌᄍᄎᄏᄐᄑ하ᅢᅣᅤᅥᅦᅧᅨᅩᅪᅫᅬᅭᅮᅯᅰᅱᅲᅳᅴᅵ│←↑→↓■○", 80 | options: 0, 81 | expect: "。「」、・ヲァィゥェォャュョッーアイウエオカキクケコサシスセソタチツテトナニヌネノハヒフヘホマミムメモヤユヨラリルレロワン\uFF9E\uFF9F\uFFA0ᄀᄁᆪᄂᆬᆭᄃᄄᄅᆰᆱᆲᆳᆴᆵᄚᄆᄇᄈᄡᄉᄊᄋᄌᄍᄎᄏᄐᄑ하ᅢᅣᅤᅥᅦᅧᅨᅩᅪᅫᅬᅭᅮᅯᅰᅱᅲᅳᅴᅵ│←↑→↓■○", 82 | }, 83 | { 84 | name: "Base case Small Kana Extension", 85 | input: "𛄲𛅐𛅑𛅒𛅕𛅤𛅥𛅦𛅧", 86 | options: 0, 87 | expect: "𛄲𛅐𛅑𛅒𛅕𛅤𛅥𛅦𛅧", 88 | }, 89 | { 90 | name: "With KatakanaToHiragana Hiragana", 91 | input: "ぁあぃいぅうぇえぉおかがきぎくぐけげこごさざしじすずせぜそぞただちぢっつづてでとどなにぬねのはばぱひびぴふぶぷへべぺほぼぽまみむめもゃやゅゆょよらりるれろゎわゐゑをんゔゕゖ\u3099\u309A゛゜ゝゞゟ", 92 | options: kana.KatakanaToHiragana, 93 | expect: "ぁあぃいぅうぇえぉおかがきぎくぐけげこごさざしじすずせぜそぞただちぢっつづてでとどなにぬねのはばぱひびぴふぶぷへべぺほぼぽまみむめもゃやゅゆょよらりるれろゎわゐゑをんゔゕゖ\u3099\u309A゛゜ゝゞゟ", 94 | }, 95 | { 96 | name: "With KatakanaToHiragana Katakana", 97 | input: "゠ァアィイゥウェエォオカガキギクグケゲコゴサザシジスズセゼソゾタダチヂッツヅテデトドナニヌネノハバパヒビピフブプヘベペホボポマミムメモャヤュユョヨラリルレロヮワヰヱヲンヴヵヶヷヸヹヺ・ーヽヾヿ", 98 | options: kana.KatakanaToHiragana, 99 | expect: "゠ぁあぃいぅうぇえぉおかがきぎくぐけげこごさざしじすずせぜそぞただちぢっつづてでとどなにぬねのはばぱひびぴふぶぷへべぺほぼぽまみむめもゃやゅゆょよらりるれろゎわゐゑをんゔゕゖわ\u3099ゐ\u3099ゑ\u3099を\u3099・ーゝゞヿ", 100 | }, 101 | { 102 | name: "With KatakanaToHiragana Katakana Phonetic Extensions", 103 | input: 
"ㇰㇱㇲㇳㇴㇵㇶㇷㇸㇹㇺㇻㇼㇽㇾㇿ", 104 | options: kana.KatakanaToHiragana, 105 | expect: "ㇰㇱㇲㇳㇴㇵㇶㇷㇸㇹㇺㇻㇼㇽㇾㇿ", 106 | }, 107 | { 108 | name: "With KatakanaToHiragana Halfwidth Forms", 109 | input: "。「」、・ヲァィゥェォャュョッーアイウエオカキクケコサシスセソタチツテトナニヌネノハヒフヘホマミムメモヤユヨラリルレロワン\uFF9E\uFF9F\uFFA0ᄀᄁᆪᄂᆬᆭᄃᄄᄅᆰᆱᆲᆳᆴᆵᄚᄆᄇᄈᄡᄉᄊᄋᄌᄍᄎᄏᄐᄑ하ᅢᅣᅤᅥᅦᅧᅨᅩᅪᅫᅬᅭᅮᅯᅰᅱᅲᅳᅴᅵ│←↑→↓■○", 110 | options: kana.KatakanaToHiragana, 111 | expect: "。「」、・ヲァィゥェォャュョッーアイウエオカキクケコサシスセソタチツテトナニヌネノハヒフヘホマミムメモヤユヨラリルレロワン\uFF9E\uFF9F\uFFA0ᄀᄁᆪᄂᆬᆭᄃᄄᄅᆰᆱᆲᆳᆴᆵᄚᄆᄇᄈᄡᄉᄊᄋᄌᄍᄎᄏᄐᄑ하ᅢᅣᅤᅥᅦᅧᅨᅩᅪᅫᅬᅭᅮᅯᅰᅱᅲᅳᅴᅵ│←↑→↓■○", 112 | }, 113 | { 114 | name: "With KatakanaToHiragana Small Kana Extension", 115 | input: "𛄲𛅐𛅑𛅒𛅕𛅤𛅥𛅦𛅧", 116 | options: kana.KatakanaToHiragana, 117 | expect: "𛄲𛅐𛅑𛅒𛄲𛅐𛅑𛅒𛅧", 118 | }, 119 | { 120 | name: "With HiraganaToKatakana Hiragana", 121 | input: "ぁあぃいぅうぇえぉおかがきぎくぐけげこごさざしじすずせぜそぞただちぢっつづてでとどなにぬねのはばぱひびぴふぶぷへべぺほぼぽまみむめもゃやゅゆょよらりるれろゎわゐゑをんゔゕゖ\u3099\u309A゛゜ゝゞゟ", 122 | options: kana.HiraganaToKatakana, 123 | expect: "ァアィイゥウェエォオカガキギクグケゲコゴサザシジスズセゼソゾタダチヂッツヅテデトドナニヌネノハバパヒビピフブプヘベペホボポマミムメモャヤュユョヨラリルレロヮワヰヱヲンヴヵヶ\u3099\u309A゛゜ヽヾゟ", 124 | }, 125 | { 126 | name: "With HiraganaToKatakana Katakana", 127 | input: "゠ァアィイゥウェエォオカガキギクグケゲコゴサザシジスズセゼソゾタダチヂッツヅテデトドナニヌネノハバパヒビピフブプヘベペホボポマミムメモャヤュユョヨラリルレロヮワヰヱヲンヴヵヶヷヸヹヺ・ーヽヾヿ", 128 | options: kana.HiraganaToKatakana, 129 | expect: "゠ァアィイゥウェエォオカガキギクグケゲコゴサザシジスズセゼソゾタダチヂッツヅテデトドナニヌネノハバパヒビピフブプヘベペホボポマミムメモャヤュユョヨラリルレロヮワヰヱヲンヴヵヶヷヸヹヺ・ーヽヾヿ", 130 | }, 131 | { 132 | name: "With HiraganaToKatakana Katakana Phonetic Extensions", 133 | input: "ㇰㇱㇲㇳㇴㇵㇶㇷㇸㇹㇺㇻㇼㇽㇾㇿ", 134 | options: kana.HiraganaToKatakana, 135 | expect: "ㇰㇱㇲㇳㇴㇵㇶㇷㇸㇹㇺㇻㇼㇽㇾㇿ", 136 | }, 137 | { 138 | name: "With HiraganaToKatakana Halfwidth Katakana", 139 | input: "。「」、・ヲァィゥェォャュョッーアイウエオカキクケコサシスセソタチツテトナニヌネノハヒフヘホマミムメモヤユヨラリルレロワン\uFF9E\uFF9F\uFFA0ᄀᄁᆪᄂᆬᆭᄃᄄᄅᆰᆱᆲᆳᆴᆵᄚᄆᄇᄈᄡᄉᄊᄋᄌᄍᄎᄏᄐᄑ하ᅢᅣᅤᅥᅦᅧᅨᅩᅪᅫᅬᅭᅮᅯᅰᅱᅲᅳᅴᅵ│←↑→↓■○", 140 | options: kana.HiraganaToKatakana, 141 | expect: "。「」、・ヲァィゥェォャュョッーアイウエオカキクケコサシスセソタチツテトナニヌネノハヒフヘホマミムメモヤユヨラリルレロワン\uFF9E\uFF9F\uFFA0ᄀᄁᆪᄂᆬᆭᄃᄄᄅᆰᆱᆲᆳᆴᆵᄚᄆᄇᄈᄡᄉᄊᄋᄌᄍᄎᄏᄐᄑ하ᅢᅣᅤᅥᅦᅧᅨᅩᅪᅫᅬᅭᅮᅯᅰᅱᅲᅳᅴᅵ│←↑→↓■○", 142 | }, 143 | { 144 | 
name: "With HiraganaToKatakana Small Kana Extension", 145 | input: "𛄲𛅐𛅑𛅒𛅕𛅤𛅥𛅦𛅧", 146 | options: kana.HiraganaToKatakana, 147 | expect: "𛅕𛅤𛅥𛅦𛅕𛅤𛅥𛅦𛅧", 148 | }, 149 | { 150 | name: "With (KatakanaToHiragana | HiraganaToKatakana) Hiragana", 151 | input: "ぁあぃいぅうぇえぉおかがきぎくぐけげこごさざしじすずせぜそぞただちぢっつづてでとどなにぬねのはばぱひびぴふぶぷへべぺほぼぽまみむめもゃやゅゆょよらりるれろゎわゐゑをんゔゕゖ\u3099\u309A゛゜ゝゞゟ", 152 | options: kana.KatakanaToHiragana | kana.HiraganaToKatakana, 153 | expect: "ァアィイゥウェエォオカガキギクグケゲコゴサザシジスズセゼソゾタダチヂッツヅテデトドナニヌネノハバパヒビピフブプヘベペホボポマミムメモャヤュユョヨラリルレロヮワヰヱヲンヴヵヶ\u3099\u309A゛゜ヽヾゟ", 154 | }, 155 | { 156 | name: "With (KatakanaToHiragana | HiraganaToKatakana) Katakana", 157 | input: "゠ァアィイゥウェエォオカガキギクグケゲコゴサザシジスズセゼソゾタダチヂッツヅテデトドナニヌネノハバパヒビピフブプヘベペホボポマミムメモャヤュユョヨラリルレロヮワヰヱヲンヴヵヶヷヸヹヺ・ーヽヾヿ", 158 | options: kana.KatakanaToHiragana | kana.HiraganaToKatakana, 159 | expect: "゠ぁあぃいぅうぇえぉおかがきぎくぐけげこごさざしじすずせぜそぞただちぢっつづてでとどなにぬねのはばぱひびぴふぶぷへべぺほぼぽまみむめもゃやゅゆょよらりるれろゎわゐゑをんゔゕゖわ\u3099ゐ\u3099ゑ\u3099を\u3099・ーゝゞヿ", 160 | }, 161 | { 162 | name: "With (KatakanaToHiragana | HiraganaToKatakana) Katakana Phonetic Extensions", 163 | input: "ㇰㇱㇲㇳㇴㇵㇶㇷㇸㇹㇺㇻㇼㇽㇾㇿ", 164 | options: kana.KatakanaToHiragana | kana.HiraganaToKatakana, 165 | expect: "ㇰㇱㇲㇳㇴㇵㇶㇷㇸㇹㇺㇻㇼㇽㇾㇿ", 166 | }, 167 | { 168 | name: "With (KatakanaToHiragana | HiraganaToKatakana) Halfwidth forms", 169 | input: "。「」、・ヲァィゥェォャュョッーアイウエオカキクケコサシスセソタチツテトナニヌネノハヒフヘホマミムメモヤユヨラリルレロワン\uFF9E\uFF9F\uFFA0ᄀᄁᆪᄂᆬᆭᄃᄄᄅᆰᆱᆲᆳᆴᆵᄚᄆᄇᄈᄡᄉᄊᄋᄌᄍᄎᄏᄐᄑ하ᅢᅣᅤᅥᅦᅧᅨᅩᅪᅫᅬᅭᅮᅯᅰᅱᅲᅳᅴᅵ│←↑→↓■○", 170 | options: kana.KatakanaToHiragana | kana.HiraganaToKatakana, 171 | expect: "。「」、・ヲァィゥェォャュョッーアイウエオカキクケコサシスセソタチツテトナニヌネノハヒフヘホマミムメモヤユヨラリルレロワン\uFF9E\uFF9F\uFFA0ᄀᄁᆪᄂᆬᆭᄃᄄᄅᆰᆱᆲᆳᆴᆵᄚᄆᄇᄈᄡᄉᄊᄋᄌᄍᄎᄏᄐᄑ하ᅢᅣᅤᅥᅦᅧᅨᅩᅪᅫᅬᅭᅮᅯᅰᅱᅲᅳᅴᅵ│←↑→↓■○", 172 | }, 173 | { 174 | name: "With (KatakanaToHiragana | HiraganaToKatakana) Small Kana Extension", 175 | input: "𛄲𛅐𛅑𛅒𛅕𛅤𛅥𛅦𛅧", 176 | options: kana.KatakanaToHiragana | kana.HiraganaToKatakana, 177 | expect: "𛅕𛅤𛅥𛅦𛄲𛅐𛅑𛅒𛅧", 178 | }, 179 | { 180 | name: "With HalfKanaToFull Halfwidth forms", 181 | input: 
"。「」、・ヲァィゥェォャュョッーアイウエオカキクケコサシスセソタチツテトナニヌネノハヒフヘホマミムメモヤユヨラリルレロワン\uFF9E\uFF9F\uFFA0ᄀᄁᆪᄂᆬᆭᄃᄄᄅᆰᆱᆲᆳᆴᆵᄚᄆᄇᄈᄡᄉᄊᄋᄌᄍᄎᄏᄐᄑ하ᅢᅣᅤᅥᅦᅧᅨᅩᅪᅫᅬᅭᅮᅯᅰᅱᅲᅳᅴᅵ│←↑→↓■○", 182 | options: kana.HalfwidthToWide, 183 | expect: "。「」、・ヲァィゥェォャュョッーアイウエオカキクケコサシスセソタチツテトナニヌネノハヒフヘホマミムメモヤユヨラリルレロワン\u3099\u309A\u3164ㄱㄲㄳㄴㄵㄶㄷㄸㄹㄺㄻㄼㄽㄾㄿㅀㅁㅂㅃㅄㅅㅆㅇㅈㅉㅊㅋㅌㅍㅎㅏㅐㅑㅒㅓㅔㅕㅖㅗㅘㅙㅚㅛㅜㅝㅞㅟㅠㅡㅢㅣ│←↑→↓■○", 184 | }, 185 | { 186 | name: "With HalfKanaToFull Voiced Composites", 187 | input: "ヲ\uFF9Eァ\uFF9Eィ\uFF9Eゥ\uFF9Eェ\uFF9Eォ\uFF9Eャ\uFF9Eュ\uFF9Eョ\uFF9Eッ\uFF9Eー\uFF9Eア\uFF9Eイ\uFF9Eウ\uFF9Eエ\uFF9Eオ\uFF9Eカ\uFF9Eキ\uFF9Eク\uFF9Eケ\uFF9Eコ\uFF9Eサ\uFF9Eシ\uFF9Eス\uFF9Eセ\uFF9Eソ\uFF9Eタ\uFF9Eチ\uFF9Eツ\uFF9Eテ\uFF9Eト\uFF9Eナ\uFF9Eニ\uFF9Eヌ\uFF9Eネ\uFF9Eノ\uFF9Eハ\uFF9Eヒ\uFF9Eフ\uFF9Eヘ\uFF9Eホ\uFF9Eマ\uFF9Eミ\uFF9Eム\uFF9Eメ\uFF9Eモ\uFF9Eヤ\uFF9Eユ\uFF9Eヨ\uFF9Eラ\uFF9Eリ\uFF9Eル\uFF9Eレ\uFF9Eロ\uFF9Eワ\uFF9Eン\uFF9E", 188 | options: kana.HalfwidthToWide, 189 | expect: "ヺァ\u3099ィ\u3099ゥ\u3099ェ\u3099ォ\u3099ャ\u3099ュ\u3099ョ\u3099ッ\u3099ー\u3099ア\u3099イ\u3099ヴエ\u3099オ\u3099ガギグゲゴザジズゼゾダヂヅデドナ\u3099ニ\u3099ヌ\u3099ネ\u3099ノ\u3099バビブベボマ\u3099ミ\u3099ム\u3099メ\u3099モ\u3099ヤ\u3099ユ\u3099ヨ\u3099ラ\u3099リ\u3099ル\u3099レ\u3099ロ\u3099ヷン\u3099", 190 | }, 191 | { 192 | name: "With HalfKanaToFull Semi-Voiced Composites", 193 | input: "ヲ\uFF9Fァ\uFF9Fィ\uFF9Fゥ\uFF9Fェ\uFF9Fォ\uFF9Fャ\uFF9Fュ\uFF9Fョ\uFF9Fッ\uFF9Fー\uFF9Fア\uFF9Fイ\uFF9Fウ\uFF9Fエ\uFF9Fオ\uFF9Fカ\uFF9Fキ\uFF9Fク\uFF9Fケ\uFF9Fコ\uFF9Fサ\uFF9Fシ\uFF9Fス\uFF9Fセ\uFF9Fソ\uFF9Fタ\uFF9Fチ\uFF9Fツ\uFF9Fテ\uFF9Fト\uFF9Fナ\uFF9Fニ\uFF9Fヌ\uFF9Fネ\uFF9Fノ\uFF9Fハ\uFF9Fヒ\uFF9Fフ\uFF9Fヘ\uFF9Fホ\uFF9Fマ\uFF9Fミ\uFF9Fム\uFF9Fメ\uFF9Fモ\uFF9Fヤ\uFF9Fユ\uFF9Fヨ\uFF9Fラ\uFF9Fリ\uFF9Fル\uFF9Fレ\uFF9Fロ\uFF9Fワ\uFF9Fン\uFF9F", 194 | options: kana.HalfwidthToWide, 195 | expect: 
"ヲ\u309Aァ\u309Aィ\u309Aゥ\u309Aェ\u309Aォ\u309Aャ\u309Aュ\u309Aョ\u309Aッ\u309Aー\u309Aア\u309Aイ\u309Aウ\u309Aエ\u309Aオ\u309Aカ\u309Aキ\u309Aク\u309Aケ\u309Aコ\u309Aサ\u309Aシ\u309Aス\u309Aセ\u309Aソ\u309Aタ\u309Aチ\u309Aツ\u309Aテ\u309Aト\u309Aナ\u309Aニ\u309Aヌ\u309Aネ\u309Aノ\u309Aパピプペポマ\u309Aミ\u309Aム\u309Aメ\u309Aモ\u309Aヤ\u309Aユ\u309Aヨ\u309Aラ\u309Aリ\u309Aル\u309Aレ\u309Aロ\u309Aワ\u309Aン\u309A", 196 | }, 197 | { 198 | name: "With (HalfKanaToFull | KatakanaToHiragana) Hiragana", 199 | input: "ぁあぃいぅうぇえぉおかがきぎくぐけげこごさざしじすずせぜそぞただちぢっつづてでとどなにぬねのはばぱひびぴふぶぷへべぺほぼぽまみむめもゃやゅゆょよらりるれろゎわゐゑをんゔゕゖ\u3099\u309A゛゜ゝゞゟ", 200 | options: kana.HalfwidthToWide | kana.KatakanaToHiragana, 201 | expect: "ぁあぃいぅうぇえぉおかがきぎくぐけげこごさざしじすずせぜそぞただちぢっつづてでとどなにぬねのはばぱひびぴふぶぷへべぺほぼぽまみむめもゃやゅゆょよらりるれろゎわゐゑをんゔゕゖ\u3099\u309A゛゜ゝゞゟ", 202 | }, 203 | { 204 | name: "With (HalfKanaToFull | KatakanaToHiragana) Katakana", 205 | input: "゠ァアィイゥウェエォオカガキギクグケゲコゴサザシジスズセゼソゾタダチヂッツヅテデトドナニヌネノハバパヒビピフブプヘベペホボポマミムメモャヤュユョヨラリルレロヮワヰヱヲンヴヵヶヷヸヹヺ・ーヽヾヿ", 206 | options: kana.HalfwidthToWide | kana.KatakanaToHiragana, 207 | expect: "゠ぁあぃいぅうぇえぉおかがきぎくぐけげこごさざしじすずせぜそぞただちぢっつづてでとどなにぬねのはばぱひびぴふぶぷへべぺほぼぽまみむめもゃやゅゆょよらりるれろゎわゐゑをんゔゕゖわ\u3099ゐ\u3099ゑ\u3099を\u3099・ーゝゞヿ", 208 | }, 209 | { 210 | name: "With (HalfKanaToFull | KatakanaToHiragana) Katakana Phonetic Extensions", 211 | input: "ㇰㇱㇲㇳㇴㇵㇶㇷㇸㇹㇺㇻㇼㇽㇾㇿ", 212 | options: kana.HalfwidthToWide | kana.KatakanaToHiragana, 213 | expect: "ㇰㇱㇲㇳㇴㇵㇶㇷㇸㇹㇺㇻㇼㇽㇾㇿ", 214 | }, 215 | { 216 | name: "With (HalfKanaToFull | KatakanaToHiragana) Halfwidth forms", 217 | input: "。「」、・ヲァィゥェォャュョッーアイウエオカキクケコサシスセソタチツテトナニヌネノハヒフヘホマミムメモヤユヨラリルレロワン\uFF9E\uFF9F\uFFA0ᄀᄁᆪᄂᆬᆭᄃᄄᄅᆰᆱᆲᆳᆴᆵᄚᄆᄇᄈᄡᄉᄊᄋᄌᄍᄎᄏᄐᄑ하ᅢᅣᅤᅥᅦᅧᅨᅩᅪᅫᅬᅭᅮᅯᅰᅱᅲᅳᅴᅵ│←↑→↓■○", 218 | options: kana.HalfwidthToWide | kana.KatakanaToHiragana, 219 | expect: "。「」、・をぁぃぅぇぉゃゅょっーあいうえおかきくけこさしすせそたちつてとなにぬねのはひふへほまみむめもやゆよらりるれろわん\u3099\u309A\u3164ㄱㄲㄳㄴㄵㄶㄷㄸㄹㄺㄻㄼㄽㄾㄿㅀㅁㅂㅃㅄㅅㅆㅇㅈㅉㅊㅋㅌㅍㅎㅏㅐㅑㅒㅓㅔㅕㅖㅗㅘㅙㅚㅛㅜㅝㅞㅟㅠㅡㅢㅣ│←↑→↓■○", 220 | }, 221 | { 222 | name: "With (HalfKanaToFull | KatakanaToHiragana) Voiced Composites", 223 | input: 
"ヲ\uFF9Eァ\uFF9Eィ\uFF9Eゥ\uFF9Eェ\uFF9Eォ\uFF9Eャ\uFF9Eュ\uFF9Eョ\uFF9Eッ\uFF9Eー\uFF9Eア\uFF9Eイ\uFF9Eウ\uFF9Eエ\uFF9Eオ\uFF9Eカ\uFF9Eキ\uFF9Eク\uFF9Eケ\uFF9Eコ\uFF9Eサ\uFF9Eシ\uFF9Eス\uFF9Eセ\uFF9Eソ\uFF9Eタ\uFF9Eチ\uFF9Eツ\uFF9Eテ\uFF9Eト\uFF9Eナ\uFF9Eニ\uFF9Eヌ\uFF9Eネ\uFF9Eノ\uFF9Eハ\uFF9Eヒ\uFF9Eフ\uFF9Eヘ\uFF9Eホ\uFF9Eマ\uFF9Eミ\uFF9Eム\uFF9Eメ\uFF9Eモ\uFF9Eヤ\uFF9Eユ\uFF9Eヨ\uFF9Eラ\uFF9Eリ\uFF9Eル\uFF9Eレ\uFF9Eロ\uFF9Eワ\uFF9Eン\uFF9E", 224 | options: kana.HalfwidthToWide | kana.KatakanaToHiragana, 225 | expect: "を\u3099ぁ\u3099ぃ\u3099ぅ\u3099ぇ\u3099ぉ\u3099ゃ\u3099ゅ\u3099ょ\u3099っ\u3099ー\u3099あ\u3099い\u3099ゔえ\u3099お\u3099がぎぐげござじずぜぞだぢづでどな\u3099に\u3099ぬ\u3099ね\u3099の\u3099ばびぶべぼま\u3099み\u3099む\u3099め\u3099も\u3099や\u3099ゆ\u3099よ\u3099ら\u3099り\u3099る\u3099れ\u3099ろ\u3099わ\u3099ん\u3099", 226 | }, 227 | { 228 | name: "With (HalfKanaToFull | KatakanaToHiragana) Semi-Voiced Composites", 229 | input: "ヲ\uFF9Fァ\uFF9Fィ\uFF9Fゥ\uFF9Fェ\uFF9Fォ\uFF9Fャ\uFF9Fュ\uFF9Fョ\uFF9Fッ\uFF9Fー\uFF9Fア\uFF9Fイ\uFF9Fウ\uFF9Fエ\uFF9Fオ\uFF9Fカ\uFF9Fキ\uFF9Fク\uFF9Fケ\uFF9Fコ\uFF9Fサ\uFF9Fシ\uFF9Fス\uFF9Fセ\uFF9Fソ\uFF9Fタ\uFF9Fチ\uFF9Fツ\uFF9Fテ\uFF9Fト\uFF9Fナ\uFF9Fニ\uFF9Fヌ\uFF9Fネ\uFF9Fノ\uFF9Fハ\uFF9Fヒ\uFF9Fフ\uFF9Fヘ\uFF9Fホ\uFF9Fマ\uFF9Fミ\uFF9Fム\uFF9Fメ\uFF9Fモ\uFF9Fヤ\uFF9Fユ\uFF9Fヨ\uFF9Fラ\uFF9Fリ\uFF9Fル\uFF9Fレ\uFF9Fロ\uFF9Fワ\uFF9Fン\uFF9F", 230 | options: kana.HalfwidthToWide | kana.KatakanaToHiragana, 231 | expect: "を\u309Aぁ\u309Aぃ\u309Aぅ\u309Aぇ\u309Aぉ\u309Aゃ\u309Aゅ\u309Aょ\u309Aっ\u309Aー\u309Aあ\u309Aい\u309Aう\u309Aえ\u309Aお\u309Aか\u309Aき\u309Aく\u309Aけ\u309Aこ\u309Aさ\u309Aし\u309Aす\u309Aせ\u309Aそ\u309Aた\u309Aち\u309Aつ\u309Aて\u309Aと\u309Aな\u309Aに\u309Aぬ\u309Aね\u309Aの\u309Aぱぴぷぺぽま\u309Aみ\u309Aむ\u309Aめ\u309Aも\u309Aや\u309Aゆ\u309Aよ\u309Aら\u309Aり\u309Aる\u309Aれ\u309Aろ\u309Aわ\u309Aん\u309A", 232 | }, 233 | { 234 | name: "With (HalfKanaToFull | KatakanaToHiragana) Small Kana Extension", 235 | input: "𛄲𛅐𛅑𛅒𛅕𛅤𛅥𛅦𛅧", 236 | options: kana.HalfwidthToWide | kana.KatakanaToHiragana, 237 | expect: "𛄲𛅐𛅑𛅒𛄲𛅐𛅑𛅒𛅧", 238 | }, 239 | { 240 | name: "With FullwidthToNarrow Latin-1 Printable", 241 | input: 
"¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖרÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ", 242 | options: kana.FullwidthToNarrow, 243 | expect: "¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖרÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ", 244 | }, 245 | { 246 | name: "With FullwidthToNarrow General Punctuation Printable", 247 | input: "‐‒–—―‖‗‘’‚‛“”„‟†‡•‣․‥…‧‰‱′″‴‵‶‷‸‹›※‼‽‾‿⁀⁁⁂⁃⁄⁅⁆⁇⁈⁉⁊⁋⁌⁍⁎⁏⁐⁑⁒⁓⁔⁕⁖⁗⁘⁙⁚⁛⁜⁝⁞", 248 | options: kana.FullwidthToNarrow, 249 | expect: "‐‒–—―‖‗‘’‚‛“”„‟†‡•‣․‥…‧‰‱′″‴‵‶‷‸‹›※‼‽‾‿⁀⁁⁂⁃⁄⁅⁆⁇⁈⁉⁊⁋⁌⁍⁎⁏⁐⁑⁒⁓⁔⁕⁖⁗⁘⁙⁚⁛⁜⁝⁞", 250 | }, 251 | { 252 | name: "With FullwidthToNarrow Some of Mathematical Operators", 253 | input: "−∥", 254 | options: kana.FullwidthToNarrow, 255 | expect: "−∥", 256 | }, 257 | { 258 | name: "With FullwidthToNarrow CJK Symbols and Punctuation", 259 | input: " 、。〃〄々〆〇〈〉《》「」『』【】〒〓〔〕〖〗〘〙〚〛〜〝〞〟〠〡〢〣〤〥〦〧〨〩〪〭〫〬\u302E\u302F〰〱〲〳〴〵〶〷〸〹〺〻〼〽\u303E\u303F", 260 | options: kana.FullwidthToNarrow, 261 | expect: " 、。〃〄々〆〇〈〉《》「」『』【】〒〓〔〕〖〗〘〙〚〛〜〝〞〟〠〡〢〣〤〥〦〧〨〩〪〭〫〬\u302E\u302F〰〱〲〳〴〵〶〷〸〹〺〻〼〽\u303E\u303F", 262 | }, 263 | { 264 | name: "With FullwidthToNarrow Fullwidth Forms", 265 | input: "!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~⦅⦆¢£¬ ̄¦¥₩", 266 | options: kana.FullwidthToNarrow, 267 | expect: "!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~⦅⦆¢£¬¯¦¥₩", 268 | }, 269 | } 270 | for _, tc := range testcases { 271 | t.Run(tc.name, func(t *testing.T) { 272 | actual := kana.Convert(tc.input, tc.options) 273 | if diff := cmp.Diff(actual, tc.expect); diff != "" { 274 | t.Errorf("diff (-actual +expect): %s", diff) 275 | } 276 | }) 277 | } 278 | } 279 | -------------------------------------------------------------------------------- /convert_compat_test.go: -------------------------------------------------------------------------------- 1 | package kana_test 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/google/go-cmp/cmp" 7 | "github.com/wantedly/kana-go" 8 
| ) 9 | 10 | func TestCompatConvert(t *testing.T) { 11 | var testcases = []struct { 12 | name string 13 | input string 14 | options kana.ConvertOptions 15 | expect string 16 | }{ 17 | { 18 | name: "With FullwidthToNarrow, Without CompatQuotes", 19 | input: "´‘’“”"'", 20 | options: kana.FullwidthToNarrow, 21 | expect: "´‘’“”\"'", 22 | }, 23 | { 24 | name: "With FullwidthToNarrow, With CompatQuotes", 25 | input: "´‘’“”"'", 26 | options: kana.FullwidthToNarrow | kana.CompatQuotes, 27 | expect: "'`'\"\""'", 28 | }, 29 | { 30 | name: "Without CompatMinus", 31 | input: "—―−-", 32 | options: 0, 33 | expect: "—―−-", 34 | }, 35 | { 36 | name: "With CompatMinus", 37 | input: "—―−-", 38 | options: kana.CompatMinus, 39 | expect: "——−−", 40 | }, 41 | { 42 | name: "With FullwidthToNarrow, Without CompatMinus", 43 | input: "—―−-", 44 | options: kana.FullwidthToNarrow, 45 | expect: "—―−-", 46 | }, 47 | { 48 | name: "With FullwidthToNarrow, With CompatMinus", 49 | input: "—―−-", 50 | options: kana.FullwidthToNarrow | kana.CompatMinus, 51 | expect: "----", 52 | }, 53 | { 54 | name: "Without CompatOverline", 55 | input: " ̄~", 56 | options: 0, 57 | expect: " ̄~", 58 | }, 59 | { 60 | name: "With CompatOverline", 61 | input: " ̄~", 62 | options: kana.CompatOverline, 63 | expect: "‾~", 64 | }, 65 | { 66 | name: "With FullwidthToNarrow, Without CompatOverline", 67 | input: " ̄~", 68 | options: kana.FullwidthToNarrow, 69 | expect: "¯~", 70 | }, 71 | { 72 | name: "With FullwidthToNarrow, With CompatOverline", 73 | input: " ̄~", 74 | options: kana.FullwidthToNarrow | kana.CompatOverline, 75 | expect: "‾~", 76 | }, 77 | { 78 | name: "Without CompatCurrency", 79 | input: "¢£¥₩", 80 | options: 0, 81 | expect: "¢£¥₩", 82 | }, 83 | { 84 | name: "With CompatCurrency", 85 | input: "¢£¥₩", 86 | options: kana.CompatCurrency, 87 | expect: "¢£¥₩", 88 | }, 89 | { 90 | name: "Without CompatOtherSymbols", 91 | input: "∥¬¦", 92 | options: 0, 93 | expect: "∥¬¦", 94 | }, 95 | { 96 | name: "With 
CompatOtherSymbols", 97 | input: "∥¬¦", 98 | options: kana.CompatOtherSymbols, 99 | expect: "‖¬¦", 100 | }, 101 | { 102 | name: "With FullwidthToNarrow, Without CompatCurrency", 103 | input: "¢£¥₩", 104 | options: kana.FullwidthToNarrow, 105 | expect: "¢£¥₩", 106 | }, 107 | { 108 | name: "With FullwidthToNarrow, With CompatCurrency", 109 | input: "¢£¥₩", 110 | options: kana.FullwidthToNarrow | kana.CompatCurrency, 111 | expect: "¢£¥₩", 112 | }, 113 | { 114 | name: "With FullwidthToNarrow, Without CompatBracket", 115 | input: "〈〉⦅⦆", 116 | options: kana.FullwidthToNarrow, 117 | expect: "〈〉⦅⦆", 118 | }, 119 | { 120 | name: "With FullwidthToNarrow, With CompatBracket", 121 | input: "〈〉⦅⦆", 122 | options: kana.FullwidthToNarrow | kana.CompatBrackets, 123 | expect: "<>⦅⦆", 124 | }, 125 | { 126 | name: "With HalfwidthToWide, Without CompatVoicedSoundMarks", 127 | input: "゙゚", 128 | options: kana.HalfwidthToWide, 129 | expect: "\u3099\u309A", 130 | }, 131 | { 132 | name: "With HalfwidthToWide, With CompatVoicedSoundMarks", 133 | input: "゙゚", 134 | options: kana.HalfwidthToWide | kana.CompatVoicedSoundMarks, 135 | expect: "゛゜", 136 | }, 137 | { 138 | name: "With HalfwidthToWide, Without CompatKeepHalfwidthHangul", 139 | input: "\uFFA0ᄀᄁᆪᄂᆬᆭᄃᄄᄅᆰᆱᆲᆳᆴᆵᄚᄆᄇᄈᄡᄉᄊᄋᄌᄍᄎᄏᄐᄑ하ᅢᅣᅤᅥᅦᅧᅨᅩᅪᅫᅬᅭᅮᅯᅰᅱᅲᅳᅴᅵ", 140 | options: kana.HalfwidthToWide, 141 | expect: "\u3164ㄱㄲㄳㄴㄵㄶㄷㄸㄹㄺㄻㄼㄽㄾㄿㅀㅁㅂㅃㅄㅅㅆㅇㅈㅉㅊㅋㅌㅍㅎㅏㅐㅑㅒㅓㅔㅕㅖㅗㅘㅙㅚㅛㅜㅝㅞㅟㅠㅡㅢㅣ", 142 | }, 143 | { 144 | name: "With HalfwidthToWide, With CompatKeepHalfwidthHangul", 145 | input: "\uFFA0ᄀᄁᆪᄂᆬᆭᄃᄄᄅᆰᆱᆲᆳᆴᆵᄚᄆᄇᄈᄡᄉᄊᄋᄌᄍᄎᄏᄐᄑ하ᅢᅣᅤᅥᅦᅧᅨᅩᅪᅫᅬᅭᅮᅯᅰᅱᅲᅳᅴᅵ", 146 | options: kana.HalfwidthToWide | kana.CompatKeepHalfwidthHangul, 147 | expect: "\uFFA0ᄀᄁᆪᄂᆬᆭᄃᄄᄅᆰᆱᆲᆳᆴᆵᄚᄆᄇᄈᄡᄉᄊᄋᄌᄍᄎᄏᄐᄑ하ᅢᅣᅤᅥᅦᅧᅨᅩᅪᅫᅬᅭᅮᅯᅰᅱᅲᅳᅴᅵ", 148 | }, 149 | { 150 | name: "With HalfwidthToWide, Without CompatKeepHalfwidthSymbols", 151 | input: "│←↑→↓■○", 152 | options: kana.HalfwidthToWide, 153 | expect: "│←↑→↓■○", 154 | }, 155 | { 156 | name: "With HalfwidthToWide, With CompatKeepHalfwidthSymbols", 157 | input: "│←↑→↓■○", 
158 | options: kana.HalfwidthToWide | kana.CompatKeepHalfwidthSymbols, 159 | expect: "│←↑→↓■○", 160 | }, 161 | { 162 | name: "With KatakanaToHiragana, Without CompatKanaRestriction", 163 | input: "ヵヶヷヸヹヺ𛅕𛅤𛅥𛅦", 164 | options: kana.KatakanaToHiragana, 165 | expect: "ゕゖわ\u3099ゐ\u3099ゑ\u3099を\u3099𛄲𛅐𛅑𛅒", 166 | }, 167 | { 168 | name: "With KatakanaToHiragana, With CompatKanaRestriction", 169 | input: "ヵヶヷヸヹヺ𛅕𛅤𛅥𛅦", 170 | options: kana.KatakanaToHiragana | kana.CompatKanaRestriction, 171 | expect: "ヵヶヷヸヹヺ𛅕𛅤𛅥𛅦", 172 | }, 173 | { 174 | name: "With HiraganaToKatakana, Without CompatKanaRestriction", 175 | input: "ゕゖ𛄲𛅐𛅑𛅒", 176 | options: kana.HiraganaToKatakana, 177 | expect: "ヵヶ𛅕𛅤𛅥𛅦", 178 | }, 179 | { 180 | name: "With HiraganaToKatakana, With CompatKanaRestriction", 181 | input: "ゕゖ𛄲𛅐𛅑𛅒", 182 | options: kana.HiraganaToKatakana | kana.CompatKanaRestriction, 183 | expect: "ゕゖ𛄲𛅐𛅑𛅒", 184 | }, 185 | } 186 | for _, tc := range testcases { 187 | t.Run(tc.name, func(t *testing.T) { 188 | actual := kana.Convert(tc.input, tc.options) 189 | if diff := cmp.Diff(actual, tc.expect); diff != "" { 190 | t.Errorf("diff (-actual +expect): %s", diff) 191 | } 192 | }) 193 | } 194 | } 195 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/wantedly/kana-go 2 | 3 | go 1.11 4 | 5 | require github.com/google/go-cmp v0.6.0 6 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= 2 | github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= 3 | -------------------------------------------------------------------------------- /nkf/convert.go: -------------------------------------------------------------------------------- 1 | // 
Package nkf provides an API compatible with a subset of 2 | // [go-nkf](https://pkg.go.dev/github.com/creasty/go-nkf). 3 | // 4 | // Note that this package is a wrapper around 5 | // [kana-go](https://pkg.go.dev/github.com/wantedly/kana-go). 6 | // NKF's main purpose is to convert character encodings, but this package only 7 | // provides the conversion of: 8 | // 9 | // - Fullwidth and halfwidth characters 10 | // - Katakana and hiragana. 11 | // 12 | // # Example 13 | // 14 | // package main 15 | // 16 | // import ( 17 | // "fmt" 18 | // 19 | // "github.com/wantedly/kana-go/nkf" 20 | // ) 21 | // 22 | // func main() { 23 | // str, err := nkf.Convert("ABC DEF", "-w -W -m0 -Z1") 24 | // if err != nil { 25 | // panic(err) 26 | // } 27 | // fmt.Println(str) // Output: ABC DEF 28 | // } 29 | package nkf 30 | 31 | import "github.com/wantedly/kana-go" 32 | 33 | // Convert converts a string with the given options. 34 | // 35 | // # Available options 36 | // 37 | // The following options are required, meaning that it is 38 | // an error to omit them. 39 | // 40 | // This is to ensure compatibility with the original NKF. 41 | // 42 | // - -w or --utf8: Output in UTF-8. Always required. 43 | // - -W or --utf8-input: Input in UTF-8. Always required. 44 | // - -m0: No MIME decoding. 45 | // 46 | // The following options are related to fullwidth/halfwidth conversion. 47 | // 48 | // - -X: Convert halfwidth-form characters to their ordinary forms. 49 | // This option is enabled by default. 50 | // - -x: Disable -X. 51 | // - -Z0: Convert fullwidth characters to halfwidth, 52 | // except for the fullwidth space. 53 | // - -Z1: In addition to -Z0, convert fullwidth space to ASCII space. 54 | // - -Z2: In addition to -Z0, convert fullwidth space to two ASCII spaces. 55 | // - -Z4: In addition to -Z0, convert Katakana characters 56 | // back to their halfwidth forms. 57 | // 58 | // The following options are related to Katakana/Hiragana conversion.
59 | // 60 | // - -h or -h1 or --hiragana: Convert Katakana characters to Hiragana. 61 | // - -h2 or --katakana: Convert Hiragana characters to Katakana. 62 | // - -h3 or --katakana-hiragana: Equivalent to -h1 -h2. 63 | // Convert Katakana characters to Hiragana and vice versa. 64 | func Convert(str string, options string) (string, error) { 65 | optFlags, err := ParseOptions(options) 66 | if err != nil { 67 | return str, err 68 | } 69 | return kana.Convert(str, optFlags), nil 70 | } 71 | -------------------------------------------------------------------------------- /nkf/convert_test.go: -------------------------------------------------------------------------------- 1 | package nkf_test 2 | 3 | import ( 4 | "os" 5 | "os/exec" 6 | "strings" 7 | "testing" 8 | 9 | "github.com/google/go-cmp/cmp" 10 | "github.com/wantedly/kana-go/nkf" 11 | ) 12 | 13 | type convertTestcase struct { 14 | name string 15 | input string 16 | options string 17 | expect string 18 | } 19 | 20 | var testcases = []convertTestcase{ 21 | { 22 | name: "Base case ASCII Printable", 23 | input: " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~", 24 | options: "-w -W -m0 -x", 25 | expect: " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~", 26 | }, 27 | { 28 | name: "Base case Latin-1 Printable", 29 | input: "¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖרÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ", 30 | options: "-w -W -m0 -x", 31 | expect: "¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖרÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ", 32 | }, 33 | { 34 | name: "Base case Latin-1 Formatting", 35 | input: "\u00A0\u00AD", 36 | options: "-w -W -m0 -x", 37 | expect: "\u00A0\u00AD", 38 | }, 39 | { 40 | name: "Base case General Punctuation Printable (nondegenerate only)", 41 | input: "‐‒–—‖‗‘’‚‛“”„‟†‡•‣․‥…‧‰‱′″‴‵‶‷‸‹›※‼‽‾‿⁀⁁⁂⁃⁄⁅⁆⁇⁈⁉⁊⁋⁌⁍⁎⁏⁐⁑⁒⁓⁔⁕⁖⁗⁘⁙⁚⁛⁜⁝⁞", 42 | options: "-w -W -m0 -x", 43 | expect: 
"‐‒–—‖‗‘’‚‛“”„‟†‡•‣․‥…‧‰‱′″‴‵‶‷‸‹›※‼‽‾‿⁀⁁⁂⁃⁄⁅⁆⁇⁈⁉⁊⁋⁌⁍⁎⁏⁐⁑⁒⁓⁔⁕⁖⁗⁘⁙⁚⁛⁜⁝⁞", 44 | }, 45 | { 46 | name: "Base case Minus Sign", 47 | input: "−", 48 | options: "-w -W -m0 -x", 49 | expect: "−", 50 | }, 51 | { 52 | name: "Base case CJK Symbols and Punctuation", 53 | input: " 、。〃〄々〆〇〈〉《》「」『』【】〒〓〔〕〖〗〘〙〚〛〜〝〞〟〠〡〢〣〤〥〦〧〨〩〪〭〫〬\u302E\u302F〰〱〲〳〴〵〶〷〸〹〺〻〼〽\u303E\u303F", 54 | options: "-w -W -m0 -x", 55 | expect: " 、。〃〄々〆〇〈〉《》「」『』【】〒〓〔〕〖〗〘〙〚〛〜〝〞〟〠〡〢〣〤〥〦〧〨〩〪〭〫〬\u302E\u302F〰〱〲〳〴〵〶〷〸〹〺〻〼〽\u303E\u303F", 56 | }, 57 | { 58 | name: "Base case Hiragana", 59 | input: "ぁあぃいぅうぇえぉおかがきぎくぐけげこごさざしじすずせぜそぞただちぢっつづてでとどなにぬねのはばぱひびぴふぶぷへべぺほぼぽまみむめもゃやゅゆょよらりるれろゎわゐゑをんゔゕゖ\u3099\u309A゛゜ゝゞゟ", 60 | options: "-w -W -m0 -x", 61 | expect: "ぁあぃいぅうぇえぉおかがきぎくぐけげこごさざしじすずせぜそぞただちぢっつづてでとどなにぬねのはばぱひびぴふぶぷへべぺほぼぽまみむめもゃやゅゆょよらりるれろゎわゐゑをんゔゕゖ\u3099\u309A゛゜ゝゞゟ", 62 | }, 63 | { 64 | name: "Base case Katakana", 65 | input: "゠ァアィイゥウェエォオカガキギクグケゲコゴサザシジスズセゼソゾタダチヂッツヅテデトドナニヌネノハバパヒビピフブプヘベペホボポマミムメモャヤュユョヨラリルレロヮワヰヱヲンヴヵヶヷヸヹヺ・ーヽヾヿ", 66 | options: "-w -W -m0 -x", 67 | expect: "゠ァアィイゥウェエォオカガキギクグケゲコゴサザシジスズセゼソゾタダチヂッツヅテデトドナニヌネノハバパヒビピフブプヘベペホボポマミムメモャヤュユョヨラリルレロヮワヰヱヲンヴヵヶヷヸヹヺ・ーヽヾヿ", 68 | }, 69 | { 70 | name: "Base case Fullwidth forms (nondegenerate only)", 71 | input: "!"#$%&'()*+,./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~⦅⦆₩", 72 | options: "-w -W -m0 -x", 73 | expect: "!"#$%&'()*+,./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~⦅⦆₩", 74 | }, 75 | { 76 | name: "Base case Halfwidth forms", 77 | input: "。「」、・ヲァィゥェォャュョッーアイウエオカキクケコサシスセソタチツテトナニヌネノハヒフヘホマミムメモヤユヨラリルレロワン\uFF9E\uFF9F\uFFA0ᄀᄁᆪᄂᆬᆭᄃᄄᄅᆰᆱᆲᆳᆴᆵᄚᄆᄇᄈᄡᄉᄊᄋᄌᄍᄎᄏᄐᄑ하ᅢᅣᅤᅥᅦᅧᅨᅩᅪᅫᅬᅭᅮᅯᅰᅱᅲᅳᅴᅵ│←↑→↓■○", 78 | options: "-w -W -m0 -x", 79 | expect: "。「」、・ヲァィゥェォャュョッーアイウエオカキクケコサシスセソタチツテトナニヌネノハヒフヘホマミムメモヤユヨラリルレロワン\uFF9E\uFF9F\uFFA0ᄀᄁᆪᄂᆬᆭᄃᄄᄅᆰᆱᆲᆳᆴᆵᄚᄆᄇᄈᄡᄉᄊᄋᄌᄍᄎᄏᄐᄑ하ᅢᅣᅤᅥᅦᅧᅨᅩᅪᅫᅬᅭᅮᅯᅰᅱᅲᅳᅴᅵ│←↑→↓■○", 80 | }, 81 | { 82 | name: "Base case degenerate", 83 | input: "―∥-¢£¬ ̄¦¥", 84 | options: "-w -W -m0 -x", 85 | expect: "—‖−¢£¬‾¦¥", 86 | }, 87 | { 88 | name: "With -h 
Hiragana", 89 | input: "ぁあぃいぅうぇえぉおかがきぎくぐけげこごさざしじすずせぜそぞただちぢっつづてでとどなにぬねのはばぱひびぴふぶぷへべぺほぼぽまみむめもゃやゅゆょよらりるれろゎわゐゑをんゔゕゖ\u3099\u309A゛゜ゝゞゟ", 90 | options: "-w -W -m0 -x -h", 91 | expect: "ぁあぃいぅうぇえぉおかがきぎくぐけげこごさざしじすずせぜそぞただちぢっつづてでとどなにぬねのはばぱひびぴふぶぷへべぺほぼぽまみむめもゃやゅゆょよらりるれろゎわゐゑをんゔゕゖ\u3099\u309A゛゜ゝゞゟ", 92 | }, 93 | { 94 | name: "With -h Katakana", 95 | input: "゠ァアィイゥウェエォオカガキギクグケゲコゴサザシジスズセゼソゾタダチヂッツヅテデトドナニヌネノハバパヒビピフブプヘベペホボポマミムメモャヤュユョヨラリルレロヮワヰヱヲンヴヵヶヷヸヹヺ・ーヽヾヿ", 96 | options: "-w -W -m0 -x -h", 97 | expect: "゠ぁあぃいぅうぇえぉおかがきぎくぐけげこごさざしじすずせぜそぞただちぢっつづてでとどなにぬねのはばぱひびぴふぶぷへべぺほぼぽまみむめもゃやゅゆょよらりるれろゎわゐゑをんゔヵヶヷヸヹヺ・ーゝゞヿ", 98 | }, 99 | { 100 | name: "With -h Halfwidth Katakana", 101 | input: "。「」、・ヲァィゥェォャュョッーアイウエオカキクケコサシスセソタチツテトナニヌネノハヒフヘホマミムメモヤユヨラリルレロワン\uFF9E\uFF9F", 102 | options: "-w -W -m0 -x -h", 103 | expect: "。「」、・ヲァィゥェォャュョッーアイウエオカキクケコサシスセソタチツテトナニヌネノハヒフヘホマミムメモヤユヨラリルレロワン\uFF9E\uFF9F", 104 | }, 105 | { 106 | name: "With -h2 Hiragana", 107 | input: "ぁあぃいぅうぇえぉおかがきぎくぐけげこごさざしじすずせぜそぞただちぢっつづてでとどなにぬねのはばぱひびぴふぶぷへべぺほぼぽまみむめもゃやゅゆょよらりるれろゎわゐゑをんゔゕゖ\u3099\u309A゛゜ゝゞゟ", 108 | options: "-w -W -m0 -x -h2", 109 | expect: "ァアィイゥウェエォオカガキギクグケゲコゴサザシジスズセゼソゾタダチヂッツヅテデトドナニヌネノハバパヒビピフブプヘベペホボポマミムメモャヤュユョヨラリルレロヮワヰヱヲンヴゕゖ\u3099\u309A゛゜ヽヾゟ", 110 | }, 111 | { 112 | name: "With -h2 Katakana", 113 | input: "゠ァアィイゥウェエォオカガキギクグケゲコゴサザシジスズセゼソゾタダチヂッツヅテデトドナニヌネノハバパヒビピフブプヘベペホボポマミムメモャヤュユョヨラリルレロヮワヰヱヲンヴヵヶヷヸヹヺ・ーヽヾヿ", 114 | options: "-w -W -m0 -x -h2", 115 | expect: "゠ァアィイゥウェエォオカガキギクグケゲコゴサザシジスズセゼソゾタダチヂッツヅテデトドナニヌネノハバパヒビピフブプヘベペホボポマミムメモャヤュユョヨラリルレロヮワヰヱヲンヴヵヶヷヸヹヺ・ーヽヾヿ", 116 | }, 117 | { 118 | name: "With -h2 Halfwidth Katakana", 119 | input: "。「」、・ヲァィゥェォャュョッーアイウエオカキクケコサシスセソタチツテトナニヌネノハヒフヘホマミムメモヤユヨラリルレロワン\uFF9E\uFF9F", 120 | options: "-w -W -m0 -x -h2", 121 | expect: "。「」、・ヲァィゥェォャュョッーアイウエオカキクケコサシスセソタチツテトナニヌネノハヒフヘホマミムメモヤユヨラリルレロワン\uFF9E\uFF9F", 122 | }, 123 | { 124 | name: "With -h3 Hiragana", 125 | input: "ぁあぃいぅうぇえぉおかがきぎくぐけげこごさざしじすずせぜそぞただちぢっつづてでとどなにぬねのはばぱひびぴふぶぷへべぺほぼぽまみむめもゃやゅゆょよらりるれろゎわゐゑをんゔゕゖ\u3099\u309A゛゜ゝゞゟ", 126 | options: "-w -W -m0 -x -h3", 127 | expect: 
"ァアィイゥウェエォオカガキギクグケゲコゴサザシジスズセゼソゾタダチヂッツヅテデトドナニヌネノハバパヒビピフブプヘベペホボポマミムメモャヤュユョヨラリルレロヮワヰヱヲンヴゕゖ\u3099\u309A゛゜ヽヾゟ", 128 | }, 129 | { 130 | name: "With -h3 Katakana", 131 | input: "゠ァアィイゥウェエォオカガキギクグケゲコゴサザシジスズセゼソゾタダチヂッツヅテデトドナニヌネノハバパヒビピフブプヘベペホボポマミムメモャヤュユョヨラリルレロヮワヰヱヲンヴヵヶヷヸヹヺ・ーヽヾヿ", 132 | options: "-w -W -m0 -x -h3", 133 | expect: "゠ぁあぃいぅうぇえぉおかがきぎくぐけげこごさざしじすずせぜそぞただちぢっつづてでとどなにぬねのはばぱひびぴふぶぷへべぺほぼぽまみむめもゃやゅゆょよらりるれろゎわゐゑをんゔヵヶヷヸヹヺ・ーゝゞヿ", 134 | }, 135 | { 136 | name: "With -h3 Halfwidth Katakana", 137 | input: "。「」、・ヲァィゥェォャュョッーアイウエオカキクケコサシスセソタチツテトナニヌネノハヒフヘホマミムメモヤユヨラリルレロワン\uFF9E\uFF9F", 138 | options: "-w -W -m0 -x -h3", 139 | expect: "。「」、・ヲァィゥェォャュョッーアイウエオカキクケコサシスセソタチツテトナニヌネノハヒフヘホマミムメモヤユヨラリルレロワン\uFF9E\uFF9F", 140 | }, 141 | { 142 | name: "With -X Basic Halfwidth Katakana", 143 | input: "。「」、・ヲァィゥェォャュョッーアイウエオカキクケコサシスセソタチツテトナニヌネノハヒフヘホマミムメモヤユヨラリルレロワン\uFF9E\uFF9F", 144 | options: "-w -W -m0", 145 | expect: "。「」、・ヲァィゥェォャュョッーアイウエオカキクケコサシスセソタチツテトナニヌネノハヒフヘホマミムメモヤユヨラリルレロワン゛゜", 146 | }, 147 | { 148 | name: "With -X Voiced Composites", 149 | input: "ヲ\uFF9Eァ\uFF9Eィ\uFF9Eゥ\uFF9Eェ\uFF9Eォ\uFF9Eャ\uFF9Eュ\uFF9Eョ\uFF9Eッ\uFF9Eー\uFF9Eア\uFF9Eイ\uFF9Eウ\uFF9Eエ\uFF9Eオ\uFF9Eカ\uFF9Eキ\uFF9Eク\uFF9Eケ\uFF9Eコ\uFF9Eサ\uFF9Eシ\uFF9Eス\uFF9Eセ\uFF9Eソ\uFF9Eタ\uFF9Eチ\uFF9Eツ\uFF9Eテ\uFF9Eト\uFF9Eナ\uFF9Eニ\uFF9Eヌ\uFF9Eネ\uFF9Eノ\uFF9Eハ\uFF9Eヒ\uFF9Eフ\uFF9Eヘ\uFF9Eホ\uFF9Eマ\uFF9Eミ\uFF9Eム\uFF9Eメ\uFF9Eモ\uFF9Eヤ\uFF9Eユ\uFF9Eヨ\uFF9Eラ\uFF9Eリ\uFF9Eル\uFF9Eレ\uFF9Eロ\uFF9Eワ\uFF9Eン\uFF9E", 150 | options: "-w -W -m0", 151 | expect: "ヲ゛ァ゛ィ゛ゥ゛ェ゛ォ゛ャ゛ュ゛ョ゛ッ゛ー゛ア゛イ゛ヴエ゛オ゛ガギグゲゴザジズゼゾダヂヅデドナ゛ニ゛ヌ゛ネ゛ノ゛バビブベボマ゛ミ゛ム゛メ゛モ゛ヤ゛ユ゛ヨ゛ラ゛リ゛ル゛レ゛ロ゛ワ゛ン゛", 152 | }, 153 | { 154 | name: "With -X Semi-Voiced Composites", 155 | input: 
"ヲ\uFF9Fァ\uFF9Fィ\uFF9Fゥ\uFF9Fェ\uFF9Fォ\uFF9Fャ\uFF9Fュ\uFF9Fョ\uFF9Fッ\uFF9Fー\uFF9Fア\uFF9Fイ\uFF9Fウ\uFF9Fエ\uFF9Fオ\uFF9Fカ\uFF9Fキ\uFF9Fク\uFF9Fケ\uFF9Fコ\uFF9Fサ\uFF9Fシ\uFF9Fス\uFF9Fセ\uFF9Fソ\uFF9Fタ\uFF9Fチ\uFF9Fツ\uFF9Fテ\uFF9Fト\uFF9Fナ\uFF9Fニ\uFF9Fヌ\uFF9Fネ\uFF9Fノ\uFF9Fハ\uFF9Fヒ\uFF9Fフ\uFF9Fヘ\uFF9Fホ\uFF9Fマ\uFF9Fミ\uFF9Fム\uFF9Fメ\uFF9Fモ\uFF9Fヤ\uFF9Fユ\uFF9Fヨ\uFF9Fラ\uFF9Fリ\uFF9Fル\uFF9Fレ\uFF9Fロ\uFF9Fワ\uFF9Fン\uFF9F", 156 | options: "-w -W -m0", 157 | expect: "ヲ゜ァ゜ィ゜ゥ゜ェ゜ォ゜ャ゜ュ゜ョ゜ッ゜ー゜ア゜イ゜ウ゜エ゜オ゜カ゜キ゜ク゜ケ゜コ゜サ゜シ゜ス゜セ゜ソ゜タ゜チ゜ツ゜テ゜ト゜ナ゜ニ゜ヌ゜ネ゜ノ゜パピプペポマ゜ミ゜ム゜メ゜モ゜ヤ゜ユ゜ヨ゜ラ゜リ゜ル゜レ゜ロ゜ワ゜ン゜", 158 | }, 159 | { 160 | name: "With -X -h Hiragana", 161 | input: "ぁあぃいぅうぇえぉおかがきぎくぐけげこごさざしじすずせぜそぞただちぢっつづてでとどなにぬねのはばぱひびぴふぶぷへべぺほぼぽまみむめもゃやゅゆょよらりるれろゎわゐゑをんゔゕゖ\u3099\u309A゛゜ゝゞゟ", 162 | options: "-w -W -m0 -h", 163 | expect: "ぁあぃいぅうぇえぉおかがきぎくぐけげこごさざしじすずせぜそぞただちぢっつづてでとどなにぬねのはばぱひびぴふぶぷへべぺほぼぽまみむめもゃやゅゆょよらりるれろゎわゐゑをんゔゕゖ\u3099\u309A゛゜ゝゞゟ", 164 | }, 165 | { 166 | name: "With -X -h Katakana", 167 | input: "゠ァアィイゥウェエォオカガキギクグケゲコゴサザシジスズセゼソゾタダチヂッツヅテデトドナニヌネノハバパヒビピフブプヘベペホボポマミムメモャヤュユョヨラリルレロヮワヰヱヲンヴヵヶヷヸヹヺ・ーヽヾヿ", 168 | options: "-w -W -m0 -h", 169 | expect: "゠ぁあぃいぅうぇえぉおかがきぎくぐけげこごさざしじすずせぜそぞただちぢっつづてでとどなにぬねのはばぱひびぴふぶぷへべぺほぼぽまみむめもゃやゅゆょよらりるれろゎわゐゑをんゔヵヶヷヸヹヺ・ーゝゞヿ", 170 | }, 171 | { 172 | name: "With -X -h Halfwidth Katakana", 173 | input: "。「」、・ヲァィゥェォャュョッーアイウエオカキクケコサシスセソタチツテトナニヌネノハヒフヘホマミムメモヤユヨラリルレロワン\uFF9E\uFF9F", 174 | options: "-w -W -m0 -h", 175 | expect: "。「」、・をぁぃぅぇぉゃゅょっーあいうえおかきくけこさしすせそたちつてとなにぬねのはひふへほまみむめもやゆよらりるれろわん゛゜", 176 | }, 177 | { 178 | name: "With -X -h Voiced Composites", 179 | input: "ヲ\uFF9Eァ\uFF9Eィ\uFF9Eゥ\uFF9Eェ\uFF9Eォ\uFF9Eャ\uFF9Eュ\uFF9Eョ\uFF9Eッ\uFF9Eー\uFF9Eア\uFF9Eイ\uFF9Eウ\uFF9Eエ\uFF9Eオ\uFF9Eカ\uFF9Eキ\uFF9Eク\uFF9Eケ\uFF9Eコ\uFF9Eサ\uFF9Eシ\uFF9Eス\uFF9Eセ\uFF9Eソ\uFF9Eタ\uFF9Eチ\uFF9Eツ\uFF9Eテ\uFF9Eト\uFF9Eナ\uFF9Eニ\uFF9Eヌ\uFF9Eネ\uFF9Eノ\uFF9Eハ\uFF9Eヒ\uFF9Eフ\uFF9Eヘ\uFF9Eホ\uFF9Eマ\uFF9Eミ\uFF9Eム\uFF9Eメ\uFF9Eモ\uFF9Eヤ\uFF9Eユ\uFF9Eヨ\uFF9Eラ\uFF9Eリ\uFF9Eル\uFF9Eレ\uFF9Eロ\uFF9Eワ\uFF9Eン\uFF9E", 180 | options: "-w -W -m0 -h", 181 | expect: 
"を゛ぁ゛ぃ゛ぅ゛ぇ゛ぉ゛ゃ゛ゅ゛ょ゛っ゛ー゛あ゛い゛ゔえ゛お゛がぎぐげござじずぜぞだぢづでどな゛に゛ぬ゛ね゛の゛ばびぶべぼま゛み゛む゛め゛も゛や゛ゆ゛よ゛ら゛り゛る゛れ゛ろ゛わ゛ん゛", 182 | }, 183 | { 184 | name: "With -X -h Semi-Voiced Composites", 185 | input: "ヲ\uFF9Fァ\uFF9Fィ\uFF9Fゥ\uFF9Fェ\uFF9Fォ\uFF9Fャ\uFF9Fュ\uFF9Fョ\uFF9Fッ\uFF9Fー\uFF9Fア\uFF9Fイ\uFF9Fウ\uFF9Fエ\uFF9Fオ\uFF9Fカ\uFF9Fキ\uFF9Fク\uFF9Fケ\uFF9Fコ\uFF9Fサ\uFF9Fシ\uFF9Fス\uFF9Fセ\uFF9Fソ\uFF9Fタ\uFF9Fチ\uFF9Fツ\uFF9Fテ\uFF9Fト\uFF9Fナ\uFF9Fニ\uFF9Fヌ\uFF9Fネ\uFF9Fノ\uFF9Fハ\uFF9Fヒ\uFF9Fフ\uFF9Fヘ\uFF9Fホ\uFF9Fマ\uFF9Fミ\uFF9Fム\uFF9Fメ\uFF9Fモ\uFF9Fヤ\uFF9Fユ\uFF9Fヨ\uFF9Fラ\uFF9Fリ\uFF9Fル\uFF9Fレ\uFF9Fロ\uFF9Fワ\uFF9Fン\uFF9F", 186 | options: "-w -W -m0 -h", 187 | expect: "を゜ぁ゜ぃ゜ぅ゜ぇ゜ぉ゜ゃ゜ゅ゜ょ゜っ゜ー゜あ゜い゜う゜え゜お゜か゜き゜く゜け゜こ゜さ゜し゜す゜せ゜そ゜た゜ち゜つ゜て゜と゜な゜に゜ぬ゜ね゜の゜ぱぴぷぺぽま゜み゜む゜め゜も゜や゜ゆ゜よ゜ら゜り゜る゜れ゜ろ゜わ゜ん゜", 188 | }, 189 | { 190 | name: "With -Z Latin-1 Printable", 191 | input: "¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖרÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ", 192 | options: "-w -W -m0 -x -Z", 193 | expect: "¡¢£¤¥¦§¨©ª«¬®¯°±²³'µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖרÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ", 194 | }, 195 | { 196 | name: "With -Z General Punctuation Printable", 197 | input: "‐‒–—―‖‗‘’‚‛“”„‟†‡•‣․‥…‧‰‱′″‴‵‶‷‸‹›※‼‽‾‿⁀⁁⁂⁃⁄⁅⁆⁇⁈⁉⁊⁋⁌⁍⁎⁏⁐⁑⁒⁓⁔⁕⁖⁗⁘⁙⁚⁛⁜⁝⁞", 198 | options: "-w -W -m0 -x -Z", 199 | expect: "‐‒–--‖‗`'‚‛\"\"„‟†‡•‣․‥…‧‰‱′″‴‵‶‷‸‹›※‼‽‾‿⁀⁁⁂⁃⁄⁅⁆⁇⁈⁉⁊⁋⁌⁍⁎⁏⁐⁑⁒⁓⁔⁕⁖⁗⁘⁙⁚⁛⁜⁝⁞", 200 | }, 201 | { 202 | name: "With -Z Minus Sign", 203 | input: "−", 204 | options: "-w -W -m0 -x -Z", 205 | expect: "-", 206 | }, 207 | { 208 | name: "With -Z CJK Symbols and Punctuation", 209 | input: " 、。〃〄々〆〇〈〉《》「」『』【】〒〓〔〕〖〗〘〙〚〛〜〝〞〟〠〡〢〣〤〥〦〧〨〩〪〭〫〬\u302E\u302F〰〱〲〳〴〵〶〷〸〹〺〻〼〽\u303E\u303F", 210 | options: "-w -W -m0 -x -Z", 211 | expect: " 、。〃〄々〆〇<>《》「」『』【】〒〓〔〕〖〗〘〙〚〛〜〝〞〟〠〡〢〣〤〥〦〧〨〩〪〭〫〬\u302E\u302F〰〱〲〳〴〵〶〷〸〹〺〻〼〽\u303E\u303F", 212 | }, 213 | { 214 | name: "With -Z Fullwidth Forms", 215 | input: "!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~⦅⦆¢£¬ ̄¦¥₩", 216 | options: "-w -W -m0 -x -Z", 217 | expect: 
"!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~⦅⦆¢£¬‾¦¥₩", 218 | }, 219 | { 220 | name: "With -Z1 Latin-1 Printable", 221 | input: "¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖרÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ", 222 | options: "-w -W -m0 -x -Z1", 223 | expect: "¡¢£¤¥¦§¨©ª«¬®¯°±²³'µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖרÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ", 224 | }, 225 | { 226 | name: "With -Z1 General Punctuation Printable", 227 | input: "‐‒–—―‖‗‘’‚‛“”„‟†‡•‣․‥…‧‰‱′″‴‵‶‷‸‹›※‼‽‾‿⁀⁁⁂⁃⁄⁅⁆⁇⁈⁉⁊⁋⁌⁍⁎⁏⁐⁑⁒⁓⁔⁕⁖⁗⁘⁙⁚⁛⁜⁝⁞", 228 | options: "-w -W -m0 -x -Z1", 229 | expect: "‐‒–--‖‗`'‚‛\"\"„‟†‡•‣․‥…‧‰‱′″‴‵‶‷‸‹›※‼‽‾‿⁀⁁⁂⁃⁄⁅⁆⁇⁈⁉⁊⁋⁌⁍⁎⁏⁐⁑⁒⁓⁔⁕⁖⁗⁘⁙⁚⁛⁜⁝⁞", 230 | }, 231 | { 232 | name: "With -Z1 Minus Sign", 233 | input: "−", 234 | options: "-w -W -m0 -x -Z1", 235 | expect: "-", 236 | }, 237 | { 238 | name: "With -Z1 CJK Symbols and Punctuation", 239 | input: " 、。〃〄々〆〇〈〉《》「」『』【】〒〓〔〕〖〗〘〙〚〛〜〝〞〟〠〡〢〣〤〥〦〧〨〩〪〭〫〬\u302E\u302F〰〱〲〳〴〵〶〷〸〹〺〻〼〽\u303E\u303F", 240 | options: "-w -W -m0 -x -Z1", 241 | expect: " 、。〃〄々〆〇<>《》「」『』【】〒〓〔〕〖〗〘〙〚〛〜〝〞〟〠〡〢〣〤〥〦〧〨〩〪〭〫〬\u302E\u302F〰〱〲〳〴〵〶〷〸〹〺〻〼〽\u303E\u303F", 242 | }, 243 | { 244 | name: "With -Z1 Fullwidth Forms", 245 | input: "!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~⦅⦆¢£¬ ̄¦¥₩", 246 | options: "-w -W -m0 -x -Z1", 247 | expect: "!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~⦅⦆¢£¬‾¦¥₩", 248 | }, 249 | { 250 | name: "With -Z2 Latin-1 Printable", 251 | input: "¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖרÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ", 252 | options: "-w -W -m0 -x -Z2", 253 | expect: "¡¢£¤¥¦§¨©ª«¬®¯°±²³'µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖרÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ", 254 | }, 255 | { 256 | name: "With -Z2 General Punctuation Printable", 257 | input: "‐‒–—―‖‗‘’‚‛“”„‟†‡•‣․‥…‧‰‱′″‴‵‶‷‸‹›※‼‽‾‿⁀⁁⁂⁃⁄⁅⁆⁇⁈⁉⁊⁋⁌⁍⁎⁏⁐⁑⁒⁓⁔⁕⁖⁗⁘⁙⁚⁛⁜⁝⁞", 258 | options: "-w -W -m0 -x -Z2", 259 | expect: 
"‐‒–--‖‗`'‚‛\"\"„‟†‡•‣․‥…‧‰‱′″‴‵‶‷‸‹›※‼‽‾‿⁀⁁⁂⁃⁄⁅⁆⁇⁈⁉⁊⁋⁌⁍⁎⁏⁐⁑⁒⁓⁔⁕⁖⁗⁘⁙⁚⁛⁜⁝⁞", 260 | }, 261 | { 262 | name: "With -Z2 Minus Sign", 263 | input: "−", 264 | options: "-w -W -m0 -x -Z2", 265 | expect: "-", 266 | }, 267 | { 268 | name: "With -Z2 CJK Symbols and Punctuation", 269 | input: " 、。〃〄々〆〇〈〉《》「」『』【】〒〓〔〕〖〗〘〙〚〛〜〝〞〟〠〡〢〣〤〥〦〧〨〩〪〭〫〬\u302E\u302F〰〱〲〳〴〵〶〷〸〹〺〻〼〽\u303E\u303F", 270 | options: "-w -W -m0 -x -Z2", 271 | expect: " 、。〃〄々〆〇<>《》「」『』【】〒〓〔〕〖〗〘〙〚〛〜〝〞〟〠〡〢〣〤〥〦〧〨〩〪〭〫〬\u302E\u302F〰〱〲〳〴〵〶〷〸〹〺〻〼〽\u303E\u303F", 272 | }, 273 | { 274 | name: "With -Z2 Fullwidth Forms", 275 | input: "!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~⦅⦆¢£¬ ̄¦¥₩", 276 | options: "-w -W -m0 -x -Z2", 277 | expect: "!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~⦅⦆¢£¬‾¦¥₩", 278 | }, 279 | { 280 | name: "With -Z4 Latin-1 Printable", 281 | input: "¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖרÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ", 282 | options: "-w -W -m0 -x -Z4", 283 | expect: "¡¢£¤¥¦§¨©ª«¬®¯°±²³'µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖרÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ", 284 | }, 285 | { 286 | name: "With -Z4 General Punctuation Printable", 287 | input: "‐‒–—―‖‗‘’‚‛“”„‟†‡•‣․‥…‧‰‱′″‴‵‶‷‸‹›※‼‽‾‿⁀⁁⁂⁃⁄⁅⁆⁇⁈⁉⁊⁋⁌⁍⁎⁏⁐⁑⁒⁓⁔⁕⁖⁗⁘⁙⁚⁛⁜⁝⁞", 288 | options: "-w -W -m0 -x -Z4", 289 | expect: "‐‒–--‖‗`'‚‛\"\"„‟†‡•‣․‥…‧‰‱′″‴‵‶‷‸‹›※‼‽‾‿⁀⁁⁂⁃⁄⁅⁆⁇⁈⁉⁊⁋⁌⁍⁎⁏⁐⁑⁒⁓⁔⁕⁖⁗⁘⁙⁚⁛⁜⁝⁞", 290 | }, 291 | { 292 | name: "With -Z4 Minus Sign", 293 | input: "−", 294 | options: "-w -W -m0 -x -Z4", 295 | expect: "-", 296 | }, 297 | { 298 | name: "With -Z4 CJK Symbols and Punctuation", 299 | input: " 、。〃〄々〆〇〈〉《》「」『』【】〒〓〔〕〖〗〘〙〚〛〜〝〞〟〠〡〢〣〤〥〦〧〨〩〪〭〫〬\u302E\u302F〰〱〲〳〴〵〶〷〸〹〺〻〼〽\u303E\u303F", 300 | options: "-w -W -m0 -x -Z4", 301 | expect: " 、。〃〄々〆〇<>《》「」『』【】〒〓〔〕〖〗〘〙〚〛〜〝〞〟〠〡〢〣〤〥〦〧〨〩〪〭〫〬\u302E\u302F〰〱〲〳〴〵〶〷〸〹〺〻〼〽\u303E\u303F", 302 | }, 303 | { 304 | name: "With -Z4 Hiragana", 305 | input: 
"ぁあぃいぅうぇえぉおかがきぎくぐけげこごさざしじすずせぜそぞただちぢっつづてでとどなにぬねのはばぱひびぴふぶぷへべぺほぼぽまみむめもゃやゅゆょよらりるれろゎわゐゑをんゔゕゖ\u3099\u309A゛゜ゝゞゟ", 306 | options: "-w -W -m0 -x -Z4", 307 | expect: "ぁあぃいぅうぇえぉおかがきぎくぐけげこごさざしじすずせぜそぞただちぢっつづてでとどなにぬねのはばぱひびぴふぶぷへべぺほぼぽまみむめもゃやゅゆょよらりるれろゎわゐゑをんゔゕゖ\uFF9E\uFF9F\uFF9E\uFF9Fゝゞゟ", 308 | }, 309 | { 310 | name: "With -Z4 Katakana", 311 | input: "゠ァアィイゥウェエォオカガキギクグケゲコゴサザシジスズセゼソゾタダチヂッツヅテデトドナニヌネノハバパヒビピフブプヘベペホボポマミムメモャヤュユョヨラリルレロヮワヰヱヲンヴヵヶヷヸヹヺ・ーヽヾヿ", 312 | options: "-w -W -m0 -x -Z4", 313 | expect: "゠ァアィイゥウェエォオカガキギクグケゲコゴサザシジスズセゼソゾタダチヂッツヅテデトドナニヌネノハバパヒビピフブプヘベペホボポマミムメモャヤュユョヨラリルレロヮワヰヱヲンヴヵヶヷヸヹヺ・ーヽヾヿ", 314 | }, 315 | { 316 | name: "With -Z4 Fullwidth Forms", 317 | input: "!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~⦅⦆¢£¬ ̄¦¥₩", 318 | options: "-w -W -m0 -x -Z4", 319 | expect: "!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~⦅⦆¢£¬‾¦¥₩", 320 | }, 321 | { 322 | name: "With -X -Z4 Latin-1 Printable", 323 | input: "¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖרÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ", 324 | options: "-w -W -m0 -Z4", 325 | expect: "¡¢£¤¥¦§¨©ª«¬®¯°±²³'µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖרÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ", 326 | }, 327 | { 328 | name: "With -X -Z4 General Punctuation Printable", 329 | input: "‐‒–—―‖‗‘’‚‛“”„‟†‡•‣․‥…‧‰‱′″‴‵‶‷‸‹›※‼‽‾‿⁀⁁⁂⁃⁄⁅⁆⁇⁈⁉⁊⁋⁌⁍⁎⁏⁐⁑⁒⁓⁔⁕⁖⁗⁘⁙⁚⁛⁜⁝⁞", 330 | options: "-w -W -m0 -Z4", 331 | expect: "‐‒–--‖‗`'‚‛\"\"„‟†‡•‣․‥…‧‰‱′″‴‵‶‷‸‹›※‼‽‾‿⁀⁁⁂⁃⁄⁅⁆⁇⁈⁉⁊⁋⁌⁍⁎⁏⁐⁑⁒⁓⁔⁕⁖⁗⁘⁙⁚⁛⁜⁝⁞", 332 | }, 333 | { 334 | name: "With -X -Z4 Minus Sign", 335 | input: "−", 336 | options: "-w -W -m0 -Z4", 337 | expect: "-", 338 | }, 339 | { 340 | name: "With -X -Z4 CJK Symbols and Punctuation", 341 | input: " 、。〃〄々〆〇〈〉《》「」『』【】〒〓〔〕〖〗〘〙〚〛〜〝〞〟〠〡〢〣〤〥〦〧〨〩〪〭〫〬\u302E\u302F〰〱〲〳〴〵〶〷〸〹〺〻〼〽\u303E\u303F", 342 | options: "-w -W -m0 -Z4", 343 | expect: " 、。〃〄々〆〇<>《》「」『』【】〒〓〔〕〖〗〘〙〚〛〜〝〞〟〠〡〢〣〤〥〦〧〨〩〪〭〫〬\u302E\u302F〰〱〲〳〴〵〶〷〸〹〺〻〼〽\u303E\u303F", 344 | }, 345 | { 346 | name: "With -X -Z4 
Hiragana", 347 | input: "ぁあぃいぅうぇえぉおかがきぎくぐけげこごさざしじすずせぜそぞただちぢっつづてでとどなにぬねのはばぱひびぴふぶぷへべぺほぼぽまみむめもゃやゅゆょよらりるれろゎわゐゑをんゔゕゖ\u3099\u309A゛゜ゝゞゟ", 348 | options: "-w -W -m0 -Z4", 349 | expect: "ぁあぃいぅうぇえぉおかがきぎくぐけげこごさざしじすずせぜそぞただちぢっつづてでとどなにぬねのはばぱひびぴふぶぷへべぺほぼぽまみむめもゃやゅゆょよらりるれろゎわゐゑをんゔゕゖ\uFF9E\uFF9F\uFF9E\uFF9Fゝゞゟ", 350 | }, 351 | { 352 | name: "With -X -Z4 Katakana", 353 | input: "゠ァアィイゥウェエォオカガキギクグケゲコゴサザシジスズセゼソゾタダチヂッツヅテデトドナニヌネノハバパヒビピフブプヘベペホボポマミムメモャヤュユョヨラリルレロヮワヰヱヲンヴヵヶヷヸヹヺ・ーヽヾヿ", 354 | options: "-w -W -m0 -Z4", 355 | expect: "゠ァアィイゥウェエォオカガキギクグケゲコゴサザシジスズセゼソゾタダチヂッツヅテデトドナニヌネノハバパヒビピフブプヘベペホボポマミムメモャヤュユョヨラリルレロヮワヰヱヲンヴヵヶヷヸヹヺ・ーヽヾヿ", 356 | }, 357 | { 358 | name: "With -X -Z4 Fullwidth Forms", 359 | input: "!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~⦅⦆¢£¬ ̄¦¥₩", 360 | options: "-w -W -m0 -Z4", 361 | expect: "!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~⦅⦆¢£¬‾¦¥₩", 362 | }, 363 | { 364 | name: "With -X -Z4 Halfwidth forms", 365 | input: "。「」、・ヲァィゥェォャュョッーアイウエオカキクケコサシスセソタチツテトナニヌネノハヒフヘホマミムメモヤユヨラリルレロワン\uFF9E\uFF9F\uFFA0ᄀᄁᆪᄂᆬᆭᄃᄄᄅᆰᆱᆲᆳᆴᆵᄚᄆᄇᄈᄡᄉᄊᄋᄌᄍᄎᄏᄐᄑ하ᅢᅣᅤᅥᅦᅧᅨᅩᅪᅫᅬᅭᅮᅯᅰᅱᅲᅳᅴᅵ│←↑→↓■○", 366 | options: "-w -W -m0 -Z4", 367 | expect: "。「」、・ヲァィゥェォャュョッーアイウエオカキクケコサシスセソタチツテトナニヌネノハヒフヘホマミムメモヤユヨラリルレロワン゛゜\uFFA0ᄀᄁᆪᄂᆬᆭᄃᄄᄅᆰᆱᆲᆳᆴᆵᄚᄆᄇᄈᄡᄉᄊᄋᄌᄍᄎᄏᄐᄑ하ᅢᅣᅤᅥᅦᅧᅨᅩᅪᅫᅬᅭᅮᅯᅰᅱᅲᅳᅴᅵ│←↑→↓■○", 368 | }, 369 | { 370 | name: "With -h2 -Z4 Latin-1 Printable", 371 | input: "¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖרÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ", 372 | options: "-w -W -m0 -x -h2 -Z4", 373 | expect: "¡¢£¤¥¦§¨©ª«¬®¯°±²³'µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖרÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ", 374 | }, 375 | { 376 | name: "With -h2 -Z4 General Punctuation Printable", 377 | input: "‐‒–—―‖‗‘’‚‛“”„‟†‡•‣․‥…‧‰‱′″‴‵‶‷‸‹›※‼‽‾‿⁀⁁⁂⁃⁄⁅⁆⁇⁈⁉⁊⁋⁌⁍⁎⁏⁐⁑⁒⁓⁔⁕⁖⁗⁘⁙⁚⁛⁜⁝⁞", 378 | options: "-w -W -m0 -x -h2 -Z4", 379 | expect: "‐‒–--‖‗`'‚‛\"\"„‟†‡•‣․‥…‧‰‱′″‴‵‶‷‸‹›※‼‽‾‿⁀⁁⁂⁃⁄⁅⁆⁇⁈⁉⁊⁋⁌⁍⁎⁏⁐⁑⁒⁓⁔⁕⁖⁗⁘⁙⁚⁛⁜⁝⁞", 380 | }, 381 | { 382 | name: "With -h2 -Z4 Minus Sign", 
383 | input: "−", 384 | options: "-w -W -m0 -x -h2 -Z4", 385 | expect: "-", 386 | }, 387 | { 388 | name: "With -h2 -Z4 CJK Symbols and Punctuation", 389 | input: " 、。〃〄々〆〇〈〉《》「」『』【】〒〓〔〕〖〗〘〙〚〛〜〝〞〟〠〡〢〣〤〥〦〧〨〩〪〭〫〬\u302E\u302F〰〱〲〳〴〵〶〷〸〹〺〻〼〽\u303E\u303F", 390 | options: "-w -W -m0 -x -h2 -Z4", 391 | expect: " 、。〃〄々〆〇<>《》「」『』【】〒〓〔〕〖〗〘〙〚〛〜〝〞〟〠〡〢〣〤〥〦〧〨〩〪〭〫〬\u302E\u302F〰〱〲〳〴〵〶〷〸〹〺〻〼〽\u303E\u303F", 392 | }, 393 | { 394 | name: "With -h2 -Z4 Hiragana", 395 | input: "ぁあぃいぅうぇえぉおかがきぎくぐけげこごさざしじすずせぜそぞただちぢっつづてでとどなにぬねのはばぱひびぴふぶぷへべぺほぼぽまみむめもゃやゅゆょよらりるれろゎわゐゑをんゔゕゖ\u3099\u309A゛゜ゝゞゟ", 396 | options: "-w -W -m0 -x -h2 -Z4", 397 | expect: "ァアィイゥウェエォオカガキギクグケゲコゴサザシジスズセゼソゾタダチヂッツヅテデトドナニヌネノハバパヒビピフブプヘベペホボポマミムメモャヤュユョヨラリルレロヮワヰヱヲンヴゕゖ\uFF9E\uFF9F\uFF9E\uFF9Fヽヾゟ", 398 | }, 399 | { 400 | name: "With -h2 -Z4 Katakana", 401 | input: "゠ァアィイゥウェエォオカガキギクグケゲコゴサザシジスズセゼソゾタダチヂッツヅテデトドナニヌネノハバパヒビピフブプヘベペホボポマミムメモャヤュユョヨラリルレロヮワヰヱヲンヴヵヶヷヸヹヺ・ーヽヾヿ", 402 | options: "-w -W -m0 -x -h2 -Z4", 403 | expect: "゠ァアィイゥウェエォオカガキギクグケゲコゴサザシジスズセゼソゾタダチヂッツヅテデトドナニヌネノハバパヒビピフブプヘベペホボポマミムメモャヤュユョヨラリルレロヮワヰヱヲンヴヵヶヷヸヹヺ・ーヽヾヿ", 404 | }, 405 | { 406 | name: "With -h2 -Z4 Fullwidth Forms", 407 | input: "!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~⦅⦆¢£¬ ̄¦¥₩", 408 | options: "-w -W -m0 -x -h2 -Z4", 409 | expect: "!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~⦅⦆¢£¬‾¦¥₩", 410 | }, 411 | } 412 | 413 | func TestConvert(t *testing.T) { 414 | for _, tc := range testcases { 415 | t.Run(tc.name, func(t *testing.T) { 416 | actual, err := nkf.Convert(tc.input, tc.options) 417 | if err != nil { 418 | t.Fatalf("unexpected error: %v", err) 419 | } 420 | if diff := cmp.Diff(actual, tc.expect); diff != "" { 421 | t.Errorf("diff (-actual +expect): %s", diff) 422 | } 423 | }) 424 | } 425 | } 426 | 427 | func TestRealNKF(t *testing.T) { 428 | noNKF := os.Getenv("NO_REAL_NKF") 429 | if noNKF == "1" || noNKF == "true" || noNKF == "yes" { 430 | t.Skip("NO_REAL_NKF is set; skipping 
this test") 431 | } 432 | nkfPath := os.Getenv("REAL_NKF") 433 | if nkfPath == "" { 434 | nkfPath = "nkf" 435 | } 436 | for _, tc := range testcases { 437 | t.Run(tc.name, func(t *testing.T) { 438 | command := exec.Command(nkfPath, tc.options) 439 | command.Stdin = strings.NewReader(tc.input) 440 | output, err := command.Output() 441 | if err != nil { 442 | t.Fatalf("unexpected error: %v", err) 443 | } 444 | if diff := cmp.Diff(string(output), tc.expect); diff != "" { 445 | t.Errorf("diff (-actual +expect): %s", diff) 446 | } 447 | }) 448 | } 449 | } 450 | -------------------------------------------------------------------------------- /nkf/optparse.go: -------------------------------------------------------------------------------- 1 | package nkf 2 | 3 | import ( 4 | "fmt" 5 | "strings" 6 | 7 | "github.com/wantedly/kana-go" 8 | ) 9 | 10 | // ParseOptions parses the given text and returns ConvertOptions. It returns an error for an unrecognized option or when any of -w, -W and -m0 is missing. 11 | func ParseOptions(text string) (kana.ConvertOptions, error) { 12 | p := parser{ 13 | halfwidthToWide: true, 14 | } 15 | if err := p.parseOptions(text); err != nil { 16 | return 0, err 17 | } 18 | if !p.utf8Output { 19 | return 0, fmt.Errorf("-w is required") 20 | } 21 | if !p.utf8Input { 22 | return 0, fmt.Errorf("-W is required") 23 | } 24 | if !p.noMime { 25 | return 0, fmt.Errorf("-m0 is required") 26 | } 27 | return p.toOptions(), nil 28 | } 29 | // parser records which options have been seen; ParseOptions starts it with halfwidthToWide enabled. 
30 | type parser struct { 31 | utf8Output bool 32 | utf8Input bool 33 | noMime bool 34 | katakanaToHiragana bool 35 | hiraganaToKatakana bool 36 | fullwidthToNarrow bool 37 | ideographicSpaceToNarrow bool 38 | doubleIdeographicSpace bool 39 | wideKatakanaToHalfwidth bool 40 | halfwidthToWide bool 41 | } 42 | // toOptions translates the recorded flags into kana.ConvertOptions; CompatMinus, CompatOverline, CompatCurrency and CompatOtherSymbols are always included. 43 | func (p *parser) toOptions() kana.ConvertOptions { 44 | opts := kana.CompatMinus | kana.CompatOverline | kana.CompatCurrency | kana.CompatOtherSymbols 45 | if p.katakanaToHiragana { 46 | opts |= kana.KatakanaToHiragana | kana.CompatKanaRestriction 47 | } 48 | if p.hiraganaToKatakana { 49 | opts |=
kana.HiraganaToKatakana | kana.CompatKanaRestriction 50 | } 51 | if p.fullwidthToNarrow { 52 | opts |= kana.FullwidthToNarrow | kana.CompatQuotes | kana.CompatBrackets | kana.CompatKeepSpaces 53 | } 54 | if p.ideographicSpaceToNarrow { 55 | opts &= ^kana.CompatKeepSpaces 56 | } else if p.doubleIdeographicSpace { 57 | opts &= ^kana.CompatKeepSpaces 58 | opts |= kana.CompatDoubleSpaces 59 | } 60 | if p.wideKatakanaToHalfwidth { 61 | opts |= kana.CompatWideKatakanaToHalfwidth 62 | } 63 | if p.halfwidthToWide { 64 | opts |= kana.HalfwidthToWide | kana.CompatVoicedSoundMarks | kana.CompatVoicedKanaRestriction | kana.CompatKeepHalfwidthHangul | kana.CompatKeepHalfwidthSymbols 65 | } 66 | return opts 67 | } 68 | // parseOptions splits text on single spaces, skips empty parts, and applies each short-option run (-xyz) or long option (--name); anything else is reported as an invalid option. 
69 | func (p *parser) parseOptions(text string) error { 70 | // See `options` in nkf.c 71 | 72 | parts := strings.Split(text, " ") 73 | for _, part := range parts { 74 | if part == "" { 75 | continue 76 | } 77 | if part[0] != '-' || len(part) <= 1 { 78 | return fmt.Errorf("invalid option: %s", part) 79 | } 80 | if part[1] == '-' { 81 | if len(part) <= 2 { 82 | return fmt.Errorf("invalid option: %s", part) 83 | } 84 | longOpt, ok := longOptions[part[2:]] 85 | if !ok { 86 | return fmt.Errorf("invalid option: %s", part) 87 | } 88 | err := p.parseShortOptions(longOpt) 89 | if err != nil { 90 | return err 91 | } 92 | continue 93 | } 94 | err := p.parseShortOptions(part[1:]) 95 | if err != nil { 96 | return err 97 | } 98 | } 99 | return nil 100 | } 101 | // parseShortOptions walks text (short options without the leading dash), cutting it into groups of one option letter plus the digits that follow it, such as h2 or Z4, and records each supported group on p; an unsupported group yields an error. 102 | func (p *parser) parseShortOptions(text string) error { 103 | bytes := []byte(text) 104 | i := 0 105 | for i < len(bytes) { 106 | j := i + 1 107 | for j < len(bytes) { 108 | switch text[i : j+1] { 109 | case "mB", "mQ", "mN", "mS": 110 | j += 1 111 | continue 112 | } 113 | if '0' <= bytes[j] && bytes[j] <= '9' { 114 | j += 1 115 | continue 116 | } else { 117 | break 118 | } 119 | } 120 | group := text[i:j] 121 | i = j 122 | switch group { 123 | case "h", "h1": 124 | p.katakanaToHiragana = true 125 | case "h2": 126
| p.hiraganaToKatakana = true 127 | case "h3": 128 | p.katakanaToHiragana = true 129 | p.hiraganaToKatakana = true 130 | case "w", "w8": 131 | p.utf8Output = true 132 | case "W", "W8": 133 | p.utf8Input = true 134 | case "Z", "Z0": 135 | p.fullwidthToNarrow = true 136 | case "Z1": 137 | p.fullwidthToNarrow = true 138 | p.ideographicSpaceToNarrow = true 139 | case "Z2": 140 | p.fullwidthToNarrow = true 141 | p.doubleIdeographicSpace = true 142 | case "Z4": 143 | p.fullwidthToNarrow = true 144 | p.wideKatakanaToHalfwidth = true 145 | case "x": 146 | p.halfwidthToWide = false 147 | case "X": 148 | p.halfwidthToWide = true 149 | case "m0": 150 | p.noMime = true 151 | default: 152 | return fmt.Errorf("invalid option: -%s", group) 153 | } 154 | } 155 | return nil 156 | } 157 | // longOptions maps each supported long option name (written without the leading dashes) to the short-option text that parseOptions hands to parseShortOptions. 158 | var longOptions = map[string]string{ 159 | "hiragana": "h1", 160 | "katakana": "h2", 161 | "katakana-hiragana": "h3", 162 | "utf8": "w", 163 | "utf8-input": "W", 164 | } 165 | -------------------------------------------------------------------------------- /nkf/optparse_test.go: -------------------------------------------------------------------------------- 1 | package nkf_test 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/wantedly/kana-go" 7 | "github.com/wantedly/kana-go/nkf" 8 | ) 9 | // TestParseOptions checks, for each option string, either the resulting flag set or the exact error message of nkf.ParseOptions. 
10 | func TestParseOptions(t *testing.T) { 11 | compatBase := kana.CompatMinus | kana.CompatOverline | kana.CompatCurrency | kana.CompatOtherSymbols 12 | testcases := []struct { 13 | name string 14 | text string 15 | expect kana.ConvertOptions 16 | expectErr string 17 | }{ 18 | { 19 | name: "Without -w", 20 | text: "", 21 | expectErr: "-w is required", 22 | }, 23 | { 24 | name: "Without -W", 25 | text: "-w", 26 | expectErr: "-W is required", 27 | }, 28 | { 29 | name: "Without -m0", 30 | text: "-w -W", 31 | expectErr: "-m0 is required", 32 | }, 33 | { 34 | name: "Minimum options", 35 | text: "-w -W -m0", 36 | expect: compatBase | kana.HalfwidthToWide | kana.CompatVoicedSoundMarks |
kana.CompatKeepHalfwidthHangul | kana.CompatVoicedKanaRestriction | kana.CompatKeepHalfwidthSymbols, 37 | }, 38 | { 39 | name: "Minimum alt options", 40 | text: "-w8 -W8 -m0", 41 | expect: compatBase | kana.HalfwidthToWide | kana.CompatVoicedSoundMarks | kana.CompatKeepHalfwidthHangul | kana.CompatVoicedKanaRestriction | kana.CompatKeepHalfwidthSymbols, 42 | }, 43 | { 44 | name: "Minimum longhand options", 45 | text: "--utf8 --utf8-input -m0", 46 | expect: compatBase | kana.HalfwidthToWide | kana.CompatVoicedSoundMarks | kana.CompatKeepHalfwidthHangul | kana.CompatVoicedKanaRestriction | kana.CompatKeepHalfwidthSymbols, 47 | }, 48 | { 49 | name: "-h", 50 | text: "-w -W -m0 -h", 51 | expect: compatBase | kana.HalfwidthToWide | kana.CompatVoicedSoundMarks | kana.CompatKeepHalfwidthHangul | kana.CompatVoicedKanaRestriction | kana.CompatKeepHalfwidthSymbols | kana.KatakanaToHiragana | kana.CompatKanaRestriction, 52 | }, 53 | { 54 | name: "-h1", 55 | text: "-w -W -m0 -h1", 56 | expect: compatBase | kana.HalfwidthToWide | kana.CompatVoicedSoundMarks | kana.CompatKeepHalfwidthHangul | kana.CompatVoicedKanaRestriction | kana.CompatKeepHalfwidthSymbols | kana.KatakanaToHiragana | kana.CompatKanaRestriction, 57 | }, 58 | { 59 | name: "--hiragana", 60 | text: "-w -W -m0 --hiragana", 61 | expect: compatBase | kana.HalfwidthToWide | kana.CompatVoicedSoundMarks | kana.CompatKeepHalfwidthHangul | kana.CompatVoicedKanaRestriction | kana.CompatKeepHalfwidthSymbols | kana.KatakanaToHiragana | kana.CompatKanaRestriction, 62 | }, 63 | { 64 | name: "-h2", 65 | text: "-w -W -m0 -h2", 66 | expect: compatBase | kana.HalfwidthToWide | kana.CompatVoicedSoundMarks | kana.CompatKeepHalfwidthHangul | kana.CompatVoicedKanaRestriction | kana.CompatKeepHalfwidthSymbols | kana.HiraganaToKatakana | kana.CompatKanaRestriction, 67 | }, 68 | { 69 | name: "--katakana", 70 | text: "-w -W -m0 --katakana", 71 | expect: compatBase | kana.HalfwidthToWide | kana.CompatVoicedSoundMarks | 
kana.CompatKeepHalfwidthHangul | kana.CompatVoicedKanaRestriction | kana.CompatKeepHalfwidthSymbols | kana.HiraganaToKatakana | kana.CompatKanaRestriction, 72 | }, 73 | { 74 | name: "-h3", 75 | text: "-w -W -m0 -h3", 76 | expect: compatBase | kana.HalfwidthToWide | kana.CompatVoicedSoundMarks | kana.CompatKeepHalfwidthHangul | kana.CompatVoicedKanaRestriction | kana.CompatKeepHalfwidthSymbols | kana.KatakanaToHiragana | kana.HiraganaToKatakana | kana.CompatKanaRestriction, 77 | }, 78 | { 79 | name: "--katakana-hiragana", 80 | text: "-w -W -m0 --katakana-hiragana", 81 | expect: compatBase | kana.HalfwidthToWide | kana.CompatVoicedSoundMarks | kana.CompatKeepHalfwidthHangul | kana.CompatVoicedKanaRestriction | kana.CompatKeepHalfwidthSymbols | kana.KatakanaToHiragana | kana.HiraganaToKatakana | kana.CompatKanaRestriction, 82 | }, 83 | { 84 | name: "-Z", 85 | text: "-w -W -m0 -Z", 86 | expect: compatBase | kana.HalfwidthToWide | kana.CompatVoicedSoundMarks | kana.CompatKeepHalfwidthHangul | kana.CompatVoicedKanaRestriction | kana.CompatKeepHalfwidthSymbols | kana.FullwidthToNarrow | kana.CompatQuotes | kana.CompatBrackets | kana.CompatKeepSpaces, 87 | }, 88 | { 89 | name: "-Z0", 90 | text: "-w -W -m0 -Z0", 91 | expect: compatBase | kana.HalfwidthToWide | kana.CompatVoicedSoundMarks | kana.CompatKeepHalfwidthHangul | kana.CompatVoicedKanaRestriction | kana.CompatKeepHalfwidthSymbols | kana.FullwidthToNarrow | kana.CompatQuotes | kana.CompatBrackets | kana.CompatKeepSpaces, 92 | }, 93 | { 94 | name: "-Z1", 95 | text: "-w -W -m0 -Z1", 96 | expect: compatBase | kana.HalfwidthToWide | kana.CompatVoicedSoundMarks | kana.CompatKeepHalfwidthHangul | kana.CompatVoicedKanaRestriction | kana.CompatKeepHalfwidthSymbols | kana.FullwidthToNarrow | kana.CompatQuotes | kana.CompatBrackets, 97 | }, 98 | { 99 | name: "-Z2", 100 | text: "-w -W -m0 -Z2", 101 | expect: compatBase | kana.HalfwidthToWide | kana.CompatVoicedSoundMarks | kana.CompatKeepHalfwidthHangul | 
kana.CompatVoicedKanaRestriction | kana.CompatKeepHalfwidthSymbols | kana.FullwidthToNarrow | kana.CompatQuotes | kana.CompatBrackets | kana.CompatDoubleSpaces, 102 | }, 103 | { 104 | name: "-Z4", 105 | text: "-w -W -m0 -Z4", 106 | expect: compatBase | kana.HalfwidthToWide | kana.CompatVoicedSoundMarks | kana.CompatKeepHalfwidthHangul | kana.CompatVoicedKanaRestriction | kana.CompatKeepHalfwidthSymbols | kana.FullwidthToNarrow | kana.CompatQuotes | kana.CompatBrackets | kana.CompatKeepSpaces | kana.CompatWideKatakanaToHalfwidth, 107 | }, 108 | { 109 | name: "-x", 110 | text: "-w -W -m0 -x", 111 | expect: compatBase, 112 | }, 113 | { 114 | name: "-X", 115 | text: "-w -W -m0 -X", 116 | expect: compatBase | kana.HalfwidthToWide | kana.CompatVoicedSoundMarks | kana.CompatKeepHalfwidthHangul | kana.CompatVoicedKanaRestriction | kana.CompatKeepHalfwidthSymbols, 117 | }, 118 | } 119 | 120 | for _, tc := range testcases { 121 | t.Run(tc.name, func(t *testing.T) { 122 | opts, err := nkf.ParseOptions(tc.text) 123 | if tc.expectErr == "" { 124 | if err != nil { 125 | t.Errorf("expected no error, but got %v", err) 126 | } 127 | if opts != tc.expect { 128 | t.Errorf("expected %v, but got %v", tc.expect, opts) 129 | } 130 | } else { 131 | if err == nil { 132 | t.Errorf("expected error, but got nil") 133 | } else if err.Error() != tc.expectErr { 134 | t.Errorf("expected error %q, but got %q", tc.expectErr, err.Error()) 135 | } 136 | } 137 | }) 138 | } 139 | } 140 | -------------------------------------------------------------------------------- /options.go: -------------------------------------------------------------------------------- 1 | package kana 2 | 3 | import ( 4 | "strconv" 5 | "strings" 6 | ) 7 | 8 | // ConvertOptions describes options for [Convert]. 9 | type ConvertOptions int 10 | 11 | const ( 12 | // HalfwidthToWide converts characters in halfwidth forms 13 | // to their ordinary, wide versions. 
14 | // 15 | // The characters having East_Asian_Width property value of 16 | // H (East Asian Halfwidth) except U+20A9 WON SIGN (₩) are converted. 17 | // That is: 18 | // 19 | // - U+FF61 HALFWIDTH IDEOGRAPHIC FULL STOP (。) to U+FFBE HALFWIDTH HANGUL LETTER HIEUH (ᄒ) 20 | // - U+FFC2 HALFWIDTH HANGUL LETTER A (ᅡ) to U+FFC7 HALFWIDTH HANGUL LETTER E (ᅦ) 21 | // - U+FFCA HALFWIDTH HANGUL LETTER YEO (ᅧ) to U+FFCF HALFWIDTH HANGUL LETTER OE (ᅬ) 22 | // - U+FFD2 HALFWIDTH HANGUL LETTER YO (ᅭ) to U+FFD7 HALFWIDTH HANGUL LETTER YU (ᅲ) 23 | // - U+FFDA HALFWIDTH HANGUL LETTER EU (ᅳ) to U+FFDC HALFWIDTH HANGUL LETTER I (ᅵ) 24 | // - U+FFE8 HALFWIDTH FORMS LIGHT VERTICAL (│) to U+FFEE HALFWIDTH WHITE CIRCLE (○) 25 | // 26 | // The conversion is roughly equivalent to NFKC but with some differences: 27 | // 28 | // - Halfwidth Hangul letters are not fully normalized and instead 29 | // converted to the corresponding letters 30 | // in Hangul Compatibility Jamo block. 31 | // 32 | // The following compat flags affect the behavior of this transformation: 33 | // 34 | // - [CompatVoicedSoundMarks] 35 | // - [CompatVoicedKanaRestriction] 36 | // - [CompatKeepHalfwidthHangul] 37 | // - [CompatKeepHalfwidthSymbols] 38 | HalfwidthToWide ConvertOptions = 1 << iota 39 | // FullwidthToNarrow converts characters in fullwidth forms 40 | // to their ordinary, narrow versions. 41 | // 42 | // The characters having East_Asian_Width property value of 43 | // F (East Asian Fullwidth) are converted. 44 | // That is: 45 | // 46 | // - U+FF01 FULLWIDTH EXCLAMATION MARK (!) to U+FF60 FULLWIDTH RIGHT WHITE PARENTHESIS (⦆) 47 | // - U+FFE0 FULLWIDTH CENT SIGN (¢) to U+FFE6 FULLWIDTH WON SIGN (₩) 48 | // 49 | // The conversion is roughly equivalent to NFKC but with some differences: 50 | // 51 | // - U+FFE3 FULLWIDTH MACRON ( ̄) is not fully normalized and instead 52 | // converted to U+00AF MACRON (¯). 
53 | // 54 | // The following compat flags affect the behavior of this transformation: 55 | // 56 | // - [CompatQuotes] 57 | // - [CompatMinus] 58 | // - [CompatOverline] 59 | // - [CompatCurrency] 60 | // - [CompatBrackets] 61 | // - [CompatKeepSpaces] 62 | // - [CompatDoubleSpaces] 63 | FullwidthToNarrow 64 | // KatakanaToHiragana converts katakana to hiragana. 65 | // 66 | // Consider it transformation from Script=Katakana to Script=Hiragana, 67 | // but there are a lot of exceptions. 68 | // 69 | // Those characters are converted to a single hiragana character: 70 | // 71 | // - U+30A1 KATAKANA LETTER SMALL A (ァ) to U+30F6 KATAKANA LETTER SMALL KE (ヶ) 72 | // - U+30FD KATAKANA ITERATION MARK (ヽ) to U+30FE KATAKANA VOICED ITERATION MARK (ヾ) 73 | // - U+1B155 KATAKANA LETTER SMALL KO (𛅕) 74 | // - U+1B164 KATAKANA LETTER SMALL WI (𛅤) to U+1B166 KATAKANA LETTER SMALL WO (𛅦) 75 | // 76 | // Those characters are converted to a sequence of characters: 77 | // 78 | // - U+30F7 KATAKANA LETTER VA (ヷ) to U+30FA KATAKANA LETTER VO (ヺ) 79 | // 80 | // Those characters are not converted: 81 | // 82 | // - U+30FF KATAKANA DIGRAPH KOTO (ヿ) 83 | // - U+31F0 KATAKANA LETTER SMALL KU (ㇰ) to U+31FF KATAKANA LETTER SMALL RO (ㇿ) 84 | // - U+32D0 CIRCLED KATAKANA A (㋐) to U+32FE CIRCLED KATAKANA WO (㋾) 85 | // - U+3300 SQUARE APAATO (㌀) to U+3357 SQUARE WATTO (㍗) 86 | // - U+1AFF0 KATAKANA LETTER MINNAN TONE-2 (𚿰) to U+1AFF3 KATAKANA LETTER MINNAN TONE-5 (𚿳) 87 | // - U+1AFF5 KATAKANA LETTER MINNAN TONE-7 (𚿵) to U+1AFFB KATAKANA LETTER MINNAN NASALIZED TONE-8 (𚿻) 88 | // - U+1B000 KATAKANA LETTER ARCHAIC E (𛀀) 89 | // - U+1B120 KATAKANA LETTER ARCHAIC YI (𛄠) to U+1B122 KATAKANA LETTER ARCHAIC WU (𛄢) 90 | // - U+1B167 KATAKANA LETTER SMALL N (𛅧) 91 | // 92 | // You need [HalfwidthToWide] to convert them to hiragana: 93 | // 94 | // - U+FF66 HALFWIDTH KATAKANA LETTER WO (ヲ) to U+FF6F HALFWIDTH KATAKANA LETTER SMALL TU (ッ) 95 | // - U+FF71 HALFWIDTH KATAKANA LETTER A (ア) to U+FF9D 
HALFWIDTH KATAKANA LETTER N (ン) 96 | // 97 | // The following compat flags affect the behavior of this transformation: 98 | // 99 | // - [CompatKanaRestriction] 100 | KatakanaToHiragana 101 | // HiraganaToKatakana converts hiragana to katakana. 102 | // 103 | // Consider it transformation from Script=Hiragana to Script=Katakana, 104 | // but there are a lot of exceptions. 105 | // 106 | // Those characters are converted to a single katakana character: 107 | // 108 | // - U+3041 HIRAGANA LETTER SMALL A (ぁ) to U+3096 HIRAGANA LETTER SMALL KE (ゖ) 109 | // - U+309D HIRAGANA ITERATION MARK (ゝ) to U+309E HIRAGANA VOICED ITERATION MARK (ゞ) 110 | // - U+1B132 HIRAGANA LETTER SMALL KO (𛄲) 111 | // - U+1B150 HIRAGANA LETTER SMALL WI (𛅐) to U+1B152 HIRAGANA LETTER SMALL WO (𛅒) 112 | // 113 | // Those characters are not converted: 114 | // 115 | // - U+309F HIRAGANA DIGRAPH YORI (ゟ) 116 | // - U+1B001 HIRAGANA LETTER ARCHAIC YE (𛀁) to U+1B11F HIRAGANA LETTER ARCHAIC WU (𛄟) 117 | // - U+1F200 SQUARE HIRAGANA HOKA (🈀) 118 | // 119 | // The following compat flags affect the behavior of this transformation: 120 | // 121 | // - [CompatKanaRestriction] 122 | HiraganaToKatakana 123 | // CompatWideKatakanaToHalfwidth converts ordinary katakana 124 | // to their halfwidth forms. 125 | // 126 | // This transformation newly introduces compatibility characters 127 | // rather than reducing them in the input string. 128 | // This is against what Unicode intends to do. Therefore, 129 | // the entire transformation mode is considered as a compatibility option. 130 | // 131 | // If you want to normalize between fullwidth and halfwidth katakana, 132 | // you should use [HalfwidthToWide] instead. 
133 | // 134 | // The following characters are converted: 135 | // 136 | // - U+3001 IDEOGRAPHIC COMMA (、) to U+3002 IDEOGRAPHIC FULL STOP (。) 137 | // - U+300C LEFT CORNER BRACKET (「) to U+300D RIGHT CORNER BRACKET (」) 138 | // - U+3099 COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK to U+309C KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK (゜) 139 | // - U+30A1 KATAKANA LETTER SMALL A (ァ) to U+30ED KATAKANA LETTER RO (ロ) 140 | // - U+30EF KATAKANA LETTER WA (ワ) 141 | // - U+30F2 KATAKANA LETTER WO (ヲ) to U+30F4 KATAKANA LETTER VU (ヴ) 142 | // - U+30FB KATAKANA MIDDLE DOT (・) to U+30FC KATAKANA-HIRAGANA PROLONGED SOUND MARK (ー) 143 | // 144 | // When a character in the list canonically decomposes to a base character 145 | // and a combining voiced or semi-voiced sound mark, the transformation 146 | // is applied after decomposing the character. 147 | // 148 | // Note that, U+30F7 KATAKANA LETTER VA (ヷ) and U+30FA KATAKANA LETTER VO (ヺ) 149 | // can also be transformed this way, but they are not included in the list. 150 | // This is because the entire transformation exists for compatibility 151 | // with NKF. 152 | // 153 | // Like other compat options, this is not stable under canonical equivalence. 154 | CompatWideKatakanaToHalfwidth 155 | // CompatQuotes is a compatibility option 156 | // to reproduce NKF's behavior for quotes. 
157 | // 158 | // Specifically, the following transformations are additionally applied 159 | // in [FullwidthToNarrow]: 160 | // 161 | // - U+00B4 ACUTE ACCENT (´) → U+0027 APOSTROPHE (') 162 | // - U+2018 LEFT SINGLE QUOTATION MARK (‘) → U+0060 GRAVE ACCENT (`) 163 | // - U+2019 RIGHT SINGLE QUOTATION MARK (’) → U+0027 APOSTROPHE (') 164 | // - U+201C LEFT DOUBLE QUOTATION MARK (“) → U+0022 QUOTATION MARK (") 165 | // - U+201D RIGHT DOUBLE QUOTATION MARK (”) → U+0022 QUOTATION MARK (") 166 | // 167 | // While the following transformations are inhibited in [FullwidthToNarrow]: 168 | // 169 | // - U+FF02 FULLWIDTH QUOTATION MARK (") 170 | // (usually converted to U+0022 QUOTATION MARK (")) 171 | // - U+FF07 FULLWIDTH APOSTROPHE (') 172 | // (usually converted to U+0027 APOSTROPHE (')) 173 | CompatQuotes 174 | // CompatMinus is a compatibility option 175 | // to reproduce NKF's behavior for minus signs, hyphens, and similar symbols. 176 | // 177 | // Specifically, the following transformations are applied: 178 | // 179 | // - U+2015 HORIZONTAL BAR (―) → U+2014 EM DASH (—) 180 | // - U+FF0D FULLWIDTH HYPHEN-MINUS (-) → U+2212 MINUS SIGN (−) 181 | // 182 | // and the following transformations are additionally applied 183 | // in [FullwidthToNarrow]: 184 | // 185 | // - U+2014 EM DASH (—) → U+002D HYPHEN-MINUS (-) 186 | // - U+2015 HORIZONTAL BAR (―) → U+002D HYPHEN-MINUS (-) 187 | // - U+2212 MINUS SIGN (−) → U+002D HYPHEN-MINUS (-) 188 | // - U+FF0D FULLWIDTH HYPHEN-MINUS (-) → U+002D HYPHEN-MINUS (-) 189 | CompatMinus 190 | // CompatOverline is a compatibility option 191 | // to reproduce NKF's behavior for overlines and similar symbols. 192 | // 193 | // Specifically, the following transformations are applied: 194 | // 195 | // - U+FFE3 FULLWIDTH MACRON ( ̄) → U+203E OVERLINE (‾), which wins over 196 | // [FullwidthToNarrow], where it is converted to U+00AF MACRON (¯).
197 | // 198 | // 199 | // Additionally, the following transformations are inhibited in 200 | // [FullwidthToNarrow]: 201 | // 202 | // - U+FF5E FULLWIDTH TILDE (~) 203 | // (usually converted to U+007E TILDE (~)) 204 | CompatOverline 205 | // CompatCurrency is a compatibility option 206 | // to reproduce NKF's behavior for currency symbols. 207 | // 208 | // Specifically, the following transformations are applied regardless of 209 | // [FullwidthToNarrow]: 210 | // 211 | // - U+FFE0 FULLWIDTH CENT SIGN (¢) → U+00A2 CENT SIGN (¢) 212 | // - U+FFE1 FULLWIDTH POUND SIGN (£) → U+00A3 POUND SIGN (£) 213 | // - U+FFE5 FULLWIDTH YEN SIGN (¥) → U+00A5 YEN SIGN (¥) 214 | // 215 | // and the following transformations are inhibited in [FullwidthToNarrow]: 216 | // 217 | // - U+FFE6 FULLWIDTH WON SIGN (₩) 218 | // (usually converted to U+20A9 WON SIGN (₩)) 219 | CompatCurrency 220 | // CompatBrackets is a compatibility option 221 | // to reproduce NKF's behavior for brackets and parentheses. 222 | // 223 | // Specifically, the following transformations are additionally applied 224 | // in [FullwidthToNarrow]: 225 | // 226 | // - U+3008 LEFT ANGLE BRACKET (〈) → U+003C LESS-THAN SIGN (<) 227 | // - U+3009 RIGHT ANGLE BRACKET (〉) → U+003E GREATER-THAN SIGN (>) 228 | // 229 | // while the following transformations are inhibited 230 | // in [FullwidthToNarrow]: 231 | // 232 | // - U+FF5F FULLWIDTH LEFT WHITE PARENTHESIS (⦅) 233 | // (usually converted to U+2985 LEFT WHITE PARENTHESIS (⦅)) 234 | // - U+FF60 FULLWIDTH RIGHT WHITE PARENTHESIS (⦆) 235 | // (usually converted to U+2986 RIGHT WHITE PARENTHESIS (⦆)) 236 | CompatBrackets 237 | // CompatOtherSymbols is a compatibility option 238 | // to reproduce NKF's behavior for miscellaneous symbols. 
239 | // 240 | // Specifically, the following transformations are applied regardless of 241 | // [FullwidthToNarrow]: 242 | // 243 | // - U+FFE2 FULLWIDTH NOT SIGN (¬) → U+00AC NOT SIGN (¬) 244 | // - U+FFE4 FULLWIDTH BROKEN BAR (¦) → U+00A6 BROKEN BAR (¦) 245 | // 246 | // and the following transformations are also applied regardless of 247 | // [FullwidthToNarrow]: 248 | // 249 | // - U+2225 PARALLEL TO (∥) → U+2016 DOUBLE VERTICAL LINE (‖) 250 | CompatOtherSymbols 251 | // CompatKeepSpaces is a compatibility option 252 | // to reproduce NKF's behavior for Ideographic Spaces. 253 | // 254 | // Specifically, the following transformations are inhibited in 255 | // [FullwidthToNarrow]: 256 | // 257 | // - U+3000 IDEOGRAPHIC SPACE ( ) 258 | // (usually converted to U+0020 SPACE ( )) 259 | CompatKeepSpaces 260 | // CompatDoubleSpaces is a compatibility option 261 | // to reproduce NKF's behavior for Ideographic Spaces. 262 | // 263 | // Specifically, if this option is present along with [FullwidthToNarrow], 264 | // U+3000 IDEOGRAPHIC SPACE ( ) is converted to two U+0020 SPACE ( ) characters. 265 | CompatDoubleSpaces 266 | // CompatVoicedSoundMarks is a compatibility option 267 | // to reproduce NKF's behavior for voiced and semi-voiced sound marks. 268 | // 269 | // Specifically, the following transformations are applied in [HalfwidthToWide]: 270 | // 271 | // - U+FF9E HALFWIDTH KATAKANA VOICED SOUND MARK (゙) is converted to 272 | // U+309B KATAKANA-HIRAGANA VOICED SOUND MARK rather than 273 | // U+3099 COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK, except when 274 | // it follows ウ, カ, キ, ク, ケ, コ, サ, シ, ス, セ, ソ, タ, チ, ツ, テ, ト, ハ, ヒ, フ, 275 | // ヘ, or ホ. 276 | // - U+FF9F HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK (゚) is converted to 277 | // U+309C KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK rather than 278 | // U+309A COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK, except 279 | // when it follows ハ, ヒ, フ, ヘ, or ホ. 
280 | CompatVoicedSoundMarks 281 | // CompatVoicedKanaRestriction is a compatibility option 282 | // to reproduce NKF's behavior for halfwidth Katakana letters followed by a voiced sound mark. 283 | // 284 | // Specifically, the following characters are transformed 285 | // differently in [HalfwidthToWide]: 286 | // 287 | // - U+FF66 HALFWIDTH KATAKANA LETTER WO (ヲ) followed by 288 | // U+FF9E HALFWIDTH KATAKANA VOICED SOUND MARK (゙) is converted to 289 | // U+30F2 KATAKANA LETTER WO (ヲ) followed by 290 | // U+309B KATAKANA-HIRAGANA VOICED SOUND MARK (゛), rather than 291 | // U+30FA KATAKANA LETTER VO (ヺ). 292 | // - U+FF9C HALFWIDTH KATAKANA LETTER WA (ワ) followed by 293 | // U+FF9E HALFWIDTH KATAKANA VOICED SOUND MARK (゙) is converted to 294 | // U+30EF KATAKANA LETTER WA (ワ) followed by 295 | // U+309B KATAKANA-HIRAGANA VOICED SOUND MARK (゛), rather than 296 | // U+30F7 KATAKANA LETTER VA (ヷ). 297 | CompatVoicedKanaRestriction 298 | // CompatKeepHalfwidthHangul is a compatibility option 299 | // to reproduce NKF's behavior for halfwidth Hangul letters. 
300 | // 301 | // Specifically, the following characters are kept intact 302 | // in [HalfwidthToWide]: 303 | // 304 | // - U+FFA0 HALFWIDTH HANGUL FILLER (ᅠ) 305 | // - U+FFA1 HALFWIDTH HANGUL LETTER KIYEOK (ᄀ) to U+FFBE HALFWIDTH HANGUL LETTER HIEUH (ᄒ) 306 | // - U+FFC2 HALFWIDTH HANGUL LETTER A (ᅡ) to U+FFC7 HALFWIDTH HANGUL LETTER E (ᅦ) 307 | // - U+FFCA HALFWIDTH HANGUL LETTER YEO (ᅧ) to U+FFCF HALFWIDTH HANGUL LETTER OE (ᅬ) 308 | // - U+FFD2 HALFWIDTH HANGUL LETTER YO (ᅭ) to U+FFD7 HALFWIDTH HANGUL LETTER YU (ᅲ) 309 | // - U+FFDA HALFWIDTH HANGUL LETTER EU (ᅳ) to U+FFDC HALFWIDTH HANGUL LETTER I (ᅵ) 310 | CompatKeepHalfwidthHangul 311 | // CompatKeepHalfwidthSymbols is a compatibility option 312 | // to reproduce NKF's behavior for halfwidth symbols.
313 | // 314 | // Specifically, the following characters are kept intact 315 | // in [HalfwidthToWide]: 316 | // 317 | // - U+FFE8 HALFWIDTH FORMS LIGHT VERTICAL (│) 318 | // - U+FFE9 HALFWIDTH LEFTWARDS ARROW (←) 319 | // - U+FFEA HALFWIDTH UPWARDS ARROW (↑) 320 | // - U+FFEB HALFWIDTH RIGHTWARDS ARROW (→) 321 | // - U+FFEC HALFWIDTH DOWNWARDS ARROW (↓) 322 | // - U+FFED HALFWIDTH BLACK SQUARE (■) 323 | // - U+FFEE HALFWIDTH WHITE CIRCLE (○) 324 | CompatKeepHalfwidthSymbols 325 | // CompatKanaRestriction is a compatibility option 326 | // to reproduce NKF's behavior for hiragana and katakana. 327 | // 328 | // Specifically, the following characters are kept intact 329 | // in [KatakanaToHiragana]: 330 | // 331 | // - U+30F5 KATAKANA LETTER SMALL KA (ヵ) 332 | // - U+30F6 KATAKANA LETTER SMALL KE (ヶ) 333 | // - U+30F7 KATAKANA LETTER VA (ヷ) 334 | // - U+30F8 KATAKANA LETTER VI (ヸ) 335 | // - U+30F9 KATAKANA LETTER VE (ヹ) 336 | // - U+30FA KATAKANA LETTER VO (ヺ) 337 | // - U+1B155 KATAKANA LETTER SMALL KO (𛅕) 338 | // - U+1B164 KATAKANA LETTER SMALL WI (𛅤) 339 | // - U+1B165 KATAKANA LETTER SMALL WE (𛅥) 340 | // - U+1B166 KATAKANA LETTER SMALL WO (𛅦) 341 | // 342 | // and the following characters are kept intact in [HiraganaToKatakana]: 343 | // 344 | // - U+3095 HIRAGANA LETTER SMALL KA (ゕ) 345 | // - U+3096 HIRAGANA LETTER SMALL KE (ゖ) 346 | // - U+1B132 HIRAGANA LETTER SMALL KO (𛄲) 347 | // - U+1B150 HIRAGANA LETTER SMALL WI (𛅐) 348 | // - U+1B151 HIRAGANA LETTER SMALL WE (𛅑) 349 | // - U+1B152 HIRAGANA LETTER SMALL WO (𛅒) 350 | CompatKanaRestriction 351 | ) 352 | // Normalize returns o with dependent compat flags cleared: the FullwidthToNarrow-only flags when FullwidthToNarrow is unset, CompatDoubleSpaces when CompatKeepSpaces is set, the HalfwidthToWide-only flags when HalfwidthToWide is unset, and CompatKanaRestriction when neither kana conversion is set. 
353 | func (o ConvertOptions) Normalize() ConvertOptions { 354 | if o&FullwidthToNarrow == 0 { 355 | o &^= CompatQuotes | CompatBrackets | CompatKeepSpaces | CompatDoubleSpaces 356 | } 357 | if o&CompatKeepSpaces != 0 { 358 | o &^= CompatDoubleSpaces 359 | } 360 | if o&HalfwidthToWide == 0 { 361 | o &^= CompatVoicedSoundMarks | CompatVoicedKanaRestriction | CompatKeepHalfwidthHangul |
CompatKeepHalfwidthSymbols 362 | } 363 | if o&(KatakanaToHiragana|HiraganaToKatakana) == 0 { 364 | o &^= CompatKanaRestriction 365 | } 366 | return o 367 | } 368 | // flagNames pairs every named flag with its display name, in declaration order; String reports an entry when the bits of o selected by mask equal flag. 369 | var flagNames = []struct { 370 | name string 371 | flag ConvertOptions 372 | mask ConvertOptions 373 | }{ 374 | {"HalfwidthToWide", HalfwidthToWide, HalfwidthToWide}, 375 | {"FullwidthToNarrow", FullwidthToNarrow, FullwidthToNarrow}, 376 | {"KatakanaToHiragana", KatakanaToHiragana, KatakanaToHiragana}, 377 | {"HiraganaToKatakana", HiraganaToKatakana, HiraganaToKatakana}, 378 | {"CompatWideKatakanaToHalfwidth", CompatWideKatakanaToHalfwidth, CompatWideKatakanaToHalfwidth}, 379 | {"CompatQuotes", CompatQuotes, CompatQuotes}, 380 | {"CompatMinus", CompatMinus, CompatMinus}, 381 | {"CompatOverline", CompatOverline, CompatOverline}, 382 | {"CompatCurrency", CompatCurrency, CompatCurrency}, 383 | {"CompatBrackets", CompatBrackets, CompatBrackets}, 384 | {"CompatOtherSymbols", CompatOtherSymbols, CompatOtherSymbols}, 385 | {"CompatKeepSpaces", CompatKeepSpaces, CompatKeepSpaces}, 386 | {"CompatDoubleSpaces", CompatDoubleSpaces, CompatDoubleSpaces}, 387 | {"CompatVoicedSoundMarks", CompatVoicedSoundMarks, CompatVoicedSoundMarks}, 388 | {"CompatVoicedKanaRestriction", CompatVoicedKanaRestriction, CompatVoicedKanaRestriction}, 389 | {"CompatKeepHalfwidthHangul", CompatKeepHalfwidthHangul, CompatKeepHalfwidthHangul}, 390 | {"CompatKeepHalfwidthSymbols", CompatKeepHalfwidthSymbols, CompatKeepHalfwidthSymbols}, 391 | {"CompatKanaRestriction", CompatKanaRestriction, CompatKanaRestriction}, 392 | } 393 | // String returns the names of the flags set in o joined by a vertical bar with a space on each side, followed by any leftover bits as a 0x-prefixed hexadecimal number; it returns "0" when no bits are set. 
394 | func (o ConvertOptions) String() string { 395 | var names []string 396 | for _, n := range flagNames { 397 | if o&n.mask == n.flag { 398 | names = append(names, n.name) 399 | o &^= n.mask 400 | } 401 | } 402 | if o != 0 { 403 | names = append(names, "0x"+strconv.FormatInt(int64(o), 16)) 404 | } else if len(names) == 0 { 405 | return "0" 406 | } 407 | 408 | return strings.Join(names, " | ") 409 | } 410 |
-------------------------------------------------------------------------------- /options_test.go: -------------------------------------------------------------------------------- 1 | package kana_test 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/google/go-cmp/cmp" 7 | "github.com/wantedly/kana-go" 8 | ) 9 | 10 | func TestConvertOptionsNormalize(t *testing.T) { 11 | testcases := []struct { 12 | name string 13 | input kana.ConvertOptions 14 | expected kana.ConvertOptions 15 | }{ 16 | { 17 | name: "empty", 18 | input: 0, 19 | expected: 0, 20 | }, 21 | { 22 | name: "CompatQuotes, without FullwidthToNarrow", 23 | input: kana.CompatQuotes, 24 | expected: 0, 25 | }, 26 | { 27 | name: "CompatQuotes, with FullwidthToNarrow", 28 | input: kana.FullwidthToNarrow | kana.CompatQuotes, 29 | expected: kana.FullwidthToNarrow | kana.CompatQuotes, 30 | }, 31 | { 32 | name: "CompatBrackets, without FullwidthToNarrow", 33 | input: kana.CompatBrackets, 34 | expected: 0, 35 | }, 36 | { 37 | name: "CompatBrackets, with FullwidthToNarrow", 38 | input: kana.FullwidthToNarrow | kana.CompatBrackets, 39 | expected: kana.FullwidthToNarrow | kana.CompatBrackets, 40 | }, 41 | { 42 | name: "CompatKeepSpaces, without FullwidthToNarrow", 43 | input: kana.CompatKeepSpaces, 44 | expected: 0, 45 | }, 46 | { 47 | name: "CompatKeepSpaces, with FullwidthToNarrow", 48 | input: kana.FullwidthToNarrow | kana.CompatKeepSpaces, 49 | expected: kana.FullwidthToNarrow | kana.CompatKeepSpaces, 50 | }, 51 | { 52 | name: "CompatDoubleSpaces, without FullwidthToNarrow", 53 | input: kana.CompatDoubleSpaces, 54 | expected: 0, 55 | }, 56 | { 57 | name: "CompatDoubleSpaces, with FullwidthToNarrow", 58 | input: kana.FullwidthToNarrow | kana.CompatDoubleSpaces, 59 | expected: kana.FullwidthToNarrow | kana.CompatDoubleSpaces, 60 | }, 61 | { 62 | name: "CompatDoubleSpaces, without CompatKeepSpaces", 63 | input: kana.FullwidthToNarrow | kana.CompatDoubleSpaces, 64 | expected: kana.FullwidthToNarrow | 
kana.CompatDoubleSpaces, 65 | }, 66 | { 67 | name: "CompatDoubleSpaces, with CompatKeepSpaces", 68 | input: kana.FullwidthToNarrow | kana.CompatKeepSpaces | kana.CompatDoubleSpaces, 69 | expected: kana.FullwidthToNarrow | kana.CompatKeepSpaces, 70 | }, 71 | { 72 | name: "CompatVoicedSoundMarks, without HalfwidthToWide", 73 | input: kana.CompatVoicedSoundMarks, 74 | expected: 0, 75 | }, 76 | { 77 | name: "CompatVoicedSoundMarks, with HalfwidthToWide", 78 | input: kana.HalfwidthToWide | kana.CompatVoicedSoundMarks, 79 | expected: kana.HalfwidthToWide | kana.CompatVoicedSoundMarks, 80 | }, 81 | { 82 | name: "CompatKeepHalfwidthHangul, without HalfwidthToWide", 83 | input: kana.CompatKeepHalfwidthHangul, 84 | expected: 0, 85 | }, 86 | { 87 | name: "CompatKeepHalfwidthHangul, with HalfwidthToWide", 88 | input: kana.HalfwidthToWide | kana.CompatKeepHalfwidthHangul, 89 | expected: kana.HalfwidthToWide | kana.CompatKeepHalfwidthHangul, 90 | }, 91 | { 92 | name: "CompatKeepHalfwidthSymbols, without HalfwidthToWide", 93 | input: kana.CompatKeepHalfwidthSymbols, 94 | expected: 0, 95 | }, 96 | { 97 | name: "CompatKeepHalfwidthSymbols, with HalfwidthToWide", 98 | input: kana.HalfwidthToWide | kana.CompatKeepHalfwidthSymbols, 99 | expected: kana.HalfwidthToWide | kana.CompatKeepHalfwidthSymbols, 100 | }, 101 | { 102 | name: "CompatKanaRestriction, without KatakanaToHiragana or HiraganaToKatakana", 103 | input: kana.CompatKanaRestriction, 104 | expected: 0, 105 | }, 106 | { 107 | name: "CompatKanaRestriction, with KatakanaToHiragana", 108 | input: kana.KatakanaToHiragana | kana.CompatKanaRestriction, 109 | expected: kana.KatakanaToHiragana | kana.CompatKanaRestriction, 110 | }, 111 | { 112 | name: "CompatKanaRestriction, with HiraganaToKatakana", 113 | input: kana.HiraganaToKatakana | kana.CompatKanaRestriction, 114 | expected: kana.HiraganaToKatakana | kana.CompatKanaRestriction, 115 | }, 116 | } 117 | for _, tc := range testcases { 118 | t.Run(tc.name, func(t *testing.T) { 119 
| actual := tc.input.Normalize() 120 | if diff := cmp.Diff(tc.expected, actual); diff != "" { 121 | t.Errorf("unexpected diff (-want +got):\n%s", diff) 122 | } 123 | }) 124 | } 125 | } 126 | 127 | func TestConvertOptionsString(t *testing.T) { 128 | testcases := []struct { 129 | name string 130 | opts kana.ConvertOptions 131 | expected string 132 | }{ 133 | { 134 | name: "empty", 135 | opts: 0, 136 | expected: "0", 137 | }, 138 | { 139 | name: "unknown bit", 140 | opts: 1 << 30, 141 | expected: "0x40000000", 142 | }, 143 | { 144 | name: "one", 145 | opts: kana.HalfwidthToWide, 146 | expected: "HalfwidthToWide", 147 | }, 148 | { 149 | name: "one plus extra", 150 | opts: kana.HalfwidthToWide | (1 << 30), 151 | expected: "HalfwidthToWide | 0x40000000", 152 | }, 153 | { 154 | name: "two", 155 | opts: kana.HalfwidthToWide | kana.KatakanaToHiragana, 156 | expected: "HalfwidthToWide | KatakanaToHiragana", 157 | }, 158 | { 159 | name: "two plus extra", 160 | opts: kana.HalfwidthToWide | kana.KatakanaToHiragana | (1 << 30), 161 | expected: "HalfwidthToWide | KatakanaToHiragana | 0x40000000", 162 | }, 163 | } 164 | for _, tc := range testcases { 165 | t.Run(tc.name, func(t *testing.T) { 166 | actual := tc.opts.String() 167 | if diff := cmp.Diff(tc.expected, actual); diff != "" { 168 | t.Errorf("unexpected diff (-want +got):\n%s", diff) 169 | } 170 | }) 171 | } 172 | } 173 | -------------------------------------------------------------------------------- /stream.go: -------------------------------------------------------------------------------- 1 | package kana 2 | 3 | import ( 4 | "strings" 5 | "unicode/utf8" 6 | ) 7 | 8 | type stream struct { 9 | buf []rune 10 | end bool 11 | next func(buf *[]rune) 12 | } 13 | 14 | func (s *stream) fill(demand int) { 15 | if s.end { 16 | return 17 | } 18 | for len(s.buf) < demand { 19 | oldSize := len(s.buf) 20 | s.next(&s.buf) 21 | if len(s.buf) == oldSize { 22 | s.end = true 23 | break 24 | } 25 | } 26 | } 27 | 28 | func (s *stream) 
consume(num int) { 29 | newSize := len(s.buf) - num 30 | for i := 0; i < newSize; i++ { 31 | s.buf[i] = s.buf[i+num] 32 | } 33 | s.buf = s.buf[:newSize] 34 | } 35 | 36 | func (s *stream) readOne() (rune, bool) { 37 | s.fill(1) 38 | if len(s.buf) == 0 { 39 | return 0, false 40 | } 41 | ch := s.buf[0] 42 | s.consume(1) 43 | return ch, true 44 | } 45 | 46 | func (s *stream) peekOne() (rune, bool) { 47 | s.fill(1) 48 | if len(s.buf) == 0 { 49 | return 0, false 50 | } 51 | return s.buf[0], true 52 | } 53 | 54 | func (s *stream) readAll() string { 55 | builder := strings.Builder{} 56 | if !s.end { 57 | for { 58 | s.readCurrentTo(&builder) 59 | 60 | oldSize := len(s.buf) 61 | s.next(&s.buf) 62 | if len(s.buf) == oldSize { 63 | s.end = true 64 | break 65 | } 66 | } 67 | } 68 | s.readCurrentTo(&builder) 69 | return builder.String() 70 | } 71 | func (s *stream) readCurrentTo(builder *strings.Builder) { 72 | for _, ch := range s.buf { 73 | builder.WriteRune(ch) 74 | } 75 | s.buf = s.buf[:0] 76 | } 77 | 78 | func newStream(next func(buf *[]rune)) *stream { 79 | return &stream{ 80 | buf: nil, 81 | end: false, 82 | next: next, 83 | } 84 | } 85 | 86 | func stringStream(s string) *stream { 87 | pos := 0 88 | return newStream(func(buf *[]rune) { 89 | if pos < len(s) { 90 | ch, size := utf8.DecodeRuneInString(s[pos:]) 91 | *buf = append(*buf, ch) 92 | pos += size 93 | } 94 | }) 95 | } 96 | 97 | func mapStream(s *stream, f func(rune) rune) *stream { 98 | return newStream(func(buf *[]rune) { 99 | s.fill(1) 100 | for _, ch := range s.buf { 101 | *buf = append(*buf, f(ch)) 102 | } 103 | s.consume(len(s.buf)) 104 | }) 105 | } 106 | --------------------------------------------------------------------------------