├── maybe_index.go ├── convert.go ├── .gitignore ├── .circleci └── config.yml ├── LICENSE ├── dictionary.go ├── README.md ├── convert_test.go └── default_sets.go /maybe_index.go: -------------------------------------------------------------------------------- 1 | package moji 2 | 3 | // MaybeIndex may be an index or a rune 4 | type MaybeIndex struct { 5 | i int 6 | s string 7 | } 8 | -------------------------------------------------------------------------------- /convert.go: -------------------------------------------------------------------------------- 1 | package moji 2 | 3 | // Convert a string between two Dictionaries 4 | func Convert(s string, from, to Dictionary) string { 5 | return to.Encode(from.Decode(s)) 6 | } 7 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Binaries for programs and plugins 2 | *.exe 3 | *.dll 4 | *.so 5 | *.dylib 6 | 7 | # Test binary, build with `go test -c` 8 | *.test 9 | 10 | # Output of the go coverage tool, specifically when used with LiteIDE 11 | *.out 12 | 13 | # Project-local glide cache, RE: https://github.com/Masterminds/glide/issues/736 14 | .glide/ 15 | -------------------------------------------------------------------------------- /.circleci/config.yml: -------------------------------------------------------------------------------- 1 | # Golang CircleCI 2.0 configuration file 2 | # 3 | # Check https://circleci.com/docs/2.0/language-go/ for more details 4 | version: 2 5 | jobs: 6 | build: 7 | docker: 8 | - image: circleci/golang:1.8 9 | working_directory: /go/src/github.com/ktnyt/go-moji 10 | steps: 11 | - checkout 12 | - run: go get -v -t -d ./... 13 | - run: go test -v ./... 14 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 ktnyt 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /dictionary.go: -------------------------------------------------------------------------------- 1 | package moji 2 | 3 | import ( 4 | "strings" 5 | ) 6 | 7 | // Dictionary defines an interface for mapping between a string and index 8 | type Dictionary interface { 9 | Encode([]MaybeIndex) string 10 | Decode(string) []MaybeIndex 11 | } 12 | 13 | type defaultDictionary []string 14 | 15 | func (d defaultDictionary) encode(m MaybeIndex) string { 16 | if m.i < 0 || m.i > len(d) { 17 | return m.s 18 | } 19 | return d[m.i] 20 | } 21 | 22 | func (d defaultDictionary) decode(s string) (MaybeIndex, string) { 23 | for i, p := range d { 24 | if strings.HasPrefix(s, p) { 25 | return MaybeIndex{i: i, s: p}, strings.Replace(s, p, "", 1) 26 | } 27 | } 28 | rs := []rune(s) 29 | head := string(rs[:1]) 30 | tail := string(rs[1:]) 31 | return MaybeIndex{i: -1, s: head}, tail 32 | } 33 | 34 | func (d defaultDictionary) Encode(ms []MaybeIndex) string { 35 | s := make([]byte, 0) 36 | for _, m := range ms { 37 | s = append(s, d.encode(m)...) 38 | } 39 | return string(s) 40 | } 41 | 42 | func (d defaultDictionary) Decode(s string) []MaybeIndex { 43 | ms := make([]MaybeIndex, 0) 44 | var m MaybeIndex 45 | for len(s) != 0 { 46 | m, s = d.decode(s) 47 | ms = append(ms, m) 48 | } 49 | return ms 50 | } 51 | 52 | // NewDictionary creates a dictionary from the given string slice 53 | func NewDictionary(d []string) Dictionary { 54 | return defaultDictionary(d) 55 | } 56 | 57 | // NewRangeDictionary creates a dictionary from the given range 58 | func NewRangeDictionary(s, e rune) Dictionary { 59 | if s > e { 60 | panic("NewRangeDictionary: range is invaid") 61 | } 62 | d := make([]string, 0) 63 | for r := s; r != e; r++ { 64 | d = append(d, string([]rune{r})) 65 | } 66 | return NewDictionary(d) 67 | } 68 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # go-moji 2 | [![Build Status](https://circleci.com/gh/ktnyt/go-moji.svg?style=shield&circle-token==7da9cb901d095995e930651e7298c8ab233a0c85)](https://circleci.com/gh/ktnyt/go-moji) 3 | [![Go Report Card](https://goreportcard.com/badge/github.com/ktnyt/go-moji)](https://goreportcard.com/report/github.com/ktnyt/go-moji) 4 | [![GoDoc](http://godoc.org/github.com/ktnyt/go-moji?status.svg)](http://godoc.org/github.com/ktnyt/go-moji) 5 | 6 | This package provides a Go interface for converting between Zenkaku (全角 i.e. full-width) and Hankaku (半角 i.e. half-width) characters (mostly for Japanese). The library has been largely influenced by [niwaringo/moji](https://github.com/niwaringo/moji) the JavaScript implementation. 7 | 8 | For detailed information of the API, see the [documents](https://godoc.org/github.com/ktnyt/go-moji). 9 | 10 | ## Installation 11 | Use `go get`: 12 | ```sh 13 | $ go get github.com/ktnyt/go-moji 14 | ``` 15 | 16 | ## Requirements 17 | This package has only been tested on Go >= 1.8. Beware when using lower versions. 18 | 19 | ## Example 20 | ```go 21 | package main 22 | 23 | import ( 24 | "fmt" 25 | 26 | "github.com/ktnyt/go-moji" 27 | ) 28 | 29 | func main() { 30 | s := "ABC ABC あがぱ アガパ アガパ" 31 | 32 | // Convert Zenkaku Eisuu to Hankaku Eisuu 33 | fmt.Println(moji.Convert(s, moji.ZE, moji.HE)) 34 | 35 | // Convert Hankaku Eisuu to Zenkaku Eisuu 36 | fmt.Println(moji.Convert(s, moji.HE, moji.ZE)) 37 | 38 | // Convert HiraGana to KataKana 39 | fmt.Println(moji.Convert(s, moji.HG, moji.KK)) 40 | 41 | // Convert KataKana to HiraGana 42 | fmt.Println(moji.Convert(s, moji.KK, moji.HG)) 43 | 44 | // Convert Zenkaku Katakana to Hankaku Katakana 45 | fmt.Println(moji.Convert(s, moji.ZK, moji.HK)) 46 | 47 | // Convert Hankaku Katakana to Zenkaku Katakana 48 | fmt.Println(moji.Convert(s, moji.HK, moji.ZK)) 49 | 50 | // Convert Zenkaku Space to Hankaku Space 51 | fmt.Println(moji.Convert(s, moji.ZS, moji.HS)) 52 | 53 | // Convert Hankaku Space to Zenkaku Space 54 | fmt.Println(moji.Convert(s, moji.HS, moji.ZS)) 55 | } 56 | ``` 57 | 58 | ## Copyright 59 | Copyright (C) 2018 by Kotone Itaya 60 | 61 | go-moji is released under the terms of the MIT License. 62 | See [LICENSE](https://github.com/ktnyt/go-moji/blob/master/LICENSE) for details. 63 | -------------------------------------------------------------------------------- /convert_test.go: -------------------------------------------------------------------------------- 1 | package moji 2 | 3 | import ( 4 | "testing" 5 | ) 6 | 7 | type testCase struct { 8 | n string 9 | s string 10 | e string 11 | f Dictionary 12 | t Dictionary 13 | } 14 | 15 | var cases = []testCase{ 16 | // RangeDictionary test cases 17 | { 18 | n: "ZE to HE", 19 | s: "AZaz09. AZaz09. 「あん゛がぱ」アン゜ガパ。「アン゙ガパ」", 20 | e: "AZaz09. AZaz09. 「あん゛がぱ」アン゜ガパ。「アン゙ガパ」", 21 | f: ZE, 22 | t: HE, 23 | }, 24 | { 25 | n: "HE to ZE", 26 | s: "AZaz09. AZaz09. 「あん゛がぱ」アン゜ガパ。「アン゙ガパ」", 27 | e: "AZaz09. AZaz09. 「あん゛がぱ」アン゜ガパ。「アン゙ガパ」", 28 | f: HE, 29 | t: ZE, 30 | }, 31 | { 32 | n: "HG to KK", 33 | s: "AZaz09. AZaz09. 「あん゛がぱ」アン゜ガパ。「アン゙ガパ」", 34 | e: "AZaz09. AZaz09. 「アン゛ガパ」アン゜ガパ。「アン゙ガパ」", 35 | f: HG, 36 | t: KK, 37 | }, 38 | { 39 | n: "KK to HG", 40 | s: "AZaz09. AZaz09. 「あん゛がぱ」アン゜ガパ。「アン゙ガパ」", 41 | e: "AZaz09. AZaz09. 「あん゛がぱ」あん゜がぱ。「アン゙ガパ」", 42 | f: KK, 43 | t: HG, 44 | }, 45 | { 46 | n: "ZK to HK", 47 | s: "AZaz09. AZaz09. 「あん゛がぱ」アン゜ガパ。「アン゙ガパ」", 48 | e: "AZaz09. AZaz09. 「あん゙がぱ」アン゚ガパ。「アン゙ガパ」", 49 | f: ZK, 50 | t: HK, 51 | }, 52 | { 53 | n: "HK to ZK", 54 | s: "AZaz09. AZaz09. 「あん゛がぱ」アン゜ガパ。「アン゙ガパ」", 55 | e: "AZaz09. AZaz09. 「あん゛がぱ」アン゜ガパ。「アン゛ガパ」", 56 | f: HK, 57 | t: ZK, 58 | }, 59 | { 60 | n: "ZS to HS", 61 | s: "AZaz09. AZaz09. 「あん゛がぱ」アン゜ガパ。「アン゙ガパ」", 62 | e: "AZaz09. AZaz09. 「あん゛がぱ」アン゜ガパ。「アン゙ガパ」", 63 | f: ZS, 64 | t: HS, 65 | }, 66 | { 67 | n: "HS to ZS", 68 | s: "AZaz09. AZaz09. 「あん゛がぱ」アン゜ガパ。「アン゙ガパ」", 69 | e: "AZaz09. AZaz09. 「あん゛がぱ」アン゜ガパ。「アン゙ガパ」", 70 | f: HS, 71 | t: ZS, 72 | }, 73 | { 74 | n: "HK to UHK", 75 | s: "AZaz09. AZaz09. 「あん゛がぱ」アン゜ガパ。「アン゙ガパャュョッ」", 76 | e: "AZaz09. AZaz09. 「あん゛がぱ」アン゜ガパ。「アン゙ガパヤユヨツ」", 77 | f: HK, 78 | t: UHK, 79 | }, 80 | { 81 | n: "ZK to UHK", 82 | s: "AZaz09. AZaz09. 「あん゛がぱ」アン゜ガパャュョッ。「アン゙ガパャュョッ」", 83 | e: "AZaz09. AZaz09. 「あん゙がぱ」アン゚ガパヤユヨツ。「アン゙ガパャュョッ」", 84 | f: ZK, 85 | t: UHK, 86 | }, 87 | { 88 | n: "HK to UZK", 89 | s: "AZaz09. AZaz09. 「あん゛がぱ」アン゜ガパ。「アン゙ガパャュョッ」", 90 | e: "AZaz09. AZaz09. 「あん゛がぱ」アン゜ガパ。「アン゛ガパヤユヨツ」", 91 | f: HK, 92 | t: UZK, 93 | }, 94 | { 95 | n: "ZK to UZK", 96 | s: "AZaz09. AZaz09. 「あん゛がぱ」アン゜ガパャュョッ。「アン゙ガパャュョッ」", 97 | e: "AZaz09. AZaz09. 「あん゛がぱ」アン゜ガパヤユヨツ。「アン゙ガパャュョッ」", 98 | f: ZK, 99 | t: UZK, 100 | }, 101 | } 102 | 103 | func TestConvert(t *testing.T) { 104 | for _, c := range cases { 105 | a := Convert(c.s, c.f, c.t) 106 | if c.e != a { 107 | t.Fatalf("convert '%s' failed:\n expected '%s'\n but got '%s'", c.n, c.e, a) 108 | } 109 | } 110 | } 111 | -------------------------------------------------------------------------------- /default_sets.go: -------------------------------------------------------------------------------- 1 | package moji 2 | 3 | // HE defines the Hankaku Eisuu (i.e. half width english text) Dictionary 4 | var HE = NewRangeDictionary(0x0021, 0x007e) 5 | 6 | // ZE defines the Zenkaku Eisuu (i.e. full width english text) Dictionary 7 | var ZE = NewRangeDictionary(0xff01, 0xff5e) 8 | 9 | // HG defines the HiraGana Dictionary 10 | var HG = NewRangeDictionary(0x3041, 0x3096) 11 | 12 | // KK defines the KataKana Dictionary 13 | var KK = NewRangeDictionary(0x30a1, 0x30f6) 14 | 15 | // HK defines the Hankaku Katakana (i.e. half width katakana) Dictionary 16 | var HK = NewDictionary([]string{ 17 | "ガ", "ギ", "グ", "ゲ", "ゴ", 18 | "ザ", "ジ", "ズ", "ゼ", "ゾ", 19 | "ダ", "ヂ", "ヅ", "デ", "ド", 20 | "バ", "パ", "ビ", "ピ", "ブ", "プ", "ベ", "ペ", "ボ", "ポ", 21 | "ヷ", "ヺ", "ヴ", 22 | "。", "「", "」", "、", "・", "ー", "゙", "゚", 23 | "ア", "イ", "ウ", "エ", "オ", 24 | "カ", "キ", "ク", "ケ", "コ", 25 | "サ", "シ", "ス", "セ", "ソ", "タ", "チ", "ツ", "テ", "ト", 26 | "ナ", "ニ", "ヌ", "ネ", "ノ", 27 | "ハ", "ヒ", "フ", "ヘ", "ホ", 28 | "マ", "ミ", "ム", "メ", "モ", 29 | "ヤ", "ユ", "ヨ", 30 | "ラ", "リ", "ル", "レ", "ロ", 31 | "ワ", "ヲ", "ン", 32 | "ァ", "ィ", "ゥ", "ェ", "ォ", 33 | "ャ", "ュ", "ョ", "ッ", 34 | }) 35 | 36 | // UHK defines the Upper Hankaku Katakana (i.e. half width Upper katakana) Dictionary 37 | var UHK = NewDictionary([]string{ 38 | "ガ", "ギ", "グ", "ゲ", "ゴ", 39 | "ザ", "ジ", "ズ", "ゼ", "ゾ", 40 | "ダ", "ヂ", "ヅ", "デ", "ド", 41 | "バ", "パ", "ビ", "ピ", "ブ", "プ", "ベ", "ペ", "ボ", "ポ", 42 | "ヷ", "ヺ", "ヴ", 43 | "。", "「", "」", "、", "・", "ー", "゙", "゚", 44 | "ア", "イ", "ウ", "エ", "オ", 45 | "カ", "キ", "ク", "ケ", "コ", 46 | "サ", "シ", "ス", "セ", "ソ", "タ", "チ", "ツ", "テ", "ト", 47 | "ナ", "ニ", "ヌ", "ネ", "ノ", 48 | "ハ", "ヒ", "フ", "ヘ", "ホ", 49 | "マ", "ミ", "ム", "メ", "モ", 50 | "ヤ", "ユ", "ヨ", 51 | "ラ", "リ", "ル", "レ", "ロ", 52 | "ワ", "ヲ", "ン", 53 | "ア", "イ", "ウ", "エ", "オ", 54 | "ヤ", "ユ", "ヨ", "ツ", 55 | }) 56 | 57 | // ZK defines the Zenkaku Katakana (i.e. full width katakana) Dictionary 58 | var ZK = NewDictionary([]string{ 59 | "ガ", "ギ", "グ", "ゲ", "ゴ", 60 | "ザ", "ジ", "ズ", "ゼ", "ゾ", 61 | "ダ", "ヂ", "ヅ", "デ", "ド", 62 | "バ", "パ", "ビ", "ピ", "ブ", "プ", "ベ", "ペ", "ボ", "ポ", 63 | "ヷ", "ヺ", "ヴ", 64 | "。", "「", "」", "、", "・", "ー", "゛", "゜", 65 | "ア", "イ", "ウ", "エ", "オ", 66 | "カ", "キ", "ク", "ケ", "コ", 67 | "サ", "シ", "ス", "セ", "ソ", 68 | "タ", "チ", "ツ", "テ", "ト", 69 | "ナ", "ニ", "ヌ", "ネ", "ノ", 70 | "ハ", "ヒ", "フ", "ヘ", "ホ", 71 | "マ", "ミ", "ム", "メ", "モ", 72 | "ヤ", "ユ", "ヨ", 73 | "ラ", "リ", "ル", "レ", "ロ", 74 | "ワ", "ヲ", "ン", 75 | "ァ", "ィ", "ゥ", "ェ", "ォ", 76 | "ャ", "ュ", "ョ", "ッ", 77 | }) 78 | 79 | // UZK defines the Upper Zenkaku Katakana (i.e. full width Upper katakana) Dictionary 80 | var UZK = NewDictionary([]string{ 81 | "ガ", "ギ", "グ", "ゲ", "ゴ", 82 | "ザ", "ジ", "ズ", "ゼ", "ゾ", 83 | "ダ", "ヂ", "ヅ", "デ", "ド", 84 | "バ", "パ", "ビ", "ピ", "ブ", "プ", "ベ", "ペ", "ボ", "ポ", 85 | "ヷ", "ヺ", "ヴ", 86 | "。", "「", "」", "、", "・", "ー", "゛", "゜", 87 | "ア", "イ", "ウ", "エ", "オ", 88 | "カ", "キ", "ク", "ケ", "コ", 89 | "サ", "シ", "ス", "セ", "ソ", 90 | "タ", "チ", "ツ", "テ", "ト", 91 | "ナ", "ニ", "ヌ", "ネ", "ノ", 92 | "ハ", "ヒ", "フ", "ヘ", "ホ", 93 | "マ", "ミ", "ム", "メ", "モ", 94 | "ヤ", "ユ", "ヨ", 95 | "ラ", "リ", "ル", "レ", "ロ", 96 | "ワ", "ヲ", "ン", 97 | "ア", "イ", "ウ", "エ", "オ", 98 | "ヤ", "ユ", "ヨ", "ツ", 99 | }) 100 | 101 | func stringify(r rune) string { 102 | return string([]rune{r}) 103 | } 104 | 105 | // HS defines the Hankaku Space (i.e. half width space) Dictionary 106 | var HS = NewDictionary([]string{" ", stringify(0x00a0)}) 107 | 108 | // ZS defines the Zenkaku Space (i.e. full width space) Dictionary 109 | var ZS = NewDictionary([]string{stringify(0x3000), stringify(0x3000)}) 110 | --------------------------------------------------------------------------------