├── .gitattributes ├── .github └── workflows │ └── go.yml ├── .gitignore ├── LICENSE ├── Makefile ├── README.md ├── _example ├── main.go └── pattern.go ├── cmd ├── gmigemo-client │ ├── .gitignore │ └── main.go ├── gmigemo-server │ └── main.go └── gmigemo │ ├── .gitignore │ └── main.go ├── conv ├── conv.go ├── conv_test.go ├── inflate.go ├── inflate_test.go ├── load.go ├── load_test.go └── load_test0.txt ├── dict ├── dict.go ├── dict_test.go └── load_skk.go ├── doc ├── words-flow.dot └── words-flow.png ├── embedict ├── _dict │ ├── SKK-JISYO.utf-8.L │ ├── hira2kata.txt │ ├── migemo-config.json │ ├── narrow2wide.txt │ ├── roma2hira.txt │ └── wide2narrow.txt ├── assets.go ├── bindata.go ├── fs.go ├── load.go └── load_test.go ├── go.mod ├── go.sum ├── inflator ├── dispatch.go ├── dispatch_test.go ├── echo.go ├── echo_test.go ├── filter.go ├── filter_test.go ├── inflator.go ├── inflator_test.go ├── join.go ├── join_test.go ├── prefix.go ├── prefix_test.go ├── suffix.go └── suffix_test.go ├── internal └── cli │ ├── console.go │ └── view.go ├── migemo ├── assets.go ├── defaults.go ├── defaults_test.go ├── dict.go ├── matcher.go ├── migemo.go ├── migemo_test.go ├── multiclause.go ├── multiclause_test.go └── pattern.go ├── readutil ├── readlines.go ├── readlines_test.go ├── stackable.go ├── stackable_test.go └── testdata │ ├── readlines_test0.txt │ └── readlines_test1.txt └── rpc ├── client └── migemo.go └── server ├── migemo.go └── server.go /.gitattributes: -------------------------------------------------------------------------------- 1 | *.go -crlf 2 | 3 | readutil/testdata/readlines_test*.txt -crlf 4 | 5 | embedict/_dict/* linguist-vendored 6 | -------------------------------------------------------------------------------- /.github/workflows/go.yml: -------------------------------------------------------------------------------- 1 | name: Go 2 | 3 | on: [push] 4 | 5 | env: 6 | GO_VERSION: '>=1.21.0' 7 | 8 | jobs: 9 | 10 | build: 11 | name: Build 12 | runs-on: ${{ 
matrix.os }} 13 | 14 | strategy: 15 | matrix: 16 | os: [ ubuntu-latest, macos-latest, windows-latest ] 17 | steps: 18 | 19 | - uses: actions/checkout@v4 20 | 21 | - uses: actions/setup-go@v5 22 | with: 23 | go-version: ${{ env.GO_VERSION }} 24 | 25 | - run: go test ./... 26 | 27 | - run: go build ./... 28 | 29 | # based on: github.com/koron-go/_skeleton/.github/workflows/go.yml 30 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | tmp/ 2 | 3 | tags 4 | 5 | *.exe 6 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2014 MURAOKA Taro 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 
22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: build 2 | build: 3 | go build -gcflags '-e' ./cmd/gmigemo 4 | 5 | .PHONY: test 6 | test: 7 | go test ./... 8 | 9 | .PHONY: bench 10 | bench: 11 | go test -bench ./... 12 | 13 | .PHONY: tags 14 | tags: 15 | gotags -f tags -R . 16 | 17 | .PHONY: cover 18 | cover: 19 | mkdir -p tmp 20 | go test -coverprofile tmp/_cover.out ./... 21 | go tool cover -html tmp/_cover.out -o tmp/cover.html 22 | 23 | .PHONY: checkall 24 | checkall: vet lint staticcheck 25 | 26 | .PHONY: vet 27 | vet: 28 | go vet ./... 29 | 30 | .PHONY: lint 31 | lint: 32 | golint ./... 33 | 34 | .PHONY: staticcheck 35 | staticcheck: 36 | staticcheck ./... 37 | 38 | .PHONY: clean 39 | clean: 40 | go clean 41 | rm -f tags 42 | rm -f tmp/_cover.out tmp/cover.html 43 | 44 | # based on: github.com/koron-go/_skeleton/Makefile 45 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Go/Migemo module 2 | 3 | [![PkgGoDev](https://pkg.go.dev/badge/github.com/koron/gomigemo)](https://pkg.go.dev/github.com/koron/gomigemo) 4 | [![Actions/Go](https://github.com/koron/gomigemo/workflows/Go/badge.svg)](https://github.com/koron/gomigemo/actions?query=workflow%3AGo) 5 | [![Go Report Card](https://goreportcard.com/badge/github.com/koron/gomigemo)](https://goreportcard.com/report/github.com/koron/gomigemo) 6 | 7 | ## Install and Update 8 | 9 | ```console 10 | $ go get github.com/koron/gomigemo@latest 11 | ``` 12 | 13 | ## Usage 14 | 15 | To load dictionary files from file system: 16 | 17 | ```go 18 | // Import migemo package. 19 | import "github.com/koron/gomigemo/migemo" 20 | 21 | // Load dictionary files. 22 | dict, err := migemo.LoadDefault() 23 | 24 | // Compile to get *regexp.Regexp. 
25 | re, err := migemo.Compile(dict, "aiueo") 26 | ``` 27 | 28 | To embed the dictionary into the executable file: 29 | 30 | ```go 31 | // Import migemo and embedict packages. 32 | import ( 33 | "github.com/koron/gomigemo/embedict" 34 | "github.com/koron/gomigemo/migemo" 35 | ) 36 | 37 | // Load embedded dictionary. 38 | dict, err := embedict.Load() 39 | 40 | // Compile to get *regexp.Regexp. 41 | re, err := migemo.Compile(dict, "aiueo") 42 | ``` 43 | 44 | ## LICENSE 45 | 46 | Distributed under MIT License, 47 | except for `_dict/SKK-JISYO.utf-8.L` and `embedict/bindata.go` which are GPL. 48 | 49 | See LICENSE. 50 | -------------------------------------------------------------------------------- /_example/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "log" 6 | 7 | "github.com/koron/gomigemo/migemo" 8 | ) 9 | 10 | func main() { 11 | dict, err := migemo.Load("../dict") 12 | if err != nil { 13 | log.Fatal(err) 14 | } 15 | re, err := migemo.Compile(dict, "kensaku") 16 | if err != nil { 17 | log.Fatal(err) 18 | } 19 | if re.MatchString("検索") { 20 | fmt.Println(`"検索" is matched as "kensaku"`) 21 | } else { 22 | fmt.Println(`"検索" isn't matched as "kensaku"`) 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /_example/pattern.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "log" 6 | 7 | "github.com/koron/gomigemo/migemo" 8 | ) 9 | 10 | func main() { 11 | dict, err := migemo.Load("./_dict") 12 | if err != nil { 13 | log.Fatal(err) 14 | } 15 | pat, err := migemo.Pattern(dict, "ai") 16 | if err != nil { 17 | log.Fatal(err) 18 | } 19 | fmt.Printf("pattern=%s\n", pat) 20 | } 21 | -------------------------------------------------------------------------------- /cmd/gmigemo-client/.gitignore: 
-------------------------------------------------------------------------------- 1 | gmigemo 2 | gmigemo.exe 3 | -------------------------------------------------------------------------------- /cmd/gmigemo-client/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "io" 5 | "log" 6 | 7 | "github.com/koron/gomigemo/internal/cli" 8 | "github.com/koron/gomigemo/rpc/client" 9 | ) 10 | 11 | func query(c *client.Client, s string) (string, error) { 12 | rx, err := c.Compile(s) 13 | if err != nil { 14 | return "", err 15 | } 16 | return rx.String(), nil 17 | } 18 | 19 | func queryLoop(v cli.View, c *client.Client) { 20 | for { 21 | q, err := v.GetQuery() 22 | if err == io.EOF { 23 | return 24 | } 25 | if err != nil { 26 | log.Print(err) 27 | continue 28 | } 29 | p, err := query(c, q) 30 | if err != nil { 31 | log.Print(err) 32 | continue 33 | } 34 | err = v.PutPattern(p) 35 | if err != nil { 36 | log.Print(err) 37 | continue 38 | } 39 | } 40 | } 41 | 42 | func main() { 43 | c, err := client.Connect() 44 | if err != nil { 45 | log.Fatal(err) 46 | } 47 | queryLoop(cli.NewConsole(), c) 48 | } 49 | -------------------------------------------------------------------------------- /cmd/gmigemo-server/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import "github.com/koron/gomigemo/rpc/server" 4 | 5 | func main() { 6 | server.RunDefault() 7 | } 8 | -------------------------------------------------------------------------------- /cmd/gmigemo/.gitignore: -------------------------------------------------------------------------------- 1 | gmigemo 2 | gmigemo.exe 3 | -------------------------------------------------------------------------------- /cmd/gmigemo/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "io" 5 | "log" 6 | 7 | "github.com/koron/gomigemo/embedict" 
8 | "github.com/koron/gomigemo/internal/cli" 9 | "github.com/koron/gomigemo/migemo" 10 | ) 11 | 12 | func adjustMatcher(m migemo.Matcher) { 13 | o := m.GetOptions() 14 | o.OpWSpaces = "" 15 | m.SetOptions(o) 16 | } 17 | 18 | func query(d migemo.Dict, s string) (string, error) { 19 | m, err := d.Matcher(s) 20 | if err != nil { 21 | return "", err 22 | } 23 | adjustMatcher(m) 24 | p, err := m.Pattern() 25 | if err != nil { 26 | return "", err 27 | } 28 | return p, nil 29 | } 30 | 31 | func queryLoop(v cli.View, d migemo.Dict) { 32 | for { 33 | q, err := v.GetQuery() 34 | if err == io.EOF { 35 | return 36 | } 37 | if err != nil { 38 | log.Print(err) 39 | continue 40 | } 41 | p, err := query(d, q) 42 | if err != nil { 43 | log.Print(err) 44 | continue 45 | } 46 | err = v.PutPattern(p) 47 | if err != nil { 48 | log.Print(err) 49 | continue 50 | } 51 | } 52 | } 53 | 54 | func main() { 55 | dict, err := embedict.Load() 56 | if err != nil { 57 | log.Fatal(err) 58 | } 59 | queryLoop(cli.NewConsole(), migemo.MultiClauses(dict)) 60 | } 61 | -------------------------------------------------------------------------------- /conv/conv.go: -------------------------------------------------------------------------------- 1 | package conv 2 | 3 | import ( 4 | "bytes" 5 | "io" 6 | 7 | "github.com/koron/gelatin/trie" 8 | "github.com/koron/gomigemo/readutil" 9 | ) 10 | 11 | // Converter defines string (roma/katakana) converter with trie. 12 | type Converter struct { 13 | trie *trie.TernaryTrie 14 | balanced bool 15 | } 16 | 17 | type entry struct { 18 | output, remain string 19 | } 20 | 21 | // New creates a new converter. 22 | func New() *Converter { 23 | return &Converter{ 24 | trie: trie.NewTernaryTrie(), 25 | balanced: false, 26 | } 27 | } 28 | 29 | // Add adds an entry for conversion. 30 | func (c *Converter) Add(key, output, remain string) { 31 | c.trie.Put(key, &entry{output, remain}) 32 | c.balanced = false 33 | } 34 | 35 | // Convert converts a string as roma or katakana or so. 
36 | func (c *Converter) Convert(s string) (string, error) { 37 | return c.convert2(s, nil) 38 | } 39 | 40 | type resultProc func(core, remain string, n trie.Node) 41 | 42 | func (c *Converter) convert2(s string, proc resultProc) (string, error) { 43 | if !c.balanced { 44 | c.balance() 45 | } 46 | 47 | var out, pending bytes.Buffer 48 | r := readutil.NewStackabeRuneReader() 49 | r.PushFront(s) 50 | n := c.trie.Root() 51 | 52 | for { 53 | ch, _, err := r.ReadRune() 54 | if err == io.EOF { 55 | break 56 | } else if err != nil { 57 | return "", err 58 | } 59 | 60 | n = n.Get(ch) 61 | if n == nil { 62 | pending.WriteRune(ch) 63 | ch2, _, err := pending.ReadRune() 64 | if err == nil { 65 | out.WriteRune(ch2) 66 | r.PushFront(pending.String()) 67 | pending.Reset() 68 | n = c.trie.Root() 69 | } else if err != io.EOF { 70 | return "", err 71 | } 72 | } else if e, ok := n.Value().(*entry); ok { 73 | if len(e.output) > 0 { 74 | out.WriteString(e.output) 75 | } 76 | if len(e.remain) > 0 { 77 | r.PushFront(e.remain) 78 | } 79 | pending.Reset() 80 | n = c.trie.Root() 81 | } else { 82 | pending.WriteRune(ch) 83 | } 84 | } 85 | 86 | if proc != nil { 87 | proc(out.String(), pending.String(), n) 88 | } 89 | if pending.Len() > 0 { 90 | out.WriteString(pending.String()) 91 | } 92 | 93 | return out.String(), nil 94 | } 95 | 96 | func (c *Converter) balance() { 97 | c.trie.Balance() 98 | c.balanced = true 99 | } 100 | -------------------------------------------------------------------------------- /conv/conv_test.go: -------------------------------------------------------------------------------- 1 | package conv 2 | 3 | import ( 4 | "testing" 5 | ) 6 | 7 | func assertConvert(t *testing.T, c *Converter, expected, input string) { 8 | converted, err := c.Convert(input) 9 | if err != nil { 10 | t.Error("Convert failed:", err) 11 | t.Logf(" input=%s expected=%s", input, expected) 12 | } 13 | if converted != expected { 14 | t.Error("Convert returns unexpected:", converted) 15 | t.Logf(" 
input=%s expected=%s", input, expected) 16 | } 17 | } 18 | 19 | func TestEmpty(t *testing.T) { 20 | c := New() 21 | assertConvert(t, c, "", "") 22 | assertConvert(t, c, "foo", "foo") 23 | assertConvert(t, c, "bar", "bar") 24 | } 25 | 26 | func TestSimple(t *testing.T) { 27 | c := New() 28 | c.Add("a", "A", "") 29 | c.Add("b", "B", "") 30 | assertConvert(t, c, "A", "a") 31 | assertConvert(t, c, "B", "b") 32 | assertConvert(t, c, "c", "c") 33 | assertConvert(t, c, "AAAAABBBBBccccc", "aaaaabbbbbccccc") 34 | } 35 | 36 | func TestTiny(t *testing.T) { 37 | c := New() 38 | c.Add("aa", "A", "a") 39 | c.Add("ab", "B", "") 40 | assertConvert(t, c, "B", "ab") 41 | assertConvert(t, c, "Aa", "aa") 42 | assertConvert(t, c, "AB", "aab") 43 | assertConvert(t, c, "Bc", "abc") 44 | assertConvert(t, c, "Ba", "aba") 45 | } 46 | 47 | func TestHira(t *testing.T) { 48 | c := New() 49 | c.Add("a", "あ", "") 50 | c.Add("i", "い", "") 51 | assertConvert(t, c, "あい", "ai") 52 | } 53 | 54 | func TestHiraRemain(t *testing.T) { 55 | c := New() 56 | c.Add("a", "あ", "") 57 | c.Add("ka", "か", "") 58 | c.Add("ki", "き", "") 59 | assertConvert(t, c, "あk", "ak") 60 | } 61 | -------------------------------------------------------------------------------- /conv/inflate.go: -------------------------------------------------------------------------------- 1 | package conv 2 | 3 | import ( 4 | "github.com/koron/gelatin/trie" 5 | "github.com/koron/gomigemo/inflator" 6 | ) 7 | 8 | // Inflate inflates all conversion patterns. 
9 | func (c *Converter) Inflate(s string) <-chan string { 10 | return inflator.Start(func(ch chan<- string) { 11 | c.convert2(s, func(core, remain string, n trie.Node) { 12 | extend := false 13 | if n != c.trie.Root() { 14 | recursiveEach(n, func(m trie.Node) { 15 | if e, ok := m.Value().(*entry); ok && e.output != "" { 16 | ch <- core + e.output 17 | extend = true 18 | } 19 | }) 20 | } 21 | if !extend { 22 | ch <- core 23 | } 24 | }) 25 | }) 26 | } 27 | 28 | func recursiveEach(n trie.Node, proc func(trie.Node)) { 29 | n.Each(func(m trie.Node) bool { 30 | proc(m) 31 | recursiveEach(m, proc) 32 | return true 33 | }) 34 | } 35 | -------------------------------------------------------------------------------- /conv/inflate_test.go: -------------------------------------------------------------------------------- 1 | package conv 2 | 3 | import ( 4 | "testing" 5 | ) 6 | 7 | func assertInflate(t *testing.T, c *Converter, input string, expected []string) { 8 | var actual []string 9 | ch := c.Inflate(input) 10 | for s := range ch { 11 | actual = append(actual, s) 12 | } 13 | 14 | if len(actual) != len(expected) { 15 | t.Errorf("length not match, expected=%d actual=%d", 16 | len(expected), len(actual)) 17 | t.Logf(" expected=%v", expected) 18 | return 19 | } 20 | for i, v := range actual { 21 | if v != expected[i] { 22 | t.Errorf("item[%d]=%v is not match with %v", i, v, expected[i]) 23 | t.Logf(" expected=%v", expected) 24 | break 25 | } 26 | } 27 | } 28 | 29 | func TestInflate(t *testing.T) { 30 | c := New() 31 | c.Add("a", "あ", "") 32 | c.Add("i", "い", "") 33 | c.Add("u", "う", "") 34 | c.Add("e", "え", "") 35 | c.Add("o", "お", "") 36 | c.Add("ka", "か", "") 37 | c.Add("ki", "き", "") 38 | c.Add("ku", "く", "") 39 | c.Add("ke", "け", "") 40 | c.Add("ko", "こ", "") 41 | c.Add("kk", "っ", "k") 42 | 43 | assertInflate(t, c, "a", []string{"あ"}) 44 | assertInflate(t, c, "ak", []string{ 45 | "あか", "あけ", "あき", "あっ", "あこ", "あく", 46 | }) 47 | assertInflate(t, c, "ik", []string{ 48 | 
"いか", "いけ", "いき", "いっ", "いこ", "いく", 49 | }) 50 | assertInflate(t, c, "uk", []string{ 51 | "うか", "うけ", "うき", "うっ", "うこ", "うく", 52 | }) 53 | assertInflate(t, c, "ek", []string{ 54 | "えか", "えけ", "えき", "えっ", "えこ", "えく", 55 | }) 56 | assertInflate(t, c, "ok", []string{ 57 | "おか", "おけ", "おき", "おっ", "おこ", "おく", 58 | }) 59 | } 60 | -------------------------------------------------------------------------------- /conv/load.go: -------------------------------------------------------------------------------- 1 | package conv 2 | 3 | import ( 4 | "bytes" 5 | "fmt" 6 | "io" 7 | "os" 8 | "strings" 9 | 10 | "github.com/koron/gomigemo/readutil" 11 | ) 12 | 13 | // LoadFile loads entries from a file. 14 | func (c *Converter) LoadFile(path string) (count int, err error) { 15 | file, err := os.Open(path) 16 | if err != nil { 17 | return 0, err 18 | } 19 | defer file.Close() 20 | return c.Load(file, path) 21 | } 22 | 23 | // Load loads entries from io.Reader. 24 | func (c *Converter) Load(rd io.Reader, name string) (count int, err error) { 25 | lnum := 0 26 | err = readutil.ReadLines(rd, func(line string, err error) error { 27 | lnum++ 28 | line = strings.TrimRight(line, " \t\r\n") 29 | if len(line) == 0 || line[0] == '#' { 30 | return err 31 | } 32 | parts := strings.SplitN(line, "\t", 3) 33 | if parts == nil || len(parts) < 2 { 34 | return fmt.Errorf("invalid format in file %s at line %d", 35 | name, lnum) 36 | } 37 | key := unescape(parts[0]) 38 | emit := unescape(parts[1]) 39 | var remain string 40 | if len(parts) >= 3 { 41 | remain = unescape(parts[2]) 42 | } 43 | c.Add(key, emit, remain) 44 | count++ 45 | return err 46 | }) 47 | return count, err 48 | } 49 | 50 | func unescape(s string) string { 51 | if !strings.ContainsRune(s, '\\') { 52 | return s 53 | } 54 | b := new(bytes.Buffer) 55 | b.Grow(len(s)) 56 | escape := false 57 | for _, r := range s { 58 | if escape { 59 | escape = false 60 | b.WriteRune(r) 61 | } else { 62 | if r == '\\' { 63 | escape = true 64 | } else { 65 | 
b.WriteRune(r) 66 | } 67 | } 68 | } 69 | if escape { 70 | b.WriteRune('\\') 71 | } 72 | return b.String() 73 | } 74 | 75 | // LoadFile creates a Converter with entries loaded from a file. 76 | func LoadFile(path string) (*Converter, error) { 77 | c := New() 78 | _, err := c.LoadFile(path) 79 | if err != nil { 80 | return nil, err 81 | } 82 | return c, nil 83 | } 84 | -------------------------------------------------------------------------------- /conv/load_test.go: -------------------------------------------------------------------------------- 1 | package conv 2 | 3 | import ( 4 | "testing" 5 | ) 6 | 7 | func TestLoad(t *testing.T) { 8 | c := New() 9 | count, err := c.LoadFile("./load_test0.txt") 10 | if err != nil { 11 | t.Fatal("failed to load load_test0.txt", err) 12 | } else if count != 18 { 13 | t.Fatal("load_test0.txt has unexpected entries:", count) 14 | } 15 | 16 | assertConvert(t, c, "あかん", "akann") 17 | assertConvert(t, c, "あk", "ak") 18 | assertConvert(t, c, "あかn", "akan") 19 | 20 | assertConvert(t, c, "かんな", "kannna") 21 | assertConvert(t, c, "かんあ", "kanna") 22 | assertConvert(t, c, "かな", "kana") 23 | 24 | assertConvert(t, c, "いっかん", "ikkann") 25 | assertConvert(t, c, "いっかn", "ikkan") 26 | assertConvert(t, c, "いかん", "ikann") 27 | assertConvert(t, c, "いかn", "ikan") 28 | 29 | assertConvert(t, c, "いっきん", "ikkinn") 30 | assertConvert(t, c, "いっきn", "ikkin") 31 | assertConvert(t, c, "いきん", "ikinn") 32 | assertConvert(t, c, "いきn", "ikin") 33 | } 34 | 35 | func assertUnescape(t *testing.T, text, expected string) { 36 | actual := unescape(text) 37 | if actual != expected { 38 | t.Errorf("unescape failed: expected=%s actual=%s", expected, actual) 39 | } 40 | } 41 | 42 | func TestUnescape(t *testing.T) { 43 | assertUnescape(t, "abc", "abc") 44 | assertUnescape(t, "a\\bc", "abc") 45 | assertUnescape(t, "\\\\", "\\") 46 | assertUnescape(t, "\\#", "#") 47 | assertUnescape(t, "\\", "\\") 48 | } 49 | 
-------------------------------------------------------------------------------- /conv/load_test0.txt: -------------------------------------------------------------------------------- 1 | a あ 2 | i い 3 | u う 4 | e え 5 | o お 6 | 7 | ka か 8 | ki き 9 | ku く 10 | ke け 11 | ko こ 12 | 13 | na な 14 | ni に 15 | nu ぬ 16 | ne ね 17 | no の 18 | 19 | kk っ k 20 | nn ん 21 | nk ん k 22 | -------------------------------------------------------------------------------- /dict/dict.go: -------------------------------------------------------------------------------- 1 | package dict 2 | 3 | import ( 4 | "github.com/koron/gelatin/trie" 5 | "github.com/koron/gomigemo/inflator" 6 | ) 7 | 8 | // Dict is a trie tree dictionary. 9 | type Dict struct { 10 | trie *trie.TernaryTrie 11 | balanced bool 12 | } 13 | 14 | type entry struct { 15 | words []string 16 | } 17 | 18 | // New creates a dictionary. 19 | func New() *Dict { 20 | return &Dict{ 21 | trie: trie.NewTernaryTrie(), 22 | balanced: false, 23 | } 24 | } 25 | 26 | // Add adds a label and corresponding words for the label to dictionary. 27 | func (d *Dict) Add(label string, words []string) { 28 | d.trie.Put(label, &entry{words: words}) 29 | d.balanced = false 30 | } 31 | 32 | // Balance makes internal trie tree balanced. 33 | func (d *Dict) Balance() { 34 | if !d.balanced { 35 | d.trie.Balance() 36 | d.balanced = true 37 | } 38 | } 39 | 40 | // Get retrieves all words for a label with proc callback. 41 | func (d *Dict) Get(label string, proc func(word string) bool) { 42 | n := d.trie.Get(label) 43 | if n == nil { 44 | return 45 | } 46 | f := func(o trie.Node) bool { 47 | e, ok := o.Value().(*entry) 48 | if !ok { 49 | return true 50 | } 51 | for _, w := range e.words { 52 | if !proc(w) { 53 | return false 54 | } 55 | } 56 | return true 57 | } 58 | if !f(n) { 59 | return 60 | } 61 | n.Each(f) 62 | } 63 | 64 | // GetAll retrieves all words for a label with array. 
65 | func (d *Dict) GetAll(label string, max int) []string { 66 | limit := max 67 | if limit == 0 { 68 | limit = 32 69 | } 70 | words := make([]string, 0, limit) 71 | 72 | d.Get(label, func(word string) bool { 73 | words = append(words, word) 74 | if max > 0 && len(words) >= max { 75 | return false 76 | } 77 | return true 78 | }) 79 | return words 80 | } 81 | 82 | // Inflate retrieves all words for a label with channel. 83 | func (d *Dict) Inflate(s string) <-chan string { 84 | return inflator.Start(func(ch chan<- string) { 85 | d.Get(s, func(word string) bool { 86 | ch <- word 87 | return true 88 | }) 89 | }) 90 | } 91 | -------------------------------------------------------------------------------- /dict/dict_test.go: -------------------------------------------------------------------------------- 1 | package dict 2 | 3 | import ( 4 | "testing" 5 | ) 6 | 7 | func assertGetAll(t *testing.T, d *Dict, label string, expected []string) { 8 | actual := d.GetAll(label, 0) 9 | if len(actual) != len(expected) { 10 | t.Errorf("length not match, expected=%d actual=%d", 11 | len(expected), len(actual)) 12 | t.Logf(" label=%s, expected=%v", label, expected) 13 | return 14 | } 15 | for i, v := range actual { 16 | if v != expected[i] { 17 | t.Errorf("item[%d]=%v is not match with %v", i, v, expected[i]) 18 | t.Logf(" label=%s, expected=%v", label, expected) 19 | } 20 | } 21 | } 22 | 23 | func TestDict(t *testing.T) { 24 | d := New() 25 | d.Add("あい", []string{"愛", "藍"}) 26 | d.Add("あき", []string{"秋", "空き"}) 27 | d.Add("あ", []string{"亜"}) 28 | d.Add("いき", []string{"息", "遺棄", "粋"}) 29 | d.Add("いし", []string{"石", "医師"}) 30 | 31 | assertGetAll(t, d, "あい", []string{"愛", "藍"}) 32 | assertGetAll(t, d, "あき", []string{"秋", "空き"}) 33 | assertGetAll(t, d, "あ", []string{"亜", "愛", "藍", "秋", "空き"}) 34 | assertGetAll(t, d, "あし", []string{}) 35 | assertGetAll(t, d, "い", []string{"息", "遺棄", "粋", "石", "医師"}) 36 | assertGetAll(t, d, "いき", []string{"息", "遺棄", "粋"}) 37 | assertGetAll(t, d, "いし", 
[]string{"石", "医師"}) 38 | assertGetAll(t, d, "いち", []string{}) 39 | } 40 | -------------------------------------------------------------------------------- /dict/load_skk.go: -------------------------------------------------------------------------------- 1 | package dict 2 | 3 | import ( 4 | "io" 5 | 6 | "github.com/koron-go/skkdict" 7 | ) 8 | 9 | func addDictEntry(d *Dict, entry *skkdict.Entry) { 10 | words := make([]string, len(entry.Words)) 11 | for i, w := range entry.Words { 12 | words[i] = w.Text 13 | } 14 | d.Add(entry.Label, words) 15 | } 16 | 17 | // ReadSKK reads a SKK dictionary from io.Reader. 18 | func ReadSKK(rd io.Reader) (d *Dict, err error) { 19 | d = New() 20 | r := skkdict.NewReader(rd) 21 | for { 22 | n, err := r.Read() 23 | if err == io.EOF { 24 | break 25 | } else if err != nil { 26 | return nil, err 27 | } 28 | if n != nil { 29 | addDictEntry(d, n) 30 | } 31 | } 32 | return d, nil 33 | } 34 | -------------------------------------------------------------------------------- /doc/words-flow.dot: -------------------------------------------------------------------------------- 1 | // vim:set sts=2 sw=2 tw=0: 2 | 3 | digraph sample { 4 | raw -> matcher; 5 | raw -> hira; 6 | hira -> matcher; 7 | hira -> kata; 8 | kata -> matcher; 9 | kata -> han; 10 | han -> matcher; 11 | hira -> kanji; 12 | kanji -> matcher; 13 | raw -> wide; 14 | wide -> matcher; 15 | 16 | raw [ label="ローマ字\n(アルファベット)" ]; 17 | hira [ label="ひらがな" ]; 18 | matcher [ label="正規表現他" ]; 19 | kata [ label="全角カタカナ" ]; 20 | han [ label="半角カタカナ" ]; 21 | wide [ label="全角" ]; 22 | kanji [ label="漢字" ]; 23 | } 24 | -------------------------------------------------------------------------------- /doc/words-flow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/koron/gomigemo/115ff5231d1e3c9befe1fd44b82c706ee1b8b0e7/doc/words-flow.png -------------------------------------------------------------------------------- 
/embedict/_dict/hira2kata.txt: -------------------------------------------------------------------------------- 1 | # Table to convert HIRAGANA to KATAKANA in Japanese. 2 | # 3 | # Format: {INPUT}\t{EMIT}[\t{REMAIN}] 4 | 5 | あ ア 6 | い イ 7 | う ウ 8 | え エ 9 | お オ 10 | 11 | か カ 12 | き キ 13 | く ク 14 | け ケ 15 | こ コ 16 | 17 | さ サ 18 | し シ 19 | す ス 20 | せ セ 21 | そ ソ 22 | 23 | た タ 24 | ち チ 25 | つ ツ 26 | て テ 27 | と ト 28 | 29 | な ナ 30 | に ニ 31 | ぬ ヌ 32 | ね ネ 33 | の ノ 34 | 35 | は ハ 36 | ひ ヒ 37 | ふ フ 38 | へ ヘ 39 | ほ ホ 40 | 41 | ま マ 42 | み ミ 43 | む ム 44 | め メ 45 | も モ 46 | 47 | や ヤ 48 | ゆ ユ 49 | よ ヨ 50 | 51 | ら ラ 52 | り リ 53 | る ル 54 | れ レ 55 | ろ ロ 56 | 57 | わ ワ 58 | ゐ ヰ 59 | ゑ ヱ 60 | を ヲ 61 | 62 | が ガ 63 | ぎ ギ 64 | ぐ グ 65 | げ ゲ 66 | ご ゴ 67 | 68 | ざ ザ 69 | じ ジ 70 | ず ズ 71 | ぜ ゼ 72 | ぞ ゾ 73 | 74 | だ ダ 75 | ぢ ヂ 76 | づ ヅ 77 | で デ 78 | ど ド 79 | 80 | ば バ 81 | び ビ 82 | ぶ ブ 83 | べ ベ 84 | ぼ ボ 85 | 86 | ぱ パ 87 | ぴ ピ 88 | ぷ プ 89 | ぺ ペ 90 | ぽ ポ 91 | 92 | ぁ ァ 93 | ぃ ィ 94 | ぅ ゥ 95 | ぇ ェ 96 | ぉ ォ 97 | 98 | ゃ ャ 99 | ゅ ュ 100 | ょ ョ 101 | 102 | ん ン 103 | っ ッ 104 | ゎ ヮ 105 | -------------------------------------------------------------------------------- /embedict/_dict/migemo-config.json: -------------------------------------------------------------------------------- 1 | { 2 | "dict": { 3 | "roma2hira": { 4 | "type": "single", 5 | "href": "./roma2hira.txt" 6 | }, 7 | "hira2kata": { 8 | "type": "single", 9 | "href": "./hira2kata.txt" 10 | }, 11 | "wide2narrow": { 12 | "type": "single", 13 | "href": "./wide2narrow.txt" 14 | }, 15 | "narrow2wide": { 16 | "type": "single", 17 | "href": "./narrow2wide.txt" 18 | }, 19 | "hira2kanji": { 20 | "type": "multiple", 21 | "format": "SKK", 22 | "href": "http://openlab.jp/skk/dic/SKK-JISYO.L.gz" 23 | } 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /embedict/_dict/narrow2wide.txt: -------------------------------------------------------------------------------- 1 | # Table to convert narrow (HANKAKU) to wide 
(ZENKAKU) chars in Japanese. 2 | # 3 | # Format: {INPUT}\t{EMIT}[\t{REMAIN}] 4 | 5 |   6 | ! ! 7 | " ” 8 | \# # 9 | $ $ 10 | % % 11 | & & 12 | ' ’ 13 | ( ( 14 | ) ) 15 | * * 16 | + + 17 | , , 18 | - - 19 | . . 20 | / / 21 | 0 0 22 | 1 1 23 | 2 2 24 | 3 3 25 | 4 4 26 | 5 5 27 | 6 6 28 | 7 7 29 | 8 8 30 | 9 9 31 | : : 32 | ; ; 33 | < < 34 | = = 35 | > > 36 | ? ? 37 | 38 | @ @ 39 | A A 40 | B B 41 | C C 42 | D D 43 | E E 44 | F F 45 | G G 46 | H H 47 | I I 48 | J J 49 | K K 50 | L L 51 | M M 52 | N N 53 | O O 54 | P P 55 | Q Q 56 | R R 57 | S S 58 | T T 59 | U U 60 | V V 61 | W W 62 | X X 63 | Y Y 64 | Z Z 65 | [ [ 66 | \\ ¥ 67 | ] ] 68 | ^ ^ 69 | _ _ 70 | 71 | ` ‘ 72 | a a 73 | b b 74 | c c 75 | d d 76 | e e 77 | f f 78 | g g 79 | h h 80 | i i 81 | j j 82 | k k 83 | l l 84 | m m 85 | n n 86 | o o 87 | p p 88 | q q 89 | r r 90 | s s 91 | t t 92 | u u 93 | v v 94 | w w 95 | x x 96 | y y 97 | z z 98 | { { 99 | | | 100 | } } 101 | ~ ~ 102 | 103 | 。 。 104 | 「 「 105 | 」 」 106 | 、 、 107 | ・ ・ 108 | ヲ ヲ 109 | ァ ァ 110 | ィ ィ 111 | ゥ ゥ 112 | ェ ェ 113 | ォ ォ 114 | ャ ャ 115 | ュ ュ 116 | ョ ョ 117 | ッ ッ 118 | ー ー 119 | ア ア 120 | イ イ 121 | ウ ウ 122 | エ エ 123 | オ オ 124 | カ カ 125 | キ キ 126 | ク ク 127 | ケ ケ 128 | コ コ 129 | サ サ 130 | シ シ 131 | ス ス 132 | セ セ 133 | ソ ソ 134 | 135 | タ タ 136 | チ チ 137 | ツ ツ 138 | テ テ 139 | ト ト 140 | ナ ナ 141 | ニ ニ 142 | ヌ ヌ 143 | ネ ネ 144 | ノ ノ 145 | ハ ハ 146 | ヒ ヒ 147 | フ フ 148 | ヘ ヘ 149 | ホ ホ 150 | マ マ 151 | ミ ミ 152 | ム ム 153 | メ メ 154 | モ モ 155 | ヤ ヤ 156 | ユ ユ 157 | ヨ ヨ 158 | ラ ラ 159 | リ リ 160 | ル ル 161 | レ レ 162 | ロ ロ 163 | ワ ワ 164 | ン ン 165 | ゙ ゛ 166 | ゚ ゜ 167 | -------------------------------------------------------------------------------- /embedict/_dict/roma2hira.txt: -------------------------------------------------------------------------------- 1 | # Table to convert ROMAJI to HIRAGANA in Japanese. 
2 | # 3 | # Format: {INPUT}\t{EMIT}[\t{REMAIN}] 4 | 5 | a あ 6 | i い 7 | u う 8 | e え 9 | o お 10 | 11 | ka か 12 | ki き 13 | ku く 14 | ke け 15 | ko こ 16 | 17 | sa さ 18 | si し 19 | su す 20 | se せ 21 | so そ 22 | 23 | ta た 24 | ti ち 25 | tu つ 26 | te て 27 | to と 28 | 29 | na な 30 | ni に 31 | nu ぬ 32 | ne ね 33 | no の 34 | 35 | ha は 36 | hi ひ 37 | hu ふ 38 | he へ 39 | ho ほ 40 | 41 | ma ま 42 | mi み 43 | mu む 44 | me め 45 | mo も 46 | 47 | ya や 48 | yi い 49 | yu ゆ 50 | ye いぇ 51 | yo よ 52 | 53 | ra ら 54 | ri り 55 | ru る 56 | re れ 57 | ro ろ 58 | 59 | wa わ 60 | wi ゐ 61 | wu う 62 | we ゑ 63 | wo を 64 | 65 | ga が 66 | gi ぎ 67 | gu ぐ 68 | ge げ 69 | go ご 70 | 71 | za ざ 72 | zi じ 73 | zu ず 74 | ze ぜ 75 | zo ぞ 76 | 77 | da だ 78 | di ぢ 79 | du づ 80 | de で 81 | do ど 82 | 83 | ba ば 84 | bi び 85 | bu ぶ 86 | be べ 87 | bo ぼ 88 | 89 | pa ぱ 90 | pi ぴ 91 | pu ぷ 92 | pe ぺ 93 | po ぽ 94 | 95 | la ぁ 96 | li ぃ 97 | lu ぅ 98 | le ぇ 99 | lo ぉ 100 | 101 | lya ゃ 102 | lyi ぃ 103 | lyu ゅ 104 | lye ぇ 105 | lyo ょ 106 | 107 | xa ぁ 108 | xi ぃ 109 | xu ぅ 110 | xe ぇ 111 | xo ぉ 112 | 113 | xya ゃ 114 | xyi ぃ 115 | xyu ゅ 116 | xye ぇ 117 | xyo ょ 118 | 119 | kya きゃ 120 | kyi きぃ 121 | kyu きゅ 122 | kye きぇ 123 | kyo きょ 124 | 125 | gwa ぐぁ 126 | gwi ぐぃ 127 | gwu ぐぅ 128 | gwe ぐぇ 129 | gwo ぐぉ 130 | 131 | gya ぎゃ 132 | gyi ぎぃ 133 | gyu ぎゅ 134 | gye ぎぇ 135 | gyo ぎょ 136 | 137 | sha しゃ 138 | shi し 139 | shu しゅ 140 | she しぇ 141 | sho しょ 142 | 143 | swa すぁ 144 | swi すぃ 145 | swu すぅ 146 | swe すぇ 147 | swo すぉ 148 | 149 | sya しゃ 150 | syi しぃ 151 | syu しゅ 152 | sye しぇ 153 | syo しょ 154 | 155 | tha てゃ 156 | thi てぃ 157 | thu てゅ 158 | the てぇ 159 | tho てょ 160 | 161 | tsa つぁ 162 | tsi つぃ 163 | tsu つ 164 | tse つぇ 165 | tso つぉ 166 | 167 | twa とぁ 168 | twi とぃ 169 | twu とぅ 170 | twe とぇ 171 | two とぉ 172 | 173 | tya ちゃ 174 | tyi ちぃ 175 | tyu ちゅ 176 | tye ちぇ 177 | tyo ちょ 178 | 179 | dha でゃ 180 | dhi でぃ 181 | dhu でゅ 182 | dhe でぇ 183 | dho でょ 184 | 185 | nya にゃ 186 | nyi にぃ 187 | nyu にゅ 188 | nye にぇ 189 | nyo にょ 190 | 191 | hya ひゃ 192 | hyi ひぃ 193 | 
hyu ひゅ 194 | hye ひぇ 195 | hyo ひょ 196 | 197 | bya びゃ 198 | byi びぃ 199 | byu びゅ 200 | bye びぇ 201 | byo びょ 202 | 203 | pya ぴゃ 204 | pyi ぴぃ 205 | pyu ぴゅ 206 | pye ぴぇ 207 | pyo ぴょ 208 | 209 | mya みゃ 210 | myi みぃ 211 | myu みゅ 212 | mye みぇ 213 | myo みょ 214 | 215 | rya りゃ 216 | ryi りぃ 217 | ryu りゅ 218 | rye りぇ 219 | ryo りょ 220 | 221 | ca か 222 | ci し 223 | cu く 224 | ce せ 225 | co こ 226 | 227 | cha ちゃ 228 | chi ち 229 | chu ちゅ 230 | che ちぇ 231 | cho ちょ 232 | 233 | fa ふぁ 234 | fi ふぃ 235 | fu ふ 236 | fe ふぇ 237 | fo ふぉ 238 | 239 | fwa ふぁ 240 | fwi ふぃ 241 | fwu ふぅ 242 | fwe ふぇ 243 | fwo ふぉ 244 | 245 | fya ふゃ 246 | fyi ふぃ 247 | fyu ふゅ 248 | fye ふぇ 249 | fyo ふょ 250 | 251 | ja じゃ 252 | ji じ 253 | ju じゅ 254 | je じぇ 255 | jo じょ 256 | 257 | jya じゃ 258 | jyi じぃ 259 | jyu じゅ 260 | jye じぇ 261 | jyo じょ 262 | 263 | qa くぁ 264 | qi くぃ 265 | qu く 266 | qe くぇ 267 | qo くぉ 268 | 269 | qwa くぁ 270 | qwi くぃ 271 | qwu くぅ 272 | qwe くぇ 273 | qwo くぉ 274 | 275 | qya くゃ 276 | qyi くぃ 277 | qyu くゅ 278 | qye くぇ 279 | qyo くょ 280 | 281 | va ヴぁ 282 | vi ヴぃ 283 | vu ヴ 284 | ve ヴぇ 285 | vo ヴぉ 286 | 287 | vya ヴゃ 288 | vyi ヴぃ 289 | vyu ヴゅ 290 | vye ヴぇ 291 | vyo ヴょ 292 | 293 | nn ん 294 | n' ん 295 | xn ん 296 | ltu っ 297 | xtu っ 298 | lwa ゎ 299 | xwa ゎ 300 | lka ヵ 301 | xka ヵ 302 | lke ヶ 303 | xke ヶ 304 | kwa くぁ 305 | 306 | - ー 307 | ~ ~ 308 | 309 | mba んば 310 | mbi んび 311 | mbu んぶ 312 | mbe んべ 313 | mbo んぼ 314 | 315 | mpa んぱ 316 | mpi んぴ 317 | mpu んぷ 318 | mpe んぺ 319 | mpo んぽ 320 | 321 | mma んま 322 | mmi んみ 323 | mmu んむ 324 | mme んめ 325 | mmo んも 326 | 327 | tcha っちゃ 328 | tchi っち 329 | tchu っちゅ 330 | tche っちぇ 331 | tcho っちょ 332 | 333 | bb っ b 334 | cc っ c 335 | dd っ d 336 | ff っ f 337 | gg っ g 338 | hh っ h 339 | jj っ j 340 | kk っ k 341 | ll っ l 342 | mm っ m 343 | pp っ p 344 | qq っ q 345 | rr っ r 346 | ss っ s 347 | tt っ t 348 | vv っ v 349 | ww っ w 350 | xx っ x 351 | yy っ y 352 | zz っ z 353 | 354 | nb ん b 355 | nc ん c 356 | nd ん d 357 | nf ん f 358 | ng ん g 359 | nh ん h 360 | nj ん j 361 | nk ん k 362 | nl ん l 363 | nm 
ん m 364 | np ん p 365 | nq ん q 366 | nr ん r 367 | ns ん s 368 | nt ん t 369 | nv ん v 370 | nw ん w 371 | nx ん x 372 | ny ん y 373 | nz ん z 374 | -------------------------------------------------------------------------------- /embedict/_dict/wide2narrow.txt: -------------------------------------------------------------------------------- 1 | # Table to convert wide (ZENKAKU) to narrow (HANKAKU) chars in Japanese. 2 | # 3 | # Format: {INPUT}\t{EMIT}[\t{REMAIN}] 4 | 5 | ! ! 6 | ” " 7 | # # 8 | $ $ 9 | % % 10 | & & 11 | ’ ' 12 | ( ( 13 | ) ) 14 | * * 15 | + + 16 | , , 17 | - - 18 | . . 19 | / / 20 | 0 0 21 | 1 1 22 | 2 2 23 | 3 3 24 | 4 4 25 | 5 5 26 | 6 6 27 | 7 7 28 | 8 8 29 | 9 9 30 | : : 31 | ; ; 32 | < < 33 | = = 34 | > > 35 | ? ? 36 | 37 | @ @ 38 | A A 39 | B B 40 | C C 41 | D D 42 | E E 43 | F F 44 | G G 45 | H H 46 | I I 47 | J J 48 | K K 49 | L L 50 | M M 51 | N N 52 | O O 53 | P P 54 | Q Q 55 | R R 56 | S S 57 | T T 58 | U U 59 | V V 60 | W W 61 | X X 62 | Y Y 63 | Z Z 64 | [ [ 65 | ¥ \\ 66 | ] ] 67 | ^ ^ 68 | _ _ 69 | 70 | ‘ ` 71 | a a 72 | b b 73 | c c 74 | d d 75 | e e 76 | f f 77 | g g 78 | h h 79 | i i 80 | j j 81 | k k 82 | l l 83 | m m 84 | n n 85 | o o 86 | p p 87 | q q 88 | r r 89 | s s 90 | t t 91 | u u 92 | v v 93 | w w 94 | x x 95 | y y 96 | z z 97 | { { 98 | | | 99 | } } 100 | ~ ~ 101 | 102 | 。 。 103 | 「 「 104 | 」 」 105 | 、 、 106 | ・ ・ 107 | ヲ ヲ 108 | ァ ァ 109 | ィ ィ 110 | ゥ ゥ 111 | ェ ェ 112 | ォ ォ 113 | ャ ャ 114 | ュ ュ 115 | ョ ョ 116 | ッ ッ 117 | ー ー 118 | ア ア 119 | イ イ 120 | ウ ウ 121 | エ エ 122 | オ オ 123 | カ カ 124 | キ キ 125 | ク ク 126 | ケ ケ 127 | コ コ 128 | サ サ 129 | シ シ 130 | ス ス 131 | セ セ 132 | ソ ソ 133 | 134 | タ タ 135 | チ チ 136 | ツ ツ 137 | テ テ 138 | ト ト 139 | ナ ナ 140 | ニ ニ 141 | ヌ ヌ 142 | ネ ネ 143 | ノ ノ 144 | ハ ハ 145 | ヒ ヒ 146 | フ フ 147 | ヘ ヘ 148 | ホ ホ 149 | マ マ 150 | ミ ミ 151 | ム ム 152 | メ メ 153 | モ モ 154 | ヤ ヤ 155 | ユ ユ 156 | ヨ ヨ 157 | ラ ラ 158 | リ リ 159 | ル ル 160 | レ レ 161 | ロ ロ 162 | ワ ワ 163 | ン ン 164 | ゛ ゙ 165 | ゜ ゚ 166 | 167 | ヴ ヴ 168 | ガ ガ 169 | ギ ギ 170 
| グ グ 171 | ゲ ゲ 172 | ゴ ゴ 173 | ザ ザ 174 | ジ ジ 175 | ズ ズ 176 | ゼ ゼ 177 | ゾ ゾ 178 | ダ ダ 179 | ヂ ヂ 180 | ヅ ヅ 181 | デ デ 182 | ド ド 183 | バ バ 184 | ビ ビ 185 | ブ ブ 186 | ベ ベ 187 | ボ ボ 188 | パ パ 189 | ピ ピ 190 | プ プ 191 | ペ ペ 192 | ポ ポ 193 | -------------------------------------------------------------------------------- /embedict/assets.go: -------------------------------------------------------------------------------- 1 | package embedict 2 | 3 | import ( 4 | "bytes" 5 | 6 | "github.com/koron/gomigemo/migemo" 7 | ) 8 | 9 | type assets struct { 10 | } 11 | 12 | func (*assets) Get(name string, proc migemo.AssetProc) error { 13 | b, err := Asset(name) 14 | if err != nil { 15 | return err 16 | } 17 | return proc(bytes.NewReader(b)) 18 | } 19 | -------------------------------------------------------------------------------- /embedict/fs.go: -------------------------------------------------------------------------------- 1 | // +build go1.16 2 | 3 | package embedict 4 | 5 | import ( 6 | "embed" 7 | "fmt" 8 | "io" 9 | "io/fs" 10 | ) 11 | 12 | //go:embed _dict 13 | var dictFS embed.FS 14 | 15 | var DictFS fs.FS 16 | 17 | func init() { 18 | fs, err := fs.Sub(dictFS, "_dict") 19 | if err != nil { 20 | panic(err) 21 | } 22 | DictFS = fs 23 | } 24 | 25 | func Asset(name string) ([]byte, error) { 26 | f, err := DictFS.Open(name) 27 | if err != nil { 28 | return nil, fmt.Errorf("Asset %q not found: %w", name, err) 29 | } 30 | defer f.Close() 31 | return io.ReadAll(f) 32 | } 33 | -------------------------------------------------------------------------------- /embedict/load.go: -------------------------------------------------------------------------------- 1 | package embedict 2 | 3 | import ( 4 | "github.com/koron/gomigemo/migemo" 5 | ) 6 | 7 | // Load loads embeded migemo.Dict. 
8 | func Load() (migemo.Dict, error) { 9 | return migemo.LoadAssets(&assets{}) 10 | } 11 | -------------------------------------------------------------------------------- /embedict/load_test.go: -------------------------------------------------------------------------------- 1 | package embedict 2 | 3 | import ( 4 | "testing" 5 | ) 6 | 7 | func TestLoad(t *testing.T) { 8 | d, err := Load() 9 | if err != nil { 10 | t.Fatal("failed to load embedded dict", err) 11 | } 12 | if d == nil { 13 | t.Fatal("embedict.Load returns nil") 14 | } 15 | } 16 | 17 | func BenchmarkLoad(b *testing.B) { 18 | for i := 0; i < b.N; i++ { 19 | _, _ = Load() 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/koron/gomigemo 2 | 3 | go 1.21 4 | 5 | require ( 6 | github.com/google/go-cmp v0.6.0 7 | github.com/koron-go/skkdict v1.0.0 8 | github.com/koron/gelatin v0.0.0-20160729020448-88d6a03ce765 9 | ) 10 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= 2 | github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= 3 | github.com/koron-go/skkdict v1.0.0 h1:N8U7nFJt6ddKt52i1FoG5ukzjHgqlHqtscjK8UcmUCM= 4 | github.com/koron-go/skkdict v1.0.0/go.mod h1:Pd4MKJybKeA6yzuUNfXqV5+8SBSb3PEvowZ3orQfC4M= 5 | github.com/koron/gelatin v0.0.0-20160729020448-88d6a03ce765 h1:/k2Hth0PQq47SNc2pvOnwEQbjtu7HgtQu8TFXKhABKE= 6 | github.com/koron/gelatin v0.0.0-20160729020448-88d6a03ce765/go.mod h1:TJD1ti844npsMLPGgPPLa1Ozaz0W36N9EL6dQmTZ0kU= 7 | -------------------------------------------------------------------------------- /inflator/dispatch.go: -------------------------------------------------------------------------------- 
1 | package inflator 2 | 3 | type dispatcher struct { 4 | inflatables []Inflatable 5 | } 6 | 7 | // Dispatch is an Inflatable which dispatch a string to Inflatables. 8 | func Dispatch(first Inflatable, others ...Inflatable) Inflatable { 9 | inflatables := make([]Inflatable, len(others)+1) 10 | inflatables[0] = first 11 | for i, v := range others { 12 | inflatables[i+1] = v 13 | } 14 | return &dispatcher{inflatables} 15 | } 16 | 17 | func (d *dispatcher) Inflate(s string) <-chan string { 18 | return Start(func(c chan<- string) { 19 | for _, n := range d.inflatables { 20 | for t := range n.Inflate(s) { 21 | c <- t 22 | } 23 | } 24 | }) 25 | } 26 | 27 | // DispatchEcho is an Inflatable which combined Echo and Dispatch. 28 | func DispatchEcho(inflatables ...Inflatable) Inflatable { 29 | return Dispatch(Echo(), inflatables...) 30 | } 31 | -------------------------------------------------------------------------------- /inflator/dispatch_test.go: -------------------------------------------------------------------------------- 1 | package inflator 2 | 3 | import ( 4 | "testing" 5 | ) 6 | 7 | func TestDispatch(t *testing.T) { 8 | e := Echo() 9 | p := Prefix("foo1-", "foo2-") 10 | s := Suffix("-bar1", "-bar2") 11 | d := Dispatch(e, p, s) 12 | 13 | c1 := d.Inflate("qux") 14 | if v, ok := <-c1; !ok || v != "qux" { 15 | t.Error("didn't return \"qux\":", v, ok) 16 | } 17 | if v, ok := <-c1; !ok || v != "foo1-qux" { 18 | t.Error("didn't return \"foo1-qux\":", v, ok) 19 | } 20 | if v, ok := <-c1; !ok || v != "foo2-qux" { 21 | t.Error("didn't return \"foo2-qux\":", v, ok) 22 | } 23 | if v, ok := <-c1; !ok || v != "qux-bar1" { 24 | t.Error("didn't return \"qux-bar1\":", v, ok) 25 | } 26 | if v, ok := <-c1; !ok || v != "qux-bar2" { 27 | t.Error("didn't return \"qux-bar2\":", v, ok) 28 | } 29 | if v, ok := <-c1; ok { 30 | t.Error("returned unexpected:", v, ok) 31 | } 32 | 33 | c2 := d.Inflate("baz") 34 | if v, ok := <-c2; !ok || v != "baz" { 35 | t.Error("didn't return \"baz\":", v, 
ok) 36 | } 37 | if v, ok := <-c2; !ok || v != "foo1-baz" { 38 | t.Error("didn't return \"foo1-baz\":", v, ok) 39 | } 40 | if v, ok := <-c2; !ok || v != "foo2-baz" { 41 | t.Error("didn't return \"foo2-baz\":", v, ok) 42 | } 43 | if v, ok := <-c2; !ok || v != "baz-bar1" { 44 | t.Error("didn't return \"baz-bar1\":", v, ok) 45 | } 46 | if v, ok := <-c2; !ok || v != "baz-bar2" { 47 | t.Error("didn't return \"baz-bar2\":", v, ok) 48 | } 49 | if v, ok := <-c2; ok { 50 | t.Error("returned unexpected:", v, ok) 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /inflator/echo.go: -------------------------------------------------------------------------------- 1 | package inflator 2 | 3 | type echo struct { 4 | } 5 | 6 | // Echo provides an Inflatable which returns a string as is. 7 | func Echo() Inflatable { 8 | return &echo{} 9 | } 10 | 11 | func (e *echo) Inflate(s string) <-chan string { 12 | return Start(func(c chan<- string) { 13 | c <- s 14 | }) 15 | } 16 | -------------------------------------------------------------------------------- /inflator/echo_test.go: -------------------------------------------------------------------------------- 1 | package inflator 2 | 3 | import ( 4 | "testing" 5 | ) 6 | 7 | func TestEcho(t *testing.T) { 8 | e := Echo() 9 | 10 | c1 := e.Inflate("foo") 11 | if <-c1 != "foo" { 12 | t.Error("Echo didn't return \"foo\"") 13 | } 14 | if _, ok := <-c1; ok { 15 | t.Error("Echo returned others of \"foo\"") 16 | } 17 | 18 | c2 := e.Inflate("bar") 19 | if <-c2 != "bar" { 20 | t.Error("Echo didn't return \"bar\"") 21 | } 22 | if _, ok := <-c2; ok { 23 | t.Error("Echo returned others of \"bar\"") 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /inflator/filter.go: -------------------------------------------------------------------------------- 1 | package inflator 2 | 3 | type filter struct { 4 | check func(string) bool 5 | } 6 | 7 | // Filter provides 
a filter Inflatable. 8 | func Filter(check func(string) bool) Inflatable { 9 | return &filter{check} 10 | } 11 | 12 | func (f *filter) Inflate(s string) <-chan string { 13 | return Start(func(c chan<- string) { 14 | if f.check(s) { 15 | c <- s 16 | } 17 | }) 18 | } 19 | -------------------------------------------------------------------------------- /inflator/filter_test.go: -------------------------------------------------------------------------------- 1 | package inflator 2 | 3 | import ( 4 | "testing" 5 | ) 6 | 7 | func TestFilter(t *testing.T) { 8 | p := Prefix("a", "aa", "A", "AA") 9 | s := Suffix("b", "bb", "B", "BB") 10 | j1 := Join(p, s) 11 | f := Filter(func(s string) bool { 12 | return len(s) >= 4 13 | }) 14 | j2 := Join(j1, f) 15 | 16 | c := j2.Inflate("") 17 | if v, ok := <-c; !ok || v != "aabb" { 18 | t.Error("didn't return \"aabb\":", v, ok) 19 | } 20 | if v, ok := <-c; !ok || v != "aaBB" { 21 | t.Error("didn't return \"aaBB\":", v, ok) 22 | } 23 | if v, ok := <-c; !ok || v != "AAbb" { 24 | t.Error("didn't return \"AAbb\":", v, ok) 25 | } 26 | if v, ok := <-c; !ok || v != "AABB" { 27 | t.Error("didn't return \"AABB\":", v, ok) 28 | } 29 | if v, ok := <-c; ok { 30 | t.Error("returned unexpected:", v, ok) 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /inflator/inflator.go: -------------------------------------------------------------------------------- 1 | package inflator 2 | 3 | // Inflatable inflates a string with channel. 4 | type Inflatable interface { 5 | Inflate(s string) <-chan string 6 | } 7 | 8 | // Start starts inflation with function. 
// Start runs f in a new goroutine and returns the receiving end of the
// channel f writes to.
//
// The channel has a buffer of one element and is closed as soon as f returns,
// so callers can simply range over the result.
func Start(f func(chan<- string)) <-chan string {
	ch := make(chan string, 1)
	go func(out chan<- string) {
		defer close(out)
		f(out)
	}(ch)
	return ch
}
!= "foo1-qux-bar2" { 30 | t.Error("didn't return \"foo1-qux-bar2\":", v, ok) 31 | } 32 | if v, ok := <-c; !ok || v != "foo2-qux-bar1" { 33 | t.Error("didn't return \"foo2-qux-bar1\":", v, ok) 34 | } 35 | if v, ok := <-c; !ok || v != "foo2-qux-bar2" { 36 | t.Error("didn't return \"foo2-qux-bar2\":", v, ok) 37 | } 38 | if v, ok := <-c; !ok || v != "foo3-qux-bar1" { 39 | t.Error("didn't return \"foo3-qux-bar1\":", v, ok) 40 | } 41 | if v, ok := <-c; !ok || v != "foo3-qux-bar2" { 42 | t.Error("didn't return \"foo3-qux-bar2\":", v, ok) 43 | } 44 | if v, ok := <-c; ok { 45 | t.Error("returned unexpected:", v, ok) 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /inflator/prefix.go: -------------------------------------------------------------------------------- 1 | package inflator 2 | 3 | type prefixer struct { 4 | prefixes []string 5 | } 6 | 7 | // Prefix provides Inflatable with prefixes. 8 | func Prefix(prefixes ...string) Inflatable { 9 | return &prefixer{prefixes} 10 | } 11 | 12 | func (p *prefixer) Inflate(s string) <-chan string { 13 | return Start(func(c chan<- string) { 14 | for _, t := range p.prefixes { 15 | c <- t + s 16 | } 17 | }) 18 | } 19 | -------------------------------------------------------------------------------- /inflator/prefix_test.go: -------------------------------------------------------------------------------- 1 | package inflator 2 | 3 | import ( 4 | "testing" 5 | ) 6 | 7 | func TestPrefix(t *testing.T) { 8 | p := Prefix("foo", "bar", "baz") 9 | c := p.Inflate("-qux") 10 | if <-c != "foo-qux" { 11 | t.Error("Prefix didn't return \"foo-qux\"") 12 | } 13 | if <-c != "bar-qux" { 14 | t.Error("Prefix didn't return \"bar-qux\"") 15 | } 16 | if <-c != "baz-qux" { 17 | t.Error("Prefix didn't return \"baz-qux\"") 18 | } 19 | if _, ok := <-c; ok { 20 | t.Error("Prefix returned unexpected") 21 | } 22 | } 23 | -------------------------------------------------------------------------------- 
// Console provides basic console for gmigemo. Queries are read line by line
// from Reader; the prompt and the resulting patterns are written to Writer.
type Console struct {
	Reader *bufio.Reader
	Writer io.Writer
}

// NewConsole allocates a new console bound to the process's standard input
// and standard output.
func NewConsole() *Console {
	return &Console{
		Reader: bufio.NewReader(os.Stdin),
		Writer: os.Stdout,
	}
}

// GetQuery prompts for a query and reads one line from c.Reader.
//
// The returned string has surrounding white space removed. The error is the
// one reported by the read, so a final line without a trailing newline is
// returned together with io.EOF.
func (c *Console) GetQuery() (string, error) {
	// The prompt belongs on the same stream as the patterns. Fall back to
	// standard output for a Console built without a Writer, which is where
	// the prompt used to go unconditionally.
	w := c.Writer
	if w == nil {
		w = os.Stdout
	}
	// The prompt is best effort: a failed write must not prevent reading the
	// query, so its result is deliberately ignored.
	_, _ = fmt.Fprint(w, "QUERY: ")

	l, err := c.Reader.ReadString('\n')
	return strings.TrimSpace(l), err
}

// PutPattern puts a regexp pattern as migemo result: it writes p to c.Writer
// as a "PATTERN: " line and returns the write error, if any.
func (c *Console) PutPattern(p string) error {
	_, err := fmt.Fprintf(c.Writer, "PATTERN: %s\n", p)
	return err
}
// DefaultDictdir returns the default dictionary directory.
//
// The first candidate that applies wins:
//
//  1. the value of GOMIGEMO_DICTDIR, when not empty;
//  2. the value of GMIGEMO_DICTDIR (obsolete name), when not empty;
//  3. src/github.com/koron/gomigemo/_dict below the first GOPATH entry in
//     which that path exists and is a directory;
//  4. the current directory, ".".
func DefaultDictdir() string {
	for _, key := range []string{"GOMIGEMO_DICTDIR", "GMIGEMO_DICTDIR"} {
		if dir := os.Getenv(key); dir != "" {
			return dir
		}
	}
	for _, root := range filepath.SplitList(os.Getenv("GOPATH")) {
		candidate := filepath.Join(root, "src", "github.com", "koron", "gomigemo", "_dict")
		info, err := os.Stat(candidate)
		if err != nil || !info.IsDir() {
			continue
		}
		return candidate
	}
	// Fallback to current directory.
	return "."
}
43 | func LoadDefault() (Dict, error) { 44 | return Load(DefaultDictdir()) 45 | } 46 | -------------------------------------------------------------------------------- /migemo/defaults_test.go: -------------------------------------------------------------------------------- 1 | package migemo 2 | 3 | import ( 4 | "testing" 5 | ) 6 | 7 | func BenchmarkLoadDefault(b *testing.B) { 8 | for i := 0; i < b.N; i++ { 9 | _, _ = LoadDefault() 10 | } 11 | } 12 | -------------------------------------------------------------------------------- /migemo/dict.go: -------------------------------------------------------------------------------- 1 | package migemo 2 | 3 | import ( 4 | "errors" 5 | "io" 6 | 7 | "github.com/koron/gomigemo/conv" 8 | skkdict "github.com/koron/gomigemo/dict" 9 | "github.com/koron/gomigemo/inflator" 10 | ) 11 | 12 | type dict struct { 13 | assets Assets 14 | inflator inflator.Inflatable 15 | } 16 | 17 | func (d *dict) Matcher(s string) (Matcher, error) { 18 | return newMatcher(d, s) 19 | } 20 | 21 | func (d *dict) loadSKKDict(name string) (sd *skkdict.Dict, err error) { 22 | err = d.assets.Get(name, func(rd io.Reader) (err error) { 23 | sd, err = skkdict.ReadSKK(rd) 24 | return err 25 | }) 26 | if err != nil { 27 | sd = nil 28 | } 29 | return sd, err 30 | } 31 | 32 | func (d *dict) loadConv(name string) (c *conv.Converter, err error) { 33 | c = conv.New() 34 | err = d.assets.Get(name, func(rd io.Reader) error { 35 | _, err := c.Load(rd, name) 36 | return err 37 | }) 38 | if err != nil { 39 | c = nil 40 | } 41 | return c, err 42 | } 43 | 44 | func (d *dict) load() error { 45 | if d.inflator != nil { 46 | return errors.New("dictionaries were loaded already") 47 | } 48 | 49 | // Load dictionaries. 
50 | skk, err := d.loadSKKDict("SKK-JISYO.utf-8.L") 51 | if err != nil { 52 | return err 53 | } 54 | roma2hira, err := d.loadConv("roma2hira.txt") 55 | if err != nil { 56 | return err 57 | } 58 | hira2kata, err := d.loadConv("hira2kata.txt") 59 | if err != nil { 60 | return err 61 | } 62 | wide2narrow, err := d.loadConv("wide2narrow.txt") 63 | if err != nil { 64 | return err 65 | } 66 | 67 | // Build inflator. 68 | d.inflator = inflator.Join( 69 | inflator.DispatchEcho( 70 | inflator.Join( 71 | roma2hira, 72 | inflator.DispatchEcho(inflator.Join( 73 | hira2kata, 74 | inflator.DispatchEcho(wide2narrow), 75 | )), 76 | ), 77 | ), 78 | inflator.DispatchEcho(skk), 79 | ) 80 | 81 | // FIXME: Make these (loader and builder) flexible. 82 | return nil 83 | } 84 | -------------------------------------------------------------------------------- /migemo/matcher.go: -------------------------------------------------------------------------------- 1 | package migemo 2 | 3 | import ( 4 | "errors" 5 | 6 | "github.com/koron/gelatin/trie" 7 | ) 8 | 9 | type matcher struct { 10 | options MatcherOptions 11 | trie *trie.TernaryTrie 12 | pattern string 13 | patterned bool 14 | } 15 | 16 | func newMatcher(d *dict, s string) (*matcher, error) { 17 | if d.inflator == nil { 18 | return nil, errors.New("dictionary is not loaded") 19 | } 20 | m := &matcher{ 21 | options: defaultMatcherOptions, 22 | trie: trie.NewTernaryTrie(), 23 | } 24 | // Inflate s word, add those to trie. 25 | ch := d.inflator.Inflate(s) 26 | for w := range ch { 27 | m.add(w) 28 | } 29 | m.trie.Balance() 30 | return m, nil 31 | } 32 | 33 | func (m *matcher) Match(s string) (chan Match, error) { 34 | // FIXME: Make own match with trie in future. 
35 | return nil, nil 36 | } 37 | 38 | func (m *matcher) SetOptions(o MatcherOptions) { 39 | m.options = o 40 | m.patterned = false 41 | } 42 | 43 | func (m *matcher) GetOptions() MatcherOptions { 44 | return m.options 45 | } 46 | 47 | func (m *matcher) add(s string) { 48 | // Add a string to m.trie. 49 | if len(s) == 0 { 50 | return 51 | } 52 | n := m.trie.Root() 53 | for _, c := range s { 54 | n, _ = n.Dig(c) 55 | if n.Value() != nil { 56 | return 57 | } 58 | } 59 | n.SetValue(true) 60 | n.RemoveAll() 61 | } 62 | -------------------------------------------------------------------------------- /migemo/migemo.go: -------------------------------------------------------------------------------- 1 | package migemo 2 | 3 | import ( 4 | "regexp" 5 | ) 6 | 7 | // Dict provides an interface of dictionary for Migemo. 8 | type Dict interface { 9 | Matcher(string) (Matcher, error) 10 | } 11 | 12 | // Matcher defines Migemo matcher interface, which have an expanded migemo tree 13 | // and provides matching operations. 14 | type Matcher interface { 15 | // Match matches with string. 16 | Match(string) (chan Match, error) 17 | // Pattern provides a regexp pattern of this match. 18 | Pattern() (string, error) 19 | // SetOptions changes matcher's options. 20 | SetOptions(MatcherOptions) 21 | // GetOptions retrieves matcher's options. 22 | GetOptions() MatcherOptions 23 | } 24 | 25 | // MatcherOptions defines options for migemo matcher (generation of regexp). 26 | type MatcherOptions struct { 27 | OpOr string 28 | OpGroupIn, OpGroupOut string 29 | OpClassIn, OpClassOut string 30 | OpWSpaces string 31 | //MetaChars string 32 | } 33 | 34 | // Match is positional information of a match. 35 | type Match struct { 36 | Start, End int 37 | } 38 | 39 | // Load loads a dict from path (file system). 40 | func Load(path string) (Dict, error) { 41 | return LoadAssets(&PathAssets{root: path}) 42 | } 43 | 44 | // LoadAssets loads a dict from Assets. 
45 | func LoadAssets(assets Assets) (Dict, error) { 46 | d := &dict{assets: assets} 47 | err := d.load() 48 | if err != nil { 49 | return nil, err 50 | } 51 | return d, nil 52 | } 53 | 54 | // Compile compiles a regexp which expanded from string s with Migemo. 55 | func Compile(d Dict, s string) (*regexp.Regexp, error) { 56 | m, err := d.Matcher(s) 57 | if err != nil { 58 | return nil, err 59 | } 60 | return NewRegexp(m) 61 | } 62 | 63 | // NewRegexp generates a regexp from matcher. 64 | func NewRegexp(m Matcher) (*regexp.Regexp, error) { 65 | p, err := m.Pattern() 66 | if err != nil { 67 | return nil, err 68 | } 69 | return regexp.Compile(p) 70 | } 71 | 72 | // Pattern generates a regexp patter string from string s with Migemo. 73 | func Pattern(d Dict, s string) (string, error) { 74 | m, err := d.Matcher(s) 75 | if err != nil { 76 | return "", err 77 | } 78 | p, err := m.Pattern() 79 | if err != nil { 80 | return "", err 81 | } 82 | return p, nil 83 | } 84 | -------------------------------------------------------------------------------- /migemo/migemo_test.go: -------------------------------------------------------------------------------- 1 | package migemo 2 | 3 | import ( 4 | "testing" 5 | ) 6 | 7 | func TestLoad(t *testing.T) { 8 | _, _ = Load("../dict") 9 | } 10 | -------------------------------------------------------------------------------- /migemo/multiclause.go: -------------------------------------------------------------------------------- 1 | package migemo 2 | 3 | import ( 4 | "bytes" 5 | "errors" 6 | "fmt" 7 | "strings" 8 | "unicode" 9 | ) 10 | 11 | // mcDict means multiple clause (renbun) dictionary. 12 | type mcDict struct { 13 | dict Dict 14 | } 15 | 16 | // MultiClauses creates a new Dict which supports multiple clauses (renbun) 17 | // conversion based on given Dict. 18 | func MultiClauses(dict Dict) Dict { 19 | return mcDict{dict: dict} 20 | } 21 | 22 | // Matcher returns a new Matcher object which generated from query. 
// splitClauses separates a string into clauses. The break between clauses is
// usually an uppercase letter. Clauses that begin with multiple capital
// letters are separated by non-capital letters.
//
// Examples: "abcDef" gives "abc", "Def"; "abcDEFghi" gives "abc", "DEF",
// "ghi"; "AAA0" gives "AAA", "0". An empty query gives an empty slice rather
// than a panic. The returned error is always nil.
func splitClauses(query string) ([]string, error) {
	const (
		atStart       = iota // nothing consumed yet
		inLower              // inside a clause that breaks at the next upper-case rune
		afterOneUpper        // clause began with exactly one upper-case rune so far
		inUpperRun           // clause began with two or more upper-case runes
	)
	clauses := make([]string, 0, 8)
	mode := atStart
	// start is the byte offset at which the current clause begins. It stays
	// at 0 for an empty query, so the final check below adds nothing.
	start := 0
	for i, ch := range query {
		upper := unicode.IsUpper(ch)
		switch mode {
		case atStart:
			start = i
			if upper {
				mode = afterOneUpper
			} else {
				mode = inLower
			}
		case inLower:
			if upper {
				clauses = append(clauses, query[start:i])
				start = i
				mode = afterOneUpper
			}
		case afterOneUpper:
			// The second rune decides whether this is a capitalized clause
			// ("Def") or a run of capitals ("DEF").
			if upper {
				mode = inUpperRun
			} else {
				mode = inLower
			}
		case inUpperRun:
			if !upper {
				clauses = append(clauses, query[start:i])
				start = i
				mode = inLower
			}
		}
	}
	if start < len(query) {
		clauses = append(clauses, query[start:])
	}
	return clauses, nil
}
new(bytes.Buffer) 17 | err = m.writePattern(b, m.trie.Root()) 18 | if err != nil { 19 | return "", err 20 | } 21 | m.pattern = b.String() 22 | m.patterned = true 23 | return m.pattern, nil 24 | } 25 | 26 | func (m *matcher) writePattern(b *bytes.Buffer, n trie.Node) error { 27 | labels, chlidNodes := m.splitLabels(n) 28 | // Output group in. 29 | grouped := false 30 | c0 := utf8.RuneCountInString(labels) 31 | c1 := chlidNodes.Len() 32 | if c0+c1 > 1 && c1 > 0 { 33 | grouped = true 34 | b.WriteString(m.options.OpGroupIn) 35 | } 36 | // Output nodes which doesn't have any children. 37 | if c0 > 0 { 38 | if c0 > 1 { 39 | b.WriteString(m.options.OpClassIn) 40 | b.WriteString(m.quoteMeta(labels)) 41 | b.WriteString(m.options.OpClassOut) 42 | } else { 43 | b.WriteString(m.quoteMeta(labels)) 44 | } 45 | } 46 | // Output nodes which have some children. 47 | if c1 > 0 { 48 | first := c0 == 0 49 | for e := chlidNodes.Front(); e != nil; e = e.Next() { 50 | if !first { 51 | b.WriteString(m.options.OpOr) 52 | } else { 53 | first = false 54 | } 55 | child := e.Value.(*trie.TernaryNode) 56 | b.WriteString(m.quoteMeta(string(child.Label()))) 57 | b.WriteString(m.options.OpWSpaces) 58 | m.writePattern(b, child) 59 | } 60 | } 61 | // Output group out. 62 | if grouped { 63 | b.WriteString(m.options.OpGroupOut) 64 | } 65 | return nil 66 | } 67 | 68 | // splitLabels split children which have children or not. 69 | func (m *matcher) splitLabels(n trie.Node) (label string, nodes *list.List) { 70 | l := list.New() 71 | b := new(bytes.Buffer) 72 | n.Each(func(t trie.Node) bool { 73 | if t.HasChildren() { 74 | l.PushBack(t) 75 | } else { 76 | b.WriteRune(t.Label()) 77 | } 78 | return true 79 | }) 80 | return b.String(), l 81 | } 82 | 83 | func (m *matcher) quoteMeta(s string) string { 84 | // Quote regexp meta chars. 
// LineProc defines a callback which processes each line.
//
// line is the text read, including the trailing '\n' when one was present.
// err is the error reported by the underlying read for this line (io.EOF on
// the last call). Returning a non-nil error stops the reading; returning
// io.EOF stops it without reporting an error to the caller.
type LineProc func(line string, err error) error

// ReadLines reads lines from rd and calls proc for each line.
//
// Reading stops when proc returns a non-nil error or when the reader itself
// reported an error (including io.EOF), whichever comes first. The latter
// guarantees termination even when proc does not propagate the error it was
// given. io.EOF is never returned to the caller; it is converted to nil.
func ReadLines(rd io.Reader, proc LineProc) error {
	r := bufio.NewReader(rd)
	for {
		line, readErr := r.ReadString('\n')
		if err := proc(line, readErr); err != nil {
			if err == io.EOF {
				return nil
			}
			return err
		}
		if readErr != nil {
			// proc swallowed the read error. Nothing more can be read, so
			// stop here instead of spinning on the same error forever.
			if readErr == io.EOF {
				return nil
			}
			return readErr
		}
	}
}

// ReadFileLines opens the file at path and calls proc for each of its lines.
// The file is closed before returning.
func ReadFileLines(path string, proc LineProc) error {
	file, err := os.Open(path)
	if err != nil {
		return err
	}
	defer file.Close()
	return ReadLines(file, proc)
}
// StackableRuneReader provides an io.RuneReader onto which strings can be
// pushed back. The most recently pushed string is read first; once it is
// exhausted, reading continues with the string pushed before it.
//
// The zero value is an empty reader ready to use.
type StackableRuneReader struct {
	// readers holds *strings.Reader values, front is read first. It is nil
	// until the first non-empty string is pushed on a zero-value reader.
	readers *list.List
}

// NewStackabeRuneReader creates a new, empty StackableRuneReader instance.
func NewStackabeRuneReader() *StackableRuneReader {
	return &StackableRuneReader{readers: list.New()}
}

// PushFront pushes back a string so that it is read before anything pushed
// earlier. Empty strings are ignored.
func (r *StackableRuneReader) PushFront(s string) {
	if s == "" {
		return
	}
	if r.readers == nil {
		r.readers = list.New()
	}
	r.readers.PushFront(strings.NewReader(s))
}

// ReadRune reads a rune, which implements io.RuneReader. Exhausted strings
// are dropped transparently; io.EOF is returned only when no pushed string
// has runes left.
func (r *StackableRuneReader) ReadRune() (ch rune, size int, err error) {
	if r.readers == nil {
		return 0, 0, io.EOF
	}
	for front := r.readers.Front(); front != nil; front = r.readers.Front() {
		ch, size, err = front.Value.(*strings.Reader).ReadRune()
		if err != io.EOF {
			return ch, size, err
		}
		// The front string is used up; fall through to the next one.
		r.readers.Remove(front)
	}
	return 0, 0, io.EOF
}
| assertReadRune(t, 'o', r) 58 | assertEOF(t, r) 59 | } 60 | 61 | func TestStackedReaderMultipleSuspend2(t *testing.T) { 62 | r := NewStackabeRuneReader() 63 | r.PushFront("foo") 64 | assertReadRune(t, 'f', r) 65 | assertReadRune(t, 'o', r) 66 | r.PushFront("bar") 67 | assertReadRune(t, 'b', r) 68 | assertReadRune(t, 'a', r) 69 | assertReadRune(t, 'r', r) 70 | assertReadRune(t, 'o', r) 71 | assertEOF(t, r) 72 | } 73 | 74 | func TestStackedReaderMultipleSuspend3(t *testing.T) { 75 | r := NewStackabeRuneReader() 76 | r.PushFront("foo") 77 | assertReadRune(t, 'f', r) 78 | assertReadRune(t, 'o', r) 79 | assertReadRune(t, 'o', r) 80 | r.PushFront("bar") 81 | assertReadRune(t, 'b', r) 82 | assertReadRune(t, 'a', r) 83 | assertReadRune(t, 'r', r) 84 | assertEOF(t, r) 85 | } 86 | -------------------------------------------------------------------------------- /readutil/testdata/readlines_test0.txt: -------------------------------------------------------------------------------- 1 | foo 2 | bar 3 | baz 4 | -------------------------------------------------------------------------------- /readutil/testdata/readlines_test1.txt: -------------------------------------------------------------------------------- 1 | foo 2 | bar 3 | baz -------------------------------------------------------------------------------- /rpc/client/migemo.go: -------------------------------------------------------------------------------- 1 | package client 2 | 3 | import ( 4 | "net/rpc" 5 | "regexp" 6 | ) 7 | 8 | // Client is a migemo RPC client 9 | type Client rpc.Client 10 | 11 | // Compile compiles a migemo string and get regexp.Regexp with migemo RPC 12 | // server. 13 | func Compile(s string) (*regexp.Regexp, error) { 14 | c, err := Connect() 15 | if err != nil { 16 | return nil, err 17 | } 18 | return c.Compile(s) 19 | } 20 | 21 | // Addr is migemo RPC server address. 22 | var Addr = "127.0.0.1:1234" 23 | 24 | // Connect connects migemo RPC server and get Client. 
25 | func Connect() (*Client, error) { 26 | c, err := rpc.DialHTTP("tcp", Addr) 27 | if err != nil { 28 | return nil, err 29 | } 30 | return (*Client)(c), nil 31 | } 32 | 33 | // Compile compiles a migemo string and get regexp.Regexp. 34 | func (c *Client) Compile(s string) (*regexp.Regexp, error) { 35 | var p string 36 | err := (*rpc.Client)(c).Call("Migemo.Pattern", s, &p) 37 | if err != nil { 38 | return nil, err 39 | } 40 | 41 | return regexp.Compile(p) 42 | } 43 | -------------------------------------------------------------------------------- /rpc/server/migemo.go: -------------------------------------------------------------------------------- 1 | package server 2 | 3 | import "github.com/koron/gomigemo/migemo" 4 | 5 | // Migemo is migemo RPC server implementation. 6 | type Migemo struct { 7 | dict migemo.Dict 8 | } 9 | 10 | // Pattern generates a migemo pattern from query. 11 | func (m *Migemo) Pattern(query string, pattern *string) error { 12 | p, err := migemo.Pattern(m.dict, query) 13 | if err != nil { 14 | return err 15 | } 16 | *pattern = p 17 | return nil 18 | } 19 | -------------------------------------------------------------------------------- /rpc/server/server.go: -------------------------------------------------------------------------------- 1 | package server 2 | 3 | import ( 4 | "net" 5 | "net/http" 6 | "net/rpc" 7 | 8 | "github.com/koron/gomigemo/embedict" 9 | ) 10 | 11 | // RunDefault starts a migemo RPC server listening on default address/port. 12 | func RunDefault() error { 13 | d, err := embedict.Load() 14 | if err != nil { 15 | return err 16 | } 17 | migemo := &Migemo{dict: d} 18 | 19 | rpc.Register(migemo) 20 | rpc.HandleHTTP() 21 | l, err := net.Listen("tcp", ":1234") 22 | if err != nil { 23 | return err 24 | } 25 | http.Serve(l, nil) 26 | 27 | return nil 28 | } 29 | --------------------------------------------------------------------------------