├── go.mod ├── README.md ├── go.sum ├── cmd └── markov │ └── main.go └── markov.go /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/mattn/go-markov 2 | 3 | go 1.18 4 | 5 | require ( 6 | github.com/ikawaha/kagome-dict/uni v1.1.9 7 | github.com/ikawaha/kagome/v2 v2.9.2 8 | ) 9 | 10 | require github.com/ikawaha/kagome-dict v1.0.9 // indirect 11 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # go-markov 2 | 3 | markov chain 4 | 5 | ## Usage 6 | 7 | ```go 8 | m := markov.New() 9 | scanner := bufio.NewScanner(os.Stdin) 10 | for scanner.Scan() { 11 | m.Update(strings.TrimSpace(scanner.Text())) 12 | } 13 | if err := scanner.Err(); err != nil { 14 | log.Fatal(err) 15 | } 16 | fmt.Println(m.Chain(m.First())) 17 | ``` 18 | 19 | ## Installation 20 | 21 | ``` 22 | go get github.com/mattn/go-markov 23 | ``` 24 | 25 | ## License 26 | 27 | MIT 28 | 29 | ## Author 30 | 31 | Yasuhiro Matsumoto (a.k.a. mattn) 32 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/ikawaha/kagome-dict v1.0.9 h1:1Gg735LbBYsdFu13fdTvW6eVt0qIf5+S2qXGJtlG8C0= 2 | github.com/ikawaha/kagome-dict v1.0.9/go.mod h1:mn9itZLkFb6Ixko7q8eZmUabHbg3i9EYewnhOtvd2RM= 3 | github.com/ikawaha/kagome-dict/ipa v1.0.10 h1:wk9I21yg+fKdL6HJB9WgGiyXIiu1VttumJwmIRwn0g8= 4 | github.com/ikawaha/kagome-dict/uni v1.1.9 h1:cyKLswS8DSjUPTwsOjlC4WEqRkndUUVgiJR0lcFqZUk= 5 | github.com/ikawaha/kagome-dict/uni v1.1.9/go.mod h1:xg/2qumqt+/s8DhDGYGIU7a+q9ori8ymFvDBtcAVmgc= 6 | github.com/ikawaha/kagome/v2 v2.9.2 h1:ojkD53v8kmlqGCHPuhZXCg0RUoF2zzisKTuDRtUI9CU= 7 | github.com/ikawaha/kagome/v2 v2.9.2/go.mod h1:OYzxPG9dQSalvznlcLNR8TEKpPwzKhnZszw9LLbf7e8= 8 | -------------------------------------------------------------------------------- /cmd/markov/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bufio" 5 | "flag" 6 | "fmt" 7 | "log" 8 | "math/rand" 9 | "os" 10 | "strings" 11 | "time" 12 | 13 | "github.com/ikawaha/kagome-dict/uni" 14 | "github.com/ikawaha/kagome/v2/tokenizer" 15 | "github.com/mattn/go-markov" 16 | ) 17 | 18 | func contains(a []string, s string) bool { 19 | for _, v := range a { 20 | if v == s { 21 | return true 22 | } 23 | } 24 | return false 25 | } 26 | 27 | func main() { 28 | var length int 29 | flag.IntVar(&length, "n", -1, "letters") 30 | flag.Parse() 31 | 32 | rand.Seed(time.Now().UnixNano()) 33 | 34 | m := markov.New() 35 | scanner := bufio.NewScanner(os.Stdin) 36 | for scanner.Scan() { 37 | m.Update(strings.TrimSpace(scanner.Text())) 38 | } 39 | if err := scanner.Err(); err != nil { 40 | log.Fatal(err) 41 | } 42 | 43 | t, err := tokenizer.New(uni.Dict(), tokenizer.OmitBosEos()) 44 | if err != nil { 45 | log.Fatal(err) 46 | } 47 | 48 | bad := []string{ 49 | "助詞", 50 | "補助記号", 51 | } 52 | var result string 53 | for { 54 | var first string 55 | for { 56 | first = m.First() 57 | tokens := t.Tokenize(first) 58 | if !contains(bad, tokens[0].Features()[0]) { 59 | break 60 | } 61 | } 62 | 63 | result = strings.TrimSpace(m.Chain(first)) 64 | if result != "" && (length == -1 || len([]rune(result)) <= length) { 65 | break 66 | } 67 | } 68 | fmt.Println(result) 69 | } 70 | -------------------------------------------------------------------------------- /markov.go: -------------------------------------------------------------------------------- 1 | package markov 2 | 3 | import ( 4 | "math/rand" 5 | "regexp" 6 | 7 | "github.com/ikawaha/kagome-dict/uni" 8 | "github.com/ikawaha/kagome/v2/tokenizer" 9 | ) 10 | 11 | var ( 12 | reIgnoreText = regexp.MustCompile(`[\[\]「」『』()]`) 13 | ) 14 | 15 | type Markov struct { 16 | tbl map[string]map[string][]string 17 | } 18 | 19 | func New() *Markov { 20 | return &Markov{ 21 | tbl: make(map[string]map[string][]string), 22 | } 23 | } 24 | 25 | func (m *Markov) UpdateLine(text string) error { 26 | second, ok := m.tbl[text] 27 | if !ok { 28 | second = make(map[string][]string) 29 | m.tbl[text] = second 30 | } 31 | return nil 32 | } 33 | 34 | func (m *Markov) Update(text string) error { 35 | t, err := tokenizer.New(uni.Dict(), tokenizer.OmitBosEos()) 36 | if err != nil { 37 | return err 38 | } 39 | text = reIgnoreText.ReplaceAllString(text, "") 40 | tokens := t.Tokenize(text) 41 | 42 | words := []string{} 43 | for _, token := range tokens { 44 | if token.Surface == "BOS" || token.Surface == "EOS" { 45 | continue 46 | } 47 | words = append(words, token.Surface) 48 | } 49 | 50 | size := len(words) 51 | 52 | if size == 1 { 53 | second, ok := m.tbl[words[0]] 54 | if !ok { 55 | second = make(map[string][]string) 56 | m.tbl[words[0]] = second 57 | } 58 | return nil 59 | } 60 | for i := 0; i < size-2; i++ { 61 | second, ok := m.tbl[words[i]] 62 | if !ok { 63 | second = make(map[string][]string) 64 | m.tbl[words[i]] = second 65 | } 66 | second[words[i+1]] = append(second[words[i+1]], words[i+2]) 67 | } 68 | return nil 69 | } 70 | 71 | func (m *Markov) First() string { 72 | keys := []string{} 73 | for k := range m.tbl { 74 | keys = append(keys, k) 75 | } 76 | if len(keys) == 0 { 77 | return "" 78 | } 79 | return keys[rand.Int()%len(keys)] 80 | } 81 | 82 | func (m *Markov) Chain(first string) string { 83 | text := first 84 | 85 | keys := []string{} 86 | for k := range m.tbl[first] { 87 | keys = append(keys, k) 88 | } 89 | if len(keys) == 0 { 90 | return "" 91 | } 92 | kv := rand.Int() % len(keys) 93 | second := keys[kv] 94 | text += second 95 | 96 | limit := 500 97 | for second != "" { 98 | size := len(m.tbl[first][second]) 99 | if size == 0 { 100 | break 101 | } 102 | idx := rand.Int() % size 103 | next := m.tbl[first][second][idx] 104 | text += next 105 | first = second 106 | second = next 107 | 108 | if limit--; limit < 0 { 109 | return "" 110 | } 111 | } 112 | return text 113 | } 114 | --------------------------------------------------------------------------------