├── go.mod ├── README.md ├── go.sum └── main.go /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/nobonobo/voicevox-cli 2 | 3 | go 1.17 4 | 5 | require ( 6 | github.com/hajimehoshi/oto v1.0.1 // indirect 7 | golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8 // indirect 8 | golang.org/x/image v0.0.0-20190227222117-0694c2d4d067 // indirect 9 | golang.org/x/mobile v0.0.0-20190415191353-3e0bab5405d6 // indirect 10 | golang.org/x/sys v0.0.0-20190429190828-d89cdac9e872 // indirect 11 | ) 12 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # voicevox-cli 2 | 3 | CLI for [VOICEVOX](https://voicevox.hiroshiba.jp). 4 | 5 | ## install 6 | 7 | ```shell 8 | > go install github.com/nobonobo/voicevox-cli@latest 9 | ``` 10 | 11 | ## usage 12 | 13 | prerequisite: 14 | 15 | ```shell 16 | docker run -d -p 50021:50021 hiroshiba/voicevox_engine:cpu-ubuntu20.04-0.10.4 17 | ``` 18 | 19 | example: 20 | 21 | ```shell 22 | > voicevox-cli -speaker=0 -style=0 "こんにちは" 23 | main.go:170: 四国めたん ノーマル 2 24 | ``` 25 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/hajimehoshi/oto v1.0.1 h1:8AMnq0Yr2YmzaiqTg/k1Yzd6IygUGk2we9nmjgbgPn4= 2 | github.com/hajimehoshi/oto v1.0.1/go.mod h1:wovJ8WWMfFKvP587mhHgot/MBr4DnNy9m6EepeVGnos= 3 | golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8 h1:idBdZTd9UioThJp8KpM/rTSinK/ChZFBE43/WtIy8zg= 4 | golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= 5 | golang.org/x/image v0.0.0-20190227222117-0694c2d4d067 h1:KYGJGHOQy8oSi1fDlSpcZF0+juKwk/hEMv5SiwHogR0= 6 | golang.org/x/image v0.0.0-20190227222117-0694c2d4d067/go.mod h1:kZ7UVZpmo3dzQBMxlp+ypCbDeSB+sBbTgSJuh5dn5js= 7 | 
golang.org/x/mobile v0.0.0-20190415191353-3e0bab5405d6 h1:vyLBGJPIl9ZYbcQFM2USFmJBK6KI+t+z6jL0lbwjrnc= 8 | golang.org/x/mobile v0.0.0-20190415191353-3e0bab5405d6/go.mod h1:E/iHnbuqvinMTCcRqshq8CkpyQDoeVncDDYHnLhea+o= 9 | golang.org/x/sys v0.0.0-20190312061237-fead79001313/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 10 | golang.org/x/sys v0.0.0-20190429190828-d89cdac9e872 h1:cGjJzUd8RgBw428LXP65YXni0aiGNA4Bl+ls8SmLOm8= 11 | golang.org/x/sys v0.0.0-20190429190828-d89cdac9e872/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 12 | golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= 13 | -------------------------------------------------------------------------------- /main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bytes" 5 | "encoding/json" 6 | "flag" 7 | "io" 8 | "io/ioutil" 9 | "log" 10 | "net/http" 11 | "strconv" 12 | "strings" 13 | 14 | "github.com/hajimehoshi/oto" 15 | ) 16 | 17 | type Params struct { 18 | AccentPhrases []AccentPhrases `json:"accent_phrases"` 19 | SpeedScale float64 `json:"speedScale"` 20 | PitchScale float64 `json:"pitchScale"` 21 | IntonationScale float64 `json:"intonationScale"` 22 | VolumeScale float64 `json:"volumeScale"` 23 | PrePhonemeLength float64 `json:"prePhonemeLength"` 24 | PostPhonemeLength float64 `json:"postPhonemeLength"` 25 | OutputSamplingRate int `json:"outputSamplingRate"` 26 | OutputStereo bool `json:"outputStereo"` 27 | Kana string `json:"kana"` 28 | } 29 | 30 | type Mora struct { 31 | Text string `json:"text"` 32 | Consonant *string `json:"consonant"` 33 | ConsonantLength *float64 `json:"consonant_length"` 34 | Vowel string `json:"vowel"` 35 | VowelLength float64 `json:"vowel_length"` 36 | Pitch float64 `json:"pitch"` 37 | } 38 | 39 | type AccentPhrases struct { 40 | Moras []Mora `json:"moras"` 41 | Accent int `json:"accent"` 42 | PauseMora *Mora `json:"pause_mora"` 43 | IsInterrogative bool 
`json:"is_interrogative"` 44 | } 45 | 46 | type Speakers []struct { 47 | Name string `json:"name"` 48 | SpeakerUUID string `json:"speaker_uuid"` 49 | Styles []Styles `json:"styles"` 50 | Version string `json:"version"` 51 | } 52 | 53 | type Styles struct { 54 | ID int `json:"id"` 55 | Name string `json:"name"` 56 | } 57 | 58 | type config struct { 59 | endpoint string 60 | speaker int 61 | style int 62 | speed float64 63 | intonation float64 64 | volume float64 65 | pitch float64 66 | output string 67 | } 68 | 69 | func getSpeakers(cfg config) Speakers { 70 | resp, err := http.Get(cfg.endpoint + "/speakers") 71 | if err != nil { 72 | log.Fatal(err) 73 | } 74 | defer resp.Body.Close() 75 | var speakers Speakers 76 | if err := json.NewDecoder(resp.Body).Decode(&speakers); err != nil { 77 | log.Fatal(err) 78 | } 79 | return speakers 80 | } 81 | 82 | func getQuery(cfg config, id int, text string) (*Params, error) { 83 | req, err := http.NewRequest("POST", cfg.endpoint+"/audio_query", nil) 84 | if err != nil { 85 | return nil, err 86 | } 87 | q := req.URL.Query() 88 | q.Add("speaker", strconv.Itoa(id)) 89 | q.Add("text", text) 90 | req.URL.RawQuery = q.Encode() 91 | //log.Println(req.URL.String()) 92 | resp, err := http.DefaultClient.Do(req) 93 | if err != nil { 94 | return nil, err 95 | } 96 | defer resp.Body.Close() 97 | var params *Params 98 | if err := json.NewDecoder(resp.Body).Decode(¶ms); err != nil { 99 | return nil, err 100 | } 101 | return params, nil 102 | } 103 | 104 | func synth(cfg config, id int, params *Params) ([]byte, error) { 105 | b, err := json.MarshalIndent(params, "", " ") 106 | if err != nil { 107 | return nil, err 108 | } 109 | //log.Println(string(b)) 110 | req, err := http.NewRequest("POST", cfg.endpoint+"/synthesis", bytes.NewReader(b)) 111 | if err != nil { 112 | return nil, err 113 | } 114 | req.Header.Add("Accept", "audio/wav") 115 | req.Header.Add("Content-Type", "application/json") 116 | q := req.URL.Query() 117 | q.Add("speaker", 
strconv.Itoa(id)) 118 | req.URL.RawQuery = q.Encode() 119 | //log.Println(req.URL.String()) 120 | resp, err := http.DefaultClient.Do(req) 121 | if err != nil { 122 | return nil, err 123 | } 124 | defer resp.Body.Close() 125 | buff := bytes.NewBuffer(nil) 126 | if _, err := io.Copy(buff, resp.Body); err != nil { 127 | return nil, err 128 | } 129 | return buff.Bytes(), nil 130 | } 131 | 132 | func playback(params *Params, b []byte) error { 133 | ch := 1 134 | if params.OutputStereo { 135 | ch = 2 136 | } 137 | ctx, err := oto.NewContext(params.OutputSamplingRate, ch, 2, 3200) 138 | if err != nil { 139 | return err 140 | } 141 | defer ctx.Close() 142 | p := ctx.NewPlayer() 143 | if _, err := io.Copy(p, bytes.NewReader(b)); err != nil { 144 | return err 145 | } 146 | if err := p.Close(); err != nil { 147 | return err 148 | } 149 | return nil 150 | } 151 | 152 | func main() { 153 | log.SetFlags(log.Lshortfile) 154 | cfg := config{} 155 | flag.StringVar(&cfg.endpoint, "endpoint", "http://localhost:50021", "api endpoint") 156 | flag.IntVar(&cfg.speaker, "speaker", 0, "speaker") 157 | flag.StringVar(&cfg.output, "o", "", "output wav file") 158 | flag.IntVar(&cfg.style, "style", 0, "style") 159 | flag.Float64Var(&cfg.speed, "speed", 1.0, "speed") 160 | flag.Float64Var(&cfg.intonation, "intonation", 1.0, "intonation") 161 | flag.Float64Var(&cfg.volume, "volume", 1.0, "volume") 162 | flag.Float64Var(&cfg.pitch, "pitch", 0.0, "pitch") 163 | flag.Parse() 164 | speakers := getSpeakers(cfg) 165 | if cfg.speaker >= len(speakers) { 166 | log.Fatal("speaker not found") 167 | } 168 | spk := speakers[cfg.speaker] 169 | if cfg.style >= len(spk.Styles) { 170 | log.Fatal("style not found") 171 | } 172 | spkID := spk.Styles[cfg.style].ID 173 | log.Println(spk.Name, spk.Styles[cfg.style].Name, spkID) 174 | params, err := getQuery(cfg, spkID, strings.Join(flag.Args(), " ")) 175 | if err != nil { 176 | log.Fatal(err) 177 | } 178 | params.SpeedScale = cfg.speed 179 | params.PitchScale = 
cfg.pitch 180 | params.IntonationScale = cfg.intonation 181 | params.VolumeScale = cfg.volume 182 | b, err := synth(cfg, spkID, params) 183 | if err != nil { 184 | log.Fatal(err) 185 | } 186 | if len(cfg.output) > 0 { 187 | if err := ioutil.WriteFile(cfg.output, b, 0644); err != nil { 188 | log.Fatal(err) 189 | } 190 | } else { 191 | if err := playback(params, b[44:]); err != nil { 192 | log.Fatal(err) 193 | } 194 | } 195 | } 196 | --------------------------------------------------------------------------------