├── .gitignore
├── .gitmodules
├── LICENSE
├── README.md
├── aivis_example.py
├── aivis_server.py
├── chatbot.py
├── chatbot_akari.py
├── chatgpt_example.py
├── config
    ├── en_to_jp_fix_dict.csv
    └── system_prompt.txt
├── gpt_publisher.py
├── jpg
    ├── akari_chatgpt_bot.jpg
    ├── faster_chatgpt_bot.jpg
    └── faster_chatgpt_bot_system.jpg
├── lib
    ├── aivis.py
    ├── chat.py
    ├── chat_akari.py
    ├── chat_akari_grpc.py
    ├── conf.py
    ├── en_to_jp.py
    ├── err_handler.py
    ├── google_speech.py
    ├── google_speech_grpc.py
    ├── google_speech_v2.py
    ├── google_speech_v2_grpc.py
    ├── grpc
    │   ├── gpt_server_pb2.py
    │   ├── gpt_server_pb2_grpc.py
    │   ├── motion_server_pb2.py
    │   ├── motion_server_pb2_grpc.py
    │   ├── speech_server_pb2.py
    │   ├── speech_server_pb2_grpc.py
    │   ├── voice_server_pb2.py
    │   └── voice_server_pb2_grpc.py
    ├── style_bert_vits.py
    ├── text_to_voice.py
    └── voicevox.py
├── manual_grpc_publisher_for_gpt.py
├── manual_grpc_publisher_for_voice.py
├── proto
    ├── codegen.py
    ├── gpt_server.proto
    ├── speech_server.proto
    └── voice_server.proto
├── pysen.toml
├── requirements.txt
├── script
    ├── faster_chatbot.sh
    ├── faster_chatbot_aivis.sh
    ├── faster_chatbot_aivis_auto.sh
    ├── faster_chatbot_auto.sh
    ├── faster_chatbot_bert_vits.sh
    └── faster_chatbot_bert_vits_auto.sh
├── speech_publisher.py
├── speech_to_text_example.py
├── style_bert_vits_example.py
├── style_bert_vits_server.py
├── talk_controller_client.py
├── text_to_kana_example.py
├── voicevox_example.py
└── voicevox_server.py


/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib64/
 18 | parts/
 19 | sdist/
 20 | var/
 21 | wheels/
 22 | share/python-wheels/
 23 | *.egg-info/
 24 | .installed.cfg
 25 | *.egg
 26 | MANIFEST
 27 | 
 28 | # PyInstaller
 29 | #  Usually these files are written by a python script from a template
 30 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 31 | *.manifest
 32 | *.spec
 33 | 
 34 | # Installer logs
 35 | pip-log.txt
 36 | pip-delete-this-directory.txt
 37 | 
 38 | # Unit test / coverage reports
 39 | htmlcov/
 40 | .tox/
 41 | .nox/
 42 | .coverage
 43 | .coverage.*
 44 | .cache
 45 | nosetests.xml
 46 | coverage.xml
 47 | *.cover
 48 | *.py,cover
 49 | .hypothesis/
 50 | .pytest_cache/
 51 | cover/
 52 | 
 53 | # Translations
 54 | *.mo
 55 | *.pot
 56 | 
 57 | # Django stuff:
 58 | *.log
 59 | local_settings.py
 60 | db.sqlite3
 61 | db.sqlite3-journal
 62 | 
 63 | # Flask stuff:
 64 | instance/
 65 | .webassets-cache
 66 | 
 67 | # Scrapy stuff:
 68 | .scrapy
 69 | 
 70 | # Sphinx documentation
 71 | docs/_build/
 72 | 
 73 | # PyBuilder
 74 | .pybuilder/
 75 | target/
 76 | 
 77 | # Jupyter Notebook
 78 | .ipynb_checkpoints
 79 | 
 80 | # IPython
 81 | profile_default/
 82 | ipython_config.py
 83 | 
 84 | # pyenv
 85 | #   For a library or package, you might want to ignore these files since the code is
 86 | #   intended to run in multiple environments; otherwise, check them in:
 87 | # .python-version
 88 | 
 89 | # pipenv
 90 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 91 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 92 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 93 | #   install all needed dependencies.
 94 | #Pipfile.lock
 95 | 
 96 | # poetry
 97 | #   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
 98 | #   This is especially recommended for binary packages to ensure reproducibility, and is more
 99 | #   commonly ignored for libraries.
100 | #   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
101 | #poetry.lock
102 | 
103 | # pdm
104 | #   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
105 | #pdm.lock
106 | #   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
107 | #   in version control.
108 | #   https://pdm.fming.dev/#use-with-ide
109 | .pdm.toml
110 | 
111 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
112 | __pypackages__/
113 | 
114 | # Celery stuff
115 | celerybeat-schedule
116 | celerybeat.pid
117 | 
118 | # SageMath parsed files
119 | *.sage.py
120 | 
121 | # Environments
122 | .env
123 | .venv
124 | env/
125 | venv/
126 | ENV/
127 | env.bak/
128 | venv.bak/
129 | 
130 | # Spyder project settings
131 | .spyderproject
132 | .spyproject
133 | 
134 | # Rope project settings
135 | .ropeproject
136 | 
137 | # mkdocs documentation
138 | /site
139 | 
140 | # mypy
141 | .mypy_cache/
142 | .dmypy.json
143 | dmypy.json
144 | 
145 | # Pyre type checker
146 | .pyre/
147 | 
148 | # pytype static type analyzer
149 | .pytype/
150 | 
151 | # Cython debug symbols
152 | cython_debug/
153 | 
154 | # PyCharm
155 | #  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
156 | #  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
157 | #  and can be added to the global gitignore or merged into this file.  For a more nuclear
158 | #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
159 | #.idea/
160 | 


--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "gpt-stream-json-parser"]
2 | 	path = gpt-stream-json-parser
3 | 	url = https://github.com/furnqse/gpt-stream-json-parser.git
4 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright 2023 AKARI Group.
 2 | 
 3 | Licensed under the Apache License, Version 2.0 (the “License”);
 4 | you may not use this file except in compliance with the License.
 5 | You may obtain a copy of the License at
 6 | http://www.apache.org/licenses/LICENSE-2.0
 7 | 
 8 | Unless required by applicable law or agreed to in writing, software
 9 | distributed under the License is distributed on an “AS IS” BASIS,
10 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | 
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | 
  2 | # akari_chatgpt_bot
  3 | 音声認識、文章生成、音声合成を使って対話するチャットボットアプリです。  
  4 | 
  5 | ![概要図](jpg/akari_chatgpt_bot.jpg "概要図")
  6 | 
  7 | ## 動作確認済み環境
  8 | AKARI上で動作確認済み。  
  9 | `chatbot_akari.py`以外はUbuntu22.04環境であれば使用可能です。  
 10 | **マイクとスピーカーは別途外付けする必要があります。**  
 11 | 
 12 | ## セットアップ
 13 | 1. submoduleの更新  
 14 | `git submodule update --init`  
 15 | 
 16 | 1. ライブラリのインストール  
 17 | `sudo apt install python3.10 python3.10-venv portaudio19-dev gnome-terminal`  
 18 | 
 19 | 1. 仮想環境の作成  
 20 | `python3 -m venv venv`  
 21 | `. venv/bin/activate`  
 22 | `pip install -r requirements.txt`  
 23 | 
 24 | 1. (音声認識を使う場合) Cloud Speech-to-Text APIの有効化、秘密鍵ダウンロード  
 25 | Google cloud consoleに登録し、Cloud Speech-to-Text APIを有効化する。  
 26 | ユーザーをCloud Speech 管理者に登録する。  
 27 | 認証用のjsonをダウンロードし、`~/.bashrc` にパスを記述  
 28 | `export GOOGLE_APPLICATION_CREDENTIALS=/home/xxx/xxx.json`  
 29 | プロジェクトIDを `~/.bashrc` に記述  
 30 | `export GOOGLE_SPEECH_PROJECT_ID="xxxxxxxxxxx"`
 31 | 
 32 | 1. (chatGPTの文章生成を使う場合)OPENAI API KEYの作成   
 33 | [OPENAI](https://openai.com/)にてユーザ登録しAPI KEYを作成し、~/.bashrcに自身のkeyを記述  
 34 | `export OPENAI_API_KEY=sk-xxxxxxxxxxxxxxx`  
 35 | 
 36 | 1. (Claudeの文章生成を使う場合)ANTHROPIC API KEYの作成   
 37 | [ANTHROPIC](https://www.anthropic.com/)にてユーザ登録しAPI KEYを作成し、~/.bashrcに自身のkeyを記述  
 38 | `export ANTHROPIC_API_KEY=sk-xxxxxxxxxxxxxxx`  
 39 | 
 40 | 1. (Geminiの文章生成を使う場合)GEMINI API KEYの作成   
 41 | [Google AI Studio](https://ai.google.dev/aistudio)にてユーザ登録しAPI KEYを作成し、~/.bashrcに自身のkeyを記述  
 42 | `export GEMINI_API_KEY=xxxxxxxxxxxxxxx`  
 43 | 
 44 | 1. (VoiceVox web版の音声合成を使う場合) VOICEVOX web版のAPI KEYの作成
 45 | [WEB版VOICEVOX API（高速）](https://voicevox.su-shiki.com/su-shikiapis/) にてapikeyを作成し、~/.bashrcに自身のkeyを記述  
 46 | `export VOICEVOX_API_KEY='xxxxxxxxxxxxxxx`  
 47 | 
 48 | 1. (VoiceVoxの音声合成を使う場合) VOICEVOXのダウンロード  
 49 | [VOICEVOX](https://voicevox.hiroshiba.jp/)をダウンロード、インストールする。  
 50 | AKARIでVOICEVOXのローカル版を使う場合、AKARI本体内のCPUでVOICEVOXを実行すると処理時間がかかるので、リモートPC上(特にGPU版)でVOICVOXを実行することを推奨する。  
 51 | その場合下記を参考にdocker pullを行う。  
 52 | (CPU版)  
 53 | `docker pull voicevox/voicevox_engine:cpu-ubuntu20.04-latest`  
 54 | (nvidia GPU版)  
 55 | `docker pull voicevox/voicevox_engine:nvidia-ubuntu20.04-latest`  
 56 | 
 57 | 1. (Style-Bert-VITS2の音声合成を使う場合) Style-Bert-VITS2のセットアップ  
 58 | [Style-Bert-VITS2](https://github.com/litagin02/Style-Bert-VITS2)のREADMEに沿ってセットアップする。
 59 | 
 60 |  し、下記のコマンドでFastAPIサーバを起動する。  
 61 | `python3 server_fastapi.py`  
 62 | AKARIなどで動かす場合は、同一ネットワーク内の外部PC上にサーバーを立てることを推奨。  
 63 | 
 64 | 1. (AivisSpeechの音声合成を使う場合) AivisSpeech Engineのセットアップ  
 65 | AKARI本体内のCPUでAivisSpeech Engineを実行すると処理時間がかかるので、リモートPC上(特にGPUありのPC)で実行することを推奨する。  
 66 | 下記はnvidia GPU搭載のLinux PC、Cuda 12.4、cuDNN9がセットアップされている場合の手順。  
 67 | [AivisSpeech enginiのreleaseページ](https://github.com/Aivis-Project/AivisSpeech-Engine/releases)から最新のAivisSpeech Engineをダウンロードする。  
 68 | 下記のコマンドでFastAPIサーバを起動する。  
 69 | `./run --use_gpu --host {起動しているPCのIPアドレス}`  
 70 | 
 71 | 1. (AKARIのモーション再生を使う場合) akari_motion_serverのセットアップ  
 72 | `git clone https://github.com/AkariGroup/akari_motion_server`  
 73 | akari_motion_server内のREADME.mdに沿ってセットアップする。  
 74 | 
 75 | ## VOICEVOXをOSS版で使いたい場合  
 76 | AKARIでVOICEVOXのローカル版を使う場合、AKARI本体内のCPUでVOICEVOXを実行すると処理時間がかかるので、リモートPC上(特にGPU版)でVOICVOXを実行することを推奨する。  
 77 | その場合下記を参考にdocker pullを行う。  
 78 | (CPU版)  
 79 | `docker pull voicevox/voicevox_engine:cpu-ubuntu20.04-latest`  
 80 | (nvidia GPU版)  
 81 | `docker pull voicevox/voicevox_engine:nvidia-ubuntu20.04-latest`  
 82 | 
 83 | 上記でVOICEVOXを起動した後、AKARI上で"--voice_host"にこのPCのIPアドレスを指定する。  
 84 | 
 85 | ## 実行準備
 86 | 1. 音声合成を使う場合、実行したい環境に合わせて下記を起動する。  
 87 |    (VoiceVox web版)  
 88 |       特に起動は不要。  
 89 |    (VoiceVox)  
 90 |       (CPU版)  
 91 |       `docker run --rm -it -p '{VOICEVOXを起動するPC自身のIPアドレス}:50021:50021' voicevox/voicevox_engine:cpu-ubuntu20.04-latest`  
 92 |       (nvidia GPU版)  
 93 |       `docker run --rm --gpus all -p '{VOICEVOXを起動するPC自身のIPアドレス}:50021:50021' voicevox/voicevox_engine:nvidia-ubuntu20.04-latest`  
 94 |    (Style-Bert-VITS2)  
 95 |       Style-Bert-VITS2のディレクトリ直下で下記を実行  
 96 |       `python3 server_fastapi.py`  
 97 |    (AivisSpeech)
 98 |       AivisSpeech Engineのディレクトリ直下で下記を実行  
 99 |       `./run --use_gpu`  
100 | 
101 | ## サンプルの実行
102 | 
103 | ### 音声認識のサンプル  
104 | マイクへの発話を文章に変換  
105 | `python3 speech_to_text_example.py`  
106 |    引数は下記が使用可能  
107 |    - `-t`,`--timeout`: マイク入力がこの時間しきい値以下になったら音声入力を打ち切る。デフォルトは0.5[s]。短いと応答が早くなるが不安定になりやすい。  
108 |    - `-p`,`--power_threshold`: マイク入力の音量しきい値。デフォルトは0で、0の場合アプリ起動時に周辺環境の音量を取得し、そこから音量しきい値を自動決定する。  
109 |    - `--v2`: この引数をつけると、google sppech-to-text v2を使用する。引数がない場合はgoogle sppech-to-text v1を使用する。  
110 | 
111 | 
112 | ### chatGPTのサンプル  
113 | キーボード入力した文章に対してchatGPTで返答を作成  
114 | `python3 chatgpt_example.py`  
115 | 
116 |    引数は下記が使用可能
117 |    - `-m`, `--model`: 使用するモデル名を指定可能。モデル名を羅列することで、全モデルに対して一括で問いかけが可能。
118 |    例) `python3 chatgpt_example.py -m gpt-4o claude-3-7-sonnet-latest gemini-2.0-flash`
119 |    - `--thinking`: Claudeの拡張思考機能を使うかどうか。このオプションを有効化すると、拡張思考機能を有効化する。`claude-3-7-sonnet-latest`およびその他のclaude3.7系モデル、もしくはgemini2.0以降で使用すること。
120 |    - `--web_search`: web検索を使うかどうか。このオプションを有効化すると、Web検索を行った結果を用いて回答する。gemini2.0以降か`gpt-4.1`系のモデルで使用すること。
121 |    - `-s`, `--system`: システムプロンプトを指定する。指定しない場合、config/system_prompt.txtの内容を使用する。
122 | 
123 | ### 音声合成(VOICEVOX)のサンプル  
124 | キーボード入力した文章を音声合成で発話  
125 | 
126 | `python3 voicevox_example.py`  
127 | 
128 |    引数は下記が使用可能  
129 |    - `--voicevox_local`: このオプションをつけた場合、voicevoxのweb版ではなくローカル版を実行する。  
130 |    - `--voice_host`: `--voicevox_local`を有効にした場合、ここで指定したhostのvoicevoxにリクエストを送信する。デフォルトは"127.0.0.1"なのでlocalhostのvoicevoxを利用する。  
131 |    - `--voice_port`: `--voicevox_local`を有効にした場合、ここで指定したportのvoicevoxにリクエストを送信する。デフォルトは50021。  
132 | 
133 | ### 音声合成(Style-BERT-VITS2)のサンプル  
134 | キーボード入力した文章を音声合成で発話  
135 | 
136 | `python3 style_bert_vits_example.py`  
137 | 
138 |    引数は下記が使用可能  
139 |    - `--voice_host`: ここで指定したhostの`server_fastapi.py`にリクエストを送信する。デフォルトは"127.0.0.1"  
140 |    - `--voice_port`: ここで指定したportの`server_fastapi.py`にリクエストを送信する。デフォルトは5000。  
141 | 
142 | ### 音声合成(Aivis Speech)のサンプル  
143 | キーボード入力した文章を音声合成で発話  
144 | 
145 | `python3 aivis_example.py`  
146 | 
147 |    引数は下記が使用可能  
148 |    - `--voice_host`: ここで指定したhostにリクエストを送信する。デフォルトは"127.0.0.1"  
149 |    - `--voice_port`: ここで指定したportにリクエストを送信する。デフォルトは10101。  
150 | 
151 | ### 英単語→カナ変換のサンプル
152 | キーボード入力した文章内の英単語をカタカナに変換する。
153 | 対話bot内では、英単語をカタカナに変換してから音声合成で発話するようになっており、その機能のサンプル。
154 | 
155 | `python3 text_to_kana_example.py`
156 | 
157 | ## 音声対話の実行
158 | 実行後、ターミナルでEnterキーを押し、マイクに話しかけると返答が返ってくる。  
159 | 
160 | ### 音声対話  
161 | `python3 chatbot.py`  
162 | 
163 | ### 音声対話+AKARIのモーション再生  
164 | `python3 chatbot_akari.py`  
165 | 
166 | 引数は下記が使用可能  
167 | - `-t`,`--timeout`: マイク入力がこの時間しきい値以下になったら音声入力を打ち切る。デフォルトは0.5[s]。短いと応答が早くなるが不安定になりやすい。  
168 | - `-p`,`--power_threshold`: マイク入力の音量しきい値。デフォルトは0で、0の場合アプリ起動時に周辺環境の音量を取得し、そこから音量しきい値を自動決定する。  
169 | - `--v2`: この引数をつけると、google sppech-to-text v2を使用する。引数がない場合はgoogle sppech-to-text v1を使用する。  
170 | - `-m`, `--model`: 使用するモデル名を指定可能。モデル名はOpenAI, Anthropic, Geminiのものが選択可能。  
171 | - `--voicevox_local`: このオプションをつけた場合、voicevoxのweb版ではなくローカル版を実行する。  
172 | - `--voice_host`: `--voicevox_local`を有効にした場合、ここで指定したhostのvoicevoxにリクエストを送信する。デフォルトは"127.0.0.1"なのでlocalhostのvoicevoxを利用する。  
173 | - `--voice_port`: `--voicevox_local`を有効にした場合、ここで指定したportのvoicevoxにリクエストを送信する。デフォルトは50021。  
174 | 
175 | ## 遅延なし音声対話botの実行
176 | 
177 | ### 概要
178 | 
179 | ![遅延なし図解](jpg/faster_chatgpt_bot.jpg "遅延なし図解")
180 | 
181 | 発話の最初の数文字を認識した時点で選択肢から返答を作成しておくことで、第一声を遅延なく返答する方法です。  
182 | 
183 | ### 全体図
184 | 
185 | ![構成図](jpg/faster_chatgpt_bot_system.jpg "構成図")
186 | 
187 | Google音声認識、chatGPT、Voicevoxとのやり取りをする各アプリは個別に動作しており、各アプリ間はgrpcで通信しています。  
188 | 
189 | ### 起動方法
190 | 必ず、上記実行準備で音声合成を起動させておくこと。  
191 | 
192 | 1. (AKARIのモーション再生を行う場合)akari_motion_serverを起動する。  
193 |    起動方法は https://github.com/AkariGroup/akari_motion_server を参照。  
194 | 
195 | 以下2は使用する音声合成の種類によって分岐する。  
196 | 
197 | **音声合成にVOICEVOXを使う場合**  
198 | 
199 | 2. `voicevox_server` を起動する。(Voicevoxへの送信サーバ)  
200 |    `python3 voicevox_server.py`  
201 | 
202 |    引数は下記が使用可能  
203 |    - `--voicevox_local`: このオプションをつけた場合、voicevoxのweb版ではなくローカル版を実行する。  
204 |    - `--voice_host`: `--voicevox_local`を有効にした場合、ここで指定したhostのvoicevoxにリクエストを送信する。デフォルトは"127.0.0.1"なのでlocalhostのvoicevoxを利用する。  
205 |    - `--voice_port`: `--voicevox_local`を有効にした場合、ここで指定したportのvoicevoxにリクエストを送信する。デフォルトは50021。  
206 |    - `--robot_ip`: akari_motion_serverのIPアドレス。デフォルトは"127.0.0.1"  
207 |    - `--robot_port`: akari_motion_serverのポート。デフォルトは"50055"  
208 |    - `--no_motion`: このオプションをつけると、発話に応じてヘッドが動く動作を無効化する。  
209 | 
210 | **音声合成にStyle-Bert-VITS2を使う場合**  
211 | 
212 | 2. `style_bert_vits_server`を起動する。(Style-Bert-VITS2への送信サーバ)  
213 |    `python3 style_bert_vits_server.py`  
214 | 
215 |    引数は下記が使用可能  
216 |    - `--voice_host`: ここで指定したhostの`server_fastapi.py`にリクエストを送信する。デフォルトは"127.0.0.1"  
217 |    - `--voice_port`: ここで指定したportの`server_fastapi.py`にリクエストを送信する。デフォルトは5000。  
218 |    - `--robot_ip`: akari_motion_serverのIPアドレス。デフォルトは"127.0.0.1"  
219 |    - `--robot_port`: akari_motion_serverのポート。デフォルトは"50055"  
220 |    - `--no_motion`: このオプションをつけると、発話に応じてヘッドが動く動作を無効化する。  
221 |   
222 | 
223 | **音声合成にAivis Speechを使う場合**  
224 | 
225 | 2. `aivis_server`を起動する。(Aivis Speechへの送信サーバ)  
226 |    `python3 aivis_server.py`  
227 | 
228 |    引数は下記が使用可能  
229 |    - `--voice_host`: ここで指定したhostにリクエストを送信する。デフォルトは"127.0.0.1"  
230 |    - `--voice_port`: ここで指定したportにリクエストを送信する。デフォルトは10101。  
231 |    - `--robot_ip`: akari_motion_serverのIPアドレス。デフォルトは"127.0.0.1"  
232 |    - `--robot_port`: akari_motion_serverのポート。デフォルトは"50055"  
233 |    - `--no_motion`: このオプションをつけると、発話に応じてヘッドが動く動作を無効化する。  
234 | 
235 | 
236 | 3. `gpt_publisher`を起動する。(ChatGPTへリクエストを送信し、受信結果を音声合成サーバへ渡す。)  
237 |    `python3 gpt_publisher.py`  
238 | 
239 |    引数は下記が使用可能  
240 |    - `--ip`: gpt_serverのIPアドレス。デフォルトは"127.0.0.1"
241 |    - `--port`: gpt_serverのポート。デフォルトは"10001"
242 | 
243 | 4. speech_publisher.pyを起動する。(Google音声認識の結果をgpt_publisherへ渡す。)  
244 |    `python3 speech_publisher.py`  
245 | 
246 |    引数は下記が使用可能  
247 |    - `--robot_ip`: akari_motion_serverのIPアドレス。デフォルトは"127.0.0.1"
248 |    - `--robot_port`: akari_motion_serverのポート。デフォルトは"50055"
249 |    - `--gpt_ip`: gpt_serverのIPアドレス。デフォルトは"127.0.0.1"
250 |    - `--gpt_port`: gpt_serverのポート。デフォルトは"10001"
251 |    - `--voicevox_ip`: voicevox_serverのIPアドレス。デフォルトは"127.0.0.1"
252 |    - `--voicevox_port`: voicevox_serverのポート。デフォルトは"10002"
253 |    - `-t`,`--timeout`: マイク入力がこの時間しきい値以下になったら音声入力を打ち切る。デフォルトは0.5[s]。短いと応答が早くなるが不安定になりやすい。  
254 |    - `-p`,`--power_threshold`: マイク入力の音量しきい値。デフォルトは0で、0の場合アプリ起動時に周辺環境の音量を取得し、そこから音量しきい値を自動決定する。  
255 |    - `--progress_report_len`: 音声認識の文字数がここで入力した数値以上になると、一旦gpt_publisherに認識結果を送り、第一声とモーションを生成する(遅延なし応答用)。0にすると無効。デフォルトは8。
256 |    - `--no_motion`: このオプションをつけた場合、音声入力中のうなずき動作を無効化する。  
257 |    - `--auto`: 自動モードの有効化。通常キーボードでEnterキーを入力するまで待つが、この引数をつけるとEnterキーの入力をスキップする。  
258 |    - `--v2`: この引数をつけると、google sppech-to-text v2を使用する。引数がない場合はgoogle sppech-to-text v1を使用する。  
259 | 
260 | 5. `speech_publisher.py`のターミナルでEnterキーを押し、マイクに話しかけると返答が返ってくる。
261 | 
262 | ### auto modeでの実行について
263 | 上記4.の `speech_publisher.py`に`--auto`オプションをつけて起動すると音声入力前のEnterキー入力をスキップできるが、この場合マイクの設置位置や種類によっては自身の合成音声を認識してしまう。
264 | そのような環境では、下記`talk_controller_client`を起動することで、ロボット側が音声出力中は音声認識をストップすることができる。
265 | 
266 | `python3 talk_controller_client.py`
267 | 
268 | 
269 | ### スクリプトで一括起動する方法
270 | 
271 | 必ず、上記実行準備で音声合成を起動させておくこと。  
272 | 
273 | **音声合成にVOICEVOXを使う場合**  
274 | (通常モード)  
275 | 1. スクリプトを実行する。  
276 | 
277 |    `cd script`  
278 |    `./faster_chatbot.sh {1.でVoicevoxを起動したPCのIPアドレス} {akari_motion_serverのパス}`  
279 | 
280 |    akari_motion_serverのパスを入力しなければ、akari_motion_serverは起動せず、モーションの再生は行われません(AKARI以外でも使えます)。  
281 | 
282 | 2. `speech_publisher.py`のターミナルでEnterキーを押し、マイクに話しかけると返答が返ってくる。
283 | 
284 | (自動モード)  
285 | 1. スクリプトを実行する。  
286 | 
287 |    `cd script`  
288 |    `./faster_chatbot_auto.sh {1.でVoicevoxを起動したPCのIPアドレス} {akari_motion_serverのパス}`  
289 | 
290 |    akari_motion_serverのパスを入力しなければ、akari_motion_serverは起動せず、モーションの再生は行われません(AKARI以外でも使えます)。  
291 | 
292 | 
293 | **音声合成にStyle-Bert-VITS2を使う場合**  
294 | (通常モード)  
295 | 1. スクリプトを実行する。  
296 |    `cd script`  
297 |    `./faster_chatbot_bert_vits.sh {2.でStyle-Bert-VITS2を起動したPCのIPアドレス} akari_motion_serverのパス}`  
298 | 
299 |    akari_motion_serverのパスを入力しなければ、akari_motion_serverは起動せず、モーションの再生は行われません(AKARI以外でも使えます)。  
300 | 
301 | 2. `speech_publisher.py`のターミナルでEnterキーを押し、マイクに話しかけると返答が返ってくる。  
302 | 
303 | (自動モード)  
304 | 1. スクリプトを実行する。  
305 |    `cd script`  
306 |    `./faster_chatbot_bert_vits_auto.sh {2.でStyle-Bert-VITS2を起動したPCのIPアドレス} akari_motion_serverのパス}`  
307 | 
308 |    akari_motion_serverのパスを入力しなければ、akari_motion_serverは起動せず、モーションの再生は行われません(AKARI以外でも使えます)。  
309 | 
310 | ## その他
311 | Voicevoxの音声合成では、デフォルトの音声として「VOICEVOX:春日部つむぎ」を使用しています。  
312 | 


--------------------------------------------------------------------------------
/aivis_example.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | 
 3 | from lib.aivis import TextToAivis
 4 | 
 5 | 
 6 | def main() -> None:
 7 |     host = ""
 8 |     port = ""
 9 |     parser = argparse.ArgumentParser()
10 |     parser.add_argument(
11 |         "--voice_host",
12 |         type=str,
13 |         default="127.0.0.1",
14 |         help="Voice server host",
15 |     )
16 |     parser.add_argument(
17 |         "--voice_port",
18 |         type=str,
19 |         default="10101",
20 |         help="Voice server port",
21 |     )
22 |     args = parser.parse_args()
23 |     host = args.voice_host
24 |     port = args.voice_port
25 |     text_to_voice = TextToAivis(host, port)
26 | 
27 |     print(f"Speaker一覧: {text_to_voice.get_speaker_names()}")
28 | 
29 |     # set_paramメソッドでモデル名や音声再生速度、感情スタイルなどを指定することができます。
30 |     # モデル名を指定
31 |     # text_to_voice.set_param(speaker='Anneli')
32 |     # 音声再生速度を指定
33 |     # text_to_voice.set_param(speed_scale=1.3)
34 |     # 感情スタイルを指定
35 |     # text_to_voice.set_param(style="怒り・悲しみ")
36 | 
37 |     print(f"現在のSpeaker: {text_to_voice.speaker}")
38 |     print("")
39 |     print(
40 |         f"{text_to_voice.speaker}のスタイル一覧: {text_to_voice.get_style_names(text_to_voice.speaker)}"
41 |     )
42 |     print(f"現在のStyle: {text_to_voice.style}")
43 |     print("")
44 | 
45 |     print("発話させたい文章をキーボード入力後、Enterを押してください。")
46 |     while True:
47 |         text = input("Input: ")
48 |         text_to_voice.put_text(text=text, blocking=True)
49 |         print("")
50 | 
51 | 
52 | if __name__ == "__main__":
53 |     main()
54 | 


--------------------------------------------------------------------------------
/aivis_server.py:
--------------------------------------------------------------------------------
  1 | import argparse
  2 | import os
  3 | import sys
  4 | import time
  5 | from concurrent import futures
  6 | from typing import Any
  7 | 
  8 | import grpc
  9 | from lib.aivis import TextToAivis
 10 | 
 11 | sys.path.append(os.path.join(os.path.dirname(__file__), "lib/grpc"))
 12 | import voice_server_pb2
 13 | import voice_server_pb2_grpc
 14 | 
 15 | 
 16 | class VoiceServer(voice_server_pb2_grpc.VoiceServerServiceServicer):
 17 |     """
 18 |     Aivisにtextを送信し、音声を再生するgprcサーバ
 19 |     """
 20 | 
 21 |     def __init__(self, text_to_voice: Any) -> None:
 22 |         self.text_to_voice = text_to_voice
 23 | 
 24 |     def SetText(
 25 |         self,
 26 |         request: voice_server_pb2.SetTextRequest(),
 27 |         context: grpc.ServicerContext,
 28 |     ) -> voice_server_pb2.SetTextReply:
 29 |         # 即時再生しないようにis_playはFalseで実行
 30 |         print(f"Send text: {request.text}")
 31 |         self.text_to_voice.put_text(request.text, play_now=False)
 32 |         return voice_server_pb2.SetTextReply(success=True)
 33 | 
 34 |     def SetStyleBertVitsParam(
 35 |         self,
 36 |         request: voice_server_pb2.SetStyleBertVitsParamRequest(),
 37 |         context: grpc.ServicerContext,
 38 |     ) -> voice_server_pb2.SetStyleBertVitsParamReply:
 39 |         print("SetStyleBertVitsParam is not supported on aivis_server.")
 40 |         return voice_server_pb2.SetStyleBertVitsParamReply(success=True)
 41 | 
 42 |     def SetVoicevoxParam(
 43 |         self,
 44 |         request: voice_server_pb2.SetVoicevoxParamRequest(),
 45 |         context: grpc.ServicerContext,
 46 |     ) -> voice_server_pb2.SetVoicevoxParamReply:
 47 |         print("SetVoicevoxParam is not supported on aivis_server.")
 48 |         return voice_server_pb2.SetVoicevoxParamReply(success=False)
 49 | 
 50 |     def SetAivisParam(
 51 |         self,
 52 |         request: voice_server_pb2.SetAivisParamRequest(),
 53 |         context: grpc.ServicerContext,
 54 |     ) -> voice_server_pb2.SetAivisParamReply:
 55 |         self.text_to_voice.set_param(
 56 |             speaker=request.speaker,
 57 |             style=request.style,
 58 |             speed_scale=request.speed_scale,
 59 |         )
 60 |         return voice_server_pb2.SetAivisParamReply(success=True)
 61 | 
 62 |     def InterruptVoice(
 63 |         self,
 64 |         request: voice_server_pb2.InterruptVoiceRequest(),
 65 |         context: grpc.ServicerContext,
 66 |     ) -> voice_server_pb2.InterruptVoiceReply:
 67 |         while not self.text_to_voice.queue.empty():
 68 |             self.text_to_voice.queue.get()
 69 |         return voice_server_pb2.InterruptVoiceReply(success=True)
 70 | 
 71 |     def EnableVoicePlay(
 72 |         self,
 73 |         request: voice_server_pb2.EnableVoicePlayRequest(),
 74 |         context: grpc.ServicerContext,
 75 |     ) -> voice_server_pb2.EnableVoicePlayReply:
 76 |         self.text_to_voice.enable_voice_play()
 77 |         return voice_server_pb2.EnableVoicePlayReply(success=True)
 78 | 
 79 |     def DisableVoicePlay(
 80 |         self,
 81 |         request: voice_server_pb2.DisableVoicePlayRequest(),
 82 |         context: grpc.ServicerContext,
 83 |     ) -> voice_server_pb2.DisableVoicePlayReply:
 84 |         self.text_to_voice.disable_voice_play()
 85 |         return voice_server_pb2.DisableVoicePlayReply(success=True)
 86 | 
 87 |     def IsVoicePlaying(
 88 |         self,
 89 |         request: voice_server_pb2.IsVoicePlayingRequest(),
 90 |         context: grpc.ServicerContext,
 91 |     ) -> voice_server_pb2.IsVoicePlayingReply:
 92 |         return voice_server_pb2.IsVoicePlayingReply(
 93 |             is_playing=not self.text_to_voice.is_playing()
 94 |         )
 95 | 
 96 |     def SentenceEnd(
 97 |         self,
 98 |         request: voice_server_pb2.SentenceEndRequest(),
 99 |         context: grpc.ServicerContext,
100 |     ) -> voice_server_pb2.SentenceEndReply:
101 |         self.text_to_voice.sentence_end()
102 |         return voice_server_pb2.SentenceEndReply(success=True)
103 | 
104 |     def StartHeadControl(
105 |         self,
106 |         request: voice_server_pb2.StartHeadControlRequest(),
107 |         context: grpc.ServicerContext,
108 |     ) -> voice_server_pb2.StartHeadControlReply:
109 |         self.text_to_voice.start_head_control()
110 |         return voice_server_pb2.StartHeadControlReply(success=False)
111 | 
112 | 
113 | def main() -> None:
114 |     parser = argparse.ArgumentParser()
115 |     parser.add_argument(
116 |         "--voice_host",
117 |         type=str,
118 |         default="127.0.0.1",
119 |         help="Aivis-Speech server host",
120 |     )
121 |     parser.add_argument(
122 |         "--voice_port",
123 |         type=str,
124 |         default="10101",
125 |         help="Aivis-Speech server port",
126 |     )
127 |     parser.add_argument(
128 |         "--robot_ip", help="Robot ip address", default="127.0.0.1", type=str
129 |     )
130 |     parser.add_argument(
131 |         "--robot_port", help="Robot port number", default="50055", type=str
132 |     )
133 |     parser.add_argument(
134 |         "--no_motion",
135 |         help="Not play nod motion",
136 |         action="store_true",
137 |     )
138 |     args = parser.parse_args()
139 |     host = args.voice_host
140 |     port = args.voice_port
141 |     motion_server_host = None
142 |     motion_server_port = None
143 |     if not args.no_motion:
144 |         motion_server_host = args.robot_ip
145 |         motion_server_port = args.robot_port
146 |     text_to_voice = TextToAivis(
147 |         host=host,
148 |         port=port,
149 |         motion_host=motion_server_host,
150 |         motion_port=motion_server_port,
151 |     )
152 | 
153 |     server = grpc.server(futures.ThreadPoolExecutor(max_workers=10))
154 |     voice_server_pb2_grpc.add_VoiceServerServiceServicer_to_server(
155 |         VoiceServer(text_to_voice), server
156 |     )
157 |     port = "10002"
158 |     server.add_insecure_port("[::]:" + port)
159 |     server.start()
160 |     print(f"voice_server start. port: {port}")
161 |     server.wait_for_termination()
162 | 
163 | 
164 | if __name__ == "__main__":
165 |     main()
166 | 


--------------------------------------------------------------------------------
/chatbot.py:
--------------------------------------------------------------------------------
  1 | import argparse
  2 | import os
  3 | 
  4 | from lib.chat_akari import ChatStreamAkari
  5 | 
  6 | # Audio recording parameters
  7 | RATE = 16000
  8 | CHUNK = int(RATE / 10)  # 100ms
  9 | POWER_THRESH_DIFF = 25  # 周辺音量にこの値を足したものをpower_threshouldとする
 10 | 
 11 | host: str = ""
 12 | port: str = ""
 13 | 
 14 | 
 15 | def main() -> None:
 16 |     global host
 17 |     global port
 18 |     parser = argparse.ArgumentParser()
 19 |     parser.add_argument(
 20 |         "-t",
 21 |         "--timeout",
 22 |         type=float,
 23 |         default=0.5,
 24 |         help="Microphone input power timeout",
 25 |     )
 26 |     parser.add_argument(
 27 |         "-p",
 28 |         "--power_threshold",
 29 |         type=float,
 30 |         default=0,
 31 |         help="Microphone input power threshold",
 32 |     )
 33 |     parser.add_argument(
 34 |         "--v2",
 35 |         action="store_true",
 36 |         help="Use google speech v2 instead of v1",
 37 |     )
 38 |     parser.add_argument(
 39 |         "-m", "--model", help="LLM model name", default="gpt-4o", type=str
 40 |     )
 41 |     parser.add_argument("--voicevox_local", action="store_true")
 42 |     parser.add_argument(
 43 |         "--voicevox_host",
 44 |         type=str,
 45 |         default="127.0.0.1",
 46 |         help="VoiceVox server host",
 47 |     )
 48 |     parser.add_argument(
 49 |         "--voicevox_port",
 50 |         type=str,
 51 |         default="50021",
 52 |         help="VoiceVox server port",
 53 |     )
 54 |     args = parser.parse_args()
 55 |     if args.v2:
 56 |         from lib.google_speech_v2 import MicrophoneStreamV2 as MicrophoneStream
 57 |         from lib.google_speech_v2 import get_db_thresh, listen_print_loop
 58 |     else:
 59 |         from lib.google_speech import MicrophoneStream, get_db_thresh, listen_print_loop
 60 |     timeout: float = args.timeout
 61 |     power_threshold: float = args.power_threshold
 62 |     if power_threshold == 0:
 63 |         power_threshold = get_db_thresh() + POWER_THRESH_DIFF
 64 |     print(f"power_threshold set to {power_threshold:.3f}db")
 65 |     if args.voicevox_local:
 66 |         from lib.voicevox import TextToVoiceVox
 67 | 
 68 |         host = args.voicevox_host
 69 |         port = args.voicevox_port
 70 |         text_to_voice = TextToVoiceVox(host, port)
 71 |     else:
 72 |         from lib.conf import VOICEVOX_APIKEY
 73 |         from lib.voicevox import TextToVoiceVoxWeb
 74 | 
 75 |         text_to_voice = TextToVoiceVoxWeb(apikey=VOICEVOX_APIKEY)
 76 | 
 77 |     chat_stream_akari = ChatStreamAkari()
 78 |     SYSTEM_PROMPT_PATH = (
 79 |         f"{os.path.dirname(os.path.realpath(__file__))}/config/system_prompt.txt"
 80 |     )
 81 |     content = open(SYSTEM_PROMPT_PATH, "r").read()
 82 |     messages = [
 83 |         {
 84 |             "role": "system",
 85 |             "content": content,
 86 |         }
 87 |     ]
 88 |     while True:
 89 |         # 音声認識
 90 |         text = ""
 91 |         responses = None
 92 |         with MicrophoneStream(
 93 |             rate=RATE, chunk=CHUNK, _timeout_thresh=timeout, _db_thresh=power_threshold
 94 |         ) as stream:
 95 |             print("Enterを入力してください")
 96 |             input()
 97 |             responses = stream.transcribe()
 98 |             if responses is not None:
 99 |                 text = listen_print_loop(responses)
100 |         # chatGPT
101 |         # 2文字以上の入力でない場合は回答しない。
102 |         if len(text) >= 2:
103 |             messages.append({"role": "user", "content": text})
104 |             print(f"User   : {text}")
105 |             print(f"{args.model} :")
106 |             response = ""
107 |             for sentence in chat_stream_akari.chat(messages, model=args.model):
108 |                 # 音声合成
109 |                 text_to_voice.put_text(sentence)
110 |                 response += sentence
111 |                 print(sentence, end="", flush=True)
112 |             text_to_voice.sentence_end()
113 |             messages.append({"role": "assistant", "content": response})
114 |         print("")
115 |         print("")
116 | 
117 | 
118 | if __name__ == "__main__":
119 |     main()
120 | 


--------------------------------------------------------------------------------
/chatbot_akari.py:
--------------------------------------------------------------------------------
  1 | import argparse
  2 | import os
  3 | import sys
  4 | 
  5 | import grpc
  6 | from lib.chat_akari import ChatStreamAkari
  7 | 
  8 | sys.path.append(os.path.join(os.path.dirname(__file__), "lib/grpc"))
  9 | import motion_server_pb2
 10 | import motion_server_pb2_grpc
 11 | 
 12 | # Audio recording parameters
 13 | RATE = 16000
 14 | CHUNK = int(RATE / 10)  # 100ms
 15 | POWER_THRESH_DIFF = 25  # 周辺音量にこの値を足したものをpower_threshouldとする
 16 | 
 17 | host: str = ""
 18 | port: str = ""
 19 | 
 20 | 
 21 | def main() -> None:
 22 |     global host
 23 |     global port
 24 |     parser = argparse.ArgumentParser()
 25 |     parser.add_argument("--robot_ip", help="Ip address", default="127.0.0.1", type=str)
 26 |     parser.add_argument("--robot_port", help="Port number", default="50055", type=str)
 27 |     parser.add_argument(
 28 |         "-t",
 29 |         "--timeout",
 30 |         type=float,
 31 |         default=0.5,
 32 |         help="Microphone input power timeout",
 33 |     )
 34 |     parser.add_argument(
 35 |         "-p",
 36 |         "--power_threshold",
 37 |         type=float,
 38 |         default=0,
 39 |         help="Microphone input power threshold",
 40 |     )
 41 |     parser.add_argument(
 42 |         "--v2",
 43 |         action="store_true",
 44 |         help="Use google speech v2 instead of v1",
 45 |     )
 46 |     parser.add_argument(
 47 |         "-m", "--model", help="LLM model name", default="gpt-4o", type=str
 48 |     )
 49 |     parser.add_argument("--voicevox_local", action="store_true")
 50 |     parser.add_argument(
 51 |         "--voice_host",
 52 |         type=str,
 53 |         default="127.0.0.1",
 54 |         help="VoiceVox server host",
 55 |     )
 56 |     parser.add_argument(
 57 |         "--voice_port",
 58 |         type=str,
 59 |         default="50021",
 60 |         help="VoiceVox server port",
 61 |     )
 62 |     args = parser.parse_args()
 63 |     if args.v2:
 64 |         from lib.google_speech_v2 import MicrophoneStreamV2 as MicrophoneStream
 65 |         from lib.google_speech_v2 import get_db_thresh, listen_print_loop
 66 |     else:
 67 |         from lib.google_speech import MicrophoneStream, get_db_thresh, listen_print_loop
 68 |     timeout: float = args.timeout
 69 |     power_threshold: float = args.power_threshold
 70 |     if power_threshold == 0:
 71 |         power_threshold = get_db_thresh() + POWER_THRESH_DIFF
 72 |     print(f"power_threshold set to {power_threshold:.3f}db")
 73 |     if args.voicevox_local:
 74 |         from lib.voicevox import TextToVoiceVox
 75 | 
 76 |         host = args.voice_host
 77 |         port = args.voice_port
 78 |         text_to_voice = TextToVoiceVox(host, port)
 79 |     else:
 80 |         from lib.conf import VOICEVOX_APIKEY
 81 |         from lib.voicevox import TextToVoiceVoxWeb
 82 | 
 83 |         text_to_voice = TextToVoiceVoxWeb(apikey=VOICEVOX_APIKEY)
 84 | 
 85 |     channel = grpc.insecure_channel(args.robot_ip + ":" + str(args.robot_port))
 86 |     stub = motion_server_pb2_grpc.MotionServerServiceStub(channel)
 87 |     SYSTEM_PROMPT_PATH = (
 88 |         f"{os.path.dirname(os.path.realpath(__file__))}/config/system_prompt.txt"
 89 |     )
 90 |     content = open(SYSTEM_PROMPT_PATH, "r").read()
 91 |     messages = [
 92 |         {
 93 |             "role": "system",
 94 |             "content": content,
 95 |         }
 96 |     ]
 97 |     chat_stream_akari = ChatStreamAkari(args.robot_ip, args.robot_port)
 98 |     while True:
 99 |         # 音声認識
100 |         text = ""
101 |         responses = None
102 |         with MicrophoneStream(
103 |             rate=RATE, chunk=CHUNK, _timeout_thresh=timeout, _db_thresh=power_threshold
104 |         ) as stream:
105 |             print("Enterを入力してください")
106 |             input()
107 |             # うなずきモーション再生
108 |             try:
109 |                 stub.SetMotion(
110 |                     motion_server_pb2.SetMotionRequest(
111 |                         name="nod", priority=3, repeat=True
112 |                     )
113 |                 )
114 |             except BaseException:
115 |                 print("akari_motion_server is not working.")
116 |             responses = stream.transcribe()
117 |             if responses is not None:
118 |                 text = listen_print_loop(responses)
119 |         # 2文字以上の入力でない場合は回答しない。
120 |         if len(text) >= 2:
121 |             # chatGPT
122 |             messages.append({"role": "user", "content": text})
123 |             print(f"User   : {text}")
124 |             print(f"{args.model} :")
125 |             response = ""
126 |             # 音声合成
127 |             for sentence in chat_stream_akari.chat_and_motion(
128 |                 messages, model=args.model
129 |             ):
130 |                 text_to_voice.put_text(sentence)
131 |                 response += sentence
132 |                 print(sentence, end="", flush=True)
133 |             text_to_voice.sentence_end()
134 |             messages.append({"role": "assistant", "content": response})
135 |             print("")
136 |             print("")
137 | 
138 | 
139 | if __name__ == "__main__":
140 |     main()
141 | 


--------------------------------------------------------------------------------
/chatgpt_example.py:
--------------------------------------------------------------------------------
  1 | import argparse
  2 | import os
  3 | import time
  4 | 
  5 | from lib.chat_akari import ChatStreamAkari
  6 | 
  7 | 
  8 | def main() -> None:
  9 |     parser = argparse.ArgumentParser()
 10 |     parser.add_argument(
 11 |         "-m",
 12 |         "--model",
 13 |         nargs="+",
 14 |         type=str,
 15 |         default=["gpt-4o"],
 16 |         help="Model name list",
 17 |     )
 18 |     parser.add_argument(
 19 |         "--thinking",
 20 |         action="store_true",
 21 |         help="Use thinking mode (anthropic and gemini model only)",
 22 |     )
 23 |     parser.add_argument(
 24 |         "--web_search",
 25 |         action="store_true",
 26 |         help="Use web search grounding (openai and gemini model only)",
 27 |     )
 28 |     parser.add_argument("-s", "--system", default="", type=str, help="System prompt")
 29 |     args = parser.parse_args()
 30 |     chat_stream_akari = ChatStreamAkari()
 31 |     # systemメッセージの作成
 32 |     messages_list = []
 33 |     content = None
 34 |     if args.system == "":
 35 |         SYSTEM_PROMPT_PATH = (
 36 |             f"{os.path.dirname(os.path.realpath(__file__))}/config/system_prompt.txt"
 37 |         )
 38 |         content = open(SYSTEM_PROMPT_PATH, "r").read()
 39 |     else:
 40 |         content = args.system
 41 |     for i in range(0, len(args.model)):
 42 |         messages_list.append([chat_stream_akari.create_message(content, role="system")])
 43 |     while True:
 44 |         print("文章をキーボード入力後、Enterを押してください。")
 45 |         text = input("Input: ")
 46 |         # userメッセージの追加
 47 |         print(f"User   : {text}")
 48 |         for i, model in enumerate(args.model):
 49 |             print(f"{model}: ")
 50 |             messages_list[i].append(chat_stream_akari.create_message(text))
 51 |             response = ""
 52 |             start = time.time()
 53 |             is_first = True
 54 |             output_delay = 0.0
 55 |             if args.thinking:
 56 |                 for sentence in chat_stream_akari.chat_thinking(
 57 |                     messages_list[i],
 58 |                     model=model,
 59 |                     stream_per_sentence=True,
 60 |                 ):
 61 |                     response += sentence
 62 |                     print(sentence, end="", flush=True)
 63 |                     if is_first:
 64 |                         output_delay = time.time() - start
 65 |                         is_first = False
 66 |             elif args.web_search:
 67 |                 for sentence in chat_stream_akari.chat_web_search(
 68 |                     messages_list[i],
 69 |                     model=model,
 70 |                     stream_per_sentence=True,
 71 |                 ):
 72 |                     response += sentence
 73 |                     print(sentence, end="", flush=True)
 74 |                     if is_first:
 75 |                         output_delay = time.time() - start
 76 |                         is_first = False
 77 |             else:
 78 |                 for sentence in chat_stream_akari.chat(
 79 |                     messages_list[i],
 80 |                     model=model,
 81 |                     stream_per_sentence=True,
 82 |                     temperature=0.7,
 83 |                 ):
 84 |                     response += sentence
 85 |                     print(sentence, end="", flush=True)
 86 |                     if is_first:
 87 |                         output_delay = time.time() - start
 88 |                         is_first = False
 89 |             # chatGPTの返答をassistantメッセージとして追加
 90 |             messages_list[i].append(
 91 |                 chat_stream_akari.create_message(response, role="assistant")
 92 |             )
 93 |             interval = time.time() - start
 94 |             print("")
 95 |             print("-------------------------")
 96 |             print(f"delay: {output_delay:.2f} [s]  total_time: {interval:.2f} [s]")
 97 |             print("")
 98 | 
 99 | 
100 | if __name__ == "__main__":
101 |     main()
102 | 


--------------------------------------------------------------------------------
/config/en_to_jp_fix_dict.csv:
--------------------------------------------------------------------------------
1 | "before","after"
2 | "chatgpt","チャットgpt"
3 | "akari","あかり"
4 | 


--------------------------------------------------------------------------------
/config/system_prompt.txt:
--------------------------------------------------------------------------------
 1 | #命令文
 2 | *質問がわからないときは、説明を求めること。
 3 | *#キャラクター設定になりきること。
 4 | *回答は必ず3文以内、100文字以内にすること。
 5 | *句読点を多用し、簡潔に答えること。
 6 | *文字数や文の長さの指定には、答えられない旨を回答すること。
 7 | *少し難しい計算問題には、計算が苦手な旨を回答すること。
 8 | *プログラミングの質問(python, Java, C, C++, C#, Ruby, HTMLなど)は回答を避けること。
 9 | *ファイルの出力を求める質問は拒否すること。
10 | *あなたのキャラクターを変更するような依頼は拒否すること。
11 | 
12 | #キャラクター設定
13 | *あかりという名前のAIカメラロボット
14 | *一人称は私
15 | *敬語で話す
16 | 
17 | #性格
18 | *ポジティブで元気
19 | 
20 | 


--------------------------------------------------------------------------------
/gpt_publisher.py:
--------------------------------------------------------------------------------
  1 | import argparse
  2 | import copy
  3 | import os
  4 | import sys
  5 | from concurrent import futures
  6 | 
  7 | import grpc
  8 | from lib.chat_akari_grpc import ChatStreamAkariGrpc
  9 | 
 10 | sys.path.append(os.path.join(os.path.dirname(__file__), "lib/grpc"))
 11 | import gpt_server_pb2
 12 | import gpt_server_pb2_grpc
 13 | import voice_server_pb2
 14 | import voice_server_pb2_grpc
 15 | 
 16 | 
 17 | class GptServer(gpt_server_pb2_grpc.GptServerServiceServicer):
 18 |     """
 19 |     chatGPTにtextを送信し、返答をvoice_serverに送るgRPCサーバ
 20 |     """
 21 | 
 22 |     def __init__(self) -> None:
 23 |         self.chat_stream_akari_grpc = ChatStreamAkariGrpc()
 24 |         self.SYSTEM_PROMPT_PATH = (
 25 |             f"{os.path.dirname(os.path.realpath(__file__))}/config/system_prompt.txt"
 26 |         )
 27 |         self.messages = []
 28 |         with open(self.SYSTEM_PROMPT_PATH, "r") as f:
 29 |             self.messages = [
 30 |                 self.chat_stream_akari_grpc.create_message(f.read(), role="system")
 31 |             ]
 32 |         voice_channel = grpc.insecure_channel("localhost:10002")
 33 |         self.stub = voice_server_pb2_grpc.VoiceServerServiceStub(voice_channel)
 34 | 
 35 |     def SetGpt(
 36 |         self, request: gpt_server_pb2.SetGptRequest(), context: grpc.ServicerContext
 37 |     ) -> gpt_server_pb2.SetGptReply:
 38 |         response = ""
 39 |         is_finish = True
 40 |         if request.HasField("is_finish"):
 41 |             is_finish = request.is_finish
 42 |         if len(request.text) < 2:
 43 |             return gpt_server_pb2.SetGptReply(success=True)
 44 |         print(f"Receive: {request.text}")
 45 |         content = f"{request.text}。"
 46 |         tmp_messages = copy.deepcopy(self.messages)
 47 |         tmp_messages.append(self.chat_stream_akari_grpc.create_message(content))
 48 |         if is_finish:
 49 |             self.messages = copy.deepcopy(tmp_messages)
 50 |             # 最終応答。高速生成するために、モデルはgpt-4o
 51 |             self.stub.StartHeadControl(voice_server_pb2.StartHeadControlRequest())
 52 |             for sentence in self.chat_stream_akari_grpc.chat(
 53 |                 tmp_messages, model="gpt-4o"
 54 |             ):
 55 |                 print(f"Send to voice server: {sentence}")
 56 |                 self.stub.SetText(voice_server_pb2.SetTextRequest(text=sentence))
 57 |                 response += sentence
 58 |             # Sentenceの終了を通知
 59 |             self.stub.SentenceEnd(voice_server_pb2.SentenceEndRequest())
 60 |             self.messages.append(
 61 |                 self.chat_stream_akari_grpc.create_message(response, role="assistant")
 62 |             )
 63 |         else:
 64 |             # 途中での第一声とモーション準備。function_callingの確実性のため、モデルはgpt-4-turbo
 65 |             for sentence in self.chat_stream_akari_grpc.chat_and_motion(
 66 |                 tmp_messages, model="gpt-4-turbo", short_response=True
 67 |             ):
 68 |                 print(f"Send to voice server: {sentence}")
 69 |                 self.stub.SetText(voice_server_pb2.SetTextRequest(text=sentence))
 70 |                 response += sentence
 71 |         print("")
 72 |         return gpt_server_pb2.SetGptReply(success=True)
 73 | 
 74 |     def SendMotion(
 75 |         self, request: gpt_server_pb2.SendMotionRequest(), context: grpc.ServicerContext
 76 |     ) -> gpt_server_pb2.SendMotionReply:
 77 |         success = self.chat_stream_akari_grpc.send_reserved_motion()
 78 |         return gpt_server_pb2.SendMotionReply(success=success)
 79 | 
 80 | 
 81 | def main() -> None:
 82 |     parser = argparse.ArgumentParser()
 83 |     parser.add_argument(
 84 |         "--ip", help="Gpt server ip address", default="127.0.0.1", type=str
 85 |     )
 86 |     parser.add_argument(
 87 |         "--port", help="Gpt server port number", default="10001", type=str
 88 |     )
 89 |     args = parser.parse_args()
 90 |     server = grpc.server(futures.ThreadPoolExecutor(max_workers=10))
 91 |     gpt_server_pb2_grpc.add_GptServerServiceServicer_to_server(GptServer(), server)
 92 |     server.add_insecure_port(args.ip + ":" + args.port)
 93 |     server.start()
 94 |     print(f"gpt_publisher start. port: {args.port}")
 95 |     server.wait_for_termination()
 96 | 
 97 | 
 98 | if __name__ == "__main__":
 99 |     main()
100 | 


--------------------------------------------------------------------------------
/jpg/akari_chatgpt_bot.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AkariGroup/akari_chatgpt_bot/3e5fce76b6237b408b995a899f78b5ae91f3f82f/jpg/akari_chatgpt_bot.jpg


--------------------------------------------------------------------------------
/jpg/faster_chatgpt_bot.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AkariGroup/akari_chatgpt_bot/3e5fce76b6237b408b995a899f78b5ae91f3f82f/jpg/faster_chatgpt_bot.jpg


--------------------------------------------------------------------------------
/jpg/faster_chatgpt_bot_system.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AkariGroup/akari_chatgpt_bot/3e5fce76b6237b408b995a899f78b5ae91f3f82f/jpg/faster_chatgpt_bot_system.jpg


--------------------------------------------------------------------------------
/lib/aivis.py:
--------------------------------------------------------------------------------
  1 | import json
  2 | from typing import Any, Optional
  3 | 
  4 | import requests
  5 | from lib.text_to_voice import TextToVoice
  6 | 
  7 | 
  8 | class TextToAivis(TextToVoice):
  9 |     """
 10 |     Aivisを使用してテキストから音声を生成するクラス。
 11 |     """
 12 | 
 13 |     def __init__(
 14 |         self,
 15 |         host: str = "127.0.0.1",
 16 |         port: str = "10101",
 17 |         motion_host: Optional[str] = "127.0.0.1",
 18 |         motion_port: Optional[str] = "50055",
 19 |     ) -> None:
 20 |         """クラスの初期化メソッド。
 21 |         Args:
 22 |             host (str, optional): Aivisサーバーのホスト名。デフォルトは "127.0.0.1"。
 23 |             port (str, optional): Aivisサーバーのポート番号。デフォルトは "10101"。
 24 |             motion_host (str, optional): モーションサーバーのホスト名。デフォルトは"127.0.0.1"。
 25 |             motion_port (str, optional): モーションサーバーのポート番号。デフォルトは"50055"。
 26 | 
 27 |         """
 28 |         super().__init__(
 29 |             host=host, port=port, motion_host=motion_host, motion_port=motion_port
 30 |         )
 31 |         # デフォルトのspeakerはAnneli
 32 |         self.speaker = "Anneli"
 33 |         self.style = "ノーマル"
 34 |         self.speed_scale = 1.0
 35 |         self.speaker_id = self.get_speaker_id(self.speaker, self.style)
 36 | 
 37 |     def set_param(
 38 |         self,
 39 |         speaker: Optional[int] = None,
 40 |         style: Optional[str] = None,
 41 |         speed_scale: Optional[float] = None,
 42 |     ) -> None:
 43 |         """
 44 |         音声合成のパラメータを設定する。
 45 | 
 46 |         Args:
 47 |             speaker (Optional[int], optional): Aivisの話者番号。デフォルトはNone。
 48 |             speed_scale (Optional[float], optional): 音声の再生速度スケール。デフォルトはNone。
 49 | 
 50 |         """
 51 |         if speaker is not None:
 52 |             self.speaker = speaker
 53 |         if style is not None:
 54 |             self.style = style
 55 |         if speed_scale is not None:
 56 |             self.speed_scale = speed_scale
 57 |         self.speaker_id = self.get_speaker_id(self.speaker, self.style)
 58 | 
 59 |     def post_audio_query(
 60 |         self,
 61 |         text: str,
 62 |     ) -> Any:
 63 |         """Aivisサーバーに音声合成クエリを送信する。
 64 | 
 65 |         Args:
 66 |             text (str): 音声合成対象のテキスト。
 67 |             speaker (int, optional): Aivisの話者番号。デフォルトは8(春日部つむぎ)。
 68 |             speed_scale (float, optional): 音声の再生速度スケール。デフォルトは1.0。
 69 | 
 70 |         Returns:
 71 |             Any: 音声合成クエリの応答。
 72 | 
 73 |         """
 74 |         if len(text.strip()) <= 0:
 75 |             return None
 76 |         params = {
 77 |             "text": text,
 78 |             "speaker": self.speaker_id,
 79 |             "speedScale": self.speed_scale,
 80 |             "prePhonemeLength": 0,
 81 |             "postPhonemeLength": 0,
 82 |         }
 83 |         address = "http://" + self.host + ":" + self.port + "/audio_query"
 84 |         res = requests.post(address, params=params)
 85 |         return res.json()
 86 | 
 87 |     def post_synthesis(
 88 |         self,
 89 |         audio_query_response: dict,
 90 |     ) -> bytes:
 91 |         """
 92 |         Aivisサーバーに音声合成要求を送信し、合成された音声データを取得する。
 93 | 
 94 |         Args:
 95 |             audio_query_response (dict): 音声合成クエリの応答。
 96 | 
 97 |         Returns:
 98 |             bytes: 合成された音声データ。
 99 |         """
100 |         params = {"speaker": self.speaker_id}
101 |         headers = {"content-type": "application/json"}
102 |         audio_query_response["speedScale"] = self.speed_scale
103 |         audio_query_response_json = json.dumps(audio_query_response)
104 |         address = "http://" + self.host + ":" + self.port + "/synthesis"
105 |         res = requests.post(
106 |             address, data=audio_query_response_json, params=params, headers=headers
107 |         )
108 |         return res.content
109 | 
110 |     def text_to_voice(self, text: str) -> None:
111 |         """
112 |         テキストから音声を合成して再生する。
113 | 
114 |         Args:
115 |             text (str): 音声合成対象のテキスト。
116 | 
117 |         """
118 |         res = self.post_audio_query(text)
119 |         if res is None:
120 |             return
121 |         wav = self.post_synthesis(res)
122 |         if wav is not None:
123 |             print(f"[Play] {text}")
124 |             self.play_wav(wav)
125 | 
126 |     def get_speaker(self) -> Any:
127 |         """
128 |         Aivisの話者情報を取得する。
129 | 
130 |         Returns:
131 |             Any: Aivisの話者情報。
132 |         """
133 |         headers = {"content-type": "application/json"}
134 |         address = "http://" + self.host + ":" + self.port + "/speakers"
135 |         res = requests.get(address, headers=headers)
136 |         return res.json()
137 | 
138 |     def get_speaker_names(self) -> Any:
139 |         """
140 |         Aivisの話者名を取得する。
141 | 
142 |         Returns:
143 |             Any: Aivisの話者名。
144 |         """
145 |         speakers = self.get_speaker()
146 |         speaker_names = []
147 |         for speaker in speakers:
148 |             speaker_names.append(speaker["name"])
149 |         return speaker_names
150 | 
151 |     def get_style_names(self, speaker_name) -> Any:
152 |         """
153 |         Aivisの話者名から感情スタイル名を取得する。
154 | 
155 |         Args:
156 |             speaker_name (str): 話者名。
157 | 
158 |         Returns:
159 |             Any: 感情スタイル名。
160 |         """
161 |         speakers = self.get_speaker()
162 |         for speaker in speakers:
163 |             if speaker["name"] == speaker_name:
164 |                 style_names = []
165 |                 for style in speaker["styles"]:
166 |                     style_names.append(style["name"])
167 |                 return style_names
168 |             print(f"Speaker: {speaker_name} not found.")
169 |         return None
170 | 
171 |     def get_speaker_id(self, speaker_name, style_name) -> Optional[int]:
172 |         """
173 |         Aivisの話者名から話者IDを取得する。
174 | 
175 |         Args:
176 |             name (str): 話者名。
177 |             style (str): 感情スタイル。
178 | 
179 |         Returns:
180 |             int: 話者ID。
181 |         """
182 |         speakers = self.get_speaker()
183 |         for speaker in speakers:
184 |             if speaker["name"] == speaker_name:
185 |                 for style in speaker["styles"]:
186 |                     if style["name"] == style_name:
187 |                         return style["id"]
188 |                 print(f"Style: {style_name} not found in speaker: {speaker_name}.")
189 |                 return None
190 |             print(f"Speaker: {speaker_name} not found.")
191 |         return None
192 | 


--------------------------------------------------------------------------------
/lib/chat_akari.py:
--------------------------------------------------------------------------------
  1 | import copy
  2 | import json
  3 | import os
  4 | import sys
  5 | import threading
  6 | from typing import Generator
  7 | 
  8 | import grpc
  9 | from google.genai import types
 10 | from gpt_stream_parser import force_parse_json
 11 | 
 12 | from .chat import ChatStream
 13 | 
 14 | sys.path.append(os.path.join(os.path.dirname(__file__), "grpc"))
 15 | import motion_server_pb2
 16 | import motion_server_pb2_grpc
 17 | 
 18 | 
 19 | class ChatStreamAkari(ChatStream):
 20 |     """
 21 |     LLMを使用して会話とAKARIのモーション選択を行うためのクラス。
 22 |     """
 23 | 
 24 |     def __init__(
 25 |         self, motion_host: str = "127.0.0.1", motion_port: str = "50055"
 26 |     ) -> None:
 27 |         """クラスの初期化メソッド。
 28 | 
 29 |         Args:
 30 |             motion_host (str, optional): モーションサーバーのホスト名。デフォルトは"127.0.0.1"。
 31 |             motion_port (str, optional): モーションサーバーのポート番号。デフォルトは"50055"。
 32 | 
 33 |         """
 34 |         super().__init__()
 35 |         motion_channel = grpc.insecure_channel(motion_host + ":" + motion_port)
 36 |         self.motion_stub = motion_server_pb2_grpc.MotionServerServiceStub(
 37 |             motion_channel
 38 |         )
 39 | 
 40 |     def send_motion(self, name: str) -> None:
 41 |         """motion serverに動作を送信する
 42 | 
 43 |         Args:
 44 |             name (str): 動作名
 45 | 
 46 |         """
 47 |         try:
 48 |             self.motion_stub.SetMotion(
 49 |                 motion_server_pb2.SetMotionRequest(
 50 |                     name=name, priority=3, repeat=False, clear=True
 51 |                 )
 52 |             )
 53 |         except BaseException:
 54 |             print("send error!")
 55 |             pass
 56 | 
 57 |     def chat_and_motion_gpt(
 58 |         self,
 59 |         messages: list,
 60 |         model: str = "gpt-4o",
 61 |         temperature: float = 0.7,
 62 |     ) -> Generator[str, None, None]:
 63 |         """ChatGPTを使用して会話を行い、会話の内容に応じた動作も生成する
 64 | 
 65 |         Args:
 66 |             messages (list): メッセージリスト
 67 |             model (str): 使用するモデル名 (デフォルト: "gpt-4o")
 68 |             temperature (float): ChatGPTのtemperatureパラメータ (デフォルト: 0.7)
 69 |         Returns:
 70 |             Generator[str, None, None]): 会話の返答を順次生成する
 71 | 
 72 |         """
 73 |         if self.openai_client is None:
 74 |             raise ValueError("OpenAI API key is not set.")
 75 |         if model in self.openai_flagship_model_name:
 76 |             raise ValueError("Flagship model is not supported.")
 77 |         result = self.openai_client.responses.create(
 78 |             model=model,
 79 |             input=messages,
 80 |             temperature=temperature,
 81 |             tools=[
 82 |                 {
 83 |                     "type": "function",
 84 |                     "name": "reply_with_motion_",
 85 |                     "description": "ユーザのメッセージに対する回答と、回答の感情に近い動作を一つ選択します",
 86 |                     "parameters": {
 87 |                         "type": "object",
 88 |                         "properties": {
 89 |                             "motion": {
 90 |                                 "type": "string",
 91 |                                 "description": "動作",
 92 |                                 "enum": [
 93 |                                     "肯定する",
 94 |                                     "否定する",
 95 |                                     "おじぎ",
 96 |                                     "喜ぶ",
 97 |                                     "笑う",
 98 |                                     "落ち込む",
 99 |                                     "うんざりする",
100 |                                     "眠る",
101 |                                 ],
102 |                             },
103 |                             "talk": {
104 |                                 "type": "string",
105 |                                 "description": "回答",
106 |                             },
107 |                         },
108 |                         "required": ["motion", "talk"],
109 |                     },
110 |                 }
111 |             ],
112 |             tool_choice={
113 |                 "type": "function",
114 |                 "name": "reply_with_motion_",
115 |             },
116 |             stream=True,
117 |         )
118 |         full_response = ""
119 |         real_time_response = ""
120 |         sentence_index = 0
121 |         get_motion = False
122 |         for chunk in result:
123 |             if chunk.type != "response.function_call_arguments.delta":
124 |                 continue
125 |             full_response += chunk.delta
126 |             try:
127 |                 data_json = json.loads(full_response)
128 |                 found_last_char = False
129 |                 for char in self.last_char:
130 |                     if real_time_response[-1].find(char) >= 0:
131 |                         found_last_char = True
132 |                 if not found_last_char:
133 |                     data_json["talk"] = data_json["talk"] + "。"
134 |             except BaseException:
135 |                 data_json = force_parse_json(full_response)
136 |             if data_json is not None:
137 |                 if "talk" in data_json:
138 |                     if not get_motion and "motion" in data_json:
139 |                         get_motion = True
140 |                         motion = data_json["motion"]
141 |                         if motion == "肯定する":
142 |                             key = "agree"
143 |                         elif motion == "否定する":
144 |                             key = "swing"
145 |                         elif motion == "おじぎ":
146 |                             key = "bow"
147 |                         elif motion == "喜ぶ":
148 |                             key = "happy"
149 |                         elif motion == "笑う":
150 |                             key = "lough"
151 |                         elif motion == "落ち込む":
152 |                             key = "depressed"
153 |                         elif motion == "うんざりする":
154 |                             key = "amazed"
155 |                         elif motion == "眠る":
156 |                             key = "sleep"
157 |                         elif motion == "ぼんやりする":
158 |                             key = "lookup"
159 |                         print("motion: " + motion)
160 |                         motion_thread = threading.Thread(
161 |                             target=self.send_motion, args=(key,)
162 |                         )
163 |                         motion_thread.start()
164 |                     real_time_response = str(data_json["talk"])
165 |                     for char in self.last_char:
166 |                         pos = real_time_response[sentence_index:].find(char)
167 |                         if pos >= 0:
168 |                             sentence = real_time_response[
169 |                                 sentence_index : sentence_index + pos + 1
170 |                             ]
171 |                             sentence_index += pos + 1
172 |                             if sentence != "":
173 |                                 yield sentence
174 |                             # break
175 | 
176 |     def chat_and_motion_anthropic(
177 |         self,
178 |         messages: list,
179 |         model: str = "claude-3-sonnet-20240229",
180 |         temperature: float = 0.7,
181 |     ) -> Generator[str, None, None]:
182 |         """Claude3を使用して会話を行い、会話の内容に応じた動作も生成する
183 | 
184 |         Args:
185 |             messages (list): メッセージリスト
186 |             model (str): 使用するモデル名 (デフォルト:"claude-3-sonnet-20240229")
187 |             temperature (float): Claude3のtemperatureパラメータ (デフォルト: 0.7)
188 |         Returns:
189 |             Generator[str, None, None]): 会話の返答を順次生成する
190 | 
191 |         """
192 |         system_message = ""
193 |         user_messages = []
194 |         for message in messages:
195 |             if message["role"] == "system":
196 |                 system_message = message["content"]
197 |             else:
198 |                 user_messages.append(message)
199 |         # 最後の1文を動作と文章のJSON形式出力指定に修正
200 |         motion_json_format = (
201 |             f"「{user_messages[-1]['content']}」に対する返答を下記のJSON形式で出力してください。"
202 |             '{"motion": 次の()内から動作を一つ選択("肯定する","否定する","おじぎ",'
203 |             '"喜ぶ","笑う","落ち込む","うんざりする","眠る"), "talk": 会話の返答'
204 |             "}"
205 |         )
206 |         user_messages[-1]["content"] = motion_json_format
207 |         with self.anthropic_client.messages.stream(
208 |             model=model,
209 |             max_tokens=1000,
210 |             temperature=temperature,
211 |             messages=user_messages,
212 |             system=system_message,
213 |         ) as result:
214 |             full_response = ""
215 |             real_time_response = ""
216 |             sentence_index = 0
217 |             get_motion = False
218 |             for text in result.text_stream:
219 |                 if text is None:
220 |                     pass
221 |                 else:
222 |                     full_response += text
223 |                     real_time_response += text
224 |                     try:
225 |                         data_json = json.loads(full_response)
226 |                         found_last_char = False
227 |                         for char in self.last_char:
228 |                             if real_time_response[-1].find(char) >= 0:
229 |                                 found_last_char = True
230 |                         if not found_last_char:
231 |                             data_json["talk"] = data_json["talk"] + "。"
232 |                     except BaseException:
233 |                         full_response_json = full_response[
234 |                             full_response.find("{") : full_response.rfind("}") + 1
235 |                         ]
236 |                         data_json = force_parse_json(full_response_json)
237 |                     if data_json is not None:
238 |                         if "talk" in data_json:
239 |                             if not get_motion and "motion" in data_json:
240 |                                 get_motion = True
241 |                                 motion = data_json["motion"]
242 |                                 if motion == "肯定する":
243 |                                     key = "agree"
244 |                                 elif motion == "否定する":
245 |                                     key = "swing"
246 |                                 elif motion == "おじぎ":
247 |                                     key = "bow"
248 |                                 elif motion == "喜ぶ":
249 |                                     key = "happy"
250 |                                 elif motion == "笑う":
251 |                                     key = "lough"
252 |                                 elif motion == "落ち込む":
253 |                                     key = "depressed"
254 |                                 elif motion == "うんざりする":
255 |                                     key = "amazed"
256 |                                 elif motion == "眠る":
257 |                                     key = "sleep"
258 |                                 elif motion == "ぼんやりする":
259 |                                     key = "lookup"
260 |                                 print("motion: " + motion)
261 |                                 motion_thread = threading.Thread(
262 |                                     target=self.send_motion, args=(key,)
263 |                                 )
264 |                                 motion_thread.start()
265 |                             real_time_response = str(data_json["talk"])
266 |                             for char in self.last_char:
267 |                                 pos = real_time_response[sentence_index:].find(char)
268 |                                 if pos >= 0:
269 |                                     sentence = real_time_response[
270 |                                         sentence_index : sentence_index + pos + 1
271 |                                     ]
272 |                                     sentence_index += pos + 1
273 |                                     if sentence != "":
274 |                                         yield sentence
275 |                                     # break
276 | 
277 |     def chat_and_motion_gemini(
278 |         self,
279 |         messages: list,
280 |         model: str = "gemini-2.0-flash",
281 |         temperature: float = 0.7,
282 |     ) -> Generator[str, None, None]:
283 |         """ChatGPTを使用して会話を行い、会話の内容に応じた動作も生成する
284 | 
285 |         Args:
286 |             messages (list): メッセージリスト
287 |             model (str): 使用するモデル名 (デフォルト: "gemini-2.0-flash")
288 |             temperature (float): ChatGPTのtemperatureパラメータ (デフォルト: 0.7)
289 |         Returns:
290 |             Generator[str, None, None]): 会話の返答を順次生成する
291 | 
292 |         """
293 |         if self.gemini_client is None:
294 |             print("Gemini API key is not set.")
295 |             return
296 |         new_messages = copy.deepcopy(messages)
297 |         new_messages[-1]["content"] = (
298 |             f"「{new_messages[-1]['content']}」に対する返答を下記のJSON形式で出力してください。"
299 |             '{"motion": 次の()内から動作を一つ選択("肯定する","否定する","おじぎ",'
300 |             '"喜ぶ","笑う","落ち込む","うんざりする","眠る"), "talk": 会話の返答'
301 |             "}"
302 |         )
303 |         (
304 |             system_instruction,
305 |             history,
306 |             cur_message,
307 |         ) = self.convert_messages_from_gpt_to_gemini(new_messages)
308 | 
309 |         chat = self.gemini_client.chats.create(
310 |             model=model,
311 |             history=history,
312 |             config=types.GenerateContentConfig(
313 |                 system_instruction=system_instruction, temperature=0.5
314 |             ),
315 |         )
316 |         responses = chat.send_message_stream(cur_message["contents"])
317 |         full_response = ""
318 |         real_time_response = ""
319 |         sentence_index = 0
320 |         get_motion = False
321 |         for response in responses:
322 |             text = response.text
323 |             if text is None:
324 |                 pass
325 |             else:
326 |                 full_response += text
327 |                 real_time_response += text
328 |                 try:
329 |                     data_json = json.loads(full_response)
330 |                     found_last_char = False
331 |                     for char in self.last_char:
332 |                         if real_time_response[-1].find(char) >= 0:
333 |                             found_last_char = True
334 |                     if not found_last_char:
335 |                         data_json["talk"] = data_json["talk"] + "。"
336 |                 except BaseException:
337 |                     full_response_json = full_response[
338 |                         full_response.find("{") : full_response.rfind("}") + 1
339 |                     ]
340 |                     data_json = force_parse_json(full_response_json)
341 |                 if data_json is not None:
342 |                     if "talk" in data_json:
343 |                         if not get_motion and "motion" in data_json:
344 |                             get_motion = True
345 |                             motion = data_json["motion"]
346 |                             if motion == "肯定する":
347 |                                 key = "agree"
348 |                             elif motion == "否定する":
349 |                                 key = "swing"
350 |                             elif motion == "おじぎ":
351 |                                 key = "bow"
352 |                             elif motion == "喜ぶ":
353 |                                 key = "happy"
354 |                             elif motion == "笑う":
355 |                                 key = "lough"
356 |                             elif motion == "落ち込む":
357 |                                 key = "depressed"
358 |                             elif motion == "うんざりする":
359 |                                 key = "amazed"
360 |                             elif motion == "眠る":
361 |                                 key = "sleep"
362 |                             elif motion == "ぼんやりする":
363 |                                 key = "lookup"
364 |                             print("motion: " + motion)
365 |                             motion_thread = threading.Thread(
366 |                                 target=self.send_motion, args=(key,)
367 |                             )
368 |                             motion_thread.start()
369 |                         real_time_response = str(data_json["talk"])
370 |                         for char in self.last_char:
371 |                             pos = real_time_response[sentence_index:].find(char)
372 |                             if pos >= 0:
373 |                                 sentence = real_time_response[
374 |                                     sentence_index : sentence_index + pos + 1
375 |                                 ]
376 |                                 sentence_index += pos + 1
377 |                                 if sentence != "":
378 |                                     yield sentence
379 | 
380 |     def chat_and_motion(
381 |         self,
382 |         messages: list,
383 |         model: str = "gpt-4o",
384 |         temperature: float = 0.7,
385 |     ) -> Generator[str, None, None]:
386 |         """指定したモデルを使用して会話を行い、会話の内容に応じた動作も生成する
387 | 
388 |         Args:
389 |             messages (list): 会話のメッセージ
390 |             model (str): 使用するモデル名 (デフォルト: "gpt-4o")
391 |             temperature (float): temperatureパラメータ (デフォルト: 0.7)
392 |         Returns:
393 |             Generator[str, None, None]): 返答を順次生成する
394 | 
395 |         """
396 |         if model in self.openai_model_name:
397 |             yield from self.chat_and_motion_gpt(
398 |                 messages=messages, model=model, temperature=temperature
399 |             )
400 |         elif model in self.anthropic_model_name:
401 |             if self.anthropic_client is None:
402 |                 print("Anthropic API key is not set.")
403 |                 return
404 |             yield from self.chat_and_motion_anthropic(
405 |                 messages=messages, model=model, temperature=temperature
406 |             )
407 |         elif model in self.gemini_model_name:
408 |             if self.gemini_client is None:
409 |                 print("Gemini API key is not set.")
410 |                 return
411 |             yield from self.chat_and_motion_gemini(
412 |                 messages=messages, model=model, temperature=temperature
413 |             )
414 |         else:
415 |             print(f"Model name {model} can't use for this function")
416 |             return
417 | 


--------------------------------------------------------------------------------
/lib/conf.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | from dotenv import load_dotenv
 4 | 
 5 | load_dotenv()
 6 | 
 7 | GOOGLE_SPEECH_PROJECT_ID = os.environ.get("GOOGLE_SPEECH_PROJECT_ID")
 8 | OPENAI_APIKEY = os.environ.get("OPENAI_API_KEY")
 9 | ANTHROPIC_APIKEY = os.environ.get("ANTHROPIC_API_KEY")
10 | VOICEVOX_APIKEY = os.environ.get("VOICEVOX_API_KEY")
11 | GEMINI_APIKEY = os.environ.get("GEMINI_API_KEY")
12 | 


--------------------------------------------------------------------------------
/lib/en_to_jp.py:
--------------------------------------------------------------------------------
  1 | import csv
  2 | import os
  3 | import re
  4 | from typing import Tuple
  5 | 
  6 | import alkana
  7 | from pyjapanglish import Japanglish
  8 | 
  9 | 
 10 | class EnToJp(object):
 11 |     def __init__(self) -> None:
 12 |         self.japanglish = Japanglish()
 13 |         self.user_dict_list: Tuple[str, str] = []
 14 |         EN_TO_JP_DICT_PATH = (
 15 |             os.path.dirname(os.path.abspath(__file__))
 16 |             + "/../config/en_to_jp_fix_dict.csv"
 17 |         )
 18 |         if os.path.exists(EN_TO_JP_DICT_PATH):
 19 |             with open(EN_TO_JP_DICT_PATH, mode="r") as fix_dict_file:
 20 |                 csv_reader = csv.reader(fix_dict_file)
 21 |                 next(csv_reader)  # 1行目を無視
 22 |                 for row in csv_reader:
 23 |                     if len(row) >= 2:
 24 |                         self.japanglish.user_dict[row[0]] = row[1]
 25 |                         self.user_dict_list.append((row[0], row[1]))
 26 | 
 27 |     def replace_english_to_alkana(self, text: str) -> str:
 28 |         """テキストに含まれている英単語をalkanaでカタカナに変換して返す
 29 | 
 30 |         Args:
 31 |             text (str): 変換対象のテキスト
 32 | 
 33 |         Returns:
 34 |             str: 変換後のテキスト
 35 |         """
 36 |         output = ""
 37 |         # 先頭から順番に英単語を検索しカタカナに変換
 38 |         while word := re.search(r"[a-zA-Z]{1,}", text):
 39 |             output += text[: word.start()] + self.word_to_alkana(word.group())
 40 |             text = text[word.end() :]
 41 |         return output + text
 42 | 
 43 |     def word_to_alkana(self, word: str) -> str:
 44 |         """英単語がカタカナに変換できる場合はカタカナにして返す
 45 | 
 46 |         Args:
 47 |             word (str): 変換対象の英単語
 48 | 
 49 |         Returns:
 50 |             str: 変換後のカタカナ
 51 |         """
 52 | 
 53 |         if kana := alkana.get_kana(word.lower()):
 54 |             # ユーザー辞書に登録されている場合はユーザー辞書の値を返す
 55 |             for user_dict in self.user_dict_list:
 56 |                 if word.lower() == user_dict[0]:
 57 |                     return user_dict[1]
 58 |             return kana
 59 |         else:
 60 |             if re.fullmatch(r"(?:[A-Z][a-z]{1,}){2,}", word):
 61 |                 m = re.match(r"[A-Z][a-z]{1,}", word)
 62 |                 first = self.word_to_alkana(m.group())
 63 |                 second = self.word_to_alkana(word[m.end() :])
 64 |                 return first + second
 65 |             return word
 66 | 
 67 |     def replace_english_to_japanglish(self, text, inference: bool = False) -> str:
 68 |         """ "テキストに含まれている英単語をjapanglishでカタカナに変換して返す。3文字以上の文字数の単語が対象
 69 | 
 70 |         Args:
 71 |             text (str): 変換対象のテキスト
 72 |             inference (bool, optional): 変換できない場合に推論変換するかのフラグ。デフォルトはFalse。
 73 | 
 74 |         Returns:
 75 |             str: 変換後のテキスト
 76 |         """
 77 | 
 78 |         output = ""
 79 |         while word := re.search(r"[a-zA-Z]{3,}", text):
 80 |             output += text[: word.start()] + self.word_to_japanglish(
 81 |                 word.group(), inference
 82 |             )
 83 |             text = text[word.end() :]
 84 |         return output + text
 85 | 
 86 |     def word_to_japanglish(self, word: str, inference: bool = False) -> str:
 87 |         """英単語がカタカナに変換できる場合はjapanglishでカタカナにして返す。3文字以上の文字数の単語が対象
 88 | 
 89 |         Args:
 90 |             word (str): 変換対象の英単語
 91 |             inference (bool, optional): 変換できない場合に推論変換するかのフラグ。デフォルトはFalse。
 92 | 
 93 |         Returns:
 94 |             str: 変換後のカタカナ
 95 |         """
 96 |         if self.japanglish.convert(word.lower(), inference) is not None:
 97 |             return self.japanglish.convert(word.lower(), inference)
 98 |         else:
 99 |             if re.fullmatch(r"(?:[A-Z][a-z]{3,}){2,}", word):
100 |                 m = re.match(r"[A-Z][a-z]{3,}", word)
101 |                 first = self.word_to_japanglish(m.group())
102 |                 second = self.word_to_japanglish(word[m.end() :])
103 |                 return first + second
104 |             return word
105 | 
106 |     def text_to_kana(
107 |         self,
108 |         text: str,
109 |         alkana: bool = True,
110 |         japanglish: bool = True,
111 |         inference: bool = False,
112 |     ) -> str:
113 |         """テキストに含まれている英単語をカタカナに変換して返す
114 | 
115 |         Args:
116 |             text (str): 変換対象のテキスト
117 |             alkana (bool, optional): alkanaで変換するかのフラグ。デフォルトはTrue。
118 |             japanglish (bool, optional): japanglishで変換するかのフラグ。デフォルトはTrue。
119 |             inference (bool, optional): 変換できない場合に推論変換するかのフラグ。デフォルトはFalse。
120 | 
121 |         Returns:
122 |             str: 変換後のテキスト
123 |         """
124 |         if alkana:
125 |             text = self.replace_english_to_alkana(text)
126 |         if japanglish:
127 |             text = self.replace_english_to_japanglish(text, inference)
128 |         return text
129 | 


--------------------------------------------------------------------------------
/lib/err_handler.py:
--------------------------------------------------------------------------------
 1 | import contextlib
 2 | import os
 3 | import sys
 4 | from typing import Generator
 5 | 
 6 | 
 7 | @contextlib.contextmanager
 8 | def ignoreStderr() -> Generator[None, None, None]:
 9 |     """標準エラー出力をエラースクリーンに一時的に無視するコンテキストマネージャ。
10 | 
11 |     Returns:
12 |         Generator[None, None, None]: コンテキストマネージャのジェネレータ。
13 |     """
14 |     devnull = os.open(os.devnull, os.O_WRONLY)
15 |     old_stderr = os.dup(2)
16 |     sys.stderr.flush()
17 |     os.dup2(devnull, 2)
18 |     os.close(devnull)
19 |     try:
20 |         yield
21 |     finally:
22 |         os.dup2(old_stderr, 2)
23 |         os.close(old_stderr)
24 | 


--------------------------------------------------------------------------------
/lib/google_speech.py:
--------------------------------------------------------------------------------
  1 | from __future__ import division
  2 | 
  3 | import math
  4 | import struct
  5 | import sys
  6 | import time
  7 | from queue import Queue
  8 | from typing import Any, Generator, Iterable, Optional, Union
  9 | 
 10 | import numpy as np
 11 | import pyaudio
 12 | from google.cloud import speech
 13 | from six.moves import queue  # type: ignore
 14 | 
 15 | from .err_handler import ignoreStderr
 16 | 
 17 | # Audio recording parameters
 18 | RATE = 16000
 19 | CHUNK = int(RATE / 10)  # 100ms
 20 | 
 21 | 
 22 | class MicrophoneStream(object):
 23 |     """
 24 |     マイクから音声をストリーミングするためのクラス。
 25 | 
 26 |     """
 27 | 
 28 |     def __init__(
 29 |         self,
 30 |         rate: float,
 31 |         chunk: float,
 32 |         _timeout_thresh: float = 0.5,
 33 |         _start_timeout_thresh: float = 4.0,
 34 |         _db_thresh: float = 55.0,
 35 |     ) -> None:
 36 |         """クラスの初期化メソッド。
 37 | 
 38 |         Args:
 39 |             rate (float): サンプリングレート。
 40 |             chunk (float): チャンクサイズ。
 41 |             _timeout_thresh (float): 音声が停止したと判断するタイムアウト閾値（秒）。デフォルトは0.5秒。
 42 |             _start_timeout_thresh (float): マイクの入力が開始しないまま終了するまでのタイムアウト閾値（秒）。デフォルトは4.0秒。
 43 |             _db_thresh (float): 音声が開始されたと判断する音量閾値（デシベル）。デフォルトは55.0デシベル。
 44 | 
 45 |         """
 46 |         self._rate = rate
 47 |         self._chunk = chunk
 48 |         self._buff: Queue[Union[None, bytes]] = queue.Queue()
 49 |         self.closed = True
 50 |         self.is_start = False
 51 |         self.is_start_callback = False
 52 |         self.is_finish = False
 53 |         self.timeout_thresh = _timeout_thresh
 54 |         # マイクの入力が開始しないまま終了するまでのthreshold時間[s]
 55 |         self.start_timeout_thresh = _start_timeout_thresh
 56 |         self.db_thresh = _db_thresh
 57 |         language_code = "ja-JP"  # a BCP-47 language tag
 58 |         self.client = speech.SpeechClient()
 59 |         config = speech.RecognitionConfig(
 60 |             encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
 61 |             sample_rate_hertz=RATE,
 62 |             language_code=language_code,
 63 |         )
 64 |         self.streaming_config = speech.StreamingRecognitionConfig(
 65 |             config=config, interim_results=True
 66 |         )
 67 | 
 68 |     def __enter__(self) -> Any:
 69 |         """PyAudioストリームを開く。"""
 70 |         with ignoreStderr():
 71 |             self._audio_interface = pyaudio.PyAudio()
 72 |             self._audio_stream = self._audio_interface.open(
 73 |                 format=pyaudio.paInt16,
 74 |                 channels=1,
 75 |                 rate=self._rate,
 76 |                 input=True,
 77 |                 frames_per_buffer=self._chunk,
 78 |                 stream_callback=self._fill_buffer,
 79 |             )
 80 |             self.closed = False
 81 |             return self
 82 | 
 83 |     def __exit__(
 84 |         self,
 85 |         rate: float,
 86 |         chunk: float,
 87 |         _timeout_thresh: float = 0.5,
 88 |         _start_timeout_thresh: float = 4.0,
 89 |         _db_thresh: float = 55.0,
 90 |     ) -> None:
 91 |         """PyAudioストリームを閉じます。
 92 | 
 93 |         Args:
 94 |             rate (float): サンプリングレート。
 95 |             chunk (float): チャンクサイズ。
 96 |             _timeout_thresh (float, optional): 音声が停止したと判断するタイムアウト閾値（秒）。デフォルトは0.5秒。
 97 |             _start_timeout_thresh (float): マイクの入力が開始しないまま終了するまでのタイムアウト閾値（秒）。デフォルトは4.0秒。
 98 |             _db_thresh (float, optional): 音声が開始されたと判断する音量閾値（デシベル）。デフォルトは55.0デシベル。
 99 | 
100 |         """
101 |         self._audio_stream.stop_stream()
102 |         self._audio_stream.close()
103 |         self.closed = True
104 |         self._buff.put(None)
105 |         self._audio_interface.terminate()
106 |         self.is_start_callback = False
107 | 
108 |     def start_callback(self) -> None:
109 |         """開始コールバックを呼び出す。"""
110 |         self.is_start_callback = True
111 | 
112 |     def _fill_buffer(
113 |         self, in_data: bytes, frame_count: int, time_info: Any, status_flags: Any
114 |     ) -> Union[None, Any]:
115 |         """マイクからの入力データをバッファーに書き込む。
116 | 
117 |         Args:
118 |             in_data (bytes): 入力データ
119 |             frame_count (int): フレーム数
120 |             time_info (Any): 時間
121 |             status_flags (Any): ステータスフラグ
122 | 
123 |         Returns:
124 |             Union[None, Any]: Noneまたは続行のためのフラグ
125 | 
126 |         """
127 |         if self.is_start_callback:
128 |             in_data2 = struct.unpack(f"{len(in_data) / 2:.0f}h", in_data)
129 |             rms = math.sqrt(np.square(in_data2).mean())
130 |             power = 20 * math.log10(rms) if rms > 0.0 else -math.inf  # RMS to db
131 |             if power > self.db_thresh:
132 |                 if not self.is_start:
133 |                     self.is_start = True
134 |                 self.start_time = time.time()
135 |             if self.is_start:
136 |                 self._buff.put(in_data)
137 |                 if time.time() - self.start_time >= self.timeout_thresh:
138 |                     self.closed = True
139 |             else:
140 |                 if time.time() - self.start_time >= self.start_timeout_thresh:
141 |                     self.closed = True
142 |         return None, pyaudio.paContinue
143 | 
144 |     def generator(self) -> Union[None, Generator[Any, None, None]]:
145 |         """bufferから音声データを生成するジェネレーター
146 | 
147 |         Yields:
148 |             Union[None, Any]: 音声データ
149 |         """
150 |         while not self.closed:
151 |             try:
152 |                 chunk = self._buff.get(block=False, timeout=0.01)
153 |                 if chunk is None:
154 |                     return
155 |                 data = [chunk]
156 |                 while True:
157 |                     try:
158 |                         chunk = self._buff.get(block=False)
159 |                         if chunk is None:
160 |                             return
161 |                         data.append(chunk)
162 |                     except queue.Empty:
163 |                         break
164 |                 yield b"".join(data)
165 |             except queue.Empty:
166 |                 time.sleep(0.01)
167 |                 continue
168 | 
169 |     def transcribe(
170 |         self,
171 |     ) -> Optional[Iterable[speech.StreamingRecognizeResponse]]:
172 |         """ストリームからの音声をGoogle Cloud Speech-to-Text APIでテキストに変換する。
173 | 
174 |         Returns:
175 |             Optional[Iterable[speech.StreamingRecognizeResponse]]: ストリーミング認識の応答
176 |         """
177 |         audio_generator = self.generator()
178 |         self.start_time = time.time()
179 |         self.start_callback()
180 |         responses = None
181 |         requests = (
182 |             speech.StreamingRecognizeRequest(audio_content=content)
183 |             for content in audio_generator
184 |         )
185 |         try:
186 |             responses = self.client.streaming_recognize(self.streaming_config, requests)
187 |         except BaseException:
188 |             pass
189 |         return responses
190 | 
191 | 
192 | def get_db_thresh() -> float:
193 |     """マイクからの周囲音量を測定。
194 | 
195 |     Returns:
196 |         float: 測定された音量[db]
197 |     """
198 |     with ignoreStderr():
199 |         p = pyaudio.PyAudio()
200 |         stream = p.open(
201 |             format=pyaudio.paInt16,
202 |             channels=1,
203 |             rate=RATE,
204 |             input=True,
205 |             frames_per_buffer=CHUNK,
206 |         )
207 |         frames = []
208 |         print("Measuring Ambient Sound Levels…")
209 |         for _ in range(int(RATE / CHUNK * 2)):
210 |             data = stream.read(CHUNK)
211 |             frames.append(data)
212 |         audio_data = np.frombuffer(b"".join(frames), dtype=np.int16)
213 |         rms2 = np.square(audio_data).mean()
214 |         if rms2 > 0.0:
215 |             rms = math.sqrt(np.square(audio_data).mean())
216 |             power = 20 * math.log10(rms) if rms > 0.0 else -math.inf  # RMS to db
217 |         else:
218 |             power = 20
219 |         print(f"Sound Levels: {power:.3f}db")
220 |         stream.stop_stream()
221 |         stream.close()
222 |         p.terminate()
223 |     return power
224 | 
225 | 
226 | def listen_print_loop(responses: object) -> str:
227 |     """Google Cloud Speech-to-Text APIの応答からテキストを取得し、リアルタイムで出力。
228 | 
229 |     Args:
230 |         responses (Any): ストリーミング認識の応答
231 | 
232 |     Returns:
233 |         str: 認識されたテキスト
234 | 
235 |     """
236 |     num_chars_printed = 0
237 |     transcript = ""
238 |     overwrite_chars = ""
239 |     for response in responses:
240 |         if response.error.code:
241 |             break
242 |         if not response.results:
243 |             continue
244 |         result = response.results[0]
245 |         if not result.alternatives:
246 |             continue
247 |         transcript = result.alternatives[0].transcript
248 |         overwrite_chars = " " * (num_chars_printed - len(transcript))
249 |         if not result.is_final:
250 |             sys.stdout.write(transcript + overwrite_chars + "\r")
251 |             sys.stdout.flush()
252 |             num_chars_printed = len(transcript)
253 |         else:
254 |             print(transcript + overwrite_chars)
255 |             break
256 |     return transcript + overwrite_chars
257 | 


--------------------------------------------------------------------------------
/lib/google_speech_grpc.py:
--------------------------------------------------------------------------------
  1 | from __future__ import division
  2 | 
  3 | import math
  4 | import os
  5 | import struct
  6 | import sys
  7 | import time
  8 | from typing import Any, Optional, Union
  9 | 
 10 | import grpc
 11 | import numpy as np
 12 | import pyaudio
 13 | 
 14 | from .google_speech import MicrophoneStream
 15 | 
 16 | sys.path.append(os.path.join(os.path.dirname(__file__), "grpc"))
 17 | import gpt_server_pb2
 18 | import gpt_server_pb2_grpc
 19 | import motion_server_pb2
 20 | import motion_server_pb2_grpc
 21 | import voice_server_pb2
 22 | import voice_server_pb2_grpc
 23 | 
 24 | 
 25 | class MicrophoneStreamGrpc(MicrophoneStream):
 26 |     """
 27 |     マイクから音声をストリーミングするためのクラス。
 28 | 
 29 |     """
 30 | 
 31 |     def __init__(
 32 |         self,
 33 |         rate: float,
 34 |         chunk: float,
 35 |         _timeout_thresh: float = 0.5,
 36 |         _start_timeout_thresh: float = 4.0,
 37 |         _db_thresh: float = 55.0,
 38 |         gpt_host: str = "127.0.0.1",
 39 |         gpt_port: str = "10001",
 40 |         voice_host: str = "127.0.0.1",
 41 |         voice_port: str = "10002",
 42 |         motion_server_host: Optional[str] = "127.0.0.1",
 43 |         motion_server_port: Optional[str] = "50055",
 44 |     ) -> None:
 45 |         """クラスの初期化メソッド。
 46 | 
 47 |         Args:
 48 |             rate (float): サンプリングレート。
 49 |             chunk (float): チャンクサイズ。
 50 |             _timeout_thresh (float): 音声が停止したと判断するタイムアウト閾値（秒）。デフォルトは0.5秒。
 51 |             _db_thresh (float): 音声が開始されたと判断する音量閾値（デシベル）。デフォルトは55.0デシベル。
 52 |             gpt_host (str, optional): GPTサーバーのホスト名。デフォルトは"127.0.0.1"。
 53 |             gpt_port (str, optional): GPTサーバーのポート番号。デフォルトは"10001"。
 54 |             voice_host (str, optional): VoiceVoxサーバーのホスト名。デフォルトは"127.0.0.1"。
 55 |             voice_port (str, optional): VoiceVoxサーバーのポート番号。デフォルトは"10002"。
 56 |             motion_server_host (str, optional): モーションサーバーのIPアドレス。デフォルトは"127.0.0.1"。
 57 |             motion_server_port (str, optional): モーションサーバーのポート番号。デフォルトは"50055"。
 58 |         """
 59 |         super().__init__(
 60 |             rate=rate,
 61 |             chunk=chunk,
 62 |             _timeout_thresh=_timeout_thresh,
 63 |             _start_timeout_thresh=_start_timeout_thresh,
 64 |             _db_thresh=_db_thresh,
 65 |         )
 66 |         gpt_channel = grpc.insecure_channel(gpt_host + ":" + gpt_port)
 67 |         self.gpt_stub = gpt_server_pb2_grpc.GptServerServiceStub(gpt_channel)
 68 |         voice_channel = grpc.insecure_channel(voice_host + ":" + voice_port)
 69 |         self.voice_stub = voice_server_pb2_grpc.VoiceServerServiceStub(voice_channel)
 70 |         self.motion_stub = None
 71 |         if motion_server_host is not None and motion_server_port is not None:
 72 |             motion_channel = grpc.insecure_channel(
 73 |                 motion_server_host + ":" + motion_server_port
 74 |             )
 75 |             self.motion_stub = motion_server_pb2_grpc.MotionServerServiceStub(
 76 |                 motion_channel
 77 |             )
 78 | 
 79 |     def __exit__(
 80 |         self,
 81 |         rate: float,
 82 |         chunk: float,
 83 |         _timeout_thresh: float = 0.5,
 84 |         _start_timeout_thresh: float = 4.0,
 85 |         _db_thresh: float = 55.0,
 86 |     ) -> None:
 87 |         """PyAudioストリームを閉じます。
 88 | 
 89 |         Args:
 90 |             rate (float): サンプリングレート。
 91 |             chunk (float): チャンクサイズ。
 92 |             _timeout_thresh (float, optional): 音声が停止したと判断するタイムアウト閾値（秒）。デフォルトは0.5秒。
 93 |             _start_timeout_thresh (float): マイクの入力が開始しないまま終了するまでのタイムアウト閾値（秒）。デフォルトは4.0秒。
 94 |             _db_thresh (float, optional): 音声が開始されたと判断する音量閾値（デシベル）。デフォルトは55.0デシベル。
 95 | 
 96 |         """
 97 |         super().__exit__(
 98 |             rate, chunk, _timeout_thresh, _start_timeout_thresh, _db_thresh
 99 |         )
100 |         try:
101 |             self.gpt_stub.SendMotion(gpt_server_pb2.SendMotionRequest())
102 |         except BaseException:
103 |             print("Send motion error")
104 |             pass
105 | 
106 |     def _fill_buffer(
107 |         self, in_data: bytes, frame_count: int, time_info: Any, status_flags: Any
108 |     ) -> Union[None, Any]:
109 |         """マイクからの入力データをバッファーに書き込む。
110 | 
111 |         Args:
112 |             in_data (bytes): 入力データ
113 |             frame_count (int): フレーム数
114 |             time_info (Any): 時間
115 |             status_flags (Any): ステータスフラグ
116 | 
117 |         Returns:
118 |             Union[None, Any]: Noneまたは続行のためのフラグ
119 | 
120 |         """
121 |         if self.is_start_callback:
122 |             in_data2 = struct.unpack(f"{len(in_data) / 2:.0f}h", in_data)
123 |             rms = math.sqrt(np.square(in_data2).mean())
124 |             power = 20 * math.log10(rms) if rms > 0.0 else -math.inf  # RMS to db
125 |             if power > self.db_thresh:
126 |                 if not self.is_start:
127 |                     self.is_start = True
128 |                     if self.motion_stub is not None:
129 |                         try:
130 |                             self.motion_stub.SetMotion(
131 |                                 motion_server_pb2.SetMotionRequest(
132 |                                     name="nod", priority=3, repeat=True
133 |                                 )
134 |                             )
135 |                         except BaseException:
136 |                             pass
137 |                 self.start_time = time.time()
138 |             if self.is_start:
139 |                 self._buff.put(in_data)
140 |                 if time.time() - self.start_time >= self.timeout_thresh:
141 |                     self.is_start = False
142 |                     self.closed = True
143 |                     try:
144 |                         self.voice_stub.EnableVoicePlay(
145 |                             voice_server_pb2.EnableVoicePlayRequest()
146 |                         )
147 |                     except BaseException:
148 |                         print("EnableVoicePlay error")
149 |                         pass
150 |                     return None, pyaudio.paComplete
151 |         return None, pyaudio.paContinue
152 | 
153 | 
154 | class GoogleSpeechGrpc(object):
155 |     """
156 |     Google Speech-to-Text APIのレスポンスを処理するクラス。
157 | 
158 |     """
159 | 
160 |     def __init__(
161 |         self,
162 |         gpt_host: str = "127.0.0.1",
163 |         gpt_port: str = "10001",
164 |         voice_host: str = "127.0.0.1",
165 |         voice_port: str = "10002",
166 |     ) -> None:
167 |         """GoogleSpeechGrpcオブジェクトを初期化する。
168 | 
169 |         Args:
170 |             gpt_host (str, optional): GPTサーバーのホスト名。デフォルトは"127.0.0.1"。
171 |             gpt_port (str, optional): GPTサーバーのポート番号。デフォルトは"10001"。
172 |             voice_host (str, optional): VoiceVoxサーバーのホスト名。デフォルトは"127.0.0.1"。
173 |             voice_port (str, optional): VoiceVoxサーバーのポート番号。デフォルトは"10002"。
174 |         """
175 | 
176 |         gpt_channel = grpc.insecure_channel(gpt_host + ":" + gpt_port)
177 |         self.gpt_stub = gpt_server_pb2_grpc.GptServerServiceStub(gpt_channel)
178 |         voice_channel = grpc.insecure_channel(voice_host + ":" + voice_port)
179 |         self.voice_stub = voice_server_pb2_grpc.VoiceServerServiceStub(voice_channel)
180 | 
181 |     def listen_publisher_grpc(
182 |         self, responses: Any, progress_report_len: int = 0
183 |     ) -> str:
184 |         """
185 |         Google Cloud Speech-to-Text APIの応答からテキストを取得し、リアルタイムで出力。
186 | 
187 |         Args:
188 |             responses (Any): ストリーミング認識の応答
189 |             progress_report_len (int, optional): ここで指定した文字数以上になると、その時点で一度GPTに結果を送信する。0の場合は途中での送信は無効となる。デフォルトは0。
190 | 
191 |         Returns:
192 |             str: 認識されたテキスト
193 |         """
194 |         is_progress_report = False
195 |         num_chars_printed = 0
196 |         transcript = ""
197 |         overwrite_chars = ""
198 |         try:
199 |             self.voice_stub.DisableVoicePlay(voice_server_pb2.DisableVoicePlayRequest())
200 |         except BaseException:
201 |             print("Disable voice play error")
202 |             pass
203 |         try:
204 |             self.voice_stub.InterruptVoice(voice_server_pb2.InterruptVoiceRequest())
205 |         except BaseException:
206 |             print("InterruptVoice error")
207 |             pass
208 |         for response in responses:
209 |             if response.error.code:
210 |                 break
211 |             if not response.results:
212 |                 continue
213 |             result = response.results[0]
214 |             if not result.alternatives:
215 |                 continue
216 |             transcript = result.alternatives[0].transcript
217 |             overwrite_chars = " " * (num_chars_printed - len(transcript))
218 |             if not result.is_final:
219 |                 sys.stdout.write(transcript + overwrite_chars + "\r")
220 |                 sys.stdout.flush()
221 |                 num_chars_printed = len(transcript)
222 |                 if not is_progress_report and num_chars_printed > progress_report_len:
223 |                     if progress_report_len > 0:
224 |                         try:
225 |                             self.gpt_stub.SetGpt(
226 |                                 gpt_server_pb2.SetGptRequest(
227 |                                     text=transcript + overwrite_chars, is_finish=False
228 |                                 )
229 |                             )
230 |                         except BaseException as e:
231 |                             print("SetGpt error:", e)
232 |                             pass
233 |                         is_progress_report = True
234 |             else:
235 |                 if progress_report_len > 0:
236 |                     if (
237 |                         not is_progress_report
238 |                         and num_chars_printed > progress_report_len
239 |                     ):
240 |                         try:
241 |                             self.gpt_stub.SetGpt(
242 |                                 gpt_server_pb2.SetGptRequest(
243 |                                     text=transcript + overwrite_chars, is_finish=False
244 |                                 )
245 |                             )
246 |                         except BaseException as e:
247 |                             print("SetGpt error:", e)
248 |                             pass
249 |                 break
250 |         try:
251 |             self.gpt_stub.SetGpt(
252 |                 gpt_server_pb2.SetGptRequest(
253 |                     text=transcript + overwrite_chars, is_finish=True
254 |                 )
255 |             )
256 |         except BaseException as e:
257 |             print("SetGpt error:", e)
258 |             pass
259 |         return transcript + overwrite_chars
260 | 


--------------------------------------------------------------------------------
/lib/google_speech_v2.py:
--------------------------------------------------------------------------------
  1 | from __future__ import division
  2 | 
  3 | import math
  4 | import sys
  5 | import time
  6 | from queue import Queue
  7 | from typing import Iterable, Optional, Union
  8 | 
  9 | import numpy as np
 10 | import pyaudio
 11 | 
 12 | # from google.cloud import speech
 13 | from google.cloud.speech_v2 import SpeechClient
 14 | from google.cloud.speech_v2.types import cloud_speech as cloud_speech_types
 15 | from six.moves import queue  # type: ignore
 16 | 
 17 | from .conf import GOOGLE_SPEECH_PROJECT_ID
 18 | from .err_handler import ignoreStderr
 19 | from .google_speech import MicrophoneStream
 20 | 
 21 | # Audio recording parameters
 22 | RATE = 16000
 23 | CHUNK = int(RATE / 10)  # 100ms
 24 | 
 25 | 
 26 | class MicrophoneStreamV2(MicrophoneStream):
 27 |     """
 28 |     マイクから音声をストリーミングするためのクラス。google STT v2用。
 29 | 
 30 |     """
 31 | 
 32 |     def __init__(
 33 |         self,
 34 |         rate: float,
 35 |         chunk: float,
 36 |         _timeout_thresh: float = 0.5,
 37 |         _start_timeout_thresh: float = 4.0,
 38 |         _db_thresh: float = 55.0,
 39 |     ) -> None:
 40 |         """クラスの初期化メソッド。
 41 | 
 42 |         Args:
 43 |             rate (float): サンプリングレート。
 44 |             chunk (float): チャンクサイズ。
 45 |             _timeout_thresh (float): 音声が停止したと判断するタイムアウト閾値（秒）。デフォルトは0.5秒。
 46 |             _start_timeout_thresh (float): マイクの入力が開始しないまま終了するまでのタイムアウト閾値（秒）。デフォルトは4.0秒。
 47 |             _db_thresh (float): 音声が開始されたと判断する音量閾値（デシベル）。デフォルトは55.0デシベル。
 48 | 
 49 |         """
 50 |         self._rate = rate
 51 |         self._chunk = chunk
 52 |         self._buff: Queue[Union[None, bytes]] = queue.Queue()
 53 |         self.closed = True
 54 |         self.is_start = False
 55 |         self.is_start_callback = False
 56 |         self.is_finish = False
 57 |         self.timeout_thresh = _timeout_thresh
 58 |         # マイクの入力が開始しないまま終了するまでのthreshold時間[s]
 59 |         self.start_timeout_thresh = _start_timeout_thresh
 60 |         self.db_thresh = _db_thresh
 61 |         language_codes = ["ja-JP"]  # a BCP-47 language tag
 62 |         self.client = SpeechClient()
 63 |         recognition_config = cloud_speech_types.RecognitionConfig(
 64 |             explicit_decoding_config=cloud_speech_types.ExplicitDecodingConfig(
 65 |                 sample_rate_hertz=RATE,
 66 |                 encoding=cloud_speech_types.ExplicitDecodingConfig.AudioEncoding.LINEAR16,
 67 |                 audio_channel_count=1,
 68 |             ),
 69 |             language_codes=language_codes,
 70 |             model="long",
 71 |         )
 72 |         streaming_config = cloud_speech_types.StreamingRecognitionConfig(
 73 |             config=recognition_config,
 74 |             streaming_features=cloud_speech_types.StreamingRecognitionFeatures(
 75 |                 interim_results=True
 76 |             ),
 77 |         )
 78 |         if GOOGLE_SPEECH_PROJECT_ID == "":
 79 |             raise ValueError("GOOGLE_SPEECH_PROJECT_ID is not set.")
 80 |         self.config_request = cloud_speech_types.StreamingRecognizeRequest(
 81 |             recognizer=f"projects/{GOOGLE_SPEECH_PROJECT_ID}/locations/global/recognizers/_",
 82 |             streaming_config=streaming_config,
 83 |         )
 84 | 
 85 |     def requests(
 86 |         self, config: cloud_speech_types.RecognitionConfig, audio: list
 87 |     ) -> list:
 88 |         yield config
 89 |         for chunk in audio:
 90 |             yield cloud_speech_types.StreamingRecognizeRequest(audio=chunk)
 91 | 
 92 |     def transcribe(
 93 |         self,
 94 |     ) -> Optional[Iterable[cloud_speech_types.StreamingRecognizeResponse]]:
 95 |         """ストリームからの音声をGoogle Cloud Speech-to-Text APIでテキストに変換する。
 96 | 
 97 |         Returns:
 98 |             Optional[Iterable[speech.StreamingRecognizeResponse]]: ストリーミング認識の応答
 99 |         """
100 |         audio_generator = self.generator()
101 |         self.start_time = time.time()
102 |         self.start_callback()
103 |         responses = None
104 |         try:
105 |             responses = self.client.streaming_recognize(
106 |                 requests=self.requests(self.config_request, audio_generator)
107 |             )
108 |         except BaseException:
109 |             pass
110 |         return responses
111 | 
112 | 
113 | def get_db_thresh() -> float:
114 |     """マイクからの周囲音量を測定。
115 | 
116 |     Returns:
117 |         float: 測定された音量[db]
118 |     """
119 |     with ignoreStderr():
120 |         p = pyaudio.PyAudio()
121 |         stream = p.open(
122 |             format=pyaudio.paInt16,
123 |             channels=1,
124 |             rate=RATE,
125 |             input=True,
126 |             frames_per_buffer=CHUNK,
127 |         )
128 |         frames = []
129 |         print("Measuring Ambient Sound Levels…")
130 |         for _ in range(int(RATE / CHUNK * 2)):
131 |             data = stream.read(CHUNK)
132 |             frames.append(data)
133 |         audio_data = np.frombuffer(b"".join(frames), dtype=np.int16)
134 |         rms2 = np.square(audio_data).mean()
135 |         if rms2 > 0.0:
136 |             rms = math.sqrt(np.square(audio_data).mean())
137 |             power = 20 * math.log10(rms) if rms > 0.0 else -math.inf  # RMS to db
138 |         else:
139 |             power = 20
140 |         print(f"Sound Levels: {power:.3f}db")
141 |         stream.stop_stream()
142 |         stream.close()
143 |         p.terminate()
144 |     return power
145 | 
146 | 
147 | def listen_print_loop(responses: object) -> str:
148 |     """Google Cloud Speech-to-Text APIの応答からテキストを取得し、リアルタイムで出力。
149 | 
150 |     Args:
151 |         responses (Any): ストリーミング認識の応答
152 | 
153 |     Returns:
154 |         str: 認識されたテキスト
155 | 
156 |     """
157 |     num_chars_printed = 0
158 |     transcript = ""
159 |     overwrite_chars = ""
160 |     for response in responses:
161 |         # if response.error.code:
162 |         #    break
163 |         if not response.results:
164 |             continue
165 |         result = response.results[0]
166 |         if not result.alternatives:
167 |             continue
168 |         transcript = result.alternatives[0].transcript
169 |         overwrite_chars = " " * (num_chars_printed - len(transcript))
170 |         if not result.is_final:
171 |             sys.stdout.write(transcript + overwrite_chars + "\r")
172 |             sys.stdout.flush()
173 |             num_chars_printed = len(transcript)
174 |         else:
175 |             print(transcript + overwrite_chars)
176 |             break
177 |     return transcript + overwrite_chars
178 | 


--------------------------------------------------------------------------------
/lib/google_speech_v2_grpc.py:
--------------------------------------------------------------------------------
  1 | from __future__ import division
  2 | 
  3 | import math
  4 | import os
  5 | import struct
  6 | import sys
  7 | import time
  8 | from typing import Any, Optional, Union
  9 | 
 10 | import grpc
 11 | import numpy as np
 12 | import pyaudio
 13 | 
 14 | from .google_speech_v2 import MicrophoneStreamV2
 15 | 
 16 | sys.path.append(os.path.join(os.path.dirname(__file__), "grpc"))
 17 | import gpt_server_pb2
 18 | import gpt_server_pb2_grpc
 19 | import motion_server_pb2
 20 | import motion_server_pb2_grpc
 21 | import voice_server_pb2
 22 | import voice_server_pb2_grpc
 23 | 
 24 | 
 25 | class MicrophoneStreamV2Grpc(MicrophoneStreamV2):
 26 |     """
 27 |     マイクから音声をストリーミングするためのクラス。
 28 | 
 29 |     """
 30 | 
 31 |     def __init__(
 32 |         self,
 33 |         rate: float,
 34 |         chunk: float,
 35 |         _timeout_thresh: float = 0.5,
 36 |         _start_timeout_thresh: float = 4.0,
 37 |         _db_thresh: float = 55.0,
 38 |         gpt_host: str = "127.0.0.1",
 39 |         gpt_port: str = "10001",
 40 |         voice_host: str = "127.0.0.1",
 41 |         voice_port: str = "10002",
 42 |         motion_server_host: Optional[str] = "127.0.0.1",
 43 |         motion_server_port: Optional[str] = "50055",
 44 |     ) -> None:
 45 |         """クラスの初期化メソッド。
 46 | 
 47 |         Args:
 48 |             rate (float): サンプリングレート。
 49 |             chunk (float): チャンクサイズ。
 50 |             _timeout_thresh (float): 音声が停止したと判断するタイムアウト閾値（秒）。デフォルトは0.5秒。
 51 |             _db_thresh (float): 音声が開始されたと判断する音量閾値（デシベル）。デフォルトは55.0デシベル。
 52 |             gpt_host (str, optional): GPTサーバーのホスト名。デフォルトは"127.0.0.1"。
 53 |             gpt_port (str, optional): GPTサーバーのポート番号。デフォルトは"10001"。
 54 |             voice_host (str, optional): VoiceVoxサーバーのホスト名。デフォルトは"127.0.0.1"。
 55 |             voice_port (str, optional): VoiceVoxサーバーのポート番号。デフォルトは"10002"。
 56 |             motion_server_host (str, optional): モーションサーバーのIPアドレス。デフォルトは"127.0.0.1"。
 57 |             motion_server_port (str, optional): モーションサーバーのポート番号。デフォルトは"50055"。
 58 |         """
 59 |         super().__init__(
 60 |             rate=rate,
 61 |             chunk=chunk,
 62 |             _timeout_thresh=_timeout_thresh,
 63 |             _start_timeout_thresh=_start_timeout_thresh,
 64 |             _db_thresh=_db_thresh,
 65 |         )
 66 |         gpt_channel = grpc.insecure_channel(gpt_host + ":" + gpt_port)
 67 |         self.gpt_stub = gpt_server_pb2_grpc.GptServerServiceStub(gpt_channel)
 68 |         voice_channel = grpc.insecure_channel(voice_host + ":" + voice_port)
 69 |         self.voice_stub = voice_server_pb2_grpc.VoiceServerServiceStub(voice_channel)
 70 |         self.motion_stub = None
 71 |         if motion_server_host is not None and motion_server_port is not None:
 72 |             motion_channel = grpc.insecure_channel(
 73 |                 motion_server_host + ":" + motion_server_port
 74 |             )
 75 |             self.motion_stub = motion_server_pb2_grpc.MotionServerServiceStub(
 76 |                 motion_channel
 77 |             )
 78 | 
 79 |     def __exit__(
 80 |         self,
 81 |         rate: float,
 82 |         chunk: float,
 83 |         _timeout_thresh: float = 0.5,
 84 |         _start_timeout_thresh: float = 4.0,
 85 |         _db_thresh: float = 55.0,
 86 |     ) -> None:
 87 |         """PyAudioストリームを閉じます。
 88 | 
 89 |         Args:
 90 |             rate (float): サンプリングレート。
 91 |             chunk (float): チャンクサイズ。
 92 |             _timeout_thresh (float, optional): 音声が停止したと判断するタイムアウト閾値（秒）。デフォルトは0.5秒。
 93 |             _start_timeout_thresh (float): マイクの入力が開始しないまま終了するまでのタイムアウト閾値（秒）。デフォルトは4.0秒。
 94 |             _db_thresh (float, optional): 音声が開始されたと判断する音量閾値（デシベル）。デフォルトは55.0デシベル。
 95 | 
 96 |         """
 97 |         super().__exit__(
 98 |             rate, chunk, _timeout_thresh, _start_timeout_thresh, _db_thresh
 99 |         )
100 |         try:
101 |             self.gpt_stub.SendMotion(gpt_server_pb2.SendMotionRequest())
102 |         except BaseException:
103 |             print("Send motion error")
104 |             pass
105 | 
106 |     def _fill_buffer(
107 |         self, in_data: bytes, frame_count: int, time_info: Any, status_flags: Any
108 |     ) -> Union[None, Any]:
109 |         """マイクからの入力データをバッファーに書き込む。
110 | 
111 |         Args:
112 |             in_data (bytes): 入力データ
113 |             frame_count (int): フレーム数
114 |             time_info (Any): 時間
115 |             status_flags (Any): ステータスフラグ
116 | 
117 |         Returns:
118 |             Union[None, Any]: Noneまたは続行のためのフラグ
119 | 
120 |         """
121 |         if self.is_start_callback:
122 |             in_data2 = struct.unpack(f"{len(in_data) / 2:.0f}h", in_data)
123 |             rms = math.sqrt(np.square(in_data2).mean())
124 |             power = 20 * math.log10(rms) if rms > 0.0 else -math.inf  # RMS to db
125 |             if power > self.db_thresh:
126 |                 if not self.is_start:
127 |                     self.is_start = True
128 |                     if self.motion_stub is not None:
129 |                         try:
130 |                             self.motion_stub.SetMotion(
131 |                                 motion_server_pb2.SetMotionRequest(
132 |                                     name="nod", priority=3, repeat=True
133 |                                 )
134 |                             )
135 |                         except BaseException:
136 |                             pass
137 |                 self.start_time = time.time()
138 |             if self.is_start:
139 |                 self._buff.put(in_data)
140 |                 if time.time() - self.start_time >= self.timeout_thresh:
141 |                     self.is_start = False
142 |                     self.closed = True
143 |                     try:
144 |                         self.voice_stub.EnableVoicePlay(
145 |                             voice_server_pb2.EnableVoicePlayRequest()
146 |                         )
147 |                     except BaseException:
148 |                         print("EnableVoicePlay error")
149 |                         pass
150 |                     return None, pyaudio.paComplete
151 |         return None, pyaudio.paContinue
152 | 
153 | 
154 | class GoogleSpeechV2Grpc(object):
155 |     """
156 |     Google Speech-to-Text APIのレスポンスを処理するクラス。
157 | 
158 |     """
159 | 
160 |     def __init__(
161 |         self,
162 |         gpt_host: str = "127.0.0.1",
163 |         gpt_port: str = "10001",
164 |         voice_host: str = "127.0.0.1",
165 |         voice_port: str = "10002",
166 |     ) -> None:
167 |         """GoogleSpeechGrpcオブジェクトを初期化する。
168 | 
169 |         Args:
170 |             gpt_host (str, optional): GPTサーバーのホスト名。デフォルトは"127.0.0.1"。
171 |             gpt_port (str, optional): GPTサーバーのポート番号。デフォルトは"10001"。
172 |             voice_host (str, optional): VoiceVoxサーバーのホスト名。デフォルトは"127.0.0.1"。
173 |             voice_port (str, optional): VoiceVoxサーバーのポート番号。デフォルトは"10002"。
174 |         """
175 | 
176 |         gpt_channel = grpc.insecure_channel(gpt_host + ":" + gpt_port)
177 |         self.gpt_stub = gpt_server_pb2_grpc.GptServerServiceStub(gpt_channel)
178 |         voice_channel = grpc.insecure_channel(voice_host + ":" + voice_port)
179 |         self.voice_stub = voice_server_pb2_grpc.VoiceServerServiceStub(voice_channel)
180 | 
181 |     def listen_publisher_grpc(
182 |         self, responses: Any, progress_report_len: int = 0
183 |     ) -> str:
184 |         """
185 |         Google Cloud Speech-to-Text APIの応答からテキストを取得し、リアルタイムで出力。
186 | 
187 |         Args:
188 |             responses (Any): ストリーミング認識の応答
189 |             progress_report_len (int, optional): ここで指定した文字数以上になると、その時点で一度GPTに結果を送信する。0の場合は途中での送信は無効となる。デフォルトは0。
190 | 
191 |         Returns:
192 |             str: 認識されたテキスト
193 |         """
194 |         is_progress_report = False
195 |         num_chars_printed = 0
196 |         transcript = ""
197 |         overwrite_chars = ""
198 |         try:
199 |             self.voice_stub.DisableVoicePlay(voice_server_pb2.DisableVoicePlayRequest())
200 |         except BaseException:
201 |             print("DisableVoicePlay error")
202 |             pass
203 |         try:
204 |             self.voice_stub.InterruptVoice(voice_server_pb2.InterruptVoiceRequest())
205 |         except BaseException:
206 |             print("InterruptVoice error")
207 |             pass
208 |         for response in responses:
209 |             # if response.error.code:
210 |             #    break
211 |             if not response.results:
212 |                 continue
213 |             result = response.results[0]
214 |             if not result.alternatives:
215 |                 continue
216 |             transcript = result.alternatives[0].transcript
217 |             overwrite_chars = " " * (num_chars_printed - len(transcript))
218 |             if not result.is_final:
219 |                 sys.stdout.write(transcript + overwrite_chars + "\r")
220 |                 sys.stdout.flush()
221 |                 num_chars_printed = len(transcript)
222 |                 if not is_progress_report and num_chars_printed > progress_report_len:
223 |                     if progress_report_len > 0:
224 |                         try:
225 |                             self.gpt_stub.SetGpt(
226 |                                 gpt_server_pb2.SetGptRequest(
227 |                                     text=transcript + overwrite_chars, is_finish=False
228 |                                 )
229 |                             )
230 |                         except BaseException as e:
231 |                             print("SetGpt error:", e)
232 |                             pass
233 |                         is_progress_report = True
234 |             else:
235 |                 if progress_report_len > 0:
236 |                     if (
237 |                         not is_progress_report
238 |                         and num_chars_printed > progress_report_len
239 |                     ):
240 |                         try:
241 |                             self.gpt_stub.SetGpt(
242 |                                 gpt_server_pb2.SetGptRequest(
243 |                                     text=transcript + overwrite_chars, is_finish=False
244 |                                 )
245 |                             )
246 |                         except BaseException as e:
247 |                             print("SetGpt error:", e)
248 |                             pass
249 |                 break
250 |         try:
251 |             self.gpt_stub.SetGpt(
252 |                 gpt_server_pb2.SetGptRequest(
253 |                     text=transcript + overwrite_chars, is_finish=True
254 |                 )
255 |             )
256 |         except BaseException as e:
257 |             print("SetGpt error:", e)
258 |             pass
259 |         return transcript + overwrite_chars
260 | 


--------------------------------------------------------------------------------
/lib/grpc/gpt_server_pb2.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | # Generated by the protocol buffer compiler.  DO NOT EDIT!
 3 | # source: gpt_server.proto
 4 | # Protobuf Python Version: 4.25.0
 5 | """Generated protocol buffer code."""
 6 | from google.protobuf import descriptor as _descriptor
 7 | from google.protobuf import descriptor_pool as _descriptor_pool
 8 | from google.protobuf import symbol_database as _symbol_database
 9 | from google.protobuf.internal import builder as _builder
10 | # @@protoc_insertion_point(imports)
11 | 
12 | _sym_db = _symbol_database.Default()
13 | 
14 | 
15 | 
16 | 
17 | DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x10gpt_server.proto\x12\ngpt_server\"C\n\rSetGptRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\x16\n\tis_finish\x18\x02 \x01(\x08H\x00\x88\x01\x01\x42\x0c\n\n_is_finish\"\x1e\n\x0bSetGptReply\x12\x0f\n\x07success\x18\x01 \x01(\x08\"\x15\n\x13InterruptGptRequest\"$\n\x11InterruptGptReply\x12\x0f\n\x07success\x18\x01 \x01(\x08\"\x13\n\x11SendMotionRequest\"\"\n\x0fSendMotionReply\x12\x0f\n\x07success\x18\x01 \x01(\x08\x32\xea\x01\n\x10GptServerService\x12<\n\x06SetGpt\x12\x19.gpt_server.SetGptRequest\x1a\x17.gpt_server.SetGptReply\x12N\n\x0cInterruptGpt\x12\x1f.gpt_server.InterruptGptRequest\x1a\x1d.gpt_server.InterruptGptReply\x12H\n\nSendMotion\x12\x1d.gpt_server.SendMotionRequest\x1a\x1b.gpt_server.SendMotionReplyb\x06proto3')
18 | 
19 | _globals = globals()
20 | _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
21 | _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'gpt_server_pb2', _globals)
22 | if _descriptor._USE_C_DESCRIPTORS == False:
23 |   DESCRIPTOR._options = None
24 |   _globals['_SETGPTREQUEST']._serialized_start=32
25 |   _globals['_SETGPTREQUEST']._serialized_end=99
26 |   _globals['_SETGPTREPLY']._serialized_start=101
27 |   _globals['_SETGPTREPLY']._serialized_end=131
28 |   _globals['_INTERRUPTGPTREQUEST']._serialized_start=133
29 |   _globals['_INTERRUPTGPTREQUEST']._serialized_end=154
30 |   _globals['_INTERRUPTGPTREPLY']._serialized_start=156
31 |   _globals['_INTERRUPTGPTREPLY']._serialized_end=192
32 |   _globals['_SENDMOTIONREQUEST']._serialized_start=194
33 |   _globals['_SENDMOTIONREQUEST']._serialized_end=213
34 |   _globals['_SENDMOTIONREPLY']._serialized_start=215
35 |   _globals['_SENDMOTIONREPLY']._serialized_end=249
36 |   _globals['_GPTSERVERSERVICE']._serialized_start=252
37 |   _globals['_GPTSERVERSERVICE']._serialized_end=486
38 | # @@protoc_insertion_point(module_scope)
39 | 


--------------------------------------------------------------------------------
/lib/grpc/gpt_server_pb2_grpc.py:
--------------------------------------------------------------------------------
  1 | # Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT!
  2 | """Client and server classes corresponding to protobuf-defined services."""
  3 | import grpc
  4 | 
  5 | import gpt_server_pb2 as gpt__server__pb2
  6 | 
  7 | 
  8 | class GptServerServiceStub(object):
  9 |     """Missing associated documentation comment in .proto file."""
 10 | 
 11 |     def __init__(self, channel):
 12 |         """Constructor.
 13 | 
 14 |         Args:
 15 |             channel: A grpc.Channel.
 16 |         """
 17 |         self.SetGpt = channel.unary_unary(
 18 |                 '/gpt_server.GptServerService/SetGpt',
 19 |                 request_serializer=gpt__server__pb2.SetGptRequest.SerializeToString,
 20 |                 response_deserializer=gpt__server__pb2.SetGptReply.FromString,
 21 |                 )
 22 |         self.InterruptGpt = channel.unary_unary(
 23 |                 '/gpt_server.GptServerService/InterruptGpt',
 24 |                 request_serializer=gpt__server__pb2.InterruptGptRequest.SerializeToString,
 25 |                 response_deserializer=gpt__server__pb2.InterruptGptReply.FromString,
 26 |                 )
 27 |         self.SendMotion = channel.unary_unary(
 28 |                 '/gpt_server.GptServerService/SendMotion',
 29 |                 request_serializer=gpt__server__pb2.SendMotionRequest.SerializeToString,
 30 |                 response_deserializer=gpt__server__pb2.SendMotionReply.FromString,
 31 |                 )
 32 | 
 33 | 
 34 | class GptServerServiceServicer(object):
 35 |     """Missing associated documentation comment in .proto file."""
 36 | 
 37 |     def SetGpt(self, request, context):
 38 |         """Missing associated documentation comment in .proto file."""
 39 |         context.set_code(grpc.StatusCode.UNIMPLEMENTED)
 40 |         context.set_details('Method not implemented!')
 41 |         raise NotImplementedError('Method not implemented!')
 42 | 
 43 |     def InterruptGpt(self, request, context):
 44 |         """Missing associated documentation comment in .proto file."""
 45 |         context.set_code(grpc.StatusCode.UNIMPLEMENTED)
 46 |         context.set_details('Method not implemented!')
 47 |         raise NotImplementedError('Method not implemented!')
 48 | 
 49 |     def SendMotion(self, request, context):
 50 |         """Missing associated documentation comment in .proto file."""
 51 |         context.set_code(grpc.StatusCode.UNIMPLEMENTED)
 52 |         context.set_details('Method not implemented!')
 53 |         raise NotImplementedError('Method not implemented!')
 54 | 
 55 | 
 56 | def add_GptServerServiceServicer_to_server(servicer, server):
 57 |     rpc_method_handlers = {
 58 |             'SetGpt': grpc.unary_unary_rpc_method_handler(
 59 |                     servicer.SetGpt,
 60 |                     request_deserializer=gpt__server__pb2.SetGptRequest.FromString,
 61 |                     response_serializer=gpt__server__pb2.SetGptReply.SerializeToString,
 62 |             ),
 63 |             'InterruptGpt': grpc.unary_unary_rpc_method_handler(
 64 |                     servicer.InterruptGpt,
 65 |                     request_deserializer=gpt__server__pb2.InterruptGptRequest.FromString,
 66 |                     response_serializer=gpt__server__pb2.InterruptGptReply.SerializeToString,
 67 |             ),
 68 |             'SendMotion': grpc.unary_unary_rpc_method_handler(
 69 |                     servicer.SendMotion,
 70 |                     request_deserializer=gpt__server__pb2.SendMotionRequest.FromString,
 71 |                     response_serializer=gpt__server__pb2.SendMotionReply.SerializeToString,
 72 |             ),
 73 |     }
 74 |     generic_handler = grpc.method_handlers_generic_handler(
 75 |             'gpt_server.GptServerService', rpc_method_handlers)
 76 |     server.add_generic_rpc_handlers((generic_handler,))
 77 | 
 78 | 
 79 |  # This class is part of an EXPERIMENTAL API.
 80 | class GptServerService(object):
 81 |     """Missing associated documentation comment in .proto file."""
 82 | 
 83 |     @staticmethod
 84 |     def SetGpt(request,
 85 |             target,
 86 |             options=(),
 87 |             channel_credentials=None,
 88 |             call_credentials=None,
 89 |             insecure=False,
 90 |             compression=None,
 91 |             wait_for_ready=None,
 92 |             timeout=None,
 93 |             metadata=None):
 94 |         return grpc.experimental.unary_unary(request, target, '/gpt_server.GptServerService/SetGpt',
 95 |             gpt__server__pb2.SetGptRequest.SerializeToString,
 96 |             gpt__server__pb2.SetGptReply.FromString,
 97 |             options, channel_credentials,
 98 |             insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
 99 | 
100 |     @staticmethod
101 |     def InterruptGpt(request,
102 |             target,
103 |             options=(),
104 |             channel_credentials=None,
105 |             call_credentials=None,
106 |             insecure=False,
107 |             compression=None,
108 |             wait_for_ready=None,
109 |             timeout=None,
110 |             metadata=None):
111 |         return grpc.experimental.unary_unary(request, target, '/gpt_server.GptServerService/InterruptGpt',
112 |             gpt__server__pb2.InterruptGptRequest.SerializeToString,
113 |             gpt__server__pb2.InterruptGptReply.FromString,
114 |             options, channel_credentials,
115 |             insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
116 | 
117 |     @staticmethod
118 |     def SendMotion(request,
119 |             target,
120 |             options=(),
121 |             channel_credentials=None,
122 |             call_credentials=None,
123 |             insecure=False,
124 |             compression=None,
125 |             wait_for_ready=None,
126 |             timeout=None,
127 |             metadata=None):
128 |         return grpc.experimental.unary_unary(request, target, '/gpt_server.GptServerService/SendMotion',
129 |             gpt__server__pb2.SendMotionRequest.SerializeToString,
130 |             gpt__server__pb2.SendMotionReply.FromString,
131 |             options, channel_credentials,
132 |             insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
133 | 


--------------------------------------------------------------------------------
/lib/grpc/motion_server_pb2.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | # Generated by the protocol buffer compiler.  DO NOT EDIT!
 3 | # source: motion_server.proto
 4 | # Protobuf Python Version: 4.25.0
 5 | """Generated protocol buffer code."""
 6 | from google.protobuf import descriptor as _descriptor
 7 | from google.protobuf import descriptor_pool as _descriptor_pool
 8 | from google.protobuf import symbol_database as _symbol_database
 9 | from google.protobuf.internal import builder as _builder
10 | # @@protoc_insertion_point(imports)
11 | 
12 | _sym_db = _symbol_database.Default()
13 | 
14 | 
15 | 
16 | 
17 | DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x13motion_server.proto\x12\rmotion_server\"\x82\x01\n\x10SetMotionRequest\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x15\n\x08priority\x18\x02 \x01(\x05H\x00\x88\x01\x01\x12\x13\n\x06repeat\x18\x03 \x01(\x08H\x01\x88\x01\x01\x12\x12\n\x05\x63lear\x18\x04 \x01(\x08H\x02\x88\x01\x01\x42\x0b\n\t_priorityB\t\n\x07_repeatB\x08\n\x06_clear\"!\n\x0eSetMotionReply\x12\x0f\n\x07success\x18\x01 \x01(\x08\"8\n\x12\x43learMotionRequest\x12\x15\n\x08priority\x18\x01 \x01(\x05H\x00\x88\x01\x01\x42\x0b\n\t_priority\"#\n\x10\x43learMotionReply\x12\x0f\n\x07success\x18\x01 \x01(\x08\"7\n\x11StopRepeatRequest\x12\x15\n\x08priority\x18\x01 \x01(\x05H\x00\x88\x01\x01\x42\x0b\n\t_priority\"\"\n\x0fStopRepeatReply\x12\x0f\n\x07success\x18\x01 \x01(\x08\"`\n\x0eSetWaitRequest\x12\x0c\n\x04time\x18\x01 \x01(\x02\x12\x15\n\x08priority\x18\x02 \x01(\x05H\x00\x88\x01\x01\x12\x12\n\x05\x63lear\x18\x03 \x01(\x08H\x01\x88\x01\x01\x42\x0b\n\t_priorityB\x08\n\x06_clear\"\x1f\n\x0cSetWaitReply\x12\x0f\n\x07success\x18\x01 \x01(\x08\"i\n\rSetPosRequest\x12\x10\n\x03pan\x18\x01 \x01(\x02H\x00\x88\x01\x01\x12\x11\n\x04tilt\x18\x02 \x01(\x02H\x01\x88\x01\x01\x12\x15\n\x08priority\x18\x03 \x01(\x05H\x02\x88\x01\x01\x42\x06\n\x04_panB\x07\n\x05_tiltB\x0b\n\t_priority\"\x1e\n\x0bSetPosReply\x12\x0f\n\x07success\x18\x01 \x01(\x08\"i\n\rSetVelRequest\x12\x10\n\x03pan\x18\x01 \x01(\x02H\x00\x88\x01\x01\x12\x11\n\x04tilt\x18\x02 \x01(\x02H\x01\x88\x01\x01\x12\x15\n\x08priority\x18\x03 \x01(\x05H\x02\x88\x01\x01\x42\x06\n\x04_panB\x07\n\x05_tiltB\x0b\n\t_priority\"\x1e\n\x0bSetVelReply\x12\x0f\n\x07success\x18\x01 \x01(\x08\"i\n\rSetAccRequest\x12\x10\n\x03pan\x18\x01 \x01(\x02H\x00\x88\x01\x01\x12\x11\n\x04tilt\x18\x02 \x01(\x02H\x01\x88\x01\x01\x12\x15\n\x08priority\x18\x03 \x01(\x05H\x02\x88\x01\x01\x42\x06\n\x04_panB\x07\n\x05_tiltB\x0b\n\t_priority\"\x1e\n\x0bSetAccReply\x12\x0f\n\x07success\x18\x01 \x01(\x08\x32\xa6\x04\n\x13MotionServerService\x12M\n\tSetMotion\x12\x1f.motion_server.SetMotionRequest\x1a\x1d.motion_server.SetMotionReply\"\x00\x12S\n\x0b\x43learMotion\x12!.motion_server.ClearMotionRequest\x1a\x1f.motion_server.ClearMotionReply\"\x00\x12P\n\nStopRepeat\x12 .motion_server.StopRepeatRequest\x1a\x1e.motion_server.StopRepeatReply\"\x00\x12G\n\x07SetWait\x12\x1d.motion_server.SetWaitRequest\x1a\x1b.motion_server.SetWaitReply\"\x00\x12\x44\n\x06SetPos\x12\x1c.motion_server.SetPosRequest\x1a\x1a.motion_server.SetPosReply\"\x00\x12\x44\n\x06SetVel\x12\x1c.motion_server.SetVelRequest\x1a\x1a.motion_server.SetVelReply\"\x00\x12\x44\n\x06SetAcc\x12\x1c.motion_server.SetAccRequest\x1a\x1a.motion_server.SetAccReply\"\x00\x62\x06proto3')
18 | 
19 | _globals = globals()
20 | _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
21 | _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'motion_server_pb2', _globals)
22 | if _descriptor._USE_C_DESCRIPTORS == False:
23 |   DESCRIPTOR._options = None
24 |   _globals['_SETMOTIONREQUEST']._serialized_start=39
25 |   _globals['_SETMOTIONREQUEST']._serialized_end=169
26 |   _globals['_SETMOTIONREPLY']._serialized_start=171
27 |   _globals['_SETMOTIONREPLY']._serialized_end=204
28 |   _globals['_CLEARMOTIONREQUEST']._serialized_start=206
29 |   _globals['_CLEARMOTIONREQUEST']._serialized_end=262
30 |   _globals['_CLEARMOTIONREPLY']._serialized_start=264
31 |   _globals['_CLEARMOTIONREPLY']._serialized_end=299
32 |   _globals['_STOPREPEATREQUEST']._serialized_start=301
33 |   _globals['_STOPREPEATREQUEST']._serialized_end=356
34 |   _globals['_STOPREPEATREPLY']._serialized_start=358
35 |   _globals['_STOPREPEATREPLY']._serialized_end=392
36 |   _globals['_SETWAITREQUEST']._serialized_start=394
37 |   _globals['_SETWAITREQUEST']._serialized_end=490
38 |   _globals['_SETWAITREPLY']._serialized_start=492
39 |   _globals['_SETWAITREPLY']._serialized_end=523
40 |   _globals['_SETPOSREQUEST']._serialized_start=525
41 |   _globals['_SETPOSREQUEST']._serialized_end=630
42 |   _globals['_SETPOSREPLY']._serialized_start=632
43 |   _globals['_SETPOSREPLY']._serialized_end=662
44 |   _globals['_SETVELREQUEST']._serialized_start=664
45 |   _globals['_SETVELREQUEST']._serialized_end=769
46 |   _globals['_SETVELREPLY']._serialized_start=771
47 |   _globals['_SETVELREPLY']._serialized_end=801
48 |   _globals['_SETACCREQUEST']._serialized_start=803
49 |   _globals['_SETACCREQUEST']._serialized_end=908
50 |   _globals['_SETACCREPLY']._serialized_start=910
51 |   _globals['_SETACCREPLY']._serialized_end=940
52 |   _globals['_MOTIONSERVERSERVICE']._serialized_start=943
53 |   _globals['_MOTIONSERVERSERVICE']._serialized_end=1493
54 | # @@protoc_insertion_point(module_scope)
55 | 


--------------------------------------------------------------------------------
/lib/grpc/motion_server_pb2_grpc.py:
--------------------------------------------------------------------------------
  1 | # Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT!
  2 | """Client and server classes corresponding to protobuf-defined services."""
  3 | import grpc
  4 | 
  5 | import motion_server_pb2 as motion__server__pb2
  6 | 
  7 | 
  8 | class MotionServerServiceStub(object):
  9 |     """Missing associated documentation comment in .proto file."""
 10 | 
 11 |     def __init__(self, channel):
 12 |         """Constructor.
 13 | 
 14 |         Args:
 15 |             channel: A grpc.Channel.
 16 |         """
 17 |         self.SetMotion = channel.unary_unary(
 18 |                 '/motion_server.MotionServerService/SetMotion',
 19 |                 request_serializer=motion__server__pb2.SetMotionRequest.SerializeToString,
 20 |                 response_deserializer=motion__server__pb2.SetMotionReply.FromString,
 21 |                 )
 22 |         self.ClearMotion = channel.unary_unary(
 23 |                 '/motion_server.MotionServerService/ClearMotion',
 24 |                 request_serializer=motion__server__pb2.ClearMotionRequest.SerializeToString,
 25 |                 response_deserializer=motion__server__pb2.ClearMotionReply.FromString,
 26 |                 )
 27 |         self.StopRepeat = channel.unary_unary(
 28 |                 '/motion_server.MotionServerService/StopRepeat',
 29 |                 request_serializer=motion__server__pb2.StopRepeatRequest.SerializeToString,
 30 |                 response_deserializer=motion__server__pb2.StopRepeatReply.FromString,
 31 |                 )
 32 |         self.SetWait = channel.unary_unary(
 33 |                 '/motion_server.MotionServerService/SetWait',
 34 |                 request_serializer=motion__server__pb2.SetWaitRequest.SerializeToString,
 35 |                 response_deserializer=motion__server__pb2.SetWaitReply.FromString,
 36 |                 )
 37 |         self.SetPos = channel.unary_unary(
 38 |                 '/motion_server.MotionServerService/SetPos',
 39 |                 request_serializer=motion__server__pb2.SetPosRequest.SerializeToString,
 40 |                 response_deserializer=motion__server__pb2.SetPosReply.FromString,
 41 |                 )
 42 |         self.SetVel = channel.unary_unary(
 43 |                 '/motion_server.MotionServerService/SetVel',
 44 |                 request_serializer=motion__server__pb2.SetVelRequest.SerializeToString,
 45 |                 response_deserializer=motion__server__pb2.SetVelReply.FromString,
 46 |                 )
 47 |         self.SetAcc = channel.unary_unary(
 48 |                 '/motion_server.MotionServerService/SetAcc',
 49 |                 request_serializer=motion__server__pb2.SetAccRequest.SerializeToString,
 50 |                 response_deserializer=motion__server__pb2.SetAccReply.FromString,
 51 |                 )
 52 | 
 53 | 
 54 | class MotionServerServiceServicer(object):
 55 |     """Missing associated documentation comment in .proto file."""
 56 | 
 57 |     def SetMotion(self, request, context):
 58 |         """Missing associated documentation comment in .proto file."""
 59 |         context.set_code(grpc.StatusCode.UNIMPLEMENTED)
 60 |         context.set_details('Method not implemented!')
 61 |         raise NotImplementedError('Method not implemented!')
 62 | 
 63 |     def ClearMotion(self, request, context):
 64 |         """Missing associated documentation comment in .proto file."""
 65 |         context.set_code(grpc.StatusCode.UNIMPLEMENTED)
 66 |         context.set_details('Method not implemented!')
 67 |         raise NotImplementedError('Method not implemented!')
 68 | 
 69 |     def StopRepeat(self, request, context):
 70 |         """Missing associated documentation comment in .proto file."""
 71 |         context.set_code(grpc.StatusCode.UNIMPLEMENTED)
 72 |         context.set_details('Method not implemented!')
 73 |         raise NotImplementedError('Method not implemented!')
 74 | 
 75 |     def SetWait(self, request, context):
 76 |         """Missing associated documentation comment in .proto file."""
 77 |         context.set_code(grpc.StatusCode.UNIMPLEMENTED)
 78 |         context.set_details('Method not implemented!')
 79 |         raise NotImplementedError('Method not implemented!')
 80 | 
 81 |     def SetPos(self, request, context):
 82 |         """Missing associated documentation comment in .proto file."""
 83 |         context.set_code(grpc.StatusCode.UNIMPLEMENTED)
 84 |         context.set_details('Method not implemented!')
 85 |         raise NotImplementedError('Method not implemented!')
 86 | 
 87 |     def SetVel(self, request, context):
 88 |         """Missing associated documentation comment in .proto file."""
 89 |         context.set_code(grpc.StatusCode.UNIMPLEMENTED)
 90 |         context.set_details('Method not implemented!')
 91 |         raise NotImplementedError('Method not implemented!')
 92 | 
 93 |     def SetAcc(self, request, context):
 94 |         """Missing associated documentation comment in .proto file."""
 95 |         context.set_code(grpc.StatusCode.UNIMPLEMENTED)
 96 |         context.set_details('Method not implemented!')
 97 |         raise NotImplementedError('Method not implemented!')
 98 | 
 99 | 
100 | def add_MotionServerServiceServicer_to_server(servicer, server):
101 |     rpc_method_handlers = {
102 |             'SetMotion': grpc.unary_unary_rpc_method_handler(
103 |                     servicer.SetMotion,
104 |                     request_deserializer=motion__server__pb2.SetMotionRequest.FromString,
105 |                     response_serializer=motion__server__pb2.SetMotionReply.SerializeToString,
106 |             ),
107 |             'ClearMotion': grpc.unary_unary_rpc_method_handler(
108 |                     servicer.ClearMotion,
109 |                     request_deserializer=motion__server__pb2.ClearMotionRequest.FromString,
110 |                     response_serializer=motion__server__pb2.ClearMotionReply.SerializeToString,
111 |             ),
112 |             'StopRepeat': grpc.unary_unary_rpc_method_handler(
113 |                     servicer.StopRepeat,
114 |                     request_deserializer=motion__server__pb2.StopRepeatRequest.FromString,
115 |                     response_serializer=motion__server__pb2.StopRepeatReply.SerializeToString,
116 |             ),
117 |             'SetWait': grpc.unary_unary_rpc_method_handler(
118 |                     servicer.SetWait,
119 |                     request_deserializer=motion__server__pb2.SetWaitRequest.FromString,
120 |                     response_serializer=motion__server__pb2.SetWaitReply.SerializeToString,
121 |             ),
122 |             'SetPos': grpc.unary_unary_rpc_method_handler(
123 |                     servicer.SetPos,
124 |                     request_deserializer=motion__server__pb2.SetPosRequest.FromString,
125 |                     response_serializer=motion__server__pb2.SetPosReply.SerializeToString,
126 |             ),
127 |             'SetVel': grpc.unary_unary_rpc_method_handler(
128 |                     servicer.SetVel,
129 |                     request_deserializer=motion__server__pb2.SetVelRequest.FromString,
130 |                     response_serializer=motion__server__pb2.SetVelReply.SerializeToString,
131 |             ),
132 |             'SetAcc': grpc.unary_unary_rpc_method_handler(
133 |                     servicer.SetAcc,
134 |                     request_deserializer=motion__server__pb2.SetAccRequest.FromString,
135 |                     response_serializer=motion__server__pb2.SetAccReply.SerializeToString,
136 |             ),
137 |     }
138 |     generic_handler = grpc.method_handlers_generic_handler(
139 |             'motion_server.MotionServerService', rpc_method_handlers)
140 |     server.add_generic_rpc_handlers((generic_handler,))
141 | 
142 | 
143 |  # This class is part of an EXPERIMENTAL API.
144 | class MotionServerService(object):
145 |     """Missing associated documentation comment in .proto file."""
146 | 
147 |     @staticmethod
148 |     def SetMotion(request,
149 |             target,
150 |             options=(),
151 |             channel_credentials=None,
152 |             call_credentials=None,
153 |             insecure=False,
154 |             compression=None,
155 |             wait_for_ready=None,
156 |             timeout=None,
157 |             metadata=None):
158 |         return grpc.experimental.unary_unary(request, target, '/motion_server.MotionServerService/SetMotion',
159 |             motion__server__pb2.SetMotionRequest.SerializeToString,
160 |             motion__server__pb2.SetMotionReply.FromString,
161 |             options, channel_credentials,
162 |             insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
163 | 
164 |     @staticmethod
165 |     def ClearMotion(request,
166 |             target,
167 |             options=(),
168 |             channel_credentials=None,
169 |             call_credentials=None,
170 |             insecure=False,
171 |             compression=None,
172 |             wait_for_ready=None,
173 |             timeout=None,
174 |             metadata=None):
175 |         return grpc.experimental.unary_unary(request, target, '/motion_server.MotionServerService/ClearMotion',
176 |             motion__server__pb2.ClearMotionRequest.SerializeToString,
177 |             motion__server__pb2.ClearMotionReply.FromString,
178 |             options, channel_credentials,
179 |             insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
180 | 
181 |     @staticmethod
182 |     def StopRepeat(request,
183 |             target,
184 |             options=(),
185 |             channel_credentials=None,
186 |             call_credentials=None,
187 |             insecure=False,
188 |             compression=None,
189 |             wait_for_ready=None,
190 |             timeout=None,
191 |             metadata=None):
192 |         return grpc.experimental.unary_unary(request, target, '/motion_server.MotionServerService/StopRepeat',
193 |             motion__server__pb2.StopRepeatRequest.SerializeToString,
194 |             motion__server__pb2.StopRepeatReply.FromString,
195 |             options, channel_credentials,
196 |             insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
197 | 
198 |     @staticmethod
199 |     def SetWait(request,
200 |             target,
201 |             options=(),
202 |             channel_credentials=None,
203 |             call_credentials=None,
204 |             insecure=False,
205 |             compression=None,
206 |             wait_for_ready=None,
207 |             timeout=None,
208 |             metadata=None):
209 |         return grpc.experimental.unary_unary(request, target, '/motion_server.MotionServerService/SetWait',
210 |             motion__server__pb2.SetWaitRequest.SerializeToString,
211 |             motion__server__pb2.SetWaitReply.FromString,
212 |             options, channel_credentials,
213 |             insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
214 | 
215 |     @staticmethod
216 |     def SetPos(request,
217 |             target,
218 |             options=(),
219 |             channel_credentials=None,
220 |             call_credentials=None,
221 |             insecure=False,
222 |             compression=None,
223 |             wait_for_ready=None,
224 |             timeout=None,
225 |             metadata=None):
226 |         return grpc.experimental.unary_unary(request, target, '/motion_server.MotionServerService/SetPos',
227 |             motion__server__pb2.SetPosRequest.SerializeToString,
228 |             motion__server__pb2.SetPosReply.FromString,
229 |             options, channel_credentials,
230 |             insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
231 | 
232 |     @staticmethod
233 |     def SetVel(request,
234 |             target,
235 |             options=(),
236 |             channel_credentials=None,
237 |             call_credentials=None,
238 |             insecure=False,
239 |             compression=None,
240 |             wait_for_ready=None,
241 |             timeout=None,
242 |             metadata=None):
243 |         return grpc.experimental.unary_unary(request, target, '/motion_server.MotionServerService/SetVel',
244 |             motion__server__pb2.SetVelRequest.SerializeToString,
245 |             motion__server__pb2.SetVelReply.FromString,
246 |             options, channel_credentials,
247 |             insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
248 | 
249 |     @staticmethod
250 |     def SetAcc(request,
251 |             target,
252 |             options=(),
253 |             channel_credentials=None,
254 |             call_credentials=None,
255 |             insecure=False,
256 |             compression=None,
257 |             wait_for_ready=None,
258 |             timeout=None,
259 |             metadata=None):
260 |         return grpc.experimental.unary_unary(request, target, '/motion_server.MotionServerService/SetAcc',
261 |             motion__server__pb2.SetAccRequest.SerializeToString,
262 |             motion__server__pb2.SetAccReply.FromString,
263 |             options, channel_credentials,
264 |             insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
265 | 


--------------------------------------------------------------------------------
/lib/grpc/speech_server_pb2.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | # Generated by the protocol buffer compiler.  DO NOT EDIT!
 3 | # source: speech_server.proto
 4 | # Protobuf Python Version: 4.25.0
 5 | """Generated protocol buffer code."""
 6 | from google.protobuf import descriptor as _descriptor
 7 | from google.protobuf import descriptor_pool as _descriptor_pool
 8 | from google.protobuf import symbol_database as _symbol_database
 9 | from google.protobuf.internal import builder as _builder
10 | # @@protoc_insertion_point(imports)
11 | 
12 | _sym_db = _symbol_database.Default()
13 | 
14 | 
15 | 
16 | 
17 | DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x13speech_server.proto\x12\rspeech_server\"%\n\x13ToggleSpeechRequest\x12\x0e\n\x06\x65nable\x18\x01 \x01(\x08\"$\n\x11ToggleSpeechReply\x12\x0f\n\x07success\x18\x01 \x01(\x08\x32k\n\x13SpeechServerService\x12T\n\x0cToggleSpeech\x12\".speech_server.ToggleSpeechRequest\x1a .speech_server.ToggleSpeechReplyb\x06proto3')
18 | 
19 | _globals = globals()
20 | _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
21 | _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'speech_server_pb2', _globals)
22 | if _descriptor._USE_C_DESCRIPTORS == False:
23 |   DESCRIPTOR._options = None
24 |   _globals['_TOGGLESPEECHREQUEST']._serialized_start=38
25 |   _globals['_TOGGLESPEECHREQUEST']._serialized_end=75
26 |   _globals['_TOGGLESPEECHREPLY']._serialized_start=77
27 |   _globals['_TOGGLESPEECHREPLY']._serialized_end=113
28 |   _globals['_SPEECHSERVERSERVICE']._serialized_start=115
29 |   _globals['_SPEECHSERVERSERVICE']._serialized_end=222
30 | # @@protoc_insertion_point(module_scope)
31 | 


--------------------------------------------------------------------------------
/lib/grpc/speech_server_pb2_grpc.py:
--------------------------------------------------------------------------------
 1 | # Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT!
 2 | """Client and server classes corresponding to protobuf-defined services."""
 3 | import grpc
 4 | 
 5 | import speech_server_pb2 as speech__server__pb2
 6 | 
 7 | 
 8 | class SpeechServerServiceStub(object):
 9 |     """Missing associated documentation comment in .proto file."""
10 | 
11 |     def __init__(self, channel):
12 |         """Constructor.
13 | 
14 |         Args:
15 |             channel: A grpc.Channel.
16 |         """
17 |         self.ToggleSpeech = channel.unary_unary(
18 |                 '/speech_server.SpeechServerService/ToggleSpeech',
19 |                 request_serializer=speech__server__pb2.ToggleSpeechRequest.SerializeToString,
20 |                 response_deserializer=speech__server__pb2.ToggleSpeechReply.FromString,
21 |                 )
22 | 
23 | 
24 | class SpeechServerServiceServicer(object):
25 |     """Missing associated documentation comment in .proto file."""
26 | 
27 |     def ToggleSpeech(self, request, context):
28 |         """Missing associated documentation comment in .proto file."""
29 |         context.set_code(grpc.StatusCode.UNIMPLEMENTED)
30 |         context.set_details('Method not implemented!')
31 |         raise NotImplementedError('Method not implemented!')
32 | 
33 | 
34 | def add_SpeechServerServiceServicer_to_server(servicer, server):
35 |     rpc_method_handlers = {
36 |             'ToggleSpeech': grpc.unary_unary_rpc_method_handler(
37 |                     servicer.ToggleSpeech,
38 |                     request_deserializer=speech__server__pb2.ToggleSpeechRequest.FromString,
39 |                     response_serializer=speech__server__pb2.ToggleSpeechReply.SerializeToString,
40 |             ),
41 |     }
42 |     generic_handler = grpc.method_handlers_generic_handler(
43 |             'speech_server.SpeechServerService', rpc_method_handlers)
44 |     server.add_generic_rpc_handlers((generic_handler,))
45 | 
46 | 
47 |  # This class is part of an EXPERIMENTAL API.
48 | class SpeechServerService(object):
49 |     """Missing associated documentation comment in .proto file."""
50 | 
51 |     @staticmethod
52 |     def ToggleSpeech(request,
53 |             target,
54 |             options=(),
55 |             channel_credentials=None,
56 |             call_credentials=None,
57 |             insecure=False,
58 |             compression=None,
59 |             wait_for_ready=None,
60 |             timeout=None,
61 |             metadata=None):
62 |         return grpc.experimental.unary_unary(request, target, '/speech_server.SpeechServerService/ToggleSpeech',
63 |             speech__server__pb2.ToggleSpeechRequest.SerializeToString,
64 |             speech__server__pb2.ToggleSpeechReply.FromString,
65 |             options, channel_credentials,
66 |             insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
67 | 


--------------------------------------------------------------------------------
/lib/grpc/voice_server_pb2.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | # Generated by the protocol buffer compiler.  DO NOT EDIT!
 3 | # source: voice_server.proto
 4 | # Protobuf Python Version: 4.25.0
 5 | """Generated protocol buffer code."""
 6 | from google.protobuf import descriptor as _descriptor
 7 | from google.protobuf import descriptor_pool as _descriptor_pool
 8 | from google.protobuf import symbol_database as _symbol_database
 9 | from google.protobuf.internal import builder as _builder
10 | # @@protoc_insertion_point(imports)
11 | 
12 | _sym_db = _symbol_database.Default()
13 | 
14 | 
15 | 
16 | 
17 | DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x12voice_server.proto\x12\x0cvoice_server\"\x1e\n\x0eSetTextRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\"\x1f\n\x0cSetTextReply\x12\x0f\n\x07success\x18\x01 \x01(\x08\"\xd4\x01\n\x1cSetStyleBertVitsParamRequest\x12\x17\n\nmodel_name\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x15\n\x08model_id\x18\x02 \x01(\x05H\x01\x88\x01\x01\x12\x13\n\x06length\x18\x03 \x01(\x02H\x02\x88\x01\x01\x12\x12\n\x05style\x18\x04 \x01(\tH\x03\x88\x01\x01\x12\x19\n\x0cstyle_weight\x18\x05 \x01(\x02H\x04\x88\x01\x01\x42\r\n\x0b_model_nameB\x0b\n\t_model_idB\t\n\x07_lengthB\x08\n\x06_styleB\x0f\n\r_style_weight\"-\n\x1aSetStyleBertVitsParamReply\x12\x0f\n\x07success\x18\x01 \x01(\x08\"e\n\x17SetVoicevoxParamRequest\x12\x14\n\x07speaker\x18\x01 \x01(\x05H\x00\x88\x01\x01\x12\x18\n\x0bspeed_scale\x18\x02 \x01(\x02H\x01\x88\x01\x01\x42\n\n\x08_speakerB\x0e\n\x0c_speed_scale\"(\n\x15SetVoicevoxParamReply\x12\x0f\n\x07success\x18\x01 \x01(\x08\"\x80\x01\n\x14SetAivisParamRequest\x12\x14\n\x07speaker\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x12\n\x05style\x18\x02 \x01(\tH\x01\x88\x01\x01\x12\x18\n\x0bspeed_scale\x18\x03 \x01(\x02H\x02\x88\x01\x01\x42\n\n\x08_speakerB\x08\n\x06_styleB\x0e\n\x0c_speed_scale\"%\n\x12SetAivisParamReply\x12\x0f\n\x07success\x18\x01 \x01(\x08\"\x17\n\x15InterruptVoiceRequest\"&\n\x13InterruptVoiceReply\x12\x0f\n\x07success\x18\x01 \x01(\x08\"\x18\n\x16\x45nableVoicePlayRequest\"\'\n\x14\x45nableVoicePlayReply\x12\x0f\n\x07success\x18\x01 \x01(\x08\"\x19\n\x17\x44isableVoicePlayRequest\"(\n\x15\x44isableVoicePlayReply\x12\x0f\n\x07success\x18\x01 \x01(\x08\"\x17\n\x15IsVoicePlayingRequest\")\n\x13IsVoicePlayingReply\x12\x12\n\nis_playing\x18\x01 \x01(\x08\"\x14\n\x12SentenceEndRequest\"#\n\x10SentenceEndReply\x12\x0f\n\x07success\x18\x01 \x01(\x08\"\x19\n\x17StartHeadControlRequest\"(\n\x15StartHeadControlReply\x12\x0f\n\x07success\x18\x01 \x01(\x08\x32\xa1\x07\n\x12VoiceServerService\x12\x43\n\x07SetText\x12\x1c.voice_server.SetTextRequest\x1a\x1a.voice_server.SetTextReply\x12m\n\x15SetStyleBertVitsParam\x12*.voice_server.SetStyleBertVitsParamRequest\x1a(.voice_server.SetStyleBertVitsParamReply\x12^\n\x10SetVoicevoxParam\x12%.voice_server.SetVoicevoxParamRequest\x1a#.voice_server.SetVoicevoxParamReply\x12U\n\rSetAivisParam\x12\".voice_server.SetAivisParamRequest\x1a .voice_server.SetAivisParamReply\x12X\n\x0eInterruptVoice\x12#.voice_server.InterruptVoiceRequest\x1a!.voice_server.InterruptVoiceReply\x12[\n\x0f\x45nableVoicePlay\x12$.voice_server.EnableVoicePlayRequest\x1a\".voice_server.EnableVoicePlayReply\x12^\n\x10\x44isableVoicePlay\x12%.voice_server.DisableVoicePlayRequest\x1a#.voice_server.DisableVoicePlayReply\x12X\n\x0eIsVoicePlaying\x12#.voice_server.IsVoicePlayingRequest\x1a!.voice_server.IsVoicePlayingReply\x12O\n\x0bSentenceEnd\x12 .voice_server.SentenceEndRequest\x1a\x1e.voice_server.SentenceEndReply\x12^\n\x10StartHeadControl\x12%.voice_server.StartHeadControlRequest\x1a#.voice_server.StartHeadControlReplyb\x06proto3')
18 | 
19 | _globals = globals()
20 | _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
21 | _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'voice_server_pb2', _globals)
22 | if _descriptor._USE_C_DESCRIPTORS == False:
23 |   DESCRIPTOR._options = None
24 |   _globals['_SETTEXTREQUEST']._serialized_start=36
25 |   _globals['_SETTEXTREQUEST']._serialized_end=66
26 |   _globals['_SETTEXTREPLY']._serialized_start=68
27 |   _globals['_SETTEXTREPLY']._serialized_end=99
28 |   _globals['_SETSTYLEBERTVITSPARAMREQUEST']._serialized_start=102
29 |   _globals['_SETSTYLEBERTVITSPARAMREQUEST']._serialized_end=314
30 |   _globals['_SETSTYLEBERTVITSPARAMREPLY']._serialized_start=316
31 |   _globals['_SETSTYLEBERTVITSPARAMREPLY']._serialized_end=361
32 |   _globals['_SETVOICEVOXPARAMREQUEST']._serialized_start=363
33 |   _globals['_SETVOICEVOXPARAMREQUEST']._serialized_end=464
34 |   _globals['_SETVOICEVOXPARAMREPLY']._serialized_start=466
35 |   _globals['_SETVOICEVOXPARAMREPLY']._serialized_end=506
36 |   _globals['_SETAIVISPARAMREQUEST']._serialized_start=509
37 |   _globals['_SETAIVISPARAMREQUEST']._serialized_end=637
38 |   _globals['_SETAIVISPARAMREPLY']._serialized_start=639
39 |   _globals['_SETAIVISPARAMREPLY']._serialized_end=676
40 |   _globals['_INTERRUPTVOICEREQUEST']._serialized_start=678
41 |   _globals['_INTERRUPTVOICEREQUEST']._serialized_end=701
42 |   _globals['_INTERRUPTVOICEREPLY']._serialized_start=703
43 |   _globals['_INTERRUPTVOICEREPLY']._serialized_end=741
44 |   _globals['_ENABLEVOICEPLAYREQUEST']._serialized_start=743
45 |   _globals['_ENABLEVOICEPLAYREQUEST']._serialized_end=767
46 |   _globals['_ENABLEVOICEPLAYREPLY']._serialized_start=769
47 |   _globals['_ENABLEVOICEPLAYREPLY']._serialized_end=808
48 |   _globals['_DISABLEVOICEPLAYREQUEST']._serialized_start=810
49 |   _globals['_DISABLEVOICEPLAYREQUEST']._serialized_end=835
50 |   _globals['_DISABLEVOICEPLAYREPLY']._serialized_start=837
51 |   _globals['_DISABLEVOICEPLAYREPLY']._serialized_end=877
52 |   _globals['_ISVOICEPLAYINGREQUEST']._serialized_start=879
53 |   _globals['_ISVOICEPLAYINGREQUEST']._serialized_end=902
54 |   _globals['_ISVOICEPLAYINGREPLY']._serialized_start=904
55 |   _globals['_ISVOICEPLAYINGREPLY']._serialized_end=945
56 |   _globals['_SENTENCEENDREQUEST']._serialized_start=947
57 |   _globals['_SENTENCEENDREQUEST']._serialized_end=967
58 |   _globals['_SENTENCEENDREPLY']._serialized_start=969
59 |   _globals['_SENTENCEENDREPLY']._serialized_end=1004
60 |   _globals['_STARTHEADCONTROLREQUEST']._serialized_start=1006
61 |   _globals['_STARTHEADCONTROLREQUEST']._serialized_end=1031
62 |   _globals['_STARTHEADCONTROLREPLY']._serialized_start=1033
63 |   _globals['_STARTHEADCONTROLREPLY']._serialized_end=1073
64 |   _globals['_VOICESERVERSERVICE']._serialized_start=1076
65 |   _globals['_VOICESERVERSERVICE']._serialized_end=2005
66 | # @@protoc_insertion_point(module_scope)
67 | 


--------------------------------------------------------------------------------
/lib/grpc/voice_server_pb2_grpc.py:
--------------------------------------------------------------------------------
  1 | # Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT!
  2 | """Client and server classes corresponding to protobuf-defined services."""
  3 | import grpc
  4 | 
  5 | import voice_server_pb2 as voice__server__pb2
  6 | 
  7 | 
  8 | class VoiceServerServiceStub(object):
  9 |     """Missing associated documentation comment in .proto file."""
 10 | 
 11 |     def __init__(self, channel):
 12 |         """Constructor.
 13 | 
 14 |         Args:
 15 |             channel: A grpc.Channel.
 16 |         """
 17 |         self.SetText = channel.unary_unary(
 18 |                 '/voice_server.VoiceServerService/SetText',
 19 |                 request_serializer=voice__server__pb2.SetTextRequest.SerializeToString,
 20 |                 response_deserializer=voice__server__pb2.SetTextReply.FromString,
 21 |                 )
 22 |         self.SetStyleBertVitsParam = channel.unary_unary(
 23 |                 '/voice_server.VoiceServerService/SetStyleBertVitsParam',
 24 |                 request_serializer=voice__server__pb2.SetStyleBertVitsParamRequest.SerializeToString,
 25 |                 response_deserializer=voice__server__pb2.SetStyleBertVitsParamReply.FromString,
 26 |                 )
 27 |         self.SetVoicevoxParam = channel.unary_unary(
 28 |                 '/voice_server.VoiceServerService/SetVoicevoxParam',
 29 |                 request_serializer=voice__server__pb2.SetVoicevoxParamRequest.SerializeToString,
 30 |                 response_deserializer=voice__server__pb2.SetVoicevoxParamReply.FromString,
 31 |                 )
 32 |         self.SetAivisParam = channel.unary_unary(
 33 |                 '/voice_server.VoiceServerService/SetAivisParam',
 34 |                 request_serializer=voice__server__pb2.SetAivisParamRequest.SerializeToString,
 35 |                 response_deserializer=voice__server__pb2.SetAivisParamReply.FromString,
 36 |                 )
 37 |         self.InterruptVoice = channel.unary_unary(
 38 |                 '/voice_server.VoiceServerService/InterruptVoice',
 39 |                 request_serializer=voice__server__pb2.InterruptVoiceRequest.SerializeToString,
 40 |                 response_deserializer=voice__server__pb2.InterruptVoiceReply.FromString,
 41 |                 )
 42 |         self.EnableVoicePlay = channel.unary_unary(
 43 |                 '/voice_server.VoiceServerService/EnableVoicePlay',
 44 |                 request_serializer=voice__server__pb2.EnableVoicePlayRequest.SerializeToString,
 45 |                 response_deserializer=voice__server__pb2.EnableVoicePlayReply.FromString,
 46 |                 )
 47 |         self.DisableVoicePlay = channel.unary_unary(
 48 |                 '/voice_server.VoiceServerService/DisableVoicePlay',
 49 |                 request_serializer=voice__server__pb2.DisableVoicePlayRequest.SerializeToString,
 50 |                 response_deserializer=voice__server__pb2.DisableVoicePlayReply.FromString,
 51 |                 )
 52 |         self.IsVoicePlaying = channel.unary_unary(
 53 |                 '/voice_server.VoiceServerService/IsVoicePlaying',
 54 |                 request_serializer=voice__server__pb2.IsVoicePlayingRequest.SerializeToString,
 55 |                 response_deserializer=voice__server__pb2.IsVoicePlayingReply.FromString,
 56 |                 )
 57 |         self.SentenceEnd = channel.unary_unary(
 58 |                 '/voice_server.VoiceServerService/SentenceEnd',
 59 |                 request_serializer=voice__server__pb2.SentenceEndRequest.SerializeToString,
 60 |                 response_deserializer=voice__server__pb2.SentenceEndReply.FromString,
 61 |                 )
 62 |         self.StartHeadControl = channel.unary_unary(
 63 |                 '/voice_server.VoiceServerService/StartHeadControl',
 64 |                 request_serializer=voice__server__pb2.StartHeadControlRequest.SerializeToString,
 65 |                 response_deserializer=voice__server__pb2.StartHeadControlReply.FromString,
 66 |                 )
 67 | 
 68 | 
 69 | class VoiceServerServiceServicer(object):
 70 |     """Missing associated documentation comment in .proto file."""
 71 | 
 72 |     def SetText(self, request, context):
 73 |         """Missing associated documentation comment in .proto file."""
 74 |         context.set_code(grpc.StatusCode.UNIMPLEMENTED)
 75 |         context.set_details('Method not implemented!')
 76 |         raise NotImplementedError('Method not implemented!')
 77 | 
 78 |     def SetStyleBertVitsParam(self, request, context):
 79 |         """Missing associated documentation comment in .proto file."""
 80 |         context.set_code(grpc.StatusCode.UNIMPLEMENTED)
 81 |         context.set_details('Method not implemented!')
 82 |         raise NotImplementedError('Method not implemented!')
 83 | 
 84 |     def SetVoicevoxParam(self, request, context):
 85 |         """Missing associated documentation comment in .proto file."""
 86 |         context.set_code(grpc.StatusCode.UNIMPLEMENTED)
 87 |         context.set_details('Method not implemented!')
 88 |         raise NotImplementedError('Method not implemented!')
 89 | 
 90 |     def SetAivisParam(self, request, context):
 91 |         """Missing associated documentation comment in .proto file."""
 92 |         context.set_code(grpc.StatusCode.UNIMPLEMENTED)
 93 |         context.set_details('Method not implemented!')
 94 |         raise NotImplementedError('Method not implemented!')
 95 | 
 96 |     def InterruptVoice(self, request, context):
 97 |         """Missing associated documentation comment in .proto file."""
 98 |         context.set_code(grpc.StatusCode.UNIMPLEMENTED)
 99 |         context.set_details('Method not implemented!')
100 |         raise NotImplementedError('Method not implemented!')
101 | 
102 |     def EnableVoicePlay(self, request, context):
103 |         """Missing associated documentation comment in .proto file."""
104 |         context.set_code(grpc.StatusCode.UNIMPLEMENTED)
105 |         context.set_details('Method not implemented!')
106 |         raise NotImplementedError('Method not implemented!')
107 | 
108 |     def DisableVoicePlay(self, request, context):
109 |         """Missing associated documentation comment in .proto file."""
110 |         context.set_code(grpc.StatusCode.UNIMPLEMENTED)
111 |         context.set_details('Method not implemented!')
112 |         raise NotImplementedError('Method not implemented!')
113 | 
114 |     def IsVoicePlaying(self, request, context):
115 |         """Missing associated documentation comment in .proto file."""
116 |         context.set_code(grpc.StatusCode.UNIMPLEMENTED)
117 |         context.set_details('Method not implemented!')
118 |         raise NotImplementedError('Method not implemented!')
119 | 
120 |     def SentenceEnd(self, request, context):
121 |         """Missing associated documentation comment in .proto file."""
122 |         context.set_code(grpc.StatusCode.UNIMPLEMENTED)
123 |         context.set_details('Method not implemented!')
124 |         raise NotImplementedError('Method not implemented!')
125 | 
126 |     def StartHeadControl(self, request, context):
127 |         """Missing associated documentation comment in .proto file."""
128 |         context.set_code(grpc.StatusCode.UNIMPLEMENTED)
129 |         context.set_details('Method not implemented!')
130 |         raise NotImplementedError('Method not implemented!')
131 | 
132 | 
133 | def add_VoiceServerServiceServicer_to_server(servicer, server):
134 |     rpc_method_handlers = {
135 |             'SetText': grpc.unary_unary_rpc_method_handler(
136 |                     servicer.SetText,
137 |                     request_deserializer=voice__server__pb2.SetTextRequest.FromString,
138 |                     response_serializer=voice__server__pb2.SetTextReply.SerializeToString,
139 |             ),
140 |             'SetStyleBertVitsParam': grpc.unary_unary_rpc_method_handler(
141 |                     servicer.SetStyleBertVitsParam,
142 |                     request_deserializer=voice__server__pb2.SetStyleBertVitsParamRequest.FromString,
143 |                     response_serializer=voice__server__pb2.SetStyleBertVitsParamReply.SerializeToString,
144 |             ),
145 |             'SetVoicevoxParam': grpc.unary_unary_rpc_method_handler(
146 |                     servicer.SetVoicevoxParam,
147 |                     request_deserializer=voice__server__pb2.SetVoicevoxParamRequest.FromString,
148 |                     response_serializer=voice__server__pb2.SetVoicevoxParamReply.SerializeToString,
149 |             ),
150 |             'SetAivisParam': grpc.unary_unary_rpc_method_handler(
151 |                     servicer.SetAivisParam,
152 |                     request_deserializer=voice__server__pb2.SetAivisParamRequest.FromString,
153 |                     response_serializer=voice__server__pb2.SetAivisParamReply.SerializeToString,
154 |             ),
155 |             'InterruptVoice': grpc.unary_unary_rpc_method_handler(
156 |                     servicer.InterruptVoice,
157 |                     request_deserializer=voice__server__pb2.InterruptVoiceRequest.FromString,
158 |                     response_serializer=voice__server__pb2.InterruptVoiceReply.SerializeToString,
159 |             ),
160 |             'EnableVoicePlay': grpc.unary_unary_rpc_method_handler(
161 |                     servicer.EnableVoicePlay,
162 |                     request_deserializer=voice__server__pb2.EnableVoicePlayRequest.FromString,
163 |                     response_serializer=voice__server__pb2.EnableVoicePlayReply.SerializeToString,
164 |             ),
165 |             'DisableVoicePlay': grpc.unary_unary_rpc_method_handler(
166 |                     servicer.DisableVoicePlay,
167 |                     request_deserializer=voice__server__pb2.DisableVoicePlayRequest.FromString,
168 |                     response_serializer=voice__server__pb2.DisableVoicePlayReply.SerializeToString,
169 |             ),
170 |             'IsVoicePlaying': grpc.unary_unary_rpc_method_handler(
171 |                     servicer.IsVoicePlaying,
172 |                     request_deserializer=voice__server__pb2.IsVoicePlayingRequest.FromString,
173 |                     response_serializer=voice__server__pb2.IsVoicePlayingReply.SerializeToString,
174 |             ),
175 |             'SentenceEnd': grpc.unary_unary_rpc_method_handler(
176 |                     servicer.SentenceEnd,
177 |                     request_deserializer=voice__server__pb2.SentenceEndRequest.FromString,
178 |                     response_serializer=voice__server__pb2.SentenceEndReply.SerializeToString,
179 |             ),
180 |             'StartHeadControl': grpc.unary_unary_rpc_method_handler(
181 |                     servicer.StartHeadControl,
182 |                     request_deserializer=voice__server__pb2.StartHeadControlRequest.FromString,
183 |                     response_serializer=voice__server__pb2.StartHeadControlReply.SerializeToString,
184 |             ),
185 |     }
186 |     generic_handler = grpc.method_handlers_generic_handler(
187 |             'voice_server.VoiceServerService', rpc_method_handlers)
188 |     server.add_generic_rpc_handlers((generic_handler,))
189 | 
190 | 
191 |  # This class is part of an EXPERIMENTAL API.
192 | class VoiceServerService(object):
193 |     """Missing associated documentation comment in .proto file."""
194 | 
195 |     @staticmethod
196 |     def SetText(request,
197 |             target,
198 |             options=(),
199 |             channel_credentials=None,
200 |             call_credentials=None,
201 |             insecure=False,
202 |             compression=None,
203 |             wait_for_ready=None,
204 |             timeout=None,
205 |             metadata=None):
206 |         return grpc.experimental.unary_unary(request, target, '/voice_server.VoiceServerService/SetText',
207 |             voice__server__pb2.SetTextRequest.SerializeToString,
208 |             voice__server__pb2.SetTextReply.FromString,
209 |             options, channel_credentials,
210 |             insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
211 | 
212 |     @staticmethod
213 |     def SetStyleBertVitsParam(request,
214 |             target,
215 |             options=(),
216 |             channel_credentials=None,
217 |             call_credentials=None,
218 |             insecure=False,
219 |             compression=None,
220 |             wait_for_ready=None,
221 |             timeout=None,
222 |             metadata=None):
223 |         return grpc.experimental.unary_unary(request, target, '/voice_server.VoiceServerService/SetStyleBertVitsParam',
224 |             voice__server__pb2.SetStyleBertVitsParamRequest.SerializeToString,
225 |             voice__server__pb2.SetStyleBertVitsParamReply.FromString,
226 |             options, channel_credentials,
227 |             insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
228 | 
229 |     @staticmethod
230 |     def SetVoicevoxParam(request,
231 |             target,
232 |             options=(),
233 |             channel_credentials=None,
234 |             call_credentials=None,
235 |             insecure=False,
236 |             compression=None,
237 |             wait_for_ready=None,
238 |             timeout=None,
239 |             metadata=None):
240 |         return grpc.experimental.unary_unary(request, target, '/voice_server.VoiceServerService/SetVoicevoxParam',
241 |             voice__server__pb2.SetVoicevoxParamRequest.SerializeToString,
242 |             voice__server__pb2.SetVoicevoxParamReply.FromString,
243 |             options, channel_credentials,
244 |             insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
245 | 
246 |     @staticmethod
247 |     def SetAivisParam(request,
248 |             target,
249 |             options=(),
250 |             channel_credentials=None,
251 |             call_credentials=None,
252 |             insecure=False,
253 |             compression=None,
254 |             wait_for_ready=None,
255 |             timeout=None,
256 |             metadata=None):
257 |         return grpc.experimental.unary_unary(request, target, '/voice_server.VoiceServerService/SetAivisParam',
258 |             voice__server__pb2.SetAivisParamRequest.SerializeToString,
259 |             voice__server__pb2.SetAivisParamReply.FromString,
260 |             options, channel_credentials,
261 |             insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
262 | 
263 |     @staticmethod
264 |     def InterruptVoice(request,
265 |             target,
266 |             options=(),
267 |             channel_credentials=None,
268 |             call_credentials=None,
269 |             insecure=False,
270 |             compression=None,
271 |             wait_for_ready=None,
272 |             timeout=None,
273 |             metadata=None):
274 |         return grpc.experimental.unary_unary(request, target, '/voice_server.VoiceServerService/InterruptVoice',
275 |             voice__server__pb2.InterruptVoiceRequest.SerializeToString,
276 |             voice__server__pb2.InterruptVoiceReply.FromString,
277 |             options, channel_credentials,
278 |             insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
279 | 
280 |     @staticmethod
281 |     def EnableVoicePlay(request,
282 |             target,
283 |             options=(),
284 |             channel_credentials=None,
285 |             call_credentials=None,
286 |             insecure=False,
287 |             compression=None,
288 |             wait_for_ready=None,
289 |             timeout=None,
290 |             metadata=None):
291 |         return grpc.experimental.unary_unary(request, target, '/voice_server.VoiceServerService/EnableVoicePlay',
292 |             voice__server__pb2.EnableVoicePlayRequest.SerializeToString,
293 |             voice__server__pb2.EnableVoicePlayReply.FromString,
294 |             options, channel_credentials,
295 |             insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
296 | 
297 |     @staticmethod
298 |     def DisableVoicePlay(request,
299 |             target,
300 |             options=(),
301 |             channel_credentials=None,
302 |             call_credentials=None,
303 |             insecure=False,
304 |             compression=None,
305 |             wait_for_ready=None,
306 |             timeout=None,
307 |             metadata=None):
308 |         return grpc.experimental.unary_unary(request, target, '/voice_server.VoiceServerService/DisableVoicePlay',
309 |             voice__server__pb2.DisableVoicePlayRequest.SerializeToString,
310 |             voice__server__pb2.DisableVoicePlayReply.FromString,
311 |             options, channel_credentials,
312 |             insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
313 | 
314 |     @staticmethod
315 |     def IsVoicePlaying(request,
316 |             target,
317 |             options=(),
318 |             channel_credentials=None,
319 |             call_credentials=None,
320 |             insecure=False,
321 |             compression=None,
322 |             wait_for_ready=None,
323 |             timeout=None,
324 |             metadata=None):
325 |         return grpc.experimental.unary_unary(request, target, '/voice_server.VoiceServerService/IsVoicePlaying',
326 |             voice__server__pb2.IsVoicePlayingRequest.SerializeToString,
327 |             voice__server__pb2.IsVoicePlayingReply.FromString,
328 |             options, channel_credentials,
329 |             insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
330 | 
331 |     @staticmethod
332 |     def SentenceEnd(request,
333 |             target,
334 |             options=(),
335 |             channel_credentials=None,
336 |             call_credentials=None,
337 |             insecure=False,
338 |             compression=None,
339 |             wait_for_ready=None,
340 |             timeout=None,
341 |             metadata=None):
342 |         return grpc.experimental.unary_unary(request, target, '/voice_server.VoiceServerService/SentenceEnd',
343 |             voice__server__pb2.SentenceEndRequest.SerializeToString,
344 |             voice__server__pb2.SentenceEndReply.FromString,
345 |             options, channel_credentials,
346 |             insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
347 | 
348 |     @staticmethod
349 |     def StartHeadControl(request,
350 |             target,
351 |             options=(),
352 |             channel_credentials=None,
353 |             call_credentials=None,
354 |             insecure=False,
355 |             compression=None,
356 |             wait_for_ready=None,
357 |             timeout=None,
358 |             metadata=None):
359 |         return grpc.experimental.unary_unary(request, target, '/voice_server.VoiceServerService/StartHeadControl',
360 |             voice__server__pb2.StartHeadControlRequest.SerializeToString,
361 |             voice__server__pb2.StartHeadControlReply.FromString,
362 |             options, channel_credentials,
363 |             insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
364 | 


--------------------------------------------------------------------------------
/lib/style_bert_vits.py:
--------------------------------------------------------------------------------
  1 | import json
  2 | from typing import Optional
  3 | from urllib.parse import urlencode
  4 | from urllib.request import Request, urlopen
  5 | 
  6 | from lib.text_to_voice import TextToVoice
  7 | 
  8 | 
  9 | class TextToStyleBertVits(TextToVoice):
 10 |     """
 11 |     Style-Bert-VITS2を使用してテキストから音声を生成するクラス。
 12 |     """
 13 | 
 14 |     def __init__(
 15 |         self,
 16 |         host: str = "127.0.0.1",
 17 |         port: str = "5000",
 18 |         motion_host: Optional[str] = "127.0.0.1",
 19 |         motion_port: Optional[str] = "50055",
 20 |     ) -> None:
 21 |         """クラスの初期化メソッド。
 22 |         Args:
 23 |             host (str, optional): Style-Bert-VITS2サーバーのホスト名。デフォルトは "127.0.0.1"。
 24 |             port (str, optional): Style-Bert-VITS2サーバーのポート番号。デフォルトは"5000"。
 25 |             motion_host (str, optional): モーションサーバーのホスト名。デフォルトは"127.0.0.1"。
 26 |             motion_port (str, optional): モーションサーバーのポート番号。デフォルトは"50055"。
 27 | 
 28 |         """
 29 |         super().__init__(host, port, motion_host, motion_port)
 30 |         self.model_id = 0
 31 |         self.length = 1.0
 32 |         self.style = "Neutral"
 33 |         self.style_weight = 1.0
 34 |         # 話者モデル名を指定
 35 |         self.set_param(model_name="jvnv-F1-jp")
 36 | 
 37 |     def get_model_id_from_name(self, model_name: str) -> int:
 38 |         """
 39 |         モデル名からモデル番号を取得する。
 40 | 
 41 |         Args:
 42 |             model_name (str): モデル名。
 43 | 
 44 |         Returns:
 45 |             int: モデル番号。
 46 | 
 47 |         """
 48 |         headers = {"accept": "application/json"}
 49 |         address = "http://" + self.host + ":" + self.port + "/models/info"
 50 |         # GETリクエストを作成
 51 |         req = Request(address, headers=headers, method="GET")
 52 |         with urlopen(req) as res:
 53 |             model_info = res.read()
 54 |             model_info_json = json.loads(model_info)
 55 |             for key, details in model_info_json.items():
 56 |                 if model_name == details["id2spk"]["0"]:
 57 |                     return key
 58 |         raise ValueError("Model name not found")
 59 | 
 60 |     def set_param(
 61 |         self,
 62 |         model_name: Optional[str] = None,
 63 |         model_id: Optional[int] = None,
 64 |         length: Optional[float] = None,
 65 |         style: Optional[str] = None,
 66 |         style_weight: Optional[float] = None,
 67 |     ) -> None:
 68 |         """
 69 |         音声合成のパラメータを設定する。
 70 | 
 71 |         Args:
 72 |             model_name (str, optional): Style-Bert-VITS2のモデル名。デフォルトはNone。
 73 |             model_id (int, optional): Style-Bert-VITS2のモデル番号。デフォルトはNone。
 74 |             length (float, optional): 音声の再生速度。大きくする程読み上げ速度が遅くなる。デフォルトはNone。
 75 |             style (str, optional): 音声の感情スタイル。デフォルトはNone。
 76 |             style_weight (float, optional): 音声の感情スタイルの重み。値が大きいほど感情の影響が大きくなる。デフォルトはNone。
 77 | 
 78 |         """
 79 |         if model_name is not None:
 80 |             self.model_id = self.get_model_id_from_name(model_name)
 81 |         elif model_id is not None:
 82 |             self.model_id = model_id
 83 |         if length is not None:
 84 |             self.length = length
 85 |         if style is not None:
 86 |             self.style = style
 87 |         if style_weight is not None:
 88 |             self.style_weight = style_weight
 89 | 
 90 |     def post_synthesis(
 91 |         self,
 92 |         text: str,
 93 |     ) -> Optional[bytes]:
 94 |         """
 95 |         Style-Bert-VITS2サーバーに音声合成要求を送信し、合成された音声データを取得する。
 96 | 
 97 |         Args:
 98 |             text (str): 音声合成対象のテキスト。
 99 | 
100 |         Returns:
101 |             Any: 音声合成クエリの応答。
102 | 
103 |         """
104 |         if len(text.strip()) <= 0:
105 |             return None
106 |         headers = {"accept": "audio/wav"}
107 |         params = {
108 |             "text": text,
109 |             "model_id": self.model_id,
110 |             "length": self.length,
111 |             "style": self.style,
112 |             "style_weight": self.style_weight,
113 |         }
114 |         address = (
115 |             "http://" + self.host + ":" + self.port + "/voice" + "?" + urlencode(params)
116 |         )
117 |         # GETリクエストを作成
118 |         req = Request(address, headers=headers, method="GET")
119 |         with urlopen(req) as res:
120 |             return res.read()
121 | 
122 |     def text_to_voice(self, text: str) -> None:
123 |         """
124 |         テキストから音声を合成して再生する。
125 |         Args:
126 |             text (str): 音声合成対象のテキスト。
127 |         """
128 |         wav = self.post_synthesis(text)
129 |         if wav is not None:
130 |             print(f"[Play] {text}")
131 |             self.play_wav(wav)
132 | 


--------------------------------------------------------------------------------
/lib/text_to_voice.py:
--------------------------------------------------------------------------------
  1 | import io
  2 | import os
  3 | import sys
  4 | import time
  5 | import wave
  6 | from abc import ABCMeta, abstractmethod
  7 | from queue import Queue
  8 | from threading import Event, Thread
  9 | from typing import Any, Optional
 10 | 
 11 | import grpc
 12 | import numpy as np
 13 | import pyaudio
 14 | from lib.en_to_jp import EnToJp
 15 | 
 16 | from .err_handler import ignoreStderr
 17 | 
 18 | sys.path.append(os.path.join(os.path.dirname(__file__), "grpc"))
 19 | import motion_server_pb2
 20 | import motion_server_pb2_grpc
 21 | 
 22 | 
 23 | class TextToVoice(metaclass=ABCMeta):
 24 |     """
 25 |     音声合成を使用してテキストから音声を生成するクラス。
 26 |     """
 27 | 
 28 |     def __init__(
 29 |         self,
 30 |         host: str = "127.0.0.1",
 31 |         port: str = "52001",
 32 |         motion_host: Optional[str] = "127.0.0.1",
 33 |         motion_port: Optional[str] = "50055",
 34 |     ) -> None:
 35 |         """クラスの初期化メソッド。
 36 |         Args:
 37 |             host (str, optional): サーバーのホスト名。デフォルトは "127.0.0.1"。
 38 |             port (str, optional): サーバーのポート番号。デフォルトは "52001"。
 39 |             motion_host (str, optional): モーションサーバーのホスト名。デフォルトは"127.0.0.1"。
 40 |             motion_port (str, optional): モーションサーバーのポート番号。デフォルトは"50055"。
 41 | 
 42 |         """
 43 |         self.queue: Queue[str] = Queue()
 44 |         self.host = host
 45 |         self.port = port
 46 |         self.motion_stub = None
 47 |         if motion_host is not None or motion_port is not None:
 48 |             motion_channel = grpc.insecure_channel(motion_host + ":" + motion_port)
 49 |             self.motion_stub = motion_server_pb2_grpc.MotionServerServiceStub(
 50 |                 motion_channel
 51 |             )
 52 |         self.finished = True  # 音声再生が完了したかどうかを示すフラグ
 53 |         self.sentence_end_flg = False  # 一文の終わりを示すフラグ
 54 |         self.sentence_end_timeout = 5.0  # 一文の終わりを判定するタイムアウト時間
 55 |         self.tilt_rate = 0.0  # 送信するtiltのrate(0.0~1.0)
 56 |         self.HEAD_RESET_INTERVAL = 0.3  # この時間更新がなければ、tiltの指令値を0にリセットする[sec]
 57 |         self.TILT_GAIN = -0.8  # 音声出力の音量からtiltのrateに変換するゲイン
 58 |         self.TILT_RATE_DB_MAX = 40.0  # tilt_rate上限の音声出力値[dB]
 59 |         self.TILT_RATE_DB_MIN = 5.0  # tilt_rate下限の音声出力値[dB]
 60 |         self.TILT_ANGLE_MAX = 0.35  # Tiltの最大角度[rad]
 61 |         self.TILT_ANGLE_MIN = -0.1  # Tiltの最小角度[rad]
 62 |         self.HEAD_MOTION_INTERVAL = 0.15  # ヘッドモーションの更新周期[sec]
 63 |         self.event = Event()
 64 |         self.head_motion_thread = Thread(target=self.head_motion_control, daemon=True)
 65 |         if self.motion_stub is not None:
 66 |             self.head_motion_thread.start()
 67 |         self.text_to_voice_event = Event()
 68 |         self.voice_thread = Thread(target=self.text_to_voice_thread)
 69 |         self.voice_thread.start()
 70 |         self.en_to_jp = EnToJp()
 71 | 
 72 |     def __exit__(self) -> None:
 73 |         """音声合成スレッドを終了する。"""
 74 |         self.voice_thread.join()
 75 | 
 76 |     def sentence_end(self) -> None:
 77 |         """音声合成の一文の終わりを示すフラグを立てる。"""
 78 |         self.sentence_end_flg = True
 79 | 
 80 |     def enable_voice_play(self) -> None:
 81 |         """音声再生を開始する。"""
 82 |         self.text_to_voice_event.set()
 83 | 
 84 |     def disable_voice_play(self) -> None:
 85 |         """音声再生を停止する。"""
 86 |         self.text_to_voice_event.clear()
 87 | 
 88 |     def text_to_voice_thread(self) -> None:
 89 |         """
 90 |         音声合成スレッドの実行関数。
 91 |         キューからテキストを取り出し、text_to_voice関数を呼び出す。
 92 | 
 93 |         """
 94 |         last_queue_time = time.time()
 95 |         queue_start = False
 96 |         while True:
 97 |             self.text_to_voice_event.wait()
 98 |             if self.queue.qsize() > 0:
 99 |                 queue_start = True
100 |                 last_queue_time = time.time()
101 |                 text = self.queue.get()
102 |                 # textに含まれる英語を極力かな変換する
103 |                 text = self.en_to_jp.text_to_kana(text, True, True, True)
104 |                 self.text_to_voice(text)
105 |             else:
106 |                 # queueが空の状態でsentence_endが送られる、もしくはsentence_end_timeout秒経過した場合finishedにする。
107 |                 if self.sentence_end_flg or (
108 |                     queue_start
109 |                     and time.time() - last_queue_time > self.sentence_end_timeout
110 |                 ):
111 |                     self.finished = True
112 |                     queue_start = False
113 |                     if self.motion_stub is not None:
114 |                         self.event.clear()
115 |                         if self.motion_stub is not None:
116 |                             # 初期位置にヘッドを戻す
117 |                             try:
118 |                                 self.motion_stub.SetPos(
119 |                                     motion_server_pb2.SetPosRequest(
120 |                                         tilt=self.TILT_ANGLE_MAX, priority=3
121 |                                     )
122 |                                 )
123 |                             except BaseException as e:
124 |                                 print(f"Failed to send SetPos command: {e}")
125 |                                 pass
126 |                     self.sentence_end_flg = False
127 |                     self.text_to_voice_event.clear()
128 | 
129 |     def put_text(
130 |         self, text: str, play_now: bool = True, blocking: bool = False
131 |     ) -> None:
132 |         """
133 |         音声合成のためのテキストをキューに追加する。
134 | 
135 |         Args:
136 |             text (str): 音声合成対象のテキスト。
137 |             play_now (bool, optional): すぐに音声再生を開始するかどうか。デフォルトはTrue。
138 |             blocking (bool, optional): 音声合成が完了するまでブロックするかどうか。デフォルトはFalse。
139 | 
140 |         """
141 |         if play_now:
142 |             self.text_to_voice_event.set()
143 |         self.queue.put(text)
144 |         self.finished = False
145 |         if blocking:
146 |             self.wait_finish()
147 | 
148 |     def wait_finish(self) -> None:
149 |         """
150 |         音声合成が完了するまで待機するループ関数。
151 | 
152 |         """
153 |         while not self.finished:
154 |             time.sleep(0.01)
155 | 
156 |     @abstractmethod
157 |     def set_param(
158 |         self,
159 |         speaker: Optional[int] = None,
160 |         speed_scale: Optional[float] = None,
161 |     ) -> None:
162 |         """
163 |         音声合成のパラメータを設定する。
164 | 
165 |         Args:
166 |             speaker (Optional[int], optional): VoiceVoxの話者番号。デフォルトはNone。
167 |             speed_scale (Optional[float], optional): 音声の再生速度スケール。デフォルトはNone。
168 | 
169 |         """
170 |         ...
171 | 
172 |     def play_wav(self, wav_file: bytes) -> None:
173 |         """合成された音声データを再生する。
174 | 
175 |         Args:
176 |             wav_file (bytes): 合成された音声データ。
177 | 
178 |         """
179 |         wr: wave.Wave_read = wave.open(io.BytesIO(wav_file))
180 |         with ignoreStderr():
181 |             p = pyaudio.PyAudio()
182 |             stream = p.open(
183 |                 format=p.get_format_from_width(wr.getsampwidth()),
184 |                 channels=wr.getnchannels(),
185 |                 rate=wr.getframerate(),
186 |                 output=True,
187 |             )
188 |             chunk = 1024
189 |             data = wr.readframes(chunk)
190 |             while data:
191 |                 audio_data = np.frombuffer(data, dtype=np.int16)
192 |                 rms = np.sqrt(np.mean(audio_data**2))
193 |                 db = 20 * np.log10(rms) if rms > 0.0 else 0.0
194 |                 self.tilt_rate = self.db_to_head_rate(db)
195 |                 stream.write(data)
196 |                 data = wr.readframes(chunk)
197 |             time.sleep(0.2)
198 |             stream.close()
199 |         p.terminate()
200 | 
201 |     @abstractmethod
202 |     def text_to_voice(self, text: str) -> None:
203 |         """
204 |         テキストから音声を合成して再生する。
205 | 
206 |         Args:
207 |             text (str): 音声合成対象のテキスト。
208 | 
209 |         """
210 |         ...
211 | 
212 |     def is_playing(self) -> bool:
213 |         """
214 |         音声再生が実行中かどうかを返す。
215 |         queueの中身が0かつ再生中の音声がなければFalseを返す。
216 | 
217 |         Returns:
218 |             bool: 音声再生中の場合はTrue。
219 | 
220 |         """
221 |         return self.finished
222 | 
223 |     def db_to_head_rate(self, db: float) -> float:
224 |         """
225 |         音声の音量[dB]からヘッドの動き具合を算出する。
226 |         Args:
227 |             db (float): 音声の音量[dB]。
228 |         Returns:
229 |             float: ヘッドの動き具合。
230 |         """
231 |         if db > self.TILT_RATE_DB_MAX:
232 |             return 1.0
233 |         elif db < self.TILT_RATE_DB_MIN:
234 |             return 0.0
235 |         return (db - self.TILT_RATE_DB_MIN) / (
236 |             self.TILT_RATE_DB_MAX - self.TILT_RATE_DB_MIN
237 |         )
238 | 
239 |     def head_motion_control(self) -> None:
240 |         """
241 |         音声出力に合わせてヘッドを動かす。
242 |         """
243 |         last_update_time = time.time()
244 |         prev_tilt_rate = 0.0
245 |         while True:
246 |             self.event.wait()
247 |             loop_start_time = time.time()
248 |             if self.tilt_rate != prev_tilt_rate:
249 |                 val = (
250 |                     -1 * self.tilt_rate * (self.TILT_ANGLE_MAX - self.TILT_ANGLE_MIN)
251 |                     + self.TILT_ANGLE_MAX
252 |                 )
253 |                 if self.motion_stub is not None:
254 |                     try:
255 |                         self.motion_stub.ClearMotion(
256 |                             motion_server_pb2.ClearMotionRequest(priority=3)
257 |                         )
258 |                     except BaseException as e:
259 |                         print(f"Failed to ClearMotion command: {e}")
260 |                         pass
261 |                     try:
262 |                         self.motion_stub.SetPos(
263 |                             motion_server_pb2.SetPosRequest(tilt=val, priority=3)
264 |                         )
265 |                     except BaseException as e:
266 |                         print(f"Failed to send SetPos command: {e}")
267 |                         pass
268 |                 last_update_time = time.time()
269 |                 prev_tilt_rate = self.tilt_rate
270 |             if time.time() - last_update_time > self.HEAD_RESET_INTERVAL:
271 |                 self.tilt_rate = 0.0
272 |             wait_time = self.HEAD_MOTION_INTERVAL - (time.time() - loop_start_time)
273 |             if wait_time > 0:
274 |                 time.sleep(wait_time)
275 | 
276 |     def start_head_control(self) -> None:
277 |         """
278 |         ヘッドモーションを開始する。
279 |         """
280 |         if self.motion_stub is not None:
281 |             self.event.set()
282 | 


--------------------------------------------------------------------------------
/lib/voicevox.py:
--------------------------------------------------------------------------------
  1 | import json
  2 | from queue import Queue
  3 | from typing import Any, Optional
  4 | 
  5 | import requests
  6 | from lib.text_to_voice import TextToVoice
  7 | 
  8 | 
  9 | class TextToVoiceVox(TextToVoice):
 10 |     """
 11 |     VoiceVoxを使用してテキストから音声を生成するクラス。
 12 |     """
 13 | 
 14 |     def __init__(
 15 |         self,
 16 |         host: str = "127.0.0.1",
 17 |         port: str = "52001",
 18 |         motion_host: Optional[str] = "127.0.0.1",
 19 |         motion_port: Optional[str] = "50055",
 20 |     ) -> None:
 21 |         """クラスの初期化メソッド。
 22 |         Args:
 23 |             host (str, optional): VoiceVoxサーバーのホスト名。デフォルトは "127.0.0.1"。
 24 |             port (str, optional): VoiceVoxサーバーのポート番号。デフォルトは "52001"。
 25 |             motion_host (str, optional): モーションサーバーのホスト名。デフォルトは"127.0.0.1"。
 26 |             motion_port (str, optional): モーションサーバーのポート番号。デフォルトは"50055"。
 27 | 
 28 |         """
 29 |         super().__init__(
 30 |             host=host, port=port, motion_host=motion_host, motion_port=motion_port
 31 |         )
 32 |         # デフォルトのspeakerは8(春日部つむぎ)
 33 |         self.speaker = 8
 34 |         self.speed_scale = 1.0
 35 | 
 36 |     def set_param(
 37 |         self,
 38 |         speaker: Optional[int] = None,
 39 |         speed_scale: Optional[float] = None,
 40 |     ) -> None:
 41 |         """
 42 |         音声合成のパラメータを設定する。
 43 | 
 44 |         Args:
 45 |             speaker (Optional[int], optional): VoiceVoxの話者番号。デフォルトはNone。
 46 |             speed_scale (Optional[float], optional): 音声の再生速度スケール。デフォルトはNone。
 47 | 
 48 |         """
 49 |         if speaker is not None:
 50 |             self.speaker = speaker
 51 |         elif speed_scale is not None:
 52 |             self.speed_scale = speed_scale
 53 | 
 54 |     def post_audio_query(
 55 |         self,
 56 |         text: str,
 57 |     ) -> Any:
 58 |         """VoiceVoxサーバーに音声合成クエリを送信する。
 59 | 
 60 |         Args:
 61 |             text (str): 音声合成対象のテキスト。
 62 |             speaker (int, optional): VoiceVoxの話者番号。デフォルトは8(春日部つむぎ)。
 63 |             speed_scale (float, optional): 音声の再生速度スケール。デフォルトは1.0。
 64 | 
 65 |         Returns:
 66 |             Any: 音声合成クエリの応答。
 67 | 
 68 |         """
 69 |         if len(text.strip()) <= 0:
 70 |             return None
 71 |         params = {
 72 |             "text": text,
 73 |             "speaker": self.speaker,
 74 |             "speedScale": self.speed_scale,
 75 |             "prePhonemeLength": 0,
 76 |             "postPhonemeLength": 0,
 77 |         }
 78 |         address = "http://" + self.host + ":" + self.port + "/audio_query"
 79 |         res = requests.post(address, params=params)
 80 |         return res.json()
 81 | 
 82 |     def post_synthesis(
 83 |         self,
 84 |         audio_query_response: dict,
 85 |     ) -> bytes:
 86 |         """
 87 |         VoiceVoxサーバーに音声合成要求を送信し、合成された音声データを取得する。
 88 | 
 89 |         Args:
 90 |             audio_query_response (dict): 音声合成クエリの応答。
 91 | 
 92 |         Returns:
 93 |             bytes: 合成された音声データ。
 94 |         """
 95 |         params = {"speaker": self.speaker}
 96 |         headers = {"content-type": "application/json"}
 97 |         audio_query_response["speedScale"] = self.speed_scale
 98 |         audio_query_response_json = json.dumps(audio_query_response)
 99 |         address = "http://" + self.host + ":" + self.port + "/synthesis"
100 |         res = requests.post(
101 |             address, data=audio_query_response_json, params=params, headers=headers
102 |         )
103 |         return res.content
104 | 
105 |     def text_to_voice(self, text: str) -> None:
106 |         """
107 |         テキストから音声を合成して再生する。
108 | 
109 |         Args:
110 |             text (str): 音声合成対象のテキスト。
111 | 
112 |         """
113 |         res = self.post_audio_query(text)
114 |         if res is None:
115 |             return
116 |         wav = self.post_synthesis(res)
117 |         if wav is not None:
118 |             print(f"[Play] {text}")
119 |             self.play_wav(wav)
120 | 
121 | 
122 | class TextToVoiceVoxWeb(TextToVoiceVox):
123 |     """
124 |     VoiceVox(web版)を使用してテキストから音声を生成するクラス。
125 |     """
126 | 
127 |     def __init__(
128 |         self,
129 |         apikey: str,
130 |         motion_host: Optional[str] = "127.0.0.1",
131 |         motion_port: Optional[str] = "50055",
132 |     ) -> None:
133 |         """クラスの初期化メソッド。
134 |         Args:
135 |             apikey (str): VoiceVox wweb版のAPIキー。
136 |             motion_host (str, optional): モーションサーバーのホスト名。デフォルトは"127.0.0.1"。
137 |             motion_port (str, optional): モーションサーバーのポート番号。デフォルトは"50055"。
138 | 
139 |         """
140 |         super().__init__(
141 |             host="127.0.0.1",
142 |             port="0000",
143 |             motion_host=motion_host,
144 |             motion_port=motion_port,
145 |         )
146 |         self.queue: Queue[str] = Queue()
147 |         self.apikey = apikey
148 | 
149 |     def post_web(
150 |         self,
151 |         text: str,
152 |         speaker: int = 8,
153 |         pitch: int = 0,
154 |         intonation_scale: int = 1,
155 |         speed: int = 1,
156 |     ) -> Optional[bytes]:
157 |         """
158 |         VoiceVoxウェブAPIに音声合成要求を送信し、合成された音声データを取得。
159 | 
160 |         Args:
161 |             text (str): 音声合成対象のテキスト。
162 |             speaker (int, optional): VoiceVoxの話者番号。デフォルトは8(春日部つむぎ)。
163 |             pitch (int, optional): ピッチ。デフォルトは0。
164 |             intonation_scale (int, optional): イントネーションスケール。デフォルトは1。
165 |             speed (int, optional): 音声の速度。デフォルトは1。
166 | 
167 |         Returns:
168 |             bytes: 合成された音声データ。
169 | 
170 |         """
171 |         if len(text.strip()) <= 0:
172 |             return None
173 |         address = (
174 |             "https://deprecatedapis.tts.quest/v2/voicevox/audio/?key="
175 |             + self.apikey
176 |             + "&speaker="
177 |             + str(speaker)
178 |             + "&pitch="
179 |             + str(pitch)
180 |             + "&intonationScale="
181 |             + str(intonation_scale)
182 |             + "&speed="
183 |             + str(speed)
184 |             + "&text="
185 |             + text
186 |         )
187 |         res = requests.post(address)
188 |         return res.content
189 | 
190 |     def text_to_voice(self, text: str) -> None:
191 |         """
192 |         テキストから音声を合成して再生する。
193 | 
194 |         Args:
195 |             text (str): 音声合成対象のテキスト。
196 | 
197 |         """
198 |         wav = self.post_web(text=text)
199 |         if wav is not None:
200 |             print(f"[Play] {text}")
201 |             self.play_wav(wav)
202 | 


--------------------------------------------------------------------------------
/manual_grpc_publisher_for_gpt.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | import os
 3 | import sys
 4 | import time
 5 | 
 6 | import grpc
 7 | 
 8 | sys.path.append(os.path.join(os.path.dirname(__file__), "lib/grpc"))
 9 | import gpt_server_pb2
10 | import gpt_server_pb2_grpc
11 | import voice_server_pb2
12 | import voice_server_pb2_grpc
13 | 
14 | 
15 | def main() -> None:
16 |     global enable_input
17 |     parser = argparse.ArgumentParser()
18 |     parser.add_argument(
19 |         "--gpt_ip", help="Gpt server ip address", default="127.0.0.1", type=str
20 |     )
21 |     parser.add_argument(
22 |         "--gpt_port", help="Gpt server port number", default="10001", type=str
23 |     )
24 |     parser.add_argument(
25 |         "--voice_ip", help="Voice server ip address", default="127.0.0.1", type=str
26 |     )
27 |     parser.add_argument(
28 |         "--voice_port", help="Voice server port number", default="10002", type=str
29 |     )
30 |     parser.add_argument(
31 |         "--no_motion",
32 |         help="Not play nod motion",
33 |         action="store_true",
34 |     )
35 |     args = parser.parse_args()
36 |     # grpc stubの設定
37 |     gpt_channel = grpc.insecure_channel(args.gpt_ip + ":" + args.gpt_port)
38 |     gpt_stub = gpt_server_pb2_grpc.GptServerServiceStub(gpt_channel)
39 |     voice_channel = grpc.insecure_channel(args.voice_ip + ":" + args.voice_port)
40 |     voice_stub = voice_server_pb2_grpc.VoiceServerServiceStub(voice_channel)
41 | 
42 |     while True:
43 |         print("文章をキーボード入力後、Enterを押してください。")
44 |         text = input("Input: ")
45 |         # userメッセージの追加
46 |         print(f"User   : {text}")
47 |         try:
48 |             voice_stub.EnableVoicePlay(voice_server_pb2.EnableVoicePlayRequest())
49 |         except BaseException:
50 |             pass
51 |         try:
52 |             gpt_stub.SetGpt(gpt_server_pb2.SetGptRequest(text=text, is_finish=True))
53 |         except BaseException:
54 |             print("SetGpt error")
55 |             pass
56 | 
57 | 
58 | if __name__ == "__main__":
59 |     main()
60 | 


--------------------------------------------------------------------------------
/manual_grpc_publisher_for_voice.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | import os
 3 | import sys
 4 | import time
 5 | 
 6 | import grpc
 7 | 
 8 | sys.path.append(os.path.join(os.path.dirname(__file__), "lib/grpc"))
 9 | import voice_server_pb2
10 | import voice_server_pb2_grpc
11 | 
12 | 
13 | def main() -> None:
14 |     global enable_input
15 |     parser = argparse.ArgumentParser()
16 |     parser.add_argument(
17 |         "--voice_ip", help="Voice server ip address", default="127.0.0.1", type=str
18 |     )
19 |     parser.add_argument(
20 |         "--voice_port", help="Voice server port number", default="10002", type=str
21 |     )
22 |     parser.add_argument(
23 |         "--no_motion",
24 |         help="Not play nod motion",
25 |         action="store_true",
26 |     )
27 |     args = parser.parse_args()
28 |     # grpc stubの設定
29 |     voice_channel = grpc.insecure_channel(args.voice_ip + ":" + args.voice_port)
30 |     voice_stub = voice_server_pb2_grpc.VoiceServerServiceStub(voice_channel)
31 | 
32 |     while True:
33 |         print("文章をキーボード入力後、Enterを押してください。")
34 |         text = input("Input: ")
35 |         # userメッセージの追加
36 |         print(f"User   : {text}")
37 |         try:
38 |             voice_stub.EnableVoicePlay(voice_server_pb2.EnableVoicePlayRequest())
39 |             voice_stub.SetText(voice_server_pb2.SetTextRequest(text=text))
40 |             voice_stub.SentenceEnd(voice_server_pb2.SentenceEndRequest())
41 |         except BaseException:
42 |             pass
43 | 
44 | 
45 | if __name__ == "__main__":
46 |     main()
47 | 


--------------------------------------------------------------------------------
/proto/codegen.py:
--------------------------------------------------------------------------------
 1 | from grpc.tools import protoc
 2 | 
 3 | protoc.main(
 4 |     (
 5 |         "",
 6 |         "-I.",
 7 |         "--python_out=../lib/grpc",
 8 |         "--grpc_python_out=../lib/grpc",
 9 |         "speech_server.proto",
10 |     )
11 | )
12 | protoc.main(
13 |     (
14 |         "",
15 |         "-I.",
16 |         "--python_out=../lib/grpc",
17 |         "--grpc_python_out=../lib/grpc",
18 |         "gpt_server.proto",
19 |     )
20 | )
21 | protoc.main(
22 |     (
23 |         "",
24 |         "-I.",
25 |         "--python_out=../lib/grpc",
26 |         "--grpc_python_out=../lib/grpc",
27 |         "voice_server.proto",
28 |     )
29 | )
30 | 


--------------------------------------------------------------------------------
/proto/gpt_server.proto:
--------------------------------------------------------------------------------
 1 | syntax = "proto3";
 2 | package gpt_server;
 3 | 
 4 | message SetGptRequest {
 5 |   string text = 1;
 6 |   optional bool is_finish =2;
 7 | }
 8 | 
 9 | message SetGptReply {
10 |   bool success =1;
11 | }
12 | 
13 | message InterruptGptRequest {}
14 | 
15 | message InterruptGptReply {
16 |   bool success =1;
17 | }
18 | 
19 | message SendMotionRequest {}
20 | 
21 | message SendMotionReply {
22 |   bool success =1;
23 | }
24 | 
25 | service GptServerService {
26 |     rpc SetGpt(SetGptRequest)
27 |         returns (SetGptReply);
28 |     rpc InterruptGpt(InterruptGptRequest)
29 |         returns (InterruptGptReply);
30 |     rpc SendMotion(SendMotionRequest)
31 |         returns (SendMotionReply);
32 | }
33 | 


--------------------------------------------------------------------------------
/proto/speech_server.proto:
--------------------------------------------------------------------------------
 1 | syntax = "proto3";
 2 | package speech_server;
 3 | 
 4 | message ToggleSpeechRequest {
 5 |   bool enable =1;
 6 | }
 7 | 
 8 | message ToggleSpeechReply {
 9 |   bool success =1;
10 | }
11 | 
12 | service SpeechServerService {
13 |     rpc ToggleSpeech(ToggleSpeechRequest)
14 |         returns (ToggleSpeechReply);
15 | }
16 | 


--------------------------------------------------------------------------------
/proto/voice_server.proto:
--------------------------------------------------------------------------------
  1 | syntax = "proto3";
  2 | package voice_server;
  3 | 
  4 | message SetTextRequest {
  5 |   string text = 1;
  6 | }
  7 | 
  8 | message SetTextReply {
  9 |   bool success =1;
 10 | }
 11 | 
 12 | message SetStyleBertVitsParamRequest {
 13 |   optional string model_name = 1;
 14 |   optional int32 model_id = 2;
 15 |   optional float length = 3;
 16 |   optional string style = 4;
 17 |   optional float style_weight = 5;
 18 | }
 19 | 
 20 | message SetStyleBertVitsParamReply {
 21 |   bool success =1;
 22 | }
 23 | 
 24 | message SetVoicevoxParamRequest {
 25 |   optional int32 speaker = 1;
 26 |   optional float speed_scale = 2;
 27 | }
 28 | 
 29 | message SetVoicevoxParamReply {
 30 |   bool success =1;
 31 | }
 32 | 
 33 | message SetAivisParamRequest {
 34 |   optional string speaker = 1;
 35 |   optional string style = 2;
 36 |   optional float speed_scale = 3;
 37 | }
 38 | 
 39 | message SetAivisParamReply {
 40 |   bool success =1;
 41 | }
 42 | 
 43 | message InterruptVoiceRequest {}
 44 | 
 45 | message InterruptVoiceReply {
 46 |   bool success =1;
 47 | }
 48 | 
 49 | message EnableVoicePlayRequest {
 50 | }
 51 | 
 52 | message EnableVoicePlayReply {
 53 |   bool success =1;
 54 | }
 55 | 
 56 | message DisableVoicePlayRequest {
 57 | }
 58 | 
 59 | message DisableVoicePlayReply {
 60 |   bool success =1;
 61 | }
 62 | 
 63 | message IsVoicePlayingRequest {}
 64 | 
 65 | message IsVoicePlayingReply {
 66 |   bool is_playing =1;
 67 | }
 68 | 
 69 | message SentenceEndRequest {}
 70 | 
 71 | message SentenceEndReply {
 72 |   bool success =1;
 73 | }
 74 | 
 75 | message StartHeadControlRequest {}
 76 | 
 77 | message StartHeadControlReply {
 78 |   bool success =1;
 79 | }
 80 | 
 81 | 
 82 | service VoiceServerService {
 83 |     rpc SetText(SetTextRequest)
 84 |         returns (SetTextReply);
 85 |     rpc SetStyleBertVitsParam(SetStyleBertVitsParamRequest)
 86 |         returns (SetStyleBertVitsParamReply);
 87 |     rpc SetVoicevoxParam(SetVoicevoxParamRequest)
 88 |         returns (SetVoicevoxParamReply);
 89 |     rpc SetAivisParam(SetAivisParamRequest)
 90 |         returns (SetAivisParamReply);
 91 |     rpc InterruptVoice(InterruptVoiceRequest)
 92 |         returns (InterruptVoiceReply);
 93 |     rpc EnableVoicePlay(EnableVoicePlayRequest)
 94 |         returns (EnableVoicePlayReply);
 95 |     rpc DisableVoicePlay(DisableVoicePlayRequest)
 96 |         returns (DisableVoicePlayReply);
 97 |     rpc IsVoicePlaying(IsVoicePlayingRequest)
 98 |         returns (IsVoicePlayingReply);
 99 |     rpc SentenceEnd(SentenceEndRequest)
100 |         returns (SentenceEndReply);
101 |     rpc StartHeadControl(StartHeadControlRequest)
102 |         returns (StartHeadControlReply);
103 | }
104 | 


--------------------------------------------------------------------------------
/pysen.toml:
--------------------------------------------------------------------------------
 1 | [tool.pysen]
 2 | version = "0.10"
 3 | 
 4 | [tool.pysen.lint]
 5 | enable_black = true
 6 | enable_flake8 = true
 7 | enable_isort = true
 8 | enable_mypy = true
 9 | mypy_preset = "strict"
10 | line_length = 88
11 | py_version = "py38"
12 | mypy_ignore_packages = ["akari_proto.*"]
13 | 
14 | [[tool.pysen.lint.mypy_targets]]
15 |   paths = [".", "lib/"]
16 | 
17 | [tool.pysen.lint.source]
18 |   includes = [".", "lib/"]
19 |   excludes = []
20 |   exclude_globs = [
21 |     "lib/grpc/",
22 |   ]
23 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | akari-client[depthai]
 2 | akari-proto
 3 | alkana
 4 | anthropic
 5 | google-cloud-speech
 6 | google-genai
 7 | google-generativeai
 8 | -e gpt-stream-json-parser/
 9 | grpcio
10 | grpcio-tools
11 | openai
12 | numpy
13 | pydantic>=2.0.0
14 | PyAudio
15 | PyJapanglish
16 | python-dotenv
17 | six
18 | SpeechRecognition
19 | 


--------------------------------------------------------------------------------
/script/faster_chatbot.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # -*- coding: utf-8 -*-
 3 | ## シェルオプション
 4 | set -e           # コマンド実行に失敗したらエラー
 5 | set -u           # 未定義の変数にアクセスしたらエラー
 6 | set -o pipefail  # パイプのコマンドが失敗したらエラー（bashのみ）
 7 | 
 8 | ip=$1
 9 | 
10 | echo ${ip}
11 | 
12 | #第２引数でakari_motion_serverのパスが記載されていた場合は、そちらも起動する。
13 | if [ "$#" -ge 2 ]; then
14 |     (
15 |     cd $2
16 |     . venv/bin/activate
17 |     gnome-terminal --title="motion_server" -- bash -ic "python3 server.py"
18 |     )
19 | fi
20 | 
21 | 
22 | (
23 | cd ../
24 |  . venv/bin/activate
25 |  gnome-terminal --title="voicevox_server" -- bash -ic "python3 voicevox_server.py --voicevox_local --voice_host ${ip}"
26 |  gnome-terminal --title="gpt_publisher" -- bash -ic "python3 gpt_publisher.py"
27 |  gnome-terminal --title="speech_publisher" -- bash -ic "python3 speech_publisher.py --timeout 0.8"
28 | )
29 | 


--------------------------------------------------------------------------------
/script/faster_chatbot_aivis.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # -*- coding: utf-8 -*-
 3 | ## シェルオプション
 4 | set -e           # コマンド実行に失敗したらエラー
 5 | set -u           # 未定義の変数にアクセスしたらエラー
 6 | set -o pipefail  # パイプのコマンドが失敗したらエラー（bashのみ）
 7 | 
 8 | ip=$1
 9 | 
10 | echo ${ip}
11 | 
12 | #第２引数でakari_motion_serverのパスが記載されていた場合は、そちらも起動する。
13 | if [ "$#" -ge 2 ]; then
14 |     (
15 |     cd $2
16 |     . venv/bin/activate
17 |     gnome-terminal --title="motion_server" -- bash -ic "python3 server.py"
18 |     )
19 | fi
20 | 
21 | 
22 | (
23 | cd ../
24 |  . venv/bin/activate
25 |  gnome-terminal --title="aivis_server" -- bash -ic "python3 aivis_server.py --voice_host ${ip}"
26 |  gnome-terminal --title="gpt_publisher" -- bash -ic "python3 gpt_publisher.py"
27 |  gnome-terminal --title="speech_publisher" -- bash -ic "python3 speech_publisher.py --timeout 0.8"
28 | )
29 | 


--------------------------------------------------------------------------------
/script/faster_chatbot_aivis_auto.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # -*- coding: utf-8 -*-
 3 | ## シェルオプション
 4 | set -e           # コマンド実行に失敗したらエラー
 5 | set -u           # 未定義の変数にアクセスしたらエラー
 6 | set -o pipefail  # パイプのコマンドが失敗したらエラー（bashのみ）
 7 | 
 8 | ip=$1
 9 | 
10 | echo ${ip}
11 | 
12 | #第２引数でakari_motion_serverのパスが記載されていた場合は、そちらも起動する。
13 | if [ "$#" -ge 2 ]; then
14 |     (
15 |     cd $2
16 |     . venv/bin/activate
17 |     gnome-terminal --title="motion_server" -- bash -ic "python3 server.py"
18 |     )
19 | fi
20 | 
21 | 
22 | (
23 | cd ../
24 |  . venv/bin/activate
25 |  gnome-terminal --title="aivis_server" -- bash -ic "python3 aivis_server.py --voice_host ${ip}"
26 |  gnome-terminal --title="gpt_publisher" -- bash -ic "python3 gpt_publisher.py"
27 |  gnome-terminal --title="speech_publisher" -- bash -ic "python3 speech_publisher.py --timeout 0.8 --auto"
28 |  gnome-terminal --title="talk_controller" -- bash -ic "python3 talk_controller_client.py"
29 | )
30 | 


--------------------------------------------------------------------------------
/script/faster_chatbot_auto.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # -*- coding: utf-8 -*-
 3 | ## シェルオプション
 4 | set -e           # コマンド実行に失敗したらエラー
 5 | set -u           # 未定義の変数にアクセスしたらエラー
 6 | set -o pipefail  # パイプのコマンドが失敗したらエラー（bashのみ）
 7 | 
 8 | ip=$1
 9 | 
10 | echo ${ip}
11 | 
12 | #第２引数でakari_motion_serverのパスが記載されていた場合は、そちらも起動する。
13 | if [ "$#" -ge 2 ]; then
14 |     (
15 |     cd $2
16 |     . venv/bin/activate
17 |     gnome-terminal --title="motion_server" -- bash -ic "python3 server.py"
18 |     )
19 | fi
20 | 
21 | 
22 | (
23 | cd ../
24 |  . venv/bin/activate
25 |  gnome-terminal --title="voicevox_server" -- bash -ic "python3 voicevox_server.py --voicevox_local --voice_host ${ip}"
26 |  gnome-terminal --title="gpt_publisher" -- bash -ic "python3 gpt_publisher.py"
27 |  gnome-terminal --title="speech_publisher" -- bash -ic "python3 speech_publisher.py --timeout 0.8 --auto"
28 |  gnome-terminal --title="talk_controller" -- bash -ic "python3 talk_controller_client.py"
29 | )
30 | 


--------------------------------------------------------------------------------
/script/faster_chatbot_bert_vits.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # -*- coding: utf-8 -*-
 3 | ## シェルオプション
 4 | set -e           # コマンド実行に失敗したらエラー
 5 | set -u           # 未定義の変数にアクセスしたらエラー
 6 | set -o pipefail  # パイプのコマンドが失敗したらエラー（bashのみ）
 7 | 
 8 | ip=$1
 9 | 
10 | echo ${ip}
11 | 
12 | #第２引数でakari_motion_serverのパスが記載されていた場合は、そちらも起動する。
13 | if [ "$#" -ge 2 ]; then
14 |     (
15 |     cd $2
16 |     . venv/bin/activate
17 |     gnome-terminal --title="motion_server" -- bash -ic "python3 server.py"
18 |     )
19 | fi
20 | 
21 | 
22 | (
23 | cd ../
24 |  . venv/bin/activate
25 |  gnome-terminal --title="style_bert_vits_server" -- bash -ic "python3 style_bert_vits_server.py --voice_host ${ip}"
26 |  gnome-terminal --title="gpt_publisher" -- bash -ic "python3 gpt_publisher.py"
27 |  gnome-terminal --title="speech_publisher" -- bash -ic "python3 speech_publisher.py --timeout 0.8"
28 | )
29 | 


--------------------------------------------------------------------------------
/script/faster_chatbot_bert_vits_auto.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # -*- coding: utf-8 -*-
 3 | ## シェルオプション
 4 | set -e           # コマンド実行に失敗したらエラー
 5 | set -u           # 未定義の変数にアクセスしたらエラー
 6 | set -o pipefail  # パイプのコマンドが失敗したらエラー（bashのみ）
 7 | 
 8 | ip=$1
 9 | 
10 | echo ${ip}
11 | 
12 | #第２引数でakari_motion_serverのパスが記載されていた場合は、そちらも起動する。
13 | if [ "$#" -ge 2 ]; then
14 |     (
15 |     cd $2
16 |     . venv/bin/activate
17 |     gnome-terminal --title="motion_server" -- bash -ic "python3 server.py"
18 |     )
19 | fi
20 | 
21 | 
22 | (
23 | cd ../
24 |  . venv/bin/activate
25 |  gnome-terminal --title="style_bert_vits_server" -- bash -ic "python3 style_bert_vits_server.py --voice_host ${ip}"
26 |  gnome-terminal --title="gpt_publisher" -- bash -ic "python3 gpt_publisher.py"
27 |  gnome-terminal --title="speech_publisher" -- bash -ic "python3 speech_publisher.py --timeout 0.8 --auto"
28 |  gnome-terminal --title="talk_controller" -- bash -ic "python3 talk_controller_client.py"
29 | )
30 | 


--------------------------------------------------------------------------------
/speech_publisher.py:
--------------------------------------------------------------------------------
  1 | import argparse
  2 | import os
  3 | import sys
  4 | import time
  5 | from concurrent import futures
  6 | 
  7 | import grpc
  8 | from lib.google_speech import get_db_thresh
  9 | 
 10 | sys.path.append(os.path.join(os.path.dirname(__file__), "lib/grpc"))
 11 | import motion_server_pb2
 12 | import motion_server_pb2_grpc
 13 | import speech_server_pb2
 14 | import speech_server_pb2_grpc
 15 | import voice_server_pb2
 16 | import voice_server_pb2_grpc
 17 | 
 18 | RATE = 16000
 19 | CHUNK = int(RATE / 10)  # 100ms
 20 | POWER_THRESH_DIFF = 20  # 周辺音量にこの値を足したものをpower_threshouldとする
 21 | enable_input = True
 22 | 
 23 | 
 24 | class SpeechServer(speech_server_pb2_grpc.SpeechServerServiceServicer):
 25 |     """
 26 |     音声入力の制御用のgRPCサーバ
 27 |     """
 28 | 
 29 |     def ToggleSpeech(
 30 |         self,
 31 |         request: speech_server_pb2.ToggleSpeechRequest,
 32 |         context: grpc.ServicerContext,
 33 |     ) -> speech_server_pb2.ToggleSpeechReply:
 34 |         global enable_input
 35 |         enable_input = request.enable
 36 |         return speech_server_pb2.ToggleSpeechReply(success=True)
 37 | 
 38 | 
 39 | def main() -> None:
 40 |     global enable_input
 41 |     parser = argparse.ArgumentParser()
 42 |     parser.add_argument(
 43 |         "--robot_ip", help="Robot ip address", default="127.0.0.1", type=str
 44 |     )
 45 |     parser.add_argument(
 46 |         "--robot_port", help="Robot port number", default="50055", type=str
 47 |     )
 48 |     parser.add_argument(
 49 |         "--gpt_ip", help="Gpt server ip address", default="127.0.0.1", type=str
 50 |     )
 51 |     parser.add_argument(
 52 |         "--gpt_port", help="Gpt server port number", default="10001", type=str
 53 |     )
 54 |     parser.add_argument(
 55 |         "--voice_ip", help="Voice server ip address", default="127.0.0.1", type=str
 56 |     )
 57 |     parser.add_argument(
 58 |         "--voice_port", help="Voice server port number", default="10002", type=str
 59 |     )
 60 |     parser.add_argument(
 61 |         "-t",
 62 |         "--timeout",
 63 |         type=float,
 64 |         default=0.5,
 65 |         help="Microphone input power timeout",
 66 |     )
 67 |     parser.add_argument(
 68 |         "-p",
 69 |         "--power_threshold",
 70 |         type=float,
 71 |         default=0,
 72 |         help="Microphone input power threshold",
 73 |     )
 74 |     parser.add_argument(
 75 |         "--progress_report_len",
 76 |         type=int,
 77 |         default=8,
 78 |         help="Send the progress of speech recognition if recognition word count over this number ",
 79 |     )
 80 |     parser.add_argument(
 81 |         "--no_motion",
 82 |         help="Not play nod motion",
 83 |         action="store_true",
 84 |     )
 85 |     parser.add_argument(
 86 |         "--auto",
 87 |         help="Skip keyboard input for speech recognition",
 88 |         action="store_true",
 89 |     )
 90 |     parser.add_argument(
 91 |         "--v2",
 92 |         action="store_true",
 93 |         help="Use google speech v2 instead of v1",
 94 |     )
 95 |     args = parser.parse_args()
 96 |     if args.v2:
 97 |         from lib.google_speech_v2_grpc import GoogleSpeechV2Grpc as GoogleSpeechGrpc
 98 |         from lib.google_speech_v2_grpc import (
 99 |             MicrophoneStreamV2Grpc as MicrophoneStreamGrpc,
100 |         )
101 |     else:
102 |         from lib.google_speech_grpc import GoogleSpeechGrpc as GoogleSpeechGrpc
103 |         from lib.google_speech_grpc import MicrophoneStreamGrpc as MicrophoneStreamGrpc
104 |     motion_server_host = None
105 |     motion_server_port = None
106 |     if not args.no_motion:
107 |         motion_server_host = args.robot_ip
108 |         motion_server_port = args.robot_port
109 |     timeout: float = args.timeout
110 |     power_threshold: float = args.power_threshold
111 | 
112 |     server = grpc.server(futures.ThreadPoolExecutor(max_workers=10))
113 |     speech_server_pb2_grpc.add_SpeechServerServiceServicer_to_server(
114 |         SpeechServer(), server
115 |     )
116 |     port = "10003"
117 |     server.add_insecure_port("[::]:" + port)
118 |     server.start()
119 |     print(f"speech_server start. port: {port}")
120 | 
121 |     # grpc stubの設定
122 |     voice_channel = grpc.insecure_channel(args.voice_ip + ":" + args.voice_port)
123 |     voice_stub = voice_server_pb2_grpc.VoiceServerServiceStub(voice_channel)
124 | 
125 |     google_speech_grpc = GoogleSpeechGrpc(
126 |         gpt_host=args.gpt_ip,
127 |         gpt_port=args.gpt_port,
128 |         voice_host=args.voice_ip,
129 |         voice_port=args.voice_port,
130 |     )
131 |     # power_threshouldが指定されていない場合、周辺音量を収録し、発話判定閾値を決定
132 |     if power_threshold == 0:
133 |         power_threshold = get_db_thresh() + POWER_THRESH_DIFF
134 |     print(f"power_threshold set to {power_threshold:.3f}db")
135 | 
136 |     while True:
137 |         responses = None
138 |         while not enable_input:
139 |             time.sleep(0.01)
140 |         with MicrophoneStreamGrpc(
141 |             rate=RATE,
142 |             chunk=CHUNK,
143 |             _timeout_thresh=timeout,
144 |             _db_thresh=power_threshold,
145 |             gpt_host=args.gpt_ip,
146 |             gpt_port=args.gpt_port,
147 |             voice_host=args.voice_ip,
148 |             voice_port=args.voice_port,
149 |             motion_server_host=motion_server_host,
150 |             motion_server_port=motion_server_port,
151 |         ) as stream:
152 |             if not args.auto:
153 |                 print("Enterを入力してから、マイクに話しかけてください")
154 |                 input()
155 |                 try:
156 |                     voice_stub.DisableVoicePlay(
157 |                         voice_server_pb2.DisableVoicePlayRequest()
158 |                     )
159 |                 except BaseException:
160 |                     pass
161 |             try:
162 |                 responses = stream.transcribe()
163 |             except BaseException:
164 |                 google_speech_grpc.interrupt()
165 |                 continue
166 |             if responses is not None:
167 |                 try:
168 |                     google_speech_grpc.listen_publisher_grpc(
169 |                         responses, progress_report_len=args.progress_report_len
170 |                     )
171 |                 except BaseException as e:
172 |                     print(e)
173 |                     google_speech_grpc.interrupt()
174 |                     continue
175 |         print("")
176 | 
177 | 
178 | if __name__ == "__main__":
179 |     main()
180 | 


--------------------------------------------------------------------------------
/speech_to_text_example.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | 
 3 | RATE = 16000
 4 | CHUNK = int(RATE / 10)  # 100ms
 5 | POWER_THRESH_DIFF = 25  # 周辺音量にこの値を足したものをpower_threshouldとする
 6 | 
 7 | 
 8 | def main() -> None:
 9 |     global host
10 |     global port
11 |     parser = argparse.ArgumentParser()
12 |     parser.add_argument(
13 |         "-t",
14 |         "--timeout",
15 |         type=float,
16 |         default=0.5,
17 |         help="Microphone input power timeout",
18 |     )
19 |     parser.add_argument(
20 |         "-p",
21 |         "--power_threshold",
22 |         type=float,
23 |         default=0,
24 |         help="Microphone input power threshold",
25 |     )
26 |     parser.add_argument(
27 |         "--v2",
28 |         action="store_true",
29 |         help="Use google speech v2 instead of v1",
30 |     )
31 |     args = parser.parse_args()
32 |     if args.v2:
33 |         from lib.google_speech_v2 import MicrophoneStreamV2 as MicrophoneStream
34 |         from lib.google_speech_v2 import get_db_thresh, listen_print_loop
35 |     else:
36 |         from lib.google_speech import MicrophoneStream, get_db_thresh, listen_print_loop
37 |     timeout: float = args.timeout
38 |     power_threshold: float = args.power_threshold
39 |     if power_threshold == 0:
40 |         power_threshold = get_db_thresh() + POWER_THRESH_DIFF
41 |     print(f"power_threshold set to {power_threshold:.3f}db")
42 | 
43 |     print("マイクに話しかけてください")
44 |     while True:
45 |         responses = None
46 |         with MicrophoneStream(
47 |             rate=RATE, chunk=CHUNK, _timeout_thresh=timeout, _db_thresh=power_threshold
48 |         ) as stream:
49 |             responses = stream.transcribe()
50 |             if responses is not None:
51 |                 listen_print_loop(responses)
52 | 
53 | 
54 | if __name__ == "__main__":
55 |     main()
56 | 


--------------------------------------------------------------------------------
/style_bert_vits_example.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | 
 3 | from lib.style_bert_vits import TextToStyleBertVits
 4 | 
 5 | 
 6 | def main() -> None:
 7 |     host = ""
 8 |     port = ""
 9 |     parser = argparse.ArgumentParser()
10 |     parser.add_argument(
11 |         "--voice_host",
12 |         type=str,
13 |         default="127.0.0.1",
14 |         help="Voice server host",
15 |     )
16 |     parser.add_argument(
17 |         "--voice_port",
18 |         type=str,
19 |         default="5000",
20 |         help="Voice server port",
21 |     )
22 |     args = parser.parse_args()
23 |     host = args.voice_host
24 |     port = args.voice_port
25 |     text_to_voice = TextToStyleBertVits(host, port)
26 | 
27 |     # set_paramメソッドでモデル名や音声再生速度、感情スタイルなどを指定することができます。
28 |     # モデル名を指定
29 |     # text_to_voice.set_param(model_name='jvnv-F1-jp')
30 |     # 音声再生速度を指定
31 |     # text_to_voice.set_param(length=2.0)
32 |     # 感情スタイルを指定
33 |     # text_to_voice.set_param(style='Happy')
34 |     # 感情スタイルの重みを指定
35 |     # text_to_voice.set_param(style_weight=3.0)
36 | 
37 |     print("発話させたい文章をキーボード入力後、Enterを押してください。")
38 |     while True:
39 |         text = input("Input: ")
40 |         text_to_voice.put_text(
41 |             text=text,
42 |         )
43 |         print("")
44 | 
45 | 
46 | if __name__ == "__main__":
47 |     main()
48 | 


--------------------------------------------------------------------------------
/style_bert_vits_server.py:
--------------------------------------------------------------------------------
  1 | import argparse
  2 | import os
  3 | import sys
  4 | import time
  5 | from concurrent import futures
  6 | from typing import Any
  7 | 
  8 | import grpc
  9 | from lib.style_bert_vits import TextToStyleBertVits
 10 | 
 11 | sys.path.append(os.path.join(os.path.dirname(__file__), "lib/grpc"))
 12 | import voice_server_pb2
 13 | import voice_server_pb2_grpc
 14 | 
 15 | 
 16 | class VoiceServer(voice_server_pb2_grpc.VoiceServerServiceServicer):
 17 |     """
 18 |     StyleBertVitsにtextを送信し、音声を再生するgprcサーバ
 19 |     """
 20 | 
 21 |     def __init__(self, text_to_voice: Any) -> None:
 22 |         self.text_to_voice = text_to_voice
 23 | 
 24 |     def SetText(
 25 |         self,
 26 |         request: voice_server_pb2.SetTextRequest(),
 27 |         context: grpc.ServicerContext,
 28 |     ) -> voice_server_pb2.SetTextReply:
 29 |         # 即時再生しないようにis_playはFalseで実行
 30 |         print(f"Send text: {request.text}")
 31 |         self.text_to_voice.put_text(request.text, play_now=False)
 32 |         return voice_server_pb2.SetTextReply(success=True)
 33 | 
 34 |     def SetStyleBertVitsParam(
 35 |         self,
 36 |         request: voice_server_pb2.SetStyleBertVitsParamRequest(),
 37 |         context: grpc.ServicerContext,
 38 |     ) -> voice_server_pb2.SetStyleBertVitsParamReply:
 39 |         if request.model_name:
 40 |             self.text_to_voice.set_param(model_name=request.model_name)
 41 |         if request.length:
 42 |             self.text_to_voice.set_param(length=request.length)
 43 |         if request.style:
 44 |             self.text_to_voice.set_param(style=request.style)
 45 |         if request.style_weight:
 46 |             self.text_to_voice.set_param(style_weight=request.style_weight)
 47 |         return voice_server_pb2.SetStyleBertVitsParamReply(success=True)
 48 | 
 49 |     def SetVoicevoxParam(
 50 |         self,
 51 |         request: voice_server_pb2.SetVoicevoxParamRequest(),
 52 |         context: grpc.ServicerContext,
 53 |     ) -> voice_server_pb2.SetVoicevoxParamReply:
 54 |         print("SetVoicevoxParam is not supported on style_bert_vits_server.")
 55 |         return voice_server_pb2.SetVoicevoxParamReply(success=False)
 56 | 
 57 |     def SetAivisParam(
 58 |         self,
 59 |         request: voice_server_pb2.SetAivisParamRequest(),
 60 |         context: grpc.ServicerContext,
 61 |     ) -> voice_server_pb2.SetAivisParamReply:
 62 |         print("SetAivisParam is not supported on style_bert_vits_server.")
 63 |         return voice_server_pb2.SetAivisParamReply(success=False)
 64 | 
 65 |     def InterruptVoice(
 66 |         self,
 67 |         request: voice_server_pb2.InterruptVoiceRequest(),
 68 |         context: grpc.ServicerContext,
 69 |     ) -> voice_server_pb2.InterruptVoiceReply:
 70 |         while not self.text_to_voice.queue.empty():
 71 |             self.text_to_voice.queue.get()
 72 |         return voice_server_pb2.InterruptVoiceReply(success=True)
 73 | 
 74 |     def EnableVoicePlay(
 75 |         self,
 76 |         request: voice_server_pb2.EnableVoicePlayRequest(),
 77 |         context: grpc.ServicerContext,
 78 |     ) -> voice_server_pb2.EnableVoicePlayReply:
 79 |         self.text_to_voice.enable_voice_play()
 80 |         return voice_server_pb2.EnableVoicePlayReply(success=True)
 81 | 
 82 |     def DisableVoicePlay(
 83 |         self,
 84 |         request: voice_server_pb2.DisableVoicePlayRequest(),
 85 |         context: grpc.ServicerContext,
 86 |     ) -> voice_server_pb2.DisableVoicePlayReply:
 87 |         self.text_to_voice.disable_voice_play()
 88 |         return voice_server_pb2.DisableVoicePlayReply(success=True)
 89 | 
 90 |     def IsVoicePlaying(
 91 |         self,
 92 |         request: voice_server_pb2.IsVoicePlayingRequest(),
 93 |         context: grpc.ServicerContext,
 94 |     ) -> voice_server_pb2.IsVoicePlayingReply:
 95 |         return voice_server_pb2.IsVoicePlayingReply(
 96 |             is_playing=not self.text_to_voice.is_playing()
 97 |         )
 98 | 
 99 |     def SentenceEnd(
100 |         self,
101 |         request: voice_server_pb2.SentenceEndRequest(),
102 |         context: grpc.ServicerContext,
103 |     ) -> voice_server_pb2.SentenceEndReply:
104 |         self.text_to_voice.sentence_end()
105 |         return voice_server_pb2.SentenceEndReply(success=True)
106 | 
107 |     def StartHeadControl(
108 |         self,
109 |         request: voice_server_pb2.StartHeadControlRequest(),
110 |         context: grpc.ServicerContext,
111 |     ) -> voice_server_pb2.StartHeadControlReply:
112 |         self.text_to_voice.start_head_control()
113 |         return voice_server_pb2.StartHeadControlReply(success=False)
114 | 
115 | 
116 | def main() -> None:
117 |     parser = argparse.ArgumentParser()
118 |     parser.add_argument(
119 |         "--voice_host",
120 |         type=str,
121 |         default="127.0.0.1",
122 |         help="Style-Bert-VITS2 server host",
123 |     )
124 |     parser.add_argument(
125 |         "--voice_port",
126 |         type=str,
127 |         default="5000",
128 |         help="Style-Bert-VITS2 server port",
129 |     )
130 |     parser.add_argument(
131 |         "--robot_ip", help="Robot ip address", default="127.0.0.1", type=str
132 |     )
133 |     parser.add_argument(
134 |         "--robot_port", help="Robot port number", default="50055", type=str
135 |     )
136 |     parser.add_argument(
137 |         "--no_motion",
138 |         help="Not play nod motion",
139 |         action="store_true",
140 |     )
141 |     args = parser.parse_args()
142 | 
143 |     host = args.voice_host
144 |     port = args.voice_port
145 |     motion_server_host = None
146 |     motion_server_port = None
147 |     if not args.no_motion:
148 |         motion_server_host = args.robot_ip
149 |         motion_server_port = args.robot_port
150 |     text_to_voice = TextToStyleBertVits(
151 |         host=host,
152 |         port=port,
153 |         motion_host=motion_server_host,
154 |         motion_port=motion_server_port,
155 |     )
156 | 
157 |     server = grpc.server(futures.ThreadPoolExecutor(max_workers=10))
158 |     voice_server_pb2_grpc.add_VoiceServerServiceServicer_to_server(
159 |         VoiceServer(text_to_voice), server
160 |     )
161 |     port = "10002"
162 |     server.add_insecure_port("[::]:" + port)
163 |     server.start()
164 |     print(f"voice_server start. port: {port}")
165 |     server.wait_for_termination()
166 | 
167 | 
168 | if __name__ == "__main__":
169 |     main()
170 | 


--------------------------------------------------------------------------------
/talk_controller_client.py:
--------------------------------------------------------------------------------
  1 | import argparse
  2 | import os
  3 | import sys
  4 | import time
  5 | 
  6 | import grpc
  7 | 
  8 | sys.path.append(os.path.join(os.path.dirname(__file__), "lib/grpc"))
  9 | import speech_server_pb2
 10 | import speech_server_pb2_grpc
 11 | import voice_server_pb2
 12 | import voice_server_pb2_grpc
 13 | 
 14 | 
 15 | def main() -> None:
 16 |     global enable_input
 17 |     parser = argparse.ArgumentParser()
 18 |     parser.add_argument(
 19 |         "--speech_ip", help="speech publisher ip address", default="127.0.0.1", type=str
 20 |     )
 21 |     parser.add_argument(
 22 |         "--speech_port", help="speech publisher port number", default="10003", type=str
 23 |     )
 24 |     parser.add_argument(
 25 |         "--voice_ip", help="Voice server ip address", default="127.0.0.1", type=str
 26 |     )
 27 |     parser.add_argument(
 28 |         "--voice_port", help="Voice server port number", default="10002", type=str
 29 |     )
 30 |     args = parser.parse_args()
 31 | 
 32 |     # grpc stubの設定
 33 |     speech_channel = grpc.insecure_channel(args.speech_ip + ":" + str(args.speech_port))
 34 |     voice_channel = grpc.insecure_channel(args.voice_ip + ":" + args.voice_port)
 35 |     voice_stub = None
 36 |     speech_stub = None
 37 |     # Voice serverの接続確認
 38 |     while True:
 39 |         try:
 40 |             grpc.channel_ready_future(voice_channel).result(timeout=0.5)
 41 |             voice_stub = voice_server_pb2_grpc.VoiceServerServiceStub(voice_channel)
 42 |             break
 43 |         except grpc.FutureTimeoutError:
 44 |             print("Connecting to voice server timeout. Retrying")
 45 |             continue
 46 |         except KeyboardInterrupt:
 47 |             return
 48 |         except BaseException as e:
 49 |             print(f"RPC error: {e}")
 50 |             continue
 51 |     print("Connected to voice server")
 52 |     # Speech serverの接続確認
 53 |     while True:
 54 |         try:
 55 |             grpc.channel_ready_future(speech_channel).result(timeout=0.5)
 56 |             speech_stub = speech_server_pb2_grpc.SpeechServerServiceStub(speech_channel)
 57 |             break
 58 |         except grpc.FutureTimeoutError:
 59 |             print("Connecting to speech server timeout. Retrying")
 60 |             continue
 61 |         except KeyboardInterrupt:
 62 |             return
 63 |         except BaseException as e:
 64 |             print(f"RPC error: {e}")
 65 |             continue
 66 |     print("Connected to speech server")
 67 |     is_voice_playing = False
 68 | 
 69 |     while True:
 70 |         if not is_voice_playing:
 71 |             try:
 72 |                 ret = voice_stub.IsVoicePlaying(
 73 |                     voice_server_pb2.IsVoicePlayingRequest()
 74 |                 )
 75 |                 is_voice_playing = ret.is_playing
 76 |             except KeyboardInterrupt:
 77 |                 return
 78 |             except BaseException:
 79 |                 print("Voice server connection error!")
 80 |             if is_voice_playing:
 81 |                 speech_stub.ToggleSpeech(
 82 |                     speech_server_pb2.ToggleSpeechRequest(enable=False)
 83 |                 )
 84 |         else:
 85 |             try:
 86 |                 ret = voice_stub.IsVoicePlaying(
 87 |                     voice_server_pb2.IsVoicePlayingRequest()
 88 |                 )
 89 |                 is_voice_playing = ret.is_playing
 90 |             except KeyboardInterrupt:
 91 |                 return
 92 |             except BaseException:
 93 |                 print("Voice server connection error!")
 94 |             if not is_voice_playing:
 95 |                 speech_stub.ToggleSpeech(
 96 |                     speech_server_pb2.ToggleSpeechRequest(enable=True)
 97 |                 )
 98 |         time.sleep(0.1)
 99 | 
100 | 
101 | if __name__ == "__main__":
102 |     main()
103 | 


--------------------------------------------------------------------------------
/text_to_kana_example.py:
--------------------------------------------------------------------------------
 1 | from lib.en_to_jp import EnToJp
 2 | 
 3 | if __name__ == "__main__":
 4 |     en_to_jp = EnToJp()
 5 |     while True:
 6 |         text = input("文章中の英単語をカタカナに変換します。文章を入力してください。\n> ")
 7 |         print("alkana Only")
 8 |         print(f"    {en_to_jp.text_to_kana(text, True,False)}")
 9 |         print("japanglish Only")
10 |         print(f"    {en_to_jp.text_to_kana(text, False,True, False)}")
11 |         print("japanglish inference Only")
12 |         print(f"    {en_to_jp.text_to_kana(text, False,True, True)}")
13 |         print("alkana japanglish no inference")
14 |         print(f"    {en_to_jp.text_to_kana(text, True,True,False)}")
15 |         print("alkana japanglish inference")
16 |         print(f"    {en_to_jp.text_to_kana(text, True,True,True)}")
17 | 


--------------------------------------------------------------------------------
/voicevox_example.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | 
 3 | 
 4 | def main() -> None:
 5 |     host = ""
 6 |     port = ""
 7 |     parser = argparse.ArgumentParser()
 8 |     parser.add_argument("--voicevox_local", action="store_true")
 9 |     parser.add_argument(
10 |         "--voice_host",
11 |         type=str,
12 |         default="127.0.0.1",
13 |         help="VoiceVox server host",
14 |     )
15 |     parser.add_argument(
16 |         "--voice_port",
17 |         type=str,
18 |         default="50021",
19 |         help="VoiceVox server port",
20 |     )
21 |     args = parser.parse_args()
22 |     if args.voicevox_local:
23 |         from lib.voicevox import TextToVoiceVox
24 | 
25 |         host = args.voice_host
26 |         port = args.voice_port
27 |         text_to_voice = TextToVoiceVox(host, port)
28 |         print("voicevox local pc ver.")
29 |     else:
30 |         from lib.conf import VOICEVOX_APIKEY
31 |         from lib.voicevox import TextToVoiceVoxWeb
32 | 
33 |         text_to_voice = TextToVoiceVoxWeb(apikey=VOICEVOX_APIKEY)
34 |         print("voicevox web ver.")
35 | 
36 |     print("発話させたい文章をキーボード入力後、Enterを押してください。")
37 |     while True:
38 |         text = input("Input: ")
39 |         text_to_voice.put_text(text)
40 |         print("")
41 | 
42 | 
43 | if __name__ == "__main__":
44 |     main()
45 | 


--------------------------------------------------------------------------------
/voicevox_server.py:
--------------------------------------------------------------------------------
  1 | import argparse
  2 | import os
  3 | import sys
  4 | import time
  5 | from concurrent import futures
  6 | from typing import Any
  7 | 
  8 | import grpc
  9 | 
 10 | sys.path.append(os.path.join(os.path.dirname(__file__), "lib/grpc"))
 11 | import voice_server_pb2
 12 | import voice_server_pb2_grpc
 13 | 
 14 | 
 15 | class VoiceServer(voice_server_pb2_grpc.VoiceServerServiceServicer):
 16 |     """
 17 |     Voicevoxにtextを送信し、音声を再生するgprcサーバ
 18 |     """
 19 | 
 20 |     def __init__(self, text_to_voice: Any) -> None:
 21 |         self.text_to_voice = text_to_voice
 22 | 
 23 |     def SetText(
 24 |         self,
 25 |         request: voice_server_pb2.SetTextRequest(),
 26 |         context: grpc.ServicerContext,
 27 |     ) -> voice_server_pb2.SetTextReply:
 28 |         # 即時再生しないようにis_playはFalseで実行
 29 |         print(f"Send text: {request.text}")
 30 |         self.text_to_voice.put_text(request.text, play_now=False)
 31 |         return voice_server_pb2.SetTextReply(success=True)
 32 | 
 33 |     def SetStyleBertVitsParam(
 34 |         self,
 35 |         request: voice_server_pb2.SetStyleBertVitsParamRequest(),
 36 |         context: grpc.ServicerContext,
 37 |     ) -> voice_server_pb2.SetStyleBertVitsParamReply:
 38 |         print("SetStyleBertVitsParam is not supported on voicevox_server.")
 39 |         return voice_server_pb2.SetStyleBertVitsParamReply(success=False)
 40 | 
 41 |     def SetVoicevoxParam(
 42 |         self,
 43 |         request: voice_server_pb2.SetVoicevoxParamRequest(),
 44 |         context: grpc.ServicerContext,
 45 |     ) -> voice_server_pb2.SetVoicevoxParamReply:
 46 |         if request.speaker:
 47 |             self.text_to_voice.set_param(speaker=request.speaker)
 48 |         if request.speed_scale:
 49 |             self.text_to_voice.set_param(speed_scale=request.speed_scale)
 50 |         return voice_server_pb2.SetVoicevoxParamReply(success=True)
 51 | 
 52 |     def SetAivisParam(
 53 |         self,
 54 |         request: voice_server_pb2.SetAivisParamRequest(),
 55 |         context: grpc.ServicerContext,
 56 |     ) -> voice_server_pb2.SetAivisParamReply:
 57 |         print("SetAivisParam is not supported on voicevox_server.")
 58 |         return voice_server_pb2.SetAivisParamReply(success=False)
 59 | 
 60 |     def InterruptVoice(
 61 |         self,
 62 |         request: voice_server_pb2.InterruptVoiceRequest(),
 63 |         context: grpc.ServicerContext,
 64 |     ) -> voice_server_pb2.InterruptVoiceReply:
 65 |         while not self.text_to_voice.queue.empty():
 66 |             self.text_to_voice.queue.get()
 67 |         return voice_server_pb2.InterruptVoiceReply(success=True)
 68 | 
 69 |     def EnableVoicePlay(
 70 |         self,
 71 |         request: voice_server_pb2.EnableVoicePlayRequest(),
 72 |         context: grpc.ServicerContext,
 73 |     ) -> voice_server_pb2.EnableVoicePlayReply:
 74 |         self.text_to_voice.enable_voice_play()
 75 |         return voice_server_pb2.EnableVoicePlayReply(success=True)
 76 | 
 77 |     def DisableVoicePlay(
 78 |         self,
 79 |         request: voice_server_pb2.DisableVoicePlayRequest(),
 80 |         context: grpc.ServicerContext,
 81 |     ) -> voice_server_pb2.DisableVoicePlayReply:
 82 |         self.text_to_voice.disable_voice_play()
 83 |         return voice_server_pb2.DisableVoicePlayReply(success=True)
 84 | 
 85 |     def IsVoicePlaying(
 86 |         self,
 87 |         request: voice_server_pb2.IsVoicePlayingRequest(),
 88 |         context: grpc.ServicerContext,
 89 |     ) -> voice_server_pb2.IsVoicePlayingReply:
 90 |         return voice_server_pb2.IsVoicePlayingReply(
 91 |             is_playing=not self.text_to_voice.is_playing()
 92 |         )
 93 | 
 94 |     def SentenceEnd(
 95 |         self,
 96 |         request: voice_server_pb2.SentenceEndRequest(),
 97 |         context: grpc.ServicerContext,
 98 |     ) -> voice_server_pb2.SentenceEndReply:
 99 |         self.text_to_voice.sentence_end()
100 |         return voice_server_pb2.SentenceEndReply(success=True)
101 | 
102 |     def StartHeadControl(
103 |         self,
104 |         request: voice_server_pb2.StartHeadControlRequest(),
105 |         context: grpc.ServicerContext,
106 |     ) -> voice_server_pb2.StartHeadControlReply:
107 |         self.text_to_voice.start_head_control()
108 |         return voice_server_pb2.StartHeadControlReply(success=False)
109 | 
110 | 
111 | def main() -> None:
112 |     parser = argparse.ArgumentParser()
113 |     parser.add_argument("--voicevox_local", action="store_true")
114 |     parser.add_argument(
115 |         "--voice_host",
116 |         type=str,
117 |         default="127.0.0.1",
118 |         help="VoiceVox server host",
119 |     )
120 |     parser.add_argument(
121 |         "--voice_port",
122 |         type=str,
123 |         default="50021",
124 |         help="VoiceVox server port",
125 |     )
126 |     parser.add_argument(
127 |         "--robot_ip", help="Robot ip address", default="127.0.0.1", type=str
128 |     )
129 |     parser.add_argument(
130 |         "--robot_port", help="Robot port number", default="50055", type=str
131 |     )
132 |     parser.add_argument(
133 |         "--no_motion",
134 |         help="Not play nod motion",
135 |         action="store_true",
136 |     )
137 |     args = parser.parse_args()
138 |     motion_server_host = None
139 |     motion_server_port = None
140 |     if not args.no_motion:
141 |         motion_server_host = args.robot_ip
142 |         motion_server_port = args.robot_port
143 |     if args.voicevox_local:
144 |         # local版の場合
145 |         from lib.voicevox import TextToVoiceVox
146 | 
147 |         text_to_voice = TextToVoiceVox(
148 |             host=args.voice_host,
149 |             port=args.voice_port,
150 |             motion_host=motion_server_host,
151 |             motion_port=motion_server_port,
152 |         )
153 |         print("voicevox local pc ver.")
154 |     else:
155 |         # web版の場合
156 |         from lib.conf import VOICEVOX_APIKEY
157 |         from lib.voicevox import TextToVoiceVoxWeb
158 | 
159 |         text_to_voice = TextToVoiceVoxWeb(
160 |             apikey=VOICEVOX_APIKEY,
161 |             motion_host=motion_server_host,
162 |             motion_port=motion_server_port,
163 |         )
164 |         print("voicevox web ver.")
165 | 
166 |     server = grpc.server(futures.ThreadPoolExecutor(max_workers=10))
167 |     voice_server_pb2_grpc.add_VoiceServerServiceServicer_to_server(
168 |         VoiceServer(text_to_voice), server
169 |     )
170 |     port = "10002"
171 |     server.add_insecure_port("[::]:" + port)
172 |     server.start()
173 |     print(f"voice_server start. port: {port}")
174 |     server.wait_for_termination()
175 | 
176 | 
177 | if __name__ == "__main__":
178 |     main()
179 | 


--------------------------------------------------------------------------------