├── .gitignore
├── LICENSE
├── README.md
├── README_chinese.md
├── googlesr.py
├── googlesr_gui_chinese.py
├── googlesr_gui_english.py
├── images
    ├── 1.png
    ├── 2.png
    ├── 3.png
    └── 4.png
├── requirements.txt
└── subtitleListener.cs


/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | *.egg-info/
 24 | .installed.cfg
 25 | *.egg
 26 | MANIFEST
 27 | 
 28 | # PyInstaller
 29 | #  Usually these files are written by a python script from a template
 30 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 31 | *.manifest
 32 | *.spec
 33 | 
 34 | # Installer logs
 35 | pip-log.txt
 36 | pip-delete-this-directory.txt
 37 | 
 38 | # Unit test / coverage reports
 39 | htmlcov/
 40 | .tox/
 41 | .coverage
 42 | .coverage.*
 43 | .cache
 44 | nosetests.xml
 45 | coverage.xml
 46 | *.cover
 47 | .hypothesis/
 48 | .pytest_cache/
 49 | 
 50 | # Translations
 51 | *.mo
 52 | *.pot
 53 | 
 54 | # Django stuff:
 55 | *.log
 56 | local_settings.py
 57 | db.sqlite3
 58 | 
 59 | # Flask stuff:
 60 | instance/
 61 | .webassets-cache
 62 | 
 63 | # Scrapy stuff:
 64 | .scrapy
 65 | 
 66 | # Sphinx documentation
 67 | docs/_build/
 68 | 
 69 | # PyBuilder
 70 | target/
 71 | 
 72 | # Jupyter Notebook
 73 | .ipynb_checkpoints
 74 | 
 75 | # pyenv
 76 | .python-version
 77 | 
 78 | # celery beat schedule file
 79 | celerybeat-schedule
 80 | 
 81 | # SageMath parsed files
 82 | *.sage.py
 83 | 
 84 | # Environments
 85 | .env
 86 | .venv
 87 | env/
 88 | venv/
 89 | ENV/
 90 | env.bak/
 91 | venv.bak/
 92 | 
 93 | # Spyder project settings
 94 | .spyderproject
 95 | .spyproject
 96 | 
 97 | # Rope project settings
 98 | .ropeproject
 99 | 
100 | # mkdocs documentation
101 | /site
102 | 
103 | # mypy
104 | .mypy_cache/
105 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2019 Quei-An Chen
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Unity_live_caption
 2 | 
 3 | [繁體中文解說看這裡](README_chinese.md)
 4 | 
 5 | Use Google Speech-to-Text API to do real-time live stream caption on Unity! Best when combined with your virtual character!
 6 | 
 7 | This is part of the [OpenVTuberProject](https://github.com/kwea123/OpenVTuberProject), which provides many toolkits for becoming a VTuber.
 8 | 
 9 | **Important notice before you continue : The speech to text API is NOT free! The pricing guide is [here](https://cloud.google.com/speech-to-text/pricing).**
10 | 
11 | The [youtube livestream](https://www.youtube.com/watch?v=AZsUm_cuj9U) that demos and explains how this works (explanation in Chinese, caption in Chinese/Japanese/English/French).
12 | 
13 | Currently, the live caption is done in `python` and the result is sent to unity in real time. There might be a way to do everything in `C#`, maybe with [this](https://github.com/GoogleCloudPlatform/dotnet-docs-samples/tree/master/speech/api), but I did it in `python` for a few reasons:
14 | 1.  I'm not fluent in C#.
15 | 2.  Doing speech recognition in a separate program lets you start/stop the recognition at any time, and also lets you change the language at will without restarting the unity `.exe`.
16 | 3.  There is already an asset which claims that it can do this (I don't know if it can do real time recognition though).
17 | 
18 | ## Pre-requisite
19 | 
20 | As this process uses the Google Cloud API, you need to have a Google account.
21 | 
22 | Follow the [website](https://cloud.google.com/speech-to-text/) to activate the `Speech-to-Text` API in the console, and download the API key, which should be a `.json` file. I will refer to this key as `key.json` in the following.
23 | 
24 | Next, there are a command line (CLI) version and a GUI version of this program. The code is the same but there are some performance differences:
25 | 
26 | CLI: file size is small and allows more customization.
27 | 
28 | GUI: file size is large (about 250MB) and takes some time to warm-up the speech to text program.
29 | 
30 | Here is the tutorial of command line usage. For GUI users, please jump to [here](#GUI-usage).
31 | 
32 | ## Installation
33 | 
34 | Make sure you have python. If not, installation is recommended via [Anaconda](https://www.anaconda.com/distribution/) with python version **3.6** (if you use other versions, you need to manually download and install `pyaudio` from [here](https://www.lfd.uci.edu/~gohlke/pythonlibs/#pyaudio)).
35 | 
36 | Run `pip install -r requirements.txt` to install python dependencies.
37 | 
38 | ## Usage
39 | 
40 | 1.  Test if speech recognition works in python:
41 |     1. Change [here](https://github.com/kwea123/Unity_live_caption/blob/master/googlesr.py#L9) to where your `key.json` is located.
42 |     2. Run `python googlesr.py --debug --src_lang_code={YOUR LANGUAGE CODE}`. For the language codes, check [here](https://cloud.google.com/speech-to-text/docs/languages). You should see the recognition output on the console.
43 | 
44 | 2.  Output the recognition result to unity:
45 |     1.  Create a Text component via `GameObject->UI->Text`.
46 |     2.  Attach `subtitleListener.cs` to it.
47 |     3.  Run the unity program **FIRST**, either in editor or executable, then run `python googlesr.py --src_lang_code={YOUR LANGUAGE CODE} --connect`. You should see the recognition output now in unity. You can stop and restart the recognition anytime by pressing `Ctrl` and `c` in the python console without affecting the unity program at all.
48 |     
49 | 3.  **Remember to stop the python program when you finish the work, otherwise it is going to keep charging you! I disclaim any responsibility for charges incurred by using my program.**
50 |     
51 | ## Customization
52 | 
53 | 1.  You can change the connection port by changing the port number (default 5067) [here](https://github.com/kwea123/Unity_live_caption/blob/master/googlesr.py#L127) and [here](https://github.com/kwea123/Unity_live_caption/blob/master/subtitleListener.cs#L18)
54 | 
55 | 2.  You can change how the text is printed on unity [here](https://github.com/kwea123/Unity_live_caption/blob/master/subtitleListener.cs#L73-L79) and [here](https://github.com/kwea123/Unity_live_caption/blob/master/subtitleListener.cs#L36-L39). The default is configured to print at most 32 characters in Chinese, so you might need to change if you're not using Chinese.
56 | 
57 | ## GUI usage
58 | 
59 | 1.  Download `googlesr_gui_english.zip` from [here](https://github.com/kwea123/Unity_live_caption/releases/tag/v1.0).
60 | 
61 | 2.  Open `googlesr_gui_english.exe` and you will see
62 | 
63 | ![alt](images/1.png)
64 | 
65 | 3.  Select your language, set the API key to where you downloaded `key.json` and select whether to connect to unity and/or print to console (if you want to connect to unity, please see the second point [here](#Usage)).
66 | 
67 | 4.  Press Start to start. It takes some time to warm-up. When it's ready, you will see the following and you can start to talk. You can adjust the size of this window.
68 | 
69 | ![alt](images/2.png)
70 | 
71 | 5.  Press `Ctrl` and `c` to stop the program when you finish.
72 | 
73 | 6.  **Remember to stop the program when you finish the work, otherwise it is going to keep charging you! I disclaim any responsibility for charges incurred by using my program.**
74 | 
75 | ## Other issues
76 | Please ask in [issue](https://github.com/kwea123/Unity_live_caption/issues)
77 | 
78 | 


--------------------------------------------------------------------------------
/README_chinese.md:
--------------------------------------------------------------------------------
 1 | # Unity_live_caption
 2 | 利用 Google Speech-to-Text API 和 Unity 來做實時直播上字幕！ 可以跟你的虛擬腳色很好的搭配使用！ 真人的youtuber當然也可以！
 3 | 
 4 | **重要訊息 : 這個API是要收費的！ 收費規則在[這裡](https://cloud.google.com/speech-to-text/pricing).**
 5 | 
 6 | 這是我 [youtube直播](https://www.youtube.com/watch?v=AZsUm_cuj9U) 演示的結果。
 7 | 
 8 | ## 前置作業
 9 | 
10 | 你要先有google帳號。
11 | 
12 | 根據[官網](https://cloud.google.com/speech-to-text/)的指示，在主控台啟用`Speech-to-Text` API，並且下載API金鑰(會是一個`.json`檔)。
13 | 
14 | 中文的話我只介紹圖形介面(GUI)怎麼使用，如果想從命令行執行的話，請參照英文的[README](README.md)。
15 |     
16 | ## GUI使用方法
17 | 
18 | 1.  從[這裡](https://github.com/kwea123/Unity_live_caption/releases/tag/v1.0)下載 `googlesr_gui_chinese.zip`
19 | 
20 | 2.  打開 `googlesr_gui_chinese.exe` 然後你會看到
21 | 
22 | ![alt](images/3.png)
23 | 
24 | 3.  選擇語言，設置剛才下載的API金鑰並且選擇是否連結到unity或者要不要在螢幕上直接顯示字幕。要連結到unity的話，請看[unity設置](#unity設置)
25 | 
26 | 4.  按開始。他會需要一些時間啟動。啟動完成後你會看到以下的圖片(如果你有按「字幕顯示在螢幕」的話)，然後就可以開始講話了。
27 | 
28 | ![alt](images/4.png)
29 | 
30 | 5.  同時按 `Ctrl` 和 `c` 來結束錄音。
31 | 
32 | 6.  **請務必絕對一定要按結束，不然他會持續算錢！免責聲明：本人不負責任何正當或非正當使用本程式所帶來的任何費用！使用者皆視為同意我的免責聲明。**
33 | 
34 | ## unity設置
35 | 
36 | 1.  先設置一個Text component (`GameObject->UI->Text`)
37 | 2.  在那個Text component上加上這裡的`subtitleListener.cs`
38 | 3.  執行unity(在編輯器或是執行檔都可以)之後，他就會開始聆聽傳過去的字幕，然後這時再開始GUI的錄音即可。
39 | 
40 | ## 其他問題
41 | 可以在[這裡](https://github.com/kwea123/Unity_live_caption/issues)詢問
42 | 
43 | 


--------------------------------------------------------------------------------
/googlesr.py:
--------------------------------------------------------------------------------
  1 | from argparse import ArgumentParser
  2 | from google.cloud import speech
  3 | from translatepy import Translator
  4 | from pyaudio import PyAudio, paInt16, paContinue
  5 | from six.moves.queue import Queue, Empty
  6 | from sys import stdout
  7 | import socket
  8 | import time
  9 | 
 10 | from os import environ
 11 | environ['GOOGLE_APPLICATION_CREDENTIALS'] = \
 12 |     'C:/Users/kwea1/Downloads/youtubeapi-329002-9f71c84a7e37.json'
 13 | 
 14 | # Audio recording parameters
 15 | RATE = 44100
 16 | CHUNK = RATE//1000  # 100ms
 17 | TRANSLATION_INTERVAL = 1 # second
 18 | 
 19 | class MicrophoneStream:
 20 |     """Opens a recording stream as a generator yielding the audio chunks."""
 21 |     def __init__(self, rate, chunk):
 22 |         self._rate = rate
 23 |         self._chunk = chunk
 24 | 
 25 |         # Create a thread-safe buffer of audio data
 26 |         self._buff = Queue()
 27 |         self.closed = True
 28 | 
 29 |     def __enter__(self):
 30 |         self._audio_interface = PyAudio()
 31 |         self._audio_stream = self._audio_interface.open(
 32 |             format=paInt16,
 33 |             # The API currently only supports 1-channel (mono) audio
 34 |             # https://goo.gl/z757pE
 35 |             channels=1, rate=self._rate,
 36 |             input=True, frames_per_buffer=self._chunk,
 37 |             # Run the audio stream asynchronously to fill the buffer object.
 38 |             # This is necessary so that the input device's buffer doesn't
 39 |             # overflow while the calling thread makes network requests, etc.
 40 |             stream_callback=self._fill_buffer,
 41 |         )
 42 | 
 43 |         self.closed = False
 44 | 
 45 |         return self
 46 | 
 47 |     def __exit__(self, type, value, traceback):
 48 |         self._audio_stream.stop_stream()
 49 |         self._audio_stream.close()
 50 |         self.closed = True
 51 |         # Signal the generator to terminate so that the client's
 52 |         # streaming_recognize method will not block the process termination.
 53 |         self._buff.put(None)
 54 |         self._audio_interface.terminate()
 55 | 
 56 |     def _fill_buffer(self, in_data, frame_count, time_info, status_flags):
 57 |         """Continuously collect data from the audio stream, into the buffer."""
 58 |         self._buff.put(in_data)
 59 |         return None, paContinue
 60 | 
 61 |     def generator(self):
 62 |         while not self.closed:
 63 |             # Use a blocking get() to ensure there's at least one chunk of
 64 |             # data, and stop iteration if the chunk is None, indicating the
 65 |             # end of the audio stream.
 66 |             chunk = self._buff.get()
 67 |             if chunk is None: return
 68 |             data = [chunk]
 69 | 
 70 |             # Now consume whatever other data's still buffered.
 71 |             while True:
 72 |                 try:
 73 |                     chunk = self._buff.get(block=False)
 74 |                     if chunk is None: return
 75 |                     data.append(chunk)
 76 |                 except Empty: break
 77 | 
 78 |             yield b''.join(data)
 79 | 
 80 | def listen_print_loop(sp_responses,
 81 |                       tr_client,
 82 |                       tgt_lang_code,
 83 |                       print_locally=True,
 84 |                       sock=None):
 85 |     global last_t
 86 |     num_chars_printed = 0
 87 |     for sp_response in sp_responses:
 88 |         if not sp_response.results: continue
 89 | 
 90 |         # The `results` list is consecutive. For streaming, we only care about
 91 |         # the first result being considered, since once it's `is_final`, it
 92 |         # moves on to considering the next utterance.
 93 |         result = sp_response.results[0]
 94 |         if not result.alternatives: continue
 95 | 
 96 |         # Display the transcription of the top alternative.
 97 |         transcript = result.alternatives[0].transcript
 98 |         t = time.time()
 99 |         if t-last_t>TRANSLATION_INTERVAL and tr_client is not None: # translate
100 |             # add a prefix '@' indicating this is translation
101 |             transcript = '@'+tr_client.translate(transcript, tgt_lang_code).result
102 |             last_t = t
103 | 
104 |         if sock is not None:
105 |             sock.send(bytes(transcript, "utf-8"))
106 |         
107 |         if print_locally: # print the result on the console.
108 |             # If the previous result was longer than this one, we need to print
109 |             # some extra spaces to overwrite the previous result
110 |             overwrite_chars = ' ' * (num_chars_printed - len(transcript))
111 |             if not result.is_final:
112 |                 if tr_client is None or \
113 |                    (tr_client is not None and transcript[0]=='@'):
114 |                     stdout.write(transcript + overwrite_chars + '\r')
115 |                     stdout.flush()
116 |                     num_chars_printed = len(transcript)
117 |             else:
118 |                 if tr_client is not None and transcript[0]!='@': # translate
119 |                     # add a prefix '@' indicating this is translation
120 |                     transcript = '@'+tr_client.translate(transcript, tgt_lang_code).result
121 |                 last_t = time.time()
122 |                 print(transcript + overwrite_chars)
123 |                 num_chars_printed = 0
124 | 
125 | 
if __name__ == '__main__':
    # Command-line entry point: parse the options, optionally connect to the
    # unity listener, then stream microphone audio to the recognizer until
    # interrupted with Ctrl+C.
    from sys import stderr

    parser = ArgumentParser()
    parser.add_argument("--debug", default=False, action="store_true", 
                        help="show speech recognition result on the console")
    parser.add_argument("--connect", default=False, action="store_true", 
                        help="connect to unity")
    # "--lang_code" is accepted as an alias because the README and the GUI
    # launchers invoke the program with that spelling.
    parser.add_argument("--src_lang_code", "--lang_code", dest="src_lang_code",
                        type=str, default="zh-tw",
                        help="the language code of the speech language")
    parser.add_argument("--tgt_lang_code", type=str, default="en",
                        help="""the language code of the language you want to translate to.
                        Set to empty string to disable translation.""")
    args = parser.parse_args()

    sock = None
    try:
        if args.connect:
            address = ('127.0.0.1', 5067)
            sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            sock.connect(address)

        sp_client = speech.SpeechClient()
        config = speech.RecognitionConfig(
                    encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
                    sample_rate_hertz=RATE,
                    enable_automatic_punctuation=True,
                    language_code=args.src_lang_code)
        streaming_config = \
            speech.StreamingRecognitionConfig(config=config, interim_results=True)

        if args.tgt_lang_code != "":
            tr_client = Translator()
        else:
            tr_client = None

        print(f"{args.src_lang_code} recognition, {args.tgt_lang_code} translation started!")
        # Read by listen_print_loop (module global) to rate-limit translation.
        last_t = time.time()
        while True:
            with MicrophoneStream(RATE, CHUNK) as stream:
                audio_generator = stream.generator()
                requests = (speech.StreamingRecognizeRequest(audio_content=content)
                            for content in audio_generator)
                try:
                    sp_responses = sp_client.streaming_recognize(streaming_config, requests)
                    listen_print_loop(sp_responses, tr_client, args.tgt_lang_code,
                                      print_locally=args.debug, sock=sock)
                except KeyboardInterrupt:
                    break
                except ConnectionError as err:
                    # The socket to unity is gone; restarting would keep
                    # streaming (and billing) with nowhere to send the text.
                    print(f"lost connection to unity ({err}); stopping", file=stderr)
                    break
                except Exception as err:
                    # e.g. "400 Exceeded maximum allowed stream duration of
                    # 305 seconds." -- report it and open a fresh stream.
                    print(f"recognition stream interrupted ({err}); restarting",
                          file=stderr)
                    continue
    finally:
        # Close the unity connection on every exit path.
        if sock is not None:
            sock.close()


--------------------------------------------------------------------------------
/googlesr_gui_chinese.py:
--------------------------------------------------------------------------------
 1 | from tkinter import Tk, Label, StringVar, OptionMenu, \
 2 |                     Button, LEFT, filedialog, IntVar, \
 3 |                     Checkbutton
 4 | from os import system, getcwd
 5 | 
 6 | LANG_CODE_DICT = {u"中文(繁體)": "zh-tw", 
 7 |                   u"中文(简体)": "zh-cn",
 8 |                   u"English"   : "en", 
 9 |                   u"日本語"    : "ja"}
10 | 
11 | def convert_lang(lang_code):
12 |     return LANG_CODE_DICT[lang_code]
13 |     
def execute():
    """Start googlesr.exe in a new console with the options chosen in the GUI.

    If no API key file has been selected, the file label shows a red
    reminder and nothing is started. Otherwise the key path is handed to the
    child process through GOOGLE_APPLICATION_CREDENTIALS.
    """
    import os
    import subprocess
    from tkinter import messagebox

    key_path = file_name.get()
    if key_path == '' or key_path == u'請選取API金鑰！':
        file_name.set(u'請選取API金鑰！')
        file_name_label.config(fg="red")
        return

    command = ["googlesr.exe", "--lang_code=%s" % convert_lang(lang_code.get())]
    if debug_int.get() == 1:
        command.append("--debug")
    if connect_int.get() == 1:
        command.append("--connect")

    # The key path travels in the child's environment and the options as an
    # argument list, so no shell parses them: paths containing spaces, '&',
    # '%' or quotes cannot break or alter the command.
    child_env = dict(os.environ, GOOGLE_APPLICATION_CREDENTIALS=key_path)
    try:
        subprocess.Popen(
            command,
            env=child_env,
            # Own console window on Windows; the flag does not exist elsewhere.
            creationflags=getattr(subprocess, "CREATE_NEW_CONSOLE", 0))
    except OSError as err:
        messagebox.showerror(u"錯誤", u"無法啟動 googlesr.exe：%s" % err)
30 |     
def choose_file():
    """Ask for the API key file and show the chosen path in the file label.

    The label is cleared and reset to black before the dialog opens; the
    dialog result is stored in ``file_name`` as returned.
    """
    file_name.set('')
    file_name_label.config(fg="black")
    dialog_options = {
        "initialdir": getcwd(),
        "title": "選取API金鑰",
        "filetypes": (("json files", "*.json"), ("all files", "*.*")),
    }
    chosen_path = filedialog.askopenfilename(**dialog_options)
    file_name.set(chosen_path)
38 | 
# Build the fixed-size main window and its widgets, then run the Tk loop.
root = Tk()
root.title(u"自動語音轉錄文字")
root.geometry("300x240+200+200")
root.resizable(False, False)

# Speech-language selector.
lang_label = Label(text=u'選取語音語言')
lang_label.config(font=("Courier", 12))
lang_label.place(x=30, y=20)
lang_code = StringVar(root)
lang_code.set(u"中文(繁體)")
lang = OptionMenu(root, lang_code, *LANG_CODE_DICT.keys())
lang.config(font=("Courier", 12), width=8)
lang.place(x=20, y=50)

# API key chooser; the label below it shows the selected path.
file_button = Button(text=u'選取API金鑰', command=choose_file)
file_button.config(font=("Courier", 12))
file_button.place(x=24, y=110)
file_name = StringVar()
file_name.set('')
file_name_label = Label(root, textvariable=file_name, 
                           justify=LEFT, wraplength=250)
file_name_label.place(x=20, y=150)

# Option: forward the captions to unity.
connect_int = IntVar()
connect_check = Checkbutton(root, text=u"連接至unity\n(5067號埠)", 
                            variable=connect_int,
                            onvalue=1, offvalue=0)
connect_check.config(font=("Courier", 12))
connect_check.place(x=160, y=5)

# Option: also print the captions in the console window.
debug_int = IntVar()
debug_check = Checkbutton(root, text=u"字幕顯示在螢幕", 
                            variable=debug_int,
                            onvalue=1, offvalue=0)
debug_check.config(font=("Courier", 12))
debug_check.place(x=160, y=50)

# Start button (kept in its own variable instead of rebinding file_button).
start_button = Button(text=u'開始', command=execute)
start_button.config(font=("Courier", 25))
start_button.place(x=160, y=80)

root.mainloop()


--------------------------------------------------------------------------------
/googlesr_gui_english.py:
--------------------------------------------------------------------------------
 1 | from tkinter import Tk, Label, StringVar, OptionMenu, \
 2 |                     Button, LEFT, filedialog, IntVar, \
 3 |                     Checkbutton
 4 | from os import system, getcwd
 5 | 
 6 | LANG_CODE_DICT = {u"中文(繁體)": "zh-tw", 
 7 |                   u"中文(简体)": "zh-cn",
 8 |                   u"English"   : "en", 
 9 |                   u"日本語"    : "ja"}
10 | 
11 | def convert_lang(lang_code):
12 |     return LANG_CODE_DICT[lang_code]
13 |     
def execute():
    """Start googlesr.exe in a new console with the options chosen in the GUI.

    If no API key file has been selected, the file label shows a red
    reminder and nothing is started. Otherwise the key path is handed to the
    child process through GOOGLE_APPLICATION_CREDENTIALS.
    """
    import os
    import subprocess
    from tkinter import messagebox

    key_path = file_name.get()
    if key_path == '' or key_path == u'Select API key!':
        file_name.set(u'Select API key!')
        file_name_label.config(fg="red")
        return

    command = ["googlesr.exe", "--lang_code=%s" % convert_lang(lang_code.get())]
    if debug_int.get() == 1:
        command.append("--debug")
    if connect_int.get() == 1:
        command.append("--connect")

    # The key path travels in the child's environment and the options as an
    # argument list, so no shell parses them: paths containing spaces, '&',
    # '%' or quotes cannot break or alter the command.
    child_env = dict(os.environ, GOOGLE_APPLICATION_CREDENTIALS=key_path)
    try:
        subprocess.Popen(
            command,
            env=child_env,
            # Own console window on Windows; the flag does not exist elsewhere.
            creationflags=getattr(subprocess, "CREATE_NEW_CONSOLE", 0))
    except OSError as err:
        messagebox.showerror(u"Error", u"Could not start googlesr.exe: %s" % err)
30 |     
def choose_file():
    """Ask for the API key file and show the chosen path in the file label.

    The label is cleared and reset to black before the dialog opens; the
    dialog result is stored in ``file_name`` as returned.
    """
    file_name.set('')
    file_name_label.config(fg="black")
    dialog_options = {
        "initialdir": getcwd(),
        "title": "Select API key",
        "filetypes": (("json files", "*.json"), ("all files", "*.*")),
    }
    chosen_path = filedialog.askopenfilename(**dialog_options)
    file_name.set(chosen_path)
38 | 
# Build the fixed-size main window and its widgets, then run the Tk loop.
# Apart from the Start button, the widgets use the default Tk font.
root = Tk()
root.title(u"Auto Speech2Text")
root.geometry("300x240+200+200")
root.resizable(False, False)

# Speech-language selector.
lang_label = Label(text=u'Select audio language')
lang_label.place(x=30, y=20)
lang_code = StringVar(root)
lang_code.set(u"中文(繁體)")
lang = OptionMenu(root, lang_code, *LANG_CODE_DICT.keys())
lang.place(x=20, y=50)

# API key chooser; the label below it shows the selected path.
file_button = Button(text=u'Select API key', command=choose_file)
file_button.place(x=24, y=110)
file_name = StringVar()
file_name.set('')
file_name_label = Label(root, textvariable=file_name, 
                           justify=LEFT, wraplength=250)
file_name_label.place(x=20, y=150)

# Option: forward the captions to unity.
connect_int = IntVar()
connect_check = Checkbutton(root, text=u"connect to unity\n(port 5067)", 
                            variable=connect_int,
                            onvalue=1, offvalue=0)
connect_check.place(x=160, y=5)

# Option: also print the captions in the console window.
debug_int = IntVar()
debug_check = Checkbutton(root, text=u"Display in console", 
                            variable=debug_int,
                            onvalue=1, offvalue=0)
debug_check.place(x=160, y=50)

# Start button (kept in its own variable instead of rebinding file_button).
start_button = Button(text=u'Start', command=execute)
start_button.config(font=("Courier", 25))
start_button.place(x=160, y=80)

root.mainloop()


--------------------------------------------------------------------------------
/images/1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kwea123/Unity_live_caption/99dfea98cea9d0d2babbfe6e264e3db944447dfd/images/1.png


--------------------------------------------------------------------------------
/images/2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kwea123/Unity_live_caption/99dfea98cea9d0d2babbfe6e264e3db944447dfd/images/2.png


--------------------------------------------------------------------------------
/images/3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kwea123/Unity_live_caption/99dfea98cea9d0d2babbfe6e264e3db944447dfd/images/3.png


--------------------------------------------------------------------------------
/images/4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kwea123/Unity_live_caption/99dfea98cea9d0d2babbfe6e264e3db944447dfd/images/4.png


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | google-cloud-speech==2.13.1
2 | translatepy==2.3
3 | pyaudio


--------------------------------------------------------------------------------
/subtitleListener.cs:
--------------------------------------------------------------------------------
  1 | ﻿using UnityEngine;
  2 | using UnityEngine.UI;
  3 | using System.Threading;
  4 | using System.Net.Sockets;
  5 | using System.Net;
  6 | using System;
  7 | using System.Text;
  8 | using System.Collections;
  9 | 
 10 | public class subtitleListener : MonoBehaviour
 11 | {
 12 |     private Text subtitle;
 13 | 
 14 |     // Thread
 15 |     Thread receiveThread;
 16 |     TcpClient client;
 17 |     TcpListener listener;
 18 |     int port = 5067;
 19 |     string receivedText = "", prevText = "";
 20 |     int sameCount = 0;
 21 | 
 22 |     // Start is called before the first frame update
 23 |     void Start()
 24 |     {
 25 |         subtitle = GetComponent<Text>();
 26 | 
 27 |         InitTCP();
 28 |     }
 29 | 
 30 |     // Update is called once per frame
 31 |     void Update()
 32 |     {
 33 |         if (prevText.Equals(receivedText))
 34 |         {
 35 |             sameCount++;
 36 |             if (sameCount == 60) // if no speech for two seconds, erase the subtitle.
 37 |             {
 38 |                 receivedText = "";
 39 |             }
 40 |         } else
 41 |         {
 42 |             sameCount = 0;
 43 |         }
 44 |         subtitle.text = receivedText;
 45 |         prevText = receivedText;
 46 |     }
 47 | 
 48 |     private void InitTCP()
 49 |     {
 50 |         receiveThread = new Thread(new ThreadStart(ReceiveData));
 51 |         receiveThread.IsBackground = true;
 52 |         receiveThread.Start();
 53 |     }
 54 | 
 55 |     private void ReceiveData()
 56 |     {
 57 |         try
 58 |         {
 59 |             listener = new TcpListener(IPAddress.Parse("127.0.0.1"), port);
 60 |             listener.Start();
 61 |             Byte[] bytes = new Byte[1024];
 62 |             while (true)
 63 |             {
 64 |                 using (client = listener.AcceptTcpClient())
 65 |                 {
 66 |                     using (NetworkStream stream = client.GetStream())
 67 |                     {
 68 |                         int length;
 69 |                         while ((length = stream.Read(bytes, 0, bytes.Length)) != 0)
 70 |                         {
 71 |                             var incommingData = new byte[length];
 72 |                             Array.Copy(bytes, 0, incommingData, 0, length);
 73 |                             receivedText = Encoding.UTF8.GetString(incommingData);
 74 |                             if (receivedText.Length > 32) // if the text is longer than 32 words,
 75 |                                                           // cut the subtitle and print in new
 76 |                                                           // line.
 77 |                             {
 78 |                                 int startIndex = 32 * (receivedText.Length / 32);
 79 |                                 receivedText = receivedText.Substring(startIndex);
 80 |                             }
 81 |                         }
 82 |                     }
 83 |                 }
 84 |             }
 85 |         }
 86 |         catch (Exception e)
 87 |         {
 88 |             print(e.ToString());
 89 |         }
 90 |     }
 91 | 
 92 |     void OnApplicationQuit()
 93 |     {
 94 |         try
 95 |         {
 96 |             receiveThread.Abort();
 97 |             client.Close();
 98 |             listener.Stop();
 99 |         }
100 |         catch (Exception e)
101 |         {
102 |             Debug.Log(e.Message);
103 |         }
104 | 
105 |     }
106 | }
107 | 


--------------------------------------------------------------------------------