├── README.md ├── recognize.py └── requirements.txt /README.md: -------------------------------------------------------------------------------- 1 | Unpack the software archive into a folder, e.g. C:\soundcard2txt 2 | 3 | Go to https://www.anaconda.com/download/ and choose the Python 3.6 version, 64-Bit Graphical Installer, 4 | or download it directly: https://repo.continuum.io/archive/Anaconda3-5.0.1-Windows-x86_64.exe 5 | 6 | Run the Anaconda Prompt, change directory to C:\soundcard2txt [c:] [cd \soundcard2txt], then run: 7 | ``` 8 | pip install -r requirements.txt 9 | ``` 10 | 11 | Find the number of the appropriate sound device; e.g., to recognize audio played through the speakers, choose the "Stereo Mix" device: 12 | ``` 13 | python recognize.py --list 14 | ``` 15 | 16 | Now run the recognizer with the chosen device and language; this writes the recognized text to the results file [recognition_log.txt]: 17 | ``` 18 | python recognize.py --device=3 --lang=ru-RU 19 | ``` 20 | 21 | Command-line help is also available: 22 | ``` 23 | python recognize.py --help 24 | ``` 25 | 26 | List of supported languages and their codes: https://cloud.google.com/speech/docs/languages 27 | 28 | Recognition quality can be improved using paid Google, Microsoft, IBM or other services. 
# --------------------------------------------------------------------------------
# /recognize.py:
# --------------------------------------------------------------------------------
"""Capture audio from an input device and log speech-recognition results.

Audio arrives through a PyAudio stream callback as 16-bit mono chunks.  Every
``--buf`` chunks are joined into one batch and handed to a worker thread, which
sends the batch to ``Recognizer.recognize_google`` and appends any recognized
text, with a timestamp, to ``recognition_log.txt``.

Run ``python recognize.py --list`` to print the available device numbers.
"""
import argparse
import datetime
import sys
import threading
import time

import pyaudio
import speech_recognition as sr

LOG_PATH = 'recognition_log.txt'
# Bytes per sample handed to sr.AudioData; the stream below is opened with
# pyaudio.paInt16, i.e. two bytes per sample.
SAMPLE_WIDTH = 2

recognizer = sr.Recognizer()

parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument('--lang', type=str, default='en-US',
                    help='language to recognize, en-US, ru-RU, fi-FI or any other supported')
parser.add_argument('--buf', type=int, default=200,
                    help='buffer size to recognize')
parser.add_argument('--rate', type=int, default=48000,
                    help='audio sampling rate')
parser.add_argument('--device', type=int, default=0,
                    help='input device number')
parser.add_argument('--list', action='store_true', help='list audio devices and their numbers')
# Parse before touching the audio backend so that --help and argument errors
# exit without initialising PyAudio.
args = parser.parse_args()

pa = pyaudio.PyAudio()
if args.list:
    for i in range(pa.get_device_count()):
        print(i, " - ", pa.get_device_info_by_index(i)['name'])
    pa.terminate()
    sys.exit()

audio_rate = args.rate
stream_buf = bytearray()  # audio bytes accumulated for the current batch
stream_counter = 0        # number of chunks currently held in stream_buf


def _log_result(text):
    """Append *text* to the log file, prefixed with the current local time."""
    stamp = datetime.datetime.now().strftime("[ %d-%b-%Y %H:%M:%S ] ")
    # One write per entry: several recognition threads may log at the same
    # time, and a single write keeps each entry's pieces together.
    with open(LOG_PATH, 'a', encoding='utf-8') as log_file:
        log_file.write(stamp + text + "\n")


def recognize(stream_text):
    """Recognize speech in one batch of raw audio and record the result.

    Args:
        stream_text: raw audio bytes for the batch (despite the name, this is
            audio data, not text), sampled at ``audio_rate``.

    Recognized text is printed and appended to the log file.  Batches in
    which nothing could be recognized are skipped silently; a failed request
    to the recognition service is reported on stdout and not logged.
    """
    audio_data = sr.AudioData(stream_text, audio_rate, SAMPLE_WIDTH)
    try:
        # NOTE: the original also carried a commented-out
        # recognizer.recognize_sphinx(audio_data) call as an alternative.
        result = recognizer.recognize_google(audio_data, language=args.lang)
    except sr.UnknownValueError:
        return
    except sr.RequestError as e:
        print("Could not request results from GSR service; {0}".format(e))
        return
    print(result)
    _log_result(result)


def stream_audio(data):
    """Add one captured chunk to the batch; dispatch the batch when full.

    Args:
        data: raw audio bytes delivered by the stream callback.

    The chunk is appended *before* the size check, so the chunk that
    completes a batch is part of that batch instead of being discarded.
    Once ``args.buf`` chunks have been collected, an immutable copy of the
    buffer is passed to a new thread running ``recognize`` and the buffer
    is reset.
    """
    global stream_buf
    global stream_counter

    stream_buf += data
    stream_counter += 1
    if stream_counter < args.buf:
        return

    batch = bytes(stream_buf)
    stream_buf = bytearray()
    stream_counter = 0
    threading.Thread(target=recognize, args=(batch,)).start()


def callback(in_data, frame_count, time_info, status):
    """PyAudio stream callback: forward captured audio and keep streaming."""
    stream_audio(in_data)
    return (None, pyaudio.paContinue)


try:
    stream = pa.open(format=pyaudio.paInt16, channels=1, rate=audio_rate, input=True,
                     stream_callback=callback, input_device_index=args.device)
    try:
        stream.start_stream()
        while stream.is_active():
            time.sleep(0.1)
    except KeyboardInterrupt:
        # Ctrl-C is the normal way to stop capturing; fall through to cleanup.
        pass
    finally:
        stream.stop_stream()
        stream.close()
finally:
    pa.terminate()

# --------------------------------------------------------------------------------
# /requirements.txt:
# --------------------------------------------------------------------------------
# scipy
# numpy
# pyaudio
# speech_recognition
# --------------------------------------------------------------------------------