├── README.md
├── recognize.py
└── requirements.txt


/README.md:
--------------------------------------------------------------------------------
 1 | Unpack software archive into some folder, e.g. C:\soundcard2txt
 2 | 
 3 | Go to https://www.anaconda.com/download/ and choose Python 3.6 version, 64-Bit Graphical Installer
 4 | or download directly: https://repo.continuum.io/archive/Anaconda3-5.0.1-Windows-x86_64.exe
 5 | 
 6 | Open the Anaconda Prompt, change to the C:\soundcard2txt directory (run `c:` and then `cd \soundcard2txt`), and run:
 7 | ```
 8 | pip install -r requirements.txt
 9 | ```
10 | 
11 | List the available sound devices and note the number of the one to use; e.g., to recognize audio played through the speakers, choose the "Stereo Mix" device:
12 | ```
13 | python recognize.py --list
14 | ```
15 | 
16 | Now run the recognizer with the chosen device number and language. Recognized text is printed and appended to the results file [recognition_log.txt]:
17 | ```
18 | python recognize.py --device=3 --lang=ru-RU
19 | ```
20 | 
21 | Help on all command-line options is also available:
22 | ```
23 | python recognize.py --help
24 | ```
25 | 
26 | List of supported languages and their codes: https://cloud.google.com/speech/docs/languages
27 | 
28 | Recognition quality can be improved using paid Google, Microsoft, IBM or other services.
29 | 


--------------------------------------------------------------------------------
/recognize.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | import datetime
 3 | import pyaudio
 4 | import speech_recognition as sr
 5 | import threading
 6 | import time
 7 | 
 8 | recognizer = sr.Recognizer()
 9 | pa = pyaudio.PyAudio()
10 | 
11 | parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
12 | parser.add_argument('--lang', type=str, default='en-US',
13 |                     help='language to recognize, en-US, ru-RU, fi-FI or any other supported')
14 | parser.add_argument('--buf', type=int, default=200,
15 |                     help='buffer size to recognize')
16 | parser.add_argument('--rate', type=int, default=48000,
17 |                     help='audio sampling rate')
18 | parser.add_argument('--device', type=int, default=0,
19 |                     help='input device number')
20 | parser.add_argument('--list', action='store_true', help='list audio devices and their numbers')
21 | args = parser.parse_args()
22 | if args.list:
23 |     for i in range(0, pa.get_device_count()):
24 |         print(i, " - ", pa.get_device_info_by_index(i)['name'])
25 |     exit()
26 | 
27 | audio_rate = args.rate
28 | stream_buf = bytes()
29 | stream_counter = 0
30 | 
31 | 
def recognize(stream_text):
    """Recognize speech in a buffer of raw audio and record the result.

    The bytes are wrapped in ``sr.AudioData`` using the module-level
    ``audio_rate`` and a sample width of 2 bytes (the stream is opened as
    16-bit PCM), then passed to ``recognizer.recognize_google`` with the
    language from ``args.lang``. A successful result is printed and appended
    to ``recognition_log.txt`` with a timestamp.

    Args:
        stream_text: Raw audio bytes collected from the input stream.

    Returns:
        None. ``sr.UnknownValueError`` is ignored and ``sr.RequestError`` is
        reported on stdout; neither is propagated.
    """

    def logger(s):
        """Append ``s`` to the log file, prefixed with the current local time."""
        stamp = datetime.datetime.now().strftime("[ %d-%b-%Y %H:%M:%S ] ")
        # The context manager closes the file even if the write fails.
        with open('recognition_log.txt', 'a', encoding='utf-8') as f:
            f.write(stamp + s + "\n")

    audio_data = sr.AudioData(stream_text, audio_rate, 2)
    try:
        # result = recognizer.recognize_sphinx(audio_data)
        result = recognizer.recognize_google(audio_data, language=args.lang)
    except sr.UnknownValueError:
        # Nothing recognizable in this buffer; stay silent by design.
        return
    except sr.RequestError as e:
        print("Could not request results from GSR service; {0}".format(e))
        return

    print(result)
    logger(result)
52 | 
53 | 
def stream_audio(data):
    """Buffer one captured audio chunk and dispatch recognition when full.

    Each call appends ``data`` to the module-level ``stream_buf`` and bumps
    ``stream_counter``. Once ``args.buf`` chunks have been collected, the
    buffer is handed to ``recognize`` on a new thread and the buffer and
    counter are reset.

    Args:
        data: Raw audio bytes delivered by the stream callback.
    """
    global stream_buf
    global stream_counter

    # Always keep the incoming chunk: the chunk that completes the buffer
    # must be part of the audio sent for recognition, not discarded.
    stream_buf += data
    stream_counter += 1

    if stream_counter >= args.buf:
        # Recognition runs on its own thread so this call returns promptly.
        threading.Thread(target=recognize, args=(stream_buf,)).start()
        stream_buf = bytes()
        stream_counter = 0
66 | 
67 | 
def callback(in_data, frame_count, time_info, status):
    """Stream callback: pass the captured bytes on and keep the stream running.

    Only ``in_data`` is used; the remaining parameters are accepted because
    the callback is registered via ``stream_callback`` and are ignored here.

    Returns:
        A ``(None, pyaudio.paContinue)`` tuple: no output data, and a flag
        asking the stream to continue.
    """
    stream_audio(in_data)
    out_data = None
    return out_data, pyaudio.paContinue
71 | 
72 | 
# Open a mono, 16-bit input stream on the selected device; audio is delivered
# to callback() rather than read explicitly.
stream = pa.open(format=pyaudio.paInt16, channels=1, rate=audio_rate, input=True, stream_callback=callback,
                 input_device_index=args.device)
try:
    stream.start_stream()
    # Keep the main thread alive while the callback does the work.
    while stream.is_active():
        time.sleep(0.1)
except KeyboardInterrupt:
    # Ctrl-C is the normal way to stop capturing; fall through to cleanup.
    pass
finally:
    # Always release the stream and the audio handle, even on interruption.
    stream.stop_stream()
    stream.close()
    pa.terminate()
80 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | scipy
2 | numpy
3 | pyaudio
4 | SpeechRecognition
5 | 


--------------------------------------------------------------------------------