├── .gitignore ├── LICENSE ├── README.md ├── audio_files ├── harvard.wav └── jackhammer.wav └── guessing_game.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | 49 | # Translations 50 | *.mo 51 | *.pot 52 | 53 | # Django stuff: 54 | *.log 55 | local_settings.py 56 | 57 | # Flask stuff: 58 | instance/ 59 | .webassets-cache 60 | 61 | # Scrapy stuff: 62 | .scrapy 63 | 64 | # Sphinx documentation 65 | docs/_build/ 66 | 67 | # PyBuilder 68 | target/ 69 | 70 | # Jupyter Notebook 71 | .ipynb_checkpoints 72 | 73 | # pyenv 74 | .python-version 75 | 76 | # celery beat schedule file 77 | celerybeat-schedule 78 | 79 | # SageMath parsed files 80 | *.sage.py 81 | 82 | # dotenv 83 | .env 84 | 85 | # virtualenv 86 | .venv 87 | venv/ 88 | ENV/ 89 | 90 | # Spyder project settings 91 | .spyderproject 92 | .spyproject 93 | 94 | # Rope project settings 95 | .ropeproject 96 | 97 | # mkdocs documentation 98 | /site 99 | 100 | # mypy 101 | .mypy_cache/ 102 | -------------------------------------------------------------------------------- /LICENSE: 
-------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Real Python 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Speech Recognition with Python 2 | 3 | This repository contains resources from [The Ultimate Guide to Speech Recognition with Python](https://realpython.com/python-speech-recognition/) tutorial on Real Python. 4 | 5 | Audio files for the examples in the *Working With Audio Files* section of the post can be found in the `audio_files` directory. To download them, use the green "Clone or download" button at the top right corner of this page. 6 | 7 | The `guessing_game.py` file contains the full source code for the "Guess a Word" game example. 
8 | 9 | > **NOTE**: You will need to install the [SpeechRecognition](https://github.com/Uberi/speech_recognition) and [PyAudio](https://people.csail.mit.edu/hubert/pyaudio/) packages in order to run the example. Please see the [tutorial](https://realpython.com/python-speech-recognition/) for step-by-step instructions. 10 | 11 | You can test your SpeechRecognition and PyAudio installation by downloading `guessing_game.py` and typing the following into a Python REPL session: 12 | 13 | ```pycon 14 | >>> import speech_recognition as sr 15 | >>> from guessing_game import recognize_speech_from_mic 16 | >>> r = sr.Recognizer() 17 | >>> m = sr.Microphone() 18 | >>> recognize_speech_from_mic(r, m) # speak after running this line 19 | {'success': True, 'error': None, 'transcription': 'hello'} 20 | ``` 21 | 22 | Of course, your output will vary depending on what you said after running `recognize_speech_from_mic(r, m)`. 23 | -------------------------------------------------------------------------------- /audio_files/harvard.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/realpython/python-speech-recognition/0c07b810808c01144a9611faf84739f24513184e/audio_files/harvard.wav -------------------------------------------------------------------------------- /audio_files/jackhammer.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/realpython/python-speech-recognition/0c07b810808c01144a9611faf84739f24513184e/audio_files/jackhammer.wav -------------------------------------------------------------------------------- /guessing_game.py: -------------------------------------------------------------------------------- 1 | import random 2 | import time 3 | 4 | import speech_recognition as sr 5 | 6 | 7 | def recognize_speech_from_mic(recognizer, microphone): 8 | """Transcribe speech recorded from `microphone`. 
9 | 10 | Returns a dictionary with three keys: 11 | "success": a boolean indicating whether or not the API request was 12 | successful 13 | "error": `None` if no error occurred, otherwise a string containing 14 | an error message if the API could not be reached or 15 | speech was unrecognizable 16 | "transcription": `None` if speech could not be transcribed, 17 | otherwise a string containing the transcribed text 18 | """ 19 | # check that recognizer and microphone arguments are appropriate type 20 | if not isinstance(recognizer, sr.Recognizer): 21 | raise TypeError("`recognizer` must be `Recognizer` instance") 22 | 23 | if not isinstance(microphone, sr.Microphone): 24 | raise TypeError("`microphone` must be `Microphone` instance") 25 | 26 | # adjust the recognizer sensitivity to ambient noise and record audio 27 | # from the microphone 28 | with microphone as source: 29 | recognizer.adjust_for_ambient_noise(source) 30 | audio = recognizer.listen(source) 31 | 32 | # set up the response object 33 | response = { 34 | "success": True, 35 | "error": None, 36 | "transcription": None 37 | } 38 | 39 | # try recognizing the speech in the recording 40 | # if a RequestError or UnknownValueError exception is caught, 41 | # update the response object accordingly 42 | try: 43 | response["transcription"] = recognizer.recognize_google(audio) 44 | except sr.RequestError: 45 | # API was unreachable or unresponsive 46 | response["success"] = False 47 | response["error"] = "API unavailable" 48 | except sr.UnknownValueError: 49 | # speech was unintelligible 50 | response["error"] = "Unable to recognize speech" 51 | 52 | return response 53 | 54 | 55 | if __name__ == "__main__": 56 | # set the list of words, max number of guesses, and prompt limit 57 | WORDS = ["apple", "banana", "grape", "orange", "mango", "lemon"] 58 | NUM_GUESSES = 3 59 | PROMPT_LIMIT = 5 60 | 61 | # create recognizer and mic instances 62 | recognizer = sr.Recognizer() 63 | microphone = sr.Microphone() 64 | 65 | # get 
a random word from the list 66 | word = random.choice(WORDS) 67 | 68 | # format the instructions string 69 | instructions = ( 70 | "I'm thinking of one of these words:\n" 71 | "{words}\n" 72 | "You have {n} tries to guess which one.\n" 73 | ).format(words=', '.join(WORDS), n=NUM_GUESSES) 74 | 75 | # show instructions and wait 3 seconds before starting the game 76 | print(instructions) 77 | time.sleep(3) 78 | 79 | for i in range(NUM_GUESSES): 80 | # get the guess from the user 81 | # if a transcription is returned, break out of the loop and 82 | # continue 83 | # if no transcription returned and API request failed, break 84 | # loop and continue 85 | # if API request succeeded but no transcription was returned, 86 | # re-prompt the user to say their guess again. Do this up 87 | # to PROMPT_LIMIT times 88 | for j in range(PROMPT_LIMIT): 89 | print('Guess {}. Speak!'.format(i+1)) 90 | guess = recognize_speech_from_mic(recognizer, microphone) 91 | if guess["transcription"]: 92 | break 93 | if not guess["success"]: 94 | break 95 | print("I didn't catch that. What did you say?\n") 96 | 97 | # if there was an error, stop the game 98 | if guess["error"]: 99 | print("ERROR: {}".format(guess["error"])) 100 | break 101 | 102 | # show the user the transcription 103 | print("You said: {}".format(guess["transcription"])) 104 | 105 | # determine if guess is correct and if any attempts remain 106 | guess_is_correct = guess["transcription"].lower() == word.lower() 107 | user_has_more_attempts = i < NUM_GUESSES - 1 108 | 109 | # determine if the user has won the game 110 | # if not, repeat the loop if user has more attempts 111 | # if no attempts left, the user loses the game 112 | if guess_is_correct: 113 | print("Correct! You win!".format(word)) 114 | break 115 | elif user_has_more_attempts: 116 | print("Incorrect. 
Try again.\n") 117 | else: 118 | print("Sorry, you lose!\nI was thinking of '{}'.".format(word)) 119 | break 120 | --------------------------------------------------------------------------------