├── .gitignore ├── 01-basics ├── README.md ├── load_mp3.py ├── output.wav ├── plot_audio.py ├── record_mic.py └── wave_example.py ├── 02-simple-speech-recognition ├── Natural Language Processing Short.m4a ├── api_02.py ├── api_secrets.py ├── file_title.txt └── main.py ├── 03-sentiment-analysis ├── README.md ├── api_03.py ├── api_secrets.py ├── data │ ├── iPhone_13_Review:_Pros_and_Cons.txt │ └── iPhone_13_Review:_Pros_and_Cons_sentiments.json ├── main.py └── yt_extractor.py ├── 04-podcast-summarization ├── 4bdac209399848b4a2e72f68362096da.txt ├── 51fa27d67c1e41598c28197c4100e0f4.txt ├── api_04.py ├── api_secrets.py └── main.py ├── 05-realtime-openai ├── README.md ├── api_secrets.py ├── main.py └── openai_helper.py ├── README.md ├── output.wav └── utilities ├── api_02.py ├── api_03.py └── api_secrets.py /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | *.json 3 | __pycache__ -------------------------------------------------------------------------------- /01-basics/README.md: -------------------------------------------------------------------------------- 1 | 2 | ## Dependencies 3 | 4 | ```console 5 | $ pip install pyaudio 6 | ``` 7 | 8 | M1 Mac: 9 | 10 | ```console 11 | $ python -m pip install --global-option='build_ext' --global-option='-I/opt/homebrew/Cellar/portaudio/19.7.0/include' --global-option='-L/opt/homebrew/Cellar/portaudio/19.7.0/lib' pyaudio 12 | ``` 13 | -------------------------------------------------------------------------------- /01-basics/load_mp3.py: -------------------------------------------------------------------------------- 1 | # brew install ffmpeg 2 | # pip install pydub 3 | from pydub import AudioSegment 4 | 5 | audio = AudioSegment.from_wav("output.wav") 6 | # audio = AudioSegment.from_mp3("mashup.mp3") 7 | 8 | # boost volume by 6dB 9 | audio = audio + 6 10 | 11 | # repeat the clip twice 12 | audio = audio * 2 13 | 14 | # 2 sec fade in 15 | audio = audio.fade_in(2000) 16 | 
# https://learnpython.com/blog/plot-waveform-in-python/
import wave
import numpy as np
import matplotlib.pyplot as plt

# Read everything up front inside a context manager so the file handle is
# closed even if plotting raises (the original opened with wave.open() and
# never called close()).
with wave.open('output.wav', 'rb') as wav_obj:
    sample_freq = wav_obj.getframerate()
    print(sample_freq)

    n_samples = wav_obj.getnframes()
    print(n_samples)

    # total clip duration in seconds
    t_audio = n_samples / sample_freq
    print(t_audio, "seconds")

    signal_wave = wav_obj.readframes(n_samples)

# assumes 16-bit PCM samples (sampwidth == 2) -- TODO confirm for other inputs
signal_array = np.frombuffer(signal_wave, dtype=np.int16)
print(signal_array.shape)

# for stereo:
#l_channel = signal_array[0::2]
#r_channel = signal_array[1::2]

# NOTE(review): times has n_samples entries, which only matches
# signal_array for mono input; a stereo file would make plt.plot below
# raise a shape mismatch (use the de-interleaved channels above instead).
times = np.linspace(0, n_samples / sample_freq, num=n_samples)

plt.figure(figsize=(15, 5))
plt.plot(times, signal_array)
plt.title('Audio')
plt.ylabel('Signal Value')
plt.xlabel('Time (s)')
plt.xlim(0, t_audio)
plt.show()

plt.figure(figsize=(15, 5))
plt.specgram(signal_array, Fs=sample_freq, vmin=-20, vmax=50)
# was 'Left Channel' -- a copy-paste from a stereo example; this plots the
# whole (mono) signal
plt.title('Spectrogram')
plt.ylabel('Frequency (Hz)')
plt.xlabel('Time (s)')
plt.xlim(0, t_audio)
plt.colorbar()
plt.show()
import wave

# Explain
# - wave file structure
# - number of channels
# - sample width
# - framerate/sample_rate
# - number of frames
# - values of a frame

# open wave file; the context manager guarantees the handle is closed even
# if one of the calls below raises (the original relied on an explicit
# close() that an exception would skip)
with wave.open("output.wav", 'rb') as obj:
    print("Number of channels", obj.getnchannels())
    print("Sample width", obj.getsampwidth())
    print("Frame rate.", obj.getframerate())
    print("Number of frames", obj.getnframes())
    print("parameters:", obj.getparams())
    frames = obj.readframes(obj.getnframes())

    # The sample count is integral, so use floor division; the original
    # true division printed a float (e.g. 80000.0). Indexing a bytes
    # object yields an int, hence frames[0] / type(frames[0]).
    print(len(frames) // obj.getsampwidth(), frames[0], type(frames[0]))

# write wave file
sample_rate = 16000.0  # hertz; setframerate() rounds this to an int
with wave.open("new_file.wav", 'wb') as obj:
    obj.setnchannels(1)  # mono
    obj.setsampwidth(2)  # 16-bit samples
    obj.setframerate(sample_rate)
    # NOTE(review): frames are copied verbatim -- the output only plays
    # back correctly if the source was also mono / 16-bit / 16 kHz
    obj.writeframes(frames)
# files after part 2
import requests
import time
from api_secrets import API_KEY_ASSEMBLYAI


upload_endpoint = 'https://api.assemblyai.com/v2/upload'
transcript_endpoint = 'https://api.assemblyai.com/v2/transcript'

# the upload endpoint only needs the auth header; the JSON endpoints also
# need an explicit content type
headers_auth_only = {'authorization': API_KEY_ASSEMBLYAI}

headers = {
    "authorization": API_KEY_ASSEMBLYAI,
    "content-type": "application/json"
}

CHUNK_SIZE = 5_242_880  # 5MB


def upload(filename):
    """Stream a local audio file to AssemblyAI and return its upload URL."""
    def read_file(filename):
        # generator: lets requests stream the body in 5MB chunks instead of
        # loading the whole audio file into memory at once
        with open(filename, 'rb') as f:
            while True:
                data = f.read(CHUNK_SIZE)
                if not data:
                    break
                yield data

    upload_response = requests.post(upload_endpoint, headers=headers_auth_only, data=read_file(filename))
    return upload_response.json()['upload_url']


def transcribe(audio_url):
    """Submit a transcription job for ``audio_url``; return the job id."""
    transcript_request = {
        'audio_url': audio_url
    }

    transcript_response = requests.post(transcript_endpoint, json=transcript_request, headers=headers)
    return transcript_response.json()['id']


def poll(transcript_id):
    """Fetch the current state of a transcription job as a dict."""
    polling_endpoint = transcript_endpoint + '/' + transcript_id
    polling_response = requests.get(polling_endpoint, headers=headers)
    return polling_response.json()


def get_transcription_result_url(url):
    """Submit the audio at ``url`` and block until the job finishes.

    Returns ``(data, None)`` on success or ``(data, error_message)`` when
    the job ends in the 'error' status. Polls every 30 seconds otherwise.
    """
    transcribe_id = transcribe(url)
    while True:
        data = poll(transcribe_id)
        if data['status'] == 'completed':
            return data, None
        elif data['status'] == 'error':
            return data, data['error']

        print("waiting for 30 seconds")
        time.sleep(30)


def save_transcript(url, title):
    """Transcribe ``url`` and write the text to ``<title>.txt``.

    Returns True on success and False on error, matching
    api_03.save_transcript (the two modules previously disagreed).
    """
    data, error = get_transcription_result_url(url)

    # Check the error first: a failed job still returns a truthy response
    # dict, so the original `if data:` took that branch on errors too and
    # then crashed in f.write(data['text']) (text is None); the old
    # `elif error:` branch was unreachable.
    if error:
        print("Error!!!", error)
        return False

    filename = title + '.txt'
    # explicit utf-8 so non-ASCII transcript text cannot break the write on
    # platforms whose default encoding is not utf-8
    with open(filename, 'w', encoding='utf-8') as f:
        f.write(data['text'])
    print('Transcript saved')
    return True
The essence but the ultimate goal is to make a model that can encapsulate the actual essence of language. As we make better models, we get closer to unlocking a whole new set of data for you than developing better systems. As we make better and better models, we get closer to unlocking a whole new set of data to making even better systems and make it easier for humans to communicate with these systems, no matter how tech savvy they are or not. How do these happen? Well, mostly, how do these happen? How are these models made? Well, mostly with convolutional neural networks and recurrent neural networks. And some of these how are these models made? Well, mostly with convolutional neural networks and recurrent neural networks. Sometimes we have some variations of these two and very recently transformer models with attention mechanisms on them, and it's not hard to include NLP in your projects. There are many APIs and services online that you can use, even for free. Check out some of these options. To check out, you can use APIs offered by companies. You can use APIs offered by companies, or you can use Python libraries to include NLP functionalities in your project. You can use APIs as a service. You can use NLP APIs as a service or include a model by fine tuning it to your specific problem. 
import requests
import json
import time
from api_secrets import API_KEY_ASSEMBLYAI


upload_endpoint = 'https://api.assemblyai.com/v2/upload'
transcript_endpoint = 'https://api.assemblyai.com/v2/transcript'

# the upload endpoint only needs the auth header; the JSON endpoints also
# need an explicit content type
headers_auth_only = {'authorization': API_KEY_ASSEMBLYAI}

headers = {
    "authorization": API_KEY_ASSEMBLYAI,
    "content-type": "application/json"
}

CHUNK_SIZE = 5_242_880  # 5MB


def upload(filename):
    """Stream a local audio file to AssemblyAI and return its upload URL."""
    def read_file(filename):
        # generator: lets requests stream the body in 5MB chunks instead of
        # loading the whole audio file into memory at once
        with open(filename, 'rb') as f:
            while True:
                data = f.read(CHUNK_SIZE)
                if not data:
                    break
                yield data

    upload_response = requests.post(upload_endpoint, headers=headers_auth_only, data=read_file(filename))
    return upload_response.json()['upload_url']


def transcribe(audio_url, sentiment_analysis):
    """Submit a transcription job (optionally with sentiment analysis).

    Returns the AssemblyAI job id.
    """
    transcript_request = {
        'audio_url': audio_url,
        'sentiment_analysis': sentiment_analysis
    }

    transcript_response = requests.post(transcript_endpoint, json=transcript_request, headers=headers)
    return transcript_response.json()['id']


def poll(transcript_id):
    """Fetch the current state of a transcription job as a dict."""
    polling_endpoint = transcript_endpoint + '/' + transcript_id
    polling_response = requests.get(polling_endpoint, headers=headers)
    return polling_response.json()


def get_transcription_result_url(url, sentiment_analysis):
    """Submit the audio at ``url`` and block until the job finishes.

    Returns ``(data, None)`` on success or ``(data, error_message)`` when
    the job ends in the 'error' status. Polls every 30 seconds otherwise.
    """
    transcribe_id = transcribe(url, sentiment_analysis)
    while True:
        data = poll(transcribe_id)
        if data['status'] == 'completed':
            return data, None
        elif data['status'] == 'error':
            return data, data['error']

        print("waiting for 30 seconds")
        time.sleep(30)


def save_transcript(url, title, sentiment_analysis=False):
    """Transcribe ``url``; write text to ``<title>.txt`` and, when
    requested, sentiments to ``<title>_sentiments.json``.

    Returns True on success, False on error.
    """
    data, error = get_transcription_result_url(url, sentiment_analysis)

    # Check the error first: a failed job still returns a truthy response
    # dict, so the original `if data:` took that branch on errors too and
    # then crashed in f.write(data['text']) (text is None); the old
    # `elif error:` branch was unreachable.
    if error:
        print("Error!!!", error)
        return False

    filename = title + '.txt'
    # explicit utf-8 so non-ASCII transcript text cannot break the write on
    # platforms whose default encoding is not utf-8
    with open(filename, 'w', encoding='utf-8') as f:
        f.write(data['text'])

    if sentiment_analysis:
        filename = title + '_sentiments.json'
        with open(filename, 'w', encoding='utf-8') as f:
            sentiments = data['sentiment_analysis_results']
            json.dump(sentiments, f, indent=4)
    print('Transcript saved')
    return True
The new iPhones display is brighter than before, the battery life is longer, and Apple has improved an already great game camera system with better sensors and features like cinematic mode that make you feel like a pro even on a non pro phone. Still, there are some flaws here. Here's the Pros and cons in my iPhone 13 review. There's really two notable design changes for the iPhone 13. First, the rear cameras are now arranged diagonally instead of vertically, and the notch is now 20% smaller. I appreciated the mini notch when using apps with the white background, such as Safari and Gmail. However, it still swoops down. The button placement is slightly different on the iPhone 13 versus iPhone Twelve, such as the power button being lower. Otherwise, you get the same thing as before ceramic shield display up front, durable glass back, and flat edges. Unfortunately, the iPhone 13 doesn't offer Touch ID, an upgrade many of us would have appreciated. Since Face ID doesn't work well with mass, a sensor embedded in the power button like the iPad Mini Six would have been just fine. The bad news is that the iPhone 13 doesn't offer the 120 Hz refresh rate of the iPhone 13 Pro, which would have been nice for smoother scrolling and overall performance. But you do get a brighter display than the iPhone twelve, which helps most when you're outdoors. We measured 795 nets of brightness, compared to 569 for last year's iPhone and 702 for the Galaxy S 21. The overall display quality continues to be stellar, with great colors, contrast and viewing angles. When watching the trailer for The Matrix Resurrections, I could make out individual hairs on Neo's beard as he stared up into the sun, and I could almost feel the punch he delivered to the new Morpheus. The iPhone 13 cameras get a number of key upgrades. This includes a main camera that lets in more light, an ultra wide camera that captures more scene, and an improved night mode. 
The overall image quality was top notch in my testing, complete with compelling portrait shots, plenty of detail and colorful ultra wide landscapes. If you want more control, you'll appreciate the Photographic Styles feature. You can switch between multiple options in the camera app, including standard vibrant, rich, contrast, warm and cool. I put the iPhone 13 cameras up against Samsung's best, and the iPhone twelve, and the new iPhone fared quite well in this photo. Taken at a reservoir, you can see how superior the iPhone thirteen's camera is compared to the Galaxy S 21 Ultra. The water looks more translucent, and the iPhone 13 does a better job rendering the shadows around the wood. The iPhone 13 also delivered a better looking shot of these Halloween decorations. The sign looks more vibrant as do the surrounding reallife pumpkins. The S 21 Ultras pick is pretty sharp, but looks kind of flat in many shooting situations. The iPhone 13 was on a par with the iPhone twelve, but I did notice a market improvement in night mode. The iPhone 13 captures a brighter pink and the hydrangea flowers in the dark, and this candle shot looks crisper in very low light. With this selfie, the iPhone 13 delivers colors that are a bit too saturated, while the S 21 shot looks a bit hazy and washed out, so it's kind of a drop for me. The iPhone 13 is the rare smartphone that can make you feel like a pro as you shoot videos, and that's thanks to a new feature called Cinematic Mode. The effect brings depth of field to your footage and automatically changes the focus to various subjects. It's almost like magic when recording a couple of dogs. I was really impressed when the iPhone 13 placed the focus on the pup that was closest to me at first. You can also change the focus point manually by tapping the screen. The bokeh effect on the background is also pretty convincing and immersive. 
As you can see in this footage of me walking along a trail and in this clip of me trying to sneak a cookie, the iPhone 13 did a fairly good job deciding when to focus on me versus the plate. Cinematic Mode isn't perfect, as the iPhone 13 sometimes took a second to recognize faster moving subjects, and you can't do Cinematic mode in 4K, but overall it's a cool feature. The video quality from the iPhone 13 is the best I've seen from a phone, with improved noise reduction and better dynamic range. This filthy vision HDR footage of waves lapping up against the sand is almost mesmerizing, and this sweeping landscape of the Battle of Mammothsite delivers a Crystal clear blue sky and lots of details in the clouds and surrounding trees. The A 15 Bionic inside the iPhone XIV doesn't offer a dramatic performance gain over the iPhone Twelve, but it's still the fastest chip in any phone in a game. Like Gentian Impact, the animations and effects felt console quality as I climbed mountains, swim through water, and tried to battle enemies. I was equally impressed by the seat gap, which instantly identified plants when I pointed the camera around a nearby park. The iPhone 13 blows away Android phones on benchmarks like Geekbench Five and in graphics tests on three D Mark Wildlife, the new iPhone hit 55 nine frames per second. That's a bit higher than the iPhone Twelve at 51 FPS, but the Galaxy S 21 Ultra mustered only 33. One of the best iPhone 13 upgrades is longer battery life thanks to a bigger battery, more efficient display, and the A 15 Bionic chip on the Times Guide battery test, which involves continuous five G web surfing, the iPhone 13 endured for 10 hours and 33 minutes. That's more than 2 hours longer than the 825 time from the iPhone twelve. The best result we saw from the Galaxy S 21 was 953. Unfortunately the iPhone 13 offers the same lame charging speeds as before. The new iPhone got to 51% in 30 minutes with Apple's optional 20 Watt charger. 
Other phones are much speedier. The iPhone's 15 Watt MagSafe charger is even slower but some may find it more convenient because you don't have to deal with lightning. The iPhone 13 doesn't quite well like the iPhone 13 Pro and Pro Max which offer 120 Hz display, telephoto, Zoom and macrophotography but at 799 this is the best new iPhone for the money. The cameras are simply the best you'll find in this class and I especially like the longer battery life. I wouldn't recommend that iPhone twelve owners upgrade to the new iPhone but the iPhone 13 could be worth it if you own an iPhone eleven or older device. I'd like to see Apple offer faster charging next time around and the lack of touch ID is a bummer but overall the iPhone 13 is a fantastic phone for Tom's guide this is Martin Spoon hour. -------------------------------------------------------------------------------- /03-sentiment-analysis/data/iPhone_13_Review:_Pros_and_Cons_sentiments.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "text": "With the exception of a smaller notch, the iPhone 13 doesn't seem very new at first glance, but when you start using this flagship, you start to appreciate a bunch of welcome upgrades.", 4 | "start": 370, 5 | "end": 9082, 6 | "sentiment": "POSITIVE", 7 | "confidence": 0.9713384509086609, 8 | "speaker": null 9 | }, 10 | { 11 | "text": "The new iPhones display is brighter than before, the battery life is longer, and Apple has improved an already great game camera system with better sensors and features like cinematic mode that make you feel like a pro even on a non pro phone.", 12 | "start": 9226, 13 | "end": 21462, 14 | "sentiment": "POSITIVE", 15 | "confidence": 0.9772546291351318, 16 | "speaker": null 17 | }, 18 | { 19 | "text": "Still, there are some flaws here.", 20 | "start": 21596, 21 | "end": 23250, 22 | "sentiment": "NEGATIVE", 23 | "confidence": 0.6381384134292603, 24 | "speaker": null 25 | }, 26 | { 27 | 
"text": "Here's the Pros and cons in my iPhone 13 review.", 28 | "start": 23360, 29 | "end": 26780, 30 | "sentiment": "NEUTRAL", 31 | "confidence": 0.7499386668205261, 32 | "speaker": null 33 | }, 34 | { 35 | "text": "There's really two notable design changes for the iPhone 13.", 36 | "start": 27350, 37 | "end": 30118, 38 | "sentiment": "POSITIVE", 39 | "confidence": 0.8534098863601685, 40 | "speaker": null 41 | }, 42 | { 43 | "text": "First, the rear cameras are now arranged diagonally instead of vertically, and the notch is now 20% smaller.", 44 | "start": 30214, 45 | "end": 35746, 46 | "sentiment": "NEUTRAL", 47 | "confidence": 0.7851952314376831, 48 | "speaker": null 49 | }, 50 | { 51 | "text": "I appreciated the mini notch when using apps with the white background, such as Safari and Gmail.", 52 | "start": 35878, 53 | "end": 40762, 54 | "sentiment": "POSITIVE", 55 | "confidence": 0.9176777601242065, 56 | "speaker": null 57 | }, 58 | { 59 | "text": "However, it still swoops down.", 60 | "start": 40846, 61 | "end": 42726, 62 | "sentiment": "NEUTRAL", 63 | "confidence": 0.736538827419281, 64 | "speaker": null 65 | }, 66 | { 67 | "text": "The button placement is slightly different on the iPhone 13 versus iPhone Twelve, such as the power button being lower.", 68 | "start": 42908, 69 | "end": 48522, 70 | "sentiment": "NEUTRAL", 71 | "confidence": 0.8457890152931213, 72 | "speaker": null 73 | }, 74 | { 75 | "text": "Otherwise, you get the same thing as before ceramic shield display up front, durable glass back, and flat edges.", 76 | "start": 48596, 77 | "end": 54778, 78 | "sentiment": "NEUTRAL", 79 | "confidence": 0.7579075694084167, 80 | "speaker": null 81 | }, 82 | { 83 | "text": "Unfortunately, the iPhone 13 doesn't offer Touch ID, an upgrade many of us would have appreciated.", 84 | "start": 54934, 85 | "end": 60046, 86 | "sentiment": "NEGATIVE", 87 | "confidence": 0.453982949256897, 88 | "speaker": null 89 | }, 90 | { 91 | "text": "Since Face ID doesn't work 
well with mass, a sensor embedded in the power button like the iPad Mini Six would have been just fine.", 92 | "start": 60118, 93 | "end": 67314, 94 | "sentiment": "NEUTRAL", 95 | "confidence": 0.533276379108429, 96 | "speaker": null 97 | }, 98 | { 99 | "text": "The bad news is that the iPhone 13 doesn't offer the 120 Hz refresh rate of the iPhone 13 Pro, which would have been nice for smoother scrolling and overall performance.", 100 | "start": 67472, 101 | "end": 76366, 102 | "sentiment": "NEGATIVE", 103 | "confidence": 0.4499838650226593, 104 | "speaker": null 105 | }, 106 | { 107 | "text": "But you do get a brighter display than the iPhone twelve, which helps most when you're outdoors.", 108 | "start": 76498, 109 | "end": 81586, 110 | "sentiment": "POSITIVE", 111 | "confidence": 0.9162416458129883, 112 | "speaker": null 113 | }, 114 | { 115 | "text": "We measured 795 nets of brightness, compared to 569 for last year's iPhone and 702 for the Galaxy S 21.", 116 | "start": 81718, 117 | "end": 89754, 118 | "sentiment": "NEUTRAL", 119 | "confidence": 0.5288670659065247, 120 | "speaker": null 121 | }, 122 | { 123 | "text": "The overall display quality continues to be stellar, with great colors, contrast and viewing angles.", 124 | "start": 89912, 125 | "end": 95698, 126 | "sentiment": "POSITIVE", 127 | "confidence": 0.9815881252288818, 128 | "speaker": null 129 | }, 130 | { 131 | "text": "When watching the trailer for The Matrix Resurrections, I could make out individual hairs on Neo's beard as he stared up into the sun, and I could almost feel the punch he delivered to the new Morpheus.", 132 | "start": 95854, 133 | "end": 106650, 134 | "sentiment": "NEUTRAL", 135 | "confidence": 0.7407410740852356, 136 | "speaker": null 137 | }, 138 | { 139 | "text": "The iPhone 13 cameras get a number of key upgrades.", 140 | "start": 107150, 141 | "end": 110062, 142 | "sentiment": "POSITIVE", 143 | "confidence": 0.8331774473190308, 144 | "speaker": null 145 | }, 146 | { 147 | 
"text": "This includes a main camera that lets in more light, an ultra wide camera that captures more scene, and an improved night mode.", 148 | "start": 110146, 149 | "end": 116878, 150 | "sentiment": "POSITIVE", 151 | "confidence": 0.7885133028030396, 152 | "speaker": null 153 | }, 154 | { 155 | "text": "The overall image quality was top notch in my testing, complete with compelling portrait shots, plenty of detail and colorful ultra wide landscapes.", 156 | "start": 117034, 157 | "end": 125530, 158 | "sentiment": "POSITIVE", 159 | "confidence": 0.9683874845504761, 160 | "speaker": null 161 | }, 162 | { 163 | "text": "If you want more control, you'll appreciate the Photographic Styles feature.", 164 | "start": 125710, 165 | "end": 129202, 166 | "sentiment": "NEUTRAL", 167 | "confidence": 0.6491402387619019, 168 | "speaker": null 169 | }, 170 | { 171 | "text": "You can switch between multiple options in the camera app, including standard vibrant, rich, contrast, warm and cool.", 172 | "start": 129286, 173 | "end": 136266, 174 | "sentiment": "POSITIVE", 175 | "confidence": 0.6676361560821533, 176 | "speaker": null 177 | }, 178 | { 179 | "text": "I put the iPhone 13 cameras up against Samsung's best, and the iPhone twelve, and the new iPhone fared quite well in this photo.", 180 | "start": 136448, 181 | "end": 143122, 182 | "sentiment": "POSITIVE", 183 | "confidence": 0.9085156917572021, 184 | "speaker": null 185 | }, 186 | { 187 | "text": "Taken at a reservoir, you can see how superior the iPhone thirteen's camera is compared to the Galaxy S 21 Ultra.", 188 | "start": 143146, 189 | "end": 149338, 190 | "sentiment": "POSITIVE", 191 | "confidence": 0.8729413747787476, 192 | "speaker": null 193 | }, 194 | { 195 | "text": "The water looks more translucent, and the iPhone 13 does a better job rendering the shadows around the wood.", 196 | "start": 149434, 197 | "end": 154786, 198 | "sentiment": "POSITIVE", 199 | "confidence": 0.9258265495300293, 200 | "speaker": null 
201 | }, 202 | { 203 | "text": "The iPhone 13 also delivered a better looking shot of these Halloween decorations.", 204 | "start": 154918, 205 | "end": 159046, 206 | "sentiment": "POSITIVE", 207 | "confidence": 0.9408524036407471, 208 | "speaker": null 209 | }, 210 | { 211 | "text": "The sign looks more vibrant as do the surrounding reallife pumpkins.", 212 | "start": 159178, 213 | "end": 162874, 214 | "sentiment": "POSITIVE", 215 | "confidence": 0.9462752342224121, 216 | "speaker": null 217 | }, 218 | { 219 | "text": "The S 21 Ultras pick is pretty sharp, but looks kind of flat in many shooting situations.", 220 | "start": 162982, 221 | "end": 168442, 222 | "sentiment": "POSITIVE", 223 | "confidence": 0.5404807925224304, 224 | "speaker": null 225 | }, 226 | { 227 | "text": "The iPhone 13 was on a par with the iPhone twelve, but I did notice a market improvement in night mode.", 228 | "start": 168466, 229 | "end": 173938, 230 | "sentiment": "POSITIVE", 231 | "confidence": 0.9060447216033936, 232 | "speaker": null 233 | }, 234 | { 235 | "text": "The iPhone 13 captures a brighter pink and the hydrangea flowers in the dark, and this candle shot looks crisper in very low light.", 236 | "start": 174034, 237 | "end": 181218, 238 | "sentiment": "POSITIVE", 239 | "confidence": 0.8981630802154541, 240 | "speaker": null 241 | }, 242 | { 243 | "text": "With this selfie, the iPhone 13 delivers colors that are a bit too saturated, while the S 21 shot looks a bit hazy and washed out, so it's kind of a drop for me.", 244 | "start": 181364, 245 | "end": 191120, 246 | "sentiment": "NEUTRAL", 247 | "confidence": 0.4757264256477356, 248 | "speaker": null 249 | }, 250 | { 251 | "text": "The iPhone 13 is the rare smartphone that can make you feel like a pro as you shoot videos, and that's thanks to a new feature called Cinematic Mode.", 252 | "start": 191930, 253 | "end": 198886, 254 | "sentiment": "POSITIVE", 255 | "confidence": 0.961600661277771, 256 | "speaker": null 257 | }, 258 | 
{ 259 | "text": "The effect brings depth of field to your footage and automatically changes the focus to various subjects.", 260 | "start": 198958, 261 | "end": 204022, 262 | "sentiment": "NEUTRAL", 263 | "confidence": 0.7180827856063843, 264 | "speaker": null 265 | }, 266 | { 267 | "text": "It's almost like magic when recording a couple of dogs.", 268 | "start": 204106, 269 | "end": 207598, 270 | "sentiment": "POSITIVE", 271 | "confidence": 0.810951828956604, 272 | "speaker": null 273 | }, 274 | { 275 | "text": "I was really impressed when the iPhone 13 placed the focus on the pup that was closest to me at first.", 276 | "start": 207634, 277 | "end": 212502, 278 | "sentiment": "POSITIVE", 279 | "confidence": 0.9806657433509827, 280 | "speaker": null 281 | }, 282 | { 283 | "text": "You can also change the focus point manually by tapping the screen.", 284 | "start": 212636, 285 | "end": 216222, 286 | "sentiment": "NEUTRAL", 287 | "confidence": 0.8885435461997986, 288 | "speaker": null 289 | }, 290 | { 291 | "text": "The bokeh effect on the background is also pretty convincing and immersive.", 292 | "start": 216356, 293 | "end": 220114, 294 | "sentiment": "POSITIVE", 295 | "confidence": 0.967531144618988, 296 | "speaker": null 297 | }, 298 | { 299 | "text": "As you can see in this footage of me walking along a trail and in this clip of me trying to sneak a cookie, the iPhone 13 did a fairly good job deciding when to focus on me versus the plate.", 300 | "start": 220222, 301 | "end": 230974, 302 | "sentiment": "POSITIVE", 303 | "confidence": 0.8418933153152466, 304 | "speaker": null 305 | }, 306 | { 307 | "text": "Cinematic Mode isn't perfect, as the iPhone 13 sometimes took a second to recognize faster moving subjects, and you can't do Cinematic mode in 4K, but overall it's a cool feature.", 308 | "start": 231142, 309 | "end": 241290, 310 | "sentiment": "POSITIVE", 311 | "confidence": 0.739893913269043, 312 | "speaker": null 313 | }, 314 | { 315 | "text": "The video 
quality from the iPhone 13 is the best I've seen from a phone, with improved noise reduction and better dynamic range.", 316 | "start": 241790, 317 | "end": 248758, 318 | "sentiment": "POSITIVE", 319 | "confidence": 0.9810681939125061, 320 | "speaker": null 321 | }, 322 | { 323 | "text": "This filthy vision HDR footage of waves lapping up against the sand is almost mesmerizing, and this sweeping landscape of the Battle of Mammothsite delivers a Crystal clear blue sky and lots of details in the clouds and surrounding trees.", 324 | "start": 248914, 325 | "end": 262710, 326 | "sentiment": "POSITIVE", 327 | "confidence": 0.9438332915306091, 328 | "speaker": null 329 | }, 330 | { 331 | "text": "The A 15 Bionic inside the iPhone XIV doesn't offer a dramatic performance gain over the iPhone Twelve, but it's still the fastest chip in any phone in a game.", 332 | "start": 263630, 333 | "end": 271818, 334 | "sentiment": "POSITIVE", 335 | "confidence": 0.8297197818756104, 336 | "speaker": null 337 | }, 338 | { 339 | "text": "Like Gentian Impact, the animations and effects felt console quality as I climbed mountains, swim through water, and tried to battle enemies.", 340 | "start": 271844, 341 | "end": 279358, 342 | "sentiment": "NEUTRAL", 343 | "confidence": 0.6295968890190125, 344 | "speaker": null 345 | }, 346 | { 347 | "text": "I was equally impressed by the seat gap, which instantly identified plants when I pointed the camera around a nearby park.", 348 | "start": 279514, 349 | "end": 286354, 350 | "sentiment": "POSITIVE", 351 | "confidence": 0.8864355087280273, 352 | "speaker": null 353 | }, 354 | { 355 | "text": "The iPhone 13 blows away Android phones on benchmarks like Geekbench Five and in graphics tests on three D Mark Wildlife, the new iPhone hit 55 nine frames per second.", 356 | "start": 286522, 357 | "end": 298170, 358 | "sentiment": "POSITIVE", 359 | "confidence": 0.9047859311103821, 360 | "speaker": null 361 | }, 362 | { 363 | "text": "That's a bit higher 
than the iPhone Twelve at 51 FPS, but the Galaxy S 21 Ultra mustered only 33.", 364 | "start": 298340, 365 | "end": 305862, 366 | "sentiment": "NEUTRAL", 367 | "confidence": 0.5150642991065979, 368 | "speaker": null 369 | }, 370 | { 371 | "text": "One of the best iPhone 13 upgrades is longer battery life thanks to a bigger battery, more efficient display, and the A 15 Bionic chip on the Times Guide battery test, which involves continuous five G web surfing, the iPhone 13 endured for 10 hours and 33 minutes.", 372 | "start": 306056, 373 | "end": 320550, 374 | "sentiment": "POSITIVE", 375 | "confidence": 0.9666282534599304, 376 | "speaker": null 377 | }, 378 | { 379 | "text": "That's more than 2 hours longer than the 825 time from the iPhone twelve.", 380 | "start": 320660, 381 | "end": 325170, 382 | "sentiment": "NEUTRAL", 383 | "confidence": 0.623497724533081, 384 | "speaker": null 385 | }, 386 | { 387 | "text": "The best result we saw from the Galaxy S 21 was 953.", 388 | "start": 325340, 389 | "end": 329370, 390 | "sentiment": "POSITIVE", 391 | "confidence": 0.9443984627723694, 392 | "speaker": null 393 | }, 394 | { 395 | "text": "Unfortunately the iPhone 13 offers the same lame charging speeds as before.", 396 | "start": 329540, 397 | "end": 333774, 398 | "sentiment": "NEGATIVE", 399 | "confidence": 0.7531523108482361, 400 | "speaker": null 401 | }, 402 | { 403 | "text": "The new iPhone got to 51% in 30 minutes with Apple's optional 20 Watt charger.", 404 | "start": 333932, 405 | "end": 340054, 406 | "sentiment": "POSITIVE", 407 | "confidence": 0.9403113722801208, 408 | "speaker": null 409 | }, 410 | { 411 | "text": "Other phones are much speedier.", 412 | "start": 340222, 413 | "end": 342150, 414 | "sentiment": "POSITIVE", 415 | "confidence": 0.6014129519462585, 416 | "speaker": null 417 | }, 418 | { 419 | "text": "The iPhone's 15 Watt MagSafe charger is even slower but some may find it more convenient because you don't have to deal with lightning.", 420 | 
"start": 342470, 421 | "end": 350770, 422 | "sentiment": "POSITIVE", 423 | "confidence": 0.6052224040031433, 424 | "speaker": null 425 | }, 426 | { 427 | "text": "The iPhone 13 doesn't quite well like the iPhone 13 Pro and Pro Max which offer 120 Hz display, telephoto, Zoom and macrophotography but at 799 this is the best new iPhone for the money.", 428 | "start": 350950, 429 | "end": 363798, 430 | "sentiment": "POSITIVE", 431 | "confidence": 0.4751574397087097, 432 | "speaker": null 433 | }, 434 | { 435 | "text": "The cameras are simply the best you'll find in this class and I especially like the longer battery life.", 436 | "start": 363944, 437 | "end": 369282, 438 | "sentiment": "POSITIVE", 439 | "confidence": 0.9813477993011475, 440 | "speaker": null 441 | }, 442 | { 443 | "text": "I wouldn't recommend that iPhone twelve owners upgrade to the new iPhone but the iPhone 13 could be worth it if you own an iPhone eleven or older device.", 444 | "start": 369416, 445 | "end": 378454, 446 | "sentiment": "POSITIVE", 447 | "confidence": 0.7283997535705566, 448 | "speaker": null 449 | }, 450 | { 451 | "text": "I'd like to see Apple offer faster charging next time around and the lack of touch ID is a bummer but overall the iPhone 13 is a fantastic phone for Tom's guide this is Martin Spoon hour.", 452 | "start": 378622, 453 | "end": 390480, 454 | "sentiment": "POSITIVE", 455 | "confidence": 0.8383842706680298, 456 | "speaker": null 457 | } 458 | ] -------------------------------------------------------------------------------- /03-sentiment-analysis/main.py: -------------------------------------------------------------------------------- 1 | import json 2 | from yt_extractor import get_video_info, get_audio_url 3 | from api_03 import save_transcript 4 | 5 | 6 | def save_video_sentiments(url): 7 | video_info = get_video_info(url) 8 | url = get_audio_url(video_info) 9 | if url: 10 | title = video_info['title'] 11 | title = title.strip().replace(" ", "_") 12 | title = 
def get_audio_url(video):
    """Return the URL of the first m4a audio format found in *video*.

    *video* is an info dict as returned by get_video_info(); it is expected
    to contain a 'formats' list of dicts with 'ext' and 'url' keys.
    Returns None when no m4a format is present.
    """
    m4a_urls = (fmt['url'] for fmt in video['formats'] if fmt['ext'] == 'm4a')
    return next(m4a_urls, None)
39 | -------------------------------------------------------------------------------- /04-podcast-summarization/4bdac209399848b4a2e72f68362096da.txt: -------------------------------------------------------------------------------- 1 | Hey, Steve, take it easy there, Chuck Norris. Why are you kicking that mannequin in the head? Don't worry, Randy. It's just my new martial arts workout. Check it out. Boot to the Head. I'm Steve McClellan. And I'm Randy Hodgens. And while Steve may not be much of a martial artist, he does know his comedy Legends, including the creators of Boot to behead the Frantics. The group formed in Toronto in 1979 when young comics Paul Chateau, Rick Green, Dan Redican, and Peter Wildman joined forces and soon gained a local following with sketches that featured offthewall humor in the vein of Monty Python and the Goons. By 1981, the franchise had secured their own CBC radio comedy program called Frantic Times. Here's a clip from that show with a playful but pointed view of organized religion. Wait a minute, you just said I could go to heaven. Well, the Kingdom of heaven is for those who follow the one true path. Presbyterians. Presbyterians. What about Catholics and Menonites and all the others who live faithfully by the tenets of their religion? It blew it. Next. After the radio show had run its course, the Frantic tried to make the move to the small screen, but a television series called Four on the Floor lasted just 13 weeks, and the group began a long hiatus in 1989. While the group may have been inactive in the 90s, its comedy endured, and a track titled Taekwon Leap Boot to the Head became a favorite on the Doctor Demento radio show. We heard a short snippet of that cut at the top of the show, but it's well worth a longer listen, as annoying student Ed Gruberman is introduced to the finer points of Taekwon Leap by his very calm but very quick and accurate Sensei. Hey, I wasn't ready. Come and get me now, Shorty. Come on. Are you chicken? 
Boot to the Head. Oh, okay. Now I'm ready. Okay, now, come on, try it now. Boot to the Head. Mind if I just lie down here for a minute? As time went on, fans began to clamor for a reunion, and in 2004 the Frantic was obliged and began issuing new recordings featuring sketches and songs such as this take on the Wild West adventures of noted frontiersman Albert Einstein. Now, Einstein claimed that space was curved and mass could make life bend. Well, Max Planck tried to prove how wrong it proved to beat Blanks in Schrodinger was next to try, but I thought faster. Heisenberg was uncertain and fell before the master. Today, the franchise are active on the web with podcasts that feature the best of their old radio show, plus new material and a generous assortment of videos and pictures from throughout their 40 year career. You can find out more about the Frantix and all our clients comedy Legends, including video clips and extended audio cuts at Laugh Tracks Radio.com. Thanks for listening, and until next time, it's goodbye from me, and it's goodbye from him goodbye everybody. -------------------------------------------------------------------------------- /04-podcast-summarization/51fa27d67c1e41598c28197c4100e0f4.txt: -------------------------------------------------------------------------------- 1 | With Fast funding up to ten $0 available through Net Credit. Our online application process was designed to get the money you need quickly. If approved, you can borrow an amount that meets your needs and repay in a way that works for your financial situation. And we report ontime payments to credit bureaus so you can build credit history as you repay. See what NetCredit can do for you today. Check your eligibility without affecting your credit score@netcredit.com. All Net Credit loans and lines of credit are offered by a member of the Net Credit family of companies or one of our lending partners is the NetCredit. Compartners for more information. 
Hi, and welcome to The Short Stuff. I'm Josh, and there's Chuck and Jerry's. Even here, Dave's not no one even knows where Dave is right now, but Jerry's here, and that's good. This is short stuff about Jaywalking. You Jay. Yeah. I think I want to start this out by saying that our mutual friend who shot subfusiono TV shows, the DP, Scott Ipalito, when we lived in La together, he got a Jaywalking ticket one day, man. That is so Scott and I was like, what they do that? And he said, they do that. I could see Scott's face when he actually got a ticket, and it was syncing in that he's getting a ticket for Jaywalking. It's great stuff, man. And I think it wasn't even, like, crossing against the crosswalk sign. I think he literally just crossed the street where he shouldn't have the cops. I don't like your face. Yeah, he's got a great face, too, supposedly. That's the thing, too, that we'll talk about later, that Jaywalking is not equitably doled out among the different types of people in the United States, which is sucky. But at the same time, Chuck, we should probably start at the beginning of Jaywalking, because Jaywalking hasn't always been around. Because cars haven't always been around. And Jaywalking doesn't really exist without the context of cars, because thanks to a really interesting Vox article that we found, it turns out the automobile industry is behind the creation and criminalization of the concept of Jaywalking. Before, it was just here's the street. You're a pedestrian, which means you basically on the street. Go ahead and cross wherever you want. Yeah. So shout out to Vox and our old pals from Housetepworks.com for this stuff. What I couldn't derive from the Vox article. Well, let's just go ahead and say this. The term J back then was a name. If you were like a Rube or a nudge or a Hick, you would be called A-J-A lot of times it was a name for somebody like that. It was very derogatory. It's my new one. You can call someone J. Yeah, I like that. 
But people were called Jade drivers, or they were known as Jay driving. But it seemed like Vox had it the other way around and that Jay driving came about after the term jaywalking, and I couldn't figure out which it was. I couldn't either. But they were so very close that they were pretty much created almost at the same time. But they were both born, it seems like, out of this conflict between automotive drivers, car drivers, we might call them today, and pedestrians, people who are just walking around because like I was saying before, the street belongs to everybody. And there were like, core strong carriages and all that stuff. But for the most part, people were on foot. And the first people who were driving cars were super rich gadflies, basically like F. Scott Fitzgerald types were the ones who were driving automotives at first. And so there was certainly class resentment out of the gate. But there was also, more than anything, resentment for people who are just zipping through the streets that were crowded with people, including children playing in the street because that was a normal thing to do. And so there was a tension almost out of the gate between drivers and pedestrians. Yeah. I mean, the drivers essentially were like, get off the road. And the walkers were like, wait a minute, the roads belong to us? And they said, no, that's why we started building sidewalks. And the walkers were like, what? You want us to walk over there on that little four foot span when we've lived our whole life in the streets? There's cracks there that I can't step on. The term J driving. I think it may have been first, though, because this was like the 1920s when the automobile really started to come on the scene and started making a legit claim to the streets. And it looks like Jay driving was in an actual newspaper in the Kansas Junction City Union and the Kansas City Star talked about Jay driving. 
So I think that may have been first, and this was driving on the wrong side of the road. And then initially, jaywalking meant just being rude on the sidewalk. And I don't know if it was right or left base, but basically walking in a way that wasn't appropriate on the sidewalk. Yeah, that's the how stuff works. Spiel, if you read that Vox article, it's basically saying that jaywalkers were called that for getting in the way of cars. It was a derogatory term for people who didn't know better to stay out of these new awesome cars way. And then in response, people called people driving cars J drivers because they didn't it was just a retaliatory term from what I could tell. So this shall be known. Henceforth is the great how stuff works. Fox Brawl yeah, exactly. Between J House Stuff works and J Vox. But the long and the short of it is that once this started to be a thing, the automobile industry got involved and their lobby and their money got involved, and they got together with local police forces. And they started initially like a shame campaign. Didn't they to shame people that were jaywalking. Yeah. There was apparently the National Automobile Chamber of Commerce who had the bright idea of creating a free wire service for local journalists where the journalists would send in the details of a car accident, like a pedestrian hit by a car. And the wire service would send them back a full article. Go ahead and run it. You can put your name on it if you want, but it would put the onus on the pedestrian. It would talk about how the pedestrian was a dummy for not getting out of the way of the car. It was the pedestrian's fault. And that was the level of underhandedness that this campaign was taking. Yeah. And as far as the shame campaign, it was literally on the streets. Like they would advise police officers to shout them down, to blow their whistles at them and call attention to them. 
There were like, legit 1940s 50 style propaganda posters about jaywalking that they would put up. And all of a sudden the pedestrian was persona non grata in the United States. Yeah. And then in very short order, they started making crosswalks. And then in very short order after that, there were laws that were passed that said, this is where the only place you can cross the street and be within the letter of the law. And as a result, cars came to dominate streets for the first time ever, pretty quickly after they were invented and introduced. And I say we take a break and then talk about those laws that kind of came up as a result of that automotive industry lobbying. Let's do it. Well, now when you're on the road driving in your truck, why not learn a thing or two from Josh and Chuck? It's stuff you should know. All right. With bills to pay and debt piling up, it's easy to feel overwhelmed. Personal loans through Net Credit can provide funding up to $10,000 to help you get back on track financially. If eligible, our secure application process allows you to customize the terms that work for you and your budget. So check your eligibility today without affecting your credit score and help get your finances back on track. Net Credit a more personal personal loan. On that credit loan, the lines of credit are offered by a member of the Net Credits family of companies are one of our lending partners. Visit NetCredit. Compartners for more information. All right. So back in 1925, Herbert Hoover, apparently, who was the commerce Secretary at the time before he was President, he wrote up a uniform law that guided pedestrian behavior in the hopes that everybody would just adopt this law and it would make sense, but instead, a patchwork of not just state laws, but municipal laws arose. 
So depending on where you are in the country, the law is going to be radically different from somewhere else in the country about whether you're in the right or the wrong for getting hit by that car. Yeah. I mean, that's still the thing today. It depends on what city you're in. And this is like you said, even sometimes small towns and municipalities all the way up to big cities in New York, people jaywalk that's the only way you can get anywhere in New York is if you just kind of do your own thing. And I've never seen anyone get busted for it. It seems like it's acceptable to do there in Los Angeles, it's weird like people will stop and if there's no cars coming, they will sit there, they will stand there and wait at a crosswalk until it turns. It's much less pedestrian friendly than New York is, obviously, but depending on where you are, it's either a faux pot to do it or it's downright illegal. If you're talking right of way, it's anybody's guess sometimes who technically, legally has the right of way. There's an old saying that the right of way is something you give, not take, especially when you're in a car and someone's walking. I think you should always sort of be nice and defer to the person walking. Right. Don't you think? Yeah. Of course, the overall points of this is that if you're driving a car, you're not really in any sort of danger of being harmed yourself if you get in an accident with a pedestrian. But a pedestrian is in a lot of danger for getting hit by a car. And so you, as the operator of the car, have a responsibility to look out for pedestrians, and then Conversely, you as a pedestrian, just out of a sense of self preservation, have a responsibility not to walk in front of cars and presume they're going to stop. Yeah. Because here's a stat 3% of traffic incidents involved pedestrians, but 14% of traffic deaths are pedestrians. 
And I think 70% of those fatalities are outside of intersections, just people crossing the street or whatever, wherever they want. Yeah. So the National Highway Transportation Safety Administration put together a publication called Pedestrian Safety Enforcement Operations, and you colon a how to guide. I did the and you part. Okay. And they basically instruct cops to just go ahead and cite everybody, site, the driver and the pedestrian if there's any kind of accident. But they recommend really bringing the hammer down on drivers because they're the ones who are going to cause the most damage. So they really need to be taught to be on the lookout for pedestrians, even if the car has the right of way. This House Works article makes a really great point, regardless of any local laws of who's right or who's wrong. If you were a driver and you hit a pedestrian, it doesn't matter whether you're in the right or wrong. You just hit somebody and maybe seriously injured or maybe even killed them. And that's a life changing event. So forget the laws. Just look out for pedestrians. At all times. Maybe if you actually do hit somebody, you can look into local laws to see what the deal is or have your lawyer do it. But up to that point, you should be looking out for pedestrians. And if you're a pedestrian, you should never, ever in a million years ever be looking at your cell phone when you step off of a curb and cross the street. It is one of the dumbest, most easily avoidable things that you can do. And yet it happens everywhere. Yeah. I mean, there are towns that have in cities that have, I think, distracted pedestrian laws on the books for stuff just like that, right? Yeah. And I also want to put in two cent for enforcing distracted drivers laws. Just seeing people driving around, looking at their phone is it makes me crazy. It makes my blood boil. Like if blood could boil, I would say it literally makes my blood boil. 
Speeding down the highway, just staring into their lap 80 miles an hour. It's unbelievable. Yeah. Or they'll drop down all of a sudden from 80 to 60 because they're checking their phone now and they're not really paying attention. But yeah, no matter how you're doing it, it's just wrong. Wrong. You're wrong. You're wrong. Yeah. I used to give a nice little friendly tap with someone in front of me at a red light. Hadn't gone yet. On the Horn. Oh, on the Horn, I see. Yeah. No, not the bumper. But now when I see the head down and they're on the phone, I lay on it. I don't care. Yeah, I'm with you. I got one more quick story, if I may. Sure. To illustrate yielding to pedestrians. It was Christmas Eve this year, and I was coming down McLendon through the neighborhoods of Atlanta toward Candler Park. And there's kind of a funky pedestrian crossing there. That's a little it's not very intuitive. And there was a legit elderly couple, like maybe in their Eighties even sort of like, should I go? Should I not? And they shouldn't have, but I could tell that they were confused. So I stopped and they started to go. And a guy coming from the other direction laid on his Horn and was like screaming and pointing up at the sign saying they shouldn't be walking. As he went by on Christmas Eve, this elderly couple. And he was like he didn't look like some young jerk. He was like some middle aged guy. I think he had a normal middle aged passenger. You have to look out for them. Yes. They're the worst, actually. Yeah. Those are the ones you have to look out for the most. Oh, it just made me so mad. I couldn't believe that this guy just blew through there. Like, what it would have cost him to just let this whole couple pass. I know. And you see that so much. Everybody so keyed up these days. But it seems like middle age dudes are the most keyed up of all. I know it's a time to be keyed up, which also means it's a time to be kinder than ever. Yeah, that's good advice, Chuck. 
All right, I'm done off the soapbox. I've got one more thing. Apparently there was a 2012 study that looked at how Jaywalking is enforced among races and depending on the town you're in, you are much likelier to be cited for jaywalking if you're a person of color, champagne or Bana, Illinois is the one that's usually trotted out as the shameful poster child for this. But the 89% of people back in 2012 who were cited for jaywalking were black, despite only 12.4% of the population of champagne Urbana, Illinois being black for shame. And it's not just them. This happens in towns all over the country, but it's, like bad enough to get a ticket for jaywalking. But if it's not being doled out evenly, then that is even worse. I agreed. Be careful out there. Be nice to people. That's all I got. I think that's good. Well, then that means everybody's short stuff is out. Stuff you should know is a production of iHeartRadio for more podcasts my heart radio visit the iHeartRadio App Apple podcasts or wherever you listen to your favorite shows. 
def get_episode_audio_url(episode_id):
    """Fetch episode metadata from the ListenNotes episodes endpoint.

    Returns a 4-tuple (audio_url, thumbnail, podcast_title, episode_title)
    extracted from the endpoint's JSON response for *episode_id*.
    """
    endpoint = '/'.join([listennotes_episode_endpoint, episode_id])
    payload = requests.get(endpoint, headers=headers_listennotes).json()
    # pprint.pprint(payload)  # uncomment to inspect the raw API response

    return (
        payload['audio'],
        payload['thumbnail'],
        payload['podcast']['title'],
        payload['title'],
    )
def save_transcript(episode_id):
    """Transcribe a podcast episode and save its transcript and chapters.

    Looks up the episode's audio URL on ListenNotes, submits it to AssemblyAI
    with auto-chapter detection, then writes two files named after the
    episode id: '<id>.txt' (plain transcript text) and '<id>_chapters.json'
    (chapter data plus episode metadata consumed by the Streamlit UI).

    Returns True on success, False when the transcription job failed.
    """
    audio_url, thumbnail, podcast_title, episode_title = get_episode_audio_url(episode_id)
    data, error = get_transcription_result_url(audio_url, auto_chapters=True)

    # BUG FIX: check the error first. On a failed job the polling response is
    # still a truthy dict, so the old `if data:` branch always ran and crashed
    # writing data['text'] (None on error); its `elif error:` was unreachable.
    if error:
        print("Error!!!", error)
        return False

    filename = episode_id + '.txt'
    with open(filename, 'w') as f:
        f.write(data['text'])

    filename = episode_id + '_chapters.json'
    with open(filename, 'w') as f:
        chapters = data['chapters']

        # Bundle the chapters with the episode metadata the UI needs.
        data = {'chapters': chapters}
        data['audio_url'] = audio_url
        data['thumbnail'] = thumbnail
        data['podcast_title'] = podcast_title
        data['episode_title'] = episode_title

        json.dump(data, f, indent=4)
    print('Transcript saved')
    return True
def get_clean_time(start_ms):
    """Format a millisecond chapter timestamp as 'MM:SS' or 'HH:MM:SS'.

    The hour field is omitted for timestamps under one hour.

    BUG FIX: the original computed hours with `% 24`, silently wrapping any
    timestamp of 24 hours or more; hours are now uncapped. Output for all
    sub-24-hour inputs is unchanged.
    """
    total_seconds = int(start_ms) // 1000
    minutes, seconds = divmod(total_seconds, 60)
    hours, minutes = divmod(minutes, 60)

    if hours > 0:
        return f'{hours:02d}:{minutes:02d}:{seconds:02d}'
    return f'{minutes:02d}:{seconds:02d}'
async def send_receive():
    """Stream microphone audio to AssemblyAI realtime and relay transcripts.

    Opens one websocket session and runs two coroutines concurrently:

    * ``send()``    — reads PCM frames from the global ``stream``,
      base64-encodes them, and ships them as JSON messages.
    * ``receive()`` — parses transcript messages; on each FinalTranscript
      it prints the prompt and the answer from ``ask_computer``.

    Both loops stop when the server closes the connection; close code 4008
    is the expected "session ended" code from AssemblyAI.
    """
    # BUG FIX: the original used '${URL}' (JavaScript-style interpolation)
    # inside an f-string, which printed a literal '$' before the URL.
    print(f'Connecting websocket to url {URL}')
    async with websockets.connect(
        URL,
        extra_headers=(("Authorization", API_KEY_ASSEMBLYAI),),
        ping_interval=5,
        ping_timeout=20
    ) as _ws:
        await asyncio.sleep(0.1)
        print("Receiving SessionBegins ...")
        session_begins = await _ws.recv()
        print(session_begins)
        print("Sending messages ...")

        async def send():
            while True:
                try:
                    data = stream.read(FRAMES_PER_BUFFER, exception_on_overflow=False)
                    data = base64.b64encode(data).decode("utf-8")
                    json_data = json.dumps({"audio_data": str(data)})
                    await _ws.send(json_data)
                except websockets.exceptions.ConnectionClosedError as e:
                    # 4008 is the normal session-end close code; anything
                    # else is surfaced, but either way we stop sending.
                    # (The original `assert e.code == 4008` is stripped
                    # under `python -O` and crashed on other codes.)
                    if e.code != 4008:
                        print("Unexpected close code:", e.code)
                    print(e)
                    break
                except Exception as e:
                    # Don't use `assert False` for error handling; log and
                    # stop instead of killing the event loop.
                    print("Unexpected error while sending:", e)
                    break
                await asyncio.sleep(0.01)

            return True

        async def receive():
            while True:
                try:
                    result_str = await _ws.recv()
                    result = json.loads(result_str)
                    prompt = result['text']
                    if prompt and result['message_type'] == 'FinalTranscript':
                        print("Me:", prompt)
                        answer = ask_computer(prompt)
                        print("Bot", answer)
                except websockets.exceptions.ConnectionClosedError as e:
                    if e.code != 4008:
                        print("Unexpected close code:", e.code)
                    print(e)
                    break
                except Exception as e:
                    print("Unexpected error while receiving:", e)
                    break

        send_result, receive_result = await asyncio.gather(send(), receive())


asyncio.run(send_receive())
def ask_computer(prompt):
    """Return the assistant's answer for *prompt*.

    Currently a stub that always returns a canned string so the realtime
    pipeline can be exercised without spending OpenAI credits.

    :param prompt: final transcript text from the realtime session.
    :return: answer string printed next to "Bot".
    """
    return "This is my answer"

    # BUG (dead code): everything after the return above was unreachable in
    # the original. Kept as a comment so it is obvious it never runs; to go
    # live, delete the return and uncomment:
    #
    # res = openai.Completion.create(
    #     engine="text-davinci-002",
    #     prompt=prompt,
    # )
    # return res["choices"][0]["text"]
def save_transcript(url, title):
    """Transcribe the audio at *url* and write the text to ``<title>.txt``.

    :param url: publicly reachable audio URL (e.g. an AssemblyAI upload_url).
    :param title: output filename stem.
    """
    data, error = get_transcription_result_url(url)

    # BUG FIX: check the error first. The polling helper returns the
    # response dict even when the job failed, so the original `if data:`
    # was truthy on errors and tried to write a missing 'text' field.
    if error:
        print("Error!!!", error)
        return

    with open(title + '.txt', 'w') as f:
        f.write(data['text'])
    print('Transcript saved')
def get_transcription_result_url(url, sentiment_analysis):
    """Submit *url* for transcription and poll until the job finishes.

    :param url: audio URL to transcribe.
    :param sentiment_analysis: whether to request sentiment analysis.
    :return: ``(response_dict, None)`` on completion, or
        ``(response_dict, error_message)`` when the job failed.
    """
    job_id = transcribe(url, sentiment_analysis)
    while True:
        response = poll(job_id)
        status = response['status']
        if status == 'completed':
            return response, None
        if status == 'error':
            return response, response['error']
        # Still queued/processing — back off before polling again.
        print("waiting for 30 seconds")
        time.sleep(30)
# Credentials for the external services used by the utilities scripts.
# These are placeholder values — substitute real keys locally and never
# commit them (this repo's .gitignore does not cover this file).
API_KEY_ASSEMBLYAI = 'key1'  # AssemblyAI transcription API
API_KEY_OPENAI = 'key2'      # OpenAI completion API
API_KEY_LISTENNOTES = 'key3' # ListenNotes podcast search API