├── .DS_Store
├── .gitattributes
├── AudioOutpu_ChatGPT
    ├── AudioOutpu_ChatGPT.ino
    ├── debug.cfg
    ├── debug_custom.json
    └── esp32.svd
├── ChatGPT Voice Assistant
    ├── ESP32_Speech_to_Text
    │   ├── Audio.cpp
    │   ├── Audio.h
    │   ├── CloudSpeechClient.cpp
    │   ├── CloudSpeechClient.h
    │   ├── ESP32_Speech_to_Text.ino
    │   ├── I2S.cpp
    │   ├── I2S.h
    │   └── network_param.h
    └── ESP32_Text_to_Speech
    │   └── ESP32_Text_to_Speech.ino
├── ChatGPT_ESP32_DemoCode
    └── ChatGPT_ESP32_DemoCode.ino
├── ESP32_TextToSpeech_SampleCode
    └── ESP32_TextToSpeech_SampleCode.ino
└── Speech_To_Text_ESP32
    ├── Audio.cpp
    ├── Audio.h
    ├── CloudSpeechClient.cpp
    ├── CloudSpeechClient.h
    ├── I2S.cpp
    ├── I2S.h
    ├── Speech_To_Text_ESP32.ino
    └── network_param.h


/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/techiesms/ESP32-ChatGPT/c09f8854350da5dc3590f38755f9755eaf50066b/.DS_Store


--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | # Auto detect text files and perform LF normalization
2 | * text=auto
3 | 


--------------------------------------------------------------------------------
/AudioOutpu_ChatGPT/AudioOutpu_ChatGPT.ino:
--------------------------------------------------------------------------------
  1 | 
  2 | #include <Arduino.h>
  3 | #include <WiFi.h>
  4 | #include <HTTPClient.h>
  5 | 
  6 | #include <ArduinoJson.h>
  7 | #include "Audio.h"
  8 | 
  9 | const char* ssid = "SmS_jiofi";
 10 | const char* password = "sms123458956";
 11 | const char* chatgpt_token = "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";
 12 | const char* temperature = "0";
 13 | const char* max_tokens = "45";
 14 | String Question = "";
 15 | 
 16 | #define I2S_DOUT      25
 17 | #define I2S_BCLK      27
 18 | #define I2S_LRC       26
 19 | 
 20 | Audio audio;
 21 | 
 22 | void setup()
 23 | {
 24 |   Serial.begin(115200);
 25 | 
 26 |   WiFi.mode(WIFI_STA);
 27 |   WiFi.disconnect();
 28 | 
 29 | 
 30 |   while (!Serial);
 31 | 
 32 |   // wait for WiFi connection
 33 |   WiFi.begin(ssid, password);
 34 |   Serial.print("Connecting to ");
 35 |   Serial.println(ssid);
 36 |   
 37 |   while (WiFi.status() != WL_CONNECTED) 
 38 |   {
 39 |     delay(1000);
 40 |     Serial.print(".");
 41 |   }
 42 |   Serial.println("connected");
 43 |   Serial.print("IP address: ");
 44 |   Serial.println(WiFi.localIP());
 45 | 
 46 |   audio.setPinout(I2S_BCLK, I2S_LRC, I2S_DOUT);
 47 |   audio.setVolume(100);
 48 | }
 49 | 
 50 | void loop()
 51 | {
 52 | 
 53 |   Serial.print("Ask your Question : ");
 54 |   while (!Serial.available())
 55 |   {
 56 |     audio.loop();
 57 |   }
 58 |   while (Serial.available())
 59 |   {
 60 |     char add = Serial.read();
 61 |     Question = Question + add;
 62 |     delay(1);
 63 |   }
 64 |   int len = Question.length();
 65 |   Question = Question.substring(0, (len - 1));
 66 |   Question = "\"" + Question + "\"";
 67 |   Serial.println(Question);
 68 | 
 69 |   HTTPClient https;
 70 | 
 71 |   //Serial.print("[HTTPS] begin...\n");
 72 |   if (https.begin("https://api.openai.com/v1/completions")) {  // HTTPS
 73 | 
 74 |     https.addHeader("Content-Type", "application/json");
 75 |     String token_key = String("Bearer ") + chatgpt_token;
 76 |     https.addHeader("Authorization", token_key);
 77 | 
 78 |     String payload = String("{\"model\": \"text-davinci-003\", \"prompt\": ") + Question + String(", \"temperature\": ") + temperature + String(", \"max_tokens\": ") + max_tokens + String("}"); //Instead of TEXT as Payload, can be JSON as Paylaod
 79 | 
 80 |     //Serial.print("[HTTPS] GET...\n");
 81 | 
 82 |     // start connection and send HTTP header
 83 |     int httpCode = https.POST(payload);
 84 | 
 85 |     // httpCode will be negative on error
 86 |     // file found at server
 87 |     if (httpCode == HTTP_CODE_OK || httpCode == HTTP_CODE_MOVED_PERMANENTLY) {
 88 |       String payload = https.getString();
 89 |       //Serial.println(payload);
 90 | 
 91 |       DynamicJsonDocument doc(1024);
 92 | 
 93 | 
 94 |       deserializeJson(doc, payload);
 95 |       String Answer = doc["choices"][0]["text"];
 96 |       Answer = Answer.substring(2);
 97 |       Serial.print("Answer : "); Serial.println(Answer);
 98 |       audio.connecttospeech(Answer.c_str(), "en");
 99 | 
100 |     }
101 |     else {
102 |       Serial.printf("[HTTPS] GET... failed, error: %s\n", https.errorToString(httpCode).c_str());
103 |     }
104 |     https.end();
105 |   }
106 |   else {
107 |     Serial.printf("[HTTPS] Unable to connect\n");
108 |   }
109 | 
110 |   Question = "";
111 | }
112 | 
113 | void audio_info(const char *info) {
114 |   Serial.print("audio_info: "); Serial.println(info);
115 | }
116 | 


--------------------------------------------------------------------------------
/AudioOutpu_ChatGPT/debug.cfg:
--------------------------------------------------------------------------------
 1 | # SPDX-License-Identifier: GPL-2.0-or-later
 2 | #
 3 | # Example OpenOCD configuration file for ESP32-WROVER-KIT board.
 4 | #
 5 | # For example, OpenOCD can be started for ESP32 debugging on
 6 | #
 7 | #   openocd -f board/esp32-wrover-kit-3.3v.cfg
 8 | #
 9 | 
10 | # Source the JTAG interface configuration file
11 | source [find interface/ftdi/esp32_devkitj_v1.cfg]
12 | set ESP32_FLASH_VOLTAGE 3.3
13 | # Source the ESP32 configuration file
14 | source [find target/esp32.cfg]
15 | 


--------------------------------------------------------------------------------
/AudioOutpu_ChatGPT/debug_custom.json:
--------------------------------------------------------------------------------
 1 | {
 2 | 	"name":"Arduino on ESP32",
 3 | 	"toolchainPrefix":"xtensa-esp32-elf",
 4 | 	"svdFile":"esp32.svd",
 5 | 	"request":"attach",
 6 | 	"postAttachCommands":[
 7 | 		"set remote hardware-watchpoint-limit 2",
 8 | 		"monitor reset halt",
 9 | 		"monitor gdb_sync",
10 | 		"thb setup",
11 | 		"c"
12 | 	],
13 | 	"overrideRestartCommands":[
14 | 		"monitor reset halt",
15 | 		"monitor gdb_sync",
16 | 		"thb setup",
17 | 		"c"
18 | 	]
19 | }


--------------------------------------------------------------------------------
/ChatGPT Voice Assistant/ESP32_Speech_to_Text/Audio.cpp:
--------------------------------------------------------------------------------
 1 | #include "Audio.h"
 2 | 
 3 | Audio::Audio(MicType micType) {
 4 |   wavData = new char*[wavDataSize/dividedWavDataSize];
 5 |   for (int i = 0; i < wavDataSize/dividedWavDataSize; ++i) wavData[i] = new char[dividedWavDataSize];
 6 |   i2s = new I2S(micType);
 7 | }
 8 | 
 9 | Audio::~Audio() {
10 |   for (int i = 0; i < wavDataSize/dividedWavDataSize; ++i) delete[] wavData[i];
11 |   delete[] wavData;
12 |   delete i2s;
13 | }
14 | 
15 | void Audio::CreateWavHeader(byte* header, int waveDataSize){
16 |   header[0] = 'R';
17 |   header[1] = 'I';
18 |   header[2] = 'F';
19 |   header[3] = 'F';
20 |   unsigned int fileSizeMinus8 = waveDataSize + 44 - 8;
21 |   header[4] = (byte)(fileSizeMinus8 & 0xFF);
22 |   header[5] = (byte)((fileSizeMinus8 >> 8) & 0xFF);
23 |   header[6] = (byte)((fileSizeMinus8 >> 16) & 0xFF);
24 |   header[7] = (byte)((fileSizeMinus8 >> 24) & 0xFF);
25 |   header[8] = 'W';
26 |   header[9] = 'A';
27 |   header[10] = 'V';
28 |   header[11] = 'E';
29 |   header[12] = 'f';
30 |   header[13] = 'm';
31 |   header[14] = 't';
32 |   header[15] = ' ';
33 |   header[16] = 0x10;  // linear PCM
34 |   header[17] = 0x00;
35 |   header[18] = 0x00;
36 |   header[19] = 0x00;
37 |   header[20] = 0x01;  // linear PCM
38 |   header[21] = 0x00;
39 |   header[22] = 0x01;  // monoral
40 |   header[23] = 0x00;
41 |   header[24] = 0x80;  // sampling rate 16000
42 |   header[25] = 0x3E;
43 |   header[26] = 0x00;
44 |   header[27] = 0x00;
45 |   header[28] = 0x00;  // Byte/sec = 16000x2x1 = 32000
46 |   header[29] = 0x7D;
47 |   header[30] = 0x00;
48 |   header[31] = 0x00;
49 |   header[32] = 0x02;  // 16bit monoral
50 |   header[33] = 0x00;
51 |   header[34] = 0x10;  // 16bit
52 |   header[35] = 0x00;
53 |   header[36] = 'd';
54 |   header[37] = 'a';
55 |   header[38] = 't';
56 |   header[39] = 'a';
57 |   header[40] = (byte)(waveDataSize & 0xFF);
58 |   header[41] = (byte)((waveDataSize >> 8) & 0xFF);
59 |   header[42] = (byte)((waveDataSize >> 16) & 0xFF);
60 |   header[43] = (byte)((waveDataSize >> 24) & 0xFF);
61 | }
62 | 
63 | void Audio::Record() {
64 |   CreateWavHeader(paddedHeader, wavDataSize);
65 |   int bitBitPerSample = i2s->GetBitPerSample();
66 |   if (bitBitPerSample == 16) {
67 |     for (int j = 0; j < wavDataSize/dividedWavDataSize; ++j) {
68 |       i2s->Read(i2sBuffer, i2sBufferSize/2);
69 |       for (int i = 0; i < i2sBufferSize/8; ++i) {
70 |         wavData[j][2*i] = i2sBuffer[4*i + 2];
71 |         wavData[j][2*i + 1] = i2sBuffer[4*i + 3];
72 |       }
73 |     }
74 |   }
75 |   else if (bitBitPerSample == 32) {
76 |     for (int j = 0; j < wavDataSize/dividedWavDataSize; ++j) {
77 |       i2s->Read(i2sBuffer, i2sBufferSize);
78 |       for (int i = 0; i < i2sBufferSize/8; ++i) {
79 |         wavData[j][2*i] = i2sBuffer[8*i + 2];
80 |         wavData[j][2*i + 1] = i2sBuffer[8*i + 3];
81 |       }
82 |     }
83 |   }
84 | }
85 | 


--------------------------------------------------------------------------------
/ChatGPT Voice Assistant/ESP32_Speech_to_Text/Audio.h:
--------------------------------------------------------------------------------
 1 | #ifndef _AUDIO_H
 2 | #define _AUDIO_H
 3 | 
 4 | #include <Arduino.h>
 5 | #include "I2S.h"
 6 | 
 7 | // 16bit, monoral, 16000Hz,  linear PCM
 8 | class Audio {
 9 |   I2S* i2s;
10 |   static const int headerSize = 44;
11 |   static const int i2sBufferSize = 12000;
12 |   char i2sBuffer[i2sBufferSize];
13 |   void CreateWavHeader(byte* header, int waveDataSize);
14 | 
15 | public:
16 |   static const int wavDataSize = 90000;                   // It must be multiple of dividedWavDataSize. Recording time is about 1.9 second.
17 |   static const int dividedWavDataSize = i2sBufferSize/4;
18 |   char** wavData;                                         // It's divided. Because large continuous memory area can't be allocated in esp32.
19 |   byte paddedHeader[headerSize + 4] = {0};                // The size must be multiple of 3 for Base64 encoding. Additional byte size must be even because wave data is 16bit.
20 | 
21 |   Audio(MicType micType);
22 |   ~Audio();
23 |   void Record();
24 | };
25 | 
26 | #endif // _AUDIO_H
27 | 


--------------------------------------------------------------------------------
/ChatGPT Voice Assistant/ESP32_Speech_to_Text/CloudSpeechClient.cpp:
--------------------------------------------------------------------------------
  1 | #include "CloudSpeechClient.h"
  2 | #include "network_param.h"
  3 | #include <base64.h>
  4 | #include <ArduinoJson.h>
  5 | #define USE_SERIAL Serial
  6 | #include <Arduino.h>
  7 | #include <HTTPClient.h>
  8 | //#define uart_en 15
  9 |  #define led_3 4
 10 |  #define led_1 15
 11 | #define led_2 2
 12 | //#include <SoftwareSerial.h>
 13 | ////SoftwareSerial (D4, D2);
 14 | const char* chatgpt_token = "Your_ChatGPT_Token";
 15 | CloudSpeechClient::CloudSpeechClient(Authentication authentication) {
 16 |   this->authentication = authentication;
 17 |   WiFi.begin(ssid, password);
 18 |  //  while (WiFi.status() == WL_CONNECTED){ digitalWrite(led_3,1);}
 19 |   while (WiFi.status() != WL_CONNECTED) delay(1000);
 20 |   client.setCACert(root_ca);
 21 |  
 22 |  
 23 |   if (!client.connect(server, 443)) Serial.println("Connection failed!"); digitalWrite(led_3,1);digitalWrite(led_1,0);digitalWrite(led_2,0);
 24 | }
 25 | 
 26 | String ans;
 27 | 
 28 | CloudSpeechClient::~CloudSpeechClient() {
 29 |   client.stop();
 30 |   WiFi.disconnect();
 31 | }
 32 | 
 33 | void CloudSpeechClient::PrintHttpBody2(Audio* audio)
 34 | {
 35 |   String enc = base64::encode(audio->paddedHeader, sizeof(audio->paddedHeader));
 36 |   enc.replace("\n", "");  // delete last "\n"
 37 |   client.print(enc);      // HttpBody2
 38 |   char** wavData = audio->wavData;
 39 |   for (int j = 0; j < audio->wavDataSize / audio->dividedWavDataSize; ++j) {
 40 |     enc = base64::encode((byte*)wavData[j], audio->dividedWavDataSize);
 41 |     enc.replace("\n", "");// delete last "\n"
 42 |     client.print(enc);    // HttpBody2
 43 |   }
 44 | }
 45 | 
 46 | void CloudSpeechClient::Transcribe(Audio* audio) {
 47 |   String HttpBody1 = "{\"config\":{\"encoding\":\"LINEAR16\",\"sampleRateHertz\":16000,\"languageCode\":\"en-IN\"},\"audio\":{\"content\":\"";
 48 |   String HttpBody3 = "\"}}\r\n\r\n";
 49 |   int httpBody2Length = (audio->wavDataSize + sizeof(audio->paddedHeader)) * 4 / 3; // 4/3 is from base64 encoding
 50 |   String ContentLength = String(HttpBody1.length() + httpBody2Length + HttpBody3.length());
 51 |   String HttpHeader;
 52 |   // if (authentication == USE_APIKEY)
 53 |   HttpHeader = String("POST /v1/speech:recognize?key=") + ApiKey
 54 |                + String(" HTTP/1.1\r\nHost: speech.googleapis.com\r\nContent-Type: application/json\r\nContent-Length: ") + ContentLength + String("\r\n\r\n");
 55 |   //  else if (authentication == USE_ACCESSTOKEN)
 56 |   //    HttpHeader = String("POST /v1beta1/speech:syncrecognize HTTP/1.1\r\nHost: speech.googleapis.com\r\nContent-Type: application/json\r\nAuthorization: Bearer ")
 57 |   //   + AccessToken + String("\r\nContent-Length: ") + ContentLength + String("\r\n\r\n");
 58 |   client.print(HttpHeader);
 59 |   client.print(HttpBody1);
 60 |   PrintHttpBody2(audio);
 61 |   client.print(HttpBody3);
 62 |   String My_Answer="";
 63 |   while (!client.available());
 64 |    
 65 |   while (client.available())
 66 |   {
 67 |     char temp = client.read();
 68 |     My_Answer = My_Answer + temp;
 69 |    // Serial.write(client.read());
 70 |   }
 71 | 
 72 |  // Serial.print("My Answer - ");Serial.println(My_Answer);
 73 |   int postion = My_Answer.indexOf('{');
 74 |  // Serial.println(postion);
 75 |   ans = My_Answer.substring(postion);
 76 |   Serial.print("Json daata--");
 77 |   //Serial.print(ans);
 78 |   
 79 |  DynamicJsonDocument doc(384);
 80 | 
 81 | //StaticJsonDocument<384> doc;
 82 | 
 83 | DeserializationError error = deserializeJson(doc, ans);
 84 | 
 85 | if (error) {
 86 |   Serial.print("deserializeJson() failed: ");
 87 |   Serial.println(error.c_str());
 88 |   return;
 89 | }
 90 | 
 91 | JsonObject results_0 = doc["results"][0];
 92 | //const char* 
 93 | const char* chatgpt_Q = results_0["alternatives"][0]["transcript"];
 94 | const char* a= "light on";
 95 | const char* b= "light off";
 96 | //String chatgpt_Q = a+ans+b;
 97 | //Serial.println(ans);
 98 | Serial.print(chatgpt_Q);Serial.println("-");
 99 | ///////////////////////////////////////////////////////////
100 | 
101 | if(strstr(chatgpt_Q, "light on")){
102 | 
103 |   Serial.println("Light's On");
104 |   digitalWrite(15, LOW);
105 | delay(1);
106 | Serial2.println("Turning Light on");
107 | digitalWrite(led_1,1);
108 | digitalWrite(led_3,0);
109 | digitalWrite(led_2,1);
110 | //digitalWrite(uart_en,HIGH);
111 |   Serial.print("To ask again");
112 |   }
113 | if(strstr(chatgpt_Q, "light off")){
114 | 
115 |   Serial.println("Light's Off");
116 |   digitalWrite(15, LOW);
117 | delay(1);
118 | Serial2.println("Turning Light off");
119 | digitalWrite(led_1,1);
120 | digitalWrite(led_2,1);
121 | //digitalWrite(uart_en,HIGH);
122 |   Serial.print("To ask again");
123 |   }  
124 |  if(strstr(chatgpt_Q, "blink on")){
125 |   HTTPClient http;
126 | 
127 |         USE_SERIAL.print("[HTTP] begin...\n");
128 |         // configure traged server and url
129 |         //http.begin("https://www.howsmyssl.com/a/check", ca); //HTTPS
130 |         http.begin("http://example.com/index.html"); //HTTP
131 | 
132 |         USE_SERIAL.print("[HTTP] GET...\n");
133 |         // start connection and send HTTP header
134 |         int httpCode = http.GET();
135 | 
136 |         // httpCode will be negative on error
137 |         if(httpCode > 0) {
138 |             // HTTP header has been send and Server response header has been handled
139 |             USE_SERIAL.printf("[HTTP] GET... code: %d\n", httpCode);
140 | 
141 |             // file found at server
142 |             if(httpCode == HTTP_CODE_OK) {
143 |                 String payload = http.getString();
144 |                 USE_SERIAL.println(payload);
145 |             }
146 |         } else {
147 |             USE_SERIAL.printf("[HTTP] GET... failed, error: %s\n", http.errorToString(httpCode).c_str());
148 |         }
149 | 
150 |         http.end();
151 |   } 
152 |  else if(strstr(chatgpt_Q, "light on")==0 && strstr(chatgpt_Q, "light off") == 0 && strstr(chatgpt_Q, "blink on") ==0 ){
153 |   Serial.println("Asking Chat GPT");
154 |   HTTPClient https;
155 | 
156 |     Serial.print("[HTTPS] begin...\n");
157 |     if (https.begin("https://api.openai.com/v1/completions")) {  // HTTPS
158 |       
159 |       https.addHeader("Content-Type", "application/json"); 
160 |       String token_key = String("Bearer ") + chatgpt_token;
161 |       https.addHeader("Authorization", token_key);
162 |       
163 |       String payload = String("{\"model\": \"text-davinci-003\", \"prompt\": ") +"\""+ chatgpt_Q +"\"" + String(", \"temperature\": 0.2, \"max_tokens\": 40}"); //Instead of TEXT as Payload, can be JSON as Paylaod
164 |       
165 |       Serial.print("[HTTPS] GET...\n");
166 |       
167 |       // start connection and send HTTP header
168 |       int httpCode = https.POST(payload);
169 | 
170 |       // httpCode will be negative on error      
171 |       // file found at server
172 |       if (httpCode == HTTP_CODE_OK || httpCode == HTTP_CODE_MOVED_PERMANENTLY) {
173 |         String payload = https.getString();
174 |         Serial.println(payload);
175 |        // Serial2.println(payload);
176 |         //////////////////////////////////////////////////
177 |         StaticJsonDocument<2000> doc2;
178 | 
179 | DeserializationError error = deserializeJson(doc2, payload);
180 | 
181 | if (error) {
182 |   Serial.print("deserializeJson() failed: ");
183 |   Serial.println(error.c_str());
184 |   return;
185 | 
186 | }
187 | JsonObject choices_0 = doc2["choices"][0];
188 | const char* only_ans = choices_0["text"];
189 | Serial.println("Only ans:-");Serial.print(only_ans);
190 | Serial2.print(only_ans);
191 | digitalWrite(led_1,1);
192 | digitalWrite(led_2,1);
193 | //digitalWrite(uart_en, LOW);
194 | delay(1);
195 | 
196 | //digitalWrite(uart_en,HIGH);
197 |         /////////////////////////////////////////////////////////
198 |       }
199 |       else {
200 |         Serial.printf("[HTTPS] GET... failed, error: %s\n", https.errorToString(httpCode).c_str());
201 |       }
202 |       https.end();
203 |     }
204 |     else {
205 |       Serial.printf("[HTTPS] Unable to connect\n");
206 |     }
207 | 
208 |   Serial.print("To ask again");
209 |   //delay(10000);
210 |   
211 |   } 
212 | 
213 | 
214 | 
215 | 
216 | ///////////////////////////////////////////////////////////
217 | /*
218 | 
219 |    
220 |   */
221 | }
222 | 


--------------------------------------------------------------------------------
/ChatGPT Voice Assistant/ESP32_Speech_to_Text/CloudSpeechClient.h:
--------------------------------------------------------------------------------
 1 | #ifndef _CLOUDSPEECHCLIENT_H
 2 | #define _CLOUDSPEECHCLIENT_H
 3 | #include <WiFiClientSecure.h>
 4 | #include "Audio.h"
 5 | 
 6 | enum Authentication {
 7 |   USE_ACCESSTOKEN,
 8 |   USE_APIKEY
 9 | };
10 | 
11 | class CloudSpeechClient {
12 |   WiFiClientSecure client;
13 |   void PrintHttpBody2(Audio* audio);
14 |   Authentication authentication;
15 | 
16 | public:
17 |   CloudSpeechClient(Authentication authentication);
18 |   ~CloudSpeechClient();
19 |   void Transcribe(Audio* audio);
20 | };
21 | 
22 | #endif // _CLOUDSPEECHCLIENT_H
23 | 


--------------------------------------------------------------------------------
/ChatGPT Voice Assistant/ESP32_Speech_to_Text/ESP32_Speech_to_Text.ino:
--------------------------------------------------------------------------------
 1 | // Tested with ESP32 Arduino board manager version 1.0.6
 2 |  #define led_1 15
 3 | #define led_2 2
 4 | #define button 23 //IR Sensor
 5 | 
 6 | #define led_3 4
 7 | 
 8 | #define RXp2 16
 9 | #define TXp2 17
10 | #include "Audio.h"
11 | #include "CloudSpeechClient.h"
12 | int i=0;
13 | void setup() {
14 | pinMode(button, INPUT);
15 | pinMode(led_1,OUTPUT);
16 | pinMode(led_2,OUTPUT);
17 |  pinMode(led_3,OUTPUT);
18 |   Serial.begin(115200);
19 |  Serial2.begin(115200, SERIAL_8N1, RXp2,TXp2);
20 |    Serial2.println("Intialising");
21 | //  Serial.println(My_Data);
22 | }
23 | 
24 | void loop() {
25 |    
26 |      digitalWrite(led_1, 0);
27 |      digitalWrite(led_2, 0);
28 |       digitalWrite(led_3, 0);
29 | 
30 |    if(i==0){
31 |    Serial.println("Press button");
32 |    i=1;
33 |    }
34 |  //  if(i==1){delay(1);}
35 |    
36 |    delay(500);
37 |    if(digitalRead(button)==0){
38 |      Serial2.println("\r\nPlease Ask!\r\n");
39 |      digitalWrite(led_1, 1);
40 |      digitalWrite(led_2, 0);
41 |       digitalWrite(led_3, 0);
42 |      delay(2100);
43 |   Serial.println("\r\nRecord start!\r\n");
44 |   //Serial2.println("\r\nRecord start!\r\n");
45 |   Audio* audio = new Audio(ADMP441);
46 |  //Audio* audio = new Audio(M5STACKFIRE);
47 |   audio->Record();
48 |   Serial.println("Recoding Complited Processing");
49 |   digitalWrite(led_1,0);
50 |   digitalWrite(led_3,0);
51 |   digitalWrite(led_2,1);
52 |   CloudSpeechClient* cloudSpeechClient = new CloudSpeechClient(USE_APIKEY);
53 |   cloudSpeechClient->Transcribe(audio);
54 |   delete cloudSpeechClient;
55 |   delete audio;
56 |   i=0;
57 |   }
58 |   if(digitalRead(button)==1){
59 |     delay(1);
60 |     
61 |     }
62 | }
63 | 


--------------------------------------------------------------------------------
/ChatGPT Voice Assistant/ESP32_Speech_to_Text/I2S.cpp:
--------------------------------------------------------------------------------
 1 | #include "I2S.h"
 2 | #define SAMPLE_RATE (16000)
 3 | #define PIN_I2S_BCLK 26
 4 | #define PIN_I2S_LRC 22
 5 | #define PIN_I2S_DIN 34
 6 | #define PIN_I2S_DOUT 25
 7 | 
 8 | // This I2S specification : 
 9 | //  -   LRC high is channel 2 (right).
10 | //  -   LRC signal transitions once each word.
11 | //  -   DATA is valid on the CLOCK rising edge.
12 | //  -   Data bits are MSB first.
13 | //  -   DATA bits are left-aligned with respect to LRC edge.
14 | //  -   DATA bits are right-shifted by one with respect to LRC edges.
15 | I2S::I2S(MicType micType) {
16 |   if (micType == M5GO || micType == M5STACKFIRE ) {
17 |     BITS_PER_SAMPLE = I2S_BITS_PER_SAMPLE_16BIT;
18 |     i2s_config_t i2s_config = {
19 |       .mode = (i2s_mode_t)(I2S_MODE_MASTER | I2S_MODE_RX | I2S_MODE_TX | I2S_MODE_DAC_BUILT_IN | I2S_MODE_ADC_BUILT_IN),
20 |       .sample_rate = SAMPLE_RATE,
21 |       .bits_per_sample = BITS_PER_SAMPLE,
22 |       .channel_format = I2S_CHANNEL_FMT_RIGHT_LEFT,
23 |       .communication_format = (i2s_comm_format_t)(I2S_COMM_FORMAT_I2S_MSB),
24 |       .intr_alloc_flags = 0,
25 |       .dma_buf_count = 2,
26 |       .dma_buf_len = 1024
27 |     };
28 |     i2s_driver_install(I2S_NUM_0, &i2s_config, 0, NULL);
29 |     i2s_set_adc_mode(ADC_UNIT_1, ADC1_CHANNEL_6);
30 |     i2s_set_clk(I2S_NUM_0, SAMPLE_RATE, BITS_PER_SAMPLE, I2S_CHANNEL_STEREO);
31 |     i2s_adc_enable(I2S_NUM_0);
32 |   }
33 |   else if (micType == ADMP441 || micType == ICS43434 ) {
34 |     BITS_PER_SAMPLE = I2S_BITS_PER_SAMPLE_32BIT;
35 |     i2s_config_t i2s_config = {
36 |       .mode = (i2s_mode_t)(I2S_MODE_MASTER | I2S_MODE_RX),
37 |       .sample_rate = SAMPLE_RATE,
38 |       .bits_per_sample = BITS_PER_SAMPLE,
39 |       .channel_format = I2S_CHANNEL_FMT_RIGHT_LEFT,
40 |       .communication_format = (i2s_comm_format_t)(I2S_COMM_FORMAT_I2S | I2S_COMM_FORMAT_I2S_MSB),
41 |       .intr_alloc_flags = 0,
42 |       .dma_buf_count = 16,
43 |       .dma_buf_len = 60
44 |     };
45 |     i2s_pin_config_t pin_config;
46 |     pin_config.bck_io_num = PIN_I2S_BCLK;
47 |     pin_config.ws_io_num = PIN_I2S_LRC;
48 |     pin_config.data_out_num = I2S_PIN_NO_CHANGE;
49 |     pin_config.data_in_num = PIN_I2S_DIN;
50 |     i2s_driver_install(I2S_NUM_0, &i2s_config, 0, NULL);
51 |     i2s_set_pin(I2S_NUM_0, &pin_config);
52 |     i2s_set_clk(I2S_NUM_0, SAMPLE_RATE, BITS_PER_SAMPLE, I2S_CHANNEL_STEREO);
53 |   }
54 | }
55 | 
56 | int I2S::Read(char* data, int numData) {
57 |   return i2s_read_bytes(I2S_NUM_0, (char *)data, numData, portMAX_DELAY);
58 | }
59 | 
60 | int I2S::GetBitPerSample() {
61 |   return (int)BITS_PER_SAMPLE;
62 | }
63 | 


--------------------------------------------------------------------------------
/ChatGPT Voice Assistant/ESP32_Speech_to_Text/I2S.h:
--------------------------------------------------------------------------------
 1 | #ifndef _I2S_H
 2 | #define _I2S_H
 3 | #include <Arduino.h>
 4 | #include "freertos/FreeRTOS.h"
 5 | #include "freertos/task.h"
 6 | #include "driver/i2s.h"
 7 | #include "esp_system.h"
 8 | 
 9 | enum MicType {
10 |   ADMP441,
11 |   ICS43434,
12 |   M5GO,
13 |   M5STACKFIRE
14 | };
15 | 
16 | class I2S {
17 |   i2s_bits_per_sample_t BITS_PER_SAMPLE;
18 | public:
19 |   I2S(MicType micType);
20 |   int Read(char* data, int numData);
21 |   int GetBitPerSample();
22 | };
23 | 
24 | #endif // _I2S_H
25 | 


--------------------------------------------------------------------------------
/ChatGPT Voice Assistant/ESP32_Speech_to_Text/network_param.h:
--------------------------------------------------------------------------------
 1 | #ifndef _NETWORK_PARAM_H
 2 | #define _NETWORK_PARAM_H
 3 | 
 4 | const char *ssid = "SSID";
 5 | const char *password = "PASSWORD";
 6 | const char*  server = "speech.googleapis.com";
 7 | 
 8 | // To get the certificate for your region, run:
 9 | //   openssl s_client -showcerts -connect speech.googleapis.com:443
10 | // Copy the certificate (all lines between and including -----BEGIN CERTIFICATE-----
11 | // and -----END CERTIFICATE-----) to root.cert and paste it into the root_ca variable below.
12 | const char* root_ca= 
13 | "-----BEGIN CERTIFICATE-----\n"
14 | "MIIFljCCA36gAwIBAgINAgO8U1lrNMcY9QFQZjANBgkqhkiG9w0BAQsFADBHMQsw\n"
15 | "CQYDVQQGEwJVUzEiMCAGA1UEChMZR29vZ2xlIFRydXN0IFNlcnZpY2VzIExMQzEU\n"
16 | "MBIGA1UEAxMLR1RTIFJvb3QgUjEwHhcNMjAwODEzMDAwMDQyWhcNMjcwOTMwMDAw\n"
17 | "MDQyWjBGMQswCQYDVQQGEwJVUzEiMCAGA1UEChMZR29vZ2xlIFRydXN0IFNlcnZp\n"
18 | "Y2VzIExMQzETMBEGA1UEAxMKR1RTIENBIDFDMzCCASIwDQYJKoZIhvcNAQEBBQAD\n"
19 | "ggEPADCCAQoCggEBAPWI3+dijB43+DdCkH9sh9D7ZYIl/ejLa6T/belaI+KZ9hzp\n"
20 | "kgOZE3wJCor6QtZeViSqejOEH9Hpabu5dOxXTGZok3c3VVP+ORBNtzS7XyV3NzsX\n"
21 | "lOo85Z3VvMO0Q+sup0fvsEQRY9i0QYXdQTBIkxu/t/bgRQIh4JZCF8/ZK2VWNAcm\n"
22 | "BA2o/X3KLu/qSHw3TT8An4Pf73WELnlXXPxXbhqW//yMmqaZviXZf5YsBvcRKgKA\n"
23 | "gOtjGDxQSYflispfGStZloEAoPtR28p3CwvJlk/vcEnHXG0g/Zm0tOLKLnf9LdwL\n"
24 | "tmsTDIwZKxeWmLnwi/agJ7u2441Rj72ux5uxiZ0CAwEAAaOCAYAwggF8MA4GA1Ud\n"
25 | "DwEB/wQEAwIBhjAdBgNVHSUEFjAUBggrBgEFBQcDAQYIKwYBBQUHAwIwEgYDVR0T\n"
26 | "AQH/BAgwBgEB/wIBADAdBgNVHQ4EFgQUinR/r4XN7pXNPZzQ4kYU83E1HScwHwYD\n"
27 | "VR0jBBgwFoAU5K8rJnEaK0gnhS9SZizv8IkTcT4waAYIKwYBBQUHAQEEXDBaMCYG\n"
28 | "CCsGAQUFBzABhhpodHRwOi8vb2NzcC5wa2kuZ29vZy9ndHNyMTAwBggrBgEFBQcw\n"
29 | "AoYkaHR0cDovL3BraS5nb29nL3JlcG8vY2VydHMvZ3RzcjEuZGVyMDQGA1UdHwQt\n"
30 | "MCswKaAnoCWGI2h0dHA6Ly9jcmwucGtpLmdvb2cvZ3RzcjEvZ3RzcjEuY3JsMFcG\n"
31 | "A1UdIARQME4wOAYKKwYBBAHWeQIFAzAqMCgGCCsGAQUFBwIBFhxodHRwczovL3Br\n"
32 | "aS5nb29nL3JlcG9zaXRvcnkvMAgGBmeBDAECATAIBgZngQwBAgIwDQYJKoZIhvcN\n"
33 | "AQELBQADggIBAIl9rCBcDDy+mqhXlRu0rvqrpXJxtDaV/d9AEQNMwkYUuxQkq/BQ\n"
34 | "cSLbrcRuf8/xam/IgxvYzolfh2yHuKkMo5uhYpSTld9brmYZCwKWnvy15xBpPnrL\n"
35 | "RklfRuFBsdeYTWU0AIAaP0+fbH9JAIFTQaSSIYKCGvGjRFsqUBITTcFTNvNCCK9U\n"
36 | "+o53UxtkOCcXCb1YyRt8OS1b887U7ZfbFAO/CVMkH8IMBHmYJvJh8VNS/UKMG2Yr\n"
37 | "PxWhu//2m+OBmgEGcYk1KCTd4b3rGS3hSMs9WYNRtHTGnXzGsYZbr8w0xNPM1IER\n"
38 | "lQCh9BIiAfq0g3GvjLeMcySsN1PCAJA/Ef5c7TaUEDu9Ka7ixzpiO2xj2YC/WXGs\n"
39 | "Yye5TBeg2vZzFb8q3o/zpWwygTMD0IZRcZk0upONXbVRWPeyk+gB9lm+cZv9TSjO\n"
40 | "z23HFtz30dZGm6fKa+l3D/2gthsjgx0QGtkJAITgRNOidSOzNIb2ILCkXhAd4FJG\n"
41 | "AJ2xDx8hcFH1mt0G/FX0Kw4zd8NLQsLxdxP8c4CU6x+7Nz/OAipmsHMdMqUybDKw\n"
42 | "juDEI/9bfU1lcKwrmz3O2+BtjjKAvpafkmO8l7tdufThcV4q5O8DIrGKZTqPwJNl\n"
43 | "1IXNDw9bg1kWRxYtnCQ6yICmJhSFm/Y3m6xv+cXDBlHz4n/FsRC6UfTd\n"
44 | "-----END CERTIFICATE-----\n";
45 | 
46 | // Getting Access Token : 
47 | // At first, you should get service account key (JSON file).
48 | // Type below command in Google Cloud Shell to get AccessToken: 
49 | // $ gcloud auth activate-service-account --key-file=KEY_FILE   (KEY_FILE is your service account key file)
50 | // $ gcloud auth print-access-token
51 | // The Access Token expires after one hour.
52 | // Google recommends to use Access Token.
53 | //const String AccessToken = "";
54 | 
55 | // It is also possible to use "API Key" instead of "Access Token". It doesn't have time limit.
56 | const String ApiKey = "Your_API_Key";
57 | 
58 | // see https://cloud.google.com/docs/authentication?hl=ja#getting_credentials_for_server-centric_flow
59 | // see https://qiita.com/basi/items/3623a576b754f738138e (Japanese)
60 | 
61 | #endif  // _NETWORK_PARAM_H
62 | 


--------------------------------------------------------------------------------
/ChatGPT Voice Assistant/ESP32_Text_to_Speech/ESP32_Text_to_Speech.ino:
--------------------------------------------------------------------------------
 1 | #include "Arduino.h"
 2 | #include "WiFi.h"
 3 | #include "Audio.h"
 4 | #define uart_en 15
 5 | #define RXp2 16
 6 | #define TXp2 17
 7 | #define I2S_DOUT      25
 8 | #define I2S_BCLK      27
 9 | #define I2S_LRC       26
10 | 
11 | Audio audio;
12 | 
13 | 
14 | void setup()
15 | {
16 | 
17 |   Serial.begin(115200);
18 |     Serial2.begin(115200, SERIAL_8N1, RXp2,TXp2);
19 | 
20 |   WiFi.disconnect();
21 |   WiFi.mode(WIFI_STA);
22 |   WiFi.begin( "SSID", "PASSWORD");
23 | 
24 |   while (WiFi.status() != WL_CONNECTED)
25 |     delay(1500);
26 | 
27 |   audio.setPinout(I2S_BCLK, I2S_LRC, I2S_DOUT);
28 |   audio.setVolume(100);
29 |   audio.connecttospeech("Starting ", "en"); // Google TTS
30 | }
31 | 
32 | 
33 | void loop()
34 | 
35 | {
36 | if (Serial2.available()){
37 |   String Answer = Serial2.readString();
38 |   Serial.println(Answer);
39 |  audio.connecttospeech(Answer.c_str(), "en");
40 |   }
41 |   audio.loop();
42 | 
43 | }
44 | 
45 | void audio_info(const char *info) {
46 |   Serial.print("audio_info: "); Serial.println(info);}
47 | 


--------------------------------------------------------------------------------
/ChatGPT_ESP32_DemoCode/ChatGPT_ESP32_DemoCode.ino:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | #include <Arduino.h>
 4 | #include <WiFi.h>
 5 | #include <HTTPClient.h>
 6 | #include <ArduinoJson.h>
 7 | 
 8 | const char* ssid = "SmS_jiofi";
 9 | const char* password = "sms123458956";
10 | const char* chatgpt_token = "Your_ChatGPT_Token";
11 | String res = "";
12 | void setup() {
13 |   Serial.begin(115200);
14 | 
15 |   WiFi.mode(WIFI_STA);
16 |   WiFi.disconnect();
17 | 
18 | 
19 |   while (!Serial);
20 | 
21 |   // wait for WiFi connection
22 |   WiFi.begin(ssid, password);
23 |   Serial.print("Connecting to ");
24 |   Serial.println(ssid);
25 |   while (WiFi.status() != WL_CONNECTED) {
26 |     delay(1000);
27 |     Serial.print(".");
28 |   }
29 |   Serial.println("connected");
30 |   Serial.print("IP address: ");
31 |   Serial.println(WiFi.localIP());
32 | }
33 | 
34 | // Reads one question from the serial console, POSTs it to the OpenAI completions endpoint and prints
35 | // the answer. Blocks until something is typed, and pauses 10 s after every request.
36 | void loop()
37 | {
38 |   Serial.print("Ask your Question : ");
39 |   while (!Serial.available());
40 |   while (Serial.available())
41 |   {
42 |     char add = Serial.read();
43 |     res = res + add;
44 |     delay(1);  // give the next byte time to arrive so the whole line is collected in one pass
45 |   }
46 |   // trim() copes with every line-ending setting (none, LF, CR, CRLF); blindly dropping the last
47 |   // character cut off a letter of the question whenever no line ending was sent.
48 |   res.trim();
49 |   if (res.length() == 0) return;  // only whitespace was typed: ask again without calling the API
50 |   Serial.println("\"" + res + "\"");
51 | 
52 |   // Build the body with ArduinoJson so quotes, backslashes and control characters in the question
53 |   // are escaped instead of producing invalid JSON.
54 |   // NOTE(review): text-davinci-003 may no longer be served by the API - confirm the model name.
55 |   DynamicJsonDocument request(256 + res.length());
56 |   request["model"] = "text-davinci-003";
57 |   request["prompt"] = res;
58 |   request["temperature"] = 0;
59 |   request["max_tokens"] = 100;
60 |   String payload;
61 |   serializeJson(request, payload);
62 | 
63 |   HTTPClient https;
64 |   if (https.begin("https://api.openai.com/v1/completions")) {  // HTTPS
65 |     https.addHeader("Content-Type", "application/json");
66 |     https.addHeader("Authorization", String("Bearer ") + chatgpt_token);
67 | 
68 |     // httpCode is negative on a client/transport error, otherwise it is the HTTP status.
69 |     int httpCode = https.POST(payload);
70 |     if (httpCode == HTTP_CODE_OK || httpCode == HTTP_CODE_MOVED_PERMANENTLY) {
71 |       String response = https.getString();
72 |       DynamicJsonDocument doc(2048);  // room for the answer text plus the response metadata
73 |       DeserializationError err = deserializeJson(doc, response);
74 |       if (err) {
75 |         Serial.print("[JSON] parse failed: "); Serial.println(err.c_str());
76 |       } else {
77 |         String Answer = doc["choices"][0]["text"] | "";  // empty string when the field is missing
78 |         Answer.trim();                                    // strips the leading blank lines, however many
79 |         Serial.print("Answer : "); Serial.println(Answer);
80 |       }
81 |     } else {
82 |       // errorToString() only describes negative codes, so the numeric code is printed as well.
83 |       Serial.printf("[HTTPS] POST... failed, code: %d, error: %s\n", httpCode, https.errorToString(httpCode).c_str());
84 |     }
85 |     https.end();
86 |   } else {
87 |     Serial.printf("[HTTPS] Unable to connect\n");
88 |   }
89 |   Serial.println("Wait 10s before next round...");
90 |   res = "";
91 |   delay(10000);
92 | }
93 | 


--------------------------------------------------------------------------------
/ESP32_TextToSpeech_SampleCode/ESP32_TextToSpeech_SampleCode.ino:
--------------------------------------------------------------------------------
 1 | #include "Arduino.h"
 2 | #include "WiFi.h"
 3 | #include "Audio.h"
 4 | 
 5 | #define I2S_DOUT      25  // I2S data pin, passed as the third argument of audio.setPinout() in setup()
 6 | #define I2S_BCLK      27  // I2S bit-clock pin, passed as the first argument of audio.setPinout()
 7 | #define I2S_LRC       26  // I2S left/right clock pin, passed as the second argument of audio.setPinout()
 8 | 
 9 | Audio audio;  // single global audio object: configured in setup(), serviced from loop()
10 | 
11 | 
12 | // One-time start-up: debug console, Wi-Fi (blocking until connected), then one spoken greeting.
13 | void setup() {
14 |   Serial.begin(115200);
15 | 
16 |   // Station mode, with any earlier association dropped first.
17 |   WiFi.disconnect();
18 |   WiFi.mode(WIFI_STA);
19 |   WiFi.begin("SSID", "PASSWORD");
20 |   // Poll every 1.5 s; there is no timeout.
21 |   while (WL_CONNECTED != WiFi.status()) {
22 |     delay(1500);
23 |   }
24 |   audio.setPinout(I2S_BCLK, I2S_LRC, I2S_DOUT);
25 |   audio.setVolume(100);
26 |   audio.connecttospeech("Hello From Sachin Soni", "en");  // Google TTS
27 | }
28 | 
29 | 
30 | // Runs repeatedly after setup(); the sketch does nothing here except service the audio object.
31 | // Presumably that is what advances playback of the phrase requested in setup() - confirm in the Audio library docs.
32 | void loop()
33 | {
34 |   // No delay between calls.
35 |   audio.loop();
36 | }
37 | // Prints info on the debug console after the tag "audio_info: " (presumably a callback invoked by the Audio library - confirm).
38 | void audio_info(const char *info) {
39 |   Serial.print("audio_info: ");
40 |   Serial.println(info);
41 | }


--------------------------------------------------------------------------------
/Speech_To_Text_ESP32/Audio.cpp:
--------------------------------------------------------------------------------
 1 | #include "Audio.h"
 2 | // Allocates the recording storage as wavDataSize/dividedWavDataSize separate chunks and creates the I2S wrapper for micType.
 3 | Audio::Audio(MicType micType) {
 4 |   const int chunkCount = wavDataSize / dividedWavDataSize;
 5 |   wavData = new char*[chunkCount];
 6 |   for (int chunk = 0; chunk < chunkCount; ++chunk) wavData[chunk] = new char[dividedWavDataSize];
 7 |   i2s = new I2S(micType);
 8 | }
 9 | // Frees every recording chunk, the chunk table and the I2S wrapper allocated by the constructor.
10 | Audio::~Audio() {
11 |   for (int chunk = 0; chunk < wavDataSize / dividedWavDataSize; ++chunk) delete[] wavData[chunk];
12 |   delete[] wavData;  // the table of chunk pointers itself
13 |   delete i2s;
14 | }
15 | // Writes the 44-byte RIFF/WAVE header for 16-bit, monaural, 16000 Hz linear PCM into header[0..43].
16 | // Record() passes paddedHeader here; its 4 spare bytes beyond the header are not touched.
17 | //   header       - destination buffer; its first 44 bytes are overwritten
18 | //   waveDataSize - size in bytes of the sample data that will follow the header
19 | // Layout (byte offset: field), every number stored least significant byte first:
20 | //    0: "RIFF"              4: file size - 8
21 | //    8: "WAVE"             12: "fmt "
22 | //   16: fmt chunk size     20: format tag
23 | //   22: channel count      24: sampling rate
24 | //   28: bytes per second   32: bytes per sample frame
25 | //   34: bits per sample    36: "data"
26 | //   40: sample data size
27 | void Audio::CreateWavHeader(byte* header, int waveDataSize){
28 |   // Stores the low `width` bytes of value at header[offset], least significant byte first.
29 |   auto putNumber = [header](int offset, unsigned int value, int width) {
30 |     for (int k = 0; k < width; ++k) {
31 |       header[offset + k] = (byte)((value >> (8 * k)) & 0xFF);
32 |     }
33 |   };
34 | 
35 |   // Copies a four-character chunk tag to header[offset].
36 |   auto putTag = [header](int offset, const char* tag) {
37 |     for (int k = 0; k < 4; ++k) {
38 |       header[offset + k] = tag[k];
39 |     }
40 |   };
41 | 
42 |   // RIFF container header.
43 |   putTag(0, "RIFF");
44 |   const unsigned int fileSizeMinus8 = waveDataSize + 44 - 8;
45 |   putNumber(4, fileSizeMinus8, 4);
46 |   putTag(8, "WAVE");
47 | 
48 |   // Format chunk.
49 |   putTag(12, "fmt ");
50 |   putNumber(16, 0x10, 4);   // linear PCM: the format chunk body is 16 bytes
51 |   putNumber(20, 0x01, 2);   // linear PCM
52 |   putNumber(22, 0x01, 2);   // monaural
53 |   putNumber(24, 16000, 4);  // sampling rate 16000 (0x3E80)
54 |   putNumber(28, 32000, 4);  // byte/sec = 16000 x 2 x 1 = 32000 (0x7D00)
55 |   putNumber(32, 0x02, 2);   // 16-bit monaural: 2 bytes per sample frame
56 |   putNumber(34, 0x10, 2);   // 16 bits per sample
57 | 
58 |   // Data chunk header; the samples themselves are not written here.
59 |   putTag(36, "data");
60 |   putNumber(40, waveDataSize, 4);
61 | }
62 | 
63 | // Records wavDataSize bytes of audio into the wavData chunks and writes the matching WAV header to paddedHeader.
64 | // The I2S driver delivers two-channel frames (4 bytes each at 16 bits per sample, 8 bytes at 32 bits);
65 | // bytes 2 and 3 of every frame are kept as one 16-bit output sample. Blocks until everything has been read.
66 | void Audio::Record() {
67 |   CreateWavHeader(paddedHeader, wavDataSize);
68 | 
69 |   int frameBytes;  // size of one frame inside i2sBuffer
70 |   switch (i2s->GetBitPerSample()) {
71 |     case 16: frameBytes = 4; break;
72 |     case 32: frameBytes = 8; break;
73 |     default: return;  // any other width: the header is written but nothing is recorded
74 |   }
75 | 
76 |   const int framesPerChunk = i2sBufferSize / 8;  // 2 output bytes per frame fill one chunk exactly
77 |   const int chunkCount = wavDataSize / dividedWavDataSize;
78 |   for (int chunk = 0; chunk < chunkCount; ++chunk) {
79 |     i2s->Read(i2sBuffer, framesPerChunk * frameBytes);
80 |     for (int frame = 0; frame < framesPerChunk; ++frame) {
81 |       wavData[chunk][2 * frame] = i2sBuffer[frameBytes * frame + 2];
82 |       wavData[chunk][2 * frame + 1] = i2sBuffer[frameBytes * frame + 3];
83 |     }
84 |   }
85 | }


--------------------------------------------------------------------------------
/Speech_To_Text_ESP32/Audio.h:
--------------------------------------------------------------------------------
 1 | #ifndef _AUDIO_H
 2 | #define _AUDIO_H
 3 | 
 4 | #include <Arduino.h>
 5 | #include "I2S.h"
 6 | 
 7 | // Records 16-bit, monaural, 16000 Hz linear PCM through the I2S wrapper and builds the WAV header for it.
 8 | class Audio {
 9 |   I2S* i2s;                                               // owned: created in the constructor, deleted in the destructor
10 |   static const int headerSize = 44;                       // bytes written by CreateWavHeader()
11 |   static const int i2sBufferSize = 12000;                 // size in bytes of the raw I2S read buffer
12 |   char i2sBuffer[i2sBufferSize];                          // scratch buffer refilled by every I2S read in Record()
13 |   void CreateWavHeader(byte* header, int waveDataSize);
14 | public:
15 |   static const int wavDataSize = 90000;                   // It must be a multiple of dividedWavDataSize. Recording time is about 2.8 seconds (90000 bytes at 32000 bytes/s).
16 |   static const int dividedWavDataSize = i2sBufferSize/4;
17 |   char** wavData;                                         // Split into chunks because a large contiguous memory area can't be allocated on the ESP32.
18 |   byte paddedHeader[headerSize + 4] = {0};                // The size must be a multiple of 3 for Base64 encoding. The number of extra bytes must be even because wave data is 16-bit.
19 |   Audio(const Audio&) = delete;                           // Not copyable: a copy would share wavData and i2s and free them a second time.
20 |   Audio& operator=(const Audio&) = delete;
21 |   Audio(MicType micType);
22 |   ~Audio();
23 |   void Record();
24 | };
25 | 
26 | #endif // _AUDIO_H
27 | 


--------------------------------------------------------------------------------
/Speech_To_Text_ESP32/CloudSpeechClient.cpp:
--------------------------------------------------------------------------------
 1 | #include "CloudSpeechClient.h"
 2 | #include "network_param.h"
 3 | #include <base64.h>
 4 | #include <ArduinoJson.h>
 5 | // Joins Wi-Fi (blocking until connected), installs the CA certificate and opens the TLS connection to the speech server.
 6 | CloudSpeechClient::CloudSpeechClient(Authentication authentication)
 7 |     : authentication(authentication) {
 8 |   WiFi.begin(ssid, password);
 9 |   while (WL_CONNECTED != WiFi.status()) delay(1000);
10 |   client.setCACert(root_ca);
11 |   if (!client.connect(server, 443)) Serial.println("Connection failed!");  // a failure is only reported, not propagated
12 | }
13 | 
14 | String ans;  // JSON part of the most recent server reply, written by Transcribe(); a file-level global, not a class member
15 | // Closes the TLS connection, then drops the Wi-Fi association that the constructor established.
16 | CloudSpeechClient::~CloudSpeechClient() {
17 |   client.stop();
18 |   WiFi.disconnect();
19 | }
20 | // Streams the recording to the server as base64 text: first the padded WAV header, then each audio chunk in order.
21 | void CloudSpeechClient::PrintHttpBody2(Audio* audio) {
22 |   // Every piece is encoded separately; their sizes are multiples of 3, so the encoded pieces join without padding.
23 |   String encoded = base64::encode(audio->paddedHeader, sizeof(audio->paddedHeader));
24 |   encoded.replace("\n", "");  // remove newlines from the encoder output
25 |   client.print(encoded);
26 |   const int chunkCount = audio->wavDataSize / audio->dividedWavDataSize;
27 |   for (int chunk = 0; chunk < chunkCount; ++chunk) {
28 |     encoded = base64::encode((byte*)audio->wavData[chunk], audio->dividedWavDataSize);
29 |     encoded.replace("\n", "");
30 |     client.print(encoded);
31 |   }
32 | }
33 | // POSTs the recording to speech:recognize (API key in the URL; the only auth path implemented) and stores the JSON part of the reply in the global `ans` (empty on failure).
34 | void CloudSpeechClient::Transcribe(Audio* audio) {
35 |   String HttpBody1 = "{\"config\":{\"encoding\":\"LINEAR16\",\"sampleRateHertz\":16000,\"languageCode\":\"en-IN\"},\"audio\":{\"content\":\"";
36 |   String HttpBody3 = "\"}}\r\n\r\n";
37 |   int httpBody2Length = (audio->wavDataSize + sizeof(audio->paddedHeader)) * 4 / 3; // 4/3 is from base64 encoding
38 |   String ContentLength = String(HttpBody1.length() + httpBody2Length + HttpBody3.length());
39 |   String HttpHeader = String("POST /v1/speech:recognize?key=") + ApiKey
40 |                + String(" HTTP/1.1\r\nHost: speech.googleapis.com\r\nContent-Type: application/json\r\nContent-Length: ") + ContentLength + String("\r\n\r\n");
41 |   client.print(HttpHeader);
42 |   client.print(HttpBody1);
43 |   PrintHttpBody2(audio);
44 |   client.print(HttpBody3);
45 |   // Wait for the first reply byte, but give up when the connection is down or nothing arrives in time;
46 |   // an unbounded busy-wait here hung forever whenever connect() had failed in the constructor.
47 |   const unsigned long firstByteTimeoutMs = 20000, idleTimeoutMs = 1000;
48 |   ans = "";
49 |   unsigned long since = millis();
50 |   while (!client.available()) {
51 |     if (!client.connected() || millis() - since > firstByteTimeoutMs) { Serial.println("No response from server"); return; }
52 |     delay(10);
53 |   }
54 |   // The reply can arrive in several pieces: keep draining until the server closes or stays quiet for idleTimeoutMs.
55 |   String My_Answer="";
56 |   for (since = millis(); ; delay(1)) {
57 |     while (client.available()) { My_Answer += (char)client.read(); since = millis(); }
58 |     if (!client.connected() || millis() - since > idleTimeoutMs) break;
59 |   }
60 |   Serial.print("My Answer - ");Serial.println(My_Answer);
61 |   int postion = My_Answer.indexOf('{');
62 |   Serial.println(postion);
63 |   if (postion >= 0) ans = My_Answer.substring(postion);  // no '{' means no JSON body: ans stays empty
64 |   Serial.print("Json daata--");
65 |   Serial.print(ans);
66 | }
67 | 


--------------------------------------------------------------------------------
/Speech_To_Text_ESP32/CloudSpeechClient.h:
--------------------------------------------------------------------------------
 1 | #ifndef _CLOUDSPEECHCLIENT_H
 2 | #define _CLOUDSPEECHCLIENT_H
 3 | #include <WiFiClientSecure.h>
 4 | #include "Audio.h"
 5 | // Authentication method handed to CloudSpeechClient; the value is stored, but Transcribe() currently always sends the API key.
 6 | enum Authentication {
 7 |   USE_ACCESSTOKEN = 0,  // bearer access token (the code path for it is commented out in Transcribe())
 8 |   USE_APIKEY = 1        // API key appended to the request URL
 9 | };
10 | // Owns the Wi-Fi/TLS session to the speech server and uploads a recorded Audio for transcription.
11 | class CloudSpeechClient {
12 | private:
13 |   WiFiClientSecure client;            // TLS connection, opened in the constructor and closed in the destructor
14 |   void PrintHttpBody2(Audio* audio);  // sends the base64-encoded WAV data (middle part of the request body)
15 |   Authentication authentication;      // value given to the constructor
16 | public:
17 |   CloudSpeechClient(Authentication authentication);  // blocks until Wi-Fi is connected
18 |   ~CloudSpeechClient();
19 |   void Transcribe(Audio* audio);
20 | };
21 | 
22 | #endif // _CLOUDSPEECHCLIENT_H
23 | 


--------------------------------------------------------------------------------
/Speech_To_Text_ESP32/I2S.cpp:
--------------------------------------------------------------------------------
 1 | #include "I2S.h"
 2 | #define SAMPLE_RATE (16000)  // sample rate used by both driver configurations in the constructor
 3 | #define PIN_I2S_BCLK 26      // bit-clock pin (bck_io_num) for the external I2S microphones
 4 | #define PIN_I2S_LRC 22       // left/right clock pin (ws_io_num)
 5 | #define PIN_I2S_DIN 34       // data-in pin (data_in_num)
 6 | #define PIN_I2S_DOUT 25      // not referenced anywhere in this file
 7 | 
 8 | // This I2S specification : 
 9 | //  -   LRC high is channel 2 (right).
10 | //  -   LRC signal transitions once each word.
11 | //  -   DATA is valid on the CLOCK rising edge.
12 | //  -   Data bits are MSB first.
13 | //  -   DATA bits are left-aligned with respect to LRC edge.
14 | //  -   DATA bits are right-shifted by one with respect to LRC edges.
15 | // Installs the I2S driver on I2S_NUM_0: built-in ADC at 16 bits for M5GO/M5STACKFIRE, external I2S pins at 32 bits for ADMP441/ICS43434.
16 | I2S::I2S(MicType micType) {
17 |   if (micType == M5GO || micType == M5STACKFIRE ) {
18 |     BITS_PER_SAMPLE = I2S_BITS_PER_SAMPLE_16BIT;
19 |     i2s_config_t i2s_config = {
20 |       .mode = (i2s_mode_t)(I2S_MODE_MASTER | I2S_MODE_RX | I2S_MODE_TX | I2S_MODE_DAC_BUILT_IN | I2S_MODE_ADC_BUILT_IN),
21 |       .sample_rate = SAMPLE_RATE,
22 |       .bits_per_sample = BITS_PER_SAMPLE,
23 |       .channel_format = I2S_CHANNEL_FMT_RIGHT_LEFT,
24 |       .communication_format = (i2s_comm_format_t)(I2S_COMM_FORMAT_I2S_MSB),
25 |       .intr_alloc_flags = 0,
26 |       .dma_buf_count = 2, .dma_buf_len = 1024
27 |     };
28 |     i2s_driver_install(I2S_NUM_0, &i2s_config, 0, NULL);
29 |     i2s_set_adc_mode(ADC_UNIT_1, ADC1_CHANNEL_6);
30 |     i2s_set_clk(I2S_NUM_0, SAMPLE_RATE, BITS_PER_SAMPLE, I2S_CHANNEL_STEREO);
31 |     i2s_adc_enable(I2S_NUM_0);
32 |   } else if (micType == ADMP441 || micType == ICS43434 ) {
33 |     BITS_PER_SAMPLE = I2S_BITS_PER_SAMPLE_32BIT;
34 |     i2s_config_t i2s_config = {
35 |       .mode = (i2s_mode_t)(I2S_MODE_MASTER | I2S_MODE_RX),
36 |       .sample_rate = SAMPLE_RATE,
37 |       .bits_per_sample = BITS_PER_SAMPLE,
38 |       .channel_format = I2S_CHANNEL_FMT_RIGHT_LEFT,
39 |       .communication_format = (i2s_comm_format_t)(I2S_COMM_FORMAT_I2S | I2S_COMM_FORMAT_I2S_MSB),
40 |       .intr_alloc_flags = 0,
41 |       .dma_buf_count = 16, .dma_buf_len = 60
42 |     };
43 |     // Start with every field at -1 (I2S_PIN_NO_CHANGE) so fields this code never names, such as mck_io_num on newer cores, are not garbage.
44 |     i2s_pin_config_t pin_config;
45 |     memset(&pin_config, 0xFF, sizeof(pin_config));
46 |     pin_config.bck_io_num = PIN_I2S_BCLK;
47 |     pin_config.ws_io_num = PIN_I2S_LRC;
48 |     pin_config.data_out_num = I2S_PIN_NO_CHANGE;
49 |     pin_config.data_in_num = PIN_I2S_DIN;
50 |     i2s_driver_install(I2S_NUM_0, &i2s_config, 0, NULL);
51 |     i2s_set_pin(I2S_NUM_0, &pin_config);
52 |     i2s_set_clk(I2S_NUM_0, SAMPLE_RATE, BITS_PER_SAMPLE, I2S_CHANNEL_STEREO);
53 |   }
54 | }
55 | 
56 | int I2S::Read(char* data, int numData) {
57 |   return i2s_read_bytes(I2S_NUM_0, (char *)data, numData, portMAX_DELAY);
58 | }
59 | 
60 | // Returns the sample width chosen in the constructor, as a plain int.
61 | int I2S::GetBitPerSample() {
62 |   return static_cast<int>(BITS_PER_SAMPLE);
63 | }


--------------------------------------------------------------------------------
/Speech_To_Text_ESP32/I2S.h:
--------------------------------------------------------------------------------
 1 | #ifndef _I2S_H
 2 | #define _I2S_H
 3 | #include <Arduino.h>
 4 | #include "freertos/FreeRTOS.h"
 5 | #include "freertos/task.h"
 6 | #include "driver/i2s.h"
 7 | #include "esp_system.h"
 8 | // Microphone variants understood by the I2S constructor.
 9 | enum MicType {
10 |   ADMP441 = 0,     // external I2S microphone, configured for 32-bit samples
11 |   ICS43434 = 1,    // external I2S microphone, configured for 32-bit samples
12 |   M5GO = 2,        // built-in ADC input, configured for 16-bit samples
13 |   M5STACKFIRE = 3  // built-in ADC input, configured for 16-bit samples
14 | };
15 | // Thin wrapper around the ESP32 I2S driver on port I2S_NUM_0, used for reading microphone samples.
16 | class I2S {
17 |   i2s_bits_per_sample_t BITS_PER_SAMPLE;  // sample width selected in the constructor from the microphone type
18 | public:
19 |   I2S(MicType micType);                   // installs and configures the driver for the given microphone
20 |   int Read(char* data, int numData);      // reads numData bytes into data, waiting with portMAX_DELAY
21 |   int GetBitPerSample();                  // returns BITS_PER_SAMPLE as an int
22 | };
23 | 
24 | #endif // _I2S_H
25 | 


--------------------------------------------------------------------------------
/Speech_To_Text_ESP32/Speech_To_Text_ESP32.ino:
--------------------------------------------------------------------------------
 1 | #include "Audio.h"
 2 | #include "CloudSpeechClient.h"
 3 | // Runs the whole demo once at boot: record a clip, send it for transcription, free everything.
 4 | void setup()
 5 | {
 6 |   Serial.begin(115200);
 7 |   delay(500);
 8 |   Serial.println("\r\nRecord start!\r\n");
 9 |   // Audio embeds a 12000-byte buffer, so it lives on the heap. Pass M5STACKFIRE instead to use the built-in ADC path.
10 |   Audio* recorder = new Audio(ICS43434);
11 |   recorder->Record();
12 |   Serial.println("Recording Completed. Now Processing...");
13 |   CloudSpeechClient* speechClient = new CloudSpeechClient(USE_APIKEY);
14 |   speechClient->Transcribe(recorder);
15 |   delete speechClient;  // closes the connection and disconnects Wi-Fi
16 |   delete recorder;
17 | }
18 | // Intentionally empty: all work happens once in setup().
19 | void loop() {
20 | }
21 | 


--------------------------------------------------------------------------------
/Speech_To_Text_ESP32/network_param.h:
--------------------------------------------------------------------------------
 1 | #ifndef _NETWORK_PARAM_H
 2 | #define _NETWORK_PARAM_H
 3 | 
 4 | const char *ssid = "SSID";  // Wi-Fi network name (placeholder), passed to WiFi.begin() in CloudSpeechClient.cpp
 5 | const char *password = "PASS";  // Wi-Fi passphrase (placeholder), passed to WiFi.begin() in CloudSpeechClient.cpp
 6 | const char*  server = "speech.googleapis.com";  // host that CloudSpeechClient connects to on port 443
 7 | 
 8 | // To get the certificate for your region run:
 9 | //   openssl s_client -showcerts -connect speech.googleapis.com:443
10 | // Copy the certificate (every line from -----BEGIN CERTIFICATE----- to -----END CERTIFICATE----- inclusive), e.g. to root.cert,
11 | // and paste it into root_ca below; CloudSpeechClient passes root_ca to client.setCACert(). NOTE(review): the embedded validity dates appear to end on 2027-09-30 - confirm.
12 | const char* root_ca= 
13 | "-----BEGIN CERTIFICATE-----\n"
14 | "MIIFljCCA36gAwIBAgINAgO8U1lrNMcY9QFQZjANBgkqhkiG9w0BAQsFADBHMQsw\n"
15 | "CQYDVQQGEwJVUzEiMCAGA1UEChMZR29vZ2xlIFRydXN0IFNlcnZpY2VzIExMQzEU\n"
16 | "MBIGA1UEAxMLR1RTIFJvb3QgUjEwHhcNMjAwODEzMDAwMDQyWhcNMjcwOTMwMDAw\n"
17 | "MDQyWjBGMQswCQYDVQQGEwJVUzEiMCAGA1UEChMZR29vZ2xlIFRydXN0IFNlcnZp\n"
18 | "Y2VzIExMQzETMBEGA1UEAxMKR1RTIENBIDFDMzCCASIwDQYJKoZIhvcNAQEBBQAD\n"
19 | "ggEPADCCAQoCggEBAPWI3+dijB43+DdCkH9sh9D7ZYIl/ejLa6T/belaI+KZ9hzp\n"
20 | "kgOZE3wJCor6QtZeViSqejOEH9Hpabu5dOxXTGZok3c3VVP+ORBNtzS7XyV3NzsX\n"
21 | "lOo85Z3VvMO0Q+sup0fvsEQRY9i0QYXdQTBIkxu/t/bgRQIh4JZCF8/ZK2VWNAcm\n"
22 | "BA2o/X3KLu/qSHw3TT8An4Pf73WELnlXXPxXbhqW//yMmqaZviXZf5YsBvcRKgKA\n"
23 | "gOtjGDxQSYflispfGStZloEAoPtR28p3CwvJlk/vcEnHXG0g/Zm0tOLKLnf9LdwL\n"
24 | "tmsTDIwZKxeWmLnwi/agJ7u2441Rj72ux5uxiZ0CAwEAAaOCAYAwggF8MA4GA1Ud\n"
25 | "DwEB/wQEAwIBhjAdBgNVHSUEFjAUBggrBgEFBQcDAQYIKwYBBQUHAwIwEgYDVR0T\n"
26 | "AQH/BAgwBgEB/wIBADAdBgNVHQ4EFgQUinR/r4XN7pXNPZzQ4kYU83E1HScwHwYD\n"
27 | "VR0jBBgwFoAU5K8rJnEaK0gnhS9SZizv8IkTcT4waAYIKwYBBQUHAQEEXDBaMCYG\n"
28 | "CCsGAQUFBzABhhpodHRwOi8vb2NzcC5wa2kuZ29vZy9ndHNyMTAwBggrBgEFBQcw\n"
29 | "AoYkaHR0cDovL3BraS5nb29nL3JlcG8vY2VydHMvZ3RzcjEuZGVyMDQGA1UdHwQt\n"
30 | "MCswKaAnoCWGI2h0dHA6Ly9jcmwucGtpLmdvb2cvZ3RzcjEvZ3RzcjEuY3JsMFcG\n"
31 | "A1UdIARQME4wOAYKKwYBBAHWeQIFAzAqMCgGCCsGAQUFBwIBFhxodHRwczovL3Br\n"
32 | "aS5nb29nL3JlcG9zaXRvcnkvMAgGBmeBDAECATAIBgZngQwBAgIwDQYJKoZIhvcN\n"
33 | "AQELBQADggIBAIl9rCBcDDy+mqhXlRu0rvqrpXJxtDaV/d9AEQNMwkYUuxQkq/BQ\n"
34 | "cSLbrcRuf8/xam/IgxvYzolfh2yHuKkMo5uhYpSTld9brmYZCwKWnvy15xBpPnrL\n"
35 | "RklfRuFBsdeYTWU0AIAaP0+fbH9JAIFTQaSSIYKCGvGjRFsqUBITTcFTNvNCCK9U\n"
36 | "+o53UxtkOCcXCb1YyRt8OS1b887U7ZfbFAO/CVMkH8IMBHmYJvJh8VNS/UKMG2Yr\n"
37 | "PxWhu//2m+OBmgEGcYk1KCTd4b3rGS3hSMs9WYNRtHTGnXzGsYZbr8w0xNPM1IER\n"
38 | "lQCh9BIiAfq0g3GvjLeMcySsN1PCAJA/Ef5c7TaUEDu9Ka7ixzpiO2xj2YC/WXGs\n"
39 | "Yye5TBeg2vZzFb8q3o/zpWwygTMD0IZRcZk0upONXbVRWPeyk+gB9lm+cZv9TSjO\n"
40 | "z23HFtz30dZGm6fKa+l3D/2gthsjgx0QGtkJAITgRNOidSOzNIb2ILCkXhAd4FJG\n"
41 | "AJ2xDx8hcFH1mt0G/FX0Kw4zd8NLQsLxdxP8c4CU6x+7Nz/OAipmsHMdMqUybDKw\n"
42 | "juDEI/9bfU1lcKwrmz3O2+BtjjKAvpafkmO8l7tdufThcV4q5O8DIrGKZTqPwJNl\n"
43 | "1IXNDw9bg1kWRxYtnCQ6yICmJhSFm/Y3m6xv+cXDBlHz4n/FsRC6UfTd\n"
44 | "-----END CERTIFICATE-----\n";
45 | 
46 | // Getting Access Token : 
47 | // At first, you should get service account key (JSON file).
48 | // Type below command in Google Cloud Shell to get AccessToken: 
49 | // $ gcloud auth activate-service-account --key-file=KEY_FILE   (KEY_FILE is your service account key file)
50 | // $ gcloud auth print-access-token
51 | // The access token expires after one hour.
52 | // Google recommends using an access token.
53 | //const String AccessToken = "";
54 | 
55 | // An API key can be used instead of an access token; it has no time limit. CloudSpeechClient::Transcribe() appends it to the request URL as "?key=...".
56 | const String ApiKey = "Your_API_Key";  // placeholder: replace with your own key
57 | 
58 | // see https://cloud.google.com/docs/authentication?hl=ja#getting_credentials_for_server-centric_flow
59 | // see https://qiita.com/basi/items/3623a576b754f738138e (Japanese)
60 | 
61 | #endif  // _NETWORK_PARAM_H
62 | 


--------------------------------------------------------------------------------