├── .DS_Store
├── .gitattributes
├── Schematic.pdf
├── ESP32CAM_Base64_Encoding_SerialMonitor
    ├── .DS_Store
    └── ESP32CAM_Base64_Encoding_SerialMonitor.ino
└── ESP32CAM_ImageAnalysis_OLED
    └── ESP32CAM_ImageAnalysis_OLED.ino


/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/techiesms/ESP32Cam_GPT4o/main/.DS_Store


--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | # Auto detect text files and perform LF normalization
2 | * text=auto
3 | 


--------------------------------------------------------------------------------
/Schematic.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/techiesms/ESP32Cam_GPT4o/main/Schematic.pdf


--------------------------------------------------------------------------------
/ESP32CAM_Base64_Encoding_SerialMonitor/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/techiesms/ESP32Cam_GPT4o/main/ESP32CAM_Base64_Encoding_SerialMonitor/.DS_Store


--------------------------------------------------------------------------------
/ESP32CAM_Base64_Encoding_SerialMonitor/ESP32CAM_Base64_Encoding_SerialMonitor.ino:
--------------------------------------------------------------------------------
  1 | /*
  2 |    This code demonstrates how to use the ESP32-CAM module to capture an image, encode it in Base64 format, 
  3 |    and send the Base64 string to the Serial Monitor. The code is designed to work with the ESP32-CAM AI-Thinker 
  4 |    module and includes the necessary configurations and pin definitions for this specific hardware.
  5 | 
  6 |    Key Functionalities:
  7 |    1. Initialize the ESP32-CAM hardware and configure its pins for camera operation.
  8 |    2. Capture an image using the camera.
  9 |    3. Encode the captured image into a Base64 string for easy transfer or storage.
 10 |    4. Print the Base64-encoded image string to the Serial Monitor.
 11 | 
 12 |    Components:
 13 |    - ESP32-CAM AI-Thinker module
 14 |    - Arduino IDE or compatible environment
 15 |    
 16 |    Version Check:
 17 |    - ESP32 Boards Package Version - 3.0.0
 18 |    - Arduino IDE Version - 2.3.2
 19 |    Note:
 20 |    - Ensure the ESP32-CAM module is properly powered, as it requires sufficient current to operate the camera.
 21 |    - The Base64 string output can be used for web-based applications or for transmitting the image over networks.
 22 | */
 23 | 
 24 | 
 25 | 
 26 | #include <WiFi.h>         // Include WiFi library for ESP32
 27 | #include <Base64.h>       // For Base64 encoding
 28 | #include "esp_camera.h"   // Include ESP32 camera library
 29 | 
 30 | // Pin definitions for the ESP32-CAM AI-Thinker module
 31 | #define PWDN_GPIO_NUM 32
 32 | #define RESET_GPIO_NUM -1
 33 | #define XCLK_GPIO_NUM 0
 34 | #define SIOD_GPIO_NUM 26
 35 | #define SIOC_GPIO_NUM 27
 36 | 
 37 | #define Y9_GPIO_NUM 35
 38 | #define Y8_GPIO_NUM 34
 39 | #define Y7_GPIO_NUM 39
 40 | #define Y6_GPIO_NUM 36
 41 | #define Y5_GPIO_NUM 21
 42 | #define Y4_GPIO_NUM 19
 43 | #define Y3_GPIO_NUM 18
 44 | #define Y2_GPIO_NUM 5
 45 | #define VSYNC_GPIO_NUM 25
 46 | #define HREF_GPIO_NUM 23
 47 | #define PCLK_GPIO_NUM 22
 48 | 
 49 | void setup() {
 50 |   Serial.begin(115200);
 51 | 
 52 |   // Initialize camera
 53 |   camera_config_t config;
 54 |   config.ledc_channel = LEDC_CHANNEL_0;
 55 |   config.ledc_timer = LEDC_TIMER_0;
 56 |   config.pin_d0 = Y2_GPIO_NUM;
 57 |   config.pin_d1 = Y3_GPIO_NUM;
 58 |   config.pin_d2 = Y4_GPIO_NUM;
 59 |   config.pin_d3 = Y5_GPIO_NUM;
 60 |   config.pin_d4 = Y6_GPIO_NUM;
 61 |   config.pin_d5 = Y7_GPIO_NUM;
 62 |   config.pin_d6 = Y8_GPIO_NUM;
 63 |   config.pin_d7 = Y9_GPIO_NUM;
 64 |   config.pin_xclk = XCLK_GPIO_NUM;
 65 |   config.pin_pclk = PCLK_GPIO_NUM;
 66 |   config.pin_vsync = VSYNC_GPIO_NUM;
 67 |   config.pin_href = HREF_GPIO_NUM;
 68 |   config.pin_sscb_sda = SIOD_GPIO_NUM;
 69 |   config.pin_sscb_scl = SIOC_GPIO_NUM;
 70 |   config.pin_pwdn = PWDN_GPIO_NUM;
 71 |   config.pin_reset = RESET_GPIO_NUM;
 72 |   config.xclk_freq_hz = 20000000;
 73 |   config.pixel_format = PIXFORMAT_JPEG;
 74 |   config.frame_size = FRAMESIZE_VGA;  // or FRAMESIZE_VGA
 75 |   config.jpeg_quality = 10;
 76 |   config.fb_count = 1;
 77 | 
 78 |   // Camera init
 79 |   if (esp_camera_init(&config) != ESP_OK) {
 80 |     Serial.println("Camera init failed");
 81 |     return;
 82 |   }
 83 | 
 84 |   // Capture and encode the image to Base64
 85 |   captureAndPrintBase64Image();
 86 | }
 87 | 
 88 | void captureAndPrintBase64Image() {
 89 |   camera_fb_t* fb = esp_camera_fb_get();
 90 |   if (!fb) {
 91 |     Serial.println("Camera capture failed");
 92 |     return;
 93 |   }
 94 | 
 95 |   // Encode the image buffer to Base64
 96 |   String base64Image = base64::encode((const uint8_t*)fb->buf, fb->len);
 97 | 
 98 |   // Print the Base64 string to the Serial Monitor
 99 |   Serial.println("\n[Base64 Encoded Image]:");
100 |   Serial.println(base64Image);
101 | 
102 |   esp_camera_fb_return(fb);  // Return the frame buffer
103 | }
104 | 
// Intentionally empty: this example does all of its work once, from setup().
void loop() {}


--------------------------------------------------------------------------------
/ESP32CAM_ImageAnalysis_OLED/ESP32CAM_ImageAnalysis_OLED.ino:
--------------------------------------------------------------------------------
  1 | /*
  2 |   ESP32-CAM Image Analysis with OpenAI API and OLED Display
  3 | 
  4 |   This code captures an image using the ESP32-CAM module, processes it, 
  5 |   and sends it to OpenAI's GPT-4o API for analysis. The API's response is 
  6 |   displayed on an OLED screen. The code also provides audio feedback using 
  7 |   a buzzer and includes features like Wi-Fi connectivity, image encoding, 
  8 |   and scrolling text display.
  9 | 
 10 |   Tested with:
 11 |   - Arduino IDE version 2.3.2
 12 |   - ESP32 boards package version 3.0.0
 13 |   - Adafruit GFX library version 1.11.11
 14 |   - Adafruit SSD1306 library version 2.5.13
 15 |   - ArduinoJson library version 7.1.0
 16 |   - Base64 library (default version with ESP32 boards package)
 17 | 
 18 |   Make sure to install these libraries and configure your environment 
 19 |   as specified above before running the code.
 20 | */
 21 | 
 22 | #include <WiFi.h>
 23 | #include <HTTPClient.h>
 24 | #include <Base64.h>
 25 | #include "esp_camera.h"
 26 | #include <Adafruit_GFX.h>  
 27 | #include <Wire.h>
 28 | #include <Adafruit_SSD1306.h> 
 29 | #include <ArduinoJson.h>
 30 | 
 31 | // WiFi credentials
 32 | const char* ssid = "SSID";
 33 | const char* password = "PASS";
 34 | 
 35 | // OpenAI API key
 36 | const String apiKey = "API KEY";
 37 | 
 38 | // Question to be Asked about the image
 39 | String Question = "Summarize the image";
 40 | 
 41 | // OLED display settings
 42 | #define SCREEN_WIDTH 128
 43 | #define SCREEN_HEIGHT 64
 44 | #define OLED_SCL 14
 45 | #define OLED_SDA 15
 46 | #define OLED_RESET -1
 47 | Adafruit_SSD1306 display(SCREEN_WIDTH, SCREEN_HEIGHT, &Wire, OLED_RESET);
 48 | 
 49 | // Pin definitions for ESP32-CAM AI-Thinker module
 50 | #define PWDN_GPIO_NUM 32
 51 | #define RESET_GPIO_NUM -1
 52 | #define XCLK_GPIO_NUM 0
 53 | #define SIOD_GPIO_NUM 26
 54 | #define SIOC_GPIO_NUM 27
 55 | #define Y9_GPIO_NUM 35
 56 | #define Y8_GPIO_NUM 34
 57 | #define Y7_GPIO_NUM 39
 58 | #define Y6_GPIO_NUM 36
 59 | #define Y5_GPIO_NUM 21
 60 | #define Y4_GPIO_NUM 19
 61 | #define Y3_GPIO_NUM 18
 62 | #define Y2_GPIO_NUM 5
 63 | #define VSYNC_GPIO_NUM 25
 64 | #define HREF_GPIO_NUM 23
 65 | #define PCLK_GPIO_NUM 22
 66 | 
 67 | #define BUTTON_PIN 13
 68 | #define BUZZER_PIN 2  // Buzzer connected to GPIO2
 69 | 
 70 | void displayCenteredText(const String& text, int textSize = 1) {
 71 |   display.clearDisplay();
 72 |   display.setTextSize(textSize);
 73 |   display.setTextColor(SSD1306_WHITE);
 74 | 
 75 |   int maxLineLength = 16;  // Assuming 16 characters fit per line at textSize 1
 76 |   String lineBuffer = "";
 77 |   String wordBuffer = "";
 78 |   int16_t x1, y1;
 79 |   uint16_t textWidth, textHeight;
 80 | 
 81 |   // Calculate line height
 82 |   display.getTextBounds("A", 0, 0, &x1, &y1, &textWidth, &textHeight);
 83 |   int lineHeight = textHeight + 2;
 84 | 
 85 |   // Calculate the total number of lines needed
 86 |   int lineCount = 0;
 87 |   for (size_t i = 0; i <= text.length(); i++) {
 88 |     char c = text.charAt(i);
 89 |     if (c == ' ' || c == '\n' || c == '\0') {
 90 |       if (lineBuffer.length() + wordBuffer.length() > maxLineLength) {
 91 |         lineCount++;
 92 |         lineBuffer = wordBuffer;
 93 |       } else {
 94 |         lineBuffer += (lineBuffer.isEmpty() ? "" : " ") + wordBuffer;
 95 |       }
 96 |       wordBuffer = "";
 97 | 
 98 |       if (c == '\n') {
 99 |         lineCount++;
100 |         lineBuffer = "";
101 |       }
102 |     } else {
103 |       wordBuffer += c;
104 |     }
105 |   }
106 |   if (!lineBuffer.isEmpty()) lineCount++;  // Count the last line
107 | 
108 |   // Calculate the vertical offset to center the block of text
109 |   int totalTextHeight = lineCount * lineHeight;
110 |   int yOffset = (SCREEN_HEIGHT - totalTextHeight) / 2;
111 | 
112 |   // Render the text line by line, vertically centered
113 |   int yPos = yOffset;
114 |   lineBuffer = "";
115 |   wordBuffer = "";
116 |   for (size_t i = 0; i <= text.length(); i++) {
117 |     char c = text.charAt(i);
118 |     if (c == ' ' || c == '\n' || c == '\0') {
119 |       if (lineBuffer.length() + wordBuffer.length() > maxLineLength) {
120 |         // Render the current line
121 |         display.setCursor((SCREEN_WIDTH - lineBuffer.length() * textWidth) / 2, yPos);
122 |         display.print(lineBuffer);
123 |         yPos += lineHeight;
124 |         lineBuffer = wordBuffer;
125 |       } else {
126 |         lineBuffer += (lineBuffer.isEmpty() ? "" : " ") + wordBuffer;
127 |       }
128 |       wordBuffer = "";
129 | 
130 |       if (c == '\n' || c == '\0') {
131 |         display.setCursor((SCREEN_WIDTH - lineBuffer.length() * textWidth) / 2, yPos);
132 |         display.print(lineBuffer);
133 |         yPos += lineHeight;
134 |         lineBuffer = "";
135 |       }
136 |     } else {
137 |       wordBuffer += c;
138 |     }
139 |   }
140 | 
141 |   display.display();
142 | }
143 | // Function to encode image to Base64
144 | String encodeImageToBase64(const uint8_t* imageData, size_t imageSize) {
145 |   return base64::encode(imageData, imageSize);
146 | }
147 | void setup() {
148 |   Serial.begin(115200);
149 |   WiFi.begin(ssid, password);
150 | 
151 |   pinMode(BUTTON_PIN, INPUT_PULLUP);
152 |   pinMode(BUZZER_PIN, OUTPUT);  // Set Buzzer pin as output
153 | 
154 |   Wire.begin(OLED_SDA, OLED_SCL);
155 |   if (!display.begin(SSD1306_SWITCHCAPVCC, 0x3C)) {
156 |     Serial.println("SSD1306 allocation failed");
157 |     for (;;)
158 |       ;
159 |   }
160 | 
161 |   // Display the project title on power-on
162 |   displayCenteredText("AI VISION Project\nby techiesms", 1);
163 |   delay(3000);  // Hold the title screen for 3 seconds
164 | 
165 |   displayCenteredText("Connecting to WiFi...");
166 |   while (WiFi.status() != WL_CONNECTED) {
167 |     delay(1000);
168 |     Serial.println("Connecting to WiFi...");
169 |   }
170 | 
171 |   displayCenteredText("WiFi Connected!");
172 |   delay(2000);
173 | 
174 |   camera_config_t config;
175 |   config.ledc_channel = LEDC_CHANNEL_0;
176 |   config.ledc_timer = LEDC_TIMER_0;
177 |   config.pin_d0 = Y2_GPIO_NUM;
178 |   config.pin_d1 = Y3_GPIO_NUM;
179 |   config.pin_d2 = Y4_GPIO_NUM;
180 |   config.pin_d3 = Y5_GPIO_NUM;
181 |   config.pin_d4 = Y6_GPIO_NUM;
182 |   config.pin_d5 = Y7_GPIO_NUM;
183 |   config.pin_d6 = Y8_GPIO_NUM;
184 |   config.pin_d7 = Y9_GPIO_NUM;
185 |   config.pin_xclk = XCLK_GPIO_NUM;
186 |   config.pin_pclk = PCLK_GPIO_NUM;
187 |   config.pin_vsync = VSYNC_GPIO_NUM;
188 |   config.pin_href = HREF_GPIO_NUM;
189 |   config.pin_sscb_sda = SIOD_GPIO_NUM;
190 |   config.pin_sscb_scl = SIOC_GPIO_NUM;
191 |   config.pin_pwdn = PWDN_GPIO_NUM;
192 |   config.pin_reset = RESET_GPIO_NUM;
193 |   config.xclk_freq_hz = 20000000;
194 |   config.pixel_format = PIXFORMAT_JPEG;
195 |   config.frame_size = FRAMESIZE_QVGA;
196 |   config.jpeg_quality = 10;
197 |   config.fb_count = 1;
198 | 
199 |   if (esp_camera_init(&config) != ESP_OK) {
200 |     Serial.println("Camera init failed");
201 |     displayCenteredText("Camera Init Failed");
202 |     return;
203 |   }
204 | 
205 |   displayCenteredText("Camera Initialized");
206 |   delay(2000);
207 | 
208 |   displayCenteredText("Press button to capture");
209 | }
210 | 
211 | 
212 | void captureAndAnalyzeImage() {
213 |   Serial.println("Capturing image...");
214 | 
215 |   // Capture the image frame buffer
216 |   camera_fb_t* fb = esp_camera_fb_get();  // Get the frame buffer
217 |   if (!fb) {
218 |     Serial.println("Camera capture failed");
219 |     displayCenteredText("Capture Failed");
220 |     return;
221 |   }
222 | 
223 |   // After the new frame is obtained, ensure the buffer is returned (cleared)
224 |   esp_camera_fb_return(fb);  // Release the frame buffer from the previous capture
225 | 
226 |   // Now, capture the new image
227 |   fb = esp_camera_fb_get();  // Get the frame buffer again for the new image
228 | 
229 |   if (!fb) {
230 |     Serial.println("Camera capture failed");
231 |     displayCenteredText("Capture Failed");
232 |     return;
233 |   }
234 | 
235 |   Serial.println("Image captured");
236 |   String base64Image = encodeImageToBase64(fb->buf, fb->len);
237 | 
238 |   beep();
239 |   // Return the frame buffer after processing the image
240 |   esp_camera_fb_return(fb);  // Return the frame buffer to free memory
241 | 
242 |   if (base64Image.isEmpty()) {
243 |     Serial.println("Failed to encode the image!");
244 |     displayCenteredText("Encode Failed");
245 |     return;
246 |   }
247 |   // Send the image to OpenAI for analysis
248 |   AnalyzeImage(base64Image);
249 | }
250 | 
251 | void AnalyzeImage(const String& base64Image) {
252 |   Serial.println("Sending image for analysis...");
253 |   displayCenteredText("Processing...");
254 | 
255 |   String result;
256 | 
257 |   // Prepare the payload for the OpenAI API
258 |   String url = "data:image/jpeg;base64," + base64Image;
259 |   Serial.println(url);
260 | 
261 |   DynamicJsonDocument doc(4096);
262 |   doc["model"] = "gpt-4o";
263 |   JsonArray messages = doc.createNestedArray("messages");
264 |   JsonObject message = messages.createNestedObject();
265 |   message["role"] = "user";
266 |   JsonArray content = message.createNestedArray("content");
267 |   JsonObject textContent = content.createNestedObject();
268 |   textContent["type"] = "text";
269 |   textContent["text"] = "Summarize the context of this image?";
270 | 
271 |   JsonObject imageContent = content.createNestedObject();
272 |   imageContent["type"] = "image_url";
273 |   JsonObject imageUrlObject = imageContent.createNestedObject("image_url");
274 |   imageUrlObject["url"] = url;
275 |   imageContent["image_url"]["detail"] = "auto";
276 | 
277 |   doc["max_tokens"] = 400;
278 | 
279 |   String jsonPayload;
280 |   serializeJson(doc, jsonPayload);
281 | 
282 |   // Send request and validate response
283 |   if (sendPostRequest(jsonPayload, result)) {
284 |     Serial.print("[ChatGPT] Response: ");
285 |     Serial.println(result);
286 | 
287 |     // Clear the display before showing the new response
288 |     display.clearDisplay();
289 |     display.display();
290 | 
291 |     DynamicJsonDocument responseDoc(4096);
292 |     deserializeJson(responseDoc, result);
293 | 
294 |     String responseContent = responseDoc["choices"][0]["message"]["content"].as<String>();
295 |     Serial.println("[ChatGPT] Parsed response: " + responseContent);
296 | 
297 |     // Smooth scrolling and proper word wrapping
298 |     display.clearDisplay();
299 |     int lineHeight = 8;     // Height of each line in pixels
300 |     int maxLineChars = 21;  // Approx. max characters per line
301 |     int visibleLines = 7;
302 |     int scrollDelay = 2000;  // Delay for scrolling in milliseconds
303 | 
304 |     std::vector<String> lines;  // Store formatted lines for display
305 | 
306 |     // Split responseContent into words for word wrapping
307 |     String word = "";
308 |     String currentLine = "";
309 | 
310 |     for (int i = 0; i < responseContent.length(); i++) {
311 |       char c = responseContent.charAt(i);
312 |       if (c == ' ' || c == '\n') {
313 |         if (currentLine.length() + word.length() <= maxLineChars) {
314 |           currentLine += (currentLine.isEmpty() ? "" : " ") + word;
315 |         } else {
316 |           lines.push_back(currentLine);
317 |           currentLine = word;
318 |         }
319 |         word = "";
320 |       } else {
321 |         word += c;
322 |       }
323 |     }
324 |     if (!currentLine.isEmpty()) lines.push_back(currentLine);
325 |     if (!word.isEmpty()) lines.push_back(word);
326 | 
327 |     // Display lines with scrolling effect
328 |     for (size_t i = 0; i < lines.size(); i++) {
329 |       display.clearDisplay();
330 |       for (size_t j = 0; j < visibleLines && (i + j) < lines.size(); j++) {
331 |         display.setCursor(0, j * lineHeight);
332 |         display.print(lines[i + j]);
333 |       }
334 |       display.display();
335 |       delay(scrollDelay);
336 |     }
337 | 
338 |     // Clear display after the response
339 |     display.clearDisplay();
340 |     display.display();
341 | 
342 |     displayCenteredText("Press button to capture");
343 |   } else {
344 |     Serial.print("[ChatGPT] Error: ");
345 |     Serial.println(result);
346 |     display.clearDisplay();
347 |     display.setCursor(0, 0);
348 |     display.print("API Error");
349 |     display.display();
350 |   }
351 | }
352 | 
353 | bool sendPostRequest(const String& payload, String& result) {
354 |   HTTPClient http;
355 |   http.begin("https://api.openai.com/v1/chat/completions");
356 | 
357 |   http.addHeader("Content-Type", "application/json");
358 |   http.addHeader("Authorization", "Bearer " + apiKey);
359 |   http.setTimeout(20000);
360 | 
361 |   Serial.print("Payload size: ");
362 |   Serial.println(payload.length());
363 | 
364 |   int httpResponseCode = http.POST(payload);
365 | 
366 |   if (httpResponseCode > 0) {
367 |     result = http.getString();
368 |     Serial.println("HTTP Response Code: " + String(httpResponseCode));
369 |     Serial.println("Response Body: " + result);
370 |     http.end();
371 |     return true;
372 |   } else {
373 |     result = "HTTP request failed, response code: " + String(httpResponseCode);
374 |     Serial.println("Error Code: " + String(httpResponseCode));
375 |     Serial.println("Error Message: " + http.errorToString(httpResponseCode));
376 |     http.end();
377 |     return false;
378 |   }
379 | }
380 | 
381 | 
382 | // Remaining code remains unchanged...
383 | 
384 | void loop() {
385 |   if (digitalRead(BUTTON_PIN) == LOW) {
386 |     Serial.println("Button pressed! Capturing image...");
387 |     displayCenteredText("Capturing...");
388 |     captureAndAnalyzeImage();
389 |     delay(1000);  // Small delay to debounce button press
390 |   }
391 | }
392 | void beep(){
393 |   digitalWrite(2,HIGH);
394 |   delay(300);
395 |   digitalWrite(2,LOW);
396 |   
397 | }
398 | 


--------------------------------------------------------------------------------