├── .DS_Store ├── .gitattributes ├── Schematic.pdf ├── ESP32CAM_Base64_Encoding_SerialMonitor ├── .DS_Store └── ESP32CAM_Base64_Encoding_SerialMonitor.ino └── ESP32CAM_ImageAnalysis_OLED └── ESP32CAM_ImageAnalysis_OLED.ino /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/techiesms/ESP32Cam_GPT4o/main/.DS_Store -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | -------------------------------------------------------------------------------- /Schematic.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/techiesms/ESP32Cam_GPT4o/main/Schematic.pdf -------------------------------------------------------------------------------- /ESP32CAM_Base64_Encoding_SerialMonitor/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/techiesms/ESP32Cam_GPT4o/main/ESP32CAM_Base64_Encoding_SerialMonitor/.DS_Store -------------------------------------------------------------------------------- /ESP32CAM_Base64_Encoding_SerialMonitor/ESP32CAM_Base64_Encoding_SerialMonitor.ino: -------------------------------------------------------------------------------- 1 | /* 2 | This code demonstrates how to use the ESP32-CAM module to capture an image, encode it in Base64 format, 3 | and send the Base64 string to the Serial Monitor. The code is designed to work with the ESP32-CAM AI-Thinker 4 | module and includes the necessary configurations and pin definitions for this specific hardware. 5 | 6 | Key Functionalities: 7 | 1. Initialize the ESP32-CAM hardware and configure its pins for camera operation. 8 | 2. Capture an image using the camera. 9 | 3. Encode the captured image into a Base64 string for easy transfer or storage. 10 | 4. Print the Base64-encoded image string to the Serial Monitor. 11 | 12 | Components: 13 | - ESP32-CAM AI-Thinker module 14 | - Arduino IDE or compatible environment 15 | 16 | Version Check: 17 | - ESP32 Boards Package Version - 3.0.0 18 | - Arduino IDE Version - 2.3.2 19 | Note: 20 | - Ensure the ESP32-CAM module is properly powered, as it requires sufficient current to operate the camera. 21 | - The Base64 string output can be used for web-based applications or for transmitting the image over networks. 22 | */ 23 | 24 | 25 | 26 | #include // Include WiFi library for ESP32 27 | #include // For Base64 encoding 28 | #include "esp_camera.h" // Include ESP32 camera library 29 | 30 | // Pin definitions for the ESP32-CAM AI-Thinker module 31 | #define PWDN_GPIO_NUM 32 32 | #define RESET_GPIO_NUM -1 33 | #define XCLK_GPIO_NUM 0 34 | #define SIOD_GPIO_NUM 26 35 | #define SIOC_GPIO_NUM 27 36 | 37 | #define Y9_GPIO_NUM 35 38 | #define Y8_GPIO_NUM 34 39 | #define Y7_GPIO_NUM 39 40 | #define Y6_GPIO_NUM 36 41 | #define Y5_GPIO_NUM 21 42 | #define Y4_GPIO_NUM 19 43 | #define Y3_GPIO_NUM 18 44 | #define Y2_GPIO_NUM 5 45 | #define VSYNC_GPIO_NUM 25 46 | #define HREF_GPIO_NUM 23 47 | #define PCLK_GPIO_NUM 22 48 | 49 | void setup() { 50 | Serial.begin(115200); 51 | 52 | // Initialize camera 53 | camera_config_t config; 54 | config.ledc_channel = LEDC_CHANNEL_0; 55 | config.ledc_timer = LEDC_TIMER_0; 56 | config.pin_d0 = Y2_GPIO_NUM; 57 | config.pin_d1 = Y3_GPIO_NUM; 58 | config.pin_d2 = Y4_GPIO_NUM; 59 | config.pin_d3 = Y5_GPIO_NUM; 60 | config.pin_d4 = Y6_GPIO_NUM; 61 | config.pin_d5 = Y7_GPIO_NUM; 62 | config.pin_d6 = Y8_GPIO_NUM; 63 | config.pin_d7 = Y9_GPIO_NUM; 64 | config.pin_xclk = XCLK_GPIO_NUM; 65 | config.pin_pclk = PCLK_GPIO_NUM; 66 | config.pin_vsync = VSYNC_GPIO_NUM; 67 | config.pin_href = HREF_GPIO_NUM; 68 | config.pin_sscb_sda = SIOD_GPIO_NUM; 69 | config.pin_sscb_scl = SIOC_GPIO_NUM; 70 | config.pin_pwdn = PWDN_GPIO_NUM; 71 | config.pin_reset = RESET_GPIO_NUM; 72 | config.xclk_freq_hz = 20000000; 73 | config.pixel_format = PIXFORMAT_JPEG; 74 | config.frame_size = FRAMESIZE_VGA; // or FRAMESIZE_VGA 75 | config.jpeg_quality = 10; 76 | config.fb_count = 1; 77 | 78 | // Camera init 79 | if (esp_camera_init(&config) != ESP_OK) { 80 | Serial.println("Camera init failed"); 81 | return; 82 | } 83 | 84 | // Capture and encode the image to Base64 85 | captureAndPrintBase64Image(); 86 | } 87 | 88 | void captureAndPrintBase64Image() { 89 | camera_fb_t* fb = esp_camera_fb_get(); 90 | if (!fb) { 91 | Serial.println("Camera capture failed"); 92 | return; 93 | } 94 | 95 | // Encode the image buffer to Base64 96 | String base64Image = base64::encode((const uint8_t*)fb->buf, fb->len); 97 | 98 | // Print the Base64 string to the Serial Monitor 99 | Serial.println("\n[Base64 Encoded Image]:"); 100 | Serial.println(base64Image); 101 | 102 | esp_camera_fb_return(fb); // Return the frame buffer 103 | } 104 | 105 | void loop() { 106 | // Nothing to loop in this example 107 | } -------------------------------------------------------------------------------- /ESP32CAM_ImageAnalysis_OLED/ESP32CAM_ImageAnalysis_OLED.ino: -------------------------------------------------------------------------------- 1 | /* 2 | ESP32-CAM Image Analysis with OpenAI API and OLED Display 3 | 4 | This code captures an image using the ESP32-CAM module, processes it, 5 | and sends it to OpenAI's GPT-4o API for analysis. The API's response is 6 | displayed on an OLED screen. The code also provides audio feedback using 7 | a buzzer and includes features like Wi-Fi connectivity, image encoding, 8 | and scrolling text display. 9 | 10 | Tested with: 11 | - Arduino IDE version 2.3.2 12 | - ESP32 boards package version 3.0.0 13 | - Adafruit GFX library version 1.11.11 14 | - Adafruit SSD1306 library version 2.5.13 15 | - ArduinoJson library version 7.1.0 16 | - Base64 library (default version with ESP32 boards package) 17 | 18 | Make sure to install these libraries and configure your environment 19 | as specified above before running the code. 20 | */ 21 | 22 | #include 23 | #include 24 | #include 25 | #include "esp_camera.h" 26 | #include 27 | #include 28 | #include 29 | #include 30 | 31 | // WiFi credentials 32 | const char* ssid = "SSID"; 33 | const char* password = "PASS"; 34 | 35 | // OpenAI API key 36 | const String apiKey = "API KEY"; 37 | 38 | // Question to be Asked about the image 39 | String Question = "Summarize the image"; 40 | 41 | // OLED display settings 42 | #define SCREEN_WIDTH 128 43 | #define SCREEN_HEIGHT 64 44 | #define OLED_SCL 14 45 | #define OLED_SDA 15 46 | #define OLED_RESET -1 47 | Adafruit_SSD1306 display(SCREEN_WIDTH, SCREEN_HEIGHT, &Wire, OLED_RESET); 48 | 49 | // Pin definitions for ESP32-CAM AI-Thinker module 50 | #define PWDN_GPIO_NUM 32 51 | #define RESET_GPIO_NUM -1 52 | #define XCLK_GPIO_NUM 0 53 | #define SIOD_GPIO_NUM 26 54 | #define SIOC_GPIO_NUM 27 55 | #define Y9_GPIO_NUM 35 56 | #define Y8_GPIO_NUM 34 57 | #define Y7_GPIO_NUM 39 58 | #define Y6_GPIO_NUM 36 59 | #define Y5_GPIO_NUM 21 60 | #define Y4_GPIO_NUM 19 61 | #define Y3_GPIO_NUM 18 62 | #define Y2_GPIO_NUM 5 63 | #define VSYNC_GPIO_NUM 25 64 | #define HREF_GPIO_NUM 23 65 | #define PCLK_GPIO_NUM 22 66 | 67 | #define BUTTON_PIN 13 68 | #define BUZZER_PIN 2 // Buzzer connected to GPIO2 69 | 70 | void displayCenteredText(const String& text, int textSize = 1) { 71 | display.clearDisplay(); 72 | display.setTextSize(textSize); 73 | display.setTextColor(SSD1306_WHITE); 74 | 75 | int maxLineLength = 16; // Assuming 16 characters fit per line at textSize 1 76 | String lineBuffer = ""; 77 | String wordBuffer = ""; 78 | int16_t x1, y1; 79 | uint16_t textWidth, textHeight; 80 | 81 | // Calculate line height 82 | display.getTextBounds("A", 0, 0, &x1, &y1, &textWidth, &textHeight); 83 | int lineHeight = textHeight + 2; 84 | 85 | // Calculate the total number of lines needed 86 | int lineCount = 0; 87 | for (size_t i = 0; i <= text.length(); i++) { 88 | char c = text.charAt(i); 89 | if (c == ' ' || c == '\n' || c == '\0') { 90 | if (lineBuffer.length() + wordBuffer.length() > maxLineLength) { 91 | lineCount++; 92 | lineBuffer = wordBuffer; 93 | } else { 94 | lineBuffer += (lineBuffer.isEmpty() ? "" : " ") + wordBuffer; 95 | } 96 | wordBuffer = ""; 97 | 98 | if (c == '\n') { 99 | lineCount++; 100 | lineBuffer = ""; 101 | } 102 | } else { 103 | wordBuffer += c; 104 | } 105 | } 106 | if (!lineBuffer.isEmpty()) lineCount++; // Count the last line 107 | 108 | // Calculate the vertical offset to center the block of text 109 | int totalTextHeight = lineCount * lineHeight; 110 | int yOffset = (SCREEN_HEIGHT - totalTextHeight) / 2; 111 | 112 | // Render the text line by line, vertically centered 113 | int yPos = yOffset; 114 | lineBuffer = ""; 115 | wordBuffer = ""; 116 | for (size_t i = 0; i <= text.length(); i++) { 117 | char c = text.charAt(i); 118 | if (c == ' ' || c == '\n' || c == '\0') { 119 | if (lineBuffer.length() + wordBuffer.length() > maxLineLength) { 120 | // Render the current line 121 | display.setCursor((SCREEN_WIDTH - lineBuffer.length() * textWidth) / 2, yPos); 122 | display.print(lineBuffer); 123 | yPos += lineHeight; 124 | lineBuffer = wordBuffer; 125 | } else { 126 | lineBuffer += (lineBuffer.isEmpty() ? "" : " ") + wordBuffer; 127 | } 128 | wordBuffer = ""; 129 | 130 | if (c == '\n' || c == '\0') { 131 | display.setCursor((SCREEN_WIDTH - lineBuffer.length() * textWidth) / 2, yPos); 132 | display.print(lineBuffer); 133 | yPos += lineHeight; 134 | lineBuffer = ""; 135 | } 136 | } else { 137 | wordBuffer += c; 138 | } 139 | } 140 | 141 | display.display(); 142 | } 143 | // Function to encode image to Base64 144 | String encodeImageToBase64(const uint8_t* imageData, size_t imageSize) { 145 | return base64::encode(imageData, imageSize); 146 | } 147 | void setup() { 148 | Serial.begin(115200); 149 | WiFi.begin(ssid, password); 150 | 151 | pinMode(BUTTON_PIN, INPUT_PULLUP); 152 | pinMode(BUZZER_PIN, OUTPUT); // Set Buzzer pin as output 153 | 154 | Wire.begin(OLED_SDA, OLED_SCL); 155 | if (!display.begin(SSD1306_SWITCHCAPVCC, 0x3C)) { 156 | Serial.println("SSD1306 allocation failed"); 157 | for (;;) 158 | ; 159 | } 160 | 161 | // Display the project title on power-on 162 | displayCenteredText("AI VISION Project\nby techiesms", 1); 163 | delay(3000); // Hold the title screen for 3 seconds 164 | 165 | displayCenteredText("Connecting to WiFi..."); 166 | while (WiFi.status() != WL_CONNECTED) { 167 | delay(1000); 168 | Serial.println("Connecting to WiFi..."); 169 | } 170 | 171 | displayCenteredText("WiFi Connected!"); 172 | delay(2000); 173 | 174 | camera_config_t config; 175 | config.ledc_channel = LEDC_CHANNEL_0; 176 | config.ledc_timer = LEDC_TIMER_0; 177 | config.pin_d0 = Y2_GPIO_NUM; 178 | config.pin_d1 = Y3_GPIO_NUM; 179 | config.pin_d2 = Y4_GPIO_NUM; 180 | config.pin_d3 = Y5_GPIO_NUM; 181 | config.pin_d4 = Y6_GPIO_NUM; 182 | config.pin_d5 = Y7_GPIO_NUM; 183 | config.pin_d6 = Y8_GPIO_NUM; 184 | config.pin_d7 = Y9_GPIO_NUM; 185 | config.pin_xclk = XCLK_GPIO_NUM; 186 | config.pin_pclk = PCLK_GPIO_NUM; 187 | config.pin_vsync = VSYNC_GPIO_NUM; 188 | config.pin_href = HREF_GPIO_NUM; 189 | config.pin_sscb_sda = SIOD_GPIO_NUM; 190 | config.pin_sscb_scl = SIOC_GPIO_NUM; 191 | config.pin_pwdn = PWDN_GPIO_NUM; 192 | config.pin_reset = RESET_GPIO_NUM; 193 | config.xclk_freq_hz = 20000000; 194 | config.pixel_format = PIXFORMAT_JPEG; 195 | config.frame_size = FRAMESIZE_QVGA; 196 | config.jpeg_quality = 10; 197 | config.fb_count = 1; 198 | 199 | if (esp_camera_init(&config) != ESP_OK) { 200 | Serial.println("Camera init failed"); 201 | displayCenteredText("Camera Init Failed"); 202 | return; 203 | } 204 | 205 | displayCenteredText("Camera Initialized"); 206 | delay(2000); 207 | 208 | displayCenteredText("Press button to capture"); 209 | } 210 | 211 | 212 | void captureAndAnalyzeImage() { 213 | Serial.println("Capturing image..."); 214 | 215 | // Capture the image frame buffer 216 | camera_fb_t* fb = esp_camera_fb_get(); // Get the frame buffer 217 | if (!fb) { 218 | Serial.println("Camera capture failed"); 219 | displayCenteredText("Capture Failed"); 220 | return; 221 | } 222 | 223 | // After the new frame is obtained, ensure the buffer is returned (cleared) 224 | esp_camera_fb_return(fb); // Release the frame buffer from the previous capture 225 | 226 | // Now, capture the new image 227 | fb = esp_camera_fb_get(); // Get the frame buffer again for the new image 228 | 229 | if (!fb) { 230 | Serial.println("Camera capture failed"); 231 | displayCenteredText("Capture Failed"); 232 | return; 233 | } 234 | 235 | Serial.println("Image captured"); 236 | String base64Image = encodeImageToBase64(fb->buf, fb->len); 237 | 238 | beep(); 239 | // Return the frame buffer after processing the image 240 | esp_camera_fb_return(fb); // Return the frame buffer to free memory 241 | 242 | if (base64Image.isEmpty()) { 243 | Serial.println("Failed to encode the image!"); 244 | displayCenteredText("Encode Failed"); 245 | return; 246 | } 247 | // Send the image to OpenAI for analysis 248 | AnalyzeImage(base64Image); 249 | } 250 | 251 | void AnalyzeImage(const String& base64Image) { 252 | Serial.println("Sending image for analysis..."); 253 | displayCenteredText("Processing..."); 254 | 255 | String result; 256 | 257 | // Prepare the payload for the OpenAI API 258 | String url = "data:image/jpeg;base64," + base64Image; 259 | Serial.println(url); 260 | 261 | DynamicJsonDocument doc(4096); 262 | doc["model"] = "gpt-4o"; 263 | JsonArray messages = doc.createNestedArray("messages"); 264 | JsonObject message = messages.createNestedObject(); 265 | message["role"] = "user"; 266 | JsonArray content = message.createNestedArray("content"); 267 | JsonObject textContent = content.createNestedObject(); 268 | textContent["type"] = "text"; 269 | textContent["text"] = "Summarize the context of this image?"; 270 | 271 | JsonObject imageContent = content.createNestedObject(); 272 | imageContent["type"] = "image_url"; 273 | JsonObject imageUrlObject = imageContent.createNestedObject("image_url"); 274 | imageUrlObject["url"] = url; 275 | imageContent["image_url"]["detail"] = "auto"; 276 | 277 | doc["max_tokens"] = 400; 278 | 279 | String jsonPayload; 280 | serializeJson(doc, jsonPayload); 281 | 282 | // Send request and validate response 283 | if (sendPostRequest(jsonPayload, result)) { 284 | Serial.print("[ChatGPT] Response: "); 285 | Serial.println(result); 286 | 287 | // Clear the display before showing the new response 288 | display.clearDisplay(); 289 | display.display(); 290 | 291 | DynamicJsonDocument responseDoc(4096); 292 | deserializeJson(responseDoc, result); 293 | 294 | String responseContent = responseDoc["choices"][0]["message"]["content"].as(); 295 | Serial.println("[ChatGPT] Parsed response: " + responseContent); 296 | 297 | // Smooth scrolling and proper word wrapping 298 | display.clearDisplay(); 299 | int lineHeight = 8; // Height of each line in pixels 300 | int maxLineChars = 21; // Approx. max characters per line 301 | int visibleLines = 7; 302 | int scrollDelay = 2000; // Delay for scrolling in milliseconds 303 | 304 | std::vector lines; // Store formatted lines for display 305 | 306 | // Split responseContent into words for word wrapping 307 | String word = ""; 308 | String currentLine = ""; 309 | 310 | for (int i = 0; i < responseContent.length(); i++) { 311 | char c = responseContent.charAt(i); 312 | if (c == ' ' || c == '\n') { 313 | if (currentLine.length() + word.length() <= maxLineChars) { 314 | currentLine += (currentLine.isEmpty() ? "" : " ") + word; 315 | } else { 316 | lines.push_back(currentLine); 317 | currentLine = word; 318 | } 319 | word = ""; 320 | } else { 321 | word += c; 322 | } 323 | } 324 | if (!currentLine.isEmpty()) lines.push_back(currentLine); 325 | if (!word.isEmpty()) lines.push_back(word); 326 | 327 | // Display lines with scrolling effect 328 | for (size_t i = 0; i < lines.size(); i++) { 329 | display.clearDisplay(); 330 | for (size_t j = 0; j < visibleLines && (i + j) < lines.size(); j++) { 331 | display.setCursor(0, j * lineHeight); 332 | display.print(lines[i + j]); 333 | } 334 | display.display(); 335 | delay(scrollDelay); 336 | } 337 | 338 | // Clear display after the response 339 | display.clearDisplay(); 340 | display.display(); 341 | 342 | displayCenteredText("Press button to capture"); 343 | } else { 344 | Serial.print("[ChatGPT] Error: "); 345 | Serial.println(result); 346 | display.clearDisplay(); 347 | display.setCursor(0, 0); 348 | display.print("API Error"); 349 | display.display(); 350 | } 351 | } 352 | 353 | bool sendPostRequest(const String& payload, String& result) { 354 | HTTPClient http; 355 | http.begin("https://api.openai.com/v1/chat/completions"); 356 | 357 | http.addHeader("Content-Type", "application/json"); 358 | http.addHeader("Authorization", "Bearer " + apiKey); 359 | http.setTimeout(20000); 360 | 361 | Serial.print("Payload size: "); 362 | Serial.println(payload.length()); 363 | 364 | int httpResponseCode = http.POST(payload); 365 | 366 | if (httpResponseCode > 0) { 367 | result = http.getString(); 368 | Serial.println("HTTP Response Code: " + String(httpResponseCode)); 369 | Serial.println("Response Body: " + result); 370 | http.end(); 371 | return true; 372 | } else { 373 | result = "HTTP request failed, response code: " + String(httpResponseCode); 374 | Serial.println("Error Code: " + String(httpResponseCode)); 375 | Serial.println("Error Message: " + http.errorToString(httpResponseCode)); 376 | http.end(); 377 | return false; 378 | } 379 | } 380 | 381 | 382 | // Remaining code remains unchanged... 383 | 384 | void loop() { 385 | if (digitalRead(BUTTON_PIN) == LOW) { 386 | Serial.println("Button pressed! Capturing image..."); 387 | displayCenteredText("Capturing..."); 388 | captureAndAnalyzeImage(); 389 | delay(1000); // Small delay to debounce button press 390 | } 391 | } 392 | void beep(){ 393 | digitalWrite(2,HIGH); 394 | delay(300); 395 | digitalWrite(2,LOW); 396 | 397 | } 398 | --------------------------------------------------------------------------------