├── .gitattributes └── code ├── debug_custom.json ├── debug.cfg └── code.ino /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | -------------------------------------------------------------------------------- /code/debug_custom.json: -------------------------------------------------------------------------------- 1 | { 2 | "name":"Arduino on ESP32", 3 | "toolchainPrefix":"xtensa-esp32-elf", 4 | "svdFile":"esp32.svd", 5 | "request":"attach", 6 | "postAttachCommands":[ 7 | "set remote hardware-watchpoint-limit 2", 8 | "monitor reset halt", 9 | "monitor gdb_sync", 10 | "thb setup", 11 | "c" 12 | ], 13 | "overrideRestartCommands":[ 14 | "monitor reset halt", 15 | "monitor gdb_sync", 16 | "thb setup", 17 | "c" 18 | ] 19 | } -------------------------------------------------------------------------------- /code/debug.cfg: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: GPL-2.0-or-later 2 | # 3 | # Example OpenOCD configuration file for ESP32-WROVER-KIT board. 4 | # 5 | # For example, OpenOCD can be started for ESP32 debugging on 6 | # 7 | # openocd -f board/esp32-wrover-kit-3.3v.cfg 8 | # 9 | 10 | # Source the JTAG interface configuration file 11 | source [find interface/ftdi/esp32_devkitj_v1.cfg] 12 | set ESP32_FLASH_VOLTAGE 3.3 13 | # Source the ESP32 configuration file 14 | source [find target/esp32.cfg] 15 | -------------------------------------------------------------------------------- /code/code.ino: -------------------------------------------------------------------------------- 1 | /* 2 | * Project: GPT4o Image Question-Answering on ESP32 3 | * Description: This code demonstrates how to run the GPT4o model on an ESP32, allowing users to input an image URL 4 | * and a question via the serial monitor and receive AI-generated responses directly in the serial monitor. 5 | * 6 | * Tested Environment: 7 | * - Arduino IDE version: 2.3.2 8 | * - ESP32 boards package version: 3.0.0 9 | * - ArduinoJson library version: 7.1.0 10 | * 11 | * Important Notes: 12 | * 1. Before uploading this code, ensure you have entered the following details: 13 | * - Wi-Fi credentials (SSID and Password) for internet connectivity. 14 | * - GPT4o API or related setup information required for the model to work. 15 | * - Any other project-specific configuration parameters as mentioned in the code. 16 | * 2. Install all necessary libraries and verify compatibility with the mentioned versions. 17 | * 3. Ensure the ESP32 is connected to a reliable power source and your hardware setup is correctly configured. 18 | * 19 | * For a complete step-by-step tutorial, check out our YouTube video: 20 | * https://youtu.be/Mp0GPfIBWMs 21 | * 22 | * Happy Making! 23 | */ 24 | 25 | #include // Include WiFi library for ESP32 26 | #include // Include HTTPClient library for HTTP requests 27 | #include // Include ArduinoJson library for JSON parsing 28 | 29 | // Replace with your WiFi credentials 30 | const char* WIFI_SSID = "SSID"; 31 | const char* WIFI_PASSWORD = "PASSWORD"; 32 | 33 | // Replace the key below with your actual API key 34 | const String API_KEY = "OPEN AI API KEY"; 35 | 36 | // API host and endpoint 37 | const char* ENDPOINT = "https://api.openai.com/v1/chat/completions"; 38 | 39 | void setup() { 40 | Serial.begin(115200); 41 | WiFi.begin(WIFI_SSID, WIFI_PASSWORD); 42 | 43 | // Connect to WiFi 44 | while (WiFi.status() != WL_CONNECTED) { 45 | delay(1000); 46 | Serial.println("Connecting to WiFi..."); 47 | } 48 | 49 | Serial.println("Connected to WiFi!"); 50 | Serial.print("IP Address: "); 51 | Serial.println(WiFi.localIP()); 52 | 53 | Serial.println("\nEnter an image URL for analysis:"); 54 | } 55 | 56 | void loop() { 57 | static String imageUrl = ""; 58 | static bool awaitingQuestion = false; 59 | 60 | if (Serial.available() > 0) { 61 | String input = Serial.readStringUntil('\n'); 62 | input.trim(); 63 | 64 | if (!awaitingQuestion) { 65 | imageUrl = input; 66 | Serial.println("Image URL received. Now enter the question to ask:"); 67 | awaitingQuestion = true; 68 | } else { 69 | String question = input; 70 | awaitingQuestion = false; 71 | 72 | if (imageUrl.length() > 0 && question.length() > 0) { 73 | String result; 74 | Serial.println("\n[ChatGPT] - Analyzing the provided image URL and question"); 75 | 76 | if (Image_Query("gpt-4o", "user", question.c_str(), imageUrl.c_str(), "auto", 400, result)) { 77 | Serial.print("[ChatGPT] Response: "); 78 | Serial.println(result); 79 | } else { 80 | Serial.print("[ChatGPT] Error: "); 81 | Serial.println(result); 82 | } 83 | 84 | Serial.println("\nEnter another image URL for analysis:"); 85 | } else { 86 | Serial.println("Invalid input. Please try again."); 87 | } 88 | } 89 | } 90 | } 91 | 92 | bool Image_Query(const char* model, const char* role, const char* text, const char* imageUrl, const char* detail, int max_tokens, String& result) { 93 | String postBody = String("{") + 94 | "\"model\": \"" + model + "\", " + 95 | "\"max_tokens\": " + String(max_tokens) + ", " + 96 | "\"messages\": [{\"role\": \"" + role + "\", \"content\": " + 97 | "[{\"type\": \"text\", \"text\": \"" + text + "\"}, " + 98 | "{\"type\": \"image_url\", \"image_url\": {\"url\": \"" + imageUrl + "\", \"detail\": \"" + detail + "\"}}]}]}"; 99 | 100 | return sendRequest(postBody, result); 101 | } 102 | 103 | bool sendRequest(const String& postBody, String& result) { 104 | HTTPClient http; 105 | 106 | http.begin(ENDPOINT); // Specify the endpoint 107 | http.addHeader("Authorization", "Bearer " + API_KEY); // Add Authorization header 108 | http.addHeader("Content-Type", "application/json"); // Add Content-Type header 109 | 110 | int httpResponseCode = http.POST(postBody); 111 | 112 | if (httpResponseCode > 0) { 113 | String response = http.getString(); 114 | 115 | if (httpResponseCode == 200) { 116 | int start = response.indexOf("{"); 117 | int end = response.lastIndexOf("}"); 118 | 119 | if (start == -1 || end == -1) { 120 | result = "[ERR] Invalid JSON response"; 121 | return false; 122 | } 123 | 124 | String jsonBody = response.substring(start, end + 1); 125 | 126 | DynamicJsonDocument doc(2048); 127 | DeserializationError error = deserializeJson(doc, jsonBody); 128 | if (error) { 129 | result = "[ERR] JSON Parsing Failed: "; 130 | result += error.c_str(); 131 | return false; 132 | } 133 | 134 | if (doc.containsKey("choices") && doc["choices"].size() > 0) { 135 | result = doc["choices"][0]["message"]["content"].as(); 136 | return true; 137 | } else { 138 | result = "[ERR] No valid response in JSON"; 139 | return false; 140 | } 141 | } else { 142 | result = "[ERR] HTTP Error: " + String(httpResponseCode) + ", Response: " + response; 143 | return false; 144 | } 145 | } else { 146 | result = "[ERR] Connection failed, Error: " + String(httpResponseCode); 147 | return false; 148 | } 149 | 150 | http.end(); 151 | } 152 | --------------------------------------------------------------------------------