// ├── GeminiWithFiles.js ├── InvoiceManager.js ├── LICENCE ├── README.md ├── appsscript.json ├── images ├── fig1.jpg ├── fig2.png ├── fig3.png ├── fig4.png └── fig5.png └── main.js /GeminiWithFiles.js: --------------------------------------------------------------------------------
/**
 * A new Google Apps Script library called GeminiWithFiles simplifies using Gemini,
 * a large language model, to process unstructured data like images and PDFs.
 * GeminiWithFiles can upload files, generate content, and create descriptions
 * from multiple images at once.
 * This significantly reduces workload and expands possibilities for using Gemini.
 *
 * GeminiWithFiles v2.0.0
 * GitHub: https://github.com/tanaikech/GeminiWithFiles
 */
class GeminiWithFiles {

  /**
   *
   * @param {Object} object API key or access token for using Gemini API.
   * @param {String} object.apiKey API key.
   * @param {String} object.accessToken Access token. When omitted, ScriptApp.getOAuthToken() is used.
   * @param {String} object.model Model. Default is "models/gemini-1.5-flash-latest".
   * @param {String} object.version Version of API. Default is "v1beta".
   * @param {Boolean} object.doCountToken Default is false. If this is true, when Gemini API is requested, the token of request is shown in the log.
   * @param {Array} object.history History for continuing chat.
   * @param {Object} object.functions Custom functions. Must include a "params_" property describing each function.
   * @param {String} object.response_mime_type In the current stage, only "application/json" can be used.
   * @param {String} object.responseMimeType Alias of response_mime_type (response_mime_type wins when both are given).
   * @param {Object} object.systemInstruction Ref: https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/gemini.
   * @param {Boolean} object.exportTotalTokens When this is true, the generated content and the usage metadata are returned as an object.
   * @param {Boolean} object.exportRawData The default value is false. When this is true, the raw data returned from Gemini API is returned.
   */
  constructor(object = {}) {
    const {
      apiKey,
      accessToken,
      model,
      version,
      doCountToken,
      history,
      functions,
      response_mime_type,
      responseMimeType,
      systemInstruction,
      exportTotalTokens,
      exportRawData,
    } = object;

    /** @private */
    this.model = model || "models/gemini-1.5-flash-latest"; // After v2.0.0, the default was changed from "models/gemini-1.5-pro-latest".

    /** @private */
    this.version = version || "v1beta";

    const baseUrl = "https://generativelanguage.googleapis.com";

    /** @private */
    this.urlGenerateContent = `${baseUrl}/${this.version}/${this.model}:generateContent`;

    /** @private */
    this.urlUploadFile = `${baseUrl}/upload/${this.version}/files`;

    /** @private */
    this.urlGetFileList = `${baseUrl}/${this.version}/files`;

    /** @private */
    this.urlDeleteFile = `${baseUrl}/${this.version}/`;

    /** @private */
    this.urlCountToken = `${baseUrl}/${this.version}/${this.model}:countTokens`;

    /** @private */
    this.doCountToken = doCountToken || false;

    /** @private */
    this.exportTotalTokens = exportTotalTokens || false;

    /** @private */
    this.exportRawData = exportRawData || false;

    /** @private */
    this.totalTokens = 0;

    /** @private */
    this.queryParameters = {};
    if (apiKey) {
      this.queryParameters.key = apiKey;
    }

    /** @private */
    this.accessToken = accessToken || ScriptApp.getOAuthToken();

    /** @private */
    this.headers = { authorization: `Bearer ${this.accessToken}` };

    /** @private */
    this.fileIds = [];

    /** @private */
    this.asImage = false;

    /** @private */
    this.blobs = [];

    /** @private (the misspelled name is kept for backward compatibility) */
    this.resumableUplaods = [];

    /** @private */
    this.fileList = [];

    // Either spelling may be given. Previously, giving only "responseMimeType" stored undefined.
    /** @private */
    this.response_mime_type = response_mime_type || responseMimeType || "";

    /** @private */
    this.systemInstruction = systemInstruction || null;

    this.functions = functions && functions.params_ ? functions : {};

    // "params_" only holds the declarations; it is not a callable function name.
    /** @private */
    this.toolConfig = null;
    const functionNames = Object.keys(this.functions).filter((k) => k !== "params_");
    if (functionNames.length > 0) {
      this.toolConfig = {
        functionCallingConfig: {
          mode: "ANY",
          allowedFunctionNames: functionNames,
        },
      };
    }

    /**
     * Array including the history of chat with Gemini.
     *
     * @type {Array}
     */
    this.history = history || [];
  }

  /**
   * ### Description
   * Set file IDs.
   *
   * @param {Array} fileIds File IDs on Google Drive for uploading to Gemini.
   * @param {Boolean} asImage Default is false. If this is true, all files are used as the thumbnail images.
   * @returns {GeminiWithFiles}.
   */
  setFileIds(fileIds, asImage = false) {
    this.fileIds.push(...fileIds);
    this.asImage = asImage;
    return this;
  }

  /**
   * ### Description
   * Set blobs.
   *
   * @param {Blob[]} blobs Blobs for uploading to Gemini.
   * @returns {GeminiWithFiles}.
   */
  setBlobs(blobs) {
    this.blobs.push(...blobs);
    return this;
  }

  /**
   * ### Description
   * Upload data (files) to Gemini with resumable upload.
   * In this case, you can use the file ID on Google Drive and the URL of the direct link of the file.
   *
   * @param {Array} array Array including objects ({ fileId } or { url }) for uploading to Gemini.
   * @returns {GeminiWithFiles}.
   */
  setFileIdsOrUrlsWithResumableUpload(array) {
    this.resumableUplaods.push(...array);
    return this;
  }

  /**
   * ### Description
   * Create object for using the generateContent method.
   * Files sharing the same source (file ID, blob name, or display name) are grouped,
   * and the pages of a multi-page file are ordered by their page number.
   *
   * @param {Array} fileList File list from the uploadFiles and getFileList method.
   * @param {Number} retry Number of times the file state is polled while files are "PROCESSING". Default is 3.
   * @returns {GeminiWithFiles}
   * @throws {Error} When fileList is empty.
   */
  withUploadedFilesByGenerateContent(fileList = [], retry = 3) {
    if (fileList.length === 0) {
      throw new Error("Given fileList is empty.");
    }
    const processing = fileList.filter(({ state }) => state === "PROCESSING");
    if (processing.length > 0) {
      if (retry > 0) {
        const waitTime = 10; // seconds
        const dn = processing.map(({ displayName }) => displayName);
        console.warn(`Now, the state of the uploaded files "${dn.join(",")}" is not active. So, it will wait until it is active. Please wait for ${waitTime} seconds. Retry (${4 - retry}/3)`);
        // Wait first, then reload the metadata, and use the result of the retry.
        // Previously the refreshed list was discarded and the stale list was used.
        Utilities.sleep(waitTime * 1000);
        const names = new Set(fileList.map(({ name }) => name));
        const refreshed = this.getFileList().filter(({ name }) => names.has(name));
        return this.withUploadedFilesByGenerateContent(refreshed, retry - 1);
      }
      console.warn("Although It waited for 30 seconds, the state of the uploaded files has not changed to active. In this case, please directly retrieve the metadata of the uploaded file after the state becomes active and generate content again.");
    }

    const pagedName = /^fileId@.*?\$page@.*\$maxPage@.*$/;
    const blobName = /^blobName@.*$/;
    const pageOf = ({ displayName }) => Number(displayName.split("$")[1].split("@")[1]);

    const groups = new Map();
    for (const file of fileList) {
      const { displayName } = file;
      let key = displayName;
      if (pagedName.test(displayName)) {
        key = displayName.split("$")[0].split("@")[1];
      } else if (blobName.test(displayName)) {
        key = displayName.split("@")[1];
      }
      if (!groups.has(key)) {
        groups.set(key, []);
      }
      groups.get(key).push(file);
    }
    for (const files of groups.values()) {
      // The pattern must be tested against displayName (not against the object itself).
      if (pagedName.test(files[0].displayName)) {
        files.sort((a, b) => pageOf(a) - pageOf(b));
      }
    }
    this.fileList = [...groups.values()].map((files) => ({ files }));
    return this;
  }

  /**
   * ### Description
   * Upload files to Gemini.
   * The sources are used in this order: file IDs, blobs, resumable uploads.
   *
   * @param {Number} n Number of concurrent upload to Gemini. Default value is 50.
   * @returns {Object} Returned object from Gemini.
   * @throws {Error} When no upload item was set.
   */
  uploadFiles(n = 50) {
    const q = { ...this.queryParameters, uploadType: "multipart" };
    const url = this.addQueryParameters_(this.urlUploadFile, q);
    const toRequest = (displayName, file) => ({
      url,
      method: "post",
      payload: {
        metadata: Utilities.newBlob(JSON.stringify({ file: { displayName } }), "application/json"),
        file,
      },
      // Without an API key, the access token is required for every upload source.
      ...this.authOptions_(),
      muteHttpExceptions: true,
    });

    if (this.fileIds.length > 0) {
      const requests = this.fileIds.map((fileId) => {
        const file = this.asImage
          ? this.fetch_({ url: `https://drive.google.com/thumbnail?sz=w1500&id=${fileId}`, headers: this.headers }).getBlob()
          : DriveApp.getFileById(fileId).getBlob();
        return toRequest(`fileId@${fileId}$page@1$maxPage@1`, file);
      });
      return this.requestUploadFiles_(requests, n);
    }
    if (this.blobs.length > 0) {
      const requests = this.blobs.map((blob) => toRequest(`blobName@${blob.getName()}`, blob));
      return this.requestUploadFiles_(requests, n);
    }
    if (this.resumableUplaods.length > 0) {
      return this.resumableUplaods.map((e) => this.uploadApp_(e));
    }
    throw new Error("No upload items.");
  }

  /**
   * ### Description
   * Request upload file method.
   *
   * @private
   * @param {Object[]} requests Requests including parameters. This array is not modified.
   * @param {Number} n Number of concurrent upload to Gemini.
   * @returns {Object[]} Metadata of the files that were uploaded successfully.
   */
  requestUploadFiles_(requests, n) {
    console.log(`Total number of items: ${requests.length}`);
    const batches = this.chunk_(requests, n);
    return batches.flatMap((batch, i) => {
      console.log(`Upload process: ${i + 1}/${batches.length} every ${n} items.`);
      return UrlFetchApp.fetchAll(batch).reduce((ar, r) => {
        const text = r.getContentText();
        const obj = this.parseJson_(text);
        if (obj && obj.file) {
          ar.push(obj.file);
        } else {
          // Failed uploads are skipped, but they are reported instead of being dropped silently.
          console.warn(text);
        }
        return ar;
      }, []);
    });
  }

  /**
   * ### Description
   * Get file list in Gemini.
   *
   * @returns {Array} File list.
   */
  getFileList() {
    const fileList = [];
    let pageToken = "";
    do {
      const q = { ...this.queryParameters, pageSize: 100, pageToken };
      const url = this.addQueryParameters_(this.urlGetFileList, q);
      const res = this.fetch_({ url, ...this.authOptions_() });
      const obj = JSON.parse(res.getContentText());
      pageToken = obj.nextPageToken;
      if (Array.isArray(obj.files) && obj.files.length > 0) {
        fileList.push(...obj.files);
      }
    } while (pageToken);
    this.fileList = fileList;
    return this.fileList;
  }

  /**
   * ### Description
   * Delete files from Gemini.
   *
   * @param {Array} names Array including names of the files on Gemini.
   * @param {Number} n Number of concurrent delete files. Default value is 50.
   * @returns {Array} Array including response values. When the delete is successed, an empty object is returned.
   */
  deleteFiles(names = [], n = 50) {
    const requests = names.map((name) => {
      const base = `${this.urlDeleteFile}${name}`;
      return {
        url: this.queryParameters.key ? this.addQueryParameters_(base, { key: this.queryParameters.key }) : base,
        method: "delete",
        ...this.authOptions_(),
        muteHttpExceptions: true,
      };
    });
    if (requests.length === 0) return [];
    console.log(`${requests.length} items are deleted.`);
    return this.chunk_(requests, n).flatMap((batch) =>
      UrlFetchApp.fetchAll(batch).map((r) => {
        const text = r.getContentText();
        if (!text) return {};
        const obj = this.parseJson_(text);
        return obj === null ? text : obj;
      })
    );
  }

  /**
   * ### Description
   * Main method.
   *
   * @param {Object} object Object using Gemini API.
   * @param {String} object.q Input text.
   * @param {Object} object.jsonSchema JSON schema for the output. When "q" is not given, this is used as the prompt.
   * @param {Array} object.parts Parts given directly to Gemini. Used when "q" is not given.
   * @param {Number} retry Maximum number of requests (function-calling turns and retries for the status code 500). Default is 5.
   * @returns {(String|Number|Array|Object|Boolean)} Output value. "No values." is returned when no text was generated.
   * @throws {Error} When no prompt is given, or when the API returns an error.
   */
  generateContent(object, retry = 5) {
    if (!object || typeof object != "object") {
      throw new Error("Please set object including question.");
    }
    const { q, jsonSchema, parts } = object;
    const hasSchema = Boolean(jsonSchema) && typeof jsonSchema == "object";
    const hasParts = Array.isArray(parts);
    if (!q && !hasSchema && !hasParts) {
      throw new Error("Please set a question.");
    }

    const uploadedFiles = this.fileList;
    if (uploadedFiles.length > 0) {
      const n = uploadedFiles.reduce((c, o) => c + (o.files ? o.files.length : 1), 0);
      console.log(`${n} uploaded files are used with generateCotent.`);
    }
    const files = this.createFileParts_(uploadedFiles);

    // "q" has priority. Without "q", the JSON schema becomes the prompt and/or "parts" are used.
    let userParts;
    if (q) {
      userParts = [{ text: q }, ...files];
    } else {
      const schemaPrompt = hasSchema ? [{ text: `Follow JSON schema.${JSON.stringify(jsonSchema)}` }] : [];
      userParts = [...schemaPrompt, ...(hasParts ? parts : []), ...files];
    }
    const contents = [...this.history, { parts: userParts, role: "user" }];

    const function_declarations = Object.keys(this.functions)
      .filter((k) => k !== "params_")
      .map((k) => ({
        name: k,
        description: this.functions.params_[k]?.description,
        parameters: this.functions.params_[k]?.parameters,
      }));

    const url = this.addQueryParameters_(this.urlGenerateContent, this.queryParameters);
    const results = [];
    let usageMetadataObj;
    let pendingCall = true; // Truthy while the model still requests a function call.
    do {
      retry--;
      const payload = { contents };
      if (function_declarations.length > 0) {
        payload.tools = [{ function_declarations }];
      }
      if (this.response_mime_type != "") {
        payload.generationConfig = { response_mime_type: this.response_mime_type };
      }
      if (this.systemInstruction) {
        payload.systemInstruction = this.systemInstruction;
      }
      if (this.toolConfig) {
        payload.toolConfig = this.toolConfig;
      }
      if (this.doCountToken) {
        this.logTokenCount_(payload.contents, files.length > 0);
      }
      const res = this.fetch_({
        url,
        method: "post",
        payload: JSON.stringify(payload),
        contentType: "application/json",
        ...this.authOptions_(),
        muteHttpExceptions: true,
      }, false);
      const status = res.getResponseCode();
      if (status === 500 && retry > 0) {
        console.warn("Retry by the status code 500.");
        console.warn("If the error 500 is continued, please try 'const g = GeminiWithFiles_test.geminiWithFiles({ apiKey, functions: {} });' and 'const g = GeminiWithFiles_test.geminiWithFiles({ apiKey, response_mime_type: \"application/json\" });'.");
        console.warn(res.getContentText());
        Utilities.sleep(3000);
        // Send the same request again. "pendingCall" is still truthy here, so the loop goes on.
        continue;
      }
      if (status !== 200) {
        throw new Error(res.getContentText());
      }
      const raw = JSON.parse(res.getContentText());
      if (this.exportRawData) {
        results.push(raw);
        break;
      }
      const { candidates, usageMetadata } = raw;
      usageMetadataObj = { ...usageMetadata };
      if (candidates && !candidates[0]?.content?.parts) {
        results.push(candidates[0]);
        break;
      }
      const partsAr = (candidates && candidates[0]?.content?.parts) || [];
      results.push(...partsAr);
      contents.push({ parts: partsAr.slice(), role: "model" });
      pendingCall = partsAr.find((o) => Object.prototype.hasOwnProperty.call(o, "functionCall"));
      if (pendingCall && pendingCall.functionCall?.name) {
        const functionName = pendingCall.functionCall.name;
        if (typeof this.functions[functionName] != "function") {
          throw new Error(`Function "${functionName}" requested by Gemini is not defined.`);
        }
        const res2 = this.functions[functionName](pendingCall.functionCall.args || null);
        contents.push({
          parts: [
            {
              functionResponse: {
                name: functionName,
                response: { name: functionName, content: res2 },
              },
            },
          ],
          role: "function",
        });
        results.push({ functionResponse: res2 });
        this.history = contents;
        if (/^customType_.*/.test(functionName)) {
          // Values wrapped as { items: value } are unwrapped.
          const isItemsOnly = (e) =>
            e !== null &&
            typeof e == "object" &&
            Object.prototype.hasOwnProperty.call(e, "items") &&
            Object.keys(e).length === 1;
          if (isItemsOnly(res2)) {
            return res2.items;
          }
          if (Array.isArray(res2) && res2.every(isItemsOnly)) {
            return res2.map((e) => e.items || e);
          }
          return res2;
        }
      } else {
        this.history = contents;
      }
    } while (pendingCall && retry > 0);

    if (this.exportRawData) {
      return results;
    }
    const output = results.pop();
    if (
      !output ||
      (output.finishReason && ["OTHER", "RECITATION"].includes(output.finishReason)) ||
      typeof output.text != "string" // e.g. a blocked candidate or a function response as the last item.
    ) {
      console.warn(output);
      return "No values.";
    }
    const text = output.text.trim();
    // The text is returned as it is when it is not JSON.
    const parsed = this.parseJson_(text);
    const returnValue = parsed === null && text !== "null" ? text : parsed;
    if (this.exportTotalTokens) {
      return { returnValue, usageMetadata: usageMetadataObj };
    }
    return returnValue;
  }

  /**
   * ### Description
   * Convert the uploaded files to the parts for the generateContent method.
   *
   * @private
   * @param {Array} uploadedFiles Grouped file list ({ files: [...] }) or raw file metadata.
   * @returns {Array} Parts including the filename notes and the file data.
   */
  createFileParts_(uploadedFiles) {
    const pagedName = /^fileId@.*?\$page@.*\$maxPage@.*$/;
    const blobName = /^blobName@.*$/;
    const toFileData = ({ mimeType, uri }) => ({ fileData: { fileUri: uri, mimeType } });
    const note = (name, pages) => ({ text: `[Filename of the following file is ${name}. Total pages are ${pages}.]` });
    return uploadedFiles.flatMap((entry) => {
      const { files } = entry;
      if (!Array.isArray(files)) {
        return [note(entry.name, 1), toFileData(entry)];
      }
      if (files.length === 0) {
        return [];
      }
      const { displayName } = files[0];
      if (pagedName.test(displayName)) {
        return [note(displayName.split("$")[0].split("@")[1], files.length), ...files.map(toFileData)];
      }
      if (blobName.test(displayName)) {
        return [note(displayName.split("@")[1], 1), ...files.map(toFileData)];
      }
      return [note(displayName, 1), ...files.map(toFileData)];
    });
  }

  /**
   * ### Description
   * Request the countTokens method and show the result in the log.
   *
   * @private
   * @param {Array} contents Contents for counting the tokens.
   * @param {Boolean} withFiles Whether the uploaded files are included in the contents.
   * @returns {void}
   */
  logTokenCount_(contents, withFiles) {
    const res = this.fetch_({
      url: this.addQueryParameters_(this.urlCountToken, this.queryParameters),
      method: "post",
      payload: JSON.stringify({ contents }),
      contentType: "application/json",
      ...this.authOptions_(),
      muteHttpExceptions: true,
    }, false);
    if (res.getResponseCode() !== 200) {
      console.error(res.getContentText());
      if (withFiles) {
        // I confirmed that this issue was resolved on Jun 2, 2024.
        // So, I believe that this warning will not be used.
        console.warn("In the current stage, when the uploaded files are used with countToken, an error like 'PERMISSION_DENIED'. So, at this time, the script is run as 'doCountToken: false'. I have already reported this. https://issuetracker.google.com/issues/343257597 I believe that this will be resolved in the future update.");
      }
    } else {
      console.log(res.getContentText());
    }
  }

  /**
   * ### Description
   * Options for the authorization. When the API key is used, no header is required.
   *
   * @private
   * @returns {Object} Empty object or an object including the request headers.
   */
  authOptions_() {
    return this.queryParameters.key ? {} : { headers: this.headers };
  }

  /**
   * ### Description
   * Split an array into chunks without modifying it.
   *
   * @private
   * @param {Array} items Source array.
   * @param {Number} size Chunk size. Invalid values are handled as 1.
   * @returns {Array[]} Chunks.
   */
  chunk_(items, size) {
    const step = Math.max(1, Math.floor(Number(size)) || 1);
    const chunks = [];
    for (let i = 0; i < items.length; i += step) {
      chunks.push(items.slice(i, i + step));
    }
    return chunks;
  }

  /**
   * ### Description
   * Parse JSON without throwing.
   *
   * @private
   * @param {String} text Text.
   * @returns {*} Parsed value, or null when the text is not JSON.
   */
  parseJson_(text) {
    try {
      return JSON.parse(text);
    } catch (err) {
      return null;
    }
  }

  /**
   * ### Description
   * This method is used for adding the query parameters to the URL.
   * Ref: https://tanaikech.github.io/2018/07/12/adding-query-parameters-to-url-using-google-apps-script/
   *
   * @private
   * @param {String} url The base URL for adding the query parameters.
   * @param {Object} obj JSON object including query parameters.
   * @return {String} URL including the query parameters.
   */
  addQueryParameters_(url, obj) {
    const query = Object.entries(obj)
      .flatMap(([k, v]) => (Array.isArray(v) ? v : [v]).map((e) => `${encodeURIComponent(k)}=${encodeURIComponent(e)}`))
      .join("&");
    return (url == "" ? "" : `${url}?`) + query;
  }

  /**
   * ### Description
   * Request Gemini API.
   *
   * @private
   * @param {Object} obj Object for using UrlFetchApp.fetchAll. This object is not modified.
   * @param {Boolean} checkError When this is true (default), an error is thrown unless the status code is 200.
   * @returns {UrlFetchApp.HTTPResponse} Response from API.
   */
  fetch_(obj, checkError = true) {
    const res = UrlFetchApp.fetchAll([{ ...obj, muteHttpExceptions: true }])[0];
    if (checkError && res.getResponseCode() !== 200) {
      throw new Error(res.getContentText());
    }
    return res;
  }

  /**
   * ### Description
   * Upload large file to Gemini with resumable upload.
   * ref: https://github.com/tanaikech/UploadApp
   * ref: https://medium.com/google-cloud/uploading-large-files-to-gemini-with-google-apps-script-overcoming-50-mb-limit-6ea63204ee81
   *
   * @private
   * @param {Object} object Source of the upload.
   * @param {String} object.url Direct link of the file.
   * @param {String} object.fileId File ID on Google Drive.
   * @returns {Object} Metadata of the uploaded file. undefined when the upload is not finished yet (run the script again).
   * @throws {Error} When neither URL nor file ID is given.
   */
  uploadApp_(object) {
    /**
     * ### Description
     * Upload a little large data with Google APIs. The target of this script is the data with several hundred MB.
     * GitHub: https://github.com/tanaikech/UploadApp
     *
     * Sample situation:
     * - Upload a file from Google Drive to Gemini, Google Drive, YouTube, and so on.
     * - Upload a file from the URL outside of Google to Gemini, Google Drive, YouTube, and so on.
     *
     * The progress is saved to the script property "next", so an upload exceeding the
     * time limit can be continued by running the script again.
     */
    class UploadApp {

      /**
       *
       * @param {Object} object Information of the source data and the metadata of the destination.
       * @param {Object} object.source Information of the source data.
       * @param {Object} object.destination Information of the metadata of the destination.
       */
      constructor(object = {}) {
        this.property = PropertiesService.getScriptProperties();
        const next = this.property.getProperty("next");
        const hasSource = Boolean(object.source && (object.source.fileId || object.source.url));
        if (!next && !hasSource) {
          throw new Error("Please set a valid object.");
        }
        if (next) {
          // Continue the suspended upload.
          this.tempObject = JSON.parse(next);
          this.current = this.tempObject.next;
          this.tempObject.next = 0;
          delete this.tempObject.result;
        } else {
          this.current = 0;
          this.tempObject = { orgObject: { ...object } };
        }
        const { source } = this.tempObject.orgObject;
        if (source.fileId) {
          this.googleDrive = true;
          this.fileGet = `https://www.googleapis.com/drive/v3/files/${source.fileId}?supportsAllDrives=true`;
          this.downloadUrl = `${this.fileGet}&alt=media`;
        } else {
          this.googleDrive = false;
          this.downloadUrl = source.url;
        }
        this.startTime = Date.now();
        this.limitProcessTime = 300 * 1000; // milliseconds (5 minutes)
        // The token given for this run has priority, because the token saved by a suspended run might be expired.
        this.authorization = `Bearer ${object.accessToken || this.tempObject.orgObject.accessToken || ScriptApp.getOAuthToken()}`;
        this.chunkSize = 16777216; // Chunk size is 16 MB.
      }

      /**
       * ### Description
       * Main method.
       *
       * @returns {Object} Response value. When the file could be completly uploaded, the file metadata of the uploaded file is returned. When the file is not be completly uploaded, an object including message.
       */
      run() {
        if (this.current == 0) {
          console.log("Get metadata");
          this.getMetadata_();
          console.log("Calculate chunks");
          this.getChunks_();
          console.log("Get location");
          this.getLocation_();
        }
        console.log("Download and upload data.");
        this.downloadAndUpload_();
        return this.tempObject.result;
      }

      /**
       * ### Description
       * Get metadata (MIME type and size) of the source data.
       *
       * @return {void}
       * @private
       */
      getMetadata_() {
        const { source } = this.tempObject.orgObject;
        if (this.googleDrive) {
          const res = UrlFetchApp.fetch(`${this.fileGet}&fields=mimeType%2Csize`, { headers: { authorization: this.authorization } });
          const obj = JSON.parse(res.getContentText());
          if (obj.mimeType.includes("application/vnd.google-apps")) {
            throw new Error("This script cannot be used to the files related to Google. For example, Google Doc, Google Sheet, and so on.");
          }
          source.mimeType = obj.mimeType;
          source.size = Number(obj.size);
          return;
        }
        // Request the first bytes to confirm that the URL supports the range request.
        const res = UrlFetchApp.fetch(this.downloadUrl, {
          muteHttpExceptions: true,
          headers: { Range: "bytes=0-1" },
        });
        if (res.getResponseCode() != 206) {
          throw new Error("This file cannot be done the resumable download.");
        }
        const headers = res.getHeaders();
        const range = headers["Content-Range"].split("\/");
        const disposition = headers["Content-Disposition"];
        const matched = disposition && disposition.match(/filename=\"([a-zA-Z0-9\s\S].+)\";/);
        source.fileName = matched ? matched[1].trim() : this.startTime.toString();
        source.mimeType = headers["Content-Type"].split(";")[0];
        source.size = Number(range[1]);
      }

      /**
       * ### Description
       * Calculate the chunks ([first byte, last byte]) for uploading.
       *
       * @return {void}
       * @private
       */
      getChunks_() {
        const size = Number(this.tempObject.orgObject.source.size);
        if (!(size > 0)) {
          throw new Error("The size of the source data could not be retrieved or the source data is empty.");
        }
        const count = Math.ceil(size / this.chunkSize);
        this.tempObject.chunks = Array.from({ length: count }, (_, i) => [
          i * this.chunkSize,
          i == count - 1 ? size - 1 : (i + 1) * this.chunkSize - 1,
        ]);
      }

      /**
       * ### Description
       * Get location URL for uploading.
       *
       * @return {void}
       * @private
       */
      getLocation_() {
        const { destination } = this.tempObject.orgObject;
        const options = {
          payload: JSON.stringify(destination.metadata),
          contentType: "application/json",
          muteHttpExceptions: true,
        };
        const q = this.parseQueryParameters_(destination.uploadUrl);
        if (!q.queryParameters.uploadType) {
          throw new Error("Please confirm whether your endpoint can be used for the resumable upload. And, please include uploadType=resumable in uploadUrl.");
        }
        if (!q.queryParameters.key) {
          options.headers = { authorization: this.authorization };
        }
        const res = UrlFetchApp.fetch(destination.uploadUrl, options);
        if (res.getResponseCode() != 200) {
          throw new Error(res.getContentText());
        }
        const headers = res.getAllHeaders();
        this.tempObject.location = headers["Location"] || headers["location"];
      }

      /**
       * ### Description
       * Download and upload data chunk by chunk.
       * When the process time is over the limit, the progress is saved and the loop is stopped.
       *
       * @return {void}
       * @private
       */
      downloadAndUpload_() {
        const { chunks, location } = this.tempObject;
        const { size } = this.tempObject.orgObject.source;
        const len = chunks.length;
        for (let i = this.current; i < len; i++) {
          const [start, end] = chunks[i];
          const currentBytes = `${start}-${end}`;
          console.log(`Now... ${i + 1}/${len}`);
          const params1 = { headers: { range: `bytes=${currentBytes}` }, muteHttpExceptions: true };
          if (this.googleDrive) {
            params1.headers.authorization = this.authorization;
          }
          console.log(`Start downloading data with ${currentBytes}`);
          const res1 = UrlFetchApp.fetch(this.downloadUrl, params1);
          const downloadStatus = res1.getResponseCode();
          // 200 means that the whole data was returned, which is valid only for a single chunk.
          if (downloadStatus != 206 && !(downloadStatus == 200 && len == 1)) {
            throw new Error(res1.getContentText());
          }
          let bytes = res1.getContent();
          console.log(`Finished downloading data with ${currentBytes}`);
          const params2 = {
            headers: { "Content-Range": `bytes ${currentBytes}/${size}` },
            payload: bytes,
            muteHttpExceptions: true,
          };
          console.log(`Start uploading data with ${currentBytes}`);
          const res2 = UrlFetchApp.fetch(location, params2);
          console.log(`Finished uploading data with ${currentBytes}`);
          bytes = null; // Release the chunk before the next download.
          params2.payload = null;
          const statusCode = res2.getResponseCode();
          if (statusCode == 200) {
            console.log("Done.");
            this.tempObject.result = JSON.parse(res2.getContentText());
            break;
          } else if (statusCode == 308) {
            console.log("Upload the next chunk.");
          } else {
            throw new Error(res2.getContentText());
          }
          if ((Date.now() - this.startTime) > this.limitProcessTime) {
            this.tempObject.next = i + 1;
            this.property.setProperty("next", JSON.stringify(this.tempObject));
            break;
          }
        }
        if (this.tempObject.next > 0 && !this.tempObject.result) {
          const message = "There is the next upload chunk. So, please run the script again.";
          console.warn(message);
          this.tempObject.result = { message };
        } else {
          this.property.deleteProperty("next");
        }
      }

      /**
       * ### Description
       * Parse query parameters.
       * ref: https://github.com/tanaikech/UtlApp?tab=readme-ov-file#parsequeryparameters
       *
       * @param {String} url URL including the query parameters.
       * @return {Object} Object including the base URL ("url") and the parsed query parameters ("queryParameters").
       *   Each value of queryParameters is an array. When no parameter exists, queryParameters is an empty object.
       * @private
       */
      parseQueryParameters_(url) {
        if (url === null || typeof url != "string") {
          throw new Error("Please give URL (String) including the query parameters.");
        }
        const [baseUrl, query] = url.split("?");
        const queryParameters = {};
        if (query) {
          for (const pair of query.split("&")) {
            const [rawKey, rawValue = ""] = pair.split("=");
            const key = rawKey.trim();
            if (key === "") continue;
            const text = rawValue.trim();
            const value = text !== "" && !Number.isNaN(Number(text)) ? Number(text) : text;
            if (queryParameters[key]) {
              queryParameters[key].push(value);
            } else {
              queryParameters[key] = [value];
            }
          }
        }
        return { url: baseUrl, queryParameters };
      }
    }

    const { url, fileId } = object || {};
    if (!url && !fileId) {
      throw new Error("No URL or file ID.");
    }
    const displayName = url ? `url@${url}$page@1$maxPage@1` : `fileId@${fileId}$page@1$maxPage@1`;
    // The API key is added only when it exists. Without it, UploadApp uses the access token.
    const uploadUrl = this.addQueryParameters_(this.urlUploadFile, { uploadType: "resumable", ...this.queryParameters });
    const obj = {
      destination: { uploadUrl, metadata: { file: { displayName } } },
      accessToken: this.accessToken,
      source: url ? { url } : { fileId },
    };
    const result = new UploadApp(obj).run();
    return result ? result.file : undefined;
  }

}
// -------------------------------------------------------------------------------- /InvoiceManager.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Class object for Unlock Smart Invoice Management: Gemini, Gmail, and Google Apps Script Integration 3 | */ 4 | class InvoiceManager { 5 | 6 | /** 7 | * 8 | * @param {Object} object API key or access token for using Gemini API. 9 | * @param {String} object.apiKey API key.
10 | * @param {String} object.accessToken Access token. 11 | * @param {String} object.model Model. Default is "models/gemini-1.5-pro-latest". 12 | * @param {String} object.version Version of API. Default is "v1beta". 13 | */ 14 | constructor(object = null) { 15 | /** @private */ 16 | this.object = object; 17 | 18 | /** @private */ 19 | this.apiKey = null; 20 | 21 | /** @private */ 22 | this.useAccessToken = false; 23 | 24 | /** @private */ 25 | this.model = "models/gemini-1.5-flash-latest"; 26 | 27 | /** @private */ 28 | this.version = "v1beta"; 29 | 30 | /** @private */ 31 | this.labelName = null; 32 | 33 | /** @private */ 34 | this.cycleMinTimeDrivenTrigger = 10; 35 | 36 | /** @private */ 37 | this.extraTime = this.cycleMinTimeDrivenTrigger * 2; // In the current stage, this value is set as 2 times this.cycleMinTimeDrivenTrigger. For example, when the script is run by the time-driven trigger with a cycle of 10 minutes, this script retrieved the emails from 30 minutes before. By this, even when the script was finished by an error, you have 2 chances for retrying. 
38 | 39 | /** @private */ 40 | this.mainFunctionName = "main"; 41 | 42 | /** @private */ 43 | this.configurationSheetName = "configuration"; 44 | 45 | /** @private */ 46 | this.jsonSchemaSheetName = "jsonSchema"; 47 | 48 | /** @private */ 49 | this.notifyModificationpointsToSender = false; 50 | 51 | /** @private */ 52 | this.logSheetName = "log"; 53 | 54 | /** @private */ 55 | this.accessToken = null; 56 | 57 | /** @private */ 58 | this.dashboardSheet = null; 59 | 60 | /** @private */ 61 | this.logSheet = null; 62 | 63 | /** @private */ 64 | this.jsonSchemaSheet = null; 65 | 66 | /** @private */ 67 | this.keys = ["apiKey", "useAccessToken", "model", "version", "labelName", "cycleMinTimeDrivenTrigger", "extraTime", "mainFunctionName", "notifyModificationpointsToSender"]; 68 | 69 | /** @private */ 70 | this.now = new Date(); 71 | 72 | /** @private */ 73 | this.waitTime = 5; // seconds 74 | 75 | /** @private */ 76 | this.rowColors = { doneRows: "#d9ead3", invalidRows: "#f4cccc", unrelatedRows: "#d9d9d9" }; 77 | 78 | /** @private */ 79 | this.jsonSchema = {}; 80 | 81 | } 82 | 83 | /** 84 | * ### Description 85 | * Main method. 
86 | * 87 | * @return {void} 88 | */ 89 | run() { 90 | this.setTimeDrivenTriggers_(); 91 | this.getSheets_(); 92 | if (this.object && Object.keys(this.object).length > 0) { 93 | this.keys.forEach(k => { 94 | if (this.object[k]) { 95 | this[k] = this.object[k]; 96 | } 97 | }); 98 | } else { 99 | this.getInitParams_(); 100 | } 101 | let messages = this.getEmailsWithInvoices_(); 102 | const coloredRows = { 103 | doneRows: [], 104 | invalidRows: [], 105 | unrelatedRows: [], 106 | }; 107 | const values = []; 108 | for (let i = 0; i < messages.length; i++) { 109 | const { threadId, messageId, pdfFiles, searchUrl, sender, subject, messageObj } = messages[i]; 110 | const pdfFilesLen = pdfFiles.length; 111 | for (let j = 0; j < pdfFilesLen; j++) { 112 | const blob = pdfFiles[j]; 113 | const o = this.parseInvoiceByGemini_(blob); 114 | const prefix = [this.now, threadId, messageId, searchUrl, sender, subject]; 115 | if (o.check.invoice == true && o.check.invalidCheck == false) { 116 | values.push([...prefix, true, true, null, JSON.stringify(o), null]); 117 | coloredRows.doneRows.push(values.length - 1); 118 | } else if (o.check.invoice == true && o.check.invalidCheck == true) { 119 | if (this.notifyModificationpointsToSender == true) { 120 | const msg = `This message is automatically sent from a script for checking your invoice by Gemini.\nNow, Gemini suggested the modification points in your invoice. 
Please confirm the following modification points and send the modified invoice again.\n\nModification points:\n${o.check.invalidPoints}`; 121 | messageObj.reply(msg); 122 | } 123 | values.push([...prefix, true, false, o.check.invalidPoints, JSON.stringify(o), null]); 124 | coloredRows.invalidRows.push(values.length - 1); 125 | } else if (o.check.invoice == false) { 126 | values.push([...prefix, false, null, null, null, null]); 127 | coloredRows.unrelatedRows.push(values.length - 1); 128 | } else { 129 | values.push([...prefix, null, null, null, null, JSON.stringify(o)]); 130 | } 131 | if (pdfFilesLen >= 2) { 132 | Utilities.sleep(this.waitTime * 1000); 133 | } 134 | } 135 | } 136 | const valuesLen = values.length; 137 | let msg = ""; 138 | if (valuesLen > 0) { 139 | const offset = this.logSheet.getLastRow() + 1; 140 | this.logSheet.getRange(offset, 1, values.length, values[0].length).setValues(values); 141 | ["doneRows", "invalidRows", "unrelatedRows"].forEach(k => { 142 | if (coloredRows[k].length > 0) { 143 | this.logSheet.getRangeList(coloredRows[k].map(e => `${e + offset}:${e + offset}`)).setBackground(this.rowColors[k]); 144 | } 145 | }); 146 | msg = `${valuesLen} emails were processed.`; 147 | this.logSheet.activate(); 148 | } else { 149 | msg = "No emails were processed."; 150 | } 151 | this.showLog_(msg); 152 | } 153 | 154 | /** 155 | * ### Description 156 | * Get work sheets. 157 | * 158 | * @private 159 | */ 160 | getSheets_() { 161 | // In the current stage, the sheet names of "configuration" and "log" are fixed. 
162 | const ss = SpreadsheetApp.getActiveSpreadsheet(); 163 | this.dashboardSheet = ss.getSheetByName(this.configurationSheetName) || ss.insertSheet(this.configurationSheetName); 164 | this.logSheet = ss.getSheetByName(this.logSheetName) || ss.insertSheet(this.logSheetName); 165 | this.jsonSchemaSheet = ss.getSheetByName(this.jsonSchemaSheetName) || ss.insertSheet(this.jsonSchemaSheetName); 166 | if (!this.dashboardSheet || !this.logSheet) { 167 | this.showError_("Sheet names are changed from the default names of 'dashboard' and 'log'. Please confirm them."); 168 | } 169 | } 170 | 171 | /** 172 | * ### Description 173 | * Delete time-driven triggers. 174 | */ 175 | deleteTimeDrivenTriggers() { 176 | ScriptApp.getProjectTriggers().forEach(t => { 177 | if (t.getHandlerFunction() == this.mainFunctionName) { 178 | ScriptApp.deleteTrigger(t); 179 | } 180 | }); 181 | } 182 | 183 | /** 184 | * ### Description 185 | * Set time-driven triggers. 186 | * 187 | * @private 188 | */ 189 | setTimeDrivenTriggers_() { 190 | this.deleteTimeDrivenTriggers(); 191 | ScriptApp.newTrigger(this.mainFunctionName).timeBased().everyMinutes(this.cycleMinTimeDrivenTrigger).create(); 192 | } 193 | 194 | /** 195 | * ### Description 196 | * Get user's values. 197 | * 198 | * @private 199 | */ 200 | getInitParams_() { 201 | // Dashboard sheet has 2 header rows. 202 | const v = this.dashboardSheet.getDataRange().getValues(); 203 | if (v.join("") == "") { 204 | const defaultValues = [ 205 | ["Configuration", "", ""], 206 | ["Names of values", "Your values", "Descriptions"], 207 | ["apiKey", "", "This API key is used for requesting Gemini API."], 208 | ["useAccessToken", false, "Default is FALSE. If you want to use your access token, please set this as TRUE. At that time, The access token is retrieved by ScriptApp.getOAuthToken(). When you use this value as TRUE, the API key is not used."], 209 | ["model", "models/gemini-1.5-flash-latest", "This value is the model name to use for generating content. 
Default is \"models/gemini-1.5-flash-latest\"."], 210 | ["version", "v1beta", "This value is the version of Gemini API. Default is v1beta."], 211 | ["labelName", "", "This label name on Gmail is used for searching the emails of invoices. If you have no label, please set empty. By this, this application retrieves the emails by searching a word \"invoice\" in the email."], 212 | ["cycleMinTimeDrivenTrigger", 10, "Unit is minutes. The default is 10 minutes. This value is used for executing the script for managing the invoices of emails by the time-driven trigger. Please select one of 5, 10, 15, or 30 from the dropdown list."], 213 | ["mainFunctionName", "main", "This value is the name of main function. Default is \"main\"."], 214 | ["notifyModificationpointsToSender", false, "Default is false. When this value is true, when the invoice has modification points, an email including them is automatically sent as a reply mail."] 215 | ]; 216 | this.dashboardSheet.getRange(1, 1, defaultValues.length, defaultValues[0].length).setValues(defaultValues); 217 | } 218 | const [, , ...values] = v; 219 | values.forEach(([a, b]) => { 220 | const ta = a.trim(); 221 | const tb = typeof b == "string" ? b.trim() : b; 222 | if (this.keys.includes(ta)) { 223 | this[ta] = tb ?? this[ta]; 224 | } 225 | }); 226 | const jsonSchemaRange = this.jsonSchemaSheet.getRange("A1"); 227 | const jsonSchema = jsonSchemaRange.getDisplayValue(); 228 | if (!jsonSchema) { 229 | this.jsonSchema = { 230 | description: "About the invoices of the following files, check carefully, and create an array including an object that parses the following images of the invoices by pointing out the detailed improvement points in the invoice. Confirm by calculating 3 times whether the total amount of the invoice is correct. 
Furthermore, confirm whether the name, address, phone number, and the required fields are written in the invoice.", 231 | type: "object", 232 | properties: { 233 | check: { 234 | description: "Point out the improvement points in the invoice. Return the detailed imprivement points like details of invalid, insufficient, wrong, and miscalculated parts. Here, ignore the calculation of tax.", 235 | type: "object", 236 | properties: { 237 | invoice: { 238 | description: "If the file is an invoice, it's true. If the file is not an invoice, it's false.", 239 | type: "boolean", 240 | }, 241 | invalidCheck: { 242 | description: "Details of invalid, insufficient, wrong, and miscalculated points of the invoice. When no issue was found, this should be false. When issues were found, this should be true.", 243 | type: "boolean" 244 | }, 245 | invalidPoints: { 246 | description: "Details of invalid, insufficient, wrong, and miscalculated points of the invoice. When no issue was found, this should be no value.", 247 | type: "string" 248 | } 249 | }, 250 | required: ["invoice", "invalidCheck"], 251 | additionalProperties: false, 252 | }, 253 | parse: { 254 | description: "Create an object parsed the invoice.", 255 | type: "object", 256 | properties: { 257 | name: { description: "Name given as 'Filename'", type: "string" }, 258 | invoiceTitle: { description: "Title of invoice", type: "string" }, 259 | invoiceDate: { description: "Date of invoice", type: "string" }, 260 | invoiceNumber: { description: "Number of the invoice", type: "string" }, 261 | invoiceDestinationName: { description: "Name of destination of invoice", type: "string" }, 262 | invoiceDestinationAddress: { description: "Address of the destination of invoice", type: "string" }, 263 | totalCost: { description: "Total cost of all costs", type: "string" }, 264 | table: { 265 | description: "Table of the invoice. This is a 2-dimensional array. Add the first header row to the table in the 2-dimensional array. 
The column should be 'title or description of item', 'number of items', 'unit cost', 'total cost'", 266 | type: "array", 267 | }, 268 | }, 269 | required: [ 270 | "name", 271 | "invoiceTitle", 272 | "invoiceDate", 273 | "invoiceNumber", 274 | "invoiceDestinationName", 275 | "invoiceDestinationAddress", 276 | "totalCost", 277 | "table", 278 | ], 279 | additionalProperties: false, 280 | } 281 | }, 282 | required: [ 283 | "check", 284 | "parse", 285 | ], 286 | additionalProperties: false, 287 | }; 288 | } else { 289 | try { 290 | this.jsonSchema = JSON.parse(jsonSchema); 291 | } catch ({ stack }) { 292 | this.showError_(stack); 293 | } 294 | } 295 | if (this.useAccessToken === true) { 296 | this.accessToken = ScriptApp.getOAuthToken(); 297 | } 298 | } 299 | 300 | /** 301 | * ### Description 302 | * Get log from the log sheet. 303 | * 304 | * @returns {Array} Log. 305 | * @private 306 | */ 307 | getLog_() { 308 | let [head, ...values] = this.logSheet.getDataRange().getValues(); 309 | if (head.join("") == "") { 310 | head = ["date", "threadId", "messageId", "searchUrl", "sender", "subject", "hasInvoice", "isValidInvoice", "modificationPoints", "parsedInvoice", "notes"]; 311 | this.logSheet.getRange(1, 1, 1, head.length).setValues([head]); 312 | } 313 | return values.map(r => head.reduce((o, h, j) => (o[h] = r[j], o), {})); 314 | } 315 | 316 | /** 317 | * ### Description 318 | * Get emails including the invoices as PDF files from Gmail. 319 | * 320 | * @returns {Array} Retrieved messages including PDF files. 
321 | * @private 322 | */ 323 | getEmailsWithInvoices_() { 324 | const processedMessageIds = this.getLog_().map(({ messageId }) => messageId); 325 | const now = this.now.getTime(); 326 | const after = (now - ((this.cycleMinTimeDrivenTrigger + this.extraTime) * 60 * 1000)).toString(); 327 | let searchQuery = `after:${after.slice(0, after.length - 3)} has:attachment`; 328 | if (this.labelName != "") { 329 | searchQuery += ` label:invoices`; 330 | } else { 331 | searchQuery += ` label:INBOX`; 332 | } 333 | const threads = GmailApp.search(searchQuery); 334 | const messages = threads.reduce((ar, t) => { 335 | const threadId = t.getId(); 336 | t.getMessages().forEach(m => { 337 | const files = m.getAttachments(); 338 | if (files.length > 0) { 339 | const pdfFiles = files.filter(f => f.getContentType() == MimeType.PDF).map(a => Utilities.newBlob(a.getBytes(), a.getContentType(), a.getName)); 340 | if (pdfFiles.length > 0) { 341 | const messageId = m.getId(); 342 | if (!processedMessageIds.includes(messageId)) { 343 | const sender = m.getFrom(); 344 | const subject = m.getSubject(); 345 | const searchUrl = `https://mail.google.com/mail/#search/rfc822msgid:${encodeURIComponent(m.getHeader("Message-ID"))}`; 346 | ar.push({ threadId, messageId, pdfFiles, searchUrl, sender, subject, messageObj: m }); 347 | } 348 | } 349 | } 350 | }); 351 | return ar; 352 | }, []); 353 | return messages; 354 | } 355 | 356 | /** 357 | * ### Description 358 | * Generate content from PDF blob of invoice. 359 | * This method generates content by Gemini API with my Google Apps Script library [GeminiWithFiles](https://github.com/tanaikech/GeminiWithFiles). 360 | * 361 | * @param {Blob} blob PDF blob of invoice. 362 | * @returns {object} Generated content as a JSON object. 
363 | * @private 364 | */ 365 | parseInvoiceByGemini_(blob) { 366 | try { 367 | const tempObj = { model: this.model, version: this.version, response_mime_type: "application/json" }; 368 | if (this.accessToken) { 369 | tempObj.accessToken = this.accessToken; 370 | } else if (this.apiKey) { 371 | tempObj.apiKey = this.apiKey; 372 | } else { 373 | showError_("Please set your API key for using Gemini API."); 374 | } 375 | const g = new GeminiWithFiles(tempObj); 376 | 377 | // On August 3, 2024, I updated GeminiWithFiles (https://github.com/tanaikech/GeminiWithFiles). 378 | // By this, PDF data can be directly used with Gemini API without async/await. 379 | const fileList = g.setBlobs([blob]).uploadFiles(); 380 | 381 | const res = g.withUploadedFilesByGenerateContent(fileList).generateContent({ jsonSchema: this.jsonSchema }); 382 | g.deleteFiles(fileList.map(({ name }) => name)); 383 | return res; 384 | } catch ({ stack }) { 385 | this.showError_(stack); 386 | } 387 | } 388 | 389 | /** 390 | * ### Description 391 | * Show message as a log. 392 | * 393 | * @param {string} msg Message. 394 | * 395 | * @private 396 | */ 397 | showLog_(msg) { 398 | console.log(msg); 399 | Browser.msgBox(msg); 400 | } 401 | 402 | /** 403 | * ### Description 404 | * Show error message. 405 | * 406 | * @param {string} msg Error message. 
407 | * 408 | * @private 409 | */ 410 | showError_(msg) { 411 | console.log(msg); 412 | Browser.msgBox(msg); 413 | throw new Error(msg); 414 | } 415 | } 416 | -------------------------------------------------------------------------------- /LICENCE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | Copyright (c) 2024 Kanshi TANAIKE 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 5 | 6 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 7 | 8 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 9 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # UnlockSmartInvoiceManagementWithGeminiAPI 2 | 3 | ![](images/fig1.jpg) 4 | 5 | # Overview 6 | 7 | This is an application for "Unlock Smart Invoice Management: Gemini, Gmail, and Google Apps Script Integration". 8 | 9 | # Abstract 10 | 11 | This report describes an invoice processing application built with Google Apps Script. 
It leverages Gemini, a large language model, to automatically parse invoices received as email attachments and automates the process using time-driven triggers. 12 | 13 | # Introduction 14 | 15 | The emergence of large language models (LLMs) like ChatGPT and Gemini has significantly impacted various aspects of our daily lives. One such example is their ability to automate tasks previously requiring manual effort. In my case, Gemini has streamlined the processing of invoices I receive as email attachments in PDF format. 16 | 17 | Before Gemini, I manually reviewed each invoice, extracting crucial information. However, with Gemini's release, this tedious process transformed. As previously reported, Gemini can automatically parse invoices. [Ref](https://medium.com/google-cloud/parsing-invoices-using-gemini-1-5-api-with-google-apps-script-1f32af1678f2) Furthermore, my subsequent reports explored the ability to: 18 | 19 | - Return parsed invoice data in JSON format [Ref](https://medium.com/google-cloud/taming-the-wild-output-effective-control-of-gemini-api-response-formats-with-response-mime-type-da273c08be85) [Ref](https://medium.com/google-cloud/taming-the-wild-output-effective-control-of-gemini-api-response-formats-with-response-schema-ae0097b97502) 20 | - Upload files to Gemini for content generation, as demonstrated through my creation of a dedicated library [Ref](https://github.com/tanaikech/GeminiWithFiles) 21 | 22 | These advancements allowed me to expand a simple invoice parsing script into a full-fledged application. This report details an application built using Google Apps Script. The application retrieves emails containing invoices from Gmail, utilizes the Gemini API to parse the extracted invoices, and leverages time-driven triggers for automatic execution. This approach ensures seamless processing of all invoice-related emails. 
23 | 24 | # Repository of this application 25 | 26 | [https://github.com/tanaikech/UnlockSmartInvoiceManagementWithGeminiAPI](https://github.com/tanaikech/UnlockSmartInvoiceManagementWithGeminiAPI) 27 | 28 | # Why Google Apps Script? 29 | 30 | I chose Google Apps Script to create this application for several reasons: 31 | 32 | - Ease of Use: Google Apps Script is a low-code platform, making it accessible to users with no coding experience. 33 | - Seamless Integration: It integrates effortlessly with Gmail, Google Docs (including Docs, Sheets, Slides, and more), and various Google APIs. 34 | - Cloud-Based Automation: As a cloud-based scripting language, Google Apps Script can be triggered to run by time-based triggers.- Personal opinion: I would like to introduce the advantages of Google Apps Script. 35 | 36 | # Origin for constructing this application 37 | 38 | The origin for constructing this application is as follows. 39 | 40 | - Apr 3, 2024: [Parsing Invoices using Gemini 1.5 API with Google Apps Script](https://medium.com/google-cloud/parsing-invoices-using-gemini-1-5-api-with-google-apps-script-1f32af1678f2) 41 | - Apr 10, 2024: [Specifying Output Types for Gemini API with Google Apps Script](https://medium.com/google-cloud/specifying-output-types-for-gemini-api-with-google-apps-script-c2f6a753c8d7) 42 | - Apr 26, 2024: [GeminiWithFiles of a Google Apps Script library](https://github.com/tanaikech/GeminiWithFiles) 43 | - May 1, 2024: [Taming the Wild Output: Effective Control of Gemini API Response Formats with response_mime_type](https://medium.com/google-cloud/taming-the-wild-output-effective-control-of-gemini-api-response-formats-with-response-mime-type-da273c08be85) 44 | - May 21, 2024: [Taming the Wild Output: Effective Control of Gemini API Response Formats with response_schema](https://medium.com/google-cloud/taming-the-wild-output-effective-control-of-gemini-api-response-formats-with-response-schema-ae0097b97502) 45 | 46 | This application was created 
by integrating those sources. 47 | 48 | ## Comments: 49 | 50 | Before the `response_mime_type` property was released, I used function calls to control the output format. However, after the release of `response_mime_type`, it became the preferred method for controlling the output format. I also tested using the `response_schema` property when it was released, but found that the combination of `response_mime_type` and a JSON schema in the prompt provides the most control over the output format. As a result, this application parses invoices using the combination of `response_mime_type` and a JSON schema in the prompt. 51 | 52 | # Usage 53 | 54 | ## 1. Copy Google Spreadsheet 55 | 56 | Please copy a Google Spreadsheet including this application to your Google Drive. So, please access the following URL. 57 | 58 | [https://docs.google.com/spreadsheets/d/17UjnOcz8vyrpDI6pNhms4d2ZXAy6aWTYgmTkvZcuqaA/copy](https://docs.google.com/spreadsheets/d/17UjnOcz8vyrpDI6pNhms4d2ZXAy6aWTYgmTkvZcuqaA/copy) 59 | 60 | When you open the script editor of this Spreadsheet, you can see the following script files. 61 | 62 | - `main.gs`: This includes the main methods. 63 | - `InvoiceManager.gs`: This includes the main class object. 64 | - `GeminiWithFiles.gs`: This is from [GeminiWithFiles (Author: me)](https://github.com/tanaikech/GeminiWithFiles). 65 | - `PDFApp.gs`: This is from [PDFApp (Author: me)](https://github.com/tanaikech/PDFApp). 66 | 67 | ## 2. Create an API key 68 | 69 | Please access [https://ai.google.dev/gemini-api/docs/api-key](https://ai.google.dev/gemini-api/docs/api-key) and create your API key. At that time, please enable Generative Language API at the API console. This API key is used for this sample script. 70 | 71 | This official document can be also seen. [Ref](https://ai.google.dev/). 72 | 73 | Of course, if you can link the Google Cloud Platform Project to the Google Apps Script Project in the copied Spreadsheet, you can also use the access token. 74 | 75 | ## 3. 
Setup 76 | 77 | Open the "configuration" sheet and enter your API key. If you want to use the access token, leave the API key field blank and set useAccessToken to TRUE. You can also set other parameters as needed. 78 | 79 | ![](images/fig2.png) 80 | 81 | By default, this application checks emails in the "INBOX" label that include invoices. To search emails from specific labels, set the label containing invoices to labelName. The script will then search for emails with that label. 82 | 83 | From v1.0.1, in order to easily customize the value of "jsonSchema" for generating content with Gemini API, I added it as a new sheet of "jsonSchema" sheet in the Spreadsheet. When you customize it, you can edit the cell "A1" of the "jsonSchema" sheet. By this, the script generates content with Gemini API using your customized JSON schema. The cell "A2" is the number of characters of "A1". 84 | 85 | ## 4. Testing 86 | 87 | Clicking the "START" button on the "Configuration" sheet runs the application's script. However, you might see "No emails were processed" in the dialog even after running the script. In this case, to test the application, please send an email to your account that includes an invoice as a PDF attachment. Then, click the "START" button again. 88 | 89 | The script execution triggers a time-driven trigger, which automatically runs the application based on the `cycleMinTimeDrivenTrigger` interval. 90 | 91 | ### Sample invoice 1 92 | 93 | ![](images/fig3.png) 94 | 95 | This sample invoice is from [here](https://create.microsoft.com/en-us/template/service-invoice-with-tax-calculations-9330a1fe-20ae-4590-ac01-54c53ed1f3ba). When this application parses this invoice, the following JSON is returned. 
96 | 97 | ``` 98 | { 99 | "check": { 100 | "invoice": true, 101 | "invalidCheck": false, 102 | "invalidPoints": null 103 | }, 104 | "parse": { 105 | "name": "invoice1.png", 106 | "invoiceTitle": "INVOICE", 107 | "invoiceDate": "6月 4, 2024", 108 | "invoiceNumber": "100", 109 | "invoiceDestinationName": "Nazar Neill", 110 | "invoiceDestinationAddress": "Downtown Pets\n123 South Street\nManhattan, NY 15161", 111 | "totalCost": "$4350", 112 | "table": [ 113 | ["DESCRIPTION", "HOURS", "RATE", "AMOUNT"], 114 | ["Pour cement foundation", "4.00", "$150.00", "$600"], 115 | ["Framing and drywall", "16.00", "$150.00", "$2400"], 116 | ["Tiling and flooring install", "9.00", "$150.00", "$1350"] 117 | ] 118 | } 119 | } 120 | ``` 121 | 122 | When you see this JSON, you can see that the values `invalidCheck` and `invalidPoints` are `false` and `null`, respectively. From this, it is found that this invoice has no issues. 123 | 124 | ### Sample invoice 2 125 | 126 | ![](images/fig4.png) 127 | 128 | This sample invoice is from [here](https://create.microsoft.com/en-us/template/simple-invoice-7c5c0318-8bc0-4ec9-8be3-2683dbf8adae). When this application parses this invoice, the following JSON is returned. 129 | 130 | ``` 131 | { 132 | "check": { 133 | "invoice": true, 134 | "invalidCheck": true, 135 | "invalidPoints": "The total amount calculated is incorrect. The total amount should be a summation of prices, but it's not. In addition, the table has an item without item number and description." 
136 | }, 137 | "parse": { 138 | "name": "invoice2.png", 139 | "invoiceTitle": "INVOICE", 140 | "invoiceDate": "2024/6/7", 141 | "invoiceNumber": "10654", 142 | "invoiceDestinationName": null, 143 | "invoiceDestinationAddress": "123 Avenue A,\nBurbank, CA 56789", 144 | "totalCost": "$1,077.25", 145 | "table": [ 146 | ["Item #", "Description", "Qty", "Unit price", "Discount", "Price"], 147 | ["A875", "Peonies", "35", "$1.05", null, "$36.75"], 148 | ["K245", "Tulips", "25", "$2.00", null, "$50.00"], 149 | ["U123", "Buttercup", "30", "$1.35", null, "$40.50"], 150 | ["sample", "sample item", "10", "$10.00", null, "$1,000.00"] 151 | ] 152 | } 153 | } 154 | ``` 155 | 156 | When you see this JSON, you can see that the values `invalidCheck` and `invalidPoints` are `true` and `The total amount calculated is incorrect. The total amount should be a summation of prices, but it's not. In addition, the table has an item without item number and description.`, respectively. From this, it is found that this invoice has issues and the modification points are suggested. 157 | 158 | ## Log 159 | 160 | After processing the two invoices mentioned above, the application displays the following information in the "log" sheet: 161 | 162 | ![](images/fig5.png) 163 | 164 | - Valid invoices: These will be displayed in green. 165 | - Invalid invoices: These will be displayed in red. 166 | 167 | For invalid invoices, the application can automatically send an email response containing the necessary corrections. However, during my testing with various invoices, I encountered instances where the `invalidCheck` value was flagged as `true` even for invoices without errors. Due to this, I have temporarily set the `notifyModificationpointsToSender` value to `FALSE` by default. I anticipate the content generation accuracy to improve in future updates.
168 | 169 | 170 | # Applied Section 171 | When the method shown in this report is used, the parsing functionality can be extended to other document types besides invoices by modifying the JSON schema. 172 | 173 | 174 | # Future update 175 | 176 | - Currently, Gemini 1.5 API cannot directly use PDF data. As a workaround, this sample converts each page of the PDF data to PNG images. When direct PDF processing is available in a future update, I expect the accuracy for generating content to improve. 177 | - On July 23, 2024, I confirmed that this was achieved. By this, I updated the script for directly using PDF data. 178 | 179 | # Note 180 | 181 | - The top abstract image was created by [Gemini](https://gemini.google.com/app). 182 | 183 | --- 184 | 185 | 186 | 187 | # Licence 188 | 189 | [MIT](LICENCE) 190 | 191 | 192 | 193 | # Author 194 | 195 | [Tanaike](https://tanaikech.github.io/about/) 196 | 197 | [Donate](https://tanaikech.github.io/donate/) 198 | 199 | 200 | 201 | # Update History 202 | 203 | - v1.0.0 (June 15, 2024) 204 | 205 | 1. Initial release. 206 | 207 | - v1.0.1 (June 17, 2024) 208 | 209 | 1. In order to easily customize the value of "jsonSchema" for generating content with Gemini API, I added it as a new sheet of "jsonSchema" sheet in the Spreadsheet. When you customize it, you can edit the cell "A1" of the "jsonSchema" sheet. By this, the script generates content with Gemini API using your customized JSON schema. The cell "A2" is the number of characters of "A1". 210 | 211 | - v1.0.2 (July 23, 2024) 212 | 213 | 1. On July 23, 2024, I noticed that PDF data could be directly parsed by Gemini API. It is considered that this is due to the update by the Google side. So, I updated `setBlobs([blob], true)` to `setBlobs([blob], false)` of the method `parseInvoiceByGemini_`. By this modification, the PDF blob is directly used with Gemini API. 
[Ref](https://github.com/tanaikech/GeminiWithFiles?tab=readme-ov-file#setblobs) 214 | - v1.0.3 (August 3, 2024) 215 | 216 | 1. On August 3, 2024, I updated GeminiWithFiles (https://github.com/tanaikech/GeminiWithFiles). In this version, PDF data can be processed with Gemini API without async/await. So, I updated UnlockSmartInvoiceManagementWithGeminiAPI. 217 | 218 | [TOP](#top) 219 | -------------------------------------------------------------------------------- /appsscript.json: -------------------------------------------------------------------------------- 1 | { 2 | "timeZone": "Asia/Tokyo", 3 | "exceptionLogging": "STACKDRIVER", 4 | "runtimeVersion": "V8", 5 | "dependencies": {} 6 | } -------------------------------------------------------------------------------- /images/fig1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanaikech/UnlockSmartInvoiceManagementWithGeminiAPI/dbd54a79d6437b66120ca647881ea0ae3c4659a8/images/fig1.jpg -------------------------------------------------------------------------------- /images/fig2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanaikech/UnlockSmartInvoiceManagementWithGeminiAPI/dbd54a79d6437b66120ca647881ea0ae3c4659a8/images/fig2.png -------------------------------------------------------------------------------- /images/fig3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanaikech/UnlockSmartInvoiceManagementWithGeminiAPI/dbd54a79d6437b66120ca647881ea0ae3c4659a8/images/fig3.png -------------------------------------------------------------------------------- /images/fig4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanaikech/UnlockSmartInvoiceManagementWithGeminiAPI/dbd54a79d6437b66120ca647881ea0ae3c4659a8/images/fig4.png
/*
This report describes an invoice processing application built with Google Apps Script. It leverages Gemini, a large language model, to automatically parse invoices received as email attachments and automates the process using time-driven triggers.

Repository: https://github.com/tanaikech/UnlockSmartInvoiceManagementWithGeminiAPI
*/

/**
 * Stops the automatic execution.
 * The installed time-driven trigger is deleted, and a dialog is shown.
 */
function stopTrigger() {
  const manager = new InvoiceManager();
  manager.deleteTimeDrivenTriggers();
  Browser.msgBox("Trigger was removed.");
}

/**
 * Main function. This application is launched by running this function.
 */
function main() {
  const manager = new InvoiceManager();
  manager.run();
}