├── .github └── dependabot.yml ├── .gitignore ├── LICENSE.md ├── README.md ├── package.json ├── sample.env ├── server.js ├── src ├── alt-text-org.js ├── analyze-links.js ├── check.js ├── describe.js ├── live-tweeters.js ├── ocr.js ├── tweet-predicates.js ├── twtr.js └── util.js └── testbed.js /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: "npm" 4 | directory: "/" 5 | schedule: 6 | interval: "weekly" -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .idea/ 2 | node_modules 3 | lists/ 4 | users/ 5 | enabled.json 6 | .env 7 | *~ 8 | .* 9 | !.github/ -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Hannah Kolbeck 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Alt Text Util 2 | ============= 3 | 4 | A Twitter bot which exposes a number of utilities around alt text 5 | 6 | Public Commands 7 | --------------- 8 | 9 | Tag the bot in a tweet, quote tweet, or reply with one of these commands 10 | 11 | **OCR** or **extract text** 12 | 13 | Performs Optical Character Recognition on any images it finds, replying with the result in alt text of re-uploaded images. 14 | 15 | **save** 16 | 17 | Saves alt text on any images it finds to the [alt-text.org](https://alt-text.org) library. 18 | 19 | **analyze links** 20 | 21 | Checks the websites of any links found for their usage of alt text. 22 | 23 | **explain** 24 | 25 | Replies with a quick explanation of what alt text is and how to add it. 26 | 27 | 28 | Private Commands 29 | ---------------- 30 | 31 | Direct message the bot with one of these commands 32 | 33 | **fetch** 34 | 35 | Search the [alt-text.org](https://alt-text.org) library for alt text for a tweet if you include a link or an image if you include one. 36 | 37 | **OCR** or **extract text** 38 | 39 | Perform Optical Character Recognition on images from a tweet if you include a link or an image if you include one. 40 | 41 | **check** 42 | 43 | Include a link to a tweet, a username, or a link to a user's profile to get an analysis of alt text usage. 44 | 45 | **help** 46 | 47 | Get a text version of these instructions 48 | 49 | 50 | How Does The Bot Choose Which Image(s) to OCR? 
51 | ---------------------------------------------- 52 | 53 | When you tag the bot to OCR an image, it first has to choose which image to analyze. It looks: 54 | 55 | 1. On the **tweet** with the command 56 | 2. On any tweet **quoted** in that tweet 57 | 3. On the tweet being **replied** to 58 | 4. On any tweet **quoted** in the tweet being **replied** to 59 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "alt-text-util", 3 | "version": "0.0.1", 4 | "description": "A Twitter bot providing a number of tools around alt text ", 5 | "main": "server.js", 6 | "scripts": { 7 | "start": "node server.js" 8 | }, 9 | "dependencies": { 10 | "@google-cloud/vision": "^2.3.1", 11 | "@stablelib/base64": "^1.0.1", 12 | "base64-arraybuffer": "^1.0.1", 13 | "canvas": "2.8.0", 14 | "canvas-txt": "^3.0.0", 15 | "datejs": "1.0.0-rc3", 16 | "node-fetch": "2.6.6", 17 | "node-html-parser": "^3.1.4", 18 | "oauth-1.0a": "^2.2.6", 19 | "twitter-api-client": "^1.5.2", 20 | "twitter-autohook": "^1.2.1", 21 | "unicode-default-word-boundary": "^13.0.0" 22 | }, 23 | "engines": { 24 | "node": "12.x" 25 | }, 26 | "repository": { 27 | "url": "https://github.com/alt-text-org/AltTextCrew" 28 | }, 29 | "license": "MIT", 30 | "keywords": [ 31 | "node", 32 | "glitch", 33 | "express" 34 | ] 35 | } 36 | -------------------------------------------------------------------------------- /sample.env: -------------------------------------------------------------------------------- 1 | TWITTER_CONSUMER_SECRET= 2 | TWITTER_CONSUMER_KEY= 3 | TWITTER_ACCESS_TOKEN_SECRET= 4 | TWITTER_ACCESS_TOKEN= 5 | BEARER_TOKEN= 6 | LIST= 7 | USER= 8 | GOOGLE_APPLICATION_CREDENTIALS= 9 | NGROK_SECRET= 10 | -------------------------------------------------------------------------------- /server.js: -------------------------------------------------------------------------------- 1 | 
const twitter = require("twitter-api-client"); 2 | const twtrHook = require("twitter-autohook"); 3 | const OAuth = require("oauth-1.0a"); 4 | const crypto = require("crypto"); 5 | 6 | const { 7 | ts, 8 | fetchImage, 9 | extractMessageMedia, 10 | extractTargets, 11 | getTweetImagesAndAlts, 12 | splitText 13 | } = require("./src/util"); 14 | const { 15 | saveEnabled, 16 | pollLiveTweeters, 17 | getListRecord 18 | } = require("./src/live-tweeters"); 19 | const { 20 | tweet, 21 | reply, 22 | getTweet, 23 | sendDM, 24 | replyChain, 25 | uploadImageWithAltText, uploadMedia 26 | } = require("./src/twtr"); 27 | const {ocr, ocrRaw, ocrTweetImages, getAuxImage, getResponseText} = require("./src/ocr"); 28 | const {checkUserTweets, checkTweet} = require("./src/check"); 29 | const { 30 | saveAltTextForImage, 31 | fetchAltTextForTweet, 32 | fetchAltTextForBase64, fetchAltTextForUrl 33 | } = require("./src/alt-text-org"); 34 | const {analyzeUrls, getUrls} = require("./src/analyze-links"); 35 | const {describeRaw, describeUrl, describeTweetImages} = require("./src/describe"); 36 | 37 | const config = { 38 | list: process.env.LIST, 39 | myUser: process.env.USER, 40 | writeToken: process.env.API_WRITER_TOKEN, 41 | twitterClientConfig: { 42 | accessToken: process.env.TWITTER_ACCESS_TOKEN, 43 | accessTokenSecret: process.env.TWITTER_ACCESS_TOKEN_SECRET, 44 | apiKey: process.env.TWITTER_CONSUMER_KEY, 45 | apiSecret: process.env.TWITTER_CONSUMER_SECRET, 46 | disableCache: true 47 | }, 48 | twitterToken: { 49 | key: process.env.TWITTER_ACCESS_TOKEN, 50 | secret: process.env.TWITTER_ACCESS_TOKEN_SECRET 51 | }, 52 | activityApiConfig: { 53 | token: process.env.TWITTER_ACCESS_TOKEN, 54 | token_secret: process.env.TWITTER_ACCESS_TOKEN_SECRET, 55 | oauth_token: process.env.TWITTER_ACCESS_TOKEN, 56 | oauth_token_secret: process.env.TWITTER_ACCESS_TOKEN_SECRET, 57 | consumer_key: process.env.TWITTER_CONSUMER_KEY, 58 | consumer_secret: process.env.TWITTER_CONSUMER_SECRET, 59 | ngrok_secret: 
process.env.NGROK_SECRET, 60 | env: "prod" 61 | }, 62 | azure: { 63 | descriptionKey: process.env.AZURE_KEY, 64 | descriptionEndpoint: process.env.AZURE_DESCRIPTION_ENDPOINT 65 | } 66 | }; 67 | 68 | async function describeDMCmd(twtr, oauth, msg, text) { 69 | let reply = []; 70 | let targets = await extractTargets(text); 71 | let rawImage = await extractMessageMedia(oauth, config.twitterToken, msg); 72 | 73 | if (rawImage) { 74 | const description = await describeRaw(config.azure.descriptionEndpoint, config.azure.descriptionKey, rawImage) 75 | if (description) { 76 | reply.push(description); 77 | } else { 78 | reply.push("Couldn't describe attached image"); 79 | } 80 | } else if (targets.web.size > 0) { 81 | for (const url of targets.web) { 82 | const description = await describeUrl(config.azure.descriptionEndpoint, config.azure.descriptionKey, url) 83 | if (description) { 84 | reply.push(`${url}: ${description}`); 85 | } else { 86 | reply.push(`${url}: No description found`); 87 | } 88 | } 89 | } else if (targets.tweet.size > 0) { 90 | for (const tweetId of targets.tweet) { 91 | let tweet = await getTweet(twtr, tweetId); 92 | if (tweet) { 93 | const descriptions = await describeTweetImages(config.azure.descriptionEndpoint, config.azure.descriptionKey, tweet); 94 | let annotated = descriptions.map( 95 | desc => `${tweet.user.screen_name}'s tweet: ${desc.text}` 96 | ); 97 | reply.push(...annotated); 98 | } else { 99 | reply.push(`Couldn't fetch tweet: ${tweetId}`); 100 | } 101 | } 102 | } else { 103 | reply.push("I don't see anything to describe"); 104 | } 105 | 106 | return reply; 107 | } 108 | 109 | async function ocrDMCmd(twtr, oauth, msg, text) { 110 | let ocrTexts = []; 111 | let targets = await extractTargets(text); 112 | let rawImage = await extractMessageMedia(oauth, config.twitterToken, msg); 113 | if (rawImage) { 114 | let imageOcr = await ocrRaw(rawImage) 115 | .catch(err => { 116 | console.log("Error OCRing raw image"); 117 | console.log(err) 118 | return 
null; 119 | }); 120 | if (imageOcr) { 121 | ocrTexts.push(imageOcr); 122 | } else { 123 | ocrTexts.push("Couldn't extract text from attached image"); 124 | } 125 | } else if (targets.web.size > 0) { 126 | for (const url of targets.web) { 127 | let imgOcr = await ocr(url); 128 | if (imgOcr) { 129 | ocrTexts.push(imgOcr); 130 | } else { 131 | ocrTexts.push(`${url}: No text extracted`); 132 | } 133 | } 134 | } else if (targets.tweet.size > 0) { 135 | for (const tweetId of targets.tweet) { 136 | let tweet = await getTweet(twtr, tweetId); 137 | if (tweet) { 138 | let ocrs = await ocrTweetImages(twtr, tweet); 139 | ocrTexts.push(...ocrs); 140 | } else { 141 | ocrTexts.push(`Couldn't fetch tweet: ${tweetId}`); 142 | } 143 | } 144 | } else { 145 | ocrTexts.push("I don't see anything to OCR"); 146 | } 147 | 148 | if (text.match(/one reply/i)) { 149 | return ocrTexts.map(ocr => { 150 | if (ocr.text) { 151 | return ocr.text 152 | } else { 153 | return ocr 154 | } 155 | }) 156 | } 157 | 158 | const reply = [] 159 | for (let text of ocrTexts) { 160 | if (text.text) { 161 | if (text.text.length > 1000) { 162 | const split = splitText(text.text, 1000) 163 | reply.push(split[0]) 164 | for (let i = 1; i < split.length; i++) { 165 | const image = getAuxImage(text.locale, i + 1, split.length) 166 | const auxMediaId = await uploadMedia(twtr, image); 167 | if (auxMediaId) { 168 | reply.push({ 169 | text: split[i], 170 | mediaId: auxMediaId 171 | }) 172 | } else { 173 | reply.push("Image upload failed. 
Text: " + split[i]) 174 | } 175 | } 176 | } else { 177 | reply.push(text.text) 178 | } 179 | } else { 180 | reply.push(text) 181 | } 182 | } 183 | 184 | return reply; 185 | } 186 | 187 | async function checkDMCmd(twtr, text) { 188 | let reply = []; 189 | let foundTarget = false; 190 | let targets = await extractTargets(text); 191 | 192 | if (targets.tweet.size > 0) { 193 | foundTarget = true; 194 | let checks = await Promise.all( 195 | Array.from(targets.tweet).map(tweetId => checkTweet(twtr, tweetId)) 196 | ); 197 | 198 | checks.forEach(check => reply.push(...check)); 199 | } 200 | 201 | if (targets.user.size > 0) { 202 | foundTarget = true; 203 | let checks = await Promise.all( 204 | Array.from(targets.user).map(userName => checkUserTweets(twtr, userName)) 205 | ); 206 | 207 | reply.push(...checks); 208 | } 209 | 210 | let chunks = text.match(/check\s+(.+)/i); 211 | if (chunks && chunks.length > 1) { 212 | let split = chunks[1].split(/\s+/g); 213 | let toCheck = split.filter(item => item.match(/^@?\w+$/)); 214 | foundTarget = foundTarget || toCheck.length > 0; 215 | let checks = await Promise.all( 216 | toCheck.map(userName => checkUserTweets(twtr, userName)) 217 | ); 218 | 219 | reply.push(...checks); 220 | } 221 | 222 | if (!foundTarget) { 223 | reply.push("I don't see anything to check"); 224 | } 225 | 226 | return reply; 227 | } 228 | 229 | async function fetchDMCmd(twtr, oauth, msg, text) { 230 | let reply = []; 231 | let foundTarget = false; 232 | let targets = await extractTargets(text); 233 | let rawImage = await extractMessageMedia(oauth, config.twitterToken, msg); 234 | if (rawImage) { 235 | foundTarget = true; 236 | let lang = text.match(/fetch (..)(?:\s|$)/i) || [null, "en"]; 237 | let alts = await fetchAltTextForBase64(rawImage, lang[1]); 238 | console.log(JSON.stringify(alts)) 239 | if (alts) { 240 | if (alts.ocr) { 241 | reply.push(`Extracted text: ${alts.ocr}`) 242 | } 243 | 244 | alts.exact.forEach(alt => reply.push( 245 | `Attached image 
(exact): ${alt.alt_text}` 246 | )) 247 | 248 | alts.fuzzy.forEach(alt => { 249 | if (!alts.exact.some(exact => exact.sha256 === alt.sha256) && alt.score >= 0.98) { 250 | reply.push( 251 | `Attached image (Similarity ${Math.floor(alt.score * 100)}%): ${alt.alt_text}` 252 | ) 253 | } 254 | }) 255 | 256 | if (reply.length === 0) { 257 | reply.push("Attached image: No saved description found"); 258 | } 259 | } else { 260 | reply.push("Attached image: No saved description found"); 261 | } 262 | } 263 | 264 | if (targets.tweet.size > 0) { 265 | foundTarget = true; 266 | let fetched = await Promise.all( 267 | Array.from(targets.tweet).flatMap(async tweetId => 268 | fetchAltTextForTweet(twtr, tweetId) 269 | ) 270 | ); 271 | 272 | reply.push(...fetched); 273 | } 274 | 275 | if (!foundTarget) { 276 | reply.push("I don't see anything to check"); 277 | } 278 | 279 | return reply; 280 | } 281 | 282 | const help = `Tweet/Reply commands: 283 | To use these, tag the bot in either the tweet to be examined or a reply to that tweet. If a tweet is a reply, only the parent will be processed. 284 | Save: Saves alt text to the alt-text.org database for any images on the tweet or its parent. 285 | OCR or extract text: Attempts tp extract text from the images on a tweet or its parent. 286 | Analyze links: Produces a report on alt text usage for any linked websites. 287 | Explain: Respond with a quick explanation of alt text and how to add it. 288 | 289 | DM Commands: 290 | fetch : Searches the alt-text.org database for alt text for an image or the images on a tweet. 291 | ocr or extract text : Attempts to extract text from an image or the images on a tweet. 292 | check : Checks a tweet for alt text on images, or produces a report on a user's alt text usage. 
293 | describe : Attempts to describe an image or the images on a tweet 294 | help: Print this help message.`; 295 | 296 | async function handleDMEvent(twtr, oauth, msg) { 297 | if (msg.type && msg.type === "message_create") { 298 | if ( 299 | msg.message_create && 300 | msg.message_create.sender_id !== config.myUser && 301 | msg.message_create.message_data && 302 | msg.message_create.message_data.text 303 | ) { 304 | let text = msg.message_create.message_data.text.trim(); 305 | console.log(`Found DM text: '${text}'`); 306 | 307 | let reply = []; 308 | if (text.toUpperCase() === "PAUSE") { 309 | saveEnabled(msg.message_create.sender_id, false); 310 | reply.push("Pausing boost of tweets without alt text"); 311 | tweet( 312 | twtr, 313 | msg.message_create.sender_id, 314 | (name, username) => `${name} (@${username}) is signing off.` 315 | ); 316 | } else if (text.toUpperCase() === "START") { 317 | saveEnabled(msg.message_create.sender_id, true); 318 | reply.push("Beginning boost of tweets without alt text"); 319 | tweet( 320 | twtr, 321 | msg.message_create.sender_id, 322 | (name, username) => 323 | `${name} (@${username}) is going live. Please reply to this tweet if you're able to assist them with descriptions.` 324 | ); 325 | } else if (text.match(/^(ocr)|(extract text)/i)) { 326 | let ocrReply = await ocrDMCmd(twtr, oauth, msg, text); 327 | reply.push(...ocrReply); 328 | } else if (text.match(/^check/i)) { 329 | let checkReply = await checkDMCmd(twtr, text); 330 | reply.push(...checkReply); 331 | } else if (text.match(/^(fetch)|(search)/i)) { 332 | let fetched = await fetchDMCmd(twtr, oauth, msg, text); 333 | reply.push(...fetched); 334 | } else if (text.match(/^describe/i)) { 335 | let descReply = await describeDMCmd(twtr, oauth, msg, text) 336 | reply.push(...descReply) 337 | } else if (text.match(/^help/i)) { 338 | reply.push(help); 339 | } else { 340 | console.log("Got non-understood DM: '" + text + "'"); 341 | reply.push( 342 | "Unknown command. 
Try 'help' for a full list of commands. DM @HBeckPDX with questions." 343 | ); 344 | } 345 | 346 | await Promise.all( 347 | reply.map(dm => sendDM(twtr, msg.message_create.sender_id, dm)) 348 | ); 349 | } 350 | } 351 | } 352 | 353 | async function handleOcrMention(twtr, tweet, targetTweet, cmdReply) { 354 | let ocrs = await ocrTweetImages(twtr, targetTweet); 355 | if (ocrs) { 356 | const anySucceeded = ocrs.map(ocr => ocr.extracted).reduce((a, b) => a || b, false) 357 | if (!anySucceeded) { 358 | cmdReply.push(`Couldn't extract text from any images found`) 359 | return 360 | } 361 | 362 | let splitOcrs = ocrs.map(ocr => ({ 363 | img: ocr.img, 364 | text: ocr.text, 365 | locale: ocr.locale, 366 | split: splitText(ocr.text, 1000) 367 | })); 368 | 369 | let imageGroups = []; 370 | let uploadFailures = false; 371 | for (let i = 0; i < splitOcrs.length; i++) { 372 | let ocrRecord = splitOcrs[i]; 373 | let imageRecord = await fetchImage(ocrRecord.img); 374 | if (imageRecord) { 375 | let origMediaId = await uploadImageWithAltText( 376 | twtr, 377 | imageRecord.data, 378 | ocrRecord.split[0] 379 | ); 380 | 381 | if (!origMediaId) { 382 | uploadFailures = true; 383 | } 384 | 385 | let uploadsForImage = [ 386 | {mediaId: origMediaId, text: ocrRecord.split[0]} 387 | ]; 388 | 389 | for (let j = 1; j < ocrRecord.split.length; j++) { 390 | let auxImage = getAuxImage(ocrRecord.locale, j + 1, ocrRecord.split.length); 391 | let auxMediaId = await uploadImageWithAltText( 392 | twtr, 393 | auxImage, 394 | ocrRecord.split[j] 395 | ); 396 | 397 | if (!auxMediaId) { 398 | uploadFailures = true; 399 | } 400 | 401 | uploadsForImage.push({ 402 | mediaId: auxMediaId, 403 | text: ocrRecord.split[j] 404 | }); 405 | } 406 | 407 | imageGroups.push(uploadsForImage); 408 | } else { 409 | console.log( 410 | `${ts()}: Failed to fetch image ${ocrRecord.img}. 
Tweet: ${ 411 | tweet.user.screen_name 412 | }/${tweet.id_str}` 413 | ); 414 | break; 415 | } 416 | } 417 | 418 | let totalImagesToUpload = imageGroups 419 | .map(group => group.length) 420 | .reduce((prev, cur) => prev + cur); 421 | console.log(`${ts()}: Image groups: ${JSON.stringify(imageGroups)}`); 422 | 423 | if (uploadFailures) { 424 | console.log( 425 | `${ts()}: Failed to upload images for response to ${ 426 | tweet.user.screen_name 427 | }/${tweet.id_str}` 428 | ); 429 | cmdReply.push( 430 | "Failed to re-upload images, if the problem persists please contact @HBeckPDX" 431 | ); 432 | } else { 433 | if (totalImagesToUpload <= 4) { 434 | cmdReply.push({ 435 | text: getResponseText(splitOcrs), 436 | media: imageGroups.flatMap(group => group.map(img => img.mediaId)) 437 | }); 438 | } else { 439 | let tweetNum = 1; 440 | let numTweets = imageGroups 441 | .map( 442 | group => 443 | Math.floor(group.length / 4) + (group.length % 4 === 0 ? 0 : 1) 444 | ) 445 | .reduce((prev, curr) => prev + curr); 446 | 447 | imageGroups.forEach((group) => { 448 | for (let idxStart = 0; idxStart < group.length; idxStart += 4) { 449 | cmdReply.push({ 450 | text: `${getResponseText(splitOcrs)} ${tweetNum}/${numTweets}`, 451 | media: group 452 | .slice(idxStart, idxStart + 4) 453 | .map(img => img.mediaId) 454 | }); 455 | tweetNum++; 456 | } 457 | }); 458 | } 459 | } 460 | } else { 461 | console.log( 462 | `${ts()}: No images found on tweet ${tweet.user.screen_name}/${ 463 | tweet.id_str 464 | }` 465 | ); 466 | cmdReply.push("No images found to OCR"); 467 | } 468 | } 469 | 470 | async function handleFetchMention(twtr, targetTweet, cmdReply) { 471 | const images = Object.keys(getTweetImagesAndAlts(targetTweet)); 472 | const results = []; 473 | 474 | let foundAny = false; 475 | for (let image of images) { 476 | const parts = [] 477 | const alt = await fetchAltTextForUrl(image, "en") 478 | 479 | let resultAlt; 480 | if (alt) { 481 | foundAny = true; 482 | if (alt.ocr) { 483 | if 
(alt.ocr.length < 100) { 484 | parts.push(`OCR: ${alt.ocr}`) 485 | } else { 486 | parts.push("Has long OCR available, try OCR as well") 487 | } 488 | } 489 | 490 | for (let exactMatch of alt.exact) { 491 | parts.push(`Exact: ${exactMatch.alt_text}`) 492 | } 493 | 494 | for (let fuzzyMatch of alt.fuzzy) { 495 | parts.push(`${Math.floor(fuzzyMatch.score * 100)}% confidence: ${fuzzyMatch.alt_text}`) 496 | } 497 | 498 | resultAlt = parts.join("\n"); 499 | if (resultAlt.length > 1000) { 500 | const andMore = "More results available, try searching in DMs or on alt-text.org" 501 | const subset = [] 502 | for (let part of parts) { 503 | const lengthSoFar = subset.join("\n").length 504 | if (lengthSoFar + andMore.length + part.length + 1 < 1000) { 505 | subset.push(part) 506 | } else { 507 | break 508 | } 509 | } 510 | subset.push(andMore) 511 | resultAlt = subset.join("\n") 512 | } 513 | } else { 514 | resultAlt = "No alt text found." 515 | } 516 | 517 | 518 | const rawImage = await fetchImage(image) 519 | if (rawImage) { 520 | const mediaId = await uploadImageWithAltText(twtr, rawImage.data, resultAlt) 521 | if (mediaId) { 522 | results.push(mediaId) 523 | } else { 524 | console.log(`${ts()}: Failed to upload image for alt-text.org search: '${image}': '${resultAlt}'`) 525 | } 526 | } else { 527 | console.log(`${ts()}: Failed to fetch image for alt-text.org search: '${image}'`) 528 | } 529 | } 530 | 531 | if (foundAny && results.length > 0) { 532 | cmdReply.push({ 533 | text: "Search results in image descriptions", 534 | media: results 535 | }) 536 | } else { 537 | cmdReply.push("No results found for any images, or error re-uploading them. 
To help fill the database, sign up " + 538 | "at https://alt-text.org/sign-up.html, or include the #SaveAltText hashtag on your tweets with images") 539 | } 540 | } 541 | 542 | async function getTargetTweet(twtr, bareTweet, needsImages) { 543 | let targetTweet; 544 | let tweetTargetStr = "tweet"; 545 | 546 | if (needsImages) { 547 | let images; 548 | let tweet; 549 | 550 | tweet = await getTweet(twtr, bareTweet.id_str); 551 | images = tweet ? Object.keys(getTweetImagesAndAlts(tweet)) : [] 552 | if (images.length > 0) { 553 | console.log(`${ts()}: Found ${images.length} on tweet`) 554 | return { 555 | targetTweet: tweet, 556 | tweetTargetStr: "tweet" 557 | } 558 | } 559 | 560 | if (bareTweet.quoted_status_id_str) { 561 | tweet = await getTweet(twtr, bareTweet.quoted_status_id_str); 562 | images = tweet ? Object.keys(getTweetImagesAndAlts(tweet)) : [] 563 | if (images.length > 0) { 564 | console.log(`${ts()}: Found ${images.length} on quoted tweet`) 565 | return { 566 | targetTweet: tweet, 567 | tweetTargetStr: "quoted tweet" 568 | } 569 | } 570 | } 571 | 572 | if (bareTweet.in_reply_to_status_id_str) { 573 | tweet = await getTweet(twtr, bareTweet.in_reply_to_status_id_str); 574 | images = tweet ? Object.keys(getTweetImagesAndAlts(tweet)) : [] 575 | if (images.length > 0) { 576 | console.log(`${ts()}: Found ${images.length} on parent tweet`) 577 | return { 578 | targetTweet: tweet, 579 | tweetTargetStr: "parent tweet" 580 | } 581 | } 582 | 583 | if (tweet && tweet.quoted_status_id_str) { 584 | tweet = await getTweet(twtr, tweet.quoted_status_id_str); 585 | images = tweet ? 
Object.keys(getTweetImagesAndAlts(tweet)) : [] 586 | if (images.length > 0) { 587 | console.log(`${ts()}: Found ${images.length} on parent tweet's quoted tweet`) 588 | return { 589 | targetTweet: tweet, 590 | tweetTargetStr: "parent tweet's quoted tweet" 591 | } 592 | } 593 | } 594 | } 595 | 596 | console.log(`${ts()}: Needed image, but none found for ${bareTweet.user.id_str}/${bareTweet.id_str}`) 597 | return { 598 | targetTweet: null, 599 | tweetTargetStr: "no-images-found" 600 | } 601 | } else { 602 | if (bareTweet.quoted_status_id_str) { 603 | tweetTargetStr = "quoted tweet"; 604 | targetTweet = await getTweet(twtr, bareTweet.quoted_status_id_str); 605 | } else if (bareTweet.in_reply_to_status_id_str) { 606 | tweetTargetStr = "parent tweet"; 607 | targetTweet = await getTweet(twtr, bareTweet.in_reply_to_status_id_str); 608 | } else { 609 | tweetTargetStr = "tweet"; 610 | targetTweet = await getTweet(twtr, bareTweet.id_str); 611 | } 612 | 613 | return {targetTweet, tweetTargetStr} 614 | } 615 | } 616 | 617 | const explain = `Alt text allows people who can't see images to know what's in them 618 | 619 | What in your image is needed to enable someone who can't see it to be a full participant in the conversation? 620 | 621 | To add it click "Add Description" in browser or "+Alt" on mobile`; 622 | 623 | async function handleMention(twtr, oauth, tweet) { 624 | if (tweet.user.id_str === "1374555039528669184") { 625 | console.log(`${ts()}: Got tweetId ${tweet.id_str}, but it was from me`); 626 | return; 627 | } else if (tweet.retweeted_status) { 628 | console.log(`${ts()}: Got tweetId ${tweet.id_str}, but it was a retweet`); 629 | return; 630 | } 631 | 632 | let text; 633 | if (tweet.extended_tweet && tweet.extended_tweet.full_text) { 634 | text = tweet.extended_tweet.full_text; 635 | } else if (tweet.text) { 636 | text = tweet.text; 637 | } else { 638 | console.log( 639 | `${ts()}: Got tweet ${tweet.id_str} with no text??? 
${JSON.stringify( 640 | tweet 641 | )}` 642 | ); 643 | } 644 | 645 | if (!text.match(/@AltTextUtil/i)) { 646 | console.log( 647 | `${ts()}: Got mention, but it didn't actually contain my name: '${text}'` 648 | ); 649 | return; 650 | } 651 | 652 | const { 653 | targetTweet, 654 | tweetTargetStr 655 | } = await getTargetTweet(twtr, tweet, text.match(/(ocr)|(extract text)|(save)|(search)|(fetch)|(^(\s*@\w+)*\s*@AltTextUtil\s*$)/i)) 656 | 657 | if (tweetTargetStr === "no-images-found") { 658 | await reply( 659 | twtr, 660 | tweet.id_str, 661 | tweet.user.screen_name, 662 | "I don't see any images to process, sorry." 663 | ); 664 | return 665 | } 666 | 667 | if (!targetTweet) { 668 | await reply( 669 | twtr, 670 | tweet.id_str, 671 | tweet.user.screen_name, 672 | "Couldn't fetch tweet, is the account private?" 673 | ); 674 | return; 675 | } 676 | 677 | let cmdReply = []; 678 | if (text.match(/(ocr)|(extract text)/i) || text.match(/^(\s*@\w+)*\s*@AltTextUtil\s*$/i)) { 679 | await handleOcrMention(twtr, tweet, targetTweet, cmdReply) 680 | } else if (text.match(/analyze link(s?)/i)) { 681 | let urls = getUrls(targetTweet); 682 | if (urls.length === 0) { 683 | cmdReply.push( 684 | `Hmm, I don't see any links to analyze on ${tweetTargetStr}.` 685 | ); 686 | } else { 687 | let analysis = await analyzeUrls(urls, tweetTargetStr); 688 | cmdReply.push(...analysis); 689 | } 690 | } else if (text.match(/save/i)) { 691 | console.log(`${ts()}: Got save request: ${text} for ${targetTweet.user.screen_name}/${targetTweet.id_str}`) 692 | const imagesAndAlts = getTweetImagesAndAlts(targetTweet); 693 | console.log(`Found ${JSON.stringify(imagesAndAlts)}`) 694 | for (const [imageUrl, alt] of Object.entries(imagesAndAlts)) { 695 | console.log(`Attempting to save alt text for '${imageUrl}'`) 696 | let sent = await saveAltTextForImage(config.writeToken, imageUrl, targetTweet.lang, alt, targetTweet.user.id_str) 697 | console.log(`${ts()}: Saved alt text for ${imageUrl}: ${sent}`) 698 | } 699 | 
} else if (text.match(/explain/i)) { 700 | if (targetTweet.id_str === tweet.id_str) { 701 | cmdReply.push(explain); 702 | } else { 703 | cmdReply.push(`@${targetTweet.user.screen_name} ` + explain); 704 | } 705 | } else if (text.match(/(fetch)|(search)/i)) { 706 | await handleFetchMention(twtr, targetTweet, cmdReply) 707 | } else { 708 | console.log( 709 | `${ts()}: Got tweet https://twitter.com/status/${tweet.user.screen_name}/${ 710 | tweet.id_str 711 | }, but it didn't contain a command. Text: '${text}'` 712 | ); 713 | return; 714 | } 715 | 716 | if (cmdReply.length > 0) { 717 | await replyChain(twtr, cmdReply, tweet.id_str, tweet.user.screen_name); 718 | } else { 719 | console.log(`${ts()}: Command '${text}' processed, but no reply generated`); 720 | } 721 | } 722 | 723 | function handleEvent(twtr, oauth) { 724 | return async event => { 725 | if (event.direct_message_events && event.direct_message_events.forEach) { 726 | // console.log(`Got webhook event: ${JSON.stringify(event)}`); 727 | event.direct_message_events.forEach(msg => 728 | handleDMEvent(twtr, oauth, msg).catch(err => { 729 | console.log(`${ts()}: Uncaught error in DM handler`) 730 | console.log(err) 731 | }) 732 | ); 733 | } else if (event.tweet_create_events && event.tweet_create_events.forEach) { 734 | // console.log(`Got webhook event: ${JSON.stringify(event)}`); 735 | event.tweet_create_events.forEach(tweet => 736 | handleMention(twtr, oauth, tweet).catch(err => { 737 | console.log(`${ts()}: Uncaught error in mention handler`) 738 | console.log(err) 739 | }) 740 | ); 741 | } 742 | }; 743 | } 744 | 745 | async function startMonitor(twtr, oauth) { 746 | const hook = new twtrHook.Autohook(config.activityApiConfig); 747 | await hook.removeWebhooks(); 748 | hook.on("event", handleEvent(twtr, oauth)); 749 | await hook.start(); 750 | await hook.subscribe(config.activityApiConfig); 751 | return hook; 752 | } 753 | 754 | async function run() { 755 | const twtr = new 
twitter.TwitterClient(config.twitterClientConfig); 756 | let list = getListRecord(config.list); 757 | console.log("Found list:"); 758 | console.log(list); 759 | 760 | const oauth = OAuth({ 761 | consumer: { 762 | key: config.twitterClientConfig.apiKey, 763 | secret: config.twitterClientConfig.apiSecret 764 | }, 765 | signature_method: "HMAC-SHA1", 766 | hash_function(base_string, key) { 767 | return crypto 768 | .createHmac("sha1", key) 769 | .update(base_string) 770 | .digest("base64"); 771 | } 772 | }); 773 | 774 | startMonitor(twtr, oauth) 775 | .catch(err => { 776 | console.log(err); 777 | }); 778 | setInterval(pollLiveTweeters(twtr, list), 3000); 779 | } 780 | 781 | run(); 782 | -------------------------------------------------------------------------------- /src/alt-text-org.js: -------------------------------------------------------------------------------- 1 | const {ts, getTweetImagesAndAlts} = require("./util"); 2 | const {getTweet} = require("./twtr"); 3 | 4 | const fetch = require("node-fetch"); 5 | 6 | async function saveAltTextForImage(token, url, lang, alt, userId) { 7 | return await fetch("https://api.alt-text.org/library/v1/save", { 8 | method: "POST", 9 | headers: { 10 | "Content-Type": "application/json", 11 | "Accept": "application/json", 12 | "Authorization": `Bearer ${token}` 13 | }, 14 | body: JSON.stringify({ 15 | image: { 16 | url: url 17 | }, 18 | image_url: url, 19 | language: lang || "en", 20 | alt_text: alt, 21 | id_scope: "twitter", 22 | author_id: userId 23 | }) 24 | }).then(resp => { 25 | if (resp.ok) { 26 | return true; 27 | } else { 28 | console.log(`${ts()}: Unsuccessful save for url '${url}': ${resp.status} ${resp.statusText}`); 29 | return false; 30 | } 31 | }).catch(err => { 32 | console.log(`${ts()}: Failed to save alt for '${url}:`); 33 | console.log(err); 34 | return false; 35 | }); 36 | } 37 | 38 | async function fetchAltTextForTweet(twtr, tweetId) { 39 | let reply = []; 40 | let tweet = await getTweet(twtr, tweetId); 41 | 
if (tweet) { 42 | let images = Object.keys(getTweetImagesAndAlts(tweet)); 43 | if (images.length > 0) { 44 | let fetched = await Promise.all(images.map((img, idx) => { 45 | return fetchAltTextForUrl(img, tweet.lang || "en") 46 | .then(foundText => { 47 | if (foundText) { 48 | foundText.exact.map(text => 49 | `${tweet.user.screen_name}/${tweetId}: ${idx + 1}/${images.length} (exact): ${text.alt_text}` 50 | ).concat(foundText.fuzzy.map(text => 51 | `${tweet.user.screen_name}/${tweetId}: ${idx + 1}/${images.length} (Similarity: ${text.score}): ${text.alt_text}` 52 | )); 53 | } else { 54 | return [`${tweet.user.screen_name}/${tweetId}: ${idx + 1}/${images.length}: Couldn't find any saved alt text`]; 55 | } 56 | }) 57 | .catch(e => { 58 | console.log(`${ts()}: Error fetching text for image ${img}: ${JSON.stringify(e)}`); 59 | return `${tweet.user.screen_name}/${tweetId}: ${idx + 1}/${images.length}: Error fetching altText`; 60 | }); 61 | })); 62 | 63 | fetched.forEach(texts => reply.push(...texts)); 64 | } else { 65 | reply.push(`${tweet.user.screen_name}/${tweetId}: No images found`); 66 | } 67 | } else { 68 | reply.push(`Couldn't fetch tweet ${tweetId}`); 69 | } 70 | 71 | return reply; 72 | } 73 | 74 | async function fetchAltTextForUrl(url, lang) { 75 | return await fetch("https://api.alt-text.org/library/v1/fetch", { 76 | method: "POST", headers: { 77 | "Content-Type": "application/json" 78 | }, body: JSON.stringify({ 79 | image: { 80 | url: url 81 | }, 82 | language: lang || "en" 83 | }) 84 | }).then(async resp => { 85 | if (resp.ok) { 86 | return await resp.json(); 87 | } else if (resp.status === 404) { 88 | return null; 89 | } else { 90 | console.log(`${ts()}: Failed to fetch for url '${url}': Status: ${resp.status} Body: ${await resp.text()}`); 91 | return null; 92 | } 93 | }).catch(err => { 94 | console.log(`${ts()}: Failed to fetch alt for '${url}: ${err}`); 95 | return null; 96 | }); 97 | } 98 | 99 | async function fetchAltTextForBase64(image, lang) { 100 | 
let resp = await fetch("https://api.alt-text.org/library/v1/fetch", { 101 | method: "POST", 102 | headers: { 103 | "Content-Type": "application/json" 104 | }, 105 | body: JSON.stringify({ 106 | image:{ 107 | base64: `data:${image.mimeType};base64,${image.data}` 108 | }, 109 | language: lang || "en" 110 | }) 111 | }); 112 | 113 | if (resp.ok) { 114 | return await resp.json(); 115 | } else if (resp.status === 404) { 116 | return null; 117 | } else { 118 | console.log(`${ts()}: Failed to fetch for raw image: Status: ${resp.status} Body: ${await resp.text()}`); 119 | return null; 120 | } 121 | } 122 | 123 | exports.fetchAltTextForUrl = fetchAltTextForUrl; 124 | exports.fetchAltTextForTweet = fetchAltTextForTweet; 125 | exports.fetchAltTextForBase64 = fetchAltTextForBase64; 126 | exports.saveAltTextForImage = saveAltTextForImage; 127 | -------------------------------------------------------------------------------- /src/analyze-links.js: -------------------------------------------------------------------------------- 1 | const { ts } = require("./util"); 2 | 3 | const html = require("node-html-parser"); 4 | const fetch = require("node-fetch"); 5 | 6 | function fetchPage(url) { 7 | return fetch(url, { 8 | method: "GET" 9 | }) 10 | .then(async resp => { 11 | if (resp.ok) { 12 | let text = await resp.text(); 13 | return html.parse(text); 14 | } else { 15 | return null; 16 | } 17 | }) 18 | .catch(err => { 19 | console.log(`${ts()}: Failed to fetch page at ${url}: ${err}`); 20 | return null; 21 | }); 22 | } 23 | 24 | function getMetaTagDetails(dom) { 25 | let metas = dom.querySelectorAll("meta"); 26 | let result = { 27 | hasPreviewImage: false, 28 | hasPreviewAltText: false 29 | }; 30 | 31 | metas.forEach(meta => { 32 | let property = meta.getAttribute("property"); 33 | if (property) { 34 | if (property === "og:image" || property === "twitter:image") { 35 | result.hasPreviewImage = true; 36 | } else if ( 37 | property === "og:image:alt" || 38 | property === 
"twitter:image:alt" 39 | ) { 40 | result.hasPreviewAltText = true; 41 | } 42 | } 43 | }); 44 | 45 | return result; 46 | } 47 | 48 | async function analyzeLink(url) { 49 | let dom = await fetchPage(url); 50 | if (!dom) { 51 | return { error: `${ts()}: Couldn't fetch link: '${url}'` }; 52 | } 53 | 54 | return { 55 | openGraphDetails: getMetaTagDetails(dom), 56 | imageStats: await getImageStats(dom) 57 | }; 58 | } 59 | 60 | function getUrls(tweet) { 61 | if (!tweet.entities) { 62 | console.log( 63 | `${ts()}: Tweet had no entities. Tweet: ${JSON.stringify(tweet, null, 2)}` 64 | ); 65 | return []; 66 | } else if (!tweet.entities.urls) { 67 | console.log( 68 | `${ts()}: Tweet entities had no urls. Tweet: ${JSON.stringify( 69 | tweet.entities, 70 | null, 71 | 2 72 | )}` 73 | ); 74 | return []; 75 | } 76 | 77 | return tweet.entities.urls.map(url => { 78 | return { 79 | expanded: url.expanded_url, 80 | display: url.display_url 81 | }; 82 | }); 83 | } 84 | 85 | async function getImageStats(dom) { 86 | let imgs = dom.querySelectorAll("img"); 87 | let result = { 88 | images: 0, 89 | withAltText: 0 90 | }; 91 | 92 | for (const img of imgs) { 93 | let src = img.getAttribute("src"); 94 | if (src) { 95 | result.images++; 96 | let alt = img.getAttribute("alt"); 97 | if (alt && alt.length > 16) { 98 | result.withAltText++; 99 | } 100 | } 101 | } 102 | 103 | return result; 104 | } 105 | 106 | function stringifyAnalysis(analysis) { 107 | let parts = []; 108 | if (analysis.openGraphDetails.hasPreviewImage) { 109 | if (analysis.openGraphDetails.hasPreviewAltText) { 110 | parts.push("Preview has alt text,"); 111 | } else { 112 | parts.push("Preview has no alt text,"); 113 | } 114 | } 115 | 116 | if (analysis.imageStats.images > 0) { 117 | parts.push( 118 | `${analysis.imageStats.withAltText}/${analysis.imageStats.images} images have alt text` 119 | ); 120 | } else { 121 | parts.push("No static images on page."); 122 | } 123 | 124 | return parts.join(" "); 125 | } 126 | 127 | async 
function analyzeUrls(urls, tweetTargetStr) { 128 | return Promise.all( 129 | urls.map(async url => { 130 | let analysis = await analyzeLink(url.expanded); 131 | return `${url.display}: ${stringifyAnalysis(analysis)}`; 132 | }) 133 | ) 134 | } 135 | 136 | exports.analyzeUrls = analyzeUrls; 137 | exports.getUrls = getUrls; -------------------------------------------------------------------------------- /src/check.js: -------------------------------------------------------------------------------- 1 | const { getUserId, getTweets, getTweet } = require("./twtr"); 2 | const { hasImage, hasImageWithoutAltText } = require("./tweet-predicates"); 3 | const { getTweetImagesAndAlts } = require("./util"); 4 | 5 | async function checkUserTweets(twtr, screenName) { 6 | if (!screenName.match(/^@/)) { 7 | screenName = "@" + screenName; 8 | } 9 | 10 | let userId = await getUserId(twtr, screenName); 11 | if (userId) { 12 | let userTweets = await getTweets(twtr, userId, 200); 13 | let hasImages = userTweets.filter(t => hasImage(t)); 14 | let hasNoAlt = hasImages.filter(t => hasImageWithoutAltText(t)); 15 | 16 | return `User: ${screenName}: Checked ${userTweets.length} tweets and found ${hasImages.length} with images, of which ${hasNoAlt.length} were missing alt text.`; 17 | } else { 18 | return `User: ${screenName}: Couldn't find user`; 19 | } 20 | } 21 | 22 | async function checkTweet(twtr, tweetId) { 23 | let reply = []; 24 | let tweet = await getTweet(twtr, tweetId); 25 | if (tweet) { 26 | let texts = Object.values(getTweetImagesAndAlts(tweet)); 27 | if (texts.length > 0) { 28 | texts.forEach((text, idx) => { 29 | if (text) { 30 | reply.push(`Image ${idx + 1}/${texts.length}: ${text}`); 31 | } else { 32 | reply.push(`Image ${idx + 1}/${texts.length}: No alt text provided`); 33 | } 34 | }); 35 | } else { 36 | reply.push("I don't see any images on that tweet"); 37 | } 38 | } else { 39 | reply.push(`Couldn't fetch tweet: ${tweetId}`); 40 | } 41 | 42 | return reply; 43 | } 44 | 45 | 
exports.checkUserTweets = checkUserTweets; 46 | exports.checkTweet = checkTweet; 47 | -------------------------------------------------------------------------------- /src/describe.js: -------------------------------------------------------------------------------- 1 | const fetch = require("node-fetch") 2 | const {ts, getTweetImagesAndAlts} = require("./util"); 3 | 4 | function responseToMessage(descriptionBody) { 5 | const lines = [] 6 | if (descriptionBody.captions && descriptionBody.captions.length > 0) { 7 | descriptionBody.captions.forEach(caption => { 8 | lines.push(`${Math.floor(caption.confidence * 100)}% confidence: ${caption.text}`) 9 | }) 10 | } else { 11 | lines.push("No descriptions could be generated") 12 | } 13 | 14 | if (descriptionBody.tags && descriptionBody.tags.length > 0) { 15 | lines.push("Tags: " + descriptionBody.tags.join(" ")) 16 | } 17 | 18 | return lines.join("\n") 19 | } 20 | 21 | async function doFetch(azureUrl, fetchArgs) { 22 | return await fetch(`${azureUrl}/vision/v3.2/describe`, fetchArgs).then(async resp => { 23 | if (resp) { 24 | if (resp.ok) { 25 | const json = await resp.json(); 26 | if (!json.description) { 27 | console.log(`${ts()}: Got description response, but without description field: ${JSON.stringify(json)}`) 28 | return null 29 | } 30 | 31 | return responseToMessage(json.description) 32 | } else { 33 | console.log(`${ts()}: Failed to fetch image description for type ${fetchArgs.headers["Content-Type"]}: ${resp.status} ${resp.statusText}: ${await resp.text()}`) 34 | return null 35 | } 36 | } else { 37 | console.log(`${ts()}: Got null response for type ${fetchArgs.headers["Content-Type"]}`) 38 | return null 39 | } 40 | }).catch(err => { 41 | console.log(`${ts()}: Error fetching image description for type: ${fetchArgs.headers["Content-Type"]}`) 42 | console.log(err) 43 | return null 44 | }) 45 | } 46 | 47 | async function describeRaw(azureUrl, azureKey, image) { 48 | return await doFetch(azureUrl, { 49 | method: "POST", 
50 | headers: { 51 | "Content-Type": "application/octet-stream", 52 | "Ocp-Apim-Subscription-Key": azureKey 53 | }, 54 | body: Buffer.from(image.data, "base64") 55 | }) 56 | } 57 | 58 | async function describeUrl(azureUrl, azureKey, url) { 59 | return await doFetch(azureUrl,{ 60 | method: "POST", 61 | headers: { 62 | "Content-Type": "application/json", 63 | "Ocp-Apim-Subscription-Key": azureKey 64 | }, 65 | body: JSON.stringify({url}) 66 | }) 67 | } 68 | 69 | async function describeTweetImages(azureUrl, azureKey, tweet) { 70 | let images = Object.keys(getTweetImagesAndAlts(tweet)); 71 | if (images.length > 0) { 72 | return await Promise.all( 73 | images.map((img) => { 74 | return describeUrl(azureUrl, azureKey, img) 75 | .then(imgDesc => { 76 | if (imgDesc) { 77 | return {img: img, text: imgDesc}; 78 | } else { 79 | return {img: img, text: "No description found"}; 80 | } 81 | }) 82 | .catch(e => { 83 | console.log( 84 | `Error fetching description for image ${img}: ${JSON.stringify(e)}` 85 | ); 86 | return {img: img, text: "Error describing image"}; 87 | }); 88 | }) 89 | ).catch(err => { 90 | console.log(`${ts()}: Error attempting to describe images on https://twitter.com/status/${tweet.user.screen_name}/${tweet.id_str}`) 91 | console.log(err) 92 | return null 93 | }); 94 | } else { 95 | return null 96 | } 97 | } 98 | 99 | exports.describeUrl = describeUrl; 100 | exports.describeRaw = describeRaw; 101 | exports.describeTweetImages = describeTweetImages; -------------------------------------------------------------------------------- /src/live-tweeters.js: -------------------------------------------------------------------------------- 1 | const fs = require("fs"); 2 | 3 | const { fetchListTweets, retweet } = require("./twtr"); 4 | const { hasImageWithoutAltTextOrVideo } = require("./tweet-predicates"); 5 | 6 | const enabled = getEnabled(); 7 | 8 | function saveEnabled(userId, isEnabled) { 9 | enabled[userId] = isEnabled; 10 | fs.writeFileSync("enabled.json", 
JSON.stringify(enabled)); 11 | } 12 | 13 | function getEnabled() { 14 | return JSON.parse(fs.readFileSync("enabled.json", "utf8")); 15 | } 16 | 17 | function getListRecord(listId) { 18 | let lastSeen; 19 | try { 20 | lastSeen = fs.readFileSync(`lists/${listId}.tweet`, "utf8"); 21 | } catch (err) { 22 | if (err.code === "ENOENT") { 23 | console.log("No file found for list: " + listId); 24 | lastSeen = null; 25 | } else { 26 | throw err; 27 | } 28 | } 29 | 30 | return { 31 | id: listId, 32 | lastSeen: lastSeen 33 | }; 34 | } 35 | 36 | function markLastTweetsSeen(list, tweets) { 37 | let last = tweets.sort( 38 | (t1, t2) => Date.parse(t2.created_at) - Date.parse(t1.created_at) 39 | )[0]; 40 | 41 | fs.writeFileSync(`lists/${list.id}.tweet`, last.id_str); 42 | list.lastSeen = last.id_str; 43 | } 44 | 45 | function pollLiveTweeters(twtr, list) { 46 | return async () => { 47 | let newTweets = await fetchListTweets(twtr, list).catch(err => { 48 | console.log(err); 49 | return []; 50 | }); 51 | 52 | let badTweets = newTweets.filter(hasImageWithoutAltTextOrVideo); 53 | badTweets.forEach(tweet => { 54 | if (enabled[tweet.user.id_str]) { 55 | retweet(twtr, tweet); 56 | } 57 | }); 58 | 59 | if (newTweets.length > 0) { 60 | console.log(`Found ${newTweets.length} new tweets for list ${list.id}`); 61 | markLastTweetsSeen(list, newTweets); 62 | } 63 | }; 64 | } 65 | 66 | exports.saveEnabled = saveEnabled; 67 | exports.getListRecord = getListRecord; 68 | exports.pollLiveTweeters = pollLiveTweeters; -------------------------------------------------------------------------------- /src/ocr.js: -------------------------------------------------------------------------------- 1 | const vision = require("@google-cloud/vision"); 2 | const {createCanvas} = require("canvas"); 3 | const {default: canvasTxt} = require('canvas-txt') 4 | 5 | const {ts, getTweetImagesAndAlts} = require("./util"); 6 | const fs = require("fs"); 7 | 8 | const visionClient = new vision.ImageAnnotatorClient(); 9 | 10 | 
/**
 * Picks the key with the highest count from a locale -> count map.
 *
 * @param {Object<string, number>} counts
 * @returns {string} The most frequent locale, or "default" for an empty map.
 */
function mostCommonLocale(counts) {
  // Sort descending: the original sorted ascending and so picked the rarest.
  const [top] = Object.entries(counts)
    .sort((entryA, entryB) => entryB[1] - entryA[1]);
  return top ? top[0] : "default";
}

/**
 * Runs text detection on an image URL.
 *
 * @param {string} url - Image to recognise.
 * @returns {Promise<{text: string, locale: string}|null>} Text with newlines
 *   replaced by spaces plus the most common locale among the annotations that
 *   carry one; null when nothing was recognised or the request failed.
 */
async function ocr(url) {
  console.log(`${ts()}: Attempting to recognize ${url}`);
  let [result] = await visionClient
    .textDetection(url)
    .catch(err => {
      console.log(err)
      return []
    });
  if (result && result.textAnnotations) {
    // Only annotations that carry a locale are used for text and locale.
    const localized = result.textAnnotations.filter(t => !!t.locale);
    const text = localized
      .map(t => t.description)
      .join(" ")
      .replace(/(\r\n|\n|\r)/gm, " ");

    if (!text) {
      return null
    }

    const locales = localized.reduce((loc, t) => {
      loc[t.locale] = (loc[t.locale] || 0) + 1
      return loc
    }, {})

    return {
      text: text,
      locale: mostCommonLocale(locales)
    };
  } else {
    return null;
  }
}

/**
 * Runs text detection on raw image content.
 *
 * @param {{data: string}} rawImage - `data` is sent as the image content.
 * @returns {Promise<{locale: string, text: string}|null>} Full text plus the
 *   first detected language of the first page ("default" when absent); null
 *   when no text was found or the request failed.
 */
async function ocrRaw(rawImage) {
  let requests = [
    {
      image: {
        content: rawImage.data
      },
      features: [{type: "TEXT_DETECTION"}]
    }
  ];

  let [result] = await visionClient
    .batchAnnotateImages({requests})
    .catch(err => {
      console.log(err)
      return [];
    });

  if (
    result &&
    result.responses &&
    result.responses[0] &&
    result.responses[0].fullTextAnnotation &&
    result.responses[0].fullTextAnnotation.text
  ) {
    const fullText = result.responses[0].fullTextAnnotation

    let detectedLang = "default"
    if (fullText.pages &&
      fullText.pages[0] &&
      fullText.pages[0].property &&
      fullText.pages[0].property.detectedLanguages &&
      fullText.pages[0].property.detectedLanguages[0] &&
      fullText.pages[0].property.detectedLanguages[0].languageCode) {
      detectedLang = fullText.pages[0].property.detectedLanguages[0].languageCode
    }

    return {
      locale: detectedLang,
      text: fullText.text
    };
  } else {
    console.log("No text found. Full response: " + JSON.stringify(result));
    return null;
  }
}

/**
 * Runs OCR on every image attached to a tweet.
 *
 * @param {object} twtr - Unused; kept for interface compatibility.
 * @param {object} tweet - Tweet object.
 * @returns {Promise<{img: string, text: string, locale: string, extracted: boolean}[]|null>}
 *   One record per image; null when the tweet has no images or the batch fails.
 */
async function ocrTweetImages(twtr, tweet) {
  let images = Object.keys(getTweetImagesAndAlts(tweet));
  if (images.length > 0) {
    return await Promise.all(
      images.map((img) => {
        return ocr(img)
          .then(imgOcr => {
            if (imgOcr) {
              return {img: img, text: imgOcr.text, locale: imgOcr.locale, extracted: true};
            } else {
              return {img: img, text: "No text extracted", locale: "default", extracted: false};
            }
          })
          .catch(e => {
            console.log(
              `Error fetching OCR for image ${img}: ${JSON.stringify(e)}`
            );
            return {img: img, text: "Error extracting text", locale: "default", extracted: false};
          });
      })
    ).catch(err => {
      // Path order fixed: tweet links are /<screen_name>/status/<id>.
      console.log(`${ts()}: Error attempting to recognize images on https://twitter.com/${tweet.user.screen_name}/status/${tweet.id_str}`)
      console.log(err)
      return null
    });
  } else {
    return null
  }
}

// Caption drawn on auxiliary images, keyed by locale.
const additionalImageText = {
  default: "Alt Text Continued",
  ca: "Continuació de la descripció de les imatges",
  de: "Bildbeschreibung fortgesetzt",
  en: "Alt Text Continued",
  es: "Continuación de la descripción de las imágenes",
  fa: "توضیحات عکس ادامه دارد",
  fr: "Description de l'image, suite",
  ja: "画像の説明(続き)",
  nl: "Overloop van tekst uit het vorige plaatje",
  pt: "Descrição da imagem continuação"
}

const auxImageEdgeLength = 1000;
const auxImageFontPixels = 100

/**
 * Renders a square white image with a localized caption centred on it and
 * "num/total" in the bottom-right corner.
 *
 * @param {string} locale - Key into additionalImageText; unknown keys use the
 *   default caption.
 * @param {number} num - Position of this image in the sequence.
 * @param {number} total - Length of the sequence.
 * @returns {string} The part of the canvas data URL after the first comma.
 */
function getAuxImage(locale, num, total) {
  const canvas = createCanvas(auxImageEdgeLength, auxImageEdgeLength);
  const ctx = canvas.getContext('2d');
  const text = additionalImageText[locale] || additionalImageText.default

  ctx.fillStyle = "white"
  ctx.fillRect(0, 0, auxImageEdgeLength, auxImageEdgeLength)

  ctx.fillStyle = "black"
  ctx.font = `bold ${auxImageFontPixels}px sans-serif`;

  canvasTxt.fontSize = 100
  canvasTxt.fontStyle = "bold"
  canvasTxt.align = "center"
  canvasTxt.vAlign = "middle"
  canvasTxt.drawText(ctx, text, 50, 0, auxImageEdgeLength - 100, auxImageEdgeLength - 100)

  ctx.textAlign = "right"
  ctx.textBaseline = "bottom"
  ctx.font = `${auxImageFontPixels / 2}px sans-serif`
  ctx.fillText(`${num}/${total}`, auxImageEdgeLength - 20, auxImageEdgeLength - 20)

  return canvas.toDataURL().split(",")[1];
}

// Reply text announcing that extracted text is in the image descriptions,
// keyed by locale.
const responseText = {
  default: "Extracted text in image descriptions",
  ca: "El text extret és a les descripcions de les imatges",
  de: "Extrahierter Text in Bildbeschreibungen",
  en: "Extracted text in image descriptions",
  es: "El texto extraído está en las descripciones de las imágenes",
  fa: "توضیحات چاپی درعکس را درتوضیحات تصویر میخونید",
  fr: "Texte extrait dans les descriptions d'images",
  ja: "抽出されたテキストは画像の説明にあります",
  nl: "Tekst uit afbeeldingsbeschrijvingen gehaald",
  pt: "Texto extraído nas descrições das imagens"
}

/**
 * Chooses the reply text for the locale that occurs most often among the
 * image records, ignoring records whose locale is "default".
 *
 * @param {{locale: string}[]} imageRecords
 * @returns {string} Localized text, or the default text when no record has a
 *   known locale.
 */
function getResponseText(imageRecords) {
  const locales = imageRecords
    .filter(r => r.locale !== "default")
    .reduce((loc, r) => {
      loc[r.locale] = (loc[r.locale] || 0) + 1
      return loc
    }, {})

  const locale = mostCommonLocale(locales)

  return responseText[locale] || responseText.default
}

exports.ocr = ocr;
exports.ocrRaw = ocrRaw;
exports.ocrTweetImages = ocrTweetImages;
exports.getAuxImage = getAuxImage;
exports.getResponseText = getResponseText;

-------------------------------------------------------------------------------- /src/tweet-predicates.js: -------------------------------------------------------------------------------- 1 | function getMedia(tweet) { 2 | let entities = tweet["extended_entities"]; 3 | if (!entities) { 4 | return null; 5 | } 6 | 7 | return entities["media"]; 8 | } 9 | 10 | function hasImageWithoutAltTextOrVideo(tweet) { 11 | let media = getMedia(tweet); 12 | if (!media) { 13 | return false; 14 | } 15 | 16 | let hasPicWithoutAltText = false; 17 | media.forEach(m => { 18 | if ( 19 | ((m["type"] === "photo" || m["type"] === "animated_gif") && 20 | !m["ext_alt_text"]) || 21 | m["type"] === "video" 22 | ) { 23 | hasPicWithoutAltText = true; 24 | } 25 | }); 26 | 27 | return hasPicWithoutAltText; 28 | } 29 | 30 | function hasImageWithoutAltText(tweet) { 31 | let media = getMedia(tweet); 32 | if (!media) { 33 | return false; 34 | } 35 | 36 | let hasPicWithoutAltText = false; 37 | media.forEach(m => { 38 | if ( 39 | (m["type"] === "photo" || m["type"] === "animated_gif") && 40 | !m["ext_alt_text"] 41 | ) { 42 | hasPicWithoutAltText = true; 43 | } 44 | }); 45 | 46 | return hasPicWithoutAltText; 47 | } 48 | 49 | function hasImage(tweet) { 50 | let media = getMedia(tweet); 51 | if (!media) { 52 | return false; 53 | } 54 | 55 | let hasPic = false; 56 | media.forEach(m => { 57 | if (m["type"] === "photo" || m["type"] === "animated_gif") { 58 | hasPic = true; 59 | } 60 | }); 61 | 62 | return hasPic; 63 | } 64 | 65 | exports.getMedia = getMedia; 66 | exports.hasImageWithoutAltTextOrVideo = hasImageWithoutAltTextOrVideo; 67 | exports.hasImageWithoutAltText = hasImageWithoutAltText; 68 | exports.hasImage = hasImage; 69 | -------------------------------------------------------------------------------- /src/twtr.js: -------------------------------------------------------------------------------- 1 | const {ts} = require("./util"); 2 | 3 | async function tweet(twtr, userId, contentFun) { 4 | await 
twtr.accountsAndUsers 5 | .usersShow({ 6 | user_id: userId, 7 | "user.fields": "name,username" 8 | }) 9 | .then(async user => { 10 | let content = contentFun(user.name, user.screen_name); 11 | console.log(`${ts()}: Tweeting '${content}'`); 12 | return await twtr.tweets.statusesUpdate({ 13 | status: content 14 | }); 15 | }) 16 | .catch(err => { 17 | console.log(err); 18 | return null; 19 | }); 20 | } 21 | 22 | async function retweet(twtr, tweet, qtStatus) { 23 | let tweetLink = `https://twitter.com/${tweet.user.screen_name}/status/${tweet.id_str}`; 24 | console.log(`Retweeting '${tweetLink}' with status '${qtStatus}'`); 25 | return await twtr.tweets 26 | .statusesUpdate({ 27 | status: qtStatus, 28 | attachment_url: tweetLink 29 | }) 30 | .catch(err => { 31 | console.log(err); 32 | return null; 33 | }); 34 | } 35 | 36 | async function fetchListTweets(twtr, list) { 37 | let params; 38 | if (list.lastSeen) { 39 | params = { 40 | list_id: list.id, 41 | since_id: list.lastSeen, 42 | include_rts: false, 43 | include_entities: true, 44 | include_ext_alt_text: true, 45 | tweet_mode: "extended" 46 | }; 47 | } else { 48 | params = { 49 | list_id: list.id, 50 | include_rts: false, 51 | include_entities: true, 52 | include_ext_alt_text: true, 53 | count: 1, 54 | tweet_mode: "extended" 55 | }; 56 | } 57 | 58 | return twtr.accountsAndUsers.listsStatuses(params); 59 | } 60 | 61 | async function sendDM(twtr, userId, message) { 62 | if (typeof message === "string") { 63 | await twtr.directMessages 64 | .eventsNew({ 65 | event: { 66 | type: "message_create", 67 | message_create: { 68 | target: { 69 | recipient_id: userId 70 | }, 71 | message_data: { 72 | text: message 73 | } 74 | } 75 | } 76 | }) 77 | .catch(err => { 78 | console.log(`${ts()}: DM Error sending ${message}`); 79 | console.log(err) 80 | }); 81 | console.log(`${ts()}: DMing: '${message}'`); 82 | } else if (typeof message === "object") { 83 | await twtr.directMessages 84 | .eventsNew({ 85 | event: { 86 | type: 
"message_create", 87 | message_create: { 88 | target: { 89 | recipient_id: userId 90 | }, 91 | message_data: { 92 | text: message.text, 93 | attachment: { 94 | type: "media", 95 | media: { 96 | id: message.mediaId 97 | } 98 | } 99 | } 100 | } 101 | } 102 | }) 103 | .catch(err => { 104 | console.log(`${ts()}: DM Error sending ${message}`); 105 | console.log(err) 106 | }); 107 | console.log(`${ts()}: DMing: '${JSON.stringify(message)}'`); 108 | } else { 109 | console.log(`${ts()}: Got unexpected message type: ${typeof message}: '${JSON.stringify(message)}'`) 110 | } 111 | } 112 | 113 | async function getUserId(twtr, userName) { 114 | const result = await twtr.accountsAndUsers 115 | .usersShow({ 116 | screen_name: userName 117 | }) 118 | .catch(e => { 119 | console.log(JSON.stringify(e)); 120 | return null; 121 | }); 122 | if (result) { 123 | return result.id_str; 124 | } else { 125 | return null; 126 | } 127 | } 128 | 129 | async function getTweets(twtr, userId, limit) { 130 | let tweets = []; 131 | let batch = await twtr.tweets.statusesUserTimeline({ 132 | user_id: userId, 133 | count: limit < 200 ? 
limit : 200, 134 | include_rts: false, 135 | include_ext_alt_text: true, 136 | tweet_mode: "extended" 137 | }); 138 | batch.forEach(tweet => tweets.push(tweet)); 139 | 140 | return tweets; 141 | } 142 | 143 | async function getTweet(twtr, tweetId) { 144 | console.log(`${ts()}: Attempting to fetch tweetId: '${tweetId}'`); 145 | return await twtr.tweets 146 | .statusesShow({ 147 | id: tweetId, 148 | include_entities: true, 149 | trim_user: false, 150 | include_ext_alt_text: true, 151 | tweet_mode: "extended" 152 | }) 153 | .catch(err => { 154 | console.log( 155 | `${ts()}: Fetch tweetId ${tweetId} failed: '${JSON.stringify(err)}'` 156 | ); 157 | return null; 158 | }); 159 | } 160 | 161 | async function reply(twtr, replyToId, replyToUsername, body) { 162 | console.log( 163 | `Got body to reply: ${JSON.stringify(body)} of type ${typeof body}` 164 | ); 165 | if (typeof body === "string") { 166 | body = { 167 | text: body, 168 | quoted: null, 169 | media: null 170 | }; 171 | } 172 | 173 | let request = { 174 | status: `@${replyToUsername} ${body.text}`, 175 | in_reply_to_status_id: replyToId 176 | }; 177 | 178 | if (body.media) { 179 | request.media_ids = body.media.join(","); 180 | } 181 | 182 | if (body.quoted) { 183 | request.attachment_url = body.quoted; 184 | } 185 | 186 | console.log( 187 | `${ts()}: Replying ${body.text.replace( 188 | "\n", 189 | "\\n" 190 | )} media: ${body.media ? 
body.media.join( 191 | "," 192 | ) : "N/A"} to tweet ${replyToId} and username ${replyToUsername}` 193 | ); 194 | return await twtr.tweets 195 | .statusesUpdate(request) 196 | .then(resp => { 197 | return resp.id_str; 198 | }) 199 | .catch(err => { 200 | console.log(err); 201 | return replyToId; 202 | }); 203 | } 204 | 205 | async function replyChain(twtr, split, replyToId, replyToUsername) { 206 | if (split.length === 1) { 207 | return await reply(twtr, replyToId, replyToUsername, split[0]); 208 | } else { 209 | let replyChainId = replyToId; 210 | let replyChainUsername = replyToUsername; 211 | for (let i = 0; i < split.length; i++) { 212 | let message = null; 213 | if (typeof split[i] === "string") { 214 | message = 215 | replyChainUsername === replyToUsername 216 | ? split[i] 217 | : `@${replyToUsername} ${split[i]}`; 218 | } else { 219 | message = { 220 | text: 221 | (replyChainUsername === replyToUsername 222 | ? split[i].text 223 | : `@${replyToUsername} ${split[i].text}`), 224 | media: split[i].media, 225 | quoted: split[i].quoted 226 | }; 227 | } 228 | 229 | replyChainId = await reply(twtr, replyChainId, replyChainUsername, message); 230 | replyChainUsername = "AltTextUtil"; 231 | } 232 | } 233 | } 234 | 235 | async function uploadMedia(twtr, mediaBytes) { 236 | return await twtr.media 237 | .mediaUpload({media_data: mediaBytes}) 238 | .then(resp => { 239 | return resp.media_id_string; 240 | }) 241 | .catch(err => { 242 | console.log(`${ts()}: Failed to upload media: ${JSON.stringify(err)}`); 243 | return null; 244 | }); 245 | } 246 | 247 | async function setAltText(twtr, mediaId, altText) { 248 | return await twtr.media.mediaMetadataCreate({ 249 | media_id: mediaId, 250 | alt_text: {text: altText} 251 | }).then(() => true).catch(err => { 252 | console.log(`${ts()}: Error setting alt text on mediaId ${mediaId}: '${altText}'`) 253 | console.log(err) 254 | return null 255 | }); 256 | } 257 | 258 | async function uploadImageWithAltText(twtr, mediaBytes, 
altText) { 259 | let mediaId = await uploadMedia(twtr, mediaBytes); 260 | if (!mediaId) { 261 | return null; 262 | } 263 | 264 | const altSet = await setAltText(twtr, mediaId, altText); 265 | if (altSet) { 266 | return mediaId; 267 | } else { 268 | return null 269 | } 270 | } 271 | 272 | exports.tweet = tweet; 273 | exports.retweet = retweet; 274 | exports.fetchListTweets = fetchListTweets; 275 | exports.sendDM = sendDM; 276 | exports.getUserId = getUserId; 277 | exports.getTweets = getTweets; 278 | exports.getTweet = getTweet; 279 | exports.reply = reply; 280 | exports.replyChain = replyChain; 281 | exports.uploadMedia = uploadMedia; 282 | exports.setAltText = setAltText; 283 | exports.uploadImageWithAltText = uploadImageWithAltText; 284 | -------------------------------------------------------------------------------- /src/util.js: -------------------------------------------------------------------------------- 1 | const splitter = require("unicode-default-word-boundary"); 2 | const fetch = require("node-fetch"); 3 | const base64 = require("base64-arraybuffer"); 4 | const fs = require("fs"); 5 | 6 | function ts() { 7 | return new Date().toISOString(); 8 | } 9 | 10 | async function resolveTCoUrl(shortUrl) { 11 | let resp = await fetch(shortUrl, {method: "HEAD", redirect: "manual"}); 12 | if (resp.status === 301) { 13 | return resp.headers.get("location"); 14 | } else { 15 | console.log(`Got status ${resp.status} attempting to HEAD '${shortUrl}'`); 16 | return null; 17 | } 18 | } 19 | 20 | async function extractTargets(text) { 21 | let result = { 22 | tweet: new Set(), 23 | user: new Set(), 24 | web: new Set() 25 | }; 26 | 27 | let chunks = text.split(/\s+/g); 28 | let toCheck = chunks.filter(chunk => 29 | chunk.match(/^https:\/\/t.co\/\S*$/gi) 30 | ); 31 | if (toCheck.length === 0) { 32 | return result; 33 | } 34 | 35 | for (let i = 0; i < toCheck.length; i++) { 36 | let redirect = await resolveTCoUrl(toCheck[i]); 37 | let tweetId = redirect.match( 38 | 
/^https:\/\/twitter.com\/[^\/]*\/status\/(\d+)/i 39 | ); 40 | let profile = redirect.match(/^https:\/\/twitter.com\/([^?\/]+)$/i); 41 | 42 | if (tweetId) { 43 | result.tweet.add(tweetId[1]); 44 | } else if (profile) { 45 | result.user.add(profile[1]); 46 | } else { 47 | result.web.add(redirect); 48 | } 49 | } 50 | 51 | return result; 52 | } 53 | 54 | async function extractMessageMedia(oauth, token, msg) { 55 | if ( 56 | msg.message_create.message_data && 57 | msg.message_create.message_data.attachment && 58 | msg.message_create.message_data.attachment.media && 59 | msg.message_create.message_data.attachment.media.media_url_https 60 | ) { 61 | return await fetchImage( 62 | msg.message_create.message_data.attachment.media.media_url_https, 63 | oauth, 64 | token, 65 | ).catch(e => { 66 | console.log("Error fetching raw image: " + JSON.stringify(e)); 67 | return null; 68 | }); 69 | } else { 70 | return null; 71 | } 72 | } 73 | 74 | async function fetchImage(url, oauth, token) { 75 | const request_data = { 76 | url: url, 77 | method: "GET" 78 | }; 79 | 80 | const headers = 81 | oauth && token ? oauth.toHeader(oauth.authorize(request_data, token)) : {}; 82 | let resp = await fetch(url, {headers: headers, redirect: "manual"}).catch( 83 | err => { 84 | console.log( 85 | `${ts()}: Failed to issue fetch for url '${url}': ${JSON.stringify( 86 | err 87 | )}` 88 | ); 89 | return null; 90 | } 91 | ); 92 | 93 | let mimeType = null; 94 | if (url.match(/jpe?g/i)) { 95 | mimeType = "image/jpeg"; 96 | } else if (url.match(/\.png/i)) { 97 | mimeType = "image/png"; 98 | } 99 | 100 | if (!mimeType) { 101 | console.log(`${ts()}: Unable to extract MIME type from URL '${url}'`); 102 | return null; 103 | } 104 | 105 | if (resp) { 106 | if (resp.ok) { 107 | return {mimeType: mimeType, data: base64.encode(await resp.arrayBuffer())}; 108 | } else { 109 | console.log( 110 | `${ts()}: Failed to fetch image: ${url}. 
Status: ${resp.status}` 111 | ); 112 | return null; 113 | } 114 | } else { 115 | return null; 116 | } 117 | } 118 | 119 | function readLocalImage(path) { 120 | let raw = null; 121 | try { 122 | raw = fs.readFileSync(path) 123 | } catch (e) { 124 | console.log(`${ts()}: Couldn't find file '${path}'`); 125 | return null; 126 | } 127 | 128 | if (!raw) { 129 | console.log(`${ts()}: File read returned null for '${path}'`); 130 | return null; 131 | } 132 | 133 | let mimeType = null; 134 | if (path.match(/jpe?g/i)) { 135 | mimeType = "image/jpeg"; 136 | } else if (path.match(/png/i)) { 137 | mimeType = "image/png"; 138 | } 139 | 140 | if (!mimeType) { 141 | console.log(`${ts()}: Unable to extract MIME type from path '${path}'`); 142 | return null; 143 | } 144 | 145 | return {mimeType: mimeType, data: raw.toString("base64")}; 146 | } 147 | 148 | function splitText(text, maxLen) { 149 | let result = []; 150 | let lastSpan = {end: 0}; 151 | let lenBase = 0; 152 | let split = Array.from(splitter.findSpans(text)); 153 | split.forEach(span => { 154 | if (span.end - lenBase > maxLen) { 155 | result.push(text.substring(lenBase, lastSpan.end)); 156 | lenBase = span.start; 157 | } 158 | lastSpan = span; 159 | }); 160 | 161 | if (text.length > lenBase) { 162 | result.push(text.substring(lenBase, text.length)); 163 | } 164 | 165 | return result; 166 | } 167 | 168 | function getTweetImagesAndAlts(tweet) { 169 | let entities = tweet["extended_entities"]; 170 | if (!entities) { 171 | return {}; 172 | } 173 | 174 | let media = entities["media"]; 175 | if (!media) { 176 | return {}; 177 | } 178 | 179 | let images = {}; 180 | media.forEach(m => { 181 | if (m["type"] === "photo" || m["type"] === "animated_gif") { 182 | images[m["media_url_https"]] = m["ext_alt_text"] || null; 183 | } 184 | }); 185 | 186 | return images; 187 | } 188 | 189 | exports.ts = ts; 190 | exports.resolveTCoUrl = resolveTCoUrl; 191 | exports.extractTargets = extractTargets; 192 | exports.extractMessageMedia = 
// Scratch script for exercising the OCR / description helpers by hand.
const twitter = require("twitter-api-client");
const {getTweet} = require("./src/twtr");
const {ocrTweetImages, getFillImage} = require("./src/ocr");
const {describeUrl, describeRaw} = require("./src/describe");
const fs = require("fs");

// Everything is read from the environment.
const config = {
  myUser: process.env.USER,
  writeToken: process.env.API_WRITER_TOKEN,
  twitterClientConfig: {
    accessToken: process.env.TWITTER_ACCESS_TOKEN,
    accessTokenSecret: process.env.TWITTER_ACCESS_TOKEN_SECRET,
    apiKey: process.env.TWITTER_CONSUMER_KEY,
    apiSecret: process.env.TWITTER_CONSUMER_SECRET,
    disableCache: true
  },
  twitterToken: {
    key: process.env.TWITTER_ACCESS_TOKEN,
    secret: process.env.TWITTER_ACCESS_TOKEN_SECRET
  },
  azure: {
    descriptionKey: process.env.AZURE_KEY,
    descriptionEndpoint: process.env.AZURE_DESCRIPTION_ENDPOINT
  }
};

/**
 * Writes the image returned by getFillImage for `lang` to `<lang>.jpg` in the
 * current directory, decoding it as base64.
 *
 * @param {string} lang - passed to getFillImage and used as the file name stem
 */
function generateFillImage(lang) {
  const image = getFillImage(lang, 1, 10);
  fs.writeFileSync(`${lang}.jpg`, image, "base64");
}

const exampleImg = "https://docs.microsoft.com/en-us/azure/cognitive-services/computer-vision/images/bw_buildings.png";

/**
 * Sends a local PNG to describeRaw using the configured Azure endpoint and
 * key, then prints whatever it returns.
 *
 * @returns {Promise<void>}
 */
async function desc() {
  const imageBuf = fs.readFileSync("./img/more-alt-text-1.png");
  const img = {
    data: imageBuf.toString("base64"),
    mimeType: "image/png"
  };

  const description = await describeRaw(config.azure.descriptionEndpoint, config.azure.descriptionKey, img);
  console.log(description);
}

// Don't leave the promise floating: report the failure and exit non-zero.
desc().catch(err => {
  console.error(err);
  process.exitCode = 1;
});
--------------------------------------------------------------------------------