├── images ├── 1_model.jpg ├── 2_main.jpg ├── 3_ key_elements.jpg ├── 5_scale_condition.jpg ├── 6_canny_match_mask.jpg └── 4_different_conditions.jpg ├── text_drawing ├── from_image.py └── raw_text.py ├── README.md └── prompt_generator.ipynb /images/1_model.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ArefMYTB/text_into_image/HEAD/images/1_model.jpg -------------------------------------------------------------------------------- /images/2_main.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ArefMYTB/text_into_image/HEAD/images/2_main.jpg -------------------------------------------------------------------------------- /images/3_ key_elements.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ArefMYTB/text_into_image/HEAD/images/3_ key_elements.jpg -------------------------------------------------------------------------------- /images/5_scale_condition.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ArefMYTB/text_into_image/HEAD/images/5_scale_condition.jpg -------------------------------------------------------------------------------- /images/6_canny_match_mask.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ArefMYTB/text_into_image/HEAD/images/6_canny_match_mask.jpg -------------------------------------------------------------------------------- /images/4_different_conditions.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ArefMYTB/text_into_image/HEAD/images/4_different_conditions.jpg -------------------------------------------------------------------------------- /text_drawing/from_image.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | 4 | image = cv2.imread('image.jpg') 5 | mask = cv2.imread('mask.jpg', cv2.IMREAD_GRAYSCALE) 6 | 7 | edges = cv2.Canny(image, 100, 200) 8 | 9 | # Apply the mask to the Canny edges 10 | masked_edges = cv2.bitwise_and(edges, edges, mask=mask) 11 | 12 | # cv2.imshow('Original Image', image) 13 | # cv2.imshow('Mask', mask) 14 | # cv2.imshow('Canny Edges', edges) 15 | cv2.imshow('Canny Edges', masked_edges) 16 | cv2.imwrite('canny.jpg', masked_edges) 17 | cv2.waitKey(0) 18 | cv2.destroyAllWindows() 19 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # NO MORE BLAH-BLAH: EMBRACING REAL TEXT IN THE IMAGE SYNTHESIS WORLD 2 | 3 | This repository contains the official code for the paper: 4 | 5 | **[NO MORE BLAH-BLAH: EMBRACING REAL TEXT IN THE IMAGE SYNTHESIS WORLD](https://openreview.net/pdf?id=qjrvRK24S0)** 6 | 7 | ## Overview 8 | This paper introduces a novel method to better integrate text into images, significantly improving how text appears on various objects within the generated images. 9 | 10 | 11 | ## Model Architecture 12 | ![](images/1_model.jpg) 13 | 14 | ## Results 15 | ![](images/2_main.jpg) 16 | 17 | 18 | ## Citation 19 | If you find this work helpful in your research, please consider citing our paper: 20 | ```bash 21 | @inproceedings{tabatabaei2024no, 22 | title={NO MORE BLAH-BLAH: EMBRACING REAL TEXT IN THE IMAGE SYNTHESIS WORLD}, 23 | author={Tabatabaei, Aref and Dehghanian, Zahra and Movaghatian, Negar and Amirmazlaghani, Maryam}, 24 | booktitle={The Second Tiny Papers Track at ICLR 2024} 25 | } 26 | -------------------------------------------------------------------------------- /text_drawing/raw_text.py: -------------------------------------------------------------------------------- 1 | from PIL import Image, ImageDraw, ImageFont 2 | import numpy as np 3 | 4 | 5 | def generate_text_image(text, font_path, font_size, output_path, image_size, text_color=(0, 0, 0)): 6 | img = Image.new('RGB', image_size, color='white') 7 | 8 | font = ImageFont.truetype(font_path, size=font_size) 9 | 10 | font = ImageFont.truetype(font_path, size=font_size) 11 | 12 | draw = ImageDraw.Draw(img) 13 | 14 | text_width = draw.textlength(text, font=font) 15 | text_height = font_size 16 | 17 | placement = (min_x + ((max_x - min_x) // 2 - text_width // 2), \ 18 | min_y + ((max_y - min_y) // 2 - text_height // 2)) 19 | 20 | draw.text(placement, text, fill=text_color, font=font) 21 | 22 | img.save(output_path) 23 | img.show() 24 | 25 | 26 | font_size = 50 27 | user_text = "fashion" 28 | user_font_path = "C:/Windows/Fonts/Candara.ttf" 29 | input_mask_path = "./mask2.jpg" 30 | output_image_path = "text.jpg" 31 | 32 | mask = Image.open(input_mask_path).convert("L") 33 | 34 | mask_array = np.array(mask) 35 | 36 | white_pixels = np.where(mask_array == 255) 37 | min_x = np.min(white_pixels[1]) 38 | max_x = np.max(white_pixels[1]) 39 | min_y = np.min(white_pixels[0]) 40 | max_y = np.max(white_pixels[0]) 41 | bounding_box = (min_x, min_y, max_x, max_y) 42 | 43 | generate_text_image(user_text, user_font_path, font_size, output_image_path, mask.size) 44 | -------------------------------------------------------------------------------- /prompt_generator.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "id": "ycWUftDgaqC_" 7 | }, 8 | "source": [ 9 | "## OCR" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": { 15 | "id": "cRCBmz99jXCX" 16 | }, 17 | "source": [ 18 | "##### user text :)" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": null, 24 | "metadata": { 25 | "id": "HE1AdtPthAlh" 26 | }, 27 | "outputs": [], 28 | "source": [ 29 | "text = \"bakery\"" 30 | ] 31 | }, 32 | { 33 | "cell_type": "markdown", 34 | "metadata": { 35 | "id": "l7d6I_SddCdW" 36 | }, 37 | "source": [ 38 | "##### Easy OCR" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": null, 44 | "metadata": { 45 | "colab": { 46 | "base_uri": "https://localhost:8080/" 47 | }, 48 | "id": "KXqx0xNydPRs", 49 | "outputId": "b4ba5dae-ac64-43c0-9a31-b65bc3eefa4a" 50 | }, 51 | "outputs": [ 52 | { 53 | "name": "stdout", 54 | "output_type": "stream", 55 | "text": [ 56 | "Requirement already satisfied: opencv-python in /usr/local/lib/python3.10/dist-packages (4.8.0.76)\n", 57 | "Collecting easyocr\n", 58 | " Downloading easyocr-1.7.1-py3-none-any.whl (2.9 MB)\n", 59 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.9/2.9 MB\u001b[0m \u001b[31m18.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 60 | "\u001b[?25hRequirement already satisfied: numpy>=1.21.2 in /usr/local/lib/python3.10/dist-packages (from opencv-python) (1.23.5)\n", 61 | "Requirement already satisfied: torch in /usr/local/lib/python3.10/dist-packages (from easyocr) (2.1.0+cu118)\n", 62 | "Requirement already satisfied: torchvision>=0.5 in /usr/local/lib/python3.10/dist-packages (from easyocr) (0.16.0+cu118)\n", 63 | "Requirement already satisfied: opencv-python-headless in /usr/local/lib/python3.10/dist-packages (from easyocr) (4.8.1.78)\n", 64 | "Requirement already satisfied: scipy in /usr/local/lib/python3.10/dist-packages (from easyocr) (1.11.4)\n", 65 | "Requirement already satisfied: Pillow in /usr/local/lib/python3.10/dist-packages (from easyocr) (9.4.0)\n", 66 | "Requirement already satisfied: scikit-image in /usr/local/lib/python3.10/dist-packages (from easyocr) (0.19.3)\n", 67 | "Collecting python-bidi (from easyocr)\n", 68 | " Downloading python_bidi-0.4.2-py2.py3-none-any.whl (30 kB)\n", 69 | "Requirement already satisfied: PyYAML in /usr/local/lib/python3.10/dist-packages (from easyocr) (6.0.1)\n", 70 | "Requirement already satisfied: Shapely in /usr/local/lib/python3.10/dist-packages (from easyocr) (2.0.2)\n", 71 | "Collecting pyclipper (from easyocr)\n", 72 | " Downloading pyclipper-1.3.0.post5-cp310-cp310-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (908 kB)\n", 73 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m908.3/908.3 kB\u001b[0m \u001b[31m28.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 74 | "\u001b[?25hCollecting ninja (from easyocr)\n", 75 | " Downloading ninja-1.11.1.1-py2.py3-none-manylinux1_x86_64.manylinux_2_5_x86_64.whl (307 kB)\n", 76 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m307.2/307.2 kB\u001b[0m \u001b[31m29.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 77 | "\u001b[?25hRequirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from torchvision>=0.5->easyocr) (2.31.0)\n", 78 | "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from torch->easyocr) (3.13.1)\n", 79 | "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.10/dist-packages (from torch->easyocr) (4.5.0)\n", 80 | "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch->easyocr) (1.12)\n", 81 | "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch->easyocr) (3.2.1)\n", 82 | "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch->easyocr) (3.1.2)\n", 83 | "Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from torch->easyocr) (2023.6.0)\n", 84 | "Requirement already satisfied: triton==2.1.0 in /usr/local/lib/python3.10/dist-packages (from torch->easyocr) (2.1.0)\n", 85 | "Requirement already satisfied: six in /usr/local/lib/python3.10/dist-packages (from python-bidi->easyocr) (1.16.0)\n", 86 | "Requirement already satisfied: imageio>=2.4.1 in /usr/local/lib/python3.10/dist-packages (from scikit-image->easyocr) (2.31.6)\n", 87 | "Requirement already satisfied: tifffile>=2019.7.26 in /usr/local/lib/python3.10/dist-packages (from scikit-image->easyocr) (2023.9.26)\n", 88 | "Requirement already satisfied: PyWavelets>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from scikit-image->easyocr) (1.5.0)\n", 89 | "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from scikit-image->easyocr) (23.2)\n", 90 | "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch->easyocr) (2.1.3)\n", 91 | "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->torchvision>=0.5->easyocr) (3.3.2)\n", 92 | "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->torchvision>=0.5->easyocr) (3.6)\n", 93 | "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->torchvision>=0.5->easyocr) (2.0.7)\n", 94 | "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->torchvision>=0.5->easyocr) (2023.11.17)\n", 95 | "Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch->easyocr) (1.3.0)\n", 96 | "Installing collected packages: pyclipper, ninja, python-bidi, easyocr\n", 97 | "Successfully installed easyocr-1.7.1 ninja-1.11.1.1 pyclipper-1.3.0.post5 python-bidi-0.4.2\n" 98 | ] 99 | } 100 | ], 101 | "source": [ 102 | "! pip install opencv-python easyocr" 103 | ] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "execution_count": null, 108 | "metadata": { 109 | "colab": { 110 | "base_uri": "https://localhost:8080/", 111 | "height": 53 112 | }, 113 | "id": "tdYC6UIwdYsX", 114 | "outputId": "bc1f3ec6-d869-4fdb-d04e-1539a51f48ec" 115 | }, 116 | "outputs": [ 117 | { 118 | "name": "stderr", 119 | "output_type": "stream", 120 | "text": [ 121 | "WARNING:easyocr.easyocr:Using CPU. Note: This module is much faster with a GPU.\n" 122 | ] 123 | }, 124 | { 125 | "data": { 126 | "application/vnd.google.colaboratory.intrinsic+json": { 127 | "type": "string" 128 | }, 129 | "text/plain": [ 130 | "'Mockup STORE SIGN'" 131 | ] 132 | }, 133 | "execution_count": 64, 134 | "metadata": {}, 135 | "output_type": "execute_result" 136 | } 137 | ], 138 | "source": [ 139 | "from easyocr import Reader\n", 140 | "import cv2\n", 141 | "\n", 142 | "image = cv2.imread('store sign.jpg')\n", 143 | "\n", 144 | "reader = Reader(['en'], gpu=False)\n", 145 | "results = reader.readtext(image)\n", 146 | "\n", 147 | "text = ' '.join([res[1] for res in results])\n", 148 | "text" 149 | ] 150 | }, 151 | { 152 | "cell_type": "markdown", 153 | "metadata": { 154 | "id": "mox4xA6qfGmN" 155 | }, 156 | "source": [ 157 | "##### Keras OCR" 158 | ] 159 | }, 160 | { 161 | "cell_type": "code", 162 | "execution_count": null, 163 | "metadata": { 164 | "colab": { 165 | "base_uri": "https://localhost:8080/" 166 | }, 167 | "id": "NE9cjRY-fJ2t", 168 | "outputId": "db58ddf3-8d04-4dea-b642-d8490acac54f" 169 | }, 170 | "outputs": [ 171 | { 172 | "name": "stdout", 173 | "output_type": "stream", 174 | "text": [ 175 | "Requirement already satisfied: keras-ocr in /usr/local/lib/python3.10/dist-packages (0.9.3)\n", 176 | "Requirement already satisfied: editdistance in /usr/local/lib/python3.10/dist-packages (from keras-ocr) (0.6.2)\n", 177 | "Requirement already satisfied: efficientnet==1.0.0 in /usr/local/lib/python3.10/dist-packages (from keras-ocr) (1.0.0)\n", 178 | "Requirement already satisfied: essential_generators in /usr/local/lib/python3.10/dist-packages (from keras-ocr) (1.0)\n", 179 | "Requirement already satisfied: fonttools in /usr/local/lib/python3.10/dist-packages (from keras-ocr) (4.46.0)\n", 180 | "Requirement already satisfied: imgaug in /usr/local/lib/python3.10/dist-packages (from keras-ocr) (0.4.0)\n", 181 | "Requirement already satisfied: pyclipper in /usr/local/lib/python3.10/dist-packages (from keras-ocr) (1.3.0.post5)\n", 182 | "Requirement already satisfied: shapely in /usr/local/lib/python3.10/dist-packages (from keras-ocr) (2.0.2)\n", 183 | "Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from keras-ocr) (4.66.1)\n", 184 | "Requirement already satisfied: validators in /usr/local/lib/python3.10/dist-packages (from keras-ocr) (0.22.0)\n", 185 | "Requirement already satisfied: keras-applications<=1.0.8,>=1.0.7 in /usr/local/lib/python3.10/dist-packages (from efficientnet==1.0.0->keras-ocr) (1.0.8)\n", 186 | "Requirement already satisfied: scikit-image in /usr/local/lib/python3.10/dist-packages (from efficientnet==1.0.0->keras-ocr) (0.19.3)\n", 187 | "Requirement already satisfied: six in /usr/local/lib/python3.10/dist-packages (from imgaug->keras-ocr) (1.16.0)\n", 188 | "Requirement already satisfied: numpy>=1.15 in /usr/local/lib/python3.10/dist-packages (from imgaug->keras-ocr) (1.23.5)\n", 189 | "Requirement already satisfied: scipy in /usr/local/lib/python3.10/dist-packages (from imgaug->keras-ocr) (1.11.4)\n", 190 | "Requirement already satisfied: Pillow in /usr/local/lib/python3.10/dist-packages (from imgaug->keras-ocr) (9.4.0)\n", 191 | "Requirement already satisfied: matplotlib in /usr/local/lib/python3.10/dist-packages (from imgaug->keras-ocr) (3.7.1)\n", 192 | "Requirement already satisfied: opencv-python in /usr/local/lib/python3.10/dist-packages (from imgaug->keras-ocr) (4.8.0.76)\n", 193 | "Requirement already satisfied: imageio in /usr/local/lib/python3.10/dist-packages (from imgaug->keras-ocr) (2.31.6)\n", 194 | "Requirement already satisfied: h5py in /usr/local/lib/python3.10/dist-packages (from keras-applications<=1.0.8,>=1.0.7->efficientnet==1.0.0->keras-ocr) (3.9.0)\n", 195 | "Requirement already satisfied: networkx>=2.2 in /usr/local/lib/python3.10/dist-packages (from scikit-image->efficientnet==1.0.0->keras-ocr) (3.2.1)\n", 196 | "Requirement already satisfied: tifffile>=2019.7.26 in /usr/local/lib/python3.10/dist-packages (from scikit-image->efficientnet==1.0.0->keras-ocr) (2023.9.26)\n", 197 | "Requirement already satisfied: PyWavelets>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from scikit-image->efficientnet==1.0.0->keras-ocr) (1.5.0)\n", 198 | "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from scikit-image->efficientnet==1.0.0->keras-ocr) (23.2)\n", 199 | "Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->imgaug->keras-ocr) (1.2.0)\n", 200 | "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.10/dist-packages (from matplotlib->imgaug->keras-ocr) (0.12.1)\n", 201 | "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->imgaug->keras-ocr) (1.4.5)\n", 202 | "Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->imgaug->keras-ocr) (3.1.1)\n", 203 | "Requirement already satisfied: python-dateutil>=2.7 in /usr/local/lib/python3.10/dist-packages (from matplotlib->imgaug->keras-ocr) (2.8.2)\n", 204 | "Collecting keras-ocr\n", 205 | " Cloning https://github.com/faustomorales/keras-ocr.git to /tmp/pip-install-rh9tim45/keras-ocr_84b266c0f309432a9c4a81841d813098\n", 206 | " Running command git clone --filter=blob:none --quiet https://github.com/faustomorales/keras-ocr.git /tmp/pip-install-rh9tim45/keras-ocr_84b266c0f309432a9c4a81841d813098\n", 207 | " Resolved https://github.com/faustomorales/keras-ocr.git to commit e8d34a46f07d50158e1d86d6c617e99bfe99e2f8\n", 208 | " Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n", 209 | " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n", 210 | " Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", 211 | "Requirement already satisfied: editdistance in /usr/local/lib/python3.10/dist-packages (from keras-ocr) (0.6.2)\n", 212 | "Requirement already satisfied: efficientnet==1.0.0 in /usr/local/lib/python3.10/dist-packages (from keras-ocr) (1.0.0)\n", 213 | "Requirement already satisfied: essential_generators in /usr/local/lib/python3.10/dist-packages (from keras-ocr) (1.0)\n", 214 | "Requirement already satisfied: fonttools in /usr/local/lib/python3.10/dist-packages (from keras-ocr) (4.46.0)\n", 215 | "Requirement already satisfied: imgaug in /usr/local/lib/python3.10/dist-packages (from keras-ocr) (0.4.0)\n", 216 | "Requirement already satisfied: pyclipper in /usr/local/lib/python3.10/dist-packages (from keras-ocr) (1.3.0.post5)\n", 217 | "Requirement already satisfied: shapely in /usr/local/lib/python3.10/dist-packages (from keras-ocr) (2.0.2)\n", 218 | "Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from keras-ocr) (4.66.1)\n", 219 | "Requirement already satisfied: validators in /usr/local/lib/python3.10/dist-packages (from keras-ocr) (0.22.0)\n", 220 | "Requirement already satisfied: keras-applications<=1.0.8,>=1.0.7 in /usr/local/lib/python3.10/dist-packages (from efficientnet==1.0.0->keras-ocr) (1.0.8)\n", 221 | "Requirement already satisfied: scikit-image in /usr/local/lib/python3.10/dist-packages (from efficientnet==1.0.0->keras-ocr) (0.19.3)\n", 222 | "Requirement already satisfied: six in /usr/local/lib/python3.10/dist-packages (from imgaug->keras-ocr) (1.16.0)\n", 223 | "Requirement already satisfied: numpy>=1.15 in /usr/local/lib/python3.10/dist-packages (from imgaug->keras-ocr) (1.23.5)\n", 224 | "Requirement already satisfied: scipy in /usr/local/lib/python3.10/dist-packages (from imgaug->keras-ocr) (1.11.4)\n", 225 | "Requirement already satisfied: Pillow in /usr/local/lib/python3.10/dist-packages (from imgaug->keras-ocr) (9.4.0)\n", 226 | "Requirement already satisfied: matplotlib in /usr/local/lib/python3.10/dist-packages (from imgaug->keras-ocr) (3.7.1)\n", 227 | "Requirement already satisfied: opencv-python in /usr/local/lib/python3.10/dist-packages (from imgaug->keras-ocr) (4.8.0.76)\n", 228 | "Requirement already satisfied: imageio in /usr/local/lib/python3.10/dist-packages (from imgaug->keras-ocr) (2.31.6)\n", 229 | "Requirement already satisfied: h5py in /usr/local/lib/python3.10/dist-packages (from keras-applications<=1.0.8,>=1.0.7->efficientnet==1.0.0->keras-ocr) (3.9.0)\n", 230 | "Requirement already satisfied: networkx>=2.2 in /usr/local/lib/python3.10/dist-packages (from scikit-image->efficientnet==1.0.0->keras-ocr) (3.2.1)\n", 231 | "Requirement already satisfied: tifffile>=2019.7.26 in /usr/local/lib/python3.10/dist-packages (from scikit-image->efficientnet==1.0.0->keras-ocr) (2023.9.26)\n", 232 | "Requirement already satisfied: PyWavelets>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from scikit-image->efficientnet==1.0.0->keras-ocr) (1.5.0)\n", 233 | "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from scikit-image->efficientnet==1.0.0->keras-ocr) (23.2)\n", 234 | "Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->imgaug->keras-ocr) (1.2.0)\n", 235 | "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.10/dist-packages (from matplotlib->imgaug->keras-ocr) (0.12.1)\n", 236 | "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->imgaug->keras-ocr) (1.4.5)\n", 237 | "Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->imgaug->keras-ocr) (3.1.1)\n", 238 | "Requirement already satisfied: python-dateutil>=2.7 in /usr/local/lib/python3.10/dist-packages (from matplotlib->imgaug->keras-ocr) (2.8.2)\n", 239 | "Building wheels for collected packages: keras-ocr\n", 240 | " Building wheel for keras-ocr (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", 241 | " Created wheel for keras-ocr: filename=keras_ocr-0.0.0-py3-none-any.whl size=42310 sha256=f78512b0926069c585989353da3cb181a69ca7da8dbf7f3b81714bca1c056cc0\n", 242 | " Stored in directory: /tmp/pip-ephem-wheel-cache-oo3l6ubj/wheels/ee/e8/3a/3915fd372ea68434aa50a06b4b9633c1446cc8b83b5d6975db\n", 243 | "Successfully built keras-ocr\n", 244 | "Installing collected packages: keras-ocr\n", 245 | " Attempting uninstall: keras-ocr\n", 246 | " Found existing installation: keras-ocr 0.9.3\n", 247 | " Uninstalling keras-ocr-0.9.3:\n", 248 | " Successfully uninstalled keras-ocr-0.9.3\n", 249 | "Successfully installed keras-ocr-0.0.0\n" 250 | ] 251 | } 252 | ], 253 | "source": [ 254 | "! pip install keras-ocr\n", 255 | "! pip install git+https://github.com/faustomorales/keras-ocr.git#egg=keras-ocr" 256 | ] 257 | }, 258 | { 259 | "cell_type": "code", 260 | "execution_count": null, 261 | "metadata": { 262 | "colab": { 263 | "base_uri": "https://localhost:8080/", 264 | "height": 105 265 | }, 266 | "id": "ZyhIgw5_fjoR", 267 | "outputId": "01ae8938-069c-404f-a0e9-d0bf0fd0730f" 268 | }, 269 | "outputs": [ 270 | { 271 | "name": "stdout", 272 | "output_type": "stream", 273 | "text": [ 274 | "Looking for /root/.keras-ocr/craft_mlt_25k.h5\n", 275 | "Looking for /root/.keras-ocr/crnn_kurapan.h5\n", 276 | "1/1 [==============================] - 21s 21s/step\n", 277 | "1/1 [==============================] - 2s 2s/step\n" 278 | ] 279 | }, 280 | { 281 | "data": { 282 | "application/vnd.google.colaboratory.intrinsic+json": { 283 | "type": "string" 284 | }, 285 | "text/plain": [ 286 | "'mockup store sign'" 287 | ] 288 | }, 289 | "execution_count": 62, 290 | "metadata": {}, 291 | "output_type": "execute_result" 292 | } 293 | ], 294 | "source": [ 295 | "import keras_ocr\n", 296 | "import matplotlib.pyplot as plt\n", 297 | "\n", 298 | "pipeline = keras_ocr.pipeline.Pipeline()\n", 299 | "\n", 300 | "# Read images from folder path to image object\n", 301 | "images = [\n", 302 | " keras_ocr.tools.read('store sign.jpg')\n", 303 | "]\n", 304 | "\n", 305 | "prediction_groups = pipeline.recognize(images)\n", 306 | "\n", 307 | "text = ' '.join([res[0] for res in prediction_groups[0]])\n", 308 | "text" 309 | ] 310 | }, 311 | { 312 | "cell_type": "markdown", 313 | "metadata": { 314 | "id": "7Fdc86-Xarzr" 315 | }, 316 | "source": [ 317 | "## BLIP" 318 | ] 319 | }, 320 | { 321 | "cell_type": "code", 322 | "execution_count": null, 323 | "metadata": { 324 | "colab": { 325 | "base_uri": "https://localhost:8080/" 326 | }, 327 | "id": "WeKkQGHakdC8", 328 | "outputId": "a51b9b98-0d64-4bab-8e9d-402378728d31" 329 | }, 330 | "outputs": [ 331 | { 332 | "name": "stdout", 333 | "output_type": "stream", 334 | "text": [ 335 | "Requirement already satisfied: replicate in /usr/local/lib/python3.10/dist-packages (0.21.1)\n", 336 | "Requirement already satisfied: httpx<1,>=0.21.0 in /usr/local/lib/python3.10/dist-packages (from replicate) (0.25.2)\n", 337 | "Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from replicate) (23.2)\n", 338 | "Requirement already satisfied: pydantic>1 in /usr/local/lib/python3.10/dist-packages (from replicate) (1.10.13)\n", 339 | "Requirement already satisfied: typing-extensions>=4.5.0 in /usr/local/lib/python3.10/dist-packages (from replicate) (4.5.0)\n", 340 | "Requirement already satisfied: anyio in /usr/local/lib/python3.10/dist-packages (from httpx<1,>=0.21.0->replicate) (3.7.1)\n", 341 | "Requirement already satisfied: certifi in /usr/local/lib/python3.10/dist-packages (from httpx<1,>=0.21.0->replicate) (2023.11.17)\n", 342 | "Requirement already satisfied: httpcore==1.* in /usr/local/lib/python3.10/dist-packages (from httpx<1,>=0.21.0->replicate) (1.0.2)\n", 343 | "Requirement already satisfied: idna in /usr/local/lib/python3.10/dist-packages (from httpx<1,>=0.21.0->replicate) (3.6)\n", 344 | "Requirement already satisfied: sniffio in /usr/local/lib/python3.10/dist-packages (from httpx<1,>=0.21.0->replicate) (1.3.0)\n", 345 | "Requirement already satisfied: h11<0.15,>=0.13 in /usr/local/lib/python3.10/dist-packages (from httpcore==1.*->httpx<1,>=0.21.0->replicate) (0.14.0)\n", 346 | "Requirement already satisfied: exceptiongroup in /usr/local/lib/python3.10/dist-packages (from anyio->httpx<1,>=0.21.0->replicate) (1.2.0)\n" 347 | ] 348 | } 349 | ], 350 | "source": [ 351 | "! pip install replicate" 352 | ] 353 | }, 354 | { 355 | "cell_type": "code", 356 | "execution_count": null, 357 | "metadata": { 358 | "id": "wGJgTB1SFdX-" 359 | }, 360 | "outputs": [], 361 | "source": [ 362 | "import replicate\n", 363 | "import os\n", 364 | "\n", 365 | "os.environ[\"REPLICATE_API_TOKEN\"] = \"r8_TpwxvWqeqdwL8BTbKj8YamBWtY45p0L36dI8e\"\n", 366 | "\n", 367 | "replicate = replicate.Client(api_token='')" 368 | ] 369 | }, 370 | { 371 | "cell_type": "markdown", 372 | "metadata": { 373 | "id": "OEnhQlhAFiwr" 374 | }, 375 | "source": [ 376 | "##### text extraction using BLIP" 377 | ] 378 | }, 379 | { 380 | "cell_type": "code", 381 | "execution_count": null, 382 | "metadata": { 383 | "colab": { 384 | "background_save": true 385 | }, 386 | "id": "i3Zs65tdFmo9", 387 | "outputId": "1dd96f56-d45b-4feb-b483-23f6c7e1c5e1" 388 | }, 389 | "outputs": [ 390 | { 391 | "data": { 392 | "application/vnd.google.colaboratory.intrinsic+json": { 393 | "type": "string" 394 | }, 395 | "text/plain": [ 396 | "'candy shoppe'" 397 | ] 398 | }, 399 | "execution_count": 22, 400 | "metadata": {}, 401 | "output_type": "execute_result" 402 | } 403 | ], 404 | "source": [ 405 | "image = open(\"/content/text.jpg\", \"rb\")\n", 406 | "\n", 407 | "# identify the object\n", 408 | "output = replicate.run(\n", 409 | " \"salesforce/blip:2e1dddc8621f72155f24cf2e0adbde548458d3cab9f00c0139eea840d0ac4746\",\n", 410 | " input={\n", 411 | " \"image\": image,\n", 412 | " \"task\": \"visual_question_answering\",\n", 413 | " \"question\": \"what is in this picture?\"\n", 414 | " }\n", 415 | ")\n", 416 | "text = ' '.join(output.split(' ')[1:])\n", 417 | "text" 418 | ] 419 | }, 420 | { 421 | "cell_type": "markdown", 422 | "metadata": { 423 | "id": "uRbaiGmZFw6u" 424 | }, 425 | "source": [ 426 | "##### caption generation using BLIP" 427 | ] 428 | }, 429 | { 430 | "cell_type": "code", 431 | "execution_count": null, 432 | "metadata": { 433 | "colab": { 434 | "base_uri": "https://localhost:8080/", 435 | "height": 35 436 | }, 437 | "id": "CgeDXv7lVW5T", 438 | "outputId": "4d91cfca-0371-4b67-b38e-9ad28c094458" 439 | }, 440 | "outputs": [ 441 | { 442 | "data": { 443 | "application/vnd.google.colaboratory.intrinsic+json": { 444 | "type": "string" 445 | }, 446 | "text/plain": [ 447 | "'the word (bakery) written on a (black) (sign)'" 448 | ] 449 | }, 450 | "execution_count": 21, 451 | "metadata": {}, 452 | "output_type": "execute_result" 453 | } 454 | ], 455 | "source": [ 456 | "image = open(\"/content/store sign - Copy.jpg\", \"rb\")\n", 457 | "\n", 458 | "# identify the object\n", 459 | "output = replicate.run(\n", 460 | " \"salesforce/blip:2e1dddc8621f72155f24cf2e0adbde548458d3cab9f00c0139eea840d0ac4746\",\n", 461 | " input={\n", 462 | " \"image\": image,\n", 463 | " \"task\": \"visual_question_answering\",\n", 464 | " \"question\": \"what is the main object in this picture?\"\n", 465 | " }\n", 466 | ")\n", 467 | "obj = ' '.join(output.split(' ')[1:])\n", 468 | "\n", 469 | "# identify the color of the object\n", 470 | "output = replicate.run(\n", 471 | " \"salesforce/blip:2e1dddc8621f72155f24cf2e0adbde548458d3cab9f00c0139eea840d0ac4746\",\n", 472 | " input={\n", 473 | " \"image\": image,\n", 474 | " \"task\": \"visual_question_answering\",\n", 475 | " \"question\": f\"what is the color of the {obj}?\"\n", 476 | " }\n", 477 | ")\n", 478 | "color = ' '.join(output.split(' ')[1:])\n", 479 | "\n", 480 | "prompt = f\"the word ({text}) written on a ({color}) ({obj})\"\n", 481 | "prompt" 482 | ] 483 | } 484 | ], 485 | "metadata": { 486 | "colab": { 487 | "provenance": [] 488 | }, 489 | "kernelspec": { 490 | "display_name": "Python 3", 491 | "name": "python3" 492 | }, 493 | "language_info": { 494 | "name": "python" 495 | } 496 | }, 497 | "nbformat": 4, 498 | "nbformat_minor": 0 499 | } --------------------------------------------------------------------------------