├── images
    ├── 1_model.jpg
    ├── 2_main.jpg
    ├── 3_ key_elements.jpg
    ├── 5_scale_condition.jpg
    ├── 6_canny_match_mask.jpg
    └── 4_different_conditions.jpg
├── text_drawing
    ├── from_image.py
    └── raw_text.py
├── README.md
└── prompt_generator.ipynb


/images/1_model.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ArefMYTB/text_into_image/HEAD/images/1_model.jpg


--------------------------------------------------------------------------------
/images/2_main.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ArefMYTB/text_into_image/HEAD/images/2_main.jpg


--------------------------------------------------------------------------------
/images/3_ key_elements.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ArefMYTB/text_into_image/HEAD/images/3_ key_elements.jpg


--------------------------------------------------------------------------------
/images/5_scale_condition.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ArefMYTB/text_into_image/HEAD/images/5_scale_condition.jpg


--------------------------------------------------------------------------------
/images/6_canny_match_mask.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ArefMYTB/text_into_image/HEAD/images/6_canny_match_mask.jpg


--------------------------------------------------------------------------------
/images/4_different_conditions.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ArefMYTB/text_into_image/HEAD/images/4_different_conditions.jpg


--------------------------------------------------------------------------------
/text_drawing/from_image.py:
--------------------------------------------------------------------------------
 1 | import cv2
 2 | import numpy as np
 3 | 
 4 | image = cv2.imread('image.jpg')
 5 | mask = cv2.imread('mask.jpg', cv2.IMREAD_GRAYSCALE)
 6 | 
 7 | edges = cv2.Canny(image, 100, 200)
 8 | 
 9 | # Apply the mask to the Canny edges
10 | masked_edges = cv2.bitwise_and(edges, edges, mask=mask)
11 | 
12 | # cv2.imshow('Original Image', image)
13 | # cv2.imshow('Mask', mask)
14 | # cv2.imshow('Canny Edges', edges)
15 | cv2.imshow('Canny Edges', masked_edges)
16 | cv2.imwrite('canny.jpg', masked_edges)
17 | cv2.waitKey(0)
18 | cv2.destroyAllWindows()
19 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # NO MORE BLAH-BLAH: EMBRACING REAL TEXT IN THE IMAGE SYNTHESIS WORLD
 2 | 
 3 | This repository contains the official code for the paper:
 4 | 
 5 | **[NO MORE BLAH-BLAH: EMBRACING REAL TEXT IN THE IMAGE SYNTHESIS WORLD](https://openreview.net/pdf?id=qjrvRK24S0)**
 6 | 
 7 | ## Overview
 8 | This paper introduces a novel method to better integrate text into images, significantly improving how text appears on various objects within the generated images.
 9 | 
10 | 
11 | ## Model Architecture
12 | ![](images/1_model.jpg)
13 | 
14 | ## Results
15 | ![](images/2_main.jpg)
16 | 
17 | 
18 | ## Citation
19 | If you find this work helpful in your research, please consider citing our paper:
20 | ```bibtex
21 | @inproceedings{tabatabaei2024no,
22 |   title={NO MORE BLAH-BLAH: EMBRACING REAL TEXT IN THE IMAGE SYNTHESIS WORLD},
23 |   author={Tabatabaei, Aref and Dehghanian, Zahra and Movaghatian, Negar and Amirmazlaghani, Maryam},
24 |   booktitle={The Second Tiny Papers Track at ICLR 2024}
25 | }
26 | ```


--------------------------------------------------------------------------------
/text_drawing/raw_text.py:
--------------------------------------------------------------------------------
 1 | from PIL import Image, ImageDraw, ImageFont
 2 | import numpy as np
 3 | 
 4 | 
 5 | def generate_text_image(text, font_path, font_size, output_path, image_size, text_color=(0, 0, 0)):
 6 |     img = Image.new('RGB', image_size, color='white')
 7 | 
 8 |     font = ImageFont.truetype(font_path, size=font_size)
 9 |   
10 |     font = ImageFont.truetype(font_path, size=font_size)
11 |   
12 |     draw = ImageDraw.Draw(img)
13 |   
14 |     text_width = draw.textlength(text, font=font)
15 |     text_height = font_size
16 | 
17 |     placement = (min_x + ((max_x - min_x) // 2 - text_width // 2), \
18 |         min_y + ((max_y - min_y) // 2 - text_height // 2))
19 | 
20 |     draw.text(placement, text, fill=text_color, font=font)
21 |   
22 |     img.save(output_path)
23 |     img.show()
24 | 
25 | 
26 | font_size = 50
27 | user_text = "fashion"
28 | user_font_path = "C:/Windows/Fonts/Candara.ttf"
29 | input_mask_path = "./mask2.jpg"
30 | output_image_path = "text.jpg"
31 | 
32 | mask = Image.open(input_mask_path).convert("L")
33 | 
34 | mask_array = np.array(mask)
35 | 
36 | white_pixels = np.where(mask_array == 255)
37 | min_x = np.min(white_pixels[1])
38 | max_x = np.max(white_pixels[1])
39 | min_y = np.min(white_pixels[0])
40 | max_y = np.max(white_pixels[0])
41 | bounding_box = (min_x, min_y, max_x, max_y)
42 | 
43 | generate_text_image(user_text, user_font_path, font_size, output_image_path, mask.size)
44 | 


--------------------------------------------------------------------------------
/prompt_generator.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |   "cells": [
  3 |     {
  4 |       "cell_type": "markdown",
  5 |       "metadata": {
  6 |         "id": "ycWUftDgaqC_"
  7 |       },
  8 |       "source": [
  9 |         "## OCR"
 10 |       ]
 11 |     },
 12 |     {
 13 |       "cell_type": "markdown",
 14 |       "metadata": {
 15 |         "id": "cRCBmz99jXCX"
 16 |       },
 17 |       "source": [
 18 |         "##### user text :)"
 19 |       ]
 20 |     },
 21 |     {
 22 |       "cell_type": "code",
 23 |       "execution_count": null,
 24 |       "metadata": {
 25 |         "id": "HE1AdtPthAlh"
 26 |       },
 27 |       "outputs": [],
 28 |       "source": [
 29 |         "text = \"bakery\""
 30 |       ]
 31 |     },
 32 |     {
 33 |       "cell_type": "markdown",
 34 |       "metadata": {
 35 |         "id": "l7d6I_SddCdW"
 36 |       },
 37 |       "source": [
 38 |         "##### Easy OCR"
 39 |       ]
 40 |     },
 41 |     {
 42 |       "cell_type": "code",
 43 |       "execution_count": null,
 44 |       "metadata": {
 45 |         "colab": {
 46 |           "base_uri": "https://localhost:8080/"
 47 |         },
 48 |         "id": "KXqx0xNydPRs",
 49 |         "outputId": "b4ba5dae-ac64-43c0-9a31-b65bc3eefa4a"
 50 |       },
 51 |       "outputs": [
 52 |         {
 53 |           "name": "stdout",
 54 |           "output_type": "stream",
 55 |           "text": [
 56 |             "Requirement already satisfied: opencv-python in /usr/local/lib/python3.10/dist-packages (4.8.0.76)\n",
 57 |             "Collecting easyocr\n",
 58 |             "  Downloading easyocr-1.7.1-py3-none-any.whl (2.9 MB)\n",
 59 |             "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.9/2.9 MB\u001b[0m \u001b[31m18.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
 60 |             "\u001b[?25hRequirement already satisfied: numpy>=1.21.2 in /usr/local/lib/python3.10/dist-packages (from opencv-python) (1.23.5)\n",
 61 |             "Requirement already satisfied: torch in /usr/local/lib/python3.10/dist-packages (from easyocr) (2.1.0+cu118)\n",
 62 |             "Requirement already satisfied: torchvision>=0.5 in /usr/local/lib/python3.10/dist-packages (from easyocr) (0.16.0+cu118)\n",
 63 |             "Requirement already satisfied: opencv-python-headless in /usr/local/lib/python3.10/dist-packages (from easyocr) (4.8.1.78)\n",
 64 |             "Requirement already satisfied: scipy in /usr/local/lib/python3.10/dist-packages (from easyocr) (1.11.4)\n",
 65 |             "Requirement already satisfied: Pillow in /usr/local/lib/python3.10/dist-packages (from easyocr) (9.4.0)\n",
 66 |             "Requirement already satisfied: scikit-image in /usr/local/lib/python3.10/dist-packages (from easyocr) (0.19.3)\n",
 67 |             "Collecting python-bidi (from easyocr)\n",
 68 |             "  Downloading python_bidi-0.4.2-py2.py3-none-any.whl (30 kB)\n",
 69 |             "Requirement already satisfied: PyYAML in /usr/local/lib/python3.10/dist-packages (from easyocr) (6.0.1)\n",
 70 |             "Requirement already satisfied: Shapely in /usr/local/lib/python3.10/dist-packages (from easyocr) (2.0.2)\n",
 71 |             "Collecting pyclipper (from easyocr)\n",
 72 |             "  Downloading pyclipper-1.3.0.post5-cp310-cp310-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (908 kB)\n",
 73 |             "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m908.3/908.3 kB\u001b[0m \u001b[31m28.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
 74 |             "\u001b[?25hCollecting ninja (from easyocr)\n",
 75 |             "  Downloading ninja-1.11.1.1-py2.py3-none-manylinux1_x86_64.manylinux_2_5_x86_64.whl (307 kB)\n",
 76 |             "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m307.2/307.2 kB\u001b[0m \u001b[31m29.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
 77 |             "\u001b[?25hRequirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from torchvision>=0.5->easyocr) (2.31.0)\n",
 78 |             "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from torch->easyocr) (3.13.1)\n",
 79 |             "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.10/dist-packages (from torch->easyocr) (4.5.0)\n",
 80 |             "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch->easyocr) (1.12)\n",
 81 |             "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch->easyocr) (3.2.1)\n",
 82 |             "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch->easyocr) (3.1.2)\n",
 83 |             "Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from torch->easyocr) (2023.6.0)\n",
 84 |             "Requirement already satisfied: triton==2.1.0 in /usr/local/lib/python3.10/dist-packages (from torch->easyocr) (2.1.0)\n",
 85 |             "Requirement already satisfied: six in /usr/local/lib/python3.10/dist-packages (from python-bidi->easyocr) (1.16.0)\n",
 86 |             "Requirement already satisfied: imageio>=2.4.1 in /usr/local/lib/python3.10/dist-packages (from scikit-image->easyocr) (2.31.6)\n",
 87 |             "Requirement already satisfied: tifffile>=2019.7.26 in /usr/local/lib/python3.10/dist-packages (from scikit-image->easyocr) (2023.9.26)\n",
 88 |             "Requirement already satisfied: PyWavelets>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from scikit-image->easyocr) (1.5.0)\n",
 89 |             "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from scikit-image->easyocr) (23.2)\n",
 90 |             "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch->easyocr) (2.1.3)\n",
 91 |             "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->torchvision>=0.5->easyocr) (3.3.2)\n",
 92 |             "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->torchvision>=0.5->easyocr) (3.6)\n",
 93 |             "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->torchvision>=0.5->easyocr) (2.0.7)\n",
 94 |             "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->torchvision>=0.5->easyocr) (2023.11.17)\n",
 95 |             "Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch->easyocr) (1.3.0)\n",
 96 |             "Installing collected packages: pyclipper, ninja, python-bidi, easyocr\n",
 97 |             "Successfully installed easyocr-1.7.1 ninja-1.11.1.1 pyclipper-1.3.0.post5 python-bidi-0.4.2\n"
 98 |           ]
 99 |         }
100 |       ],
101 |       "source": [
102 |         "! pip install opencv-python easyocr"
103 |       ]
104 |     },
105 |     {
106 |       "cell_type": "code",
107 |       "execution_count": null,
108 |       "metadata": {
109 |         "colab": {
110 |           "base_uri": "https://localhost:8080/",
111 |           "height": 53
112 |         },
113 |         "id": "tdYC6UIwdYsX",
114 |         "outputId": "bc1f3ec6-d869-4fdb-d04e-1539a51f48ec"
115 |       },
116 |       "outputs": [
117 |         {
118 |           "name": "stderr",
119 |           "output_type": "stream",
120 |           "text": [
121 |             "WARNING:easyocr.easyocr:Using CPU. Note: This module is much faster with a GPU.\n"
122 |           ]
123 |         },
124 |         {
125 |           "data": {
126 |             "application/vnd.google.colaboratory.intrinsic+json": {
127 |               "type": "string"
128 |             },
129 |             "text/plain": [
130 |               "'Mockup STORE SIGN'"
131 |             ]
132 |           },
133 |           "execution_count": 64,
134 |           "metadata": {},
135 |           "output_type": "execute_result"
136 |         }
137 |       ],
138 |       "source": [
139 |         "from easyocr import Reader\n",
140 |         "import cv2\n",
141 |         "\n",
142 |         "image = cv2.imread('store sign.jpg')\n",
143 |         "\n",
144 |         "reader = Reader(['en'], gpu=False)\n",
145 |         "results = reader.readtext(image)\n",
146 |         "\n",
147 |         "text = ' '.join([res[1] for res in results])\n",
148 |         "text"
149 |       ]
150 |     },
151 |     {
152 |       "cell_type": "markdown",
153 |       "metadata": {
154 |         "id": "mox4xA6qfGmN"
155 |       },
156 |       "source": [
157 |         "##### Keras OCR"
158 |       ]
159 |     },
160 |     {
161 |       "cell_type": "code",
162 |       "execution_count": null,
163 |       "metadata": {
164 |         "colab": {
165 |           "base_uri": "https://localhost:8080/"
166 |         },
167 |         "id": "NE9cjRY-fJ2t",
168 |         "outputId": "db58ddf3-8d04-4dea-b642-d8490acac54f"
169 |       },
170 |       "outputs": [
171 |         {
172 |           "name": "stdout",
173 |           "output_type": "stream",
174 |           "text": [
175 |             "Requirement already satisfied: keras-ocr in /usr/local/lib/python3.10/dist-packages (0.9.3)\n",
176 |             "Requirement already satisfied: editdistance in /usr/local/lib/python3.10/dist-packages (from keras-ocr) (0.6.2)\n",
177 |             "Requirement already satisfied: efficientnet==1.0.0 in /usr/local/lib/python3.10/dist-packages (from keras-ocr) (1.0.0)\n",
178 |             "Requirement already satisfied: essential_generators in /usr/local/lib/python3.10/dist-packages (from keras-ocr) (1.0)\n",
179 |             "Requirement already satisfied: fonttools in /usr/local/lib/python3.10/dist-packages (from keras-ocr) (4.46.0)\n",
180 |             "Requirement already satisfied: imgaug in /usr/local/lib/python3.10/dist-packages (from keras-ocr) (0.4.0)\n",
181 |             "Requirement already satisfied: pyclipper in /usr/local/lib/python3.10/dist-packages (from keras-ocr) (1.3.0.post5)\n",
182 |             "Requirement already satisfied: shapely in /usr/local/lib/python3.10/dist-packages (from keras-ocr) (2.0.2)\n",
183 |             "Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from keras-ocr) (4.66.1)\n",
184 |             "Requirement already satisfied: validators in /usr/local/lib/python3.10/dist-packages (from keras-ocr) (0.22.0)\n",
185 |             "Requirement already satisfied: keras-applications<=1.0.8,>=1.0.7 in /usr/local/lib/python3.10/dist-packages (from efficientnet==1.0.0->keras-ocr) (1.0.8)\n",
186 |             "Requirement already satisfied: scikit-image in /usr/local/lib/python3.10/dist-packages (from efficientnet==1.0.0->keras-ocr) (0.19.3)\n",
187 |             "Requirement already satisfied: six in /usr/local/lib/python3.10/dist-packages (from imgaug->keras-ocr) (1.16.0)\n",
188 |             "Requirement already satisfied: numpy>=1.15 in /usr/local/lib/python3.10/dist-packages (from imgaug->keras-ocr) (1.23.5)\n",
189 |             "Requirement already satisfied: scipy in /usr/local/lib/python3.10/dist-packages (from imgaug->keras-ocr) (1.11.4)\n",
190 |             "Requirement already satisfied: Pillow in /usr/local/lib/python3.10/dist-packages (from imgaug->keras-ocr) (9.4.0)\n",
191 |             "Requirement already satisfied: matplotlib in /usr/local/lib/python3.10/dist-packages (from imgaug->keras-ocr) (3.7.1)\n",
192 |             "Requirement already satisfied: opencv-python in /usr/local/lib/python3.10/dist-packages (from imgaug->keras-ocr) (4.8.0.76)\n",
193 |             "Requirement already satisfied: imageio in /usr/local/lib/python3.10/dist-packages (from imgaug->keras-ocr) (2.31.6)\n",
194 |             "Requirement already satisfied: h5py in /usr/local/lib/python3.10/dist-packages (from keras-applications<=1.0.8,>=1.0.7->efficientnet==1.0.0->keras-ocr) (3.9.0)\n",
195 |             "Requirement already satisfied: networkx>=2.2 in /usr/local/lib/python3.10/dist-packages (from scikit-image->efficientnet==1.0.0->keras-ocr) (3.2.1)\n",
196 |             "Requirement already satisfied: tifffile>=2019.7.26 in /usr/local/lib/python3.10/dist-packages (from scikit-image->efficientnet==1.0.0->keras-ocr) (2023.9.26)\n",
197 |             "Requirement already satisfied: PyWavelets>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from scikit-image->efficientnet==1.0.0->keras-ocr) (1.5.0)\n",
198 |             "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from scikit-image->efficientnet==1.0.0->keras-ocr) (23.2)\n",
199 |             "Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->imgaug->keras-ocr) (1.2.0)\n",
200 |             "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.10/dist-packages (from matplotlib->imgaug->keras-ocr) (0.12.1)\n",
201 |             "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->imgaug->keras-ocr) (1.4.5)\n",
202 |             "Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->imgaug->keras-ocr) (3.1.1)\n",
203 |             "Requirement already satisfied: python-dateutil>=2.7 in /usr/local/lib/python3.10/dist-packages (from matplotlib->imgaug->keras-ocr) (2.8.2)\n",
204 |             "Collecting keras-ocr\n",
205 |             "  Cloning https://github.com/faustomorales/keras-ocr.git to /tmp/pip-install-rh9tim45/keras-ocr_84b266c0f309432a9c4a81841d813098\n",
206 |             "  Running command git clone --filter=blob:none --quiet https://github.com/faustomorales/keras-ocr.git /tmp/pip-install-rh9tim45/keras-ocr_84b266c0f309432a9c4a81841d813098\n",
207 |             "  Resolved https://github.com/faustomorales/keras-ocr.git to commit e8d34a46f07d50158e1d86d6c617e99bfe99e2f8\n",
208 |             "  Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n",
209 |             "  Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n",
210 |             "  Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
211 |             "Requirement already satisfied: editdistance in /usr/local/lib/python3.10/dist-packages (from keras-ocr) (0.6.2)\n",
212 |             "Requirement already satisfied: efficientnet==1.0.0 in /usr/local/lib/python3.10/dist-packages (from keras-ocr) (1.0.0)\n",
213 |             "Requirement already satisfied: essential_generators in /usr/local/lib/python3.10/dist-packages (from keras-ocr) (1.0)\n",
214 |             "Requirement already satisfied: fonttools in /usr/local/lib/python3.10/dist-packages (from keras-ocr) (4.46.0)\n",
215 |             "Requirement already satisfied: imgaug in /usr/local/lib/python3.10/dist-packages (from keras-ocr) (0.4.0)\n",
216 |             "Requirement already satisfied: pyclipper in /usr/local/lib/python3.10/dist-packages (from keras-ocr) (1.3.0.post5)\n",
217 |             "Requirement already satisfied: shapely in /usr/local/lib/python3.10/dist-packages (from keras-ocr) (2.0.2)\n",
218 |             "Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from keras-ocr) (4.66.1)\n",
219 |             "Requirement already satisfied: validators in /usr/local/lib/python3.10/dist-packages (from keras-ocr) (0.22.0)\n",
220 |             "Requirement already satisfied: keras-applications<=1.0.8,>=1.0.7 in /usr/local/lib/python3.10/dist-packages (from efficientnet==1.0.0->keras-ocr) (1.0.8)\n",
221 |             "Requirement already satisfied: scikit-image in /usr/local/lib/python3.10/dist-packages (from efficientnet==1.0.0->keras-ocr) (0.19.3)\n",
222 |             "Requirement already satisfied: six in /usr/local/lib/python3.10/dist-packages (from imgaug->keras-ocr) (1.16.0)\n",
223 |             "Requirement already satisfied: numpy>=1.15 in /usr/local/lib/python3.10/dist-packages (from imgaug->keras-ocr) (1.23.5)\n",
224 |             "Requirement already satisfied: scipy in /usr/local/lib/python3.10/dist-packages (from imgaug->keras-ocr) (1.11.4)\n",
225 |             "Requirement already satisfied: Pillow in /usr/local/lib/python3.10/dist-packages (from imgaug->keras-ocr) (9.4.0)\n",
226 |             "Requirement already satisfied: matplotlib in /usr/local/lib/python3.10/dist-packages (from imgaug->keras-ocr) (3.7.1)\n",
227 |             "Requirement already satisfied: opencv-python in /usr/local/lib/python3.10/dist-packages (from imgaug->keras-ocr) (4.8.0.76)\n",
228 |             "Requirement already satisfied: imageio in /usr/local/lib/python3.10/dist-packages (from imgaug->keras-ocr) (2.31.6)\n",
229 |             "Requirement already satisfied: h5py in /usr/local/lib/python3.10/dist-packages (from keras-applications<=1.0.8,>=1.0.7->efficientnet==1.0.0->keras-ocr) (3.9.0)\n",
230 |             "Requirement already satisfied: networkx>=2.2 in /usr/local/lib/python3.10/dist-packages (from scikit-image->efficientnet==1.0.0->keras-ocr) (3.2.1)\n",
231 |             "Requirement already satisfied: tifffile>=2019.7.26 in /usr/local/lib/python3.10/dist-packages (from scikit-image->efficientnet==1.0.0->keras-ocr) (2023.9.26)\n",
232 |             "Requirement already satisfied: PyWavelets>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from scikit-image->efficientnet==1.0.0->keras-ocr) (1.5.0)\n",
233 |             "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from scikit-image->efficientnet==1.0.0->keras-ocr) (23.2)\n",
234 |             "Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->imgaug->keras-ocr) (1.2.0)\n",
235 |             "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.10/dist-packages (from matplotlib->imgaug->keras-ocr) (0.12.1)\n",
236 |             "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->imgaug->keras-ocr) (1.4.5)\n",
237 |             "Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->imgaug->keras-ocr) (3.1.1)\n",
238 |             "Requirement already satisfied: python-dateutil>=2.7 in /usr/local/lib/python3.10/dist-packages (from matplotlib->imgaug->keras-ocr) (2.8.2)\n",
239 |             "Building wheels for collected packages: keras-ocr\n",
240 |             "  Building wheel for keras-ocr (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
241 |             "  Created wheel for keras-ocr: filename=keras_ocr-0.0.0-py3-none-any.whl size=42310 sha256=f78512b0926069c585989353da3cb181a69ca7da8dbf7f3b81714bca1c056cc0\n",
242 |             "  Stored in directory: /tmp/pip-ephem-wheel-cache-oo3l6ubj/wheels/ee/e8/3a/3915fd372ea68434aa50a06b4b9633c1446cc8b83b5d6975db\n",
243 |             "Successfully built keras-ocr\n",
244 |             "Installing collected packages: keras-ocr\n",
245 |             "  Attempting uninstall: keras-ocr\n",
246 |             "    Found existing installation: keras-ocr 0.9.3\n",
247 |             "    Uninstalling keras-ocr-0.9.3:\n",
248 |             "      Successfully uninstalled keras-ocr-0.9.3\n",
249 |             "Successfully installed keras-ocr-0.0.0\n"
250 |           ]
251 |         }
252 |       ],
253 |       "source": [
254 |         "! pip install keras-ocr\n",
255 |         "! pip install git+https://github.com/faustomorales/keras-ocr.git#egg=keras-ocr"
256 |       ]
257 |     },
258 |     {
259 |       "cell_type": "code",
260 |       "execution_count": null,
261 |       "metadata": {
262 |         "colab": {
263 |           "base_uri": "https://localhost:8080/",
264 |           "height": 105
265 |         },
266 |         "id": "ZyhIgw5_fjoR",
267 |         "outputId": "01ae8938-069c-404f-a0e9-d0bf0fd0730f"
268 |       },
269 |       "outputs": [
270 |         {
271 |           "name": "stdout",
272 |           "output_type": "stream",
273 |           "text": [
274 |             "Looking for /root/.keras-ocr/craft_mlt_25k.h5\n",
275 |             "Looking for /root/.keras-ocr/crnn_kurapan.h5\n",
276 |             "1/1 [==============================] - 21s 21s/step\n",
277 |             "1/1 [==============================] - 2s 2s/step\n"
278 |           ]
279 |         },
280 |         {
281 |           "data": {
282 |             "application/vnd.google.colaboratory.intrinsic+json": {
283 |               "type": "string"
284 |             },
285 |             "text/plain": [
286 |               "'mockup store sign'"
287 |             ]
288 |           },
289 |           "execution_count": 62,
290 |           "metadata": {},
291 |           "output_type": "execute_result"
292 |         }
293 |       ],
294 |       "source": [
295 |         "import keras_ocr\n",
296 |         "import matplotlib.pyplot as plt\n",
297 |         "\n",
298 |         "pipeline = keras_ocr.pipeline.Pipeline()\n",
299 |         "\n",
300 |         "# Read images from folder path to image object\n",
301 |         "images = [\n",
302 |         "    keras_ocr.tools.read('store sign.jpg')\n",
303 |         "]\n",
304 |         "\n",
305 |         "prediction_groups = pipeline.recognize(images)\n",
306 |         "\n",
307 |         "text = ' '.join([res[0] for res in prediction_groups[0]])\n",
308 |         "text"
309 |       ]
310 |     },
311 |     {
312 |       "cell_type": "markdown",
313 |       "metadata": {
314 |         "id": "7Fdc86-Xarzr"
315 |       },
316 |       "source": [
317 |         "## BLIP"
318 |       ]
319 |     },
320 |     {
321 |       "cell_type": "code",
322 |       "execution_count": null,
323 |       "metadata": {
324 |         "colab": {
325 |           "base_uri": "https://localhost:8080/"
326 |         },
327 |         "id": "WeKkQGHakdC8",
328 |         "outputId": "a51b9b98-0d64-4bab-8e9d-402378728d31"
329 |       },
330 |       "outputs": [
331 |         {
332 |           "name": "stdout",
333 |           "output_type": "stream",
334 |           "text": [
335 |             "Requirement already satisfied: replicate in /usr/local/lib/python3.10/dist-packages (0.21.1)\n",
336 |             "Requirement already satisfied: httpx<1,>=0.21.0 in /usr/local/lib/python3.10/dist-packages (from replicate) (0.25.2)\n",
337 |             "Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from replicate) (23.2)\n",
338 |             "Requirement already satisfied: pydantic>1 in /usr/local/lib/python3.10/dist-packages (from replicate) (1.10.13)\n",
339 |             "Requirement already satisfied: typing-extensions>=4.5.0 in /usr/local/lib/python3.10/dist-packages (from replicate) (4.5.0)\n",
340 |             "Requirement already satisfied: anyio in /usr/local/lib/python3.10/dist-packages (from httpx<1,>=0.21.0->replicate) (3.7.1)\n",
341 |             "Requirement already satisfied: certifi in /usr/local/lib/python3.10/dist-packages (from httpx<1,>=0.21.0->replicate) (2023.11.17)\n",
342 |             "Requirement already satisfied: httpcore==1.* in /usr/local/lib/python3.10/dist-packages (from httpx<1,>=0.21.0->replicate) (1.0.2)\n",
343 |             "Requirement already satisfied: idna in /usr/local/lib/python3.10/dist-packages (from httpx<1,>=0.21.0->replicate) (3.6)\n",
344 |             "Requirement already satisfied: sniffio in /usr/local/lib/python3.10/dist-packages (from httpx<1,>=0.21.0->replicate) (1.3.0)\n",
345 |             "Requirement already satisfied: h11<0.15,>=0.13 in /usr/local/lib/python3.10/dist-packages (from httpcore==1.*->httpx<1,>=0.21.0->replicate) (0.14.0)\n",
346 |             "Requirement already satisfied: exceptiongroup in /usr/local/lib/python3.10/dist-packages (from anyio->httpx<1,>=0.21.0->replicate) (1.2.0)\n"
347 |           ]
348 |         }
349 |       ],
350 |       "source": [
351 |         "! pip install replicate"
352 |       ]
353 |     },
354 |     {
355 |       "cell_type": "code",
356 |       "execution_count": null,
357 |       "metadata": {
358 |         "id": "wGJgTB1SFdX-"
359 |       },
360 |       "outputs": [],
361 |       "source": [
362 |         "import os\n",
363 |         "from getpass import getpass\n",
364 |         "import replicate\n",
365 |         "# Never commit the API token: read it from the environment or prompt for it.\n",
366 |         "os.environ[\"REPLICATE_API_TOKEN\"] = os.environ.get(\"REPLICATE_API_TOKEN\") or getpass(\"Replicate API token: \")\n",
367 |         "replicate = replicate.Client(api_token=os.environ[\"REPLICATE_API_TOKEN\"])"
368 |       ]
369 |     },
370 |     {
371 |       "cell_type": "markdown",
372 |       "metadata": {
373 |         "id": "OEnhQlhAFiwr"
374 |       },
375 |       "source": [
376 |         "##### text extraction using BLIP"
377 |       ]
378 |     },
379 |     {
380 |       "cell_type": "code",
381 |       "execution_count": null,
382 |       "metadata": {
383 |         "colab": {
384 |           "background_save": true
385 |         },
386 |         "id": "i3Zs65tdFmo9",
387 |         "outputId": "1dd96f56-d45b-4feb-b483-23f6c7e1c5e1"
388 |       },
389 |       "outputs": [
390 |         {
391 |           "data": {
392 |             "application/vnd.google.colaboratory.intrinsic+json": {
393 |               "type": "string"
394 |             },
395 |             "text/plain": [
396 |               "'candy shoppe'"
397 |             ]
398 |           },
399 |           "execution_count": 22,
400 |           "metadata": {},
401 |           "output_type": "execute_result"
402 |         }
403 |       ],
404 |       "source": [
405 |         "image = open(\"/content/text.jpg\", \"rb\")\n",
406 |         "\n",
407 |         "# identify the object\n",
408 |         "output = replicate.run(\n",
409 |         "  \"salesforce/blip:2e1dddc8621f72155f24cf2e0adbde548458d3cab9f00c0139eea840d0ac4746\",\n",
410 |         "  input={\n",
411 |         "    \"image\": image,\n",
412 |         "    \"task\": \"visual_question_answering\",\n",
413 |         "    \"question\": \"what is in this picture?\"\n",
414 |         "  }\n",
415 |         ")\n",
416 |         "text = ' '.join(output.split(' ')[1:])\n",
417 |         "text"
418 |       ]
419 |     },
420 |     {
421 |       "cell_type": "markdown",
422 |       "metadata": {
423 |         "id": "uRbaiGmZFw6u"
424 |       },
425 |       "source": [
426 |         "##### caption generation using BLIP"
427 |       ]
428 |     },
429 |     {
430 |       "cell_type": "code",
431 |       "execution_count": null,
432 |       "metadata": {
433 |         "colab": {
434 |           "base_uri": "https://localhost:8080/",
435 |           "height": 35
436 |         },
437 |         "id": "CgeDXv7lVW5T",
438 |         "outputId": "4d91cfca-0371-4b67-b38e-9ad28c094458"
439 |       },
440 |       "outputs": [
441 |         {
442 |           "data": {
443 |             "application/vnd.google.colaboratory.intrinsic+json": {
444 |               "type": "string"
445 |             },
446 |             "text/plain": [
447 |               "'the word (bakery) written on a (black) (sign)'"
448 |             ]
449 |           },
450 |           "execution_count": 21,
451 |           "metadata": {},
452 |           "output_type": "execute_result"
453 |         }
454 |       ],
455 |       "source": [
456 |         "image = open(\"/content/store sign - Copy.jpg\", \"rb\")\n",
457 |         "\n",
458 |         "# identify the object\n",
459 |         "output = replicate.run(\n",
460 |         "  \"salesforce/blip:2e1dddc8621f72155f24cf2e0adbde548458d3cab9f00c0139eea840d0ac4746\",\n",
461 |         "  input={\n",
462 |         "    \"image\": image,\n",
463 |         "    \"task\": \"visual_question_answering\",\n",
464 |         "    \"question\": \"what is the main object in this picture?\"\n",
465 |         "  }\n",
466 |         ")\n",
467 |         "obj = ' '.join(output.split(' ')[1:])\n",
468 |         "\n",
469 |         "# identify the color of the object\n",
470 |         "output = replicate.run(\n",
471 |         "  \"salesforce/blip:2e1dddc8621f72155f24cf2e0adbde548458d3cab9f00c0139eea840d0ac4746\",\n",
472 |         "  input={\n",
473 |         "    \"image\": image,\n",
474 |         "    \"task\": \"visual_question_answering\",\n",
475 |         "    \"question\": f\"what is the color of the {obj}?\"\n",
476 |         "  }\n",
477 |         ")\n",
478 |         "color = ' '.join(output.split(' ')[1:])\n",
479 |         "\n",
480 |         "prompt = f\"the word ({text}) written on a ({color}) ({obj})\"\n",
481 |         "prompt"
482 |       ]
483 |     }
484 |   ],
485 |   "metadata": {
486 |     "colab": {
487 |       "provenance": []
488 |     },
489 |     "kernelspec": {
490 |       "display_name": "Python 3",
491 |       "name": "python3"
492 |     },
493 |     "language_info": {
494 |       "name": "python"
495 |     }
496 |   },
497 |   "nbformat": 4,
498 |   "nbformat_minor": 0
499 | }


--------------------------------------------------------------------------------