├── .gitignore
├── README.md
├── commands.txt
├── flaskserver.py
├── helpers.py
├── preprocess.py
├── tag_extraction.py
└── text_extraction.pyc
/.gitignore:
--------------------------------------------------------------------------------
1 | node_modules
2 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # IMAGE-BOT
2 | A chatbot for interacting with the text content of images: question answering, text detection, and text extraction. The project converts a user's digital images into a personal knowledge base, so the user can get more value from their images and find answers to questions about the information they contain. It also lets the user extract the text from a photo of a printed page and search their photos by any text tag that appears in them. Usage examples for each feature are sketched in the sections below.
3 |
4 |
5 | ## Set up
6 | ```bash
7 | $ git clone https://github.com/Amgad-Abdelkhaleq/IMAGE-BOT && cd IMAGE-BOT/
8 | $ bash commands.txt
9 | $ cd express/
10 | $ npm install
11 | ```
12 | ## Run
13 | ```bash
14 | $ python3 flaskserver.py &   # or run the two servers in separate terminals
15 | $ node express/express.js
16 | ```
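
With both servers running, the endpoints in `flaskserver.py` can be exercised directly over HTTP. A minimal sketch using `requests`; the image name `note.jpg` and the tag `exit` are placeholders for your own uploads:

```python
import requests

BASE = "http://localhost:5002"  # flaskserver.py listens on port 5002

# upload an image so it is OCR'd and added to the knowledge base (field name "file[]")
with open("note.jpg", "rb") as f:
    requests.post(f"{BASE}/upload", files={"file[]": f})

# OCR a previously uploaded text-based image by name
print(requests.get(f"{BASE}/chat", params={"msg": "extract:note.jpg"}).json())

# find a scene image by one of the text tags detected in it
print(requests.get(f"{BASE}/chat", params={"msg": "tag:exit"}).json())

# anything else is treated as a question; "help" lists the supported commands
print(requests.get(f"{BASE}/chat", params={"msg": "help"}).json())
```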
17 |
18 |
19 | ## System features
20 | ### Optical character recognition
21 | 
22 |
23 |
24 |
25 |
26 |
27 |
28 | ### Question Answering
29 | 
30 |
31 |
32 |
33 |
34 | ### Searching images with text tags
35 | 
36 |
37 | ## Demo
38 |
39 |
40 | ## References
41 | - https://github.com/notAI-tech/keras-craft
42 | - https://github.com/winkjs/wink-bm25-text-search
43 |
44 |
--------------------------------------------------------------------------------
/commands.txt:
--------------------------------------------------------------------------------
1 | sudo apt update
2 | pip3 install flask
3 | pip3 install requests
4 | pip3 install opencv-python
5 | pip3 install -U numpy
6 | pip3 install pytesseract
7 | sudo apt install tesseract-ocr
8 | sudo apt install libtesseract-dev
9 | pip3 install git+https://github.com/notAI-tech/keras-craft
10 |
--------------------------------------------------------------------------------
/flaskserver.py:
--------------------------------------------------------------------------------
1 | from flask import Flask
2 | from flask import request
3 | from flask import jsonify
4 | from flask import render_template
5 | import requests
6 | import json
7 | from helpers import *
8 | from preprocess import *
9 | from tag_extraction import *
10 | import os, cv2
11 | import numpy as np
12 | # initialize flask app
13 | app = Flask(__name__)
14 | # set config for upload folder and allowed image extensions
15 | app.config['UPLOAD_FOLDER'] = os.path.join(os.getcwd(), "static/images")
16 | app.config['ALLOWED_EXTENSIONS'] = set(['png', 'jpg', 'jpeg'])
17 | def allowed_file(filename):
18 |     return '.' in filename and filename.rsplit('.', 1)[1].lower() in app.config['ALLOWED_EXTENSIONS']
19 |
20 | # home route to render the index html file
21 | @app.route("/")
22 | def home():
23 |     return render_template("index.html")
24 |
25 | # chat UI route for user questions
26 | @app.route("/chat", methods=['GET'])
27 | def get_bot_response():
28 |     # classify the user message: extract feature, tag feature, help, or a plain question
29 |     userText = request.args.get('msg')
30 |     if("extract:" in userText):
31 |         # extract: take the image name, OCR the image, and return its text
32 |         try:
33 |             image_str = userText[userText.find(":")+1:]
34 |             print(image_str)
35 |             path = os.getcwd()+"/static/images/text-based/"+image_str
36 |             image = cv2.imread(str(path))
37 |             text = extract_text(image)
38 |             print(text)
39 |             return {"Answer":text, "image_name":image_str, "type":"extract"}
40 |         except Exception:
41 |             return {"Answer":"image not found", "type":"error"}
42 |
43 |     elif("tag:" in userText):
44 |         # tag: search the tags knowledge base and return the matching image name to the front end
45 |         tag = userText[userText.find(":")+1:]
46 |         print("tag entered: ", tag)
47 |         search_result = find_tag(tag)
48 |         if search_result == "not found":
49 |             result = {"Answer":"not found", "image_name":search_result, "type":"tag"}
50 |         else:
51 |             result = {"Answer":"found", "image_name":search_result["image"], "type":"tag"}
52 |         return result
53 |
54 |     elif(userText == "help"):
55 |         help_message = "you can ask me any question related to the text content of your images and I'll help you find the answer and its image; you can also type 'extract:' followed by an image name to extract the text content of that image, or 'tag:' followed by a keyword to find any natural image with that tag"
56 |         return {"Answer":help_message, "type":"help"}
57 |
58 |
59 |     else:
60 |         print("flask will send this:", userText)
61 |         newdata = {"question": userText}  # the question we send to the Node server to get its answer
62 |         # send a POST request with the user question, then return its answer to the front end
63 |         try:
64 |             post = requests.post('http://localhost:7000/postdata', json=newdata)  # the POST request
65 |             print("flask received this:", post.text)
66 |             result = json.loads(post.text)
67 |             return result
68 |         except Exception:
69 |             print("failed to connect to the Node server")
70 |             return {"Answer":"could not reach the QA server", "type":"error"}
71 |
72 |
73 | # upload route for images
74 | @app.route("/upload", methods=['POST'])
75 | def uploader():
76 |     if request.method == 'POST':
77 |         # store the uploaded image files
78 |         uploaded_files = request.files.getlist("file[]")
79 |         for file in uploaded_files:
80 |             # check whether the image extension is allowed
81 |             if file and allowed_file(file.filename):
82 |                 filename = file.filename
83 |                 # decode the image bytes
84 |                 img = cv2.imdecode(np.frombuffer(file.read(), np.uint8), cv2.IMREAD_UNCHANGED)
85 |                 # extract image text with OCR to decide whether it is a text-based image or a scene (natural image)
86 |                 text = extract_text(img, custom_config=r'-l eng -c tessedit_char_whitelist=abcdefghijklmnopqrstuvwxyz --oem 1')
87 |                 page = extract_text(img)
88 |                 print(filename, ": ", len(text), "p:", len(page))
89 |                 # consider any image with text length below 200 characters to be a scene
90 |                 if(len(text) < 200):
91 |                     # scene image: save it to its folder and add its tags to the tag knowledge base
92 |                     cv2.imwrite(os.path.join(app.config['UPLOAD_FOLDER'], "photo", filename), img)
93 |                     try:
94 |                         extract_images_tags(filename)
95 |                     except Exception as e:
96 |                         print("Exception extracting image tags", e)
97 |                 else:
98 |                     # text-based image: save it, split its text into paragraphs, and add them to the knowledge base
99 |                     cv2.imwrite(os.path.join(app.config['UPLOAD_FOLDER'], "text-based", filename), img)
100 |                     insert_into_KB(page=page, filename=file.filename)
101 |
102 |     return render_template("index.html")
103 |
104 |
105 | if __name__ == "__main__":
106 |     app.run(host='localhost', port=5002, debug=True)
107 |
108 |
109 |
110 |
111 |
112 |
113 |
114 |
115 |
116 |
117 |
118 |
119 |
120 |
121 |
122 |
123 |
124 |
125 |
126 |
127 |
128 |
129 |
130 |
131 |
132 |
133 |
134 |
135 |
136 |
137 |
138 |
--------------------------------------------------------------------------------
/helpers.py:
--------------------------------------------------------------------------------
1 | import cv2
2 | import numpy as np
3 | import matplotlib.pyplot as plt
4 | import pytesseract
5 |
6 | # get grayscale image
7 | def get_grayscale(image):
8 |     return cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
9 |
10 | # noise removal
11 | def remove_noise(image):
12 |     return cv2.medianBlur(image, 5)
13 |
14 | # thresholding
15 | def thresholding(image):
16 |     return cv2.threshold(image, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
17 |
18 | # dilation
19 | def dilate(image):
20 |     kernel = np.ones((5,5), np.uint8)
21 |     return cv2.dilate(image, kernel, iterations=1)
22 |
23 | # erosion
24 | def erode(image):
25 |     kernel = np.ones((5,5), np.uint8)
26 |     return cv2.erode(image, kernel, iterations=1)
27 |
28 | # opening - erosion followed by dilation
29 | def opening(image):
30 |     kernel = np.ones((5,5), np.uint8)
31 |     return cv2.morphologyEx(image, cv2.MORPH_OPEN, kernel)
32 |
33 | # canny edge detection
34 | def canny(image):
35 |     return cv2.Canny(image, 100, 200)
36 |
37 | # skew correction
38 | def deskew(image):
39 |     coords = np.column_stack(np.where(image > 0))
40 |     angle = cv2.minAreaRect(coords)[-1]
41 |     if angle < -45:
42 |         angle = -(90 + angle)
43 |     else:
44 |         angle = -angle
45 |     (h, w) = image.shape[:2]
46 |     center = (w // 2, h // 2)
47 |     M = cv2.getRotationMatrix2D(center, angle, 1.0)
48 |     rotated = cv2.warpAffine(image, M, (w, h), flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_REPLICATE)
49 |     return rotated
50 |
51 | # template matching
52 | def match_template(image, template):
53 |     return cv2.matchTemplate(image, template, cv2.TM_CCOEFF_NORMED)
54 |
55 | def extract_text(image, custom_config=r'-l eng -c tessedit_char_whitelist=" "0123456789abcdefghijklmnopqrstuvwxyz --psm 6'):
56 |     gray = get_grayscale(image)
57 |     thresh = thresholding(gray)
58 |     text = ''
59 |     text += str(pytesseract.image_to_string(thresh, config=custom_config))
60 |     return text
61 |
62 |
63 |
64 |
65 |
--------------------------------------------------------------------------------
/preprocess.py:
--------------------------------------------------------------------------------
1 | import re
2 | import os
3 | from helpers import *
4 | import pprint as pp
5 | import json
6 |
7 |
8 | def split_page(text, min_length=200, include_line_breaks=False):
9 |     paragraphs = re.split("\n\n(?=\u2028|[A-Z-0-9])", text)
10 |     list_par = []
11 |     temp_para = ""  # variable that stores paragraphs with length < min_length
12 |     # (short lines are concatenated until they form a long enough paragraph)
13 |     for p in paragraphs:
14 |         if not include_line_breaks:
15 |             # merge short paragraphs instead of keeping every line break
16 |             if len(p) >= min_length:
17 |                 if temp_para:
18 |                     # if True, append temp_para which holds concatenated
19 |                     # lines to form a paragraph before current paragraph p
20 |                     list_par.append(temp_para.strip())
21 |                     temp_para = (
22 |                         ""
23 |                     )  # reset temp_para for new lines to be concatenated
24 |                     list_par.append(
25 |                         p.replace("\n", "")
26 |                     )  # append current paragraph with length > min_length
27 |                 else:
28 |                     list_par.append(p.replace("\n", ""))
29 |             else:
30 |                 # paragraph p (line) is concatenated to temp_para
31 |                 line = p.replace("\n", " ").strip()
32 |                 temp_para = temp_para + f" {line}"
33 |         else:
34 |             # appending paragraph p as is to list_par
35 |             list_par.append(p.replace("\n", ""))
36 |     else:
37 |         if temp_para:
38 |             list_par.append(temp_para.strip())
39 |     return list_par
40 |
41 |
42 | # function to update the knowledge base with the text of newly uploaded images
43 | def insert_into_KB(page, filename):  # input: image text and image name
44 |     paragraphs = []
45 |     p_threshold = 150  # threshold on paragraph length, in characters
46 |     fname = os.path.join(os.getcwd(), "static/KB/output.json")
47 |     KB = json.load(open(fname, 'r'))  # load the current data from the knowledge base
48 |     if (len(page) > p_threshold):
49 |         # split the page text into small paragraphs
50 |         paragraphs = split_page(page)
51 |         for p in paragraphs:
52 |             if len(p) > p_threshold:
53 |                 # store the image name so it can be retrieved for the user in chat
54 |                 data_dict = {"image":filename, "body":p}
55 |                 print(data_dict, "\n")
56 |                 KB.append(data_dict)  # append the new dictionary to the KB list
57 |     # dump it back to the KB json file
58 |     json.dump(KB, open(fname, 'w'))
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 |
67 |
--------------------------------------------------------------------------------
/tag_extraction.py:
--------------------------------------------------------------------------------
1 | import keras_craft
2 | from helpers import *
3 | import os
4 | import json
5 |
6 | # initialize an instance of the keras-craft detector
7 | detector = keras_craft.Detector()
8 |
9 | # function to extract the text tags that exist in a scene image (not a text-based image)
10 | def extract_images_tags(filename):
11 |     # locate the tags knowledge base file
12 |     fname = os.path.join(os.getcwd(), "static/KB/tags.json")
13 |     tags_KB = json.load(open(fname, 'r'))  # load the current data
14 |     # locate the scene images folder
15 |     folder = os.path.join(os.getcwd(), "static/images/photo")
16 |     global detector
17 |     image_path = [os.path.join(folder, filename)]
18 |     if os.path.isfile(image_path[0]):
19 |         # detect the text regions in the image and return them as cropped images
20 |         all_boxes, cropped_images = detector.detect(image_path, return_cropped_images=True)
21 |         for cropped_boxes in cropped_images:
22 |             tags = set()
23 |             for cropped_box in cropped_boxes:
24 |                 # extract the text of each cropped box
25 |                 tags.add(extract_text(cropped_box, custom_config=r'-l eng -c tessedit_char_whitelist=0123456789abcdefghijklmnopqrstuvwxyz --psm 6').lower())
26 |             print(tags)
27 |             # store the image tags in the tags knowledge base together with the image name
28 |             tags_KB.append({"image":filename, "tags":'++'.join(tags)})
29 |         # then dump it back to the file
30 |         json.dump(tags_KB, open(fname, 'w'))
31 |     else:
32 |         print("file not found")
33 | # function to find an image with a specific tag
34 | def find_tag(entered_tag):
35 |     fname = os.path.join(os.getcwd(), "static/KB/tags.json")
36 |     tags_KB = json.load(open(fname, 'r'))  # load the current data
37 |     entered_tag = entered_tag.lower()  # comment this line out for testing
38 |     tag_words = entered_tag.split(" ")
39 |     # if any keyword from the tag exists in the knowledge base, return the info dict for that image
40 |     for word in tag_words:
41 |         found = False
42 |         for dic in tags_KB:
43 |             for tag in dic["tags"].split("++"):
44 |                 if word == tag:
45 |                     found = True
46 |                     break
47 |             if found: return dic
48 |     return "not found"
49 |
50 |
51 |
52 |
--------------------------------------------------------------------------------
/text_extraction.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/satheesh045/Image-chatbot/16299ae70b88d3cbc5abae74acb1d2ebaa8376d3/text_extraction.pyc
--------------------------------------------------------------------------------