├── .gitignore ├── README.md ├── commands.txt ├── flaskserver.py ├── helpers.py ├── preprocess.py ├── tag_extraction.py └── text_extraction.pyc /.gitignore: -------------------------------------------------------------------------------- 1 | node_modules 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # IMAGE-BOT 2 | A chatbot for interacting with the text content of images: question answering, text detection, and text extraction. This project presents an approach to converting digital images into a personal knowledge base, enabling users to get more benefit from their digital images and to find an answer to any question about the information they contain. It also lets the user extract the text from a photo of a printed page and search their photos by any text tag that appears in them. 3 | 4 | 5 | ## Set up 6 | ```bash 7 | $ git clone https://github.com/Amgad-Abdelkhaleq/IMAGE-BOT 8 | $ bash commands.txt 9 | $ cd express/ 10 | $ npm install 11 | ``` 12 | ## Run 13 | ```bash 14 | $ python3 flaskserver.py 15 | $ node express/express.js 16 | ``` 17 | 18 | 19 | ## System features 20 | ### Optical character recognition 21 | ![Screenshot (51)](https://user-images.githubusercontent.com/36202618/80548389-1e644980-89bb-11ea-99b6-1e41f49e3f0c.png) 22 | 23 | 24 | 25 | 26 | 27 | 28 | ### Question Answering 29 | ![Screenshot (50)](https://user-images.githubusercontent.com/36202618/80548169-89f9e700-89ba-11ea-9247-848fa72a2491.png) 30 | 31 | 32 | 33 | 34 | ### Searching images with text tags 35 | ![Screenshot (52)](https://user-images.githubusercontent.com/36202618/80548525-6f743d80-89bb-11ea-81a1-305ba9a21c90.png) 36 | 37 | ## Demo 38 | 39 | 40 | ## References 41 | https://github.com/notAI-tech/keras-craft 42 | https://github.com/winkjs/wink-bm25-text-search 43 | 44 | -------------------------------------------------------------------------------- /commands.txt: 
-------------------------------------------------------------------------------- 1 | sudo apt update 2 | pip3 install flask 3 | pip3 install requests 4 | pip3 install opencv-python 5 | pip3 install -U numpy 6 | pip3 install pytesseract 7 | sudo apt install tesseract-ocr 8 | sudo apt install libtesseract-dev 9 | pip install git+https://github.com/notAI-tech/keras-craft 10 | -------------------------------------------------------------------------------- /flaskserver.py: -------------------------------------------------------------------------------- 1 | from flask import Flask 2 | from flask import request 3 | from flask import jsonify 4 | from flask import render_template 5 | import requests 6 | import json 7 | from helpers import * 8 | from preprocess import * 9 | from tag_extraction import * 10 | 11 | 12 | #intialize flask app 13 | app = Flask(__name__) 14 | #set config for upload folder and allowed image extentions 15 | app.config['UPLOAD_FOLDER'] = os.path.join(os.getcwd(),"static/images") 16 | app.config['ALLOWED_EXTENSIONS'] = set(['png', 'jpg', 'jpeg']) 17 | def allowed_file(filename): 18 | return '.' 
in filename and filename.rsplit('.', 1)[1].lower() in app.config['ALLOWED_EXTENSIONS'] 19 | 20 | #home route to ender index html file 21 | @app.route("/") 22 | def home(): 23 | return render_template("index.html") 24 | 25 | #chat UI route for user questions 26 | @app.route("/chat",methods=['GET']) 27 | def get_bot_response(): 28 | #classify user messege wether it is an extract feature or tag feature or help or just a question 29 | userText = request.args.get('msg') 30 | if("extract:" in userText): 31 | #if extract take image name then OCR it and return its text 32 | try: 33 | image_str=userText[userText.find(":")+1:] 34 | print(image_str) 35 | path=os.getcwd()+"/static/images/text-based/"+image_str 36 | image = cv2.imread(str(path)) 37 | text = extract_text(image) 38 | print(text) 39 | return {"Answer":text,"image_name":image_str,"type":"extract"} 40 | except: 41 | return {"Answer":"image not found","type":"error"} 42 | 43 | elif("tag:" in userText): 44 | #if tag searched in tags knowledge base then return image name to front end 45 | tag=userText[userText.find(":")+1:] 46 | print("tag entered: ",tag) 47 | search_result=find_tag(tag) 48 | if search_result== "not found": 49 | result={"Answer":"not found","image_name":search_result ,"type":"tag"} 50 | else: 51 | result={"Answer": "found" ,"image_name":search_result["image"] ,"type":"tag"} 52 | return result 53 | 54 | elif(userText=="help"): 55 | help_message="you can ask me any question related to text content in your image and I'll help you find the answer and its image, you also can use extract feature by typing 'extract:' followed by image name to extract text content in the image and 'tag:' followed by a keyword to find any natural image with this tag" 56 | return {"Answer":help_message ,"type":"help"} 57 | 58 | 59 | else: 60 | print("flask will send this:",userText) 61 | newdata = {"question": userText} # this is the question we are going to send to the Node server to get it answer 62 | # now immediately sending 
a post request with user question then return it answer to front end 63 | try: 64 | post = requests.post('http://localhost:7000/postdata', json=newdata) # the POST request 65 | print("flask recived this :",post.text) 66 | result=json.loads(post.text) 67 | return result 68 | except: 69 | print("i failed connecting node") 70 | 71 | 72 | 73 | #upload route for images 74 | @app.route("/upload",methods=['POST']) 75 | def uploader(): 76 | if request.method == 'POST': 77 | #store images files 78 | uploaded_files =request.files.getlist("file[]") 79 | for file in uploaded_files: 80 | #check image extention if it is allowed 81 | if file and allowed_file(file.filename): 82 | filename = file.filename 83 | #decode image string 84 | img = cv2.imdecode(np.fromstring(file.read(), np.uint8), cv2.IMREAD_UNCHANGED) 85 | #extract image text with ocr to determine if it text-based image or scene (natural image) 86 | text= extract_text(img,custom_config = r'-l eng -c tessedit_char_whitelist=abcdefghijklmnopqrstuvwxyz --oem 1') 87 | page= extract_text(img) 88 | print(filename,": ",len(text),"p:",len(page)) 89 | #will consider any image with text-length below 200 char to be a scene 90 | if(len(text)<200): 91 | #if it is a scence image save to it certain folder and add its tage to tag knowledge base 92 | cv2.imwrite(os.path.join(app.config['UPLOAD_FOLDER'],"photo",filename), img) 93 | try: 94 | extract_images_tags(filename) 95 | except Exception as e: 96 | print("Exception extracting image tags",e) 97 | else: 98 | #if image is text-based insert it split its text to paragraph and then add to its knowledge base 99 | cv2.imwrite(os.path.join(app.config['UPLOAD_FOLDER'],"text-based",filename), img) 100 | insert_into_KB(page=page,filename=file.filename) 101 | 102 | return render_template("index.html") 103 | 104 | 105 | if __name__ == "__main__": 106 | app.run(host='localhost', port=5002,debug=True) 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 
123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | -------------------------------------------------------------------------------- /helpers.py: -------------------------------------------------------------------------------- 1 | from cv2 import cv2 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | import pytesseract 5 | 6 | # get grayscale image 7 | def get_grayscale(image): 8 | return cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) 9 | 10 | # noise removal 11 | def remove_noise(image): 12 | return cv2.medianBlur(image,5) 13 | 14 | #thresholding 15 | def thresholding(image): 16 | return cv2.threshold(image, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1] 17 | 18 | #dilation 19 | def dilate(image): 20 | kernel = np.ones((5,5),np.uint8) 21 | return cv2.dilate(image, kernel, iterations = 1) 22 | 23 | #erosion 24 | def erode(image): 25 | kernel = np.ones((5,5),np.uint8) 26 | return cv2.erode(image, kernel, iterations = 1) 27 | 28 | #opening - erosion followed by dilation 29 | def opening(image): 30 | kernel = np.ones((5,5),np.uint8) 31 | return cv2.morphologyEx(image, cv2.MORPH_OPEN, kernel) 32 | 33 | #canny edge detection 34 | def canny(image): 35 | return cv2.Canny(image, 100, 200) 36 | 37 | #skew correction 38 | def deskew(image): 39 | coords = np.column_stack(np.where(image > 0)) 40 | angle = cv2.minAreaRect(coords)[-1] 41 | if angle < -45: 42 | angle = -(90 + angle) 43 | else: 44 | angle = -angle 45 | (h, w) = image.shape[:2] 46 | center = (w // 2, h // 2) 47 | M = cv2.getRotationMatrix2D(center, angle, 1.0) 48 | rotated = cv2.warpAffine(image, M, (w, h), flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_REPLICATE) 49 | return rotated 50 | 51 | #template matching 52 | def match_template(image, template): 53 | return cv2.matchTemplate(image, template, cv2.TM_CCOEFF_NORMED) 54 | 55 | def extract_text(image,custom_config = r'-l eng -c tessedit_char_whitelist=" "0123456789abcdefghijklmnopqrstuvwxyz --psm 6'): 56 | gray = 
get_grayscale(image) 57 | thresh = thresholding(gray) 58 | text='' 59 | text += str(pytesseract.image_to_string(thresh, config=custom_config)) 60 | return text 61 | 62 | 63 | 64 | 65 | -------------------------------------------------------------------------------- /preprocess.py: -------------------------------------------------------------------------------- 1 | import re 2 | import os 3 | from helpers import * 4 | import pprint as pp 5 | import json 6 | 7 | 8 | def split_page(text,min_length=200,include_line_breaks=False): 9 | paragraphs = re.split("\n\n(?=\u2028|[A-Z-0-9])", text) 10 | list_par = [] 11 | temp_para = "" # variable that stores paragraphs with length= min_length: 17 | if temp_para: 18 | # if True, append temp_para which holds concatenated 19 | # lines to form a paragraph before current paragraph p 20 | list_par.append(temp_para.strip()) 21 | temp_para = ( 22 | "" 23 | ) # reset temp_para for new lines to be concatenated 24 | list_par.append( 25 | p.replace("\n", "") 26 | ) # append current paragraph with length>min_length 27 | else: 28 | list_par.append(p.replace("\n", "")) 29 | else: 30 | # paragraph p (line) is concatenated to temp_para 31 | line = p.replace("\n", " ").strip() 32 | temp_para = temp_para + f" {line}" 33 | else: 34 | # appending paragraph p as is to list_par 35 | list_par.append(p.replace("\n", "")) 36 | else: 37 | if temp_para: 38 | list_par.append(temp_para.strip()) 39 | return list_par 40 | 41 | 42 | #function to update knowlegde base with text of new uploaded images 43 | def insert_into_KB(page,filename): #input image text and name 44 | paragraphs=[] 45 | p_threshold=150 #set threshold on paragraph length to 150 chars 46 | fname = os.path.join(os.getcwd(),"static/KB/output.json") 47 | KB = json.load(open(fname,'r')) # load the current data from knowledge base 48 | if (len(page) > p_threshold) : 49 | #split page text into small paragraphs 50 | paragraphs= split_page(page) 51 | for p in paragraphs: 52 | if len(p)> p_threshold : 
53 | #store image name to be retrived to the user in chat 54 | data_dict= {"image":filename ,"body":p} 55 | print(data_dict,"\n") 56 | KB.append(data_dict) # append the new dictionary to the KB list 57 | # dump it to KB json file. 58 | json.dump(KB, open(fname, 'w')) 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | -------------------------------------------------------------------------------- /tag_extraction.py: -------------------------------------------------------------------------------- 1 | import keras_craft 2 | from helpers import * 3 | import os 4 | import json 5 | 6 | #intialize an instance from keras detector 7 | detector = keras_craft.Detector() 8 | 9 | #function to extract text tags exists in scene image (not text-based image) 10 | def extract_images_tags(filename): 11 | #allocate file for tags knowledge base 12 | fname = os.path.join(os.getcwd(),"static/KB/tags.json") 13 | tags_KB = json.load(open(fname,'r')) # load the current data 14 | #allocate tags images folder 15 | folder=os.path.join(os.getcwd(),"static/images/photo") 16 | global detector 17 | image_path = [os.path.join(folder,filename)] 18 | if os.path.isfile(image_path[0]): 19 | #for all images in folder will extract tags as cropped images 20 | all_boxes,cropped_images = detector.detect(image_path,return_cropped_images=True) 21 | for cropped_boxes in cropped_images: 22 | tags=set() 23 | for cropped_box in cropped_boxes: 24 | #for each cropped image box extract its text 25 | tags.add(extract_text(cropped_box,custom_config = r'-l eng -c tessedit_char_whitelist=0123456789abcdefghijklmnopqrstuvwxyz --psm 6').lower()) 26 | print(tags) 27 | #store image tags to tags kowledge base with its image name 28 | tags_KB.append({"image":filename , "tags":'++'.join(tags)}) 29 | # then we dump it to the file. 
30 | json.dump(tags_KB, open(fname, 'w')) 31 | else: print("file not found") 32 | 33 | #function to find image with specific tag 34 | def find_tag(entered_tag): 35 | fname = os.path.join(os.getcwd(),"static/KB/tags.json") 36 | tags_KB = json.load(open(fname,'r')) # load the current data 37 | entered_tag=entered_tag.lower() #commet this line for testing 38 | tag_words=entered_tag.split(" ") 39 | #if any keyword from tag exist in knowledgw base retrun info dict for that image 40 | for word in tag_words: 41 | found=False 42 | for dic in tags_KB: 43 | for tag in dic["tags"].split("++"): 44 | if word==tag: 45 | found=True 46 | break 47 | if found: return dic 48 | return "not found" 49 | 50 | 51 | 52 | -------------------------------------------------------------------------------- /text_extraction.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/satheesh045/Image-chatbot/16299ae70b88d3cbc5abae74acb1d2ebaa8376d3/text_extraction.pyc --------------------------------------------------------------------------------