├── .gitignore
├── Initial.py
├── LICENSE
├── README.md
├── feature.py
├── optimized.py
└── prelib.py

/.gitignore:
--------------------------------------------------------------------------------
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py

# Flask instance folder
instance/

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# IPython Notebook
.ipynb_checkpoints

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# dotenv
.env

# Spyder project settings
.spyderproject

# COIL datasets
coil-20/
coil-20-unproc/
coil-100/

# Paper
*.pdf

# NumPy dumps
*.npy

*.zip

--------------------------------------------------------------------------------
/Initial.py:
--------------------------------------------------------------------------------
from feature import *
from prelib import preprocess
import os
from operator import itemgetter
import time
import sys

imagelocation = ""  # Path of the query image
indir = ""          # Directory that holds the image collection


class Image(object):
    """Loads an image, preprocesses it, and extracts its global and local features."""

    def __init__(self, path):
        self.path = path
        img = cv2.imread(self.path, 0)        # cv2/np come in via feature's star import
        imgm = preprocess(img)
        segm = segment(imgm)
        self.glfeature = globalfeature(imgm, 16)
        self.llfeature = localfeature(segm)
        self.numberofones = self.glfeature.sum(dtype=int)


img = Image(imagelocation)

# First pass: find the most dissimilar global grid and local vector in the
# collection, so distances can be normalised during the search pass.
maxglosim = 0
maxlocsim = 0
maximum = 0
count = 0
start_time = time.time()
print("Processing : ")
for root, dirs, filenames in os.walk(indir):
    for f in filenames:
        i1 = Image(os.path.join(root, f))     # full path so cv2.imread can find the file
        count = count + 1
        perc = (count / 360) * 100            # progress assumes a 360-image collection
        sys.stdout.write("\r%d%%" % perc)
        sys.stdout.flush()
        locsim = np.absolute((i1.llfeature - img.llfeature).sum())
        glosim = np.logical_xor(img.glfeature, i1.glfeature).sum()
        distance = locsim + glosim
        if glosim > maxglosim:
            gridmax = i1.glfeature
            maxglosim = glosim
        if locsim > maxlocsim:
            maxlocsim = locsim
            vectormax = i1.llfeature
        if distance > maximum:
            vectmostdif = i1.llfeature
            gridmostdif = i1.glfeature
            imgmax = i1
            maximum = distance

maxilocsim = np.absolute((vectormax - img.llfeature).sum())
maxiglosim = np.logical_xor(gridmax, img.glfeature).sum()
processed_time = time.time()
print("\nTotal Processing Time : {0:.2f} seconds".format(processed_time - start_time))


def gloDist(gridA, gridB):
    """Normalised global (grid) distance."""
    glosim = np.logical_xor(gridA, gridB).sum()
    return glosim / maxiglosim


def locDist(vectorA, vectorB):
    """Normalised local (singular-value vector) distance."""
    locsim = np.absolute((vectorA - vectorB).sum())
    return locsim / maxilocsim


# Second pass: score every image against the query and rank the results.
ranking = []
count = 0
print("\nSearching:")
for root, dirs, filenames in os.walk(indir):
    for f in filenames:
        path = os.path.join(root, f)
        img1 = Image(path)
        count = count + 1
        perc = (count / 360) * 100
        sys.stdout.write("\r%d%%" % perc)
        sys.stdout.flush()
        g1 = gloDist(img1.glfeature, img.glfeature)
        l1 = locDist(img1.llfeature, img.llfeature)
        sim = ((2 - (g1 + l1)) / 2) * 100
        ranking.append([sim, path])

search_time = time.time()
print("\nTotal Searching Time : {0:.2f} seconds".format(search_time - processed_time))
print("\nTotal Time : {0:.2f} seconds".format(search_time - start_time))
ranking = sorted(ranking, key=itemgetter(0), reverse=True)

# Results stored in ranking (highest similarity first)
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
The MIT License (MIT)

Copyright (c) 2016 Devashish Purandare

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Content-Based Image Retrieval

# Things you need
* [OpenCV](http://opencv.org/)
* [numpy](http://www.numpy.org/)
* [pywt](http://www.pybytes.com/pywavelets/)
* [Scipy](https://www.scipy.org/)

# How it works
Basic details:
[Based on research by Yen Do et al.](http://dl.acm.org/citation.cfm?id=2448648&dl=ACM&coll=DL&CFID=804778667&CFTOKEN=26088710)

Optimization details:
[Our Approach](http://devashishpurandare.me/assets/pdf/hcbir.pdf)

# Organization

- Initial.py - Without database, unoptimized.
- feature.py - Feature extraction and similarity measurement.
- optimized.py - With database, optimized.
- prelib.py - Preprocessing and standardization.
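
# Usage

A minimal way to try the unoptimized pipeline: set `imagelocation` (the query image) and `indir` (the folder of images) at the top of `Initial.py`, then run it and read the best matches off the `ranking` list it builds. The snippet below is only a sketch; the five-result cut-off and the print format are arbitrary choices, not part of the project.

```python
# Sketch: Initial.py is a script-style module, so importing it runs the whole
# indexing + search pass and leaves the results in Initial.ranking,
# a list of [similarity_percentage, image_path] pairs sorted best-first.
import Initial

for sim, path in Initial.ranking[:5]:
    print("{0:6.2f}%  {1}".format(sim, path))
```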
--------------------------------------------------------------------------------
/feature.py:
--------------------------------------------------------------------------------
import cv2
import numpy as np
from prelib import preprocess
import pywt


def segment(img):
    """Split a 512x512 image into a 4x4 grid of 128x128 blocks."""
    seg = []
    for i in range(4):
        for j in range(4):
            start = i * 128
            start1 = j * 128
            end = start + 128
            end1 = start1 + 128
            seg.append(img[start:end, start1:end1])
    return seg


def localfeature(seg):
    """Build the local feature vector: a multi-level Haar wavelet packet
    decomposition of each 128x128 block, summarised by the singular values
    of its subbands."""
    ll2 = []
    lh1 = []
    hl1 = []
    hh1 = []
    dwvt = []
    ht = []
    vt = []

    for i in range(16):
        wp = pywt.WaveletPacket2D(data=seg[i], wavelet='haar', mode='sym')
        lh1.append(wp['v'].data)
        hl1.append(wp['h'].data)
        hh1.append(wp['d'].data)
        # Rebuild the nested approximation quadrant from the deeper packet nodes.
        level1 = np.hstack((np.vstack((wp['aa'].data, wp['vv'].data)), np.vstack((wp['hh'].data, wp['dd'].data))))
        level2 = np.hstack((np.vstack((wp['aaa'].data, wp['vvv'].data)), np.vstack((wp['hhh'].data, wp['ddd'].data))))
        level3 = np.hstack((np.vstack((wp['aaaa'].data, wp['vvvv'].data)), np.vstack((wp['hhhh'].data, wp['dddd'].data))))
        level4 = np.hstack((np.vstack((wp['aaaaa'].data, wp['vvvvv'].data)), np.vstack((wp['hhhhh'].data, wp['ddddd'].data))))
        level3[:8, :8] = level4
        level2[:16, :16] = level3
        level1[:32, :32] = level2
        ll2.append(level1)
        vt.append(np.vstack((ll2[i], lh1[i])))
        ht.append(np.vstack((hl1[i], hh1[i])))
        dwvt.append(np.hstack((vt[i], ht[i])))

    s1 = []
    s2 = []
    s3 = []
    s4 = []
    subvector = []
    vector = []
    for i in range(16):
        # Singular values give a compact summary of each subband.
        s1.append(np.linalg.svd(ll2[i], compute_uv=False))
        s2.append(np.linalg.svd(hl1[i], compute_uv=False))
        s3.append(np.linalg.svd(lh1[i], compute_uv=False))
        s4.append(np.linalg.svd(hh1[i], compute_uv=False))
        subvector.append(np.vstack((np.vstack((s1[i], s2[i])), np.vstack((s3[i], s4[i])))))
        vector.append(subvector[i])
    vector1 = np.concatenate(vector, axis=0)
    vector1 = np.array(vector1, dtype=int)
    return vector1


def globalfeature(img, gran):
    """Return a gran x gran occupancy grid: 1 where the cell contains any nonzero (edge) pixel."""
    gloseg = np.zeros((gran, gran), dtype=int)
    displ = 512 // gran   # integer division so the slice indices stay ints
    for i in range(gran):
        for j in range(gran):
            start = i * displ
            start1 = j * displ
            end = start + displ
            end1 = start1 + displ
            if img[start:end, start1:end1].any():
                gloseg[i, j] = 1
    return gloseg
--------------------------------------------------------------------------------
/optimized.py:
--------------------------------------------------------------------------------
from feature import *
from pymongo import MongoClient
from bson.binary import Binary as BsonBinary
import pickle
import os
from operator import itemgetter
import time
import sys

imagelocation = ""  # Path of the query image
indir = ""          # Directory that holds the image collection

client = MongoClient('mongodb://localhost:27017')

db = client.coil    # Insert your database in place of coil
col = db.images     # Insert your collection in place of images


class Image(object):
    """Loads an image, preprocesses it, and extracts its global and local features."""

    def __init__(self, path):
        self.path = path
        img = cv2.imread(self.path, 0)
        imgm = preprocess(img)
        segm = segment(imgm)
        self.glfeature = globalfeature(imgm, 16)
        self.llfeature = localfeature(segm)
        self.numberofones = self.glfeature.sum(dtype=int)


# Indexing pass: extract features for every image and store them in MongoDB.
start_time = time.time()
count = 0

for root, dirs, filenames in os.walk(indir):
    for f in filenames:
        i1 = Image(os.path.join(root, f))     # full path so cv2.imread can find the file
        count = count + 1
        perc = (count / 360) * 100            # progress assumes a 360-image collection
        sys.stdout.write("\r%d%%" % perc)
        sys.stdout.flush()
        new_posts = [{'path': i1.path,
                      'llfeature': BsonBinary(pickle.dumps(i1.llfeature, protocol=2)),
                      'glfeature': BsonBinary(pickle.dumps(i1.glfeature, protocol=2)),
                      'numberofones': int(i1.numberofones)}]
        post_ids = col.insert_many(new_posts).inserted_ids   # insert() is removed in PyMongo 4
        # print(post_ids)

img = Image(imagelocation)

# First pass over the database: find the most dissimilar entries so the
# distances can be normalised during the search pass.
count = 0
maxglosim = 0
maxlocsim = 0
maximum = 0
gridmax = 0
vectormax = 0

for f in col.find():
    llfeature = pickle.loads(f['llfeature'])
    glfeature = pickle.loads(f['glfeature'])
    count = count + 1
    perc = (count / 360) * 100
    sys.stdout.write("\r%d%%" % perc)
    sys.stdout.flush()
    locsim = np.absolute((llfeature - img.llfeature).sum())
    glosim = np.logical_xor(glfeature, img.glfeature).sum()
    distance = locsim + glosim
    if glosim > maxglosim:
        gridmax = glfeature
        maxglosim = glosim
    if locsim > maxlocsim:
        maxlocsim = locsim
        vectormax = llfeature
    if distance > maximum:
        vectmostdif = llfeature
        gridmostdif = glfeature
        maximum = distance

maxilocsim = np.absolute((vectormax - img.llfeature).sum())
maxiglosim = np.logical_xor(gridmax, img.glfeature).sum()
processed_time = time.time()

print("\nTotal Processing Time : {0:.2f} seconds".format(processed_time - start_time))


def gloDist(gridA, gridB):
    """Normalised global (grid) distance."""
    glosim = np.logical_xor(gridA, gridB).sum()
    return glosim / maxiglosim


def locDist(vectorA, vectorB):
    """Normalised local (singular-value vector) distance."""
    locsim = np.absolute((vectorA - vectorB).sum())
    return locsim / maxilocsim


# Search pass: score every stored image against the query and rank the results.
ranking = []
count = 0
print("\nSearching:")

for f in col.find():
    llfeature = pickle.loads(f['llfeature'])
    glfeature = pickle.loads(f['glfeature'])
    count = count + 1
    perc = (count / 360) * 100
    sys.stdout.write("\r%d%%" % perc)
    sys.stdout.flush()
    g1 = gloDist(glfeature, img.glfeature)
    l1 = locDist(llfeature, img.llfeature)
    sim = ((2 - (g1 + l1)) / 2) * 100
    ranking.append([sim, f['path']])

search_time = time.time()
print("\nTotal Searching Time : {0:.2f} seconds".format(search_time - processed_time))
print("\nTotal Time : {0:.2f} seconds".format(search_time - start_time))
ranking = sorted(ranking, key=itemgetter(0), reverse=True)
# Ranking : results in a list, highest similarity first
--------------------------------------------------------------------------------
/prelib.py:
--------------------------------------------------------------------------------
import cv2
import numpy as np


def threshold(img):
    """Binarize the grayscale image with a fixed threshold of 80."""
    ret, thresh = cv2.threshold(img, 80, 255, cv2.THRESH_BINARY)
    return thresh


def contour(img):
    # OpenCV 3.x API: findContours returns (image, contours, hierarchy);
    # OpenCV 4.x drops the first value.
    im2, contours, hierarchy = cv2.findContours(img, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    return contours


def rotate(img1, scaleFactor=1, degreesCCW=30):
    (oldY, oldX) = img1.shape  # note: numpy uses (y,x) convention but most OpenCV functions use (x,y)
    M = cv2.getRotationMatrix2D(center=(oldX/2, oldY/2), angle=degreesCCW, scale=scaleFactor)  # rotate about center of image
    newX, newY = oldX*scaleFactor, oldY*scaleFactor
    r = np.deg2rad(degreesCCW)
    newX, newY = (abs(np.sin(r)*newY) + abs(np.cos(r)*newX), abs(np.sin(r)*newX) + abs(np.cos(r)*newY))
    # Shift the rotation matrix so the enlarged canvas stays centred.
    (tx, ty) = ((newX-oldX)/2, (newY-oldY)/2)
    M[0, 2] += tx
    M[1, 2] += ty
    rotatedImg = cv2.warpAffine(img1, M, dsize=(int(newX), int(newY)))
    return rotatedImg


def rotinv(cnt, img):
    """Rotate the image so the fitted ellipse's major axis sits upright."""
    ellipse = cv2.fitEllipse(cnt)
    angle = ellipse[2]
    rot_image = rotate(img, 1, 90 - angle)  # the original if/else applied this same rotation in both branches
    return rot_image


def cropit(img, cnt):
    """Crop the image to the bounding box of the given contour."""
    x, y, w, h = cv2.boundingRect(cnt)
    crop = img[y:y+h, x:x+w]
    return crop


def largestcont(contours):
    """Return the contour with the largest area."""
    cnt = contours[0]
    for cont in contours:
        if cv2.contourArea(cont) > cv2.contourArea(cnt):
            cnt = cont
    return cnt


def extractBorder(img):
    return cv2.Canny(img, 100, 200)


def preprocess(img):
    """Threshold, crop to the largest object, normalise rotation, extract edges,
    and resize to 512x512."""
    thresh = threshold(img)
    contours = contour(thresh)
    thresh = threshold(img)  # re-threshold: older findContours modifies its input in place
    cnt = largestcont(contours)
    crop = cropit(thresh, cnt)
    rot_image = rotinv(cnt, crop)
    border = extractBorder(rot_image)
    resized_image = cv2.resize(border, (512, 512))
    return resized_image
--------------------------------------------------------------------------------