├── python_imagesearch ├── __init__.py └── imagesearch.py ├── panda.png ├── github.png ├── requirements.txt ├── simple_example.py ├── setup.py ├── LICENSE ├── README.md └── full_examples.py /python_imagesearch/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /panda.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drov0/python-imagesearch/HEAD/panda.png -------------------------------------------------------------------------------- /github.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drov0/python-imagesearch/HEAD/github.png -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | opencv-python 2 | numpy 3 | python3_xlib 4 | pyautogui 5 | mss 6 | -------------------------------------------------------------------------------- /simple_example.py: -------------------------------------------------------------------------------- 1 | from python_imagesearch.imagesearch import imagesearch 2 | 3 | pos = imagesearch("./github.png") 4 | if pos[0] != -1: 5 | print("position : ", pos[0], pos[1]) 6 | else: 7 | print("image not found") 8 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import setuptools 2 | 3 | with open("README.md", "r") as fh: 4 | long_description = fh.read() 5 | 6 | setuptools.setup( 7 | name="python-imagesearch", 8 | version="1.3.0", 9 | install_requires=['opencv-python', 'numpy', 'python3_xlib', 'pyautogui', 'mss'], 10 | author="Martin Lees", 11 | author_email="drov.fr@protonmail.com", 12 | description="A wrapper around openCv to 
perform image searching", 13 | long_description=long_description, 14 | long_description_content_type="text/markdown", 15 | url="https://github.com/drov0/python-imagesearch", 16 | packages=setuptools.find_packages(), 17 | classifiers=[ 18 | "Programming Language :: Python :: 3", 19 | "License :: OSI Approved :: MIT License", 20 | "Operating System :: OS Independent", 21 | ], 22 | python_requires='>=3.5', 23 | ) 24 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 drov0 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Python-ImageSearch 2 | 3 | It's a wrapper around pyautogui and opencv2, to allow you to easily add cross-platform image searching capabilities 4 | to your project. 5 | 6 | See the documentation for examples. If you have any questions feel free to contact me. 7 | 8 | Documentation : https://brokencode.io/how-to-easily-image-search-with-python/ 9 | 10 | This is the currently maintained package, so if you've come from 11 | https://pypi.org/project/python-imagesearch-drov0/ 12 | 13 | you are at the right place 14 | 15 | ## Easy Example: 16 | 17 | ## Install: 18 | 19 | > pip install python-imagesearch 20 | 21 | You may need to install dependencies depending on your OS: 22 | 23 | ### Linux : 24 | ``` 25 | sudo pip3 install python3-xlib 26 | sudo apt-get install scrot python3-tk python3-dev python3-opencv 27 | ``` 28 | ### Windows : 29 | 30 | No setup should be needed 31 | 32 | ### macOS : 33 | ``` 34 | brew install opencv 35 | pip3 install -U pyobjc-core 36 | pip3 install -U pyobjc 37 | ``` 38 | I have not tested this myself as I don't own a Mac, but it was tested and documented in this issue : https://github.com/drov0/python-imagesearch/issues/5 39 | 40 | ## Quick start 41 | 42 | The simplest example to do image search with python is this: 43 | 44 | ``` 45 | from python_imagesearch.imagesearch import imagesearch 46 | 47 | pos = imagesearch("./github.png") 48 | if pos[0] != -1: 49 | print("position : ", pos[0], pos[1]) 50 | else: 51 | print("image not found") 52 | ``` 53 | 54 | This searches for one occurrence of the image “github.png” on the screen and prints its x/y position. 55 | 56 | Some advanced examples exist here (yes I shut down my blog and forgot to save it): https://web.archive.org/web/20221130045749/https://brokencode.io/how-to-easily-image-search-with-python/ 57 | 58 | Exact 
function definitions can be found here: https://github.com/drov0/python-imagesearch/blob/master/python_imagesearch/imagesearch.py 59 | -------------------------------------------------------------------------------- /full_examples.py: -------------------------------------------------------------------------------- 1 | from python_imagesearch.imagesearch import * 2 | 3 | # Search for the github logo on the whole screen 4 | # note that the search only works on your primary screen. 5 | 6 | # This is intended to be used as examples to be copy pasted, do not run the whole file at once 7 | 8 | pos = imagesearch("./github.png") 9 | if pos[0] != -1: 10 | print("position : ", pos[0], pos[1]) 11 | pyautogui.moveTo(pos[0], pos[1]) 12 | else: 13 | print("image not found") 14 | 15 | # search for the github logo until found : 16 | 17 | pos = imagesearch_loop("./github.png", 0.5) 18 | 19 | print("image found ", pos[0], pos[1]) 20 | 21 | # search for the logo on the 0,0,800,600 region 22 | # (a rectangle starting from the top left going 800 pixels to the right and down 600 pixels) 23 | 24 | pos = imagesearcharea("./github.png", 0, 0, 800, 600) 25 | if pos[0] != -1: 26 | print("position : ", pos[0], pos[1]) 27 | pyautogui.moveTo(pos[0], pos[1]) 28 | else: 29 | print("image not found") 30 | 31 | # the im parameter is useful if you plan on looking for several different images without the need for recapturing the screen 32 | # the screen capture being one of the most time consuming function it's a good way to optimize 33 | 34 | # non -optimized way : 35 | time1 = time.clock() 36 | for i in range(10): 37 | imagesearcharea("./github.png", 0, 0, 800, 600) 38 | imagesearcharea("./panda.png", 0, 0, 800, 600) 39 | print(str(time.clock() - time1) + " seconds (non optimized)") 40 | 41 | # optimized way : 42 | 43 | time1 = time.clock() 44 | im = region_grabber((0, 0, 800, 600)) 45 | for i in range(10): 46 | imagesearcharea("./github.png", 0, 0, 800, 600, 0.8, im) 47 | 
imagesearcharea("./panda.png", 0, 0, 800, 600, 0.8, im) 48 | print(str(time.clock() - time1) + " seconds (optimized)") 49 | 50 | # sample output : 51 | 52 | # 1.6233619831305721 seconds (non optimized) 53 | # 0.4075934110084374 seconds (optimized) 54 | 55 | 56 | # click image is to be used after having found the image 57 | 58 | pos = imagesearch("github.png") 59 | if pos[0] != -1: 60 | click_image("github.png", pos, "right", 0.2, offset=5) 61 | 62 | # when you have various images to find on the screen, you can 63 | # use this function, it iterates through the files in the path 64 | # you provide and outputs an dictionary where the key is the path 65 | # to the file and the value is the position array. 66 | # 67 | # I create this to be used with the same image with different sizes, 68 | # so no matter the size it appears on the screen, i can find it. 69 | # 70 | # In this example it iterates through the main folder of the project 71 | # and find the panda.png and github.png 72 | 73 | print(str(imagesearch_from_folder('./', 0.8))) 74 | -------------------------------------------------------------------------------- /python_imagesearch/imagesearch.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | import pyautogui 4 | import random 5 | import time 6 | import platform 7 | import subprocess 8 | import os 9 | import mss 10 | 11 | is_retina = False 12 | if platform.system() == "Darwin": 13 | is_retina = subprocess.call("system_profiler SPDisplaysDataType | grep -i 'retina'", shell=True) == 0 14 | 15 | ''' 16 | 17 | grabs a region (topx, topy, bottomx, bottomy) 18 | to the tuple (topx, topy, width, height) 19 | 20 | input : a tuple containing the 4 coordinates of the region to capture 21 | 22 | output : a PIL image of the area selected. 
def region_grabber(region):
    """Capture a rectangular region of the screen with mss.

    input :
        region : a sequence whose first four items are (x1, y1, x2, y2), the
                 top-left and bottom-right corners of the area to capture.
                 The four values are forwarded to ``sct.grab`` unchanged as a
                 tuple; mss documents tuples as PIL-style boxes
                 (left, top, right, bottom), so no width/height conversion
                 happens here.

    output :
        the screenshot object returned by ``sct.grab`` (an mss screenshot,
        convertible with ``np.array``), not a PIL image.

    On retina displays (module-level ``is_retina``) every coordinate is
    doubled before capturing.
    """
    if is_retina:
        region = [n * 2 for n in region]
    left, top, right, bottom = region[0], region[1], region[2], region[3]
    with mss.mss() as sct:
        return sct.grab((left, top, right, bottom))


def imagesearcharea(image, x1, y1, x2, y2, precision=0.8, im=None):
    """Search for an image within an area of the screen.

    input :
        image : path to the image file (see opencv imread for supported types)
        x1 : top left x value
        y1 : top left y value
        x2 : bottom right x value
        y2 : bottom right y value
        precision : minimum normalized match score; the higher, the less
                    tolerant and the fewer false positives. Default is 0.8
        im : an already captured region (e.g. from region_grabber); useful
             when searching the same unchanging region for several elements.
             When given, x1/y1/x2/y2 are not used for capturing.

    returns :
        the top left corner of the best match as (x, y), expressed in the
        coordinates of the searched capture, or [-1, -1] if the best score is
        below ``precision``.

    raises :
        FileNotFoundError if ``image`` cannot be read by cv2.imread.
    """
    if im is None:
        im = region_grabber(region=(x1, y1, x2, y2))

    img_rgb = np.array(im)
    if is_retina:
        # mss screenshots have no PIL-style thumbnail(); halve the array copy
        # instead. This also leaves a caller-supplied `im` untouched, so it
        # can be reused across calls without shrinking each time.
        rows, cols = img_rgb.shape[:2]
        half_size = (max(1, round(cols * 0.5)), max(1, round(rows * 0.5)))
        img_rgb = cv2.resize(img_rgb, half_size, interpolation=cv2.INTER_AREA)

    img_gray = cv2.cvtColor(img_rgb, cv2.COLOR_BGR2GRAY)
    template = cv2.imread(image, 0)  # flag 0: load the template as grayscale
    if template is None:
        raise FileNotFoundError('Image file not found: {}'.format(image))

    res = cv2.matchTemplate(img_gray, template, cv2.TM_CCOEFF_NORMED)
    _, max_val, _, max_loc = cv2.minMaxLoc(res)
    if max_val < precision:
        return [-1, -1]
    return max_loc
def click_image(image, pos, action, timestamp, offset=5):
    """Click near the center of an image that has already been located.

    This function does not search for the image; the file is only read to
    learn its width and height. A random shift in [0, offset) is added to
    each center coordinate (see r()), which helps avoid anti-bot monitoring
    while staying precise.

    input :
        image : path to the image file (see opencv imread for supported types)
        pos : array containing the position of the top left corner of the image [x,y]
        action : button of the mouse to activate : "left" "right" "middle",
                 see pyautogui.click documentation for more info
        timestamp : time taken for the mouse to move from where it was to the
                    new position (passed to pyautogui.moveTo)
        offset : upper bound of the random shift applied to each coordinate

    raises :
        FileNotFoundError if ``image`` cannot be read by cv2.imread.
    """
    img = cv2.imread(image)
    if img is None:
        raise FileNotFoundError('Image file not found: {}'.format(image))
    height, width = img.shape[:2]
    pyautogui.moveTo(pos[0] + r(width / 2, offset), pos[1] + r(height / 2, offset),
                     timestamp)
    pyautogui.click(button=action)


def imagesearch(image, precision=0.8):
    """Search for an image on the screen.

    The capture uses ``sct.monitors[0]``, which mss documents as the area
    covering all monitors together.

    input :
        image : path to the image file (see opencv imread for supported types)
        precision : minimum normalized match score; the higher, the less
                    tolerant and the fewer false positives. Default is 0.8

    returns :
        the top left corner of the best match as (x, y), or [-1, -1] if the
        best score is below ``precision``.

    raises :
        FileNotFoundError if ``image`` cannot be read by cv2.imread.
    """
    with mss.mss() as sct:
        im = sct.grab(sct.monitors[0])

    img_rgb = np.array(im)
    if is_retina:
        # mss screenshots have no PIL-style thumbnail(); halve the array instead.
        rows, cols = img_rgb.shape[:2]
        half_size = (max(1, round(cols * 0.5)), max(1, round(rows * 0.5)))
        img_rgb = cv2.resize(img_rgb, half_size, interpolation=cv2.INTER_AREA)

    img_gray = cv2.cvtColor(img_rgb, cv2.COLOR_BGR2GRAY)
    template = cv2.imread(image, 0)  # flag 0: load the template as grayscale
    if template is None:
        raise FileNotFoundError('Image file not found: {}'.format(image))

    res = cv2.matchTemplate(img_gray, template, cv2.TM_CCOEFF_NORMED)
    _, max_val, _, max_loc = cv2.minMaxLoc(res)
    if max_val < precision:
        return [-1, -1]
    return max_loc


def imagesearch_loop(image, timesample, precision=0.8):
    """Search for an image on screen continuously until it is found.

    input :
        image : path to the image file (see opencv imread for supported types)
        timesample : waiting time (seconds, passed to time.sleep) after
                     failing to find the image
        precision : the higher, the less tolerant and the fewer false
                    positives. Default is 0.8

    returns :
        the top left corner coordinates of the element once found. The
        function never returns while the image stays absent.
    """
    pos = imagesearch(image, precision)
    while pos[0] == -1:
        print(image + " not found, waiting")
        time.sleep(timesample)
        pos = imagesearch(image, precision)
    return pos


def imagesearch_numLoop(image, timesample, maxSamples, precision=0.8):
    """Search for an image on screen until it is found or the retries run out.

    input :
        image : path to the image file (see opencv imread for supported types)
        timesample : waiting time (seconds, passed to time.sleep) after
                     failing to find the image
        maxSamples : retry budget. After the initial attempt the loop retries
                     until its counter exceeds maxSamples, i.e. for a
                     non-negative value the screen is sampled at most
                     maxSamples + 2 times in total.
        precision : the higher, the less tolerant and the fewer false
                    positives. Default is 0.8

    returns :
        the top left corner coordinates of the element if found, otherwise
        the last result, [-1, -1].
    """
    pos = imagesearch(image, precision)
    count = 0
    while pos[0] == -1:
        print(image + " not found, waiting")
        time.sleep(timesample)
        pos = imagesearch(image, precision)
        count += 1
        if count > maxSamples:
            break
    return pos
def imagesearch_region_loop(image, timesample, x1, y1, x2, y2, precision=0.8):
    """Poll a region of the screen until the image shows up.

    input :
        image : path to the image file (see opencv imread for supported types)
        timesample : pause (passed to time.sleep) between two failed attempts
        x1 : top left x value
        y1 : top left y value
        x2 : bottom right x value
        y2 : bottom right y value
        precision : the higher, the less tolerant and the fewer false
                    positives. Default is 0.8

    returns :
        the position reported by imagesearcharea for the first successful
        attempt. The call does not return while the image stays absent.
    """
    while True:
        found = imagesearcharea(image, x1, y1, x2, y2, precision)
        if found[0] != -1:
            return found
        time.sleep(timesample)


def imagesearch_region_numLoop(image, timesample, maxSamples, x1, y1, x2, y2, precision=0.8):
    """Poll a region of the screen until the image shows up or retries run out.

    input :
        image : path to the image file (see opencv imread for supported types)
        timesample : pause (passed to time.sleep) between two failed attempts
        maxSamples : retry budget; retrying stops once the retry counter
                     exceeds this value
        x1 : top left x value
        y1 : top left y value
        x2 : bottom right x value
        y2 : bottom right y value
        precision : the higher, the less tolerant and the fewer false
                    positives. Default is 0.8

    returns :
        the position reported by imagesearcharea, which is [-1, -1] when the
        budget was exhausted without a match.
    """
    result = imagesearcharea(image, x1, y1, x2, y2, precision)
    retries = 0
    while True:
        if result[0] != -1:
            return result
        time.sleep(timesample)
        result = imagesearcharea(image, x1, y1, x2, y2, precision)
        retries = retries + 1
        if retries > maxSamples:
            return result
def imagesearch_count(image, precision=0.95):
    """Count how many positions on the screen match an image.

    Every location of the template-matching result whose score is at least
    ``precision`` is counted, so adjacent pixels that all clear the threshold
    each add one to the total.

    input :
        image : path to the target image file (see opencv imread for supported types)
        precision : minimum normalized match score; the higher, the less
                    tolerant and the fewer false positives. Default is 0.95

    returns :
        the number of matching positions as an int.

    raises :
        FileNotFoundError if ``image`` cannot be read by cv2.imread.
    """
    with mss.mss() as sct:
        im = sct.grab(sct.monitors[0])

    img_rgb = np.array(im)
    if is_retina:
        # mss screenshots have no PIL-style thumbnail(); halve the array instead.
        rows, cols = img_rgb.shape[:2]
        half_size = (max(1, round(cols * 0.5)), max(1, round(rows * 0.5)))
        img_rgb = cv2.resize(img_rgb, half_size, interpolation=cv2.INTER_AREA)

    img_gray = cv2.cvtColor(img_rgb, cv2.COLOR_BGR2GRAY)
    template = cv2.imread(image, 0)  # flag 0: load the template as grayscale
    if template is None:
        raise FileNotFoundError('Image file not found: {}'.format(image))

    res = cv2.matchTemplate(img_gray, template, cv2.TM_CCOEFF_NORMED)
    return int(np.count_nonzero(res >= precision))
def imagesearch_from_folder(path, precision):
    """Search the screen for every image file found directly in a folder.

    input :
        path : path of the folder with the images to be searched on screen;
               a trailing '/' is appended when the path ends with neither
               '/' nor '\\'
        precision : the higher, the less tolerant and the fewer false
                    positives (forwarded to imagesearch)

    returns :
        A dictionary where the key is the path to the image file
        (``path + filename``) and the value is the position returned by
        imagesearch for it.
    """
    print(path)
    # The previous test `path[-1] == '/' or '\\'` was always truthy, so the
    # separator was never added and `path + file` produced broken paths.
    if path and not path.endswith(('/', '\\')):
        path = path + '/'
    valid_images = (".jpg", ".gif", ".png", ".jpeg")
    images_pos = {}
    for name in os.listdir(path):
        is_image = os.path.splitext(name)[1].lower() in valid_images
        if is_image and os.path.isfile(os.path.join(path, name)):
            images_pos[path + name] = imagesearch(path + name, precision)
    return images_pos


def r(num, rand):
    """Return ``num`` plus a random amount in [0, rand) (for positive rand)."""
    return num + rand * random.random()


def imagesearch_click(image, action, delay, offset=5, precision=0.8):
    """Find an image on screen and click near its center.

    Wrapper around imagesearch and the logic of click_image.

    # TODO: optimize so that we only read the file once.

    input :
        image : path to the image file (see opencv imread for supported types)
        action : button of the mouse to activate : "left" "right" "middle",
                 see pyautogui.click documentation for more info
        delay : time taken for the mouse to move from where it was to the new position
        offset : upper bound of the random shift added to each center coordinate
        precision : the higher, the less tolerant and the fewer false
                    positives. Default is 0.8

    returns :
        the position returned by imagesearch. When it is [-1, -1] (image not
        found) the mouse is neither moved nor clicked.

    raises :
        FileNotFoundError if ``image`` cannot be read by cv2.imread.
    """
    pos = imagesearch(image, precision)
    img = cv2.imread(image)
    if img is None:
        raise FileNotFoundError('Image file not found: {}'.format(image))
    if pos[0] == -1:
        # Nothing was located: clicking relative to [-1, -1] would hit an
        # arbitrary spot near the top-left of the screen.
        return pos
    height, width = img.shape[:2]
    pyautogui.moveTo(pos[0] + r(width / 2, offset), pos[1] + r(height / 2, offset),
                     delay)
    pyautogui.click(button=action)
    return pos


def chi_2(l1, l2):
    """
    sort of chi squared test to check if the two color distributions are compatible

    input :
        l1 : histogram bin counts of the candidate zone (needs at least
             len(l2) bins)
        l2 : histogram bin counts of the template

    returns :
        the mean per-bin contribution, after both histograms are rescaled as
        if the template held 100 pixels. 0 means identical distributions.

    raises :
        ZeroDivisionError if l2 is empty, or sums to zero while l1 is not empty.
    """
    total = sum(l2)  # number of pixels in template
    observed = [x / total * 100 for x in l1]  # both scaled by the template size
    expected = [x / total * 100 for x in l2]

    chi = 0
    for i, exp in enumerate(expected):
        obs = observed[i]
        if exp != 0:
            chi += (obs - exp) ** 2 / exp
        else:
            # Empty template bin: fall back to the squared observed share.
            chi += obs ** 2

    return chi / len(expected)


def _bin_counts(values, n_classes):
    """Count values per equal-width bin over [0, 255]; 255 goes in the last bin."""
    dx = 255 / n_classes  # width of the bins
    counts = [int(np.count_nonzero((k * dx <= values) & ((k + 1) * dx > values)))
              for k in range(n_classes)]
    counts[n_classes - 1] += int(np.count_nonzero(values == 255))
    return counts


def channels_similarity(base, template, max_loc):
    """Compare per-channel color histograms of the template and the matched zone.

    input :
        base : array of the searched picture, indexed [row, column, channel]
        template : array of the template, indexed [row, column, channel]
        max_loc : (x, y) top left corner of the match inside ``base``

    returns :
        a list with one chi_2 result for each of the first three channels.
    """
    # 10 bins, following the usual histogram rule of thumb; chi_2 rescales
    # the bins as if there were 100 pixels.
    n_classes = 10
    height, width = template.shape[:2]
    top, left = max_loc[1], max_loc[0]

    chis = []  # list with chi squared tests results
    for channel in range(3):
        t_values = np.reshape(template[:, :, channel], (-1))
        # Rows span the template height (shape[0]) and columns its width
        # (shape[1]); the two were swapped before, selecting the wrong zone
        # for non-square templates.
        zone = base[top: top + height, left: left + width, channel]
        b_values = np.reshape(zone, (-1))
        chis.append(chi_2(_bin_counts(b_values, n_classes),
                          _bin_counts(t_values, n_classes)))

    return chis


def imagesearch_colored(image, precision=0.8):
    """
    Returns the coordinates of the given image but only if the colors match

    input :
        image : path to the image file (see opencv imread for supported types)
        precision : normalized match score the best match must exceed; the
                    higher, the less tolerant and the fewer false positives.
                    Default is 0.8

    returns :
        the top left corner coordinates of the element if found as (x, y) or [-1,-1] if not

    raises :
        FileNotFoundError if ``image`` cannot be read by cv2.imread.
    """
    with mss.mss() as sct:
        im = sct.grab(sct.monitors[0])

    base = np.array(im)
    if is_retina:
        # mss screenshots have no PIL-style thumbnail(); halve the array instead.
        rows, cols = base.shape[:2]
        half_size = (max(1, round(cols * 0.5)), max(1, round(rows * 0.5)))
        base = cv2.resize(base, half_size, interpolation=cv2.INTER_AREA)

    template = cv2.imread(image)
    if template is None:
        raise FileNotFoundError(f'Image file not found: {image}')

    res = cv2.matchTemplate(cv2.cvtColor(base, cv2.COLOR_BGR2GRAY),
                            cv2.cvtColor(template, cv2.COLOR_BGR2GRAY),
                            cv2.TM_CCOEFF_NORMED)
    _, max_val, _, max_loc = cv2.minMaxLoc(res)

    chis = channels_similarity(base, template, max_loc)

    # 5 is maybe a high threshold for a chi squared, but when the colors are
    # different it's easy to reach hundreds or more. This value needs more
    # testing to be tuned.
    max_chi = 5
    colors_match = all(chi < max_chi for chi in chis[:3])
    # Honor the caller's precision (it used to be hard-coded to 0.8).
    if colors_match and max_val > precision:
        return max_loc
    return [-1, -1]