├── .gitignore ├── IOfuncs.py ├── README.md ├── flaskserver.py └── wsgi.py /.gitignore: -------------------------------------------------------------------------------- 1 | whitelist.txt 2 | 3 | # Created by https://www.gitignore.io/api/osx,vim,python,virtualenv 4 | 5 | ### OSX ### 6 | *.DS_Store 7 | .AppleDouble 8 | .LSOverride 9 | 10 | # Icon must end with two \r 11 | Icon 12 | 13 | # Thumbnails 14 | ._* 15 | 16 | # Files that might appear in the root of a volume 17 | .DocumentRevisions-V100 18 | .fseventsd 19 | .Spotlight-V100 20 | .TemporaryItems 21 | .Trashes 22 | .VolumeIcon.icns 23 | .com.apple.timemachine.donotpresent 24 | 25 | # Directories potentially created on remote AFP share 26 | .AppleDB 27 | .AppleDesktop 28 | Network Trash Folder 29 | Temporary Items 30 | .apdisk 31 | 32 | ### Python ### 33 | # Byte-compiled / optimized / DLL files 34 | __pycache__/ 35 | *.py[cod] 36 | *$py.class 37 | 38 | # C extensions 39 | *.so 40 | 41 | # Distribution / packaging 42 | .Python 43 | build/ 44 | develop-eggs/ 45 | dist/ 46 | downloads/ 47 | eggs/ 48 | .eggs/ 49 | lib/ 50 | lib64/ 51 | parts/ 52 | sdist/ 53 | var/ 54 | wheels/ 55 | *.egg-info/ 56 | .installed.cfg 57 | *.egg 58 | 59 | # PyInstaller 60 | # Usually these files are written by a python script from a template 61 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
62 | *.manifest 63 | *.spec 64 | 65 | # Installer logs 66 | pip-log.txt 67 | pip-delete-this-directory.txt 68 | 69 | # Unit test / coverage reports 70 | htmlcov/ 71 | .tox/ 72 | .coverage 73 | .coverage.* 74 | .cache 75 | nosetests.xml 76 | coverage.xml 77 | *.cover 78 | .hypothesis/ 79 | 80 | # Translations 81 | *.mo 82 | *.pot 83 | 84 | # Django stuff: 85 | *.log 86 | local_settings.py 87 | 88 | # Flask stuff: 89 | instance/ 90 | .webassets-cache 91 | 92 | # Scrapy stuff: 93 | .scrapy 94 | 95 | # Sphinx documentation 96 | docs/_build/ 97 | 98 | # PyBuilder 99 | target/ 100 | 101 | # Jupyter Notebook 102 | .ipynb_checkpoints 103 | 104 | # pyenv 105 | .python-version 106 | 107 | # celery beat schedule file 108 | celerybeat-schedule 109 | 110 | # SageMath parsed files 111 | *.sage.py 112 | 113 | # Environments 114 | .env 115 | .venv 116 | env/ 117 | venv/ 118 | ENV/ 119 | env.bak/ 120 | venv.bak/ 121 | 122 | # Spyder project settings 123 | .spyderproject 124 | .spyproject 125 | 126 | # Rope project settings 127 | .ropeproject 128 | 129 | # mkdocs documentation 130 | /site 131 | 132 | # mypy 133 | .mypy_cache/ 134 | 135 | ### Vim ### 136 | # swap 137 | [._]*.s[a-v][a-z] 138 | [._]*.sw[a-p] 139 | [._]s[a-v][a-z] 140 | [._]sw[a-p] 141 | # session 142 | Session.vim 143 | # temporary 144 | .netrwhist 145 | *~ 146 | # auto-generated tag files 147 | tags 148 | 149 | ### VirtualEnv ### 150 | # Virtualenv 151 | # http://iamzed.com/2009/05/07/a-primer-on-virtualenv/ 152 | [Bb]in 153 | [Ii]nclude 154 | [Ll]ib 155 | [Ll]ib64 156 | [Ll]ocal 157 | [Mm]an 158 | [Ss]cripts 159 | [Tt]cl 160 | pyvenv.cfg 161 | pip-selfcheck.json 162 | 163 | # End of https://www.gitignore.io/api/osx,vim,python,virtualenv 164 | -------------------------------------------------------------------------------- /IOfuncs.py: -------------------------------------------------------------------------------- 1 | import os, shutil, time 2 | from random import random 3 | from io import BytesIO 4 | import urllib 5 
# Root folder (relative to the working directory) that holds one scratch
# directory per request.
_CLIENT_DATA_ROOT = "temp-client-data"

# Only remote HTTP(S) sources are fetched. URL openers in the standard library
# also accept local paths and file: URLs, which must never be reachable from
# client-supplied input.
_ALLOWED_URL_PREFIXES = ("http://", "https://")

# Image modes Pillow's JPEG writer accepts. Anything else (RGBA, P, LA, ...)
# is converted to RGB first; otherwise saving as .jpg raises.
_JPEG_WRITABLE_MODES = ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr")


def assert_client_data_path(request):
    """Create and return a fresh scratch directory for one request.

    The directory lives under ``temp-client-data`` and its name starts with
    the client's address (dots/colons replaced by dashes) and the current
    Unix timestamp. ``tempfile.mkdtemp`` appends a unique suffix, so two
    simultaneous requests from the same client can never share (or delete)
    each other's directory.

    Args:
        request: object exposing ``remote_addr`` (e.g. a Flask request).

    Returns:
        Path of the newly created, empty directory.
    """
    import tempfile

    # remote_addr can be None (e.g. when served through a unix socket).
    client_name = (request.remote_addr or "unknown").replace(".", "-").replace(":", "-")

    if not os.path.isdir(_CLIENT_DATA_ROOT):
        try:
            os.makedirs(_CLIENT_DATA_ROOT)
        except OSError:
            # Another worker may have created it between the check and the call.
            if not os.path.isdir(_CLIENT_DATA_ROOT):
                raise

    prefix = "%s-%d-" % (client_name, int(time.time()))
    return tempfile.mkdtemp(prefix=prefix, dir=_CLIENT_DATA_ROOT)


def download_file(url, path_to_data, i, w, h, resize, timeout=10):
    """Fetch one image, optionally shrink it, and store it as a JPEG.

    Args:
        url: address of the image; only ``http://`` and ``https://`` are accepted.
        path_to_data: directory the image is written to.
        i: index used to build the file name ``temp<i>.jpg``.
        w, h: bounding box for the thumbnail when ``resize`` is truthy.
        resize: whether to shrink the image to fit within ``(w, h)``.
        timeout: seconds to wait on the network before giving up.

    Returns:
        ``(local_filename, (width, height))`` on success, or
        ``(None, (0, 0))`` if the URL is rejected or anything goes wrong while
        downloading, decoding or saving (best effort: one bad image must not
        fail the whole batch).
    """
    failure = (None, (0, 0))

    # Reject non-strings (JSON null/numbers) and every non-HTTP(S) scheme
    # before touching the network or the file system.
    if not hasattr(url, "lower") or not url.lower().startswith(_ALLOWED_URL_PREFIXES):
        return failure

    from contextlib import closing
    try:
        from urllib.request import urlopen  # Python 3
    except ImportError:
        from urllib2 import urlopen  # Python 2
    from PIL import Image

    # ANTIALIAS is the legacy name of the LANCZOS filter and is gone in
    # recent Pillow releases; prefer the current name when available.
    resample = getattr(Image, "LANCZOS", None)
    if resample is None:
        resample = Image.ANTIALIAS

    local_filename = os.path.join(path_to_data, "temp" + str(i) + ".jpg")
    try:
        # closing() releases the connection even if read() fails.
        with closing(urlopen(url, timeout=timeout)) as response:
            payload = BytesIO(response.read())
        im = Image.open(payload)
        if im.mode not in _JPEG_WRITABLE_MODES:
            im = im.convert("RGB")
        if resize:
            im.thumbnail((w, h), resample)
        im.save(local_filename)
    except Exception:
        # Deliberately quiet: the caller counts and reports failures.
        return failure
    return local_filename, im.size


def download_images(data, path_to_client_data, w=640, h=640):
    """Download every URL listed in ``data["urls"]`` into the client directory.

    Args:
        data: request payload; must contain ``"urls"`` (a sequence of URLs).
            An optional ``"resize"`` equal to ``0`` disables thumbnailing.
        path_to_client_data: directory the images are written to.
        w, h: maximum thumbnail size passed on to ``download_file``.

    Returns:
        ``(image_data_object, num_downloaded)`` where ``image_data_object``
        maps each URL's index to
        ``{"url": ..., "path": <file or None>, "img_resize": {"w": ..., "h": ...}}``.

    Raises:
        KeyError: if ``data`` has no ``"urls"`` entry.
    """
    # Resizing is on unless the client explicitly sends resize == 0.
    resize = not (data.get("resize") == 0)
    urls = data["urls"]

    image_data_object = dict()
    err_count = 0
    for i, url in enumerate(urls):
        local_path, size = download_file(url, path_to_client_data, i, w, h, resize)
        if local_path is None:
            err_count += 1
        image_data_object[i] = {
            "url": url,
            "path": local_path,
            "img_resize": {"w": size[0], "h": size[1]},
        }

    downloaded = len(urls) - err_count
    # Single-argument print() behaves the same on Python 2 and 3.
    print("[+] Downloaded %d / %d images" % (downloaded, len(urls)))
    return image_data_object, downloaded
by [Data Selfie](https://github.com/d4t4x/data-selfie). 4 | Its main components are [Yolo and Darknet](https://pjreddie.com/darknet/yolo/), used via the [pyyolo-wrapper](https://github.com/digitalbrain79/pyyolo) for image classification and [Gunicorn](http://gunicorn.org) for reliable server functionality. 5 | 6 | ## Build it yourself 7 | 8 | ### Install pyyolo 9 | 10 | Follow the installation instructions of [pyyolo](https://github.com/digitalbrain79/pyyolo). To avoid unnecessary logging of the prediction times for each image, I got rid of [this line](https://github.com/digitalbrain79/pyyolo/blob/master/libyolo.c#L140) before the install. 11 | 12 | ### Download weight file 13 | 14 | For Data Selfie, we are using weights from the makers of Darknet as described on [this](https://pjreddie.com/darknet/yolo/) page. 15 | 16 | ``` 17 | wget https://pjreddie.com/media/files/yolo.weights 18 | ``` 19 | 20 | ### Download this repo 21 | 22 | Finally, clone this repo with 23 | 24 | ``` 25 | git clone git@github.com:d4t4x/data-selfie-image-classification.git 26 | ``` 27 | 28 | The folder structure should look like this: 29 | 30 | ``` 31 | . 32 | ├── data-selfie-image-classification 33 | │   ├── ... 34 | ├── pyyolo 35 | │   ├── ... 36 | └── weights 37 | └── yolo.weights 38 | ``` 39 | 40 | ### A few more dependencies 41 | 42 | Before running the server, we need to install pillow, flask, request, numpy 43 | ``` 44 | pip install pillow flask request numpy 45 | ``` 46 | and gunicorn, greenlet and gevent 47 | ``` 48 | pip install gunicorn greenlet gevent 49 | ``` 50 | 51 | ### Run the API 52 | 53 | For Data Selfie we run the API like this, from the directory of this repo: 54 | 55 | ``` 56 | gunicorn --workers=2 --bind=0.0.0.0:8888 -t 100 -k gevent wsgi 57 | ``` 58 | 59 | 60 | Good luck! File an issue in this repo, contact us or [Leon Eckert](http://leoneckert.com) if you have any questions. 
from flask import Flask, request, jsonify, abort
# import json, urllib, sys, cv2, shutil, time
import json, urllib, sys, shutil, time
from io import BytesIO
from PIL import Image
import numpy as np
import pyyolo
import IOfuncs as iof

# Darknet / YOLO configuration. The cfg paths are given relative to
# darknet_path; the weight file is expected two levels above it.
darknet_path = '../pyyolo/darknet'
datacfg = 'cfg/coco.data'
cfgfile = 'cfg/yolo.cfg'
weightfile = '../../weights/yolo.weights'
# Thresholds handed to pyyolo.test() for every image.
thresh = 0.24
hier_thresh = 0.5
# Load the network once at import time so every request reuses it.
pyyolo.init(darknet_path, datacfg, cfgfile, weightfile)

application = Flask(__name__)


# # WHITELIST
# # Limit use of API to IPs in whitelist.txt
# @application.before_request
# def limit_remote_addr():
#     whitelist = list()
#     for line in open('whitelist.txt'):
#         line = line.strip()
#         whitelist.append(line)
#     if request.remote_addr not in whitelist:
#         print "[ABORT] IP not on whitelist:", request.remote_addr
#         abort(403)

@application.route("/", methods=['GET', 'POST'])
def serve():
    """Classify the images referenced by the request and return predictions.

    Expects a JSON object body with a ``"urls"`` list (and optionally
    ``"resize": 0`` to keep the original image size). Responds with a JSON
    list holding one entry per URL, in request order:
    ``{"url": ..., "img_resize": {"w": ..., "h": ...}, "pred": <list or null>}``.
    ``pred`` is null when the image could not be downloaded, classification
    failed, or nothing was detected.

    A body that is not a JSON object, or that lacks a ``"urls"`` list, is
    answered with HTTP 400 instead of an unhandled exception.
    """
    # force=True: parse regardless of Content-Type, like the raw json.loads
    # this replaces; silent=True: yield None instead of raising on bad input.
    data = request.get_json(force=True, silent=True)
    if not isinstance(data, dict) or not isinstance(data.get("urls"), list):
        abort(400)

    # Single-argument print() behaves the same on Python 2 and 3.
    print("\n")
    print("[TIME] " + time.strftime("%d/%m/%Y") + " " + time.strftime("%H:%M:%S"))
    path_to_client_data = iof.assert_client_data_path(request)

    try:
        image_data, num_valid_images = iof.download_images(data, path_to_client_data)

        out = list()
        err_count = 0
        # Sort the indices so the response order always matches the request.
        for idx in sorted(image_data):
            img_data = image_data[idx]
            o = {"url": img_data["url"], "img_resize": img_data["img_resize"]}
            out.append(o)
            if img_data["path"] is None:
                # Download failed; nothing to classify.
                o["pred"] = None
                continue
            try:
                pred = pyyolo.test(img_data["path"], thresh, hier_thresh, 0)
                # An empty detection list is reported as null.
                o["pred"] = pred if pred else None
            except Exception as e:
                o["pred"] = None
                err_count += 1
                print("[ERROR] while classifiying image\n\tsrc: %s \n\tError: %s" % (o["url"], e))
        print("[+] Classified %s / %s images" % (num_valid_images - err_count, num_valid_images))
    finally:
        # Always remove the scratch directory, even when the request fails,
        # so downloaded images never pile up on disk.
        shutil.rmtree(path_to_client_data, ignore_errors=True)
        print("[+] Deleted downloaded images.")

    print("[+] Returning predictions...")
    return jsonify(out)


if __name__ == "__main__":

    application.run(host='0.0.0.0', port=8888)