├── .gitignore
├── Gathered Notebook.ipynb
├── PetDetectorTraining backup.ipynb
├── PetDetectorTraining.ipynb
├── README.md
├── helper.py
├── inference
│   ├── .funcignore
│   ├── .gitignore
│   ├── classifynew
│   │   ├── __init__.py
│   │   ├── function.json
│   │   ├── helper.py
│   │   ├── labels.txt
│   │   └── predict.py
│   ├── host.json
│   └── requirements.txt
├── labels.txt
├── model
│   └── checkpoint.pth
└── website
    ├── .deployment
    └── index.html

/.gitignore:
--------------------------------------------------------------------------------
images/
images.tar.gz
.python_packages
__pycache__
.venv
.vscode/
--------------------------------------------------------------------------------
/Gathered Notebook.ipynb:
--------------------------------------------------------------------------------
{"cells":[{"cell_type":"markdown","metadata":{},"source":["# Gathered Notebook\nGathered from ```/Users/jeffreymew/Downloads/Pet Detector PyTorch/PetDetectorTraining.ipynb```\n\n| | |\n|---|---|\n|   |This notebook was generated by the Gather Extension. The intent is that it contains only the code and cells required to produce the same results as the cell originally selected for gathering. Please note that the Python analysis is quite conservative, so if it is unsure whether a line of code is necessary for execution, it will err on the side of including it.|\n\n**Are you satisfied with the code that was gathered?**\n\n[Yes](https://command:python.datascience.gatherquality?yes) [No](https://command:python.datascience.gatherquality?no)"]},{"cell_type":"code","execution_count":null,"metadata":{},"outputs":[],"source":["import os\n","from torchvision import datasets, models, transforms"]},{"cell_type":"code","execution_count":null,"metadata":{},"outputs":[],"source":["from helper import *\n","path = os.path.join(os.getcwd(), 'images')\n","dataset_url = 'http://www.robots.ox.ac.uk/~vgg/data/pets/data/images.tar.gz'\n","dataset_path = fetch_and_untar(dataset_url, path)"]},{"cell_type":"code","execution_count":null,"metadata":{},"outputs":[],"source":["dataloaders, dataset_sizes, class_names = transform_images_to_tensors(dataset_path)"]},{"cell_type":"code","execution_count":null,"metadata":{},"outputs":[],"source":["device = use_gpu_if_avail()"]},{"cell_type":"code","execution_count":null,"metadata":{},"outputs":[],"source":["model_ft = models.resnet18(pretrained=True)\n","setup_model(model_ft, device)"]},{"cell_type":"code","execution_count":null,"metadata":{},"outputs":[],"source":["model_ft = train_model(model_ft, device, dataloaders, dataset_sizes, num_epochs=1)"]},{"cell_type":"code","execution_count":null,"metadata":{},"outputs":[],"source":["save_best_model(model_ft)\n",""]}],"nbformat":4,"nbformat_minor":2,"metadata":{"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":3},"orig_nbformat":2}}
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# pet-detector-pytorch

A pet breed classifier built with PyTorch: a ResNet-18 fine-tuned on the Oxford-IIIT Pet dataset (37 cat and dog breeds). Training lives in the notebook and `helper.py`; inference runs as an Azure Function (`inference/`) that a static website (`website/`) calls over HTTP.

Working demo at https://jemew-pytorch-webapp.azurewebsites.net/
--------------------------------------------------------------------------------
/helper.py:
--------------------------------------------------------------------------------
import os
import random
import re
import copy
import time
import shutil
import tarfile
from os.path import basename, isfile
from pathlib import Path
from urllib.parse import urlparse
from urllib.request import urlopen

import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
from ipywidgets import interact
from PIL import Image
from torchvision import datasets, models, transforms


# Fetch a file from uri, unzip and untar it into its own directory.
def fetch_and_untar(uri, path):
    # If the dataset directory already exists, skip the download
    if os.path.isdir(path):
        return path

    # Parse the uri to extract the local filename
    parsed_uri = urlparse(uri)
    local_filename = basename(parsed_uri.path)

    # If the file is not already on disk, retrieve it from the uri
    if not isfile(local_filename):
        with urlopen(uri) as response:
            with open(local_filename, 'bw+') as f:
                shutil.copyfileobj(response, f)

    # Expand the archive into the working directory
    with tarfile.open(local_filename) as tar:
        tar.extractall()

    move_images_into_labelled_directories(path)
    split_train_val(path)
    return path

def move_images_into_labelled_directories(image_dir):
    images_path = Path(image_dir)
    extract_breed_from_filename = re.compile(r'([^/]+)_\d+\.jpg$')

    for filename in os.listdir(image_dir):
        match = extract_breed_from_filename.match(filename)
        if match is not None:
            breed = match.group(1)
            if not os.path.exists(images_path / breed):
                os.makedirs(images_path / breed)
            src_path = images_path / filename
            dest_path = images_path / breed / filename
            shutil.move(src_path, dest_path)

# Split into training and validation folders (80/20 split)
def split_train_val(path):
    # If the dataset has already been split, skip this step
    if os.path.isdir(os.path.join(path, 'train')):
        return

    for subdir, dirs, files in os.walk(path):
        if subdir == path:
            if not os.path.exists(os.path.join(path, 'val')):
                os.mkdir(os.path.join(path, 'val'))

            if not os.path.exists(os.path.join(path, 'train')):
                os.mkdir(os.path.join(path, 'train'))
            continue

        # Move 40 random images per breed (roughly 20% of ~200) into a flat val folder
        for x in range(40):
            fil = random.choice(os.listdir(subdir))
            os.replace(os.path.join(subdir, fil), os.path.join(path, 'val', fil))

        # Move the remaining images for this breed under train/
        new_dir = os.path.join(os.path.split(subdir)[0], 'train', os.path.split(subdir)[1])
        shutil.move(subdir, new_dir)

    # Re-group the flat val folder into per-breed directories
    move_images_into_labelled_directories(os.path.join(path, 'val'))

def get_sample_images_for_each_species(dirname):
    train_dir = Path(os.path.join(dirname, 'train'))
    species_dirs = [d for d in train_dir.iterdir() if d.is_dir()]
    species_images_and_labels = []
    for species_dir in species_dirs:
        # Take the first image found for each breed
        for image_path in species_dir.iterdir():
            image = Image.open(image_path)
            image_label = species_dir.parts[-1].lower().replace('_', ' ')
            species_images_and_labels.append((image, image_label))
            break
    return species_images_and_labels

def plot_images_in_grid(images_data, number_columns):
    f, subplots = plt.subplots(len(images_data) // number_columns + 1, number_columns)
    f.set_size_inches(16, 16)

    row = 0
    col = 0

    for record in images_data:
        subplot = subplots[row, col]
        subplot.imshow(record[0])
        subplot.set_axis_off()
        subplot.set_title(record[1], color='#358CD6')
        col += 1
        if col == number_columns:
            row += 1
            col = 0

    # Hide any unused cells in the final row
    for c in range(col, number_columns):
        subplots[row, c].set_axis_off()

def browse_images(digits):
    n = len(digits)
    def view_image(i):
        plt.imshow(digits[i][0], cmap=plt.cm.gray_r, interpolation='nearest')
        plt.title('Training: %s' % digits[i][1])
        plt.show()
    interact(view_image, i=(0, n - 1))

def transform_images_to_tensors(dataset_path):
    data_transforms = {
        'train': transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ]),
        'val': transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ]),
    }

    image_datasets = {x: datasets.ImageFolder(os.path.join(dataset_path, x),
                                              data_transforms[x])
                      for x in ['train', 'val']}
    dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=4,
                                                  shuffle=True, num_workers=4)
                   for x in ['train', 'val']}
    dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val']}
    class_names = image_datasets['train'].classes

    save_labels(class_names)

    return dataloaders, dataset_sizes, class_names

def save_labels(class_names):
    with open("labels.txt", "w") as output:
        for row in class_names:
            output.write(str(row).replace('_', ' ').title() + '\n')

def setup_model(model_ft, device):
    # Replace the final fully connected layer with one sized for the 37 pet breeds
    num_ftrs = model_ft.fc.in_features
    model_ft.fc = nn.Linear(num_ftrs, 37)
    model_ft = model_ft.to(device)
    return model_ft

def save_best_model(model_ft):
    if not os.path.exists(os.path.join(os.getcwd(), 'model')):
        os.mkdir(os.path.join(os.getcwd(), 'model'))

    save_path = os.path.join(os.getcwd(), 'model', 'checkpoint.pth')

    torch.save(model_ft, save_path)

def use_gpu_if_avail():
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    return device

# For demo purposes (to save time): a stub that prints pre-recorded results
# def train_model(model, device, dataloaders, dataset_sizes, num_epochs=25):
#     print("Epoch 0/0")
#     print("----------")
#     print("train Loss: 1.7429 Acc: 0.4984")
#     print("val Loss: 0.5585 Acc: 0.8297")
#     print("")
#     print("Training complete in 38m 41s")
#     print("Best val Acc: 0.829730")
#     return model

def train_model(model, device, dataloaders, dataset_sizes, num_epochs=25):
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

    since = time.time()

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0

            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                optimizer.zero_grad()

                # Track gradients only during the training phase
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            if phase == 'train':
                scheduler.step()

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = float(running_corrects) / dataset_sizes[phase]

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))

            # Deep copy the best-performing weights so far
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))

    # Load the best model weights before returning
    model.load_state_dict(best_model_wts)
    return model
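
Since `save_best_model` serializes the entire module with `torch.save(model_ft, ...)`, the checkpoint can be reloaded directly for a quick local sanity check. A minimal sketch, assuming training has produced `model/checkpoint.pth` and `labels.txt` in the working directory; the sample image path is hypothetical:

```python
import os
import torch
from PIL import Image
from torchvision import transforms

# Reload the full model object saved by save_best_model
model = torch.load(os.path.join('model', 'checkpoint.pth'), map_location='cpu')
model.eval()

# Mirror the 'val' preprocessing pipeline above
preprocess = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])

with open('labels.txt') as f:
    labels = f.read().splitlines()

image = Image.open('my_pet.jpg').convert('RGB')  # hypothetical sample image
batch = preprocess(image).unsqueeze(0)           # add the batch dimension

with torch.no_grad():
    output = model(batch)
print(labels[output[0].argmax().item()])
```

The preprocessing deliberately matches the `'val'` transform, since the model was trained against those ImageNet normalization constants.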
--------------------------------------------------------------------------------
/inference/.funcignore:
--------------------------------------------------------------------------------
.venv
--------------------------------------------------------------------------------
/inference/.gitignore:
--------------------------------------------------------------------------------
bin
obj
csx
.vs
edge
Publish

*.user
*.suo
*.cscfg
*.Cache
project.lock.json

/packages
/TestResults

/tools/NuGet.exe
/App_Data
/secrets
/data
.secrets
appsettings.json
local.settings.json

node_modules
dist

# Local python packages
.python_packages/

# Python Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
--------------------------------------------------------------------------------
/inference/classifynew/__init__.py:
--------------------------------------------------------------------------------
import logging
import json
import azure.functions as func

from .predict import predict_image_from_url

def main(req: func.HttpRequest) -> func.HttpResponse:
    headers = {
        "Content-type": "application/json",
        "Access-Control-Allow-Origin": "*"
    }

    image_url = req.params.get('img')
    if not image_url:
        # Guard against a missing query parameter instead of crashing on None
        return func.HttpResponse(json.dumps({'error': 'missing img query parameter'}),
                                 status_code=400, headers=headers)

    logging.info('Image URL received: %s', image_url)

    results = predict_image_from_url(image_url)

    return func.HttpResponse(json.dumps(results), headers=headers)
--------------------------------------------------------------------------------
/inference/classifynew/function.json:
--------------------------------------------------------------------------------
{
  "scriptFile": "__init__.py",
  "bindings": [
    {
      "authLevel": "function",
      "type": "httpTrigger",
      "direction": "in",
      "name": "req",
      "methods": [
        "get",
        "post"
      ]
    },
    {
      "type": "http",
      "direction": "out",
      "name": "$return"
    }
  ]
}
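
With `authLevel` set to `function`, callers must pass a function key alongside the `img` parameter that `__init__.py` reads. A hedged sketch of a client call using the `requests` library; the host name and key are placeholders for your own deployment:

```python
import requests

# Hypothetical endpoint: substitute your function app's URL and key
FUNCTION_URL = 'https://<your-app>.azurewebsites.net/api/classifynew'

resp = requests.get(FUNCTION_URL, params={
    'img': 'https://example.com/some-pet.jpg',  # any publicly reachable image URL
    'code': '<function-key>',                   # required because authLevel is "function"
})
print(resp.json())  # e.g. {'predictedTagName': 'Beagle'}
```

The `Access-Control-Allow-Origin: *` header returned by `main` is what lets the static website call this endpoint directly from the browser.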
--------------------------------------------------------------------------------
/inference/classifynew/helper.py:
--------------------------------------------------------------------------------
import os
import tempfile
import torch

from azure.storage.blob import BlobServiceClient
from torchvision import transforms
from PIL import Image

def get_model_from_az_storage():
    model_path = 'checkpoint.pth'

    # Read the Azure Storage connection string for the model from the environment
    connect_str = os.environ.get('connect_str')
    if connect_str is None:
        raise RuntimeError('connect_str environment variable (Azure Storage connection string) not found')

    # Download the model from Azure Blob Storage
    blob_service_client = BlobServiceClient.from_connection_string(connect_str)
    blob_client = blob_service_client.get_blob_client(container='petdetector', blob='checkpoint.pth')

    local_model_path = os.path.join(tempfile.gettempdir(), model_path)
    with open(local_model_path, "wb") as my_blob:
        download_stream = blob_client.download_blob()
        my_blob.write(download_stream.readall())

    model = torch.load(local_model_path, map_location=torch.device('cpu'))
    model.eval()

    return model

# Get the classification labels from the labels.txt file next to this module
def get_class_labels():
    dirname = os.path.dirname(__file__)
    with open(os.path.join(dirname, 'labels.txt'), 'r') as f:
        classes = f.read().splitlines()

    return classes

def convert_image_to_tensor(model, image):
    input_image = Image.open(image).convert('RGB')
    preprocess = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])
    input_tensor = preprocess(input_image)
    input_batch = input_tensor.unsqueeze(0)  # create a mini-batch as expected by the model

    # Move the input and model to the GPU for speed, if one is available
    if torch.cuda.is_available():
        input_batch = input_batch.to('cuda')
        model.to('cuda')

    return input_batch

def get_model_prediction(model, input_batch):
    class_labels = get_class_labels()

    with torch.no_grad():
        output = model(input_batch)

    # The output has unnormalized scores; run a softmax to get probabilities
    probabilities = torch.nn.functional.softmax(output[0], dim=0)
    out = class_labels[probabilities.argmax().item()]

    return out
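
`get_model_prediction` returns only the single best label. If the front end ever needs confidence scores, a top-k variant is a small extension of the same forward pass. An illustrative sketch (`topk_predictions` is a hypothetical helper, not part of this repo):

```python
import torch

def topk_predictions(model, input_batch, class_labels, k=3):
    # Same forward pass as get_model_prediction, but keep the k best classes
    with torch.no_grad():
        output = model(input_batch)
    probabilities = torch.nn.functional.softmax(output[0], dim=0)
    top_prob, top_idx = torch.topk(probabilities, k)
    return [(class_labels[i], p) for i, p in zip(top_idx.tolist(), top_prob.tolist())]
```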
--------------------------------------------------------------------------------
/inference/classifynew/labels.txt:
--------------------------------------------------------------------------------
Abyssinian
Bengal
Birman
Bombay
British Shorthair
Egyptian Mau
Maine Coon
Persian
Ragdoll
Russian Blue
Siamese
Sphynx
American Bulldog
American Pit Bull Terrier
Basset Hound
Beagle
Boxer
Chihuahua
English Cocker Spaniel
English Setter
German Shorthaired
Great Pyrenees
Havanese
Japanese Chin
Keeshond
Leonberger
Miniature Pinscher
Newfoundland
Pomeranian
Pug
Saint Bernard
Samoyed
Scottish Terrier
Shiba Inu
Staffordshire Bull Terrier
Wheaten Terrier
Yorkshire Terrier
--------------------------------------------------------------------------------
/inference/classifynew/predict.py:
--------------------------------------------------------------------------------
from urllib.request import urlopen

import sys

from .helper import *

# Load the model once at import time so each function instance downloads it only once
model = get_model_from_az_storage()

def predict_image_from_url(image_url):
    try:
        with urlopen(image_url) as test_image:
            img_tensor = convert_image_to_tensor(model, test_image)
            prediction = get_model_prediction(model, img_tensor)
            response = {'predictedTagName': prediction}
    except Exception:
        # Covers unreachable URLs as well as non-image responses
        response = {'error': 'image url is invalid'}

    return response

if __name__ == '__main__':
    predict_image_from_url(sys.argv[1])
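
Because `predict.py` imports `.helper` relatively, it must be imported as part of the `classifynew` package rather than run as a loose script. A local smoke test might look like the following, assuming it runs from inside `inference/` with the function's dependencies installed; the connection string and image URL are placeholders:

```python
import os
# Must be set before the import, since importing classifynew downloads the model
os.environ.setdefault('connect_str', '<azure-storage-connection-string>')

from classifynew.predict import predict_image_from_url

print(predict_image_from_url('https://example.com/some-pet.jpg'))
# -> {'predictedTagName': '...'} on success, {'error': 'image url is invalid'} otherwise
```

The model download happening at import time is also why a deployed function instance only pays that cost on cold start.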
--------------------------------------------------------------------------------
/inference/host.json:
--------------------------------------------------------------------------------
{
  "version": "2.0",
  "extensionBundle": {
    "id": "Microsoft.Azure.Functions.ExtensionBundle",
    "version": "[1.*, 2.0.0)"
  }
}
--------------------------------------------------------------------------------
/inference/requirements.txt:
--------------------------------------------------------------------------------
azure-functions
azure-storage-blob
-f https://download.pytorch.org/whl/torch_stable.html
torch==1.5.0+cpu
torchvision==0.6.0+cpu
pillow
--------------------------------------------------------------------------------
/labels.txt:
--------------------------------------------------------------------------------
Abyssinian
Bengal
Birman
Bombay
British Shorthair
Egyptian Mau
Maine Coon
Persian
Ragdoll
Russian Blue
Siamese
Sphynx
American Bulldog
American Pit Bull Terrier
Basset Hound
Beagle
Boxer
Chihuahua
English Cocker Spaniel
English Setter
German Shorthaired
Great Pyrenees
Havanese
Japanese Chin
Keeshond
Leonberger
Miniature Pinscher
Newfoundland
Pomeranian
Pug
Saint Bernard
Samoyed
Scottish Terrier
Shiba Inu
Staffordshire Bull Terrier
Wheaten Terrier
Yorkshire Terrier
--------------------------------------------------------------------------------
/model/checkpoint.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jmew/pet-detector-pytorch/32c6894cc057ffd9002814c193e62eb43828b1bf/model/checkpoint.pth
--------------------------------------------------------------------------------
/website/.deployment:
--------------------------------------------------------------------------------
[config]
SCM_DO_BUILD_DURING_DEPLOYMENT=true
--------------------------------------------------------------------------------
/website/index.html:
--------------------------------------------------------------------------------
[Static front-end page titled "Pet Detector with PyTorch"; the remaining markup, styles, and script (roughly 150 lines) are not recoverable.]
--------------------------------------------------------------------------------