├── .gitignore ├── 1_computeRois.py ├── 2_cntkGenerateInputs.py ├── 3_runCntk.py ├── 4_trainSvm.py ├── 5_evaluateResults.py ├── 5_visualizeResults.py ├── 6_scoreImage.py ├── A1_annotateImages.py ├── A2_annotateBboxLabels.py ├── B1_evaluateRois.py ├── B2_cntkVisualizeInputs.py ├── B3_cntkAnalyzeInputs.py ├── PARAMETERS.py ├── README.md ├── __init__.py ├── data └── grocery │ ├── negative │ ├── 1.jpg │ ├── 2.jpg │ ├── 3.jpg │ ├── 4.jpg │ └── 5.jpg │ ├── positive │ ├── 0.bboxes.labels.tsv │ ├── 0.bboxes.tsv │ ├── 0.jpg │ ├── 11.bboxes.labels.tsv │ ├── 11.bboxes.tsv │ ├── 11.jpg │ ├── 12.bboxes.labels.tsv │ ├── 12.bboxes.tsv │ ├── 12.jpg │ ├── 13.bboxes.labels.tsv │ ├── 13.bboxes.tsv │ ├── 13.jpg │ ├── 14.bboxes.labels.tsv │ ├── 14.bboxes.tsv │ ├── 14.jpg │ ├── 17.bboxes.labels.tsv │ ├── 17.bboxes.tsv │ ├── 17.jpg │ ├── 18.bboxes.labels.tsv │ ├── 18.bboxes.tsv │ ├── 18.jpg │ ├── 19.bboxes.labels.tsv │ ├── 19.bboxes.tsv │ ├── 19.jpg │ ├── 2.bboxes.labels.tsv │ ├── 2.bboxes.tsv │ ├── 2.jpg │ ├── 21.bboxes.labels.tsv │ ├── 21.bboxes.tsv │ ├── 21.jpg │ ├── 22.bboxes.labels.tsv │ ├── 22.bboxes.tsv │ ├── 22.jpg │ ├── 23.bboxes.labels.tsv │ ├── 23.bboxes.tsv │ ├── 23.jpg │ ├── 24.bboxes.labels.tsv │ ├── 24.bboxes.tsv │ ├── 24.jpg │ ├── 26.bboxes.labels.tsv │ ├── 26.bboxes.tsv │ ├── 26.jpg │ ├── 3.bboxes.labels.tsv │ ├── 3.bboxes.tsv │ ├── 3.jpg │ ├── 4.bboxes.labels.tsv │ ├── 4.bboxes.tsv │ ├── 4.jpg │ ├── 6.bboxes.labels.tsv │ ├── 6.bboxes.tsv │ ├── 6.jpg │ ├── 7.bboxes.labels.tsv │ ├── 7.bboxes.tsv │ ├── 7.jpg │ ├── 8.bboxes.labels.tsv │ ├── 8.bboxes.tsv │ ├── 8.jpg │ ├── 9.bboxes.labels.tsv │ ├── 9.bboxes.tsv │ └── 9.jpg │ └── testImages │ ├── 10.bboxes.labels.tsv │ ├── 10.bboxes.tsv │ ├── 10.jpg │ ├── 15.bboxes.labels.tsv │ ├── 15.bboxes.tsv │ ├── 15.jpg │ ├── 20.bboxes.labels.tsv │ ├── 20.bboxes.tsv │ ├── 20.jpg │ ├── 25.bboxes.labels.tsv │ ├── 25.bboxes.tsv │ ├── 25.jpg │ ├── 5.bboxes.labels.tsv │ ├── 5.bboxes.tsv │ └── 5.jpg ├── deprecated_3_runCntk_brainscript.py ├── doc ├── 0.filter.roi.jpg ├── 0.grid.roi.jpg ├── 0.ss.roi.jpg ├── anno_boxes.jpg ├── anno_labels.jpg ├── nn_00.jpg ├── nn_00_no_nms.jpg ├── nn_01.jpg ├── nn_110.jpg ├── nn_215.jpg ├── nn_425.jpg ├── nn_55.jpg ├── precision_recall.jpg ├── rcnnPipeline.JPG ├── svm_010.jpg ├── svm_115.jpg ├── svm_220.jpg ├── svm_325.jpg └── svm_45.jpg ├── fastRCNN ├── __init__.py ├── imdb.py ├── nms.py ├── pascal_voc.py ├── test.py ├── timer.py ├── train_svms.py ├── utils34_win64 │ ├── cython_bbox.pyd │ └── cython_nms.pyd ├── utils35_win64 │ ├── cython_bbox.pyd │ └── cython_nms.pyd └── voc_eval.py ├── helpers.py ├── helpers_cntk.py ├── imdb_data.py └── resources ├── cntk ├── config.cntk └── model.pdf └── python35_64bit_requirements ├── opencv_python-3.2.0-cp35-cp35m-win_amd64.whl └── requirements.txt /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.pyx 3 | /__pycache__/ 4 | /backup_v0/ 5 | /data/liebherr_v4/ 6 | /fastRCNN/__pycache__/ 7 | /proc/ 8 | /resources/cntk/AlexNet.model 9 | /resources/pascalVocData/ 10 | /results/ 11 | /selectivesearch/ 12 | -------------------------------------------------------------------------------- /1_computeRois.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import sys, os, importlib, random 3 | import PARAMETERS 4 | locals().update(importlib.import_module("PARAMETERS").__dict__) 5 | 6 | 7 | #################################### 8 | # Parameters 9 | #################################### 10 | 
boShowImg = True 11 | subdirs = ['positive', 'testImages', 'negative'] 12 | 13 | #no need to change these parameters 14 | boAddSelectiveSearchROIs = True 15 | boAddGridROIs = True 16 | boFilterROIs = True 17 | if datasetName.lower() == "pascalvoc": 18 | print("No need to run ROI computation since Pascal VOC comes with pre-computed ROIs.") 19 | exit() 20 | 21 | 22 | #################################### 23 | # Main 24 | #################################### 25 | #init 26 | for subdir in subdirs: 27 | makeDirectory(roiDir) 28 | makeDirectory(roiDir + subdir) 29 | imgFilenames = getFilesInDirectory(imgDir + subdir, ".jpg") 30 | 31 | #loop over all images 32 | times = [] 33 | for imgIndex, imgFilename in enumerate(imgFilenames): 34 | #if os.path.exists(roiPath): 35 | # print "Skipping image since roi file already exists: " + imgFilename, imgIndex 36 | # continue 37 | 38 | # load image 39 | print("Processing image {} of {}: subdir={}, filename={}".format(imgIndex, len(imgFilenames), subdir, imgFilename)) 40 | imgPath = join(imgDir, subdir, imgFilename) 41 | imgOrig = imread(imgPath) 42 | 43 | # compute ROIs 44 | tstart = datetime.datetime.now() 45 | rois = computeRois(imgOrig, boAddSelectiveSearchROIs, boAddGridROIs, boFilterROIs, ss_kvals, ss_minSize, ss_max_merging_iterations, ss_nmsThreshold, 46 | roi_minDimRel, roi_maxDimRel, roi_maxImgDim, roi_maxAspectRatio, roi_minNrPixelsRel, roi_maxNrPixelsRel, 47 | grid_nrScales, grid_aspectRatios, grid_downscaleRatioPerIteration, grid_stepSizeRel) 48 | times.append((datetime.datetime.now() - tstart).total_seconds() * 1000) 49 | print(" Time roi computation [ms]: " + str((datetime.datetime.now() - tstart).total_seconds() * 1000)) 50 | roiPath = "{}/{}/{}.roi.txt".format(roiDir, subdir, imgFilename[:-4]) 51 | np.savetxt(roiPath, rois, fmt='%d') 52 | 53 | #visualize ROIs 54 | if boShowImg: 55 | debugScale = 800.0 / max(imWidthHeight(imgOrig)) 56 | img = imresize(imgOrig, debugScale) 57 | drawRectangles(img, rois*debugScale, color=(0, 255, 0), thickness=1) 58 | imshow(img, waitDuration = 1) 59 | roiImgPath = os.path.join(roiDir, subdir, imgFilename[:-4] + ".roi.jpg") 60 | imwrite(img, roiImgPath) 61 | 62 | print("Time per image [ms]: median={:.1f}, std={:.1f}, 90%-percentile={:.1f}".format(np.median(times), np.std(times), np.percentile(times, 90))) 63 | print("DONE.") -------------------------------------------------------------------------------- /2_cntkGenerateInputs.py: -------------------------------------------------------------------------------- 1 | import os, sys, importlib 2 | import shutil, time 3 | import PARAMETERS 4 | locals().update(importlib.import_module("PARAMETERS").__dict__) 5 | 6 | 7 | #################################### 8 | # Parameters 9 | #################################### 10 | image_sets = ["train", "test"] 11 | 12 | 13 | #################################### 14 | # Main 15 | #################################### 16 | #clear imdb cache and other files 17 | if os.path.exists(cntkFilesDir): 18 | assert(cntkFilesDir.endswith("cntkFiles/")) 19 | userInput = input('--> INPUT: Press "y" to delete directory ' + cntkFilesDir + ": ") 20 | if userInput.lower() not in ['y', 'yes']: 21 | print("User input is %s: exiting now." 
% userInput) 22 | exit(-1) 23 | shutil.rmtree(cntkFilesDir) 24 | time.sleep(0.2) #avoid file access errors 25 | 26 | 27 | #create cntk representation for each image 28 | makeDirectory(cntkFilesDir) 29 | for image_set in image_sets: 30 | imdb = imdbs[image_set] 31 | counterGt = np.zeros(len(classes), np.int32) 32 | print("Number of images in set '{}' = {}".format(image_set, imdb.num_images)) 33 | 34 | #open files for writing 35 | cntkImgsPath, cntkRoiCoordsPath, cntkRoiLabelsPath, nrRoisPath = cntkInputPaths(cntkFilesDir, image_set) 36 | with open(cntkImgsPath, 'w') as cntkImgsFile, \ 37 | open(cntkRoiCoordsPath, 'w') as cntkRoiCoordsFile, \ 38 | open(cntkRoiLabelsPath, 'w') as cntkRoiLabelsFile, \ 39 | open(nrRoisPath, 'w') as nrRoisFile: 40 | 41 | # for each image, transform rois etc to cntk format 42 | for imgIndex in range(0, imdb.num_images): 43 | if imgIndex % 200 == 0: 44 | print("Processing image set '{}', image {} of {}".format(image_set, imgIndex, imdb.num_images)) 45 | imgPath = imdb.image_path_at(imgIndex) 46 | currRois = imdb.roidb[imgIndex]['boxes'] 47 | currGtOverlaps = imdb.roidb[imgIndex]['gt_overlaps'] 48 | for i in imdb.roidb[imgIndex]['gt_classes']: 49 | counterGt[i] += 1 50 | 51 | #get DNN inputs for image 52 | #Note: this also marks other ROIs as 'positives', if overlap with GT is above a threshold 53 | labelsStr, roisStr, _ = getCntkInputs(imgPath, currRois, currGtOverlaps, train_posOverlapThres, nrClasses, cntk_nrRois, cntk_padWidth, cntk_padHeight) 54 | 55 | #update cntk data 56 | nrRoisFile.write("{}\n".format(len(currRois))) 57 | cntkImgsFile.write("{}\t{}\t0\n".format(imgIndex, imgPath)) 58 | cntkRoiCoordsFile.write("{} |rois{}\n".format(imgIndex, roisStr)) 59 | cntkRoiLabelsFile.write("{} |roiLabels{}\n".format(imgIndex, labelsStr)) 60 | 61 | #print debug info 62 | if image_set == 'train': 63 | for i in range(len(classes)): 64 | print(" {:3}: Found {} objects of class {}.".format(i, counterGt[i], classes[i])) 65 | 66 | print("DONE.") 67 | -------------------------------------------------------------------------------- /3_runCntk.py: -------------------------------------------------------------------------------- 1 | from PARAMETERS import * 2 | from helpers_cntk import * 3 | 4 | 5 | #################################### 6 | # MAIN 7 | #################################### 8 | makeDirectory(modelDir) 9 | print ("classifier = " + classifier) 10 | print ("cntk_lr_per_image = " + str(cntk_lr_per_image)) 11 | 12 | # optionally retrain DNN 13 | # if the classifier is svm, then simply return the 4096-floats penultimate layer as model 14 | # otherwise add new output layer, retrain the DNN, and return this new model. 
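# (illustrative sketch only; layer/function details are assumptions -- see init_train_fast_rcnn,
#  imported from helpers_cntk, for the actual implementation. Conceptually:
#      base = load_model(cntkResourcesDir + "AlexNet.model")
#      if boSkipTraining: use base up to its 4096-dim penultimate layer as a fixed feature extractor
#      else:              append a new Dense(nrClasses) output layer and retrain end-to-end)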
15 | if classifier == 'svm': 16 | boSkipTraining = True 17 | else: 18 | boSkipTraining = False 19 | model = init_train_fast_rcnn(cntk_padHeight, cntk_padWidth, nrClasses, cntk_nrRois, cntk_mb_size, cntk_max_epochs, 20 | cntk_lr_per_image, cntk_l2_reg_weight, cntk_momentum_time_constant, cntkFilesDir, boSkipTraining) 21 | 22 | # write model to disk 23 | model_path = os.path.join(modelDir, "frcn_" + classifier + ".model") 24 | print("Writing model to %s" % model_path) 25 | model.save(model_path) 26 | 27 | # compute output of every image and write to disk 28 | image_sets = ["test", "train"] 29 | for image_set in image_sets: 30 | outParsedDir = cntkFilesDir + image_set + "_" + classifier + "_parsed/" 31 | makeDirectory(outParsedDir) 32 | run_fast_rcnn(model, image_set, cntk_padHeight, cntk_padWidth, nrClasses, cntk_nrRois, cntkFilesDir, outParsedDir) 33 | 34 | print("DONE.") -------------------------------------------------------------------------------- /4_trainSvm.py: -------------------------------------------------------------------------------- 1 | import importlib 2 | from fastRCNN.train_svms import SVMTrainer 3 | import PARAMETERS 4 | locals().update(importlib.import_module("PARAMETERS").__dict__) 5 | 6 | 7 | ################################################# 8 | # Parameters 9 | ################################################# 10 | experimentName = "exp1" 11 | 12 | #no need to change these params 13 | cntkParsedOutputDir = cntkFilesDir + "train_svm_parsed/" 14 | 15 | 16 | 17 | ################################################# 18 | # Main 19 | ################################################# 20 | if classifier == "nn": 21 | print("No need to train SVM since using 'nn' classifier.") 22 | exit() 23 | print ("svm_targetNorm = " + str(svm_targetNorm)) 24 | print ("svm_retrainLimit = " + str(svm_retrainLimit)) 25 | print ("svm_posWeight = " + str(svm_posWeight)) 26 | print ("svm_C = " + str(svm_C)) 27 | print ("svm_B = " + str(svm_B)) 28 | print ("svm_penality = " + str(svm_penality)) 29 | print ("svm_loss = " + str(svm_loss)) 30 | print ("svm_evictThreshold = " + str(svm_evictThreshold)) 31 | print ("svm_nrEpochs = " + str(svm_nrEpochs)) 32 | 33 | #init 34 | makeDirectory(trainedSvmDir) 35 | np.random.seed(svm_rngSeed) 36 | imdb = imdbs["train"] 37 | net = DummyNet(4096, imdb.num_classes, cntkParsedOutputDir) 38 | svmWeightsPath, svmBiasPath, svmFeatScalePath = svmModelPaths(trainedSvmDir, experimentName) 39 | 40 | # add ROIs which significantly overlap with a ground truth object as positives 41 | if train_posOverlapThres > 0: 42 | print ("Adding ROIs with gt overlap >= %2.2f as positives ..." 
% (train_posOverlapThres)) 43 | existingPosCounter, addedPosCounter = updateRoisGtClassIfHighGtOverlap(imdb, train_posOverlapThres) 44 | print ("Number of positives originally: {} (in {} images)".format(existingPosCounter, imdb.num_images)) 45 | print ("Number of additional positives: {}.".format(addedPosCounter)) 46 | 47 | # start training 48 | svm = SVMTrainer(net, imdb, im_detect, svmWeightsPath, svmBiasPath, svmFeatScalePath, 49 | svm_C, svm_B, svm_nrEpochs, svm_retrainLimit, svm_evictThreshold, svm_posWeight, 50 | svm_targetNorm, svm_penality, svm_loss, svm_rngSeed) 51 | svm.train() 52 | print ("DONE.") 53 | -------------------------------------------------------------------------------- /5_evaluateResults.py: -------------------------------------------------------------------------------- 1 | import importlib 2 | from fastRCNN.test import test_net 3 | import PARAMETERS 4 | locals().update(importlib.import_module("PARAMETERS").__dict__) 5 | 6 | 7 | #################################### 8 | # Parameters 9 | #################################### 10 | image_set = 'test' 11 | svmExperimentName = "exp1" 12 | 13 | #no need to change these 14 | cntkParsedOutputDir = cntkFilesDir + image_set + "_" + classifier + "_parsed/" 15 | 16 | 17 | #################################### 18 | # Main 19 | #################################### 20 | print("classifier = " + classifier) 21 | print("image_set = " + image_set) 22 | imdb = imdbs[image_set] 23 | net = DummyNet(4096, imdb.num_classes, cntkParsedOutputDir) 24 | 25 | #load svm 26 | svmFeatScale = None 27 | if classifier == 'svm': 28 | svmWeights, svmBias, svmFeatScale = loadSvm(trainedSvmDir, svmExperimentName) 29 | net.params['cls_score'][0].data = svmWeights 30 | net.params['cls_score'][1].data = svmBias 31 | 32 | #create empty directory for evaluation files 33 | if type(imdb) == imdb_data: 34 | evalTempDir = None 35 | else: 36 | #pascal_voc implementation requires temporary directory for evaluation 37 | evalTempDir = os.path.join(procDir, "eval_mAP_" + image_set) 38 | makeDirectory(evalTempDir) 39 | deleteAllFilesInDirectory(evalTempDir, None) 40 | 41 | #compute mAPs 42 | evalResults = test_net(net, imdb, evalTempDir, svmFeatScale, classifier, nmsThreshold, boUsePythonImpl = True, overlapThreshold = evalVocOverlapThreshold) #, boApplyNms = False) #, boThresholdDetections = False) 43 | #writeTable("evalResults.tsv", [["CLASS","Average Precision (AP)"]] + evalResults) 44 | 45 | print("DONE.") -------------------------------------------------------------------------------- /5_visualizeResults.py: -------------------------------------------------------------------------------- 1 | import os, importlib, sys 2 | import PARAMETERS 3 | locals().update(importlib.import_module("PARAMETERS").__dict__) 4 | 5 | 6 | #################################### 7 | # Parameters 8 | #################################### 9 | image_set = 'test' #'train', 'test' 10 | svm_experimentName = 'exp1' 11 | 12 | #no need to change these parameters 13 | boIncludeGroundTruthRois = False #remove GT (perfect) ROIs which were added to the 'train' imageSet 14 | boUseNonMaximaSurpression = True 15 | visualizationDir = resultsDir + "visualizations" 16 | cntkParsedOutputDir = cntkFilesDir + image_set + "_" + classifier + "_parsed/" 17 | if classifier == 'svm': 18 | prThresholds = np.linspace(0, 10, 21) 19 | else: 20 | prThresholds = np.linspace(0, 1, 21) 21 | 22 | 23 | 24 | #################################### 25 | # Main 26 | #################################### 27 | #init 28 | imdb = 
imdbs[image_set] 29 | gt_roidb = imdb.gt_roidb() 30 | recalls = collections.defaultdict(list) 31 | precisions = collections.defaultdict(list) 32 | 33 | #load svm 34 | print("classifier = " + classifier) 35 | makeDirectory(resultsDir) 36 | makeDirectory(visualizationDir) 37 | if classifier == "svm": 38 | print("Loading svm weights..") 39 | svmWeights, svmBias, svmFeatScale = loadSvm(trainedSvmDir, svm_experimentName) 40 | else: 41 | svmWeights, svmBias, svmFeatScale = (None, None, None) 42 | 43 | 44 | #loop over all images and visualize 45 | for imgIndex in range(0, imdb.num_images): 46 | imgPath = imdb.image_path_at(imgIndex) 47 | imgWidth, imgHeight = imWidthHeight(imgPath) 48 | print("Processing image {} of {}: {}".format(imgIndex, imdb.num_images, imgPath)) 49 | 50 | #load DNN output 51 | cntkOutputPath = os.path.join(cntkParsedOutputDir, str(imgIndex) + ".dat.npz") 52 | dnnOutput = np.load(cntkOutputPath)['arr_0'] 53 | assert(len(dnnOutput) == cntk_nrRois) 54 | 55 | #evaluate classifier for all rois and remove the zero-padded rois 56 | labels, scores = scoreRois(classifier, dnnOutput, svmWeights, svmBias, svmFeatScale, len(classes)) #, vis_decisionThresholds[classifier]) 57 | scores = scores[:len(imdb.roidb[imgIndex]['boxes'])] 58 | labels = labels[:len(imdb.roidb[imgIndex]['boxes'])] 59 | 60 | #remove the ground truth ROIs which were added for training purposes 61 | if not boIncludeGroundTruthRois: 62 | inds = np.where(imdb.roidb[imgIndex]['gt_classes'] == 0)[0] 63 | labels = [labels[i] for i in inds] 64 | scores = [scores[i] for i in inds] 65 | imdb.roidb[imgIndex]['boxes'] = imdb.roidb[imgIndex]['boxes'][inds] 66 | 67 | #perform non-maxima surpression. note that the set of labels detected in the image is not affected by this. 68 | nmsKeepIndices = [] 69 | if boUseNonMaximaSurpression: 70 | nmsKeepIndices = applyNonMaximaSuppression(nmsThreshold, labels, scores, imdb.roidb[imgIndex]['boxes']) 71 | print("Non-maxima surpression kept {:4} of {:4} rois (nmsThreshold={})".format(len(nmsKeepIndices), len(labels), nmsThreshold)) 72 | 73 | #visualize results 74 | imgDebug = visualizeResults(imgPath, labels, scores, imdb.roidb[imgIndex]['boxes'], classes, nmsKeepIndices, 75 | boDrawNegativeRois=False, boDrawNmsRejectedRois=False, decisionThreshold = vis_decisionThresholds[classifier]) 76 | imshow(imgDebug, waitDuration=1, maxDim = 800) 77 | imwrite(imgDebug, visualizationDir + "/" + classifier + "_" + str(imgIndex) + os.path.basename(imgPath)) 78 | 79 | 80 | #compute precision recall of the detection for different thresholds 81 | gtLabels = gt_roidb[imgIndex]['gt_classes'] 82 | gtBboxes = [Bbox(*rect) for rect in gt_roidb[imgIndex]['boxes']] 83 | 84 | for thres in prThresholds: 85 | # get detections with scores higher than the threshold and which were kept by nms 86 | keepInds = set(np.where((np.array(labels) > 0) & (np.array(scores) > thres))[0]) 87 | if boUseNonMaximaSurpression: 88 | keepInds = keepInds.intersection(nmsKeepIndices) 89 | detLabels = [labels[i] for i in keepInds] 90 | detBboxes = [Bbox(*imdb.roidb[imgIndex]['boxes'][i]) for i in keepInds] 91 | 92 | #compute precision recall of the detection 93 | precision, recall = detPrecisionRecall(detBboxes, detLabels, gtBboxes, gtLabels, 94 | evalVocOverlapThreshold, boPenalizeMultipleDetections=False) 95 | recalls[thres].append(recall) 96 | if precision != None: 97 | precisions[thres].append(precision) 98 | 99 | 100 | #compute precision and recall at different thresholds 101 | print("Precision/recall when rejecting detections below a 
given threshold:") 102 | outPR = [("Threshold", "Precision", "Recall")] 103 | for thres in prThresholds: 104 | if precisions[thres] == []: 105 | break 106 | p = np.mean(precisions[thres]) 107 | r = np.mean(recalls[thres]) 108 | outPR.append((thres, p, r)) 109 | print(" At threshold {:.2f}: precision = {:2.2f}, recall = {:2.2f}".format(thres, p, r)) 110 | #writeTable("precisionRecalls.tsv", outPR) 111 | 112 | print("DONE.") -------------------------------------------------------------------------------- /6_scoreImage.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import sys, os, importlib, random, json 3 | import PARAMETERS 4 | from helpers_cntk import * 5 | locals().update(importlib.import_module("PARAMETERS").__dict__) 6 | 7 | 8 | #################################### 9 | # Parameters 10 | #################################### 11 | imgPath = r"C:/Users/pabuehle/Desktop/newImgs/WIN_20160803_11_30_07_Pro.jpg" 12 | 13 | #choose which classifier to use 14 | classifier = 'svm' 15 | svm_experimentName = 'exp1' 16 | 17 | # no need to change these parameters 18 | boAddSelectiveSearchROIs = True 19 | boAddGridROIs = True 20 | boFilterROIs = True 21 | boUseNonMaximaSurpression = True 22 | 23 | 24 | #################################### 25 | # Main 26 | #################################### 27 | random.seed(0) 28 | 29 | # load cntk model 30 | print("Loading DNN..") 31 | tstart = datetime.datetime.now() 32 | model_path = os.path.join(modelDir, "frcn_" + classifier + ".model") 33 | if not os.path.exists(model_path): 34 | raise Exception("Model {} not found.".format(model_path)) 35 | model = load_model(model_path) 36 | print("Time loading DNN [ms]: " + str((datetime.datetime.now() - tstart).total_seconds() * 1000)) 37 | 38 | # load trained svm 39 | if classifier == "svm": 40 | print("Loading svm weights..") 41 | tstart = datetime.datetime.now() 42 | svmWeights, svmBias, svmFeatScale = loadSvm(trainedSvmDir, svm_experimentName) 43 | print("Time loading svm [ms]: " + str((datetime.datetime.now() - tstart).total_seconds() * 1000)) 44 | else: 45 | svmWeights, svmBias, svmFeatScale = (None, None, None) 46 | 47 | # compute ROIs 48 | tstart = datetime.datetime.now() 49 | imgOrig = imread(imgPath) 50 | currRois = computeRois(imgOrig, boAddSelectiveSearchROIs, boAddGridROIs, boFilterROIs, ss_kvals, ss_minSize, 51 | ss_max_merging_iterations, ss_nmsThreshold, 52 | roi_minDimRel, roi_maxDimRel, roi_maxImgDim, roi_maxAspectRatio, roi_minNrPixelsRel, 53 | roi_maxNrPixelsRel, grid_nrScales, grid_aspectRatios, grid_downscaleRatioPerIteration, grid_stepSizeRel) 54 | currRois = currRois[:cntk_nrRois] # only keep first cntk_nrRois rois 55 | print("Time roi computation [ms]: " + str((datetime.datetime.now() - tstart).total_seconds() * 1000)) 56 | 57 | # prepare DNN inputs 58 | tstart = datetime.datetime.now() 59 | imgPadded = imresizeAndPad(imgOrig, cntk_padWidth, cntk_padHeight) 60 | _, _, roisCntk = getCntkInputs(imgPath, currRois, None, train_posOverlapThres, nrClasses, cntk_nrRois, cntk_padWidth, cntk_padHeight) 61 | arguments = { 62 | model.arguments[0]: [np.ascontiguousarray(np.array(imgPadded, dtype=np.float32).transpose(2, 0, 1))], # convert to CNTK's HWC format 63 | model.arguments[1]: [np.array(roisCntk, np.float32)] 64 | } 65 | print("Time cnkt input generation [ms]: " + str((datetime.datetime.now() - tstart).total_seconds() * 1000)) 66 | 67 | # run DNN model 68 | print("Running model..") 69 | tstart = datetime.datetime.now() 70 | dnnOutputs = 
model.eval(arguments)[0] 71 | dnnOutputs = dnnOutputs[:len(currRois)] # remove the zero-padded rois 72 | print("Time running model [ms]: " + str((datetime.datetime.now() - tstart).total_seconds() * 1000)) 73 | 74 | # score all ROIs 75 | tstart = datetime.datetime.now() 76 | labels, scores = scoreRois(classifier, dnnOutputs, svmWeights, svmBias, svmFeatScale, len(classes), 77 | decisionThreshold = vis_decisionThresholds[classifier]) 78 | print("Time making prediction [ms]: " + str((datetime.datetime.now() - tstart).total_seconds() * 1000)) 79 | 80 | # perform non-maxima surpression 81 | tstart = datetime.datetime.now() 82 | nmsKeepIndices = [] 83 | if boUseNonMaximaSurpression: 84 | nmsKeepIndices = applyNonMaximaSuppression(nmsThreshold, labels, scores, currRois) 85 | print("Non-maxima surpression kept {:4} of {:4} rois (nmsThreshold={})".format( 86 | len(nmsKeepIndices), len(labels), nmsThreshold)) 87 | print("Time non-maxima surpression [ms]: " + str((datetime.datetime.now() - tstart).total_seconds() * 1000)) 88 | 89 | # visualize results 90 | imgDebug = visualizeResults(imgPath, labels, scores, currRois, classes, nmsKeepIndices, 91 | boDrawNegativeRois=False, boDrawNmsRejectedRois=False) 92 | imshow(imgDebug, waitDuration=0, maxDim=800) 93 | 94 | # create json-encoded string of all detections 95 | outDict = [{"label": str(l), "score": str(s), "nms": str(False), "left": str(r[0]), "top": str(r[1]), "right": str(r[2]), "bottom": str(r[3])} for l,s, r in zip(labels, scores, currRois)] 96 | for i in nmsKeepIndices: 97 | outDict[i]["nms"] = str(True) 98 | outJsonString = json.dumps(outDict) 99 | print("Json-encoded detections: " + outJsonString[:120] + "...") 100 | print("DONE.") 101 | 102 | #--- optional code ---# 103 | 104 | # write all detections to file, and show how to read in again to visualize 105 | # writeDetectionsFile("detections.tsv", outDict, classes) 106 | # labels2, scores2, currRois2, nmsKeepIndices2 = parseDetectionsFile("detections.tsv", lutClass2Id) 107 | # imgDebug2 = visualizeResults(imgPath, labels2, scores2, currRois2, classes, nmsKeepIndices2, # identical to imgDebug 108 | # boDrawNegativeRois=False, boDrawNmsRejectedRois=False) 109 | # imshow(imgDebug2, waitDuration=0, maxDim=800) 110 | 111 | # extract crop of the highest scored ROI 112 | # maxScore = -float("inf") 113 | # maxScoreRoi = [] 114 | # for index, (label,score) in enumerate(zip(labels,scores)): 115 | # if score > maxScore and label > 0: #and index in nmsKeepIndices: 116 | # maxScore = score 117 | # maxScoreRoi = currRois[index] 118 | # if maxScoreRoi == []: 119 | # print("WARNING: not a single object detected") 120 | # else: 121 | # imgCrop = imgOrig[maxScoreRoi[1]:maxScoreRoi[3], maxScoreRoi[0]:maxScoreRoi[2], :] 122 | # imwrite(imgCrop, outCropDir + os.path.basename(imgPath)) 123 | # imshow(imgCrop) 124 | 125 | -------------------------------------------------------------------------------- /A1_annotateImages.py: -------------------------------------------------------------------------------- 1 | import os, sys, importlib, shutil 2 | import PARAMETERS 3 | locals().update(importlib.import_module("PARAMETERS").__dict__) 4 | 5 | 6 | #################################### 7 | # Parameters 8 | #################################### 9 | imagesToAnnotateDir = "C:/Users/pabuehle/Desktop/newImgs/" 10 | 11 | #no need to change these params 12 | drawingMaxImgSize = 1000.0 13 | annotationsFile = resultsDir + "annotations.tsv" 14 | minNrPixels = -1 15 | 16 | 17 | #################################### 18 | # Functions 19 | 
#################################### 20 | def event_cv2GetRectangles(event, x, y, flags, param): 21 | global cv2GetRectangle_global_bboxes 22 | global cv2GetRectangle_global_leftButtonDownPoint 23 | boLeftMouseDown = flags == cv2.EVENT_FLAG_LBUTTON 24 | 25 | #draw all previous bounding boxes 26 | imgCopy = image.copy() 27 | drawRectangles(imgCopy, cv2GetRectangle_global_bboxes) 28 | if len(cv2GetRectangle_global_bboxes)>0: 29 | drawRectangles(imgCopy, [cv2GetRectangle_global_bboxes[-1]], color = (255, 0, 0)) 30 | 31 | #handle mouse clicks 32 | if event == cv2.EVENT_LBUTTONDOWN: 33 | cv2GetRectangle_global_leftButtonDownPoint = (x, y) 34 | elif event == cv2.EVENT_LBUTTONUP: 35 | pt1 = cv2GetRectangle_global_leftButtonDownPoint 36 | pt2 = (x, y) 37 | minPt = (min(pt1[0], pt2[0]), min(pt1[1], pt2[1])) 38 | maxPt = (max(pt1[0], pt2[0]), max(pt1[1], pt2[1])) 39 | imgWidth, imgHeight = imWidthHeight(image) 40 | minPt = ptClip(minPt, imgWidth, imgHeight) 41 | maxPt = ptClip(maxPt, imgWidth, imgHeight) 42 | cv2GetRectangle_global_bboxes.append(minPt + maxPt) 43 | elif boLeftMouseDown: 44 | cv2.rectangle(imgCopy, cv2GetRectangle_global_leftButtonDownPoint, (x, y), (255, 255, 0), 1) 45 | else: 46 | drawCrossbar(imgCopy, (x, y)) 47 | cv2.imshow("image", imgCopy) 48 | 49 | 50 | def procBoundingBoxes(rectsIn, imageUnscaled, scaleFactor): 51 | if len(rectsIn) <= 0: 52 | return rectsIn 53 | else: 54 | rects = copy.deepcopy(rectsIn) 55 | for index in range(len(rects)): 56 | for i in range(4): 57 | rects[index][i] = int(round(rects[index][i] / scaleFactor)) 58 | imgWidth, imgHeight = imWidthHeight(imageUnscaled) 59 | bboxes = [Bbox(*rect) for rect in rects] 60 | for bbox in bboxes: 61 | bbox.crop(imgWidth, imgHeight) 62 | assert(bbox.isValid()) 63 | return [bbox.rect() for bbox in bboxes] 64 | 65 | 66 | 67 | #################################### 68 | # Main 69 | #################################### 70 | makeDirectory(resultsDir) 71 | imgFilenames = [f for f in os.listdir(imagesToAnnotateDir) if f.lower().endswith(".jpg")] 72 | 73 | print("Using annotations file: " + annotationsFile) 74 | if annotationsFile and os.path.exists(annotationsFile): 75 | shutil.copyfile(annotationsFile, annotationsFile + ".backup.tsv") 76 | data = readTable(annotationsFile) 77 | annotationsLUT = getDictionary(getColumn(data,0), getColumn(data,1), False) 78 | else: 79 | annotationsLUT = dict() 80 | 81 | 82 | #loop over each image and get annotation 83 | for imgFilenameIndex,imgFilename in enumerate(imgFilenames): 84 | print("imgFilenameIndex = {}, imgFilename = {}".format(imgFilenameIndex, imgFilename)) 85 | imgPath = imagesToAnnotateDir + imgFilename 86 | print("Processing image {0} of {1}: {2}".format(imgFilenameIndex, len(imgFilenames), imgPath)) 87 | bBoxPath = imgPath[:-4] + ".bboxes.tsv" 88 | 89 | #compute scale factor 90 | imgWidth, imgHeight = imWidthHeight(imgPath) 91 | scaleFactor = min(1, drawingMaxImgSize / max(imgWidth, imgHeight)) 92 | if imgWidth * imgHeight < minNrPixels: 93 | print("Low resolution ({0},{1}) hence skipping image: {2}.".format(imgWidth, imgHeight, imgPath)) 94 | continue 95 | 96 | #load existing ground truth if provided 97 | cv2GetRectangle_global_bboxes = [] 98 | if os.path.exists(bBoxPath): 99 | print("Skipping image since ground truth already exists: %s." 
% imgPath) 100 | continue 101 | 102 | #draw image 103 | imageUnscaled = imread(imgPath) 104 | image = imresize(imageUnscaled, scaleFactor) 105 | cv2.namedWindow("image") 106 | cv2.setMouseCallback("image", event_cv2GetRectangles) 107 | imgCopy = image.copy() 108 | drawRectangles(imgCopy, cv2GetRectangle_global_bboxes) 109 | cv2.imshow("image", imgCopy) 110 | 111 | #wait for user input 112 | while True: 113 | key = chr(cv2.waitKey()) #& 0xFF 114 | 115 | #skip 116 | if key == "s": 117 | if os.path.exists(bBoxPath): 118 | print("Skipping image hence deleting existing bbox file: " + bBoxPath) 119 | os.remove(bBoxPath) 120 | annotationsLUT[imgPath] = "skip" 121 | if annotationsFile: 122 | writeTable(annotationsFile, sortDictionary(annotationsLUT)) 123 | break 124 | 125 | #undo 126 | if key == "u": 127 | if len(cv2GetRectangle_global_bboxes) >= 1: 128 | cv2GetRectangle_global_bboxes = cv2GetRectangle_global_bboxes[:-1] 129 | imgCopy = image.copy() 130 | drawRectangles(imgCopy, cv2GetRectangle_global_bboxes) 131 | cv2.imshow("image", imgCopy) 132 | 133 | #next image 134 | elif key == "n": 135 | bboxes = procBoundingBoxes(cv2GetRectangle_global_bboxes, imageUnscaled, scaleFactor) 136 | writeTable(bBoxPath, bboxes) 137 | annotationsLUT[imgPath] = bboxes 138 | if annotationsFile: 139 | writeTable(annotationsFile, sortDictionary(annotationsLUT)) 140 | break 141 | 142 | #quit 143 | elif key == "q": 144 | sys.exit() 145 | 146 | cv2.destroyAllWindows() 147 | print("DONE.") -------------------------------------------------------------------------------- /A2_annotateBboxLabels.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import cv2, os, sys, time, importlib 3 | from tkinter import * 4 | from PIL import ImageTk 5 | import PARAMETERS 6 | locals().update(importlib.import_module("PARAMETERS").__dict__) 7 | 8 | 9 | #################################### 10 | # Parameters 11 | #################################### 12 | imagesToAnnotateDir = "C:/Users/pabuehle/Desktop/newImgs/" 13 | 14 | #no need to change these 15 | boxWidth = 10 16 | boxHeight = 2 17 | drawingMaxImgSize = 1000 18 | objectNames = classes[1:] 19 | objectNames = np.sort(objectNames).tolist() 20 | objectNames += ["UNDECIDED", "EXCLUDE"] 21 | 22 | 23 | 24 | #################################### 25 | # Helper functions 26 | #################################### 27 | def buttonPressedCallback(s): 28 | global tkLastButtonPressed 29 | global tkBoButtonPressed 30 | tkLastButtonPressed = s 31 | tkBoButtonPressed = True 32 | 33 | 34 | 35 | #################################### 36 | # Main 37 | #################################### 38 | #create UI 39 | tk = Tk() 40 | w = Canvas(tk, width=len(objectNames) * boxWidth, height=len(objectNames) * boxHeight, bd = boxWidth, bg = 'white') 41 | w.grid(row = len(objectNames), column = 0, columnspan = 2) 42 | for objectIndex,objectName in enumerate(objectNames): 43 | b = Button(width=boxWidth, height=boxHeight, text=objectName, command=lambda s = objectName: buttonPressedCallback(s)) 44 | b.grid(row = objectIndex, column = 0) 45 | 46 | 47 | #loop over all images 48 | imgFilenames = getFilesInDirectory(imagesToAnnotateDir, ".jpg") 49 | for imgIndex, imgFilename in enumerate(imgFilenames): 50 | print("imgIndex={}, imgFilename={}".format(imgIndex, imgFilename)) 51 | labelsPath = imagesToAnnotateDir + "/" + imgFilename[:-4] + ".bboxes.labels.tsv" 52 | if os.path.exists(labelsPath): 53 | continue 54 | 55 | #load image and bboxes 56 | imgPath = 
imagesToAnnotateDir + "/" + imgFilename 57 | print("imgIndex = {}, imgPath = {}".format(imgIndex, imgPath)) 58 | img = imread(imgPath) 59 | rectsPath = imagesToAnnotateDir + "/" + imgFilename[:-4] + ".bboxes.tsv" 60 | rects = readTable(rectsPath) 61 | rects = [ToIntegers(rect) for rect in rects] 62 | 63 | #annotate each rectangle in turn 64 | labels = [] 65 | for rectIndex,rect in enumerate(rects): 66 | imgCopy = img.copy() 67 | drawRectangles(imgCopy, [rect], thickness = 15) 68 | 69 | #draw image in tk window 70 | imgTk, _ = imresizeMaxDim(imgCopy, drawingMaxImgSize) 71 | imgTk = imconvertCv2Pil(imgTk) 72 | imgTk = ImageTk.PhotoImage(imgTk) 73 | label = Label(tk, image=imgTk) 74 | label.grid(row=0, column=1, rowspan=drawingMaxImgSize) 75 | tk.update_idletasks() 76 | tk.update() 77 | 78 | #busy-wait until button pressed 79 | tkBoButtonPressed = False 80 | tkLastButtonPressed = None 81 | while not tkBoButtonPressed: 82 | tk.update_idletasks() 83 | tk.update() 84 | 85 | #store result 86 | print("tkLastButtonPressed", tkLastButtonPressed) 87 | labels.append(tkLastButtonPressed) 88 | 89 | writeFile(labelsPath, labels) 90 | tk.destroy() 91 | print("DONE.") -------------------------------------------------------------------------------- /B1_evaluateRois.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import sys, os, importlib 3 | import PARAMETERS 4 | locals().update(importlib.import_module("PARAMETERS").__dict__) 5 | 6 | 7 | 8 | #################################### 9 | # Parameters 10 | #################################### 11 | subdirs = ['positive'] 12 | 13 | 14 | #################################### 15 | # Main 16 | #################################### 17 | overlaps = [] 18 | roiCounts = [] 19 | for subdir in subdirs: 20 | imgFilenames = getFilesInDirectory(imgDir + subdir, ".jpg") 21 | 22 | #loop over all images 23 | for imgIndex,imgFilename in enumerate(imgFilenames): 24 | if imgIndex % 50 == 0: 25 | print("Processing subdir '{}', image {} of {}".format(subdir, imgIndex, len(imgFilenames))) 26 | # load ground truth 27 | imgPath = imgDir + subdir + "/" + imgFilename 28 | imgWidth, imgHeight = imWidthHeight(imgPath) 29 | gtRois, gtLabels = readGtAnnotation(imgPath) 30 | gtRois = [Bbox(*roi) for roi in gtRois] 31 | 32 | # load rois 33 | rois = readRois(roiDir, subdir, imgFilename) 34 | rois = rois[:cntk_nrRois] # only use the first N rois (similar to rest of code) 35 | rois = [Bbox(*roi) for roi in rois] 36 | roiCounts.append(len(rois)) 37 | 38 | # for each ground truth, compute if it is covered by an roi 39 | for gtIndex, (gtLabel, gtRoi) in enumerate(zip(gtLabels,gtRois)): 40 | maxOverlap = -1 41 | assert (gtRoi.max() <= max(imgWidth, imgHeight) and gtRoi.max() >= 0) 42 | if gtLabel in classes[1:]: 43 | for roi in rois: 44 | assert (roi.max() <= max(imgWidth, imgHeight) and roi.max() >= 0) 45 | overlap = bboxComputeOverlapVoc(gtRoi, roi) 46 | maxOverlap = max(maxOverlap, overlap) 47 | overlaps.append(maxOverlap) 48 | print("Average number of rois per image " + str(int(1.0 * sum(roiCounts) / len(imgFilenames)))) 49 | 50 | #compute recall at different overlaps 51 | recalls = [] 52 | overlaps = np.array(overlaps, np.float32) 53 | for overlapThreshold in np.linspace(0,1,21): 54 | recall = 1.0 * sum(overlaps >= overlapThreshold) / len(overlaps) 55 | recalls.append(recall) 56 | print("At threshold {:.2f}: recall = {:2.2f}".format(overlapThreshold, recall)) 57 | print("Mean recall = 
{:2.2}".format(np.mean(recalls))) -------------------------------------------------------------------------------- /B2_cntkVisualizeInputs.py: -------------------------------------------------------------------------------- 1 | import os, importlib, sys 2 | import PARAMETERS 3 | locals().update(importlib.import_module("PARAMETERS").__dict__) 4 | 5 | 6 | #################################### 7 | # Parameters 8 | #################################### 9 | image_set = 'test' # 'train', 'test' 10 | 11 | #no need to change these parameters 12 | parseNrImages = 50 #for speed reasons only parse CNTK file for the first N images 13 | boUseNonMaximaSurpression = False 14 | 15 | 16 | 17 | #################################### 18 | # Main 19 | #################################### 20 | print("Load ROI co-ordinates and labels") 21 | cntkImgsPath, cntkRoiCoordsPath, cntkRoiLabelsPath, nrRoisPath = cntkInputPaths(cntkFilesDir, image_set) 22 | imgPaths = getColumn(readTable(cntkImgsPath),1) 23 | nrRealRois = [int(s) for s in readFile(nrRoisPath)] 24 | roiAllLabels = parseCntkRoiLabels(cntkRoiLabelsPath, cntk_nrRois, len(classes), parseNrImages) 25 | if parseNrImages: 26 | imgPaths = imgPaths[:parseNrImages] 27 | nrRealRois = nrRealRois[:parseNrImages] 28 | roiAllLabels = roiAllLabels[:parseNrImages] 29 | roiAllCoords = parseCntkRoiCoords(imgPaths, cntkRoiCoordsPath, cntk_nrRois, cntk_padWidth, cntk_padHeight, parseNrImages) 30 | assert(len(imgPaths) == len(roiAllCoords) == len(roiAllLabels) == len(nrRealRois)) 31 | 32 | 33 | #loop over all images and visualize 34 | for imgIndex,imgPath in enumerate(imgPaths): 35 | print("Visualizing image %d at %s..." %(imgIndex,imgPath)) 36 | roiCoords = roiAllCoords[imgIndex][:nrRealRois[imgIndex]] 37 | roiLabels = roiAllLabels[imgIndex][:nrRealRois[imgIndex]] 38 | 39 | #perform non-maxima surpression. note that the detected classes in the image is not affected by this. 
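    # (i.e. NMS greedily keeps the top-scoring ROI and discards ROIs that overlap it by more than nmsThreshold, then repeats; here all scores are passed as 0, so which ROI of an overlapping group survives is arbitrary)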
40 | nmsKeepIndices = [] 41 | if boUseNonMaximaSurpression: 42 | imgWidth, imgHeight = imWidthHeight(imgPath) 43 | nmsKeepIndices = applyNonMaximaSuppression(nmsThreshold, roiLabels, [0] * len(roiLabels), roiCoords) 44 | print("Non-maxima surpression kept {} of {} rois (nmsThreshold={})".format(len(nmsKeepIndices), len(roiLabels), nmsThreshold)) 45 | 46 | #visualize results 47 | imgDebug = visualizeResults(imgPath, roiLabels, None, roiCoords, classes, nmsKeepIndices, boDrawNegativeRois=False) 48 | imshow(imgDebug, waitDuration=0, maxDim = 800) 49 | print("DONE.") 50 | -------------------------------------------------------------------------------- /B3_cntkAnalyzeInputs.py: -------------------------------------------------------------------------------- 1 | import os, sys, importlib 2 | import shutil, time 3 | import PARAMETERS 4 | locals().update(importlib.import_module("PARAMETERS").__dict__) 5 | 6 | 7 | #################################### 8 | # Parameters 9 | #################################### 10 | image_set = "train" 11 | 12 | 13 | #################################### 14 | # Main 15 | #################################### 16 | # read ground truth and ROIs 17 | if not os.path.exists(cntkFilesDir + image_set + ".cache_gt_roidb.pkl"): 18 | raise Exception("Run 2_cntkGenerateInputs.py before executing this script.") 19 | imdb = imdbs[image_set] 20 | gtRois = imdb.gt_roidb() 21 | print("Number of images in set '{}' = {}".format(image_set, imdb.num_images)) 22 | 23 | # extract width, height, etc for all ground truth annotations in all images 24 | roiInfos = [] 25 | for imgIndex in range(0, imdb.num_images): 26 | imgPath = imdb.image_path_at(imgIndex) 27 | imgWidth, imgHeight = imWidthHeight(imgPath) 28 | 29 | if gtRois[imgIndex] != None: 30 | for gtRoi in gtRois[imgIndex]['boxes']: 31 | roiWidth = gtRoi[2] - gtRoi[0] +1 32 | roiHeight = gtRoi[3] - gtRoi[1] +1 33 | roiRelWidth = float(roiWidth) / imgWidth 34 | roiRelHeight = float(roiHeight) / imgHeight 35 | roiInfos.append((roiRelWidth, roiRelHeight, roiRelWidth * roiRelHeight, roiRelWidth / roiRelHeight)) 36 | 37 | # analyse typical width, height, etc of the ground truth annotations 38 | print("\nStatistics for ground truth annotations:") 39 | for percentile in np.linspace(0, 100, 21): 40 | print(" Percentile {:3.0f}: width = {:<.2f}, height = {:<.2f}, area = {:<.3f}, aspectRatio = {:<.2f}".format( 41 | percentile, 42 | np.percentile(getColumn(roiInfos, 0), percentile), 43 | np.percentile(getColumn(roiInfos, 1), percentile), 44 | np.percentile(getColumn(roiInfos, 2), percentile), 45 | np.percentile(getColumn(roiInfos, 3), percentile))) 46 | print("DONE.") 47 | -------------------------------------------------------------------------------- /PARAMETERS.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from helpers import * 3 | from imdb_data import imdb_data 4 | import fastRCNN, time, datetime 5 | from fastRCNN.pascal_voc import pascal_voc 6 | print(datetime.datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S')) 7 | 8 | 9 | ############################ 10 | # Adjust these parameters 11 | # to make scripts run 12 | ############################ 13 | rootDir = os.path.dirname(os.path.realpath(sys.argv[0])) 14 | 15 | ############################ 16 | # default parameters 17 | ############################ 18 | datasetName = "grocery" 19 | 20 | #directories 21 | imgDir = rootDir + "/data/" + datasetName + "/" 22 | procDir = rootDir + "/proc/" + 
datasetName + "/" 23 | resultsDir = rootDir + "/results/" + datasetName + "/" 24 | roiDir = procDir + "rois/" 25 | modelDir = procDir + "models/" 26 | cntkFilesDir = procDir + "cntkFiles/" 27 | trainedSvmDir = procDir + "trainedSvm/" 28 | cntkResourcesDir = rootDir + "/resources/cntk/" 29 | 30 | # ROI generation 31 | roi_maxImgDim = 200 # image size used for ROI generation 32 | roi_minDimRel = 0.01 # minimum relative width/height of a ROI 33 | roi_maxDimRel = 1.0 # maximum relative width/height of a ROI 34 | roi_minNrPixelsRel = 0 # minimum relative area covered by a ROI 35 | roi_maxNrPixelsRel = 1.0 # maximum relative area covered by a ROI 36 | roi_maxAspectRatio = 4.0 # maximum aspect ratio of a ROI, both vertically and horizontally 37 | ss_minSize = 20 # for a description of the selective search parameters see: 38 | ss_kvals = (50, 500, 6) # http://dlib.net/dlib/image_transforms/segment_image_abstract.h.html#find_candidate_object_locations 39 | ss_max_merging_iterations = 20 # 40 | ss_nmsThreshold = 0.85 # non-maxima suppression threshold run after selective search 41 | grid_nrScales = 7 # uniform grid ROIs: number of iterations from largest possible ROI to smaller ROIs 42 | grid_stepSizeRel = 0.5 # uniform grid ROIs: step size for sliding windows 43 | grid_aspectRatios = [1.0, 2.0, 0.5] # uniform grid ROIs: allowed aspect ratios of ROIs 44 | grid_downscaleRatioPerIteration = 1.5 # uniform grid ROIs: relative ROI width/height reduction per iteration, starting from largest possible ROI 45 | 46 | # cntk model 47 | cntk_nrRois = 2000 # DNN input number of ROIs per image. Zero-padded/truncated if necessary 48 | cntk_padWidth = 1000 # DNN input image width [pixels] 49 | cntk_padHeight = 1000 # DNN input image height [pixels] 50 | cntk_featureDimensions = {'svm': 4096} # DNN output, dimension of each ROI 51 | 52 | # nn and svm training 53 | classifier = 'svm' # Options: 'svm', 'nn'. Train either a Support Vector Machine, or directly the Neural Network 54 | train_posOverlapThres = 0.5 # DNN and SVM threshold for marking ROIs with significant overlap with a GT object as positive 55 | 56 | # nn training 57 | cntk_max_epochs = 18 # number of training epochs (only relevant if 'classifier' is set to 'nn') 58 | cntk_mb_size = 5 # minibatch size 59 | cntk_l2_reg_weight = 0.0005 # l2 regularizer weight 60 | cntk_lr_per_image = [0.01] * 10 + [0.001] * 5 + [0.0001] # learning rate per image 61 | cntk_momentum_time_constant = 10 # momentum 62 | 63 | # svm training 64 | svm_C = 0.001 # regularization parameter of the soft-margin error term 65 | svm_B = 10.0 # intercept scaling 66 | svm_nrEpochs = 2 # number of training iterations 67 | svm_retrainLimit = 2000 # number of new items to trigger SVM training 68 | svm_evictThreshold = -1.1 # remove easy negatives with decision value below this threshold 69 | svm_posWeight = "balanced" # automatically balance training set to correct for the majority of ROIs being negative 70 | svm_targetNorm = 20.0 # magic value from traditional R-CNN (helps with convergence) 71 | svm_penality = 'l2' # penalty norm 72 | svm_loss = 'l1' # loss norm 73 | svm_rngSeed = 3 # seed for randomization 74 | 75 | # postprocessing 76 | nmsThreshold = 0.3 # Non-Maxima suppression threshold (in range [0,1]) 77 | # The lower, the more ROIs will be combined. 
Used during evaluation and visualization (scripts 5_) 78 | vis_decisionThresholds = {'svm' : 0.5, # Reject detections with low confidence, used only in 5_visualizeResults 79 | 'nn' : None} 80 | 81 | # evaluation 82 | evalVocOverlapThreshold = 0.5 # voc-style intersection-over-union threshold used to determine if object was found 83 | 84 | 85 | 86 | ############################ 87 | # project-specific 88 | # parameters / overrides 89 | ############################ 90 | if datasetName.startswith("grocery"): 91 | classes = ('__background__', # always have '__background__' be at index 0 92 | "orange", "eggBox", "joghurt", "ketchup", "squash", "mushroom", "water", "mustard") 93 | 94 | 95 | # roi generation 96 | cntk_nrRois = 200 #this number is too low to get good accuracy but allows for fast training and scoring (for demo purposes) 97 | roi_minDimRel = 0.04 98 | roi_maxDimRel = 0.4 99 | roi_minNrPixelsRel = 2 * roi_minDimRel * roi_minDimRel 100 | roi_maxNrPixelsRel = 0.33 * roi_maxDimRel * roi_maxDimRel 101 | 102 | # postprocessing 103 | nmsThreshold = 0.01 104 | 105 | # database 106 | imdbs = dict() # database provider of images and image annotations 107 | for image_set in ["train", "test"]: 108 | imdbs[image_set] = imdb_data(image_set, classes, cntk_nrRois, imgDir, roiDir, cntkFilesDir, boAddGroundTruthRois = (image_set!='test')) 109 | 110 | 111 | elif datasetName.startswith("pascalVoc"): 112 | classes = ('__background__', 113 | 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 114 | 'dog', 'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor') 115 | lutImageSet = {"train": "trainval", "test": "test"} 116 | 117 | # model training / scoring 118 | classifier = 'nn' 119 | 120 | # cntk model (Should train a model with mean-AP around 0.45) 121 | # more than 99% of the test images have fewer than 4000 rois, but 50% more than 2000 122 | cntk_mb_size = 2 123 | cntk_nrRois = 4000 124 | cntk_lr_per_image = [0.05] * 10 + [0.005] * 5 + [0.0005] 125 | 126 | # database 127 | imdbs = dict() 128 | for image_set, year in zip(["train", "test"], ["2007", "2007"]): 129 | imdbs[image_set] = fastRCNN.pascal_voc(lutImageSet[image_set], year, classes, cntk_nrRois, cacheDir = cntkFilesDir) 130 | print("Number of {} images: {}".format(image_set, imdbs[image_set].num_images)) 131 | 132 | else: 133 | raise Exception("Unknown dataset name: " + datasetName) 134 | 135 | 136 | 137 | ############################ 138 | # computed parameters 139 | ############################ 140 | nrClasses = len(classes) 141 | cntk_featureDimensions['nn'] = nrClasses 142 | lutClass2Id = dict(zip(classes, range(len(classes)))) 143 | 144 | print("PARAMETERS: datasetName = " + datasetName) 145 | assert cntk_padWidth == cntk_padHeight, "ERROR: different width and height for padding not supported." 146 | assert classifier.lower() in ['svm','nn'], "ERROR: only 'nn' or 'svm' classifier supported." 147 | assert not (datasetName == 'pascalVoc' and classifier == 'svm'), "ERROR: 'svm' classifier for pascal VOC not supported." 148 | assert(train_posOverlapThres >= 0 and train_posOverlapThres <= 1) -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | Fast R-CNN Object Detection Tutorial for Microsoft Cognitive Toolkit (CNTK) 3 | ============== 4 | 5 | ```diff 6 | + Update v2.0.1 (June 2017): 7 | + Updated documentation to include Visual Object Tagging Tool as an annotation option. 
8 | + Update v2 (June 2017): 9 | + Updated code to be compatible with the CNTK 2.0.0 release. 10 | + Update v1 (Feb 2017): 11 | + This tutorial was updated to use CNTK's Python wrappers. Now all processing happens in-memory during scoring. See script 6_scoreImage.py for an example. Furthermore, we switched to a much more accurate and faster implementation of Selective Search. 12 | + Note that, at the time of writing, CNTK does not support Python 2. If you need Python 2 then please refer to the [previous version](https://github.com/Azure/ObjectDetectionUsingCntk/tree/7edd3276a189bad862dc54e9f73b7cfcec5ae562) of this tutorial. 13 | ``` 14 | 15 | DESCRIPTION 16 | -------------- 17 | 18 | Object Detection is one of the main problems in Computer Vision. Traditionally, this required expert knowledge to identify and implement so-called “features” that highlight the position of objects in the image. Starting in 2012 with the famous AlexNet paper, Deep Neural Networks have been used to find these features automatically. This led to a huge improvement in the field for a large range of problems. 19 | 20 | This tutorial uses Microsoft Cognitive Toolkit's (CNTK) fast R-CNN implementation (see the [Fast R-CNN](#fast-r-cnn) section for a description), which was shown to produce state-of-the-art results for [Pascal VOC](http://host.robots.ox.ac.uk/pascal/VOC/), one of the main object detection challenges in the field. 21 | 22 | GOALS 23 | -------------- 24 | 25 | The goal of this tutorial is to show how to train and test your own Deep Learning object detection model using [Microsoft Cognitive Toolkit (CNTK)](https://github.com/Microsoft/CNTK). Example data and annotations are provided, but the reader can also bring their own images and train their own, unique object detector. 26 | 27 | The tutorial is split into four parts: 28 | - [Part 1](#part-1) shows how to train an object detection model for the example data without retraining the provided Neural Network, but instead training an external classifier on its output. This approach works particularly well with small datasets, and does not require expertise with deep learning. 29 | - [Part 2](#part-2) extends this approach to refine the Neural Network directly, without the need for an external classifier. 30 | - [Part 3](#part-3) illustrates how to annotate your own images and use these to train an object detection model for your specific use case. 31 | - [Part 4](#part-4) covers how to reproduce published results on the Pascal VOC dataset. 32 | 33 | Previous expertise with Machine Learning is not required to complete this tutorial, but it is very helpful for understanding the underlying principles. More information on the topic can be found at [CNTK's Fast-RCNN page](https://github.com/Microsoft/CNTK/tree/master/Examples/Image/Detection/FastRCNN). 34 | 35 | 36 | 37 | 38 | PREREQUISITES 39 | -------------- 40 | 41 | This tutorial was tested using CNTK v2.0.0, and assumes that CNTK was installed with the (default) Anaconda Python interpreter. Note that the code will only run on v2.0 due to breaking changes in other versions. 42 | 43 | CNTK can be easily installed by following the instructions on the [script-driven installation page](https://github.com/Microsoft/CNTK/wiki/Setup-Windows-Binary-Script). This will also automatically add an Anaconda Python distribution. At the time of writing, the default Python version is 3.5. 44 | 45 | A dedicated GPU is not required, but is recommended for retraining the Neural Network (part 2). 
If you lack a strong GPU, don't want to install CNTK yourself, or want to train a model using multiple GPUs, then consider using Azure's Data Science Virtual Machine. See the [Cortana Intelligence Gallery](https://gallery.cortanaintelligence.com/Solution/Linux-Data-Science-Virtual-Machine-3) for a 1-click deployment solution. 46 | 47 | 56 | 57 | Several Python packages are required to execute the Python scripts. These libraries can be installed easily using the provided Python wheels by opening a command prompt and running: 58 | ````bash 59 | c:/local/CNTK-2-0/cntk/Scripts/cntkpy35.bat 60 | cd resources/python35_64bit_requirements/ 61 | pip.exe install -r requirements.txt 62 | ```` 63 | 64 | In the code snippet above, we assumed that the CNTK root directory is C:/local/CNTK-2-0/. The Python wheels were originally downloaded from this [page](http://www.lfd.uci.edu/~gohlke/pythonlibs/). 65 | 66 | Finally, the file *AlexNet.model* is too big to be hosted on GitHub and hence needs to be downloaded manually from [here](https://www.cntk.ai/Models/AlexNet/AlexNet.model) and saved as */resources/cntk/AlexNet.model*. 67 | 68 | 69 | 70 | FOLDER STRUCTURE 71 | -------------- 72 | 73 | |Folder| Description 74 | |---|--- 75 | |/| Root directory 76 | |/data/| Directory containing images for different object recognition projects 77 | |/data/grocery/| Example data for grocery item detection in refrigerators 78 | |/data/grocery/positive/| Images and annotations to train the model 79 | |/data/grocery/negative/| Images used as negatives during model training 80 | |/data/grocery/testImages/| Test images used to evaluate model accuracy 81 | |/doc/| Resources such as images for this readme page 82 | |/fastRCNN/| Slightly modified code used in R-CNN publications 83 | |/resources/| All provided resources are in here 84 | |/resources/cntk/| CNTK configuration file and pre-trained AlexNet model 85 | |/resources/python35_64bit_requirements/| Python wheels and requirements file for 64-bit Python version 3.5 86 | 87 | 88 | All scripts used in this tutorial are located in the root folder. 89 | 90 | 91 | PART 1 92 | -------------- 93 | In the first part of this tutorial we will train a classifier which uses, but does not modify, a pre-trained deep neural network. See the [Fast R-CNN](#fast-r-cnn) section for details of the employed approaches. As example data, 25 images of grocery items inside refrigerators are provided, split into 20 images for training while the remaining 5 are used as the test set. The training images contain a total of 180 annotated objects: 94 | ``` 95 | Egg box, joghurt, ketchup, mushroom, mustard, orange, squash, and water. 96 | ``` 97 | Note that 20 training images is a very low number and too little to train a high-accuracy detector. Nevertheless, even this small dataset is sufficient to return plausible detections, as can be seen in step 5. 98 | The steps have to be executed in order, and after each step we recommend inspecting which files are written, where they are written to, and what their content is (mostly the content is written as plain text files). 99 | 100 | 101 | 102 | 103 | ### STEP 1: Computing Regions of Interest 104 | `Script: 1_computeRois.py` 105 | 106 | Regions of interest (ROIs) are computed for each image independently using a 3-step approach: First, Selective Search is used to generate hundreds of ROIs per image. These ROIs often fit tightly around some objects but miss other objects in the image (see the [Selective Search](#selective-search) section). 
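The selective-search call itself is compact. Below is a minimal sketch of this first step, assuming the `dlib` package and reusing the `ss_*` values from `PARAMETERS.py`; the tutorial's own wrapper (`computeRois`) additionally downscales the image to `roi_maxImgDim` and post-filters the boxes:

```python
import cv2, dlib

# load an example training image and convert BGR -> RGB for dlib
img = cv2.cvtColor(cv2.imread("data/grocery/positive/0.jpg"), cv2.COLOR_BGR2RGB)

rects = []  # filled in-place with dlib.rectangle objects
dlib.find_candidate_object_locations(img, rects, kvals=(50, 500, 6),
                                     min_size=20, max_merging_iterations=20)
rois = [[r.left(), r.top(), r.right(), r.bottom()] for r in rects]
print("Selective search proposed {} ROIs".format(len(rois)))
```
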
Many of the ROIs are bigger, smaller, etc. than the typical grocery item in our dataset. Hence, in a second step, these ROIs, as well as ROIs which are too similar to one another, are discarded. Finally, to complement the ROIs detected by Selective Search, ROIs that uniformly cover the image are added at different scales and aspect ratios. 107 | 108 | The final ROIs are written for each image separately to the files *[imageName].roi.txt* in the *proc/grocery/rois/* folder. 109 | 110 | For the grocery dataset, selective search typically generates around 1000 ROIs per image, plus on average another 2000 ROIs sampled uniformly from the image. A high number of ROIs typically leads to better object detection performance, at the expense, however, of longer running time. Hence the parameter `cntk_nrRois` can be used to keep only a subset of the ROIs (e.g. if `cntk_nrRois = 2000` then typically all ROIs from selective search are preserved, plus the 1000 largest ROIs generated using uniform sampling). 111 | 112 | The goodness of these ROIs can be measured by counting how many of the ground truth annotated objects in the image are covered by at least one ROI, where "covered" is defined as having an overlap greater than a given threshold. Script `B1_evaluateRois.py` outputs these counts at different threshold values (a minimal sketch of this computation follows the figure below). For example, for a threshold of 0.5 and 2000 ROIs, the recall is around 98%, while with 200 ROIs the recall is around 85%. It is important that the recall at a threshold of 0.5 is close to 100%, since even a perfect classifier cannot find an object in the image if it is not covered by at least one ROI. 113 | 114 | ROIs computed using Selective Search (left); ROIs from the image above after discarding ROIs that are too small, too big, etc. (middle); Final set of ROIs after adding ROIs that uniformly cover the image (right). 115 | <p align="center">

116 | alt text 117 | alt text 118 | alt text 119 |

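To make the grid-ROI step concrete, below is a minimal sketch of how boxes that uniformly cover an image at several scales and aspect ratios could be generated. It is an illustration only, not the tutorial's actual implementation (that code lives in `1_computeRois.py` and is controlled by the "ROI generation" parameters in `PARAMETERS.py`); the function name and all default values are made up:

```python
# Illustrative sketch only: slide boxes of several scales and aspect ratios
# over the image. Names and default values are hypothetical.
def gridRois(imgWidth, imgHeight, scales=(0.3, 0.5, 0.7), aspectRatios=(0.5, 1.0, 2.0)):
    rois = []
    for scale in scales:                            # relative box size
        for ar in aspectRatios:                     # box width / height ratio
            w = int(imgWidth  * scale * ar ** 0.5)
            h = int(imgHeight * scale / ar ** 0.5)
            # step the box over the image with roughly 50% overlap between neighbors
            for x in range(0, imgWidth - w + 1, max(1, w // 2)):
                for y in range(0, imgHeight - h + 1, max(1, h // 2)):
                    rois.append((x, y, w, h))       # (x, y, width, height)
    return rois

print(len(gridRois(1000, 1000)))                    # number of generated grid ROIs
```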
120 | 121 | 122 | ### STEP 2: Computing CNTK inputs 123 | `Script: 2_cntkGenerateInputs.py` 124 | 125 | Each ROI generated in the last step has to be run through the CNTK model to compute its 4096-float Deep Neural Network representation (see the [Fast R-CNN](#fast-r-cnn) section). This requires three CNTK-specific input files to be generated for the training and the test set: 126 | - *{train,test}.txt*: each row contains the path to an image. 127 | - *{train,test}.rois.txt*: each row contains all ROIs for an image in relative (x,y,w,h) co-ordinates. 128 | - *{train,test}.roilabels.txt*: each row contains the labels for the ROIs in one-hot encoding. 129 | 130 | An in-depth understanding of how these files are structured is not necessary to follow this tutorial. However, two points are worth noting: 131 | - CNTK's Fast R-CNN implementation requires all images to be of the same size. For this reason, all images are first scaled and then centered and zero-padded (i.e. columns of gray-colored pixels are added to the left and right of the image, or rows at the top and bottom, respectively). Note that the scaling preserves the original aspect ratio. For our experiments, the input to the Neural Network is 1000 x 1000 pixels. 132 | Interestingly, upscaling an image can significantly improve accuracy if the objects to be detected are small (this is due to objects in ImageNet typically having a width and height of 100-200 pixels). 133 | - CNTK expects each image to have the same number of ROIs (for our experiments we use 2000). Hence, if the computation in step 1 returned more ROIs, then only the first 2000 are used. Likewise, if fewer ROIs were found, then the remaining spots are filled using ROIs with co-ordinates of (0,0,0,0). These “zero-padded” ROIs are only used during CNTK execution and have no influence on the training / test performance. 134 | 135 | This step writes the above-mentioned files to the directory *proc/grocery/cntkFiles/*. For debugging, the script `B2_cntkVisualizeInputs.py` can be used to visualize the content of these files (e.g. the Figure at the end of step 4 was generated using this script). 136 | 137 | 138 | 139 | ### STEP 3: Running CNTK 140 | `Script: 3_runCntk.py` 141 | 142 | We can now run the CNTK training, which takes as input the co-ordinates and labels files from the last step and writes the 4096-float embedding for each ROI of each image to *proc/grocery/cntkFiles/{train,test}_svm_parsed/[imageName].dat.npz*. This will take a few minutes and will automatically run on the GPU if one is detected. 143 | 144 | Note: Look for the line "Using GPU for training." in the console output to make sure the training runs on the GPU and not the CPU (which would be too slow). Also note that a previous CNTK run might still be running and blocking the GPU. 145 | 146 | 147 | ### STEP 4: Classifier training 148 | `Script: 4_trainSvm.py` 149 | 150 | We now train the classifier which, given an ROI as input, assigns it to one of the grocery items or to a “background” class. 151 | 152 | We use a slightly modified version of the published R-CNN code to train a linear SVM classifier. The main change is to load the 4096-float ROI embeddings from disk rather than running the network on-the-fly. An in-depth explanation of the training procedure can be found in the [R-CNN paper](http://arxiv.org/abs/1311.2524).
For the purpose of this tutorial we treat the training script as a black box which takes the training ROIs as input (or, to be precise, their 4096-float representations) and outputs N+1 linear classifiers: one for each class, plus one for the background. 153 | 154 | The training starts by loading all positive ROIs into memory. "Positive" here refers to ROIs that have a significant overlap with a ground truth annotated object. Negatives are then iteratively added using hard negative mining, and the SVM is retrained. A list and short description of the parameters that govern the SVM training can be found in the script `PARAMETERS.py`. 155 | 156 | The learned linear classifier for each class, i.e. a weight vector of 4096 floats plus a single float representing the bias term, is then written to the folder *proc/grocery/trainedSVMs/*. 157 | 158 | 159 | 160 | ### STEP 5: Evaluation and visualization 161 | `Scripts: 5_evaluateResults.py and 5_visualizeResults.py` 162 | 163 | Once training has succeeded, the model can be used to find objects in images. For this, every ROI in an image is classified and assigned a confidence of being orange, ketchup, ..., or background. The class with the highest confidence is then selected (most often “background”), and optionally a threshold is applied to reject detections with low confidence (a minimal sketch of this decision rule follows the detection figure below). 164 | 165 | The accuracy of the classifier can be measured using the script `5_evaluateResults.py`. This outputs the mean Average Precision (mAP; see the [Mean Average Precision](#mean-average-precision) section) for either the training or the test set. Keep in mind that the test set contains only 5 images, and hence these numbers need to be taken with a grain of salt. Due to randomization effects, one might get very different results when re-running the script. 166 | 167 | 168 | 169 | Results using 200 ROIs (this number is too low to get good accuracy but for demo purposes allows for fast training and scoring): 170 | 171 | |Dataset| AP(orange)|AP(eggBox)|AP(joghurt)|AP(ketchup)| | mAP 172 | |---|---|---|---|---|---|--- 173 | |Test Set| 0.45 |1.00 |0.82 |0.76 | |**0.63** 174 | 175 | Results using 2000 ROIs: 176 | 177 | |Dataset| AP(orange)|AP(eggBox)|AP(joghurt)|AP(ketchup)| | mAP 178 | |---|---|---|---|---|---|--- 179 | |Test Set| 0.32 | 0.48 | 0.82 | 0.82 | |**0.65** 180 | 181 | The output of the classifier using 2000 ROIs can be visualized using the script `5_visualizeResults.py`. Only ROIs classified as a grocery item (not background) are shown, and only if the confidence in the detection is 0.5 or above. Multiple ROIs are combined into single detections using [Non-Maxima Suppression](#non-maxima-suppression), the output of which is visualized below for the test images. 182 | 183 |

184 | alt text 185 | alt text 186 | alt text 187 | alt text 188 | alt text 189 |

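As referenced in step 5 above, here is a minimal sketch of that per-ROI decision rule: each 4096-float ROI embedding is scored against the learned linear classifiers, the class with the highest score wins, and low-confidence detections are rejected. The names and the threshold value are made up for illustration; the tutorial's actual implementation differs:

```python
import numpy as np

# Illustrative sketch: score each ROI embedding against the learned linear
# classifiers. 'svmWeights' has shape (nrClasses+1, 4096), 'svmBiases' has
# shape (nrClasses+1,); index 0 is assumed to be the background class.
def classifyRois(roiEmbeddings, svmWeights, svmBiases, threshold=0.5):
    scores = roiEmbeddings.dot(svmWeights.T) + svmBiases   # (nrRois, nrClasses+1)
    labels = scores.argmax(axis=1)                         # class with the highest confidence
    confidences = scores.max(axis=1)
    labels[confidences < threshold] = 0                    # reject low-confidence detections as background
    return labels, confidences

# Example with random data: 2000 ROIs, 8 grocery classes plus background
labels, confidences = classifyRois(np.random.randn(2000, 4096),
                                   np.random.randn(9, 4096), np.zeros(9))
```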
190 | 191 | In addition to visualizing the detected objects, the script `5_visualizeResults.py` also computes precision and recall after rejecting all detections with confidence scores below a given threshold. This information can be used to set the operating point of the final classifier: for example, given the table below, to reach 85% precision all detections with a score below 5.0 would have to be rejected (a minimal sketch of this computation follows the figure below). 192 | 193 |

194 | alt text 195 |

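The computation behind such a precision/recall table can be sketched as follows. This is a simplified, stand-alone illustration with made-up numbers, not the code of `5_visualizeResults.py`:

```python
# Illustrative sketch: precision and recall after rejecting all detections
# whose score falls below a threshold.
def precisionRecallAtThreshold(scores, isCorrect, nrGroundTruthObjects, threshold):
    kept = [correct for score, correct in zip(scores, isCorrect) if score >= threshold]
    if not kept:
        return 0.0, 0.0
    truePositives = sum(kept)
    precision = truePositives / float(len(kept))            # correct fraction of kept detections
    recall = truePositives / float(nrGroundTruthObjects)    # fraction of annotated objects found
    return precision, recall

# Four detections with made-up scores and correctness flags, 4 annotated objects:
print(precisionRecallAtThreshold([7.1, 5.3, 4.2, 1.0], [True, True, False, True], 4, 5.0))
# -> (1.0, 0.5): at threshold 5.0, two detections are kept and both are correct
```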
196 | 197 | 198 | 199 | ### STEP 6: Scoring images 200 | `Script: 6_scoreImage.py` 201 | 202 | Up to now our focus was on training a model and evaluating its performance. Hence all steps were performed one-by-one, and intermediate results were written to and loaded from disk. During scoring, given one or more images, it is preferable to perform all steps in-memory. This is exactly what the script `6_scoreImage.py` does: it loads a given image, computes the ROIs, runs each ROI through the DNN, evaluates the trained SVM if needed, and finally outputs a list of the detected objects. 203 | 204 | Note that the script calls functions in `helpers_cntk.py` which were originally written for steps 1-5. Loading the model takes a few seconds, but this only has to be done once; the model can then be kept in memory (e.g. in a web service which waits for images to be uploaded). 205 | 206 | 207 | 208 | PART 2 209 | -------------- 210 | In part 1 we learned how to classify ROIs by training a linear Support Vector Machine on the output of a given Neural Network. We will now show how to instead perform this classification directly in the Deep Neural Network. This is achieved by adding a new last layer which, given the input from the last fully connected layer, outputs the probability of each ROI belonging to each class. See the section [SVM vs NN training](#svm-vs-nn-training) for the pros and cons of the two approaches. 211 | 212 | Training the Neural Network instead of an SVM is done by simply changing the variable `classifier` in `PARAMETERS.py` from "svm" to "nn". Then, as described in part 1, all the scripts need to be executed in order, except for the SVM training in step 4. This will add a classification layer to the network, train the last layer(s) of the network, and for each ROI write its classification label and confidence to disk (rather than the 4096-float representation which was required to train the SVM). Note that NN training can cause an out-of-memory error on less powerful machines, which can possibly be avoided by reducing the minibatch size and, if needed, also the number of ROIs per image (see the variables `cntk_mb_size` and `cntk_nrRois` in `PARAMETERS.py`; a minimal example of these settings is sketched after the figures below). 213 | 214 | After running all steps, the mean Average Precision should roughly match the results below. 215 | 216 | Using 200 ROIs: 217 | 218 | |Dataset| AP(orange)|AP(eggBox)|AP(joghurt)|AP(ketchup)| | mAP 219 | |---|---|---|---|---|---|--- 220 | |Test Set| 0.45 |0.97 |0.82 |1.00 | |**0.70** 221 | 222 | Using 2000 ROIs: 223 | 224 | |Dataset| AP(orange)|AP(eggBox)|AP(joghurt)|AP(ketchup)| | mAP 225 | |---|---|---|---|---|---|--- 226 | |Test Set| 1.00 |0.92 |1.00 |0.07 | |**0.87** 227 | 228 | 245 | 246 | 263 | 264 | The output of the Neural Network with 2000 ROIs on the five test images, after Non-Maxima Suppression to combine multiple detections, should look like this: 265 |

266 | alt text 267 | alt text 268 | alt text 269 | alt text 270 | alt text 271 |

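As a concrete example, the out-of-memory workaround mentioned above could look like the sketch below in `PARAMETERS.py`. The variable names are the ones referenced in the text; the numeric values are only assumptions and need to be tuned to your GPU memory:

```python
# In PARAMETERS.py: switch from SVM training (part 1) to NN training (part 2).
classifier   = "nn"    # was "svm" in part 1
cntk_mb_size = 2       # example value: reduce the minibatch size first on out-of-memory errors
cntk_nrRois  = 2000    # example value: if needed, also reduce the number of ROIs per image
```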
272 | 273 | PART 3 274 | -------------- 275 | So far we have trained and evaluated object detectors using the provided grocery dataset. It is straightforward to use a custom dataset instead: the necessary scripts for image annotation are included in the repository, and only minor code changes are required to point to a new dataset. 276 | 277 | First, let's have a look at the folder structure and the provided annotation files for the grocery data: 278 | Note how all positive, negative and test images and their annotations are in the subfolders *positive*, *negative* and *testImages* of *data/grocery/*. Each image (with the exception of the negative images) has (i) a similarly named *[imageName].bboxes.tsv* file where each row contains the co-ordinates of a manually labeled object (i.e. its bounding box); and (ii) a *[imageName].bboxes.labels.tsv* file where each row contains the class of the corresponding object (e.g. avocado or orange). A minimal sketch showing how to parse these files follows the annotation figure below. 279 | 280 | 281 | ### Image Annotation 282 | 283 | **Option #1: Visual Object Tagging Tool (Recommended)** 284 | 285 | The [Visual Object Tagging Tool (VOTT)](https://github.com/CatalystCode/VOTT) is a cross-platform annotation tool for tagging video and image assets. 286 | 287 | ![Vott Screen Shot](https://github.com/CatalystCode/VOTT/blob/master/media/4_Tagging_Job.jpg) 288 | 289 | VOTT provides the following **features**: 290 | 291 | - Computer-assisted tagging and tracking of objects in videos using the [Camshift tracking algorithm](http://opencv.jp/opencv-1.0.0_org/docs/papers/camshift.pdf). 292 | - Exporting tags and assets to CNTK Fast-RCNN format for training an object detection model. 293 | - Running and validating a trained CNTK object detection model on new videos to generate stronger models. 294 | 295 | How to annotate with VOTT: 296 | 297 | 1. Download the latest [Release](https://github.com/CatalystCode/VOTT/releases) 298 | 2. Follow the [Readme](https://github.com/CatalystCode/VOTT/blob/master/README.md) to run a tagging job 299 | 3. After tagging, export the tags to the dataset directory 300 | 301 | 302 | **Option #2: Using Annotation Scripts** 303 | 304 | The two annotation files per image can be generated using the scripts `A1_annotateImages.py` and `A2_annotateBboxLabels.py`. 305 | 306 | The first script lets the user draw rectangles around each object (see the left image below). Once all objects in an image are annotated, pressing the key 'n' writes the *.bboxes.tsv* file and proceeds to the next image, 'u' undoes (i.e. removes) the last rectangle, and 'q' quits the annotation tool. 307 | 308 | The second script loads these manually annotated rectangles for each image, displays them one-by-one, and asks the user to provide the object class by clicking on the respective button to the left of the window (see the right image below). Ground truth annotations marked as either "undecided" or "exclude" are fully excluded from further processing. 309 |

310 | alt text 311 | alt text 312 |

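Since the annotation format is plain text, reading it back is straightforward. Below is a minimal, self-contained sketch of how the two files of an image could be parsed; the helper name is made up, and this is not the repository's own loading code. It assumes the format shown in the data folder: one box per row with four whitespace-separated integers in *.bboxes.tsv*, and one class name per row in *.bboxes.labels.tsv*:

```python
# Illustrative sketch: load the annotation for one image.
def loadAnnotation(imgPath):
    boxPath = imgPath[:-len(".jpg")] + ".bboxes.tsv"
    labelPath = imgPath[:-len(".jpg")] + ".bboxes.labels.tsv"
    with open(boxPath) as f:
        boxes = [[int(v) for v in line.split()] for line in f if line.strip()]
    with open(labelPath) as f:
        labels = [line.strip() for line in f if line.strip()]
    assert len(boxes) == len(labels), "each box needs exactly one class label"
    return boxes, labels

boxes, labels = loadAnnotation("data/grocery/positive/0.jpg")
print(labels[0], boxes[0])   # -> joghurt [213, 337, 329, 473]
```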
313 | 314 | ### Using a custom dataset 315 | 316 | If you used VOTT to generate and export your dataset, it will already be sorted into the *positive*, *negative* and *testImages* subfolders. 317 | 318 | Otherwise, once all (non-negative) images are annotated using the annotation scripts, the images and their annotation files should be copied to the *positive*, *negative* and *testImages* subfolders of a new directory called *data/myOwnImages/*, where the string "myOwnImages" can be replaced at will. 319 | 320 | The only required code change is to update the `datasetName` variable in `PARAMETERS.py` to the newly created folder: 321 | ```python 322 | datasetName = "myOwnImages" 323 | ``` 324 | 325 | All steps in part 1 can then be executed in order and will use the new dataset. 326 | 327 | 328 | ### How to get good results 329 | 330 | As is true for most Machine Learning projects, getting good results requires careful parameter tuning. To help with this, all important parameters are specified, and briefly explained, in a single place: the `PARAMETERS.py` file. 331 | 332 | Here are a few tips on how to find good parameters and design a good training set: 333 | - Select images carefully and perform annotations consistently across all images. Typically, all objects in an image need to be annotated, even if the image contains many of them; it is common practice to remove such cluttered images instead. The same holds for images where one is uncertain about the label of an object, or where it is unclear whether the object should be annotated at all (e.g. due to truncation, occlusion, motion blur, etc.). 334 | - During Region-of-Interest generation in step 1, all ROIs which are deemed too small, too big, etc. are discarded. This filtering step relies on thresholds on the respective properties, which are defined in `PARAMETERS.py` (paragraph "ROI generation"). 335 | Visualizing the generated ROIs helps tremendously with debugging and can be done either while computing the ROIs in the script `1_computeRois.py` itself, or by visualizing the CNTK training files using the script `B2_cntkVisualizeInputs.py`. In addition, the script `B1_evaluateRois.py` computes the percentage of annotated ground truth objects that are covered by one or more ROIs (i.e. the recall). Generally, the more ROIs (variable `cntk_nrRois`), the better the accuracy, but at slower training and scoring speeds. 336 | - Training a linear SVM (step 4) is relatively robust, and hence for most problems the corresponding parameters in `PARAMETERS.py` (paragraph "svm training") do not need to be modified. 337 | The evaluation script `5_evaluateResults.py` can be used to verify that the SVM successfully learned to capture the training data (typically the APs are above 0.5). 338 | - Training a Neural Network (part 2) is significantly more difficult, and often requires expert knowledge to make the network converge to a good solution (see [Michael Nielsen's](http://neuralnetworksanddeeplearning.com/) great introduction to Deep Neural Networks). Arguably the most important parameter here is the learning rate (parameter `cntk_lr_per_image`). 339 | - In addition to computing the mAP, always also visualize the results on the test and on the training set. This is done with the script `5_visualizeResults.py` and helps in understanding the error modes and in verifying that the model behaves as expected. 340 | 341 | ### Publishing the model as a REST API 342 | 343 | Finally, the trained model can be used to create a web service or REST API on Azure.
For this, we recommend using Flask, a Python web framework which makes it easy to run Python code in the cloud. See the tutorial [Creating web apps with Flask in Azure](https://azure.microsoft.com/en-us/documentation/articles/web-sites-python-create-deploy-flask-app/) for an introduction to Flask, and the GitHub repo [Azure-WebApp-w-CNTK](https://github.com/ilkarman/Azure-WebApp-w-CNTK) for an example of how to deploy and run CNTK inside a web service on Azure. 344 | 345 | 346 | 347 | 348 | PART 4 349 | -------------- 350 | 354 | 355 | The last part of this tutorial shows how to reproduce published results on the Pascal VOC dataset. 356 | 357 | First, the Pascal VOC data as well as the pre-computed Selective Search boxes need to be downloaded from these links: [VOCtest_06-Nov-2007.tar](http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar), 358 | [VOCtrainval_06-Nov-2007.tar](http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar), 359 | [selective_search_data.tgz](http://www.cs.berkeley.edu/~rbg/fast-rcnn-data/selective_search_data.tgz). 360 | 361 | 362 | 363 | 364 | 365 | 366 | These three archives should be extracted and copied into the *resources/pascalVocData/* directory. Your resources folder should then look like this: 367 | ```bash 368 | resources/pascalVocData/selective_search_data 369 | resources/pascalVocData/VOCdevkit2007/VOC2007 370 | resources/pascalVocData/VOCdevkit2007/VOC2007/Annotations 371 | resources/pascalVocData/VOCdevkit2007/VOC2007/ImageSets 372 | resources/pascalVocData/VOCdevkit2007/VOC2007/JPEGImages 373 | ``` 374 | 375 | Second, the `datasetName` variable in `PARAMETERS.py` needs to point to the Pascal VOC dataset instead of our grocery dataset: 376 | ```python 377 | datasetName = "pascalVoc" 378 | ``` 379 | 380 | Now the steps from part 1 can be executed in order, with two exceptions: 381 | - Step 1: ROI generation is not necessary since we use the downloaded Selective Search boxes instead. 382 | - Step 4: SVM training is not necessary since the classification is done by adding a new softmax layer to the network (similar to part 2). 383 | 384 | Note that Pascal VOC is a very big dataset, and hence some of the steps (especially the CNTK training in step 3) will take hours to complete. 385 | 386 | The table below shows the mean Average Precision (mAP) of our final model and compares this figure to the corresponding experiment in the [Fast R-CNN](https://arxiv.org/pdf/1504.08083v2.pdf) paper (Table 6, group "S"). Note that this tutorial uses an AlexNet architecture and that we do not perform bounding box regression. To be consistent with the paper, our model is trained using the VOC 2007 "trainval" set, and the mean Average Precision is computed on the VOC 2007 "test" set. 387 | 388 | |Dataset| mAP 389 | |---|--- 390 | |Published results|0.52 391 | |Our results|0.48 392 | 393 | More information on training a Pascal VOC classifier (including a download link to a trained model) can be found on [CNTK's Fast-RCNN page](https://github.com/Microsoft/CNTK/tree/master/Examples/Image/Detection/FastRCNN). 394 | 395 | 396 | TECHNOLOGY 397 | -------------- 398 | 399 | ### Fast R-CNN 400 | R-CNNs for Object Detection were first presented in 2014 by [Ross Girshick et al.](http://arxiv.org/abs/1311.2524) and shown to outperform previous state-of-the-art approaches on one of the major object recognition challenges in the field: [Pascal VOC](http://host.robots.ox.ac.uk/pascal/VOC/).
Since then, two follow-up papers with significant speed improvements were published: [Fast R-CNN](https://arxiv.org/pdf/1504.08083v2.pdf) and [Faster R-CNN](https://arxiv.org/abs/1506.01497). 401 | 402 | The basic idea of R-CNN is to take a deep Neural Network which was originally trained for image classification on millions of annotated images, and to modify it for the purpose of object detection. This idea is illustrated in the Figure below (taken from the first R-CNN paper): (1) Given an input image, (2) a large number of region proposals is generated in a first step. (3) These region proposals, or Regions of Interest (ROIs), are then each independently sent through the network, which outputs a vector of e.g. 4096 floating point values for each ROI. Finally, (4) a classifier is learned which takes the 4096-float ROI representation as input and outputs a label and confidence for each ROI. 403 |

404 | alt text 405 |

406 | 407 | While this approach works well in terms of accuracy, it is very costly to compute since the Neural Network has to be evaluated for each ROI. Fast R-CNN addresses this drawback by evaluating most of the network (to be specific: the convolution layers) only a single time per image. According to the authors, this leads to a 213x speed-up during testing and a 9x speed-up during training, without loss of accuracy. 408 | 409 | The original Caffe implementations used in the R-CNN papers can be found on GitHub: 410 | [RCNN](https://github.com/rbgirshick/rcnn), [Fast R-CNN](https://github.com/rbgirshick/fast-rcnn), and [Faster R-CNN](https://github.com/rbgirshick/py-faster-rcnn). This tutorial uses some of the code from these repositories, notably (but not exclusively) for SVM training and model evaluation. 411 | 412 | ### SVM vs NN training 413 | Above, we described how a linear SVM is trained on the 4096-float ROI embedding. Alternatively, this classification can be performed directly in the Neural Network, using a softmax layer that takes the 4096 floats of the second-to-last fully connected layer as input; the pros and cons of the two approaches are outlined below. 414 | 415 | The advantage of adding a new softmax layer is that the full network can be retrained using backpropagation, including all convolution layers, which can lead to (slightly to moderately) better prediction accuracies. Another (implementation-dependent) advantage is that only (number of classes + 1) floats per ROI need to be written to disk, compared to the 4096-float ROI embedding used to train an SVM. 416 | On the other hand, training a Neural Network requires a good GPU, is even then 1-2 orders of magnitude slower than training an SVM, and requires extensive parameter tweaking and expert knowledge. 417 | 418 | ### Selective Search 419 | [Selective Search](http://koen.me/research/pub/uijlings-ijcv2013-draft.pdf) is a method for finding a large set of possible object locations in an image, independent of the class of the actual object. It works by clustering image pixels into segments and then performing hierarchical clustering to combine segments from the same object into object proposals. The first image in part 1 shows an example output of Selective Search, where each possible object location is visualized by a green rectangle. These rectangles are then used as Regions of Interest (ROIs) in the R-CNN pipeline. 420 | 421 | The goal of ROI generation is to find a small set of ROIs that nevertheless tightly covers as many objects in the image as possible. This computation has to be sufficiently quick, while at the same time finding object locations at different scales and aspect ratios. Selective Search was shown to perform well for this task, with a good accuracy-to-speed trade-off. 422 | 423 | 424 | ### Non-maxima suppression 425 | Object detection methods often output multiple detections which fully or partly cover the same object in an image. These ROIs need to be merged to be able to count objects and to obtain their exact locations in the image. This is traditionally done using a technique called Non-Maxima Suppression (NMS). The version of NMS we use (which was also used in the R-CNN publications) does not merge ROIs but instead tries to identify the ROIs which best cover the real locations of an object, and discards all other ROIs. This is implemented by iteratively selecting the ROI with the highest confidence and removing all other ROIs which significantly overlap this ROI and are classified to be of the same class.
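The following is a minimal numpy sketch of this greedy procedure for boxes that all belong to the same class. The repository ships its own version in *fastRCNN/nms.py*; the overlap threshold below is just an example value:

```python
import numpy as np

# Illustrative sketch of greedy NMS; boxes are (x1, y1, x2, y2).
def nms(boxes, scores, overlapThreshold=0.3):
    boxes = np.asarray(boxes, dtype=float)
    scores = np.asarray(scores)
    areas = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
    order = scores.argsort()[::-1]          # ROIs sorted by descending confidence
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(int(i))                 # keep the most confident remaining ROI
        # overlap (intersection-over-union) of ROI i with all remaining ROIs
        xx1 = np.maximum(boxes[i, 0], boxes[order[1:], 0])
        yy1 = np.maximum(boxes[i, 1], boxes[order[1:], 1])
        xx2 = np.minimum(boxes[i, 2], boxes[order[1:], 2])
        yy2 = np.minimum(boxes[i, 3], boxes[order[1:], 3])
        intersection = np.maximum(0, xx2 - xx1) * np.maximum(0, yy2 - yy1)
        iou = intersection / (areas[i] + areas[order[1:]] - intersection)
        order = order[1:][iou <= overlapThreshold]   # discard significantly overlapping ROIs
    return keep

print(nms([[0, 0, 10, 10], [1, 1, 11, 11], [50, 50, 60, 60]], [0.9, 0.8, 0.7]))
# -> [0, 2]: box 1 overlaps the more confident box 0 too much and is suppressed
```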
426 | 427 | Detection results before (left) and after (right) Non-Maxima Suppression: 428 |

429 | alt text 430 | alt text 431 |

432 | 433 | ### Mean Average Precision 434 | Once trained, the quality of the model can be measured using different criteria, such as precision, recall, accuracy, area-under-curve, etc. A common metric, used in the Pascal VOC object recognition challenge, is the Average Precision (AP) per class. Average Precision takes the confidence of the detections into account and hence assigns a smaller penalty to false detections with low confidence. For a description of Average Precision see [Everingham et al.](http://homepages.inf.ed.ac.uk/ckiw/postscript/ijcv_voc09.pdf). The mean Average Precision (mAP) is computed by taking the average over all per-class APs. 435 | 436 | FUTURE WORK 437 | --------------- 438 | 439 | One big item for future work is to use CNTK's Python APIs. Once these are fully available, the following changes can be made, which should significantly improve run-time performance and simplify the code: 440 | - Reduce start-up time by loading the model only once and then keeping it persistent in memory. <-- Done in v1. 441 | - Reduce processing time using in-memory calls of the Python wrappers, rather than writing all inputs and outputs to file first and subsequently parsing the CNTK output back into memory (this is especially expensive for the temporary file *train.z* in step 3, which can be many gigabytes in size). <-- Done in v1. 442 | - Reduce code complexity by evaluating the network for each ROI on-the-fly in the `im_detect()` function rather than pre-computing all outputs in steps 4 and 5. 443 | 444 | Other items for future work include: 445 | - Replacing Selective Search with a faster and more accurate implementation. <-- Done in v1. 446 | - Adding bounding box regression. 447 | - Implementing fast*er* R-CNN, i.e. performing ROI generation inside the DNN. 448 | - Using a more recent DNN topology such as ResNet instead of AlexNet.
449 | 450 | 451 | AUTHOR 452 | --------------- 453 | Patrick Buehler, Senior Data Scientist 454 | -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure/ObjectDetectionUsingCntk/9114d38f2fd7370ebb4c33268afa0d908f0116f6/__init__.py -------------------------------------------------------------------------------- /data/grocery/negative/1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure/ObjectDetectionUsingCntk/9114d38f2fd7370ebb4c33268afa0d908f0116f6/data/grocery/negative/1.jpg -------------------------------------------------------------------------------- /data/grocery/negative/2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure/ObjectDetectionUsingCntk/9114d38f2fd7370ebb4c33268afa0d908f0116f6/data/grocery/negative/2.jpg -------------------------------------------------------------------------------- /data/grocery/negative/3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure/ObjectDetectionUsingCntk/9114d38f2fd7370ebb4c33268afa0d908f0116f6/data/grocery/negative/3.jpg -------------------------------------------------------------------------------- /data/grocery/negative/4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure/ObjectDetectionUsingCntk/9114d38f2fd7370ebb4c33268afa0d908f0116f6/data/grocery/negative/4.jpg -------------------------------------------------------------------------------- /data/grocery/negative/5.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure/ObjectDetectionUsingCntk/9114d38f2fd7370ebb4c33268afa0d908f0116f6/data/grocery/negative/5.jpg -------------------------------------------------------------------------------- /data/grocery/positive/0.bboxes.labels.tsv: -------------------------------------------------------------------------------- 1 | joghurt 2 | squash 3 | mushroom 4 | eggBox 5 | ketchup 6 | mustard 7 | water 8 | orange 9 | squash 10 | -------------------------------------------------------------------------------- /data/grocery/positive/0.bboxes.tsv: -------------------------------------------------------------------------------- 1 | 213 337 329 473 2 | 356 347 476 468 3 | 489 408 621 468 4 | 663 393 804 467 5 | 623 549 720 619 6 | 475 559 565 623 7 | 656 709 726 892 8 | 361 810 435 880 9 | 207 741 327 881 10 | -------------------------------------------------------------------------------- /data/grocery/positive/0.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure/ObjectDetectionUsingCntk/9114d38f2fd7370ebb4c33268afa0d908f0116f6/data/grocery/positive/0.jpg -------------------------------------------------------------------------------- /data/grocery/positive/11.bboxes.labels.tsv: -------------------------------------------------------------------------------- 1 | water 2 | squash 3 | mushroom 4 | orange 5 | eggBox 6 | mustard 7 | joghurt 8 | ketchup 9 | squash 10 | -------------------------------------------------------------------------------- /data/grocery/positive/11.bboxes.tsv: 
-------------------------------------------------------------------------------- 1 | 175 457 265 536 2 | 419 389 537 528 3 | 617 460 760 527 4 | 724 603 806 670 5 | 536 579 677 677 6 | 694 873 770 962 7 | 499 774 603 916 8 | 383 793 443 868 9 | 296 1010 419 1153 10 | -------------------------------------------------------------------------------- /data/grocery/positive/11.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure/ObjectDetectionUsingCntk/9114d38f2fd7370ebb4c33268afa0d908f0116f6/data/grocery/positive/11.jpg -------------------------------------------------------------------------------- /data/grocery/positive/12.bboxes.labels.tsv: -------------------------------------------------------------------------------- 1 | water 2 | mushroom 3 | squash 4 | eggBox 5 | joghurt 6 | mustard 7 | ketchup 8 | orange 9 | squash 10 | -------------------------------------------------------------------------------- /data/grocery/positive/12.bboxes.tsv: -------------------------------------------------------------------------------- 1 | 312 316 373 484 2 | 463 423 587 493 3 | 655 361 758 485 4 | 541 541 686 641 5 | 596 718 690 841 6 | 737 848 824 932 7 | 387 749 448 824 8 | 225 814 301 882 9 | 295 957 416 1090 10 | -------------------------------------------------------------------------------- /data/grocery/positive/12.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure/ObjectDetectionUsingCntk/9114d38f2fd7370ebb4c33268afa0d908f0116f6/data/grocery/positive/12.jpg -------------------------------------------------------------------------------- /data/grocery/positive/13.bboxes.labels.tsv: -------------------------------------------------------------------------------- 1 | squash 2 | water 3 | squash 4 | eggBox 5 | mushroom 6 | joghurt 7 | ketchup 8 | orange 9 | mustard 10 | -------------------------------------------------------------------------------- /data/grocery/positive/13.bboxes.tsv: -------------------------------------------------------------------------------- 1 | 201 348 324 491 2 | 480 425 612 493 3 | 681 349 814 499 4 | 520 545 672 640 5 | 694 573 830 647 6 | 584 721 681 850 7 | 375 754 439 826 8 | 209 821 285 888 9 | 724 856 804 940 10 | -------------------------------------------------------------------------------- /data/grocery/positive/13.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure/ObjectDetectionUsingCntk/9114d38f2fd7370ebb4c33268afa0d908f0116f6/data/grocery/positive/13.jpg -------------------------------------------------------------------------------- /data/grocery/positive/14.bboxes.labels.tsv: -------------------------------------------------------------------------------- 1 | mustard 2 | water 3 | squash 4 | orange 5 | eggBox 6 | joghurt 7 | squash 8 | ketchup 9 | mushroom 10 | -------------------------------------------------------------------------------- /data/grocery/positive/14.bboxes.tsv: -------------------------------------------------------------------------------- 1 | 304 487 399 537 2 | 485 471 635 527 3 | 700 375 836 527 4 | 553 607 633 684 5 | 706 577 864 676 6 | 613 760 720 886 7 | 728 817 853 962 8 | 461 861 580 954 9 | 236 849 377 949 10 | -------------------------------------------------------------------------------- /data/grocery/positive/14.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Azure/ObjectDetectionUsingCntk/9114d38f2fd7370ebb4c33268afa0d908f0116f6/data/grocery/positive/14.jpg -------------------------------------------------------------------------------- /data/grocery/positive/17.bboxes.labels.tsv: -------------------------------------------------------------------------------- 1 | mustard 2 | ketchup 3 | orange 4 | squash 5 | water 6 | mushroom 7 | joghurt 8 | squash 9 | eggBox 10 | -------------------------------------------------------------------------------- /data/grocery/positive/17.bboxes.tsv: -------------------------------------------------------------------------------- 1 | 264 469 333 520 2 | 343 461 440 523 3 | 455 453 533 527 4 | 539 376 670 528 5 | 774 445 901 529 6 | 656 593 820 676 7 | 688 800 820 952 8 | 539 781 648 908 9 | 227 838 509 933 10 | -------------------------------------------------------------------------------- /data/grocery/positive/17.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure/ObjectDetectionUsingCntk/9114d38f2fd7370ebb4c33268afa0d908f0116f6/data/grocery/positive/17.jpg -------------------------------------------------------------------------------- /data/grocery/positive/18.bboxes.labels.tsv: -------------------------------------------------------------------------------- 1 | mustard 2 | ketchup 3 | squash 4 | squash 5 | mushroom 6 | orange 7 | water 8 | joghurt 9 | eggBox 10 | -------------------------------------------------------------------------------- /data/grocery/positive/18.bboxes.tsv: -------------------------------------------------------------------------------- 1 | 195 492 295 548 2 | 369 412 436 547 3 | 587 403 721 552 4 | 729 399 876 552 5 | 660 623 824 706 6 | 553 625 629 700 7 | 742 793 814 981 8 | 604 820 720 965 9 | 240 813 409 953 10 | -------------------------------------------------------------------------------- /data/grocery/positive/18.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure/ObjectDetectionUsingCntk/9114d38f2fd7370ebb4c33268afa0d908f0116f6/data/grocery/positive/18.jpg -------------------------------------------------------------------------------- /data/grocery/positive/19.bboxes.labels.tsv: -------------------------------------------------------------------------------- 1 | mustard 2 | ketchup 3 | mushroom 4 | eggBox 5 | orange 6 | water 7 | joghurt 8 | squash 9 | squash 10 | -------------------------------------------------------------------------------- /data/grocery/positive/19.bboxes.tsv: -------------------------------------------------------------------------------- 1 | 244 444 307 565 2 | 336 420 409 564 3 | 563 504 686 569 4 | 689 500 814 571 5 | 532 641 608 714 6 | 697 806 770 990 7 | 563 841 681 997 8 | 376 846 497 978 9 | 216 842 335 981 10 | -------------------------------------------------------------------------------- /data/grocery/positive/19.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure/ObjectDetectionUsingCntk/9114d38f2fd7370ebb4c33268afa0d908f0116f6/data/grocery/positive/19.jpg -------------------------------------------------------------------------------- /data/grocery/positive/2.bboxes.labels.tsv: -------------------------------------------------------------------------------- 1 | eggBox 2 | mustard 3 | joghurt 4 | orange 5 | squash 6 | water 7 | squash 8 | mushroom 9 | ketchup 10 | 
-------------------------------------------------------------------------------- /data/grocery/positive/2.bboxes.tsv: -------------------------------------------------------------------------------- 1 | 208 425 369 503 2 | 432 387 503 508 3 | 603 389 701 507 4 | 728 440 808 513 5 | 571 532 700 657 6 | 674 700 730 844 7 | 509 785 627 921 8 | 380 769 504 841 9 | 231 753 291 898 10 | -------------------------------------------------------------------------------- /data/grocery/positive/2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure/ObjectDetectionUsingCntk/9114d38f2fd7370ebb4c33268afa0d908f0116f6/data/grocery/positive/2.jpg -------------------------------------------------------------------------------- /data/grocery/positive/21.bboxes.labels.tsv: -------------------------------------------------------------------------------- 1 | mushroom 2 | eggBox 3 | water 4 | mustard 5 | ketchup 6 | squash 7 | squash 8 | joghurt 9 | orange 10 | -------------------------------------------------------------------------------- /data/grocery/positive/21.bboxes.tsv: -------------------------------------------------------------------------------- 1 | 176 299 301 451 2 | 188 448 487 527 3 | 573 324 649 531 4 | 736 217 825 371 5 | 724 372 812 531 6 | 704 559 838 680 7 | 649 822 777 960 8 | 251 790 367 922 9 | 240 732 315 798 10 | -------------------------------------------------------------------------------- /data/grocery/positive/21.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure/ObjectDetectionUsingCntk/9114d38f2fd7370ebb4c33268afa0d908f0116f6/data/grocery/positive/21.jpg -------------------------------------------------------------------------------- /data/grocery/positive/22.bboxes.labels.tsv: -------------------------------------------------------------------------------- 1 | eggBox 2 | water 3 | ketchup 4 | squash 5 | mushroom 6 | squash 7 | mustard 8 | orange 9 | joghurt 10 | -------------------------------------------------------------------------------- /data/grocery/positive/22.bboxes.tsv: -------------------------------------------------------------------------------- 1 | 201 439 511 519 2 | 595 309 677 520 3 | 750 344 841 519 4 | 736 547 868 673 5 | 527 589 668 673 6 | 680 810 808 954 7 | 577 806 655 932 8 | 445 860 517 932 9 | 272 782 391 929 10 | -------------------------------------------------------------------------------- /data/grocery/positive/22.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure/ObjectDetectionUsingCntk/9114d38f2fd7370ebb4c33268afa0d908f0116f6/data/grocery/positive/22.jpg -------------------------------------------------------------------------------- /data/grocery/positive/23.bboxes.labels.tsv: -------------------------------------------------------------------------------- 1 | orange 2 | ketchup 3 | water 4 | squash 5 | squash 6 | mustard 7 | eggBox 8 | mushroom 9 | joghurt 10 | -------------------------------------------------------------------------------- /data/grocery/positive/23.bboxes.tsv: -------------------------------------------------------------------------------- 1 | 303 424 368 479 2 | 508 360 564 489 3 | 704 319 765 483 4 | 692 517 826 637 5 | 651 774 774 909 6 | 567 714 620 818 7 | 363 814 635 910 8 | 408 750 509 821 9 | 248 756 365 894 10 | -------------------------------------------------------------------------------- 
/data/grocery/positive/23.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure/ObjectDetectionUsingCntk/9114d38f2fd7370ebb4c33268afa0d908f0116f6/data/grocery/positive/23.jpg -------------------------------------------------------------------------------- /data/grocery/positive/24.bboxes.labels.tsv: -------------------------------------------------------------------------------- 1 | orange 2 | ketchup 3 | water 4 | mustard 5 | squash 6 | mushroom 7 | squash 8 | eggBox 9 | joghurt 10 | -------------------------------------------------------------------------------- /data/grocery/positive/24.bboxes.tsv: -------------------------------------------------------------------------------- 1 | 365 412 421 471 2 | 515 351 579 485 3 | 672 324 733 487 4 | 832 353 914 496 5 | 744 528 880 645 6 | 549 560 708 643 7 | 682 777 809 918 8 | 440 773 627 910 9 | 291 738 403 877 10 | -------------------------------------------------------------------------------- /data/grocery/positive/24.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure/ObjectDetectionUsingCntk/9114d38f2fd7370ebb4c33268afa0d908f0116f6/data/grocery/positive/24.jpg -------------------------------------------------------------------------------- /data/grocery/positive/26.bboxes.labels.tsv: -------------------------------------------------------------------------------- 1 | mustard 2 | ketchup 3 | eggBox 4 | squash 5 | mushroom 6 | orange 7 | joghurt 8 | squash 9 | water 10 | -------------------------------------------------------------------------------- /data/grocery/positive/26.bboxes.tsv: -------------------------------------------------------------------------------- 1 | 215 371 275 501 2 | 335 372 403 499 3 | 508 439 681 513 4 | 730 365 866 512 5 | 529 587 690 672 6 | 405 571 487 647 7 | 436 746 543 878 8 | 252 784 379 914 9 | 740 765 816 952 10 | -------------------------------------------------------------------------------- /data/grocery/positive/26.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure/ObjectDetectionUsingCntk/9114d38f2fd7370ebb4c33268afa0d908f0116f6/data/grocery/positive/26.jpg -------------------------------------------------------------------------------- /data/grocery/positive/3.bboxes.labels.tsv: -------------------------------------------------------------------------------- 1 | mustard 2 | eggBox 3 | ketchup 4 | squash 5 | orange 6 | joghurt 7 | mushroom 8 | water 9 | squash 10 | -------------------------------------------------------------------------------- /data/grocery/positive/3.bboxes.tsv: -------------------------------------------------------------------------------- 1 | 196 468 325 515 2 | 355 455 584 513 3 | 607 459 696 517 4 | 557 539 688 659 5 | 724 596 809 665 6 | 666 784 785 936 7 | 388 785 515 858 8 | 268 746 333 926 9 | 588 1018 716 1162 10 | -------------------------------------------------------------------------------- /data/grocery/positive/3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure/ObjectDetectionUsingCntk/9114d38f2fd7370ebb4c33268afa0d908f0116f6/data/grocery/positive/3.jpg -------------------------------------------------------------------------------- /data/grocery/positive/4.bboxes.labels.tsv: -------------------------------------------------------------------------------- 1 | ketchup 2 | 
mustard 3 | eggBox 4 | squash 5 | orange 6 | joghurt 7 | water 8 | mushroom 9 | squash 10 | -------------------------------------------------------------------------------- /data/grocery/positive/4.bboxes.tsv: -------------------------------------------------------------------------------- 1 | 307 481 423 544 2 | 532 489 661 544 3 | 704 471 852 541 4 | 701 568 837 693 5 | 565 621 644 689 6 | 674 766 781 893 7 | 355 724 409 865 8 | 465 872 605 965 9 | 603 1052 730 1201 10 | -------------------------------------------------------------------------------- /data/grocery/positive/4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure/ObjectDetectionUsingCntk/9114d38f2fd7370ebb4c33268afa0d908f0116f6/data/grocery/positive/4.jpg -------------------------------------------------------------------------------- /data/grocery/positive/6.bboxes.labels.tsv: -------------------------------------------------------------------------------- 1 | mushroom 2 | squash 3 | mustard 4 | ketchup 5 | orange 6 | joghurt 7 | eggBox 8 | water 9 | squash 10 | -------------------------------------------------------------------------------- /data/grocery/positive/6.bboxes.tsv: -------------------------------------------------------------------------------- 1 | 179 463 328 532 2 | 403 391 497 511 3 | 580 381 636 516 4 | 701 343 774 503 5 | 575 585 653 659 6 | 677 777 792 918 7 | 515 786 635 924 8 | 341 737 408 906 9 | 611 1006 741 1152 10 | -------------------------------------------------------------------------------- /data/grocery/positive/6.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure/ObjectDetectionUsingCntk/9114d38f2fd7370ebb4c33268afa0d908f0116f6/data/grocery/positive/6.jpg -------------------------------------------------------------------------------- /data/grocery/positive/7.bboxes.labels.tsv: -------------------------------------------------------------------------------- 1 | squash 2 | mushroom 3 | ketchup 4 | mustard 5 | orange 6 | water 7 | eggBox 8 | joghurt 9 | squash 10 | -------------------------------------------------------------------------------- /data/grocery/positive/7.bboxes.tsv: -------------------------------------------------------------------------------- 1 | 213 344 344 481 2 | 472 424 589 480 3 | 627 329 694 484 4 | 741 361 817 485 5 | 714 567 792 637 6 | 643 845 841 926 7 | 492 757 589 888 8 | 307 752 423 898 9 | 311 969 435 1106 10 | -------------------------------------------------------------------------------- /data/grocery/positive/7.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure/ObjectDetectionUsingCntk/9114d38f2fd7370ebb4c33268afa0d908f0116f6/data/grocery/positive/7.jpg -------------------------------------------------------------------------------- /data/grocery/positive/8.bboxes.labels.tsv: -------------------------------------------------------------------------------- 1 | mustard 2 | ketchup 3 | mushroom 4 | squash 5 | water 6 | eggBox 7 | joghurt 8 | orange 9 | squash 10 | -------------------------------------------------------------------------------- /data/grocery/positive/8.bboxes.tsv: -------------------------------------------------------------------------------- 1 | 248 384 309 509 2 | 347 384 403 511 3 | 519 453 661 520 4 | 726 384 869 525 5 | 682 607 838 682 6 | 702 776 804 905 7 | 347 730 439 852 8 | 277 840 347 909 9 | 299 982 417 1125 10 | 
-------------------------------------------------------------------------------- /data/grocery/positive/8.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure/ObjectDetectionUsingCntk/9114d38f2fd7370ebb4c33268afa0d908f0116f6/data/grocery/positive/8.jpg -------------------------------------------------------------------------------- /data/grocery/positive/9.bboxes.labels.tsv: -------------------------------------------------------------------------------- 1 | mustard 2 | water 3 | squash 4 | mushroom 5 | orange 6 | eggBox 7 | ketchup 8 | joghurt 9 | squash 10 | -------------------------------------------------------------------------------- /data/grocery/positive/9.bboxes.tsv: -------------------------------------------------------------------------------- 1 | 232 452 379 509 2 | 512 341 572 512 3 | 720 364 866 515 4 | 535 588 685 670 5 | 758 596 837 663 6 | 692 772 805 900 7 | 488 841 625 904 8 | 333 732 427 861 9 | 291 1001 411 1144 10 | -------------------------------------------------------------------------------- /data/grocery/positive/9.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure/ObjectDetectionUsingCntk/9114d38f2fd7370ebb4c33268afa0d908f0116f6/data/grocery/positive/9.jpg -------------------------------------------------------------------------------- /data/grocery/testImages/10.bboxes.labels.tsv: -------------------------------------------------------------------------------- 1 | squash 2 | orange 3 | water 4 | mushroom 5 | eggBox 6 | ketchup 7 | mustard 8 | joghurt 9 | squash 10 | -------------------------------------------------------------------------------- /data/grocery/testImages/10.bboxes.tsv: -------------------------------------------------------------------------------- 1 | 209 361 339 496 2 | 399 424 471 495 3 | 509 319 568 489 4 | 737 332 858 489 5 | 535 541 682 648 6 | 617 820 732 910 7 | 467 812 563 882 8 | 280 762 396 904 9 | 305 980 423 1117 10 | -------------------------------------------------------------------------------- /data/grocery/testImages/10.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure/ObjectDetectionUsingCntk/9114d38f2fd7370ebb4c33268afa0d908f0116f6/data/grocery/testImages/10.jpg -------------------------------------------------------------------------------- /data/grocery/testImages/15.bboxes.labels.tsv: -------------------------------------------------------------------------------- 1 | mustard 2 | ketchup 3 | orange 4 | water 5 | squash 6 | squash 7 | joghurt 8 | eggBox 9 | mushroom 10 | -------------------------------------------------------------------------------- /data/grocery/testImages/15.bboxes.tsv: -------------------------------------------------------------------------------- 1 | 212 379 295 508 2 | 325 352 408 512 3 | 451 443 532 513 4 | 571 303 644 515 5 | 696 353 837 509 6 | 714 797 842 938 7 | 551 786 670 938 8 | 391 788 509 932 9 | 237 824 372 918 10 | -------------------------------------------------------------------------------- /data/grocery/testImages/15.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure/ObjectDetectionUsingCntk/9114d38f2fd7370ebb4c33268afa0d908f0116f6/data/grocery/testImages/15.jpg -------------------------------------------------------------------------------- /data/grocery/testImages/20.bboxes.labels.tsv: 
-------------------------------------------------------------------------------- 1 | mushroom 2 | ketchup 3 | eggBox 4 | water 5 | orange 6 | mustard 7 | joghurt 8 | squash 9 | squash 10 | -------------------------------------------------------------------------------- /data/grocery/testImages/20.bboxes.tsv: -------------------------------------------------------------------------------- 1 | 571 475 693 537 2 | 697 336 768 468 3 | 698 472 814 540 4 | 635 615 832 686 5 | 560 615 635 682 6 | 601 713 673 837 7 | 576 810 684 962 8 | 469 812 577 946 9 | 349 809 465 936 10 | -------------------------------------------------------------------------------- /data/grocery/testImages/20.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure/ObjectDetectionUsingCntk/9114d38f2fd7370ebb4c33268afa0d908f0116f6/data/grocery/testImages/20.jpg -------------------------------------------------------------------------------- /data/grocery/testImages/25.bboxes.labels.tsv: -------------------------------------------------------------------------------- 1 | mustard 2 | ketchup 3 | water 4 | squash 5 | eggBox 6 | mushroom 7 | orange 8 | joghurt 9 | squash 10 | -------------------------------------------------------------------------------- /data/grocery/testImages/25.bboxes.tsv: -------------------------------------------------------------------------------- 1 | 245 369 315 499 2 | 320 348 392 499 3 | 407 329 463 499 4 | 524 385 624 501 5 | 773 444 920 523 6 | 560 587 716 670 7 | 433 565 513 644 8 | 556 793 673 945 9 | 271 772 395 905 10 | -------------------------------------------------------------------------------- /data/grocery/testImages/25.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure/ObjectDetectionUsingCntk/9114d38f2fd7370ebb4c33268afa0d908f0116f6/data/grocery/testImages/25.jpg -------------------------------------------------------------------------------- /data/grocery/testImages/5.bboxes.labels.tsv: -------------------------------------------------------------------------------- 1 | mushroom 2 | joghurt 3 | eggBox 4 | squash 5 | orange 6 | water 7 | ketchup 8 | mustard 9 | squash 10 | -------------------------------------------------------------------------------- /data/grocery/testImages/5.bboxes.tsv: -------------------------------------------------------------------------------- 1 | 288 425 445 493 2 | 511 371 613 495 3 | 740 425 893 503 4 | 733 529 872 657 5 | 585 577 666 648 6 | 372 713 436 878 7 | 545 836 690 912 8 | 714 824 821 896 9 | 623 1012 749 1156 10 | -------------------------------------------------------------------------------- /data/grocery/testImages/5.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure/ObjectDetectionUsingCntk/9114d38f2fd7370ebb4c33268afa0d908f0116f6/data/grocery/testImages/5.jpg -------------------------------------------------------------------------------- /deprecated_3_runCntk_brainscript.py: -------------------------------------------------------------------------------- 1 | import os, sys, importlib 2 | import shutil, time 3 | import subprocess 4 | import PARAMETERS 5 | locals().update(importlib.import_module("PARAMETERS").__dict__) 6 | 7 | 8 | 9 | #################################### 10 | # Parameters 11 | #################################### 12 | cntkBinariesDir = "C:/local/CNTK-2-0-rc1/cntk/cntk/" 13 | 14 | # no need to change this 15 | 
cntkCmdStrPattern = "{0}/cntk.exe configFile={1}config.cntk currentDirectory={1}" 16 | 17 | 18 | 19 | #################################### 20 | # Main 21 | #################################### 22 | print("classifier = " + classifier) 23 | if not os.path.exists(cntkBinariesDir + "/cntk.exe"): 24 | raise Exception("Cannot find cntk.exe in directory: " + cntkBinariesDir) 25 | deleteAllFilesInDirectory(cntkFilesDir + "/tmp", None) 26 | shutil.copy(os.path.join(cntkResourcesDir, "config.cntk"), cntkFilesDir) 27 | 28 | #generate cntk command string 29 | cmdStr = cntkCmdStrPattern.format(cntkBinariesDir, cntkFilesDir, classifier) 30 | cmdStr += " ImageH={} ImageW={}".format(cntk_padHeight, cntk_padWidth) 31 | cmdStr += " NumLabels={0} NumTrainROIs={1} NumTestROIs={1}".format(len(classes), cntk_nrRois) 32 | cmdStr += " TrainROIDim={} TrainROILabelDim={}".format(4*cntk_nrRois, cntk_nrRois * cntk_featureDimensions[classifier]) 33 | cmdStr += " TestROIDim={} TestROILabelDim={}".format( 4*cntk_nrRois, cntk_nrRois * cntk_featureDimensions[classifier]) 34 | if classifier == 'svm': 35 | cmdStr += " [Train=[SGD=[maxEpochs=0]]]" #no need to train the network if just using it as featurizer 36 | cmdStr += " [WriteTest=[outputNodeNames=(z.fcOut.h2.y)]]" 37 | cmdStr += " [WriteTrain=[outputNodeNames=(z.fcOut.h2.y)]]" 38 | 39 | #run cntk 40 | tstart = datetime.datetime.now() 41 | os.environ['ACML_FMA'] = str(0) 42 | print(cmdStr) 43 | pid = subprocess.Popen(cmdStr, cwd = cntkFilesDir) #, creationflags=subprocess.CREATE_NEW_CONSOLE) 44 | pid.wait() 45 | print ("Time running cntk [s]: " + str((datetime.datetime.now() - tstart).total_seconds())) 46 | 47 | #delete model files written during cntk training 48 | filenames = getFilesInDirectory(cntkFilesDir + "/tmp/", postfix = None) 49 | for filename in filenames: 50 | if filename.startswith('Fast-RCNN.'): 51 | os.remove(cntkFilesDir + "/tmp/" + filename) 52 | assert pid.returncode == 0, "ERROR: cntk ended with exit code {}".format(pid.returncode) 53 | 54 | #parse cntk output 55 | print("classifier = " + classifier) 56 | image_sets = ["test", "train"] 57 | for image_set in image_sets: 58 | print("Parsing CNTK output for image set: " + image_set) 59 | cntkImgsListPath = cntkFilesDir + image_set + ".txt" 60 | outParsedDir = cntkFilesDir + image_set + "_" + classifier + "_parsed/" 61 | if classifier == 'svm': 62 | cntkOutputPath = cntkFilesDir + image_set + ".z.fcOut.h2.y" 63 | elif classifier == 'nn': 64 | cntkOutputPath = cntkFilesDir + image_set + ".z" 65 | else: 66 | error 67 | 68 | #write cntk output for each image to separate file 69 | makeDirectory(outParsedDir) 70 | parseCntkOutput(cntkImgsListPath, cntkOutputPath, outParsedDir, cntk_nrRois, cntk_featureDimensions[classifier], 71 | saveCompressed = True, skipCheck = False) #, skip5Mod = 0) 72 | 73 | #delete cntk output file which can be very large and are no longer needed 74 | deleteFile(cntkOutputPath) 75 | print("DONE.") -------------------------------------------------------------------------------- /doc/0.filter.roi.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure/ObjectDetectionUsingCntk/9114d38f2fd7370ebb4c33268afa0d908f0116f6/doc/0.filter.roi.jpg -------------------------------------------------------------------------------- /doc/0.grid.roi.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Azure/ObjectDetectionUsingCntk/9114d38f2fd7370ebb4c33268afa0d908f0116f6/doc/0.grid.roi.jpg -------------------------------------------------------------------------------- /doc/0.ss.roi.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure/ObjectDetectionUsingCntk/9114d38f2fd7370ebb4c33268afa0d908f0116f6/doc/0.ss.roi.jpg -------------------------------------------------------------------------------- /doc/anno_boxes.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure/ObjectDetectionUsingCntk/9114d38f2fd7370ebb4c33268afa0d908f0116f6/doc/anno_boxes.jpg -------------------------------------------------------------------------------- /doc/anno_labels.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure/ObjectDetectionUsingCntk/9114d38f2fd7370ebb4c33268afa0d908f0116f6/doc/anno_labels.jpg -------------------------------------------------------------------------------- /doc/nn_00.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure/ObjectDetectionUsingCntk/9114d38f2fd7370ebb4c33268afa0d908f0116f6/doc/nn_00.jpg -------------------------------------------------------------------------------- /doc/nn_00_no_nms.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure/ObjectDetectionUsingCntk/9114d38f2fd7370ebb4c33268afa0d908f0116f6/doc/nn_00_no_nms.jpg -------------------------------------------------------------------------------- /doc/nn_01.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure/ObjectDetectionUsingCntk/9114d38f2fd7370ebb4c33268afa0d908f0116f6/doc/nn_01.jpg -------------------------------------------------------------------------------- /doc/nn_110.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure/ObjectDetectionUsingCntk/9114d38f2fd7370ebb4c33268afa0d908f0116f6/doc/nn_110.jpg -------------------------------------------------------------------------------- /doc/nn_215.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure/ObjectDetectionUsingCntk/9114d38f2fd7370ebb4c33268afa0d908f0116f6/doc/nn_215.jpg -------------------------------------------------------------------------------- /doc/nn_425.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure/ObjectDetectionUsingCntk/9114d38f2fd7370ebb4c33268afa0d908f0116f6/doc/nn_425.jpg -------------------------------------------------------------------------------- /doc/nn_55.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure/ObjectDetectionUsingCntk/9114d38f2fd7370ebb4c33268afa0d908f0116f6/doc/nn_55.jpg -------------------------------------------------------------------------------- /doc/precision_recall.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure/ObjectDetectionUsingCntk/9114d38f2fd7370ebb4c33268afa0d908f0116f6/doc/precision_recall.jpg -------------------------------------------------------------------------------- 
/doc/rcnnPipeline.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure/ObjectDetectionUsingCntk/9114d38f2fd7370ebb4c33268afa0d908f0116f6/doc/rcnnPipeline.JPG -------------------------------------------------------------------------------- /doc/svm_010.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure/ObjectDetectionUsingCntk/9114d38f2fd7370ebb4c33268afa0d908f0116f6/doc/svm_010.jpg -------------------------------------------------------------------------------- /doc/svm_115.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure/ObjectDetectionUsingCntk/9114d38f2fd7370ebb4c33268afa0d908f0116f6/doc/svm_115.jpg -------------------------------------------------------------------------------- /doc/svm_220.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure/ObjectDetectionUsingCntk/9114d38f2fd7370ebb4c33268afa0d908f0116f6/doc/svm_220.jpg -------------------------------------------------------------------------------- /doc/svm_325.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure/ObjectDetectionUsingCntk/9114d38f2fd7370ebb4c33268afa0d908f0116f6/doc/svm_325.jpg -------------------------------------------------------------------------------- /doc/svm_45.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure/ObjectDetectionUsingCntk/9114d38f2fd7370ebb4c33268afa0d908f0116f6/doc/svm_45.jpg -------------------------------------------------------------------------------- /fastRCNN/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | from .imdb import imdb 8 | from .pascal_voc import pascal_voc 9 | 10 | -------------------------------------------------------------------------------- /fastRCNN/imdb.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import os, sys 9 | import os.path as osp 10 | import PIL 11 | import numpy as np 12 | import scipy.sparse 13 | import platform 14 | from builtins import range 15 | 16 | if sys.version_info[1] == 4 and sys.version_info[0] == 3: 17 | from .utils34_win64.cython_bbox import bbox_overlaps 18 | elif sys.version_info[1] == 5 and sys.version_info[0] == 3: 19 | from .utils35_win64.cython_bbox import bbox_overlaps 20 | else: 21 | print("ERROR: Python version {} not supported".format(sys.version_info)) 22 | error 23 | 24 | 25 | class imdb(object): 26 | """Image database.""" 27 | 28 | def __init__(self, name): 29 | self._name = name 30 | self._num_classes = 0 31 | self._classes = [] 32 | self._image_index = [] 33 | self._obj_proposer = 'selective_search' 34 | self._roidb = None 35 | self._roidb_handler = self.default_roidb 
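The cython_bbox import above is a precompiled Windows extension; purely for illustration, a NumPy-only equivalent of its bbox_overlaps (pairwise IoU between two [x1, y1, x2, y2] box arrays, using the same +1 inclusive-pixel convention as nms.py below) might look like this sketch:

import numpy as np

def bbox_overlaps_np(boxes, query_boxes):
    # boxes: (N, 4), query_boxes: (K, 4) -> (N, K) matrix of IoU values
    ix1 = np.maximum(boxes[:, None, 0], query_boxes[None, :, 0])
    iy1 = np.maximum(boxes[:, None, 1], query_boxes[None, :, 1])
    ix2 = np.minimum(boxes[:, None, 2], query_boxes[None, :, 2])
    iy2 = np.minimum(boxes[:, None, 3], query_boxes[None, :, 3])
    inter = np.maximum(ix2 - ix1 + 1, 0) * np.maximum(iy2 - iy1 + 1, 0)
    area_b = (boxes[:, 2] - boxes[:, 0] + 1) * (boxes[:, 3] - boxes[:, 1] + 1)
    area_q = (query_boxes[:, 2] - query_boxes[:, 0] + 1) * (query_boxes[:, 3] - query_boxes[:, 1] + 1)
    return inter / (area_b[:, None] + area_q[None, :] - inter)

The compiled extension may differ in edge-case handling; this only mirrors the expected shapes and convention.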
36 | # Use this dict for storing dataset specific config options 37 | self.config = {} 38 | 39 | @property 40 | def name(self): 41 | return self._name 42 | 43 | @property 44 | def num_classes(self): 45 | return len(self._classes) 46 | 47 | @property 48 | def classes(self): 49 | return self._classes 50 | 51 | @property 52 | def image_index(self): 53 | return self._image_index 54 | 55 | @property 56 | def roidb_handler(self): 57 | return self._roidb_handler 58 | 59 | @roidb_handler.setter 60 | def roidb_handler(self, val): 61 | self._roidb_handler = val 62 | 63 | @property 64 | def roidb(self): 65 | # A roidb is a list of dictionaries, each with the following keys: 66 | # boxes 67 | # gt_overlaps 68 | # gt_classes 69 | # flipped 70 | if self._roidb is not None: 71 | return self._roidb 72 | self._roidb = self.roidb_handler() 73 | return self._roidb 74 | 75 | # @property 76 | # def cache_path(self): 77 | # cache_path = osp.abspath(osp.join(datasets.ROOT_DIR, 'data', 'cache')) 78 | # print cache_path 79 | # if not os.path.exists(cache_path): 80 | # os.makedirs(cache_path) 81 | # return cache_path 82 | 83 | @property 84 | def num_images(self): 85 | return len(self.image_index) 86 | 87 | def image_path_at(self, i): 88 | raise NotImplementedError 89 | 90 | def default_roidb(self): 91 | raise NotImplementedError 92 | 93 | def evaluate_detections(self, all_boxes, output_dir=None): 94 | """ 95 | all_boxes is a list of length number-of-classes. 96 | Each list element is a list of length number-of-images. 97 | Each of those list elements is either an empty list [] 98 | or a numpy array of detection. 99 | 100 | all_boxes[class][image] = [] or np.array of shape #dets x 5 101 | """ 102 | raise NotImplementedError 103 | 104 | def append_flipped_images(self): 105 | num_images = self.num_images 106 | widths = [PIL.Image.open(self.image_path_at(i)).size[0] 107 | for i in range(num_images)] 108 | for i in range(num_images): 109 | boxes = self.roidb[i]['boxes'].copy() 110 | oldx1 = boxes[:, 0].copy() 111 | oldx2 = boxes[:, 2].copy() 112 | boxes[:, 0] = widths[i] - oldx2 - 1 113 | boxes[:, 2] = widths[i] - oldx1 - 1 114 | assert (boxes[:, 2] >= boxes[:, 0]).all() 115 | entry = {'boxes' : boxes, 116 | 'gt_overlaps' : self.roidb[i]['gt_overlaps'], 117 | 'gt_classes' : self.roidb[i]['gt_classes'], 118 | 'flipped' : True} 119 | self.roidb.append(entry) 120 | self._image_index = self._image_index * 2 121 | 122 | def evaluate_recall(self, candidate_boxes, ar_thresh=0.5): 123 | # Record max overlap value for each gt box 124 | # Return vector of overlap values 125 | gt_overlaps = np.zeros(0) 126 | for i in range(self.num_images): 127 | gt_inds = np.where(self.roidb[i]['gt_classes'] > 0)[0] 128 | gt_boxes = self.roidb[i]['boxes'][gt_inds, :] 129 | 130 | boxes = candidate_boxes[i] 131 | if boxes.shape[0] == 0: 132 | continue 133 | overlaps = bbox_overlaps(boxes.astype(np.float), 134 | gt_boxes.astype(np.float)) 135 | 136 | # gt_overlaps = np.hstack((gt_overlaps, overlaps.max(axis=0))) 137 | _gt_overlaps = np.zeros((gt_boxes.shape[0])) 138 | for j in range(gt_boxes.shape[0]): 139 | argmax_overlaps = overlaps.argmax(axis=0) 140 | max_overlaps = overlaps.max(axis=0) 141 | gt_ind = max_overlaps.argmax() 142 | gt_ovr = max_overlaps.max() 143 | assert(gt_ovr >= 0) 144 | box_ind = argmax_overlaps[gt_ind] 145 | _gt_overlaps[j] = overlaps[box_ind, gt_ind] 146 | assert(_gt_overlaps[j] == gt_ovr) 147 | overlaps[box_ind, :] = -1 148 | overlaps[:, gt_ind] = -1 149 | 150 | gt_overlaps = np.hstack((gt_overlaps, _gt_overlaps)) 151 | 152 
| num_pos = gt_overlaps.size 153 | gt_overlaps = np.sort(gt_overlaps) 154 | step = 0.001 155 | thresholds = np.minimum(np.arange(0.5, 1.0 + step, step), 1.0) 156 | recalls = np.zeros_like(thresholds) 157 | for i, t in enumerate(thresholds): 158 | recalls[i] = (gt_overlaps >= t).sum() / float(num_pos) 159 | ar = 2 * np.trapz(recalls, thresholds) 160 | 161 | return ar, gt_overlaps, recalls, thresholds 162 | 163 | def create_roidb_from_box_list(self, box_list, gt_roidb): 164 | assert len(box_list) == self.num_images, \ 165 | 'Number of boxes must match number of ground-truth images' 166 | roidb = [] 167 | for i in range(self.num_images): 168 | boxes = box_list[i] 169 | num_boxes = boxes.shape[0] 170 | overlaps = np.zeros((num_boxes, self.num_classes), dtype=np.float32) 171 | 172 | if gt_roidb and gt_roidb[i]: 173 | gt_boxes = gt_roidb[i]['boxes'] 174 | gt_classes = gt_roidb[i]['gt_classes'] 175 | if len(gt_classes) > 0: #for pascal every image has at least one annotated object. This is not the case however if including negative images 176 | gt_overlaps = bbox_overlaps(boxes.astype(np.float), 177 | gt_boxes.astype(np.float)) 178 | 179 | argmaxes = gt_overlaps.argmax(axis=1) 180 | maxes = gt_overlaps.max(axis=1) 181 | I = np.where(maxes > 0)[0] 182 | overlaps[I, gt_classes[argmaxes[I]]] = maxes[I] 183 | 184 | overlaps = scipy.sparse.csr_matrix(overlaps) 185 | roidb.append({'boxes' : boxes, 186 | 'gt_classes' : np.zeros((num_boxes,), 187 | dtype=np.int32), 188 | 'gt_overlaps' : overlaps, 189 | 'flipped' : False}) 190 | return roidb 191 | 192 | @staticmethod 193 | def merge_roidbs(a, b): 194 | assert len(a) == len(b) 195 | for i in range(len(a)): 196 | if a[i]: #if image has at least one annotated object 197 | a[i]['boxes'] = np.vstack((a[i]['boxes'], b[i]['boxes'])) 198 | a[i]['gt_classes'] = np.hstack((a[i]['gt_classes'], 199 | b[i]['gt_classes'])) 200 | a[i]['gt_overlaps'] = scipy.sparse.vstack([a[i]['gt_overlaps'], 201 | b[i]['gt_overlaps']]) 202 | else: 203 | a[i] = b[i] 204 | return a 205 | 206 | def competition_mode(self, on): 207 | """Turn competition mode on or off.""" 208 | pass 209 | -------------------------------------------------------------------------------- /fastRCNN/nms.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | 10 | def nms(dets, thresh): 11 | x1 = dets[:, 0] 12 | y1 = dets[:, 1] 13 | x2 = dets[:, 2] 14 | y2 = dets[:, 3] 15 | scores = dets[:, 4] 16 | 17 | areas = (x2 - x1 + 1) * (y2 - y1 + 1) 18 | order = scores.argsort()[::-1] 19 | 20 | keep = [] 21 | while order.size > 0: 22 | i = order[0] 23 | keep.append(i) 24 | xx1 = np.maximum(x1[i], x1[order[1:]]) 25 | yy1 = np.maximum(y1[i], y1[order[1:]]) 26 | xx2 = np.minimum(x2[i], x2[order[1:]]) 27 | yy2 = np.minimum(y2[i], y2[order[1:]]) 28 | 29 | w = np.maximum(0.0, xx2 - xx1 + 1) 30 | h = np.maximum(0.0, yy2 - yy1 + 1) 31 | inter = w * h 32 | ovr = inter / (areas[i] + areas[order[1:]] - inter) 33 | 34 | inds = np.where(ovr <= thresh)[0] 35 | order = order[inds + 1] 36 | 37 | return keep 38 | -------------------------------------------------------------------------------- /fastRCNN/pascal_voc.py: -------------------------------------------------------------------------------- 1 | # 
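As a quick illustration of the greedy NMS above (a sketch; detection rows are x1, y1, x2, y2, score):

import numpy as np
from fastRCNN.nms import nms

dets = np.array([[ 10.,  10.,  60.,  60., 0.9],
                 [ 12.,  12.,  62.,  62., 0.8],    # IoU ~0.86 with the first box
                 [100., 100., 150., 150., 0.7]])
print(nms(dets, 0.3))    # -> [0, 2]; the second box is suppressed

Boxes are visited in descending score order, and any remaining box overlapping a kept box by more than the threshold is discarded.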
-------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | from __future__ import print_function 9 | import os, pdb 10 | import xml.dom.minidom as minidom 11 | import numpy as np 12 | import scipy.sparse 13 | import scipy.io as sio 14 | import pickle as cp 15 | import subprocess 16 | from .imdb import imdb 17 | from .voc_eval import voc_eval 18 | #from fastRCNN.imdb import imdb 19 | #from fastRCNN.voc_eval import voc_eval 20 | 21 | class pascal_voc(imdb): 22 | def __init__(self, image_set, year, classes, maxNrRois, cacheDir, devkit_path=None): 23 | imdb.__init__(self, 'voc_' + year + '_' + image_set) 24 | self._year = year 25 | self._image_set = image_set 26 | self._maxNrRois = maxNrRois 27 | self._ROOT_DIR = os.path.join(os.path.dirname(__file__), '..') 28 | self._cacheDir = cacheDir 29 | self._devkit_path = self._get_default_path() if devkit_path is None \ 30 | else devkit_path 31 | self._data_path = os.path.join(self._devkit_path, 'VOC' + self._year) 32 | self._classes = classes 33 | #('__background__', # always index 0 34 | # 'aeroplane', 'bicycle', 'bird', 'boat', 35 | # 'bottle', 'bus', 'car', 'cat', 'chair', 36 | # 'cow', 'diningtable', 'dog', 'horse', 37 | # 'motorbike', 'person', 'pottedplant', 38 | # 'sheep', 'sofa', 'train', 'tvmonitor') 39 | self._class_to_ind = dict(zip(self.classes, range(self.num_classes))) 40 | self._image_ext = '.jpg' 41 | self._image_index = self._load_image_set_index() 42 | # Default to roidb handler 43 | self._roidb_handler = self.selective_search_roidb 44 | 45 | # PASCAL specific config options 46 | self.config = {'cleanup' : True, 47 | 'use_salt' : True, 48 | 'top_k' : 2000} 49 | 50 | assert os.path.exists(self._devkit_path), \ 51 | 'VOCdevkit path does not exist: {}'.format(self._devkit_path) 52 | assert os.path.exists(self._data_path), \ 53 | 'Path does not exist: {}'.format(self._data_path) 54 | 55 | @property 56 | def cache_path(self): 57 | cache_path = self._cacheDir 58 | #cache_path = osp.abspath(osp.join(datasets.ROOT_DIR, 'data', 'cache')) 59 | if not os.path.exists(cache_path): 60 | os.makedirs(cache_path) 61 | return cache_path 62 | 63 | def image_path_at(self, i): 64 | """ 65 | Return the absolute path to image i in the image sequence. 66 | """ 67 | return self.image_path_from_index(self._image_index[i]) 68 | 69 | def image_path_from_index(self, index): 70 | """ 71 | Construct an image path from the image's "index" identifier. 72 | """ 73 | image_path = os.path.join(self._data_path, 'JPEGImages', 74 | index + self._image_ext) 75 | assert os.path.exists(image_path), \ 76 | 'Path does not exist: {}'.format(image_path) 77 | return image_path 78 | 79 | def _load_image_set_index(self): 80 | """ 81 | Load the indexes listed in this dataset's image set file. 82 | """ 83 | # Example path to image set file: 84 | # self._devkit_path + /VOCdevkit2007/VOC2007/ImageSets/Main/val.txt 85 | image_set_file = os.path.join(self._data_path, 'ImageSets', 'Main', 86 | self._image_set + '.txt') 87 | assert os.path.exists(image_set_file), \ 88 | 'Path does not exist: {}'.format(image_set_file) 89 | with open(image_set_file) as f: 90 | image_index = [x.strip() for x in f.readlines()] 91 | return image_index 92 | 93 | def _get_default_path(self): 94 | """ 95 | Return the default path where PASCAL VOC is expected to be installed. 
96 | """ 97 | return os.path.join(self._ROOT_DIR, 'resources', 'pascalVocData', 'VOCdevkit' + self._year) 98 | 99 | def gt_roidb(self): 100 | """ 101 | Return the database of ground-truth regions of interest. 102 | 103 | This function loads/saves from/to a cache file to speed up future calls. 104 | """ 105 | cache_file = os.path.join(self.cache_path, self.name + '_gt_roidb.pkl') 106 | if os.path.exists(cache_file): 107 | with open(cache_file, 'rb') as fid: 108 | roidb = cp.load(fid) 109 | print ('{} gt roidb loaded from {}'.format(self.name, cache_file)) 110 | return roidb 111 | 112 | gt_roidb = [self._load_pascal_annotation(index) 113 | for index in self.image_index] 114 | with open(cache_file, 'wb') as fid: 115 | cp.dump(gt_roidb, fid, cp.HIGHEST_PROTOCOL) 116 | print ('wrote gt roidb to {}'.format(cache_file)) 117 | 118 | return gt_roidb 119 | 120 | def selective_search_roidb(self): 121 | """ 122 | Return the database of selective search regions of interest. 123 | Ground-truth ROIs are also included. 124 | 125 | This function loads/saves from/to a cache file to speed up future calls. 126 | """ 127 | cache_file = os.path.join(self.cache_path, 128 | self.name + '_selective_search_roidb.pkl') 129 | 130 | if os.path.exists(cache_file): 131 | with open(cache_file, 'rb') as fid: 132 | roidb = cp.load(fid, encoding='latin1') 133 | print ('{} ss roidb loaded from {}'.format(self.name, cache_file)) 134 | return roidb 135 | 136 | if int(self._year) == 2007 or not self._image_set.startswith('test'): 137 | gt_roidb = self.gt_roidb() 138 | ss_roidb = self._load_selective_search_roidb(gt_roidb) 139 | roidb = imdb.merge_roidbs(gt_roidb, ss_roidb) 140 | else: 141 | roidb = self._load_selective_search_roidb(None) 142 | 143 | # Keep max of e.g. 2000 rois 144 | if type(self._maxNrRois) == int: 145 | print ("Only keep the first %d ROIs..." % self._maxNrRois) 146 | for i in range(self.num_images): 147 | gt_overlaps = roidb[i]['gt_overlaps'] 148 | gt_overlaps = gt_overlaps.todense()[:self._maxNrRois] 149 | gt_overlaps = scipy.sparse.csr_matrix(gt_overlaps) 150 | roidb[i]['boxes'] = roidb[i]['boxes'][:self._maxNrRois, :] 151 | roidb[i]['gt_classes'] = roidb[i]['gt_classes'][:self._maxNrRois] 152 | roidb[i]['gt_overlaps'] = roidb[i]['gt_overlaps'] = gt_overlaps 153 | 154 | with open(cache_file, 'wb') as fid: 155 | cp.dump(roidb, fid, cp.HIGHEST_PROTOCOL) 156 | print ('wrote ss roidb to {}'.format(cache_file)) 157 | 158 | return roidb 159 | 160 | def _load_selective_search_roidb(self, gt_roidb): 161 | filename = os.path.abspath(os.path.join(self._devkit_path, '..', 162 | 'selective_search_data', 163 | self.name + '.mat')) 164 | assert os.path.exists(filename), \ 165 | 'Selective search data not found at: {}'.format(filename) 166 | raw_data = sio.loadmat(filename)['boxes'].ravel() 167 | 168 | box_list = [] 169 | for i in range(raw_data.shape[0]): 170 | box_list.append(raw_data[i][:, (1, 0, 3, 2)] - 1) 171 | 172 | return self.create_roidb_from_box_list(box_list, gt_roidb) 173 | 174 | def selective_search_IJCV_roidb(self): 175 | """ 176 | Return the database of selective search regions of interest. 177 | Ground-truth ROIs are also included. 178 | 179 | This function loads/saves from/to a cache file to speed up future calls. 180 | """ 181 | cache_file = os.path.join(self.cache_path, 182 | '{:s}_selective_search_IJCV_top_{:d}_roidb.pkl'. 
183 | format(self.name, self.config['top_k'])) 184 | 185 | if os.path.exists(cache_file): 186 | with open(cache_file, 'rb') as fid: 187 | roidb = cp.load(fid) 188 | print ('{} ss roidb loaded from {}'.format(self.name, cache_file)) 189 | return roidb 190 | 191 | gt_roidb = self.gt_roidb() 192 | ss_roidb = self._load_selective_search_IJCV_roidb(gt_roidb) 193 | roidb = imdb.merge_roidbs(gt_roidb, ss_roidb) 194 | with open(cache_file, 'wb') as fid: 195 | cp.dump(roidb, fid, cp.HIGHEST_PROTOCOL) 196 | print ('wrote ss roidb to {}'.format(cache_file)) 197 | 198 | return roidb 199 | 200 | def _load_selective_search_IJCV_roidb(self, gt_roidb): 201 | IJCV_path = os.path.abspath(os.path.join(self.cache_path, '..', 202 | 'selective_search_IJCV_data', 203 | 'voc_' + self._year)) 204 | assert os.path.exists(IJCV_path), \ 205 | 'Selective search IJCV data not found at: {}'.format(IJCV_path) 206 | 207 | top_k = self.config['top_k'] 208 | box_list = [] 209 | for i in range(self.num_images): 210 | filename = os.path.join(IJCV_path, self.image_index[i] + '.mat') 211 | raw_data = sio.loadmat(filename) 212 | box_list.append((raw_data['boxes'][:top_k, :]-1).astype(np.uint16)) 213 | 214 | return self.create_roidb_from_box_list(box_list, gt_roidb) 215 | 216 | def _load_pascal_annotation(self, index): 217 | """ 218 | Load image and bounding boxes info from XML file in the PASCAL VOC 219 | format. 220 | """ 221 | filename = os.path.join(self._data_path, 'Annotations', index + '.xml') 222 | # print ('Loading: {}'.format(filename)) 223 | def get_data_from_tag(node, tag): 224 | return node.getElementsByTagName(tag)[0].childNodes[0].data 225 | 226 | with open(filename) as f: 227 | data = minidom.parseString(f.read()) 228 | 229 | objs = data.getElementsByTagName('object') 230 | num_objs = len(objs) 231 | 232 | boxes = np.zeros((num_objs, 4), dtype=np.uint16) 233 | gt_classes = np.zeros((num_objs), dtype=np.int32) 234 | overlaps = np.zeros((num_objs, self.num_classes), dtype=np.float32) 235 | 236 | # Load object bounding boxes into a data frame. 237 | for ix, obj in enumerate(objs): 238 | # Make pixel indexes 0-based 239 | x1 = float(get_data_from_tag(obj, 'xmin')) - 1 240 | y1 = float(get_data_from_tag(obj, 'ymin')) - 1 241 | x2 = float(get_data_from_tag(obj, 'xmax')) - 1 242 | y2 = float(get_data_from_tag(obj, 'ymax')) - 1 243 | cls = self._class_to_ind[ 244 | str(get_data_from_tag(obj, "name")).lower().strip()] 245 | boxes[ix, :] = [x1, y1, x2, y2] 246 | gt_classes[ix] = cls 247 | overlaps[ix, cls] = 1.0 248 | 249 | overlaps = scipy.sparse.csr_matrix(overlaps) 250 | 251 | return {'boxes' : boxes, 252 | 'gt_classes': gt_classes, 253 | 'gt_overlaps' : overlaps, 254 | 'flipped' : False} 255 | 256 | def _write_voc_results_file(self, all_boxes, output_dir): 257 | comp_id = 'comp4' 258 | if self.config['use_salt']: 259 | comp_id += '-{}'.format(os.getpid()) 260 | 261 | for cls_ind, cls in enumerate(self.classes): 262 | if cls == '__background__': 263 | continue 264 | print ('Writing {} VOC results file'.format(cls)) 265 | filename = self._get_voc_results_file_template(output_dir).format(cls) 266 | with open(filename, 'wt') as f: 267 | for im_ind, index in enumerate(self.image_index): 268 | dets = all_boxes[cls_ind][im_ind] 269 | if dets == []: 270 | continue 271 | # the VOCdevkit expects 1-based indices 272 | for k in range(dets.shape[0]): 273 | f.write('{:s} {:.3f} {:.1f} {:.1f} {:.1f} {:.1f}\n'. 
274 | format(index, dets[k, -1], 275 | dets[k, 0] + 1, dets[k, 1] + 1, 276 | dets[k, 2] + 1, dets[k, 3] + 1)) 277 | return comp_id 278 | 279 | def evaluate_detections(self, all_boxes, output_dir, boUsePythonImpl = True, use_07_metric = False): 280 | self._write_voc_results_file(all_boxes, output_dir) 281 | if not boUsePythonImpl: 282 | self._do_matlab_eval(comp_id, output_dir) 283 | else: 284 | self._do_python_eval(output_dir, use_07_metric) 285 | return [] 286 | 287 | def _do_matlab_eval(self, comp_id, output_dir='output'): 288 | rm_results = self.config['cleanup'] 289 | 290 | path = os.path.join(os.path.dirname(__file__), 291 | 'VOCdevkit-matlab-wrapper') 292 | cmd = 'cd {} && '.format(path) 293 | cmd += '{:s} -nodisplay -nodesktop '.format(datasets.MATLAB) 294 | cmd += '-r "dbstop if error; ' 295 | cmd += 'voc_eval(\'{:s}\',\'{:s}\',\'{:s}\',\'{:s}\',{:d}); quit;"' \ 296 | .format(self._devkit_path, comp_id, 297 | self._image_set, output_dir, int(rm_results)) 298 | print('Running:\n{}'.format(cmd)) 299 | status = subprocess.call(cmd, shell=True) 300 | 301 | def competition_mode(self, on): 302 | if on: 303 | self.config['use_salt'] = False 304 | self.config['cleanup'] = False 305 | else: 306 | self.config['use_salt'] = True 307 | self.config['cleanup'] = True 308 | 309 | ######################################################################### 310 | # Python evaluation functions (copied from faster-RCNN) 311 | ########################################################################## 312 | def _get_voc_results_file_template(self, evalDir): 313 | if not os.path.exists(evalDir): 314 | os.makedirs(evalDir) 315 | filename = self._image_set + '_{:s}.txt' 316 | return os.path.join(evalDir, filename) 317 | 318 | def _do_python_eval(self, output_dir='output', use_07_metric=None): 319 | annopath = os.path.join(self._devkit_path, 'VOC' + self._year, 'Annotations', '{}.xml') 320 | imagesetfile = os.path.join( 321 | self._devkit_path, 322 | 'VOC' + self._year, 323 | 'ImageSets', 324 | 'Main', 325 | self._image_set + '.txt') 326 | aps = [] 327 | # The PASCAL VOC metric changed in 2010 328 | if use_07_metric == None: 329 | use_07_metric = True if int(self._year) < 2010 else False 330 | 331 | print ('VOC07 metric? 
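Each class-specific results file written by _write_voc_results_file above is plain text with one detection per line, image_id score x1 y1 x2 y2, in 1-based coordinates, e.g. (illustrative values):

000012 0.912 48.0 240.0 195.0 371.0
000017 0.433 8.0 12.0 352.0 498.0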
' + ('Yes' if use_07_metric else 'No')) 332 | if not os.path.isdir(output_dir): 333 | os.mkdir(output_dir) 334 | for i, cls in enumerate(self._classes): 335 | if cls == '__background__': 336 | continue 337 | filename = self._get_voc_results_file_template(output_dir).format(cls) 338 | 339 | rec, prec, ap = voc_eval( 340 | filename, annopath, imagesetfile, cls, cachedir = output_dir, ovthresh=0.5, 341 | use_07_metric=use_07_metric) 342 | aps += [ap] 343 | print('AP for {} = {:.4f}'.format(cls, ap)) 344 | with open(os.path.join(output_dir, cls + '_pr.pkl'), 'wb') as f: 345 | cp.dump({'rec': rec, 'prec': prec, 'ap': ap}, f) 346 | print('Mean AP = {:.4f}'.format(np.mean(aps))) 347 | # print('~~~~~~~~') 348 | # print('Results:') 349 | # for ap in aps: 350 | # print('{:.3f}'.format(ap)) 351 | # print('{:.3f}'.format(np.mean(aps))) 352 | # print('~~~~~~~~') 353 | # print('') 354 | print('--------------------------------------------------------------') 355 | print('Results computed with the **unofficial** Python eval code.') 356 | print('Results should be very close to the official MATLAB eval code.') 357 | print('Recompute with `./tools/reval.py --matlab ...` for your paper.') 358 | print('-- Thanks, The Management') 359 | print('--------------------------------------------------------------') 360 | 361 | if __name__ == '__main__': 362 | d = datasets.pascal_voc('trainval', '2007') 363 | res = d.roidb 364 | from IPython import embed; embed() -------------------------------------------------------------------------------- /fastRCNN/test.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | """Test a Fast R-CNN network on an imdb (image database).""" 9 | 10 | #from config import cfg #, get_output_dir 11 | #from blob import im_list_to_blob 12 | from __future__ import print_function 13 | import os, sys, cv2, numpy as np, pickle as cp, heapq 14 | from .nms import nms as nmsPython 15 | from .timer import Timer 16 | from helpers import im_detect, apply_nms 17 | from builtins import range 18 | import pdb 19 | 20 | # if sys.version_info[0] < 3: 21 | # from utils2_win64.cython_nms import nms 22 | # else: 23 | # from .utils3_win64.cython_nms import nms 24 | 25 | if sys.version_info[1] == 4 and sys.version_info[0] == 3: 26 | from .utils34_win64.cython_nms import nms 27 | elif sys.version_info[1] == 5 and sys.version_info[0] == 3: 28 | from .utils35_win64.cython_nms import nms 29 | else: 30 | print("ERROR: Python version {} not supported".format(sys.version_info)) 31 | error 32 | 33 | 34 | 35 | def _get_image_blob(im): 36 | """Converts an image into a network input. 
37 | 38 | Arguments: 39 | im (ndarray): a color image in BGR order 40 | 41 | Returns: 42 | blob (ndarray): a data blob holding an image pyramid 43 | im_scale_factors (list): list of image scales (relative to im) used 44 | in the image pyramid 45 | """ 46 | im_orig = im.astype(np.float32, copy=True) 47 | im_orig -= cfg.PIXEL_MEANS 48 | 49 | im_shape = im_orig.shape 50 | im_size_min = np.min(im_shape[0:2]) 51 | im_size_max = np.max(im_shape[0:2]) 52 | 53 | processed_ims = [] 54 | im_scale_factors = [] 55 | 56 | for target_size in cfg.TEST.SCALES: 57 | im_scale = float(target_size) / float(im_size_min) 58 | # Prevent the biggest axis from being more than MAX_SIZE 59 | if np.round(im_scale * im_size_max) > cfg.TEST.MAX_SIZE: 60 | im_scale = float(cfg.TEST.MAX_SIZE) / float(im_size_max) 61 | im = cv2.resize(im_orig, None, None, fx=im_scale, fy=im_scale, 62 | interpolation=cv2.INTER_LINEAR) 63 | im_scale_factors.append(im_scale) 64 | processed_ims.append(im) 65 | 66 | # Create a blob to hold the input images 67 | blob = im_list_to_blob(processed_ims) 68 | 69 | return blob, np.array(im_scale_factors) 70 | 71 | def _get_rois_blob(im_rois, im_scale_factors): 72 | """Converts RoIs into network inputs. 73 | 74 | Arguments: 75 | im_rois (ndarray): R x 4 matrix of RoIs in original image coordinates 76 | im_scale_factors (list): scale factors as returned by _get_image_blob 77 | 78 | Returns: 79 | blob (ndarray): R x 5 matrix of RoIs in the image pyramid 80 | """ 81 | rois, levels = _project_im_rois(im_rois, im_scale_factors) 82 | rois_blob = np.hstack((levels, rois)) 83 | return rois_blob.astype(np.float32, copy=False) 84 | 85 | def _project_im_rois(im_rois, scales): 86 | """Project image RoIs into the image pyramid built by _get_image_blob. 87 | 88 | Arguments: 89 | im_rois (ndarray): R x 4 matrix of RoIs in original image coordinates 90 | scales (list): scale factors as returned by _get_image_blob 91 | 92 | Returns: 93 | rois (ndarray): R x 4 matrix of projected RoI coordinates 94 | levels (list): image pyramid levels used by each projected RoI 95 | """ 96 | im_rois = im_rois.astype(np.float, copy=False) 97 | 98 | if len(scales) > 1: 99 | widths = im_rois[:, 2] - im_rois[:, 0] + 1 100 | heights = im_rois[:, 3] - im_rois[:, 1] + 1 101 | 102 | areas = widths * heights 103 | scaled_areas = areas[:, np.newaxis] * (scales[np.newaxis, :] ** 2) 104 | diff_areas = np.abs(scaled_areas - 224 * 224) 105 | levels = diff_areas.argmin(axis=1)[:, np.newaxis] 106 | else: 107 | levels = np.zeros((im_rois.shape[0], 1), dtype=np.int) 108 | 109 | rois = im_rois * scales[levels] 110 | 111 | return rois, levels 112 | 113 | def _get_blobs(im, rois): 114 | """Convert an image and RoIs within that image into network inputs.""" 115 | blobs = {'data' : None, 'rois' : None} 116 | blobs['data'], im_scale_factors = _get_image_blob(im) 117 | blobs['rois'] = _get_rois_blob(rois, im_scale_factors) 118 | return blobs, im_scale_factors 119 | 120 | def _bbox_pred(boxes, box_deltas): 121 | """Transform the set of class-agnostic boxes into class-specific boxes 122 | by applying the predicted offsets (box_deltas) 123 | """ 124 | if boxes.shape[0] == 0: 125 | return np.zeros((0, box_deltas.shape[1])) 126 | 127 | boxes = boxes.astype(np.float, copy=False) 128 | widths = boxes[:, 2] - boxes[:, 0] + cfg.EPS 129 | heights = boxes[:, 3] - boxes[:, 1] + cfg.EPS 130 | ctr_x = boxes[:, 0] + 0.5 * widths 131 | ctr_y = boxes[:, 1] + 0.5 * heights 132 | 133 | dx = box_deltas[:, 0::4] 134 | dy = box_deltas[:, 1::4] 135 | dw = box_deltas[:, 2::4] 136 | 
dh = box_deltas[:, 3::4] 137 | 138 | pred_ctr_x = dx * widths[:, np.newaxis] + ctr_x[:, np.newaxis] 139 | pred_ctr_y = dy * heights[:, np.newaxis] + ctr_y[:, np.newaxis] 140 | pred_w = np.exp(dw) * widths[:, np.newaxis] 141 | pred_h = np.exp(dh) * heights[:, np.newaxis] 142 | 143 | pred_boxes = np.zeros(box_deltas.shape) 144 | # x1 145 | pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * pred_w 146 | # y1 147 | pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * pred_h 148 | # x2 149 | pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * pred_w 150 | # y2 151 | pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * pred_h 152 | 153 | return pred_boxes 154 | 155 | def _clip_boxes(boxes, im_shape): 156 | """Clip boxes to image boundaries.""" 157 | # x1 >= 0 158 | boxes[:, 0::4] = np.maximum(boxes[:, 0::4], 0) 159 | # y1 >= 0 160 | boxes[:, 1::4] = np.maximum(boxes[:, 1::4], 0) 161 | # x2 < im_shape[1] 162 | boxes[:, 2::4] = np.minimum(boxes[:, 2::4], im_shape[1] - 1) 163 | # y2 < im_shape[0] 164 | boxes[:, 3::4] = np.minimum(boxes[:, 3::4], im_shape[0] - 1) 165 | return boxes 166 | 167 | # def im_detect(net, im, boxes): 168 | # """Detect object classes in an image given object proposals. 169 | # 170 | # Arguments: 171 | # net (caffe.Net): Fast R-CNN network to use 172 | # im (ndarray): color image to test (in BGR order) 173 | # boxes (ndarray): R x 4 array of object proposals 174 | # 175 | # Returns: 176 | # scores (ndarray): R x K array of object class scores (K includes 177 | # background as object category 0) 178 | # boxes (ndarray): R x (4*K) array of predicted bounding boxes 179 | # """ 180 | # blobs, unused_im_scale_factors = _get_blobs(im, boxes) 181 | # 182 | # # When mapping from image ROIs to feature map ROIs, there's some aliasing 183 | # # (some distinct image ROIs get mapped to the same feature ROI). 184 | # # Here, we identify duplicate feature ROIs, so we only compute features 185 | # # on the unique subset. 
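To make the decoding in _bbox_pred above concrete (treating cfg.EPS as negligible): a proposal (0, 0, 99, 99) has width = height = 99 and center (49.5, 49.5); applying deltas (dx, dy, dw, dh) = (0.1, 0, ln 2, 0) moves the center to x = 49.5 + 0.1 * 99 = 59.4 and doubles the width to 198, giving roughly (-39.6, 0.0, 158.4, 99.0), which _clip_boxes then clamps to the image boundaries.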
186 | # if cfg.DEDUP_BOXES > 0: 187 | # v = np.array([1, 1e3, 1e6, 1e9, 1e12]) 188 | # hashes = np.round(blobs['rois'] * cfg.DEDUP_BOXES).dot(v) 189 | # _, index, inv_index = np.unique(hashes, return_index=True, 190 | # return_inverse=True) 191 | # blobs['rois'] = blobs['rois'][index, :] 192 | # boxes = boxes[index, :] 193 | # 194 | # # reshape network inputs 195 | # net.blobs['data'].reshape(*(blobs['data'].shape)) 196 | # net.blobs['rois'].reshape(*(blobs['rois'].shape)) 197 | # blobs_out = net.forward(data=blobs['data'].astype(np.float32, copy=False), 198 | # rois=blobs['rois'].astype(np.float32, copy=False)) 199 | # if cfg.TEST.SVM: 200 | # # use the raw scores before softmax under the assumption they 201 | # # were trained as linear SVMs 202 | # scores = net.blobs['cls_score'].data 203 | # else: 204 | # # use softmax estimated probabilities 205 | # scores = blobs_out['cls_prob'] 206 | # 207 | # if cfg.TEST.BBOX_REG: 208 | # # Apply bounding-box regression deltas 209 | # box_deltas = blobs_out['bbox_pred'] 210 | # pred_boxes = _bbox_pred(boxes, box_deltas) 211 | # pred_boxes = _clip_boxes(pred_boxes, im.shape) 212 | # else: 213 | # # Simply repeat the boxes, once for each class 214 | # pred_boxes = np.tile(boxes, (1, scores.shape[1])) 215 | # 216 | # if cfg.DEDUP_BOXES > 0: 217 | # # Map scores and predictions back to the original set of boxes 218 | # scores = scores[inv_index, :] 219 | # pred_boxes = pred_boxes[inv_index, :] 220 | # 221 | # return scores, pred_boxes 222 | 223 | def vis_detections(im, class_name, dets, thresh=0.3): 224 | """Visual debugging of detections.""" 225 | import matplotlib.pyplot as plt 226 | im = im[:, :, (2, 1, 0)] 227 | for i in range(np.minimum(10, dets.shape[0])): 228 | bbox = dets[i, :4] 229 | score = dets[i, -1] 230 | if score > thresh: 231 | plt.cla() 232 | plt.imshow(im) 233 | plt.gca().add_patch( 234 | plt.Rectangle((bbox[0], bbox[1]), 235 | bbox[2] - bbox[0], 236 | bbox[3] - bbox[1], fill=False, 237 | edgecolor='g', linewidth=3) 238 | ) 239 | plt.title('{} {:.3f}'.format(class_name, score)) 240 | plt.show() 241 | 242 | 243 | 244 | # TODO: MOVE THIS TO CNTK HELPERS 245 | # def test_net_noThreshold(): 246 | # #boxes = roidb[i]['boxes'] 247 | # scores, _, _ = im_detect(net, i, boxes, feature_scale=feature_scale, classifier=classifier) 248 | # 249 | # for j in range(1, imdb.num_classes): 250 | # inds = np.where(roidb[i]['gt_classes'] == 0)[0] 251 | # cls_scores = scores[inds, j] 252 | # cls_boxes = roidb[i]['boxes'][inds] 253 | # all_boxes[j][i] = \ 254 | # np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \ 255 | # .astype(np.float32, copy=False) 256 | # 257 | 258 | 259 | def test_net(net, imdb, output_dir, feature_scale, classifier = 'svm', nmsThreshold = 0.3, 260 | boUsePythonImpl = False, boThresholdDetections = True, boApplyNms = True, 261 | overlapThreshold=0.5): 262 | """Test a Fast R-CNN network on an image database.""" 263 | num_images = len(imdb.image_index) 264 | # heuristic: keep an average of 40 detections per class per image prior 265 | # to NMS 266 | max_per_set = 40 * num_images 267 | # heuristic: keep at most 100 detections per class per image prior to NMS 268 | max_per_image = 100 269 | # detection threshold for each class (this is adaptively set based on the 270 | # max_per_set constraint) 271 | thresh = -np.inf * np.ones(imdb.num_classes) 272 | # top_scores will hold one minheap of scores per class (used to enforce 273 | # the max_per_set constraint) 274 | top_scores = [[] for _ in range(imdb.num_classes)] 275 | # all detections are collected into:
276 | # all_boxes[cls][image] = N x 5 array of detections in 277 | # (x1, y1, x2, y2, score) 278 | all_boxes = [[[] for _ in range(num_images)] 279 | for _ in range(imdb.num_classes)] 280 | 281 | #output_dir = get_output_dir(imdb, net) 282 | 283 | # timers 284 | _t = {'im_detect' : Timer(), 'misc' : Timer()} 285 | roidb = imdb.roidb 286 | 287 | if not boThresholdDetections: 288 | for i in range(num_images): 289 | if i % 100 == 0: 290 | print (" Processing image {} of {}..".format(i, num_images)) 291 | scores, _, _ = im_detect(net, i, roidb[i]['boxes'], feature_scale=feature_scale, classifier=classifier) 292 | 293 | for j in range(1, imdb.num_classes): 294 | inds = np.where(roidb[i]['gt_classes'] == 0)[0] 295 | cls_scores = scores[inds, j] 296 | cls_boxes = roidb[i]['boxes'][inds] 297 | all_boxes[j][i] = \ 298 | np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \ 299 | .astype(np.float32, copy=False) 300 | 301 | else: 302 | for i in range(num_images): 303 | if i % 100 == 0: 304 | print (" Processing image {} of {}..".format(i, num_images)) 305 | #im = cv2.imread(imdb.image_path_at(i)) 306 | #_t['im_detect'].tic() 307 | scores, _, _ = im_detect(net, i, roidb[i]['boxes'], feature_scale = feature_scale, classifier = classifier) 308 | #_t['im_detect'].toc() 309 | 310 | _t['misc'].tic() 311 | for j in range(1, imdb.num_classes): 312 | #only get detections with high scores AND exclude ground truth ROIs 313 | inds = np.where((scores[:, j] > thresh[j]) & 314 | (roidb[i]['gt_classes'] == 0))[0] 315 | cls_scores = scores[inds, j] 316 | 317 | # cls_boxes = boxes[inds, j * 4:(j + 1) * 4] 318 | boxes = roidb[i]['boxes'] 319 | cls_boxes = boxes[inds] 320 | 321 | top_inds = np.argsort(-cls_scores)[:max_per_image] 322 | cls_scores = cls_scores[top_inds] 323 | cls_boxes = cls_boxes[top_inds, :] 324 | # push new scores onto the minheap 325 | for val in cls_scores: 326 | heapq.heappush(top_scores[j], val) 327 | # if we've collected more than the max number of detections, 328 | # then pop items off the minheap and update the class threshold 329 | if len(top_scores[j]) > max_per_set: 330 | while len(top_scores[j]) > max_per_set: 331 | heapq.heappop(top_scores[j]) 332 | thresh[j] = top_scores[j][0] 333 | 334 | all_boxes[j][i] = \ 335 | np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \ 336 | .astype(np.float32, copy=False) 337 | 338 | #visualize rois 339 | if False and i == 6 and j == 15: 340 | im = cv2.imread(imdb.image_path_at(i)) 341 | if boUsePythonImpl: 342 | nms_boxes, nms_keepIndices = apply_nms(all_boxes, nmsThreshold, boUsePythonImpl = True) 343 | keep = nms_keepIndices[j][i] 344 | else: 345 | keep = nms(all_boxes[j][i], 0.3) 346 | #vis_detections(im, imdb.classes[j], all_boxes[j][i]) 347 | vis_detections(im, imdb.classes[j], all_boxes[j][i][keep, :]) #, thres=-10.0) 348 | _t['misc'].toc() 349 | 350 | # print ('im_detect: {:d}/{:d} {:.3f}s {:.3f}s' \ 351 | # .format(i + 1, num_images, _t['im_detect'].average_time, 352 | # _t['misc'].average_time)) 353 | 354 | # for j in range(1, imdb.num_classes): 355 | # thresh[j] = max(0.5, thresh[j]) 356 | # print("thresh[{}] = {}".format(j, thresh[j])) 357 | 358 | #keep only the boxes with highest score for each class 359 | # shape of all_boxes: e.g. 21 classes x 4952 images x 58 rois x 5 coord+score
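The max_per_set bookkeeping above keeps one min-heap of scores per class, so the per-class threshold rises as more images are processed; in miniature (an illustrative sketch):

import heapq

top_scores, max_per_set, thresh = [], 3, -float('inf')
for s in [0.9, 0.2, 0.8, 0.7, 0.95]:    # scores arriving image by image
    heapq.heappush(top_scores, s)
    while len(top_scores) > max_per_set:
        heapq.heappop(top_scores)       # evict the weakest score
        thresh = top_scores[0]          # weakest survivor becomes the threshold
print(thresh)                           # -> 0.8; only the top-3 scores pass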
360 | for j in range(1, imdb.num_classes): 361 | for i in range(num_images): 362 | inds = np.where(all_boxes[j][i][:, -1] > thresh[j])[0] 363 | if len(inds) == 0: 364 | all_boxes[j][i] = [] 365 | else: 366 | all_boxes[j][i] = all_boxes[j][i][inds, :] 367 | 368 | if output_dir: 369 | det_file = os.path.join(output_dir, 'detections.pkl') 370 | with open(det_file, 'wb') as f: 371 | cp.dump(all_boxes, f, cp.HIGHEST_PROTOCOL) 372 | 373 | if boApplyNms: 374 | print ("Number of rois before non-maxima suppression: %d" % sum([len(all_boxes[i][j]) for i in range(imdb.num_classes) for j in range(imdb.num_images)])) 375 | nms_dets,_ = apply_nms(all_boxes, nmsThreshold, boUsePythonImpl) 376 | print ("Number of rois after non-maxima suppression: %d" % sum([len(nms_dets[i][j]) for i in range(imdb.num_classes) for j in range(imdb.num_images)])) 377 | else: 378 | print ("Skipping non-maxima suppression") 379 | nms_dets = all_boxes 380 | 381 | print ('Evaluating detections') 382 | return imdb.evaluate_detections(nms_dets, output_dir, overlapThreshold) 383 | -------------------------------------------------------------------------------- /fastRCNN/timer.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import time 9 | 10 | class Timer(object): 11 | """A simple timer.""" 12 | def __init__(self): 13 | self.total_time = 0. 14 | self.calls = 0 15 | self.start_time = 0. 16 | self.diff = 0. 17 | self.average_time = 0. 18 | 19 | def tic(self): 20 | # using time.time instead of time.clock because time.clock 21 | # does not normalize for multithreading 22 | self.start_time = time.time() 23 | 24 | def toc(self, average=True): 25 | self.diff = time.time() - self.start_time 26 | self.total_time += self.diff 27 | self.calls += 1 28 | self.average_time = self.total_time / self.calls 29 | if average: 30 | return self.average_time 31 | else: 32 | return self.diff 33 | -------------------------------------------------------------------------------- /fastRCNN/train_svms.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # -------------------------------------------------------- 4 | # Fast R-CNN 5 | # Copyright (c) 2015 Microsoft 6 | # Licensed under The MIT License [see LICENSE for details] 7 | # Written by Ross Girshick 8 | # -------------------------------------------------------- 9 | 10 | """ 11 | Train post-hoc SVMs using the algorithm and hyper-parameters from 12 | traditional R-CNN. 13 | """ 14 | 15 | from .timer import Timer 16 | from sklearn import svm 17 | import numpy as np 18 | 19 | 20 | 21 | ################################################# 22 | # Slightly modified SVM training functions 23 | ################################################# 24 | class SVMTrainer(object): 25 | """ 26 | Trains post-hoc detection SVMs for all classes using the algorithm 27 | and hyper-parameters of traditional R-CNN. 
28 | """ 29 | 30 | def __init__(self, net, imdb, im_detect, svmWeightsPath, svmBiasPath, svmFeatScalePath, 31 | svm_C, svm_B, svm_nrEpochs, svm_retrainLimit, svm_evictThreshold, svm_posWeight, 32 | svm_targetNorm, svm_penality, svm_loss, svm_rngSeed): 33 | self.net = net 34 | self.imdb = imdb 35 | self.im_detect = im_detect 36 | self.svm_nrEpochs = svm_nrEpochs 37 | self.svm_targetNorm = svm_targetNorm 38 | self.svmWeightsPath = svmWeightsPath 39 | self.svmBiasPath = svmBiasPath 40 | self.svmFeatScalePath = svmFeatScalePath 41 | self.layer = 'fc7' 42 | self.hard_thresh = -1.0001 43 | self.neg_iou_thresh = 0.3 44 | dim = net.params['cls_score'][0].data.shape[1] 45 | self.feature_scale = self._get_feature_scale() 46 | print('Feature dim: {}'.format(dim)) 47 | print('Feature scale: {:.3f}'.format(self.feature_scale)) 48 | self.trainers = [SVMClassTrainer(cls, dim, self.feature_scale, svm_C, svm_B, svm_posWeight, svm_penality, svm_loss, 49 | svm_rngSeed, svm_retrainLimit, svm_evictThreshold) for cls in imdb.classes] 50 | 51 | 52 | def _get_feature_scale(self, num_images=100): 53 | _t = Timer() 54 | roidb = self.imdb.roidb 55 | total_norm = 0.0 56 | total_sum = 0.0 57 | count = 0.0 58 | num_images = min(num_images, self.imdb.num_images) 59 | inds = np.random.choice(range(self.imdb.num_images), size=num_images, replace=False) 60 | 61 | for i_, i in enumerate(inds): 62 | #im = cv2.imread(self.imdb.image_path_at(i)) 63 | #if roidb[i]['flipped']: 64 | # im = im[:, ::-1, :] 65 | #im = self.imdb.image_path_at(i) 66 | _t.tic() 67 | scores, boxes, feat = self.im_detect(self.net, i, roidb[i]['boxes'], boReturnClassifierScore = False) 68 | _t.toc() 69 | #feat = self.net.blobs[self.layer].data 70 | total_norm += np.sqrt((feat ** 2).sum(axis=1)).sum() 71 | total_sum += 1.0 * sum(sum(feat)) / len(feat) 72 | count += feat.shape[0] 73 | print('{}/{}: avg feature norm: {:.3f}, average value: {:.3f}'.format(i_ + 1, num_images, 74 | total_norm / count, total_sum / count)) 75 | 76 | return self.svm_targetNorm * 1.0 / (total_norm / count) 77 | 78 | def _get_pos_counts(self): 79 | counts = np.zeros((len(self.imdb.classes)), dtype=np.int) 80 | roidb = self.imdb.roidb 81 | for i in range(len(roidb)): 82 | for j in range(1, self.imdb.num_classes): 83 | I = np.where(roidb[i]['gt_classes'] == j)[0] 84 | counts[j] += len(I) 85 | 86 | for j in range(1, self.imdb.num_classes): 87 | print('class {:s} has {:d} positives'. 
88 | format(self.imdb.classes[j], counts[j])) 89 | 90 | return counts 91 | 92 | def get_pos_examples(self): 93 | counts = self._get_pos_counts() 94 | for i in range(len(counts)): 95 | self.trainers[i].alloc_pos(counts[i]) 96 | 97 | _t = Timer() 98 | roidb = self.imdb.roidb 99 | num_images = len(roidb) 100 | for i in range(num_images): 101 | #im = cv2.imread(self.imdb.image_path_at(i)) 102 | #if roidb[i]['flipped']: 103 | # im = im[:, ::-1, :] 104 | #im = self.imdb.image_path_at(i) 105 | gt_inds = np.where(roidb[i]['gt_classes'] > 0)[0] 106 | gt_boxes = roidb[i]['boxes'][gt_inds] 107 | _t.tic() 108 | scores, boxes, feat = self.im_detect(self.net, i, gt_boxes, self.feature_scale, gt_inds, boReturnClassifierScore = False) 109 | _t.toc() 110 | #feat = self.net.blobs[self.layer].data 111 | for j in range(1, self.imdb.num_classes): 112 | cls_inds = np.where(roidb[i]['gt_classes'][gt_inds] == j)[0] 113 | if len(cls_inds) > 0: 114 | cls_feat = feat[cls_inds, :] 115 | self.trainers[j].append_pos(cls_feat) 116 | if i % 50 == 0: 117 | print('get_pos_examples: {:d}/{:d} {:.3f}s' \ 118 | .format(i + 1, len(roidb), _t.average_time)) 119 | 120 | def initialize_net(self): 121 | # Start all SVM parameters at zero 122 | self.net.params['cls_score'][0].data[...] = 0 123 | self.net.params['cls_score'][1].data[...] = 0 124 | 125 | # Initialize SVMs in a smart way. Not doing this because its such 126 | # a good initialization that we might not learn something close to 127 | # the SVM solution. 128 | # # subtract background weights and biases for the foreground classes 129 | # w_bg = self.net.params['cls_score'][0].data[0, :] 130 | # b_bg = self.net.params['cls_score'][1].data[0] 131 | # self.net.params['cls_score'][0].data[1:, :] -= w_bg 132 | # self.net.params['cls_score'][1].data[1:] -= b_bg 133 | # # set the background weights and biases to 0 (where they shall remain) 134 | # self.net.params['cls_score'][0].data[0, :] = 0 135 | # self.net.params['cls_score'][1].data[0] = 0 136 | 137 | def update_net(self, cls_ind, w, b): 138 | self.net.params['cls_score'][0].data[cls_ind, :] = w 139 | self.net.params['cls_score'][1].data[cls_ind] = b 140 | 141 | def train_with_hard_negatives(self): 142 | _t = Timer() 143 | roidb = self.imdb.roidb 144 | num_images = len(roidb) 145 | 146 | for epoch in range(0,self.svm_nrEpochs): 147 | 148 | # num_images = 100 149 | for i in range(num_images): 150 | print("*** EPOCH = %d, IMAGE = %d *** " % (epoch, i)) 151 | #im = cv2.imread(self.imdb.image_path_at(i)) 152 | #if roidb[i]['flipped']: 153 | # im = im[:, ::-1, :] 154 | #im = self.imdb.image_path_at(i) 155 | _t.tic() 156 | scores, boxes, feat = self.im_detect(self.net, i, roidb[i]['boxes'], self.feature_scale) 157 | _t.toc() 158 | #feat = self.net.blobs[self.layer].data 159 | for j in range(1, self.imdb.num_classes): 160 | hard_inds = \ 161 | np.where((scores[:, j] > self.hard_thresh) & 162 | (roidb[i]['gt_overlaps'][:, j].toarray().ravel() < 163 | self.neg_iou_thresh))[0] 164 | if len(hard_inds) > 0: 165 | hard_feat = feat[hard_inds, :].copy() 166 | new_w_b = \ 167 | self.trainers[j].append_neg_and_retrain(feat=hard_feat) 168 | if new_w_b is not None: 169 | self.update_net(j, new_w_b[0], new_w_b[1]) 170 | np.savetxt(self.svmWeightsPath[:-4] + "_epoch" + str(epoch) + ".txt", self.net.params['cls_score'][0].data) 171 | np.savetxt(self.svmBiasPath[:-4] + "_epoch" + str(epoch) + ".txt", self.net.params['cls_score'][1].data) 172 | np.savetxt(self.svmFeatScalePath[:-4] + "_epoch" + str(epoch) + ".txt", [self.feature_scale]) 173 | 174 | 
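Stripped to its essence, the hard-negative mining loop above alternates between fitting and growing/pruning the negative cache, roughly as in this sketch (illustrative only; the feature dimension and constants are made up, and the real trainer re-scores all ROIs to find new false positives):

import numpy as np
from sklearn import svm

rng = np.random.RandomState(0)
pos = rng.randn(50, 4096).astype(np.float32)     # cached positive features (fixed)
neg = rng.randn(200, 4096).astype(np.float32)    # negative cache (grows and shrinks)
clf = svm.LinearSVC(C=0.001, class_weight={1: 2.0, -1: 1.0}, dual=True)
for epoch in range(2):
    X = np.vstack((pos, neg))
    y = np.hstack((np.ones(len(pos)), -np.ones(len(neg))))
    clf.fit(X, y)
    neg_scores = clf.decision_function(X)[len(pos):]
    neg = neg[neg_scores >= -1.0]                # evict easy negatives far outside the margin
    # ...append newly mined hard negatives to `neg` here before the next fit.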
print(('train_with_hard_negatives: ' 175 | '{:d}/{:d} {:.3f}s').format(i + 1, len(roidb), 176 | _t.average_time)) 177 | 178 | def train(self): 179 | # Initialize SVMs using 180 | # a. w_i = fc8_w_i - fc8_w_0 181 | # b. b_i = fc8_b_i - fc8_b_0 182 | # c. Install SVMs into net 183 | self.initialize_net() 184 | 185 | # Pass over roidb to count num positives for each class 186 | # a. Pre-allocate arrays for positive feature vectors 187 | # Pass over roidb, computing features for positives only 188 | self.get_pos_examples() 189 | 190 | # Pass over roidb 191 | # a. Compute cls_score with forward pass 192 | # b. For each class 193 | # i. Select hard negatives 194 | # ii. Add them to cache 195 | # c. For each class 196 | # i. If SVM retrain criteria met, update SVM 197 | # ii. Install new SVM into net 198 | self.train_with_hard_negatives() 199 | 200 | # One final SVM retraining for each class 201 | # Install SVMs into net 202 | for j in range(1, self.imdb.num_classes): 203 | new_w_b = self.trainers[j].append_neg_and_retrain(force=True) 204 | self.update_net(j, new_w_b[0], new_w_b[1]) 205 | 206 | #save svm 207 | np.savetxt(self.svmWeightsPath, self.net.params['cls_score'][0].data) 208 | np.savetxt(self.svmBiasPath, self.net.params['cls_score'][1].data) 209 | np.savetxt(self.svmFeatScalePath, [self.feature_scale]) 210 | 211 | 212 | class SVMClassTrainer(object): 213 | """Manages post-hoc SVM training for a single object class.""" 214 | 215 | def __init__(self, cls, dim, feature_scale, 216 | C, B, pos_weight, svm_penality, svm_loss, svm_rngSeed, svm_retrainLimit, svm_evictThreshold): 217 | self.pos = np.zeros((0, dim), dtype=np.float32) 218 | self.neg = np.zeros((0, dim), dtype=np.float32) 219 | self.B = B 220 | self.C = C 221 | self.cls = cls 222 | self.pos_weight = pos_weight 223 | self.dim = dim 224 | self.feature_scale = feature_scale 225 | if type(pos_weight) == str: #e.g. pos_weight == 'auto' 226 | class_weight = pos_weight 227 | else: 228 | class_weight = {1: pos_weight, -1: 1} 229 | 230 | self.svm = svm.LinearSVC(C=C, class_weight=class_weight, 231 | intercept_scaling=B, verbose=1, 232 | penalty=svm_penality, loss=svm_loss, 233 | random_state=svm_rngSeed, dual=True) 234 | 235 | self.pos_cur = 0 236 | self.num_neg_added = 0 237 | self.retrain_limit = svm_retrainLimit 238 | self.evict_thresh = svm_evictThreshold 239 | self.loss_history = [] 240 | 241 | def alloc_pos(self, count): 242 | self.pos_cur = 0 243 | self.pos = np.zeros((count, self.dim), dtype=np.float32) 244 | 245 | def append_pos(self, feat): 246 | num = feat.shape[0] 247 | self.pos[self.pos_cur:self.pos_cur + num, :] = feat 248 | self.pos_cur += num 249 | 250 | def train(self): 251 | print('>>> Updating {} detector <<<'.format(self.cls)) 252 | num_pos = self.pos.shape[0] 253 | num_neg = self.neg.shape[0] 254 | print('Cache holds {} pos examples and {} neg examples'. 
255 | format(num_pos, num_neg)) 256 | X = np.vstack((self.pos, self.neg)) * self.feature_scale 257 | y = np.hstack((np.ones(num_pos), 258 | -np.ones(num_neg))) 259 | self.svm.fit(X, y) 260 | w = self.svm.coef_ 261 | b = self.svm.intercept_[0] 262 | 263 | scores = self.svm.decision_function(X) 264 | pos_scores = scores[:num_pos] 265 | neg_scores = scores[num_pos:] 266 | 267 | num_neg_wrong = sum(neg_scores > 0) 268 | num_pos_wrong = sum(pos_scores < 0) 269 | meanAcc = 0.5 * (num_pos - num_pos_wrong) / num_pos + 0.5*(num_neg - num_neg_wrong) / num_neg 270 | if type(self.pos_weight) == str: 271 | pos_loss = 0 272 | else: 273 | pos_loss = (self.C * self.pos_weight * 274 | np.maximum(0, 1 - pos_scores).sum()) 275 | neg_loss = self.C * np.maximum(0, 1 + neg_scores).sum() 276 | reg_loss = 0.5 * np.dot(w.ravel(), w.ravel()) + 0.5 * b ** 2 277 | tot_loss = pos_loss + neg_loss + reg_loss 278 | self.loss_history.append((meanAcc, num_pos_wrong, num_pos, num_neg_wrong, num_neg, tot_loss, pos_loss, neg_loss, reg_loss)) 279 | for i, losses in enumerate(self.loss_history): 280 | print((' {:4d}: meanAcc={:.3f} -- pos wrong: {:5}/{:5}; neg wrong: {:5}/{:5}; ' 281 | ' obj val: {:.3f} = {:.3f} (posUnscaled) + {:.3f} (neg) + {:.3f} (reg)').format(i, *losses)) 282 | 283 | # Sanity check 284 | 285 | scores_ret = ( 286 | X * 1.0 / self.feature_scale).dot(w.T * self.feature_scale) + b 287 | assert np.allclose(scores, scores_ret[:, 0], atol=1e-5), \ 288 | "Scores from returned model don't match decision function" 289 | 290 | return ((w * self.feature_scale, b), pos_scores, neg_scores) 291 | 292 | def append_neg_and_retrain(self, feat=None, force=False): 293 | if feat is not None: 294 | num = feat.shape[0] 295 | self.neg = np.vstack((self.neg, feat)) 296 | self.num_neg_added += num 297 | if self.num_neg_added > self.retrain_limit or force: 298 | self.num_neg_added = 0 299 | new_w_b, pos_scores, neg_scores = self.train() 300 | # scores = np.dot(self.neg, new_w_b[0].T) + new_w_b[1] 301 | # easy_inds = np.where(neg_scores < self.evict_thresh)[0] 302 | print(' Pruning easy negatives') 303 | print(' before pruning: #neg = ' + str(len(self.neg))) 304 | not_easy_inds = np.where(neg_scores >= self.evict_thresh)[0] 305 | if len(not_easy_inds) > 0: 306 | self.neg = self.neg[not_easy_inds, :] 307 | # self.neg = np.delete(self.neg, easy_inds) 308 | print(' after pruning: #neg = ' + str(len(self.neg))) 309 | print(' Cache holds {} pos examples and {} neg examples'. 
310 | format(self.pos.shape[0], self.neg.shape[0])) 311 | print(' {} pos support vectors'.format((pos_scores <= 1).sum())) 312 | print(' {} neg support vectors'.format((neg_scores >= -1).sum())) 313 | return new_w_b 314 | else: 315 | return None 316 | -------------------------------------------------------------------------------- /fastRCNN/utils34_win64/cython_bbox.pyd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure/ObjectDetectionUsingCntk/9114d38f2fd7370ebb4c33268afa0d908f0116f6/fastRCNN/utils34_win64/cython_bbox.pyd -------------------------------------------------------------------------------- /fastRCNN/utils34_win64/cython_nms.pyd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure/ObjectDetectionUsingCntk/9114d38f2fd7370ebb4c33268afa0d908f0116f6/fastRCNN/utils34_win64/cython_nms.pyd -------------------------------------------------------------------------------- /fastRCNN/utils35_win64/cython_bbox.pyd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure/ObjectDetectionUsingCntk/9114d38f2fd7370ebb4c33268afa0d908f0116f6/fastRCNN/utils35_win64/cython_bbox.pyd -------------------------------------------------------------------------------- /fastRCNN/utils35_win64/cython_nms.pyd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure/ObjectDetectionUsingCntk/9114d38f2fd7370ebb4c33268afa0d908f0116f6/fastRCNN/utils35_win64/cython_nms.pyd -------------------------------------------------------------------------------- /fastRCNN/voc_eval.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast/er R-CNN 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Bharath Hariharan 5 | # -------------------------------------------------------- 6 | 7 | from __future__ import print_function 8 | import xml.etree.ElementTree as ET 9 | import os 10 | import pickle as cp 11 | import numpy as np 12 | 13 | def parse_rec(filename): 14 | """ Parse a PASCAL VOC xml file """ 15 | tree = ET.parse(filename) 16 | objects = [] 17 | for obj in tree.findall('object'): 18 | obj_struct = {} 19 | obj_struct['name'] = obj.find('name').text 20 | obj_struct['pose'] = obj.find('pose').text 21 | obj_struct['truncated'] = int(obj.find('truncated').text) 22 | obj_struct['difficult'] = int(obj.find('difficult').text) 23 | bbox = obj.find('bndbox') 24 | obj_struct['bbox'] = [int(bbox.find('xmin').text), 25 | int(bbox.find('ymin').text), 26 | int(bbox.find('xmax').text), 27 | int(bbox.find('ymax').text)] 28 | objects.append(obj_struct) 29 | 30 | return objects 31 | 32 | def voc_ap(rec, prec, use_07_metric=False): 33 | """ ap = voc_ap(rec, prec, [use_07_metric]) 34 | Compute VOC AP given precision and recall. 35 | If use_07_metric is true, uses the 36 | VOC 07 11 point method (default:False). 37 | """ 38 | if use_07_metric: 39 | # 11 point metric 40 | ap = 0. 41 | for t in np.arange(0., 1.1, 0.1): 42 | if np.sum(rec >= t) == 0: 43 | p = 0 44 | else: 45 | p = np.max(prec[rec >= t]) 46 | ap = ap + p / 11. 
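For instance, under the 11-point metric just computed: a detector whose interpolated precision is 1.0 at the recall thresholds 0.0 through 0.5 and 0.5 at the thresholds 0.6 through 1.0 gets ap = (6 * 1.0 + 5 * 0.5) / 11, about 0.77 (illustrative numbers).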
47 | else: 48 | # correct AP calculation 49 | # first append sentinel values at the end 50 | mrec = np.concatenate(([0.], rec, [1.])) 51 | mpre = np.concatenate(([0.], prec, [0.])) 52 | 53 | # compute the precision envelope 54 | for i in range(mpre.size - 1, 0, -1): 55 | mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i]) 56 | 57 | # to calculate area under PR curve, look for points 58 | # where X axis (recall) changes value 59 | i = np.where(mrec[1:] != mrec[:-1])[0] 60 | 61 | # and sum (\Delta recall) * prec 62 | ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) 63 | return ap 64 | 65 | def voc_eval(detpath, 66 | annopath, 67 | imagesetfile, 68 | classname, 69 | cachedir, 70 | ovthresh=0.5, 71 | use_07_metric=False): 72 | """rec, prec, ap = voc_eval(detpath, 73 | annopath, 74 | imagesetfile, 75 | classname, 76 | [ovthresh], 77 | [use_07_metric]) 78 | 79 | Top level function that does the PASCAL VOC evaluation. 80 | 81 | detpath: Path to detections 82 | detpath.format(classname) should produce the detection results file. 83 | annopath: Path to annotations 84 | annopath.format(imagename) should be the xml annotations file. 85 | imagesetfile: Text file containing the list of images, one image per line. 86 | classname: Category name (duh) 87 | cachedir: Directory for caching the annotations 88 | [ovthresh]: Overlap threshold (default = 0.5) 89 | [use_07_metric]: Whether to use VOC07's 11 point AP computation 90 | (default False) 91 | """ 92 | # assumes detections are in detpath.format(classname) 93 | # assumes annotations are in annopath.format(imagename) 94 | # assumes imagesetfile is a text file with each line an image name 95 | # cachedir caches the annotations in a pickle file 96 | 97 | # first load gt 98 | if cachedir: 99 | if not os.path.isdir(cachedir): 100 | os.mkdir(cachedir) 101 | cachefile = os.path.join(cachedir, 'annots.pkl') 102 | # read list of images 103 | with open(imagesetfile, 'r') as f: 104 | lines = f.readlines() 105 | imagenames = [x.strip() for x in lines] 106 | 107 | if not cachedir or not os.path.isfile(cachefile): 108 | # load annots 109 | recs = {} 110 | for i, imagename in enumerate(imagenames): 111 | recs[imagename] = parse_rec(annopath.format(imagename)) 112 | if i % 1000 == 0: 113 | print ('Reading annotation for {:d}/{:d}'.format( 114 | i + 1, len(imagenames))) 115 | # save 116 | if cachedir: 117 | print ('Saving cached annotations to {:s}'.format(cachefile)) 118 | with open(cachefile, 'wb') as f: 119 | cp.dump(recs, f) 120 | else: 121 | # load 122 | with open(cachefile, 'rb') as f: 123 | recs = cp.load(f) 124 | 125 | # extract gt objects for this class 126 | class_recs = {} 127 | npos = 0 128 | for imagename in imagenames: 129 | R = [obj for obj in recs[imagename] if obj['name'] == classname] 130 | bbox = np.array([x['bbox'] for x in R]) 131 | difficult = np.array([x['difficult'] for x in R]).astype(np.bool) 132 | det = [False] * len(R) 133 | npos = npos + sum(~difficult) 134 | class_recs[imagename] = {'bbox': bbox, 135 | 'difficult': difficult, 136 | 'det': det} 137 | 138 | # read dets 139 | detfile = detpath.format(classname) 140 | with open(detfile, 'r') as f: 141 | lines = f.readlines() 142 | 143 | splitlines = [x.strip().split(' ') for x in lines] 144 | image_ids = [x[0] for x in splitlines] 145 | confidence = np.array([float(x[1]) for x in splitlines]) 146 | BB = np.array([[float(z) for z in x[2:]] for x in splitlines]) 147 | 148 | # sort by confidence 149 | sorted_ind = np.argsort(-confidence) 150 | sorted_scores = np.sort(-confidence) 151 | 152 | BB = 
BB[sorted_ind, :] 153 | image_ids = [image_ids[x] for x in sorted_ind] 154 | 155 | # go down dets and mark TPs and FPs 156 | nd = len(image_ids) 157 | tp = np.zeros(nd) 158 | fp = np.zeros(nd) 159 | for d in range(nd): 160 | R = class_recs[image_ids[d]] 161 | bb = BB[d, :].astype(float) 162 | ovmax = -np.inf 163 | BBGT = R['bbox'].astype(float) 164 | 165 | if BBGT.size > 0: 166 | # compute overlaps 167 | # intersection 168 | ixmin = np.maximum(BBGT[:, 0], bb[0]) 169 | iymin = np.maximum(BBGT[:, 1], bb[1]) 170 | ixmax = np.minimum(BBGT[:, 2], bb[2]) 171 | iymax = np.minimum(BBGT[:, 3], bb[3]) 172 | iw = np.maximum(ixmax - ixmin + 1., 0.) 173 | ih = np.maximum(iymax - iymin + 1., 0.) 174 | inters = iw * ih 175 | 176 | # union 177 | uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) + 178 | (BBGT[:, 2] - BBGT[:, 0] + 1.) * 179 | (BBGT[:, 3] - BBGT[:, 1] + 1.) - inters) 180 | 181 | overlaps = inters / uni 182 | ovmax = np.max(overlaps) 183 | jmax = np.argmax(overlaps) 184 | 185 | if ovmax > ovthresh: 186 | if not R['difficult'][jmax]: 187 | if not R['det'][jmax]: 188 | tp[d] = 1. 189 | R['det'][jmax] = 1 190 | else: 191 | fp[d] = 1. 192 | else: 193 | fp[d] = 1. 194 | 195 | # compute precision recall 196 | fp = np.cumsum(fp) 197 | tp = np.cumsum(tp) 198 | rec = tp / float(npos) 199 | # avoid divide by zero in case the first detection matches a difficult 200 | # ground truth 201 | prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps) 202 | ap = voc_ap(rec, prec, use_07_metric) 203 | 204 | return rec, prec, ap 205 | -------------------------------------------------------------------------------- /helpers_cntk.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | from __future__ import print_function 3 | from past.utils import old_div 4 | 5 | import os, pdb, sys, numpy as np 6 | from os.path import join 7 | from helpers import readTable 8 | 9 | from cntk import load_model, Trainer, UnitType, use_default_device, placeholder, constant, cross_entropy_with_softmax, classification_error 10 | from cntk.device import use_default_device #default #gpu, set_default_device 11 | from cntk.initializer import glorot_uniform 12 | from cntk.io import MinibatchSource, ImageDeserializer, CTFDeserializer, StreamDefs, StreamDef 13 | from cntk.io.transforms import scale 14 | from cntk.learners import momentum_sgd, learning_rate_schedule, momentum_as_time_constant_schedule 15 | from cntk.logging import log_number_of_parameters, ProgressPrinter, TensorBoardProgressWriter 16 | from cntk.logging.graph import find_by_name, plot 17 | from cntk.ops import input_variable, parameter, times, combine, roipooling 18 | from cntk.ops.functions import CloneMethod 19 | 20 | 21 | #################################### 22 | # CNTK-python wrapper functions 23 | #################################### 24 | def create_mb_source(data_set, img_height, img_width, n_classes, n_rois, data_path, randomize): 25 | # set paths 26 | map_file = join(data_path, data_set + '.txt') 27 | roi_file = join(data_path, data_set + '.rois.txt') 28 | label_file = join(data_path, data_set + '.roilabels.txt') 29 | if not os.path.exists(map_file) or not os.path.exists(roi_file) or not os.path.exists(label_file): 30 | raise RuntimeError("File '%s', '%s' or '%s' does not exist. 
" % (map_file, roi_file, label_file)) 31 | 32 | # read images 33 | nrImages = len(readTable(map_file)) 34 | transforms = [scale(width=img_width, height=img_height, channels=3, 35 | scale_mode="pad", pad_value=114, interpolations='linear')] 36 | image_source = ImageDeserializer(map_file, StreamDefs(features = StreamDef(field='image', transforms=transforms))) 37 | 38 | # read rois and labels 39 | rois_dim = 4 * n_rois 40 | label_dim = n_classes * n_rois 41 | roi_source = CTFDeserializer(roi_file, StreamDefs( 42 | rois = StreamDef(field='rois', shape=rois_dim, is_sparse=False))) 43 | label_source = CTFDeserializer(label_file, StreamDefs( 44 | roiLabels = StreamDef(field='roiLabels', shape=label_dim, is_sparse=False))) 45 | 46 | # define a composite reader 47 | mb = MinibatchSource([image_source, roi_source, label_source], max_samples=sys.maxsize, randomize=randomize) 48 | return (mb, nrImages) 49 | 50 | 51 | # Defines the Fast R-CNN network model for detecting objects in images 52 | def frcn_predictor(features, rois, n_classes, base_path): 53 | # model specific variables for AlexNet 54 | model_file = base_path + "/../../../resources/cntk/AlexNet.model" 55 | roi_dim = 6 56 | feature_node_name = "features" 57 | last_conv_node_name = "conv5.y" 58 | pool_node_name = "pool3" 59 | last_hidden_node_name = "h2_d" 60 | 61 | # Load the pretrained classification net and find nodes 62 | print("Loading pre-trained model...") 63 | loaded_model = load_model(model_file) 64 | print("Loading pre-trained model... DONE.") 65 | feature_node = find_by_name(loaded_model, feature_node_name) 66 | conv_node = find_by_name(loaded_model, last_conv_node_name) 67 | pool_node = find_by_name(loaded_model, pool_node_name) 68 | last_node = find_by_name(loaded_model, last_hidden_node_name) 69 | 70 | # Clone the conv layers and the fully connected layers of the network 71 | conv_layers = combine([conv_node.owner]).clone(CloneMethod.freeze, {feature_node: placeholder()}) 72 | fc_layers = combine([last_node.owner]).clone(CloneMethod.clone, {pool_node: placeholder()}) 73 | 74 | # Create the Fast R-CNN model 75 | feat_norm = features - constant(114) 76 | conv_out = conv_layers(feat_norm) 77 | roi_out = roipooling(conv_out, rois, (roi_dim, roi_dim)) 78 | fc_out = fc_layers(roi_out) 79 | #fc_out.set_name("fc_out") 80 | 81 | # z = Dense(rois[0], num_classes, map_rank=1)(fc_out) # --> map_rank=1 is not yet supported 82 | W = parameter(shape=(4096, n_classes), init=glorot_uniform()) 83 | b = parameter(shape=n_classes, init=0) 84 | z = times(fc_out, W) + b 85 | return z, fc_out 86 | 87 | 88 | # Initialize and train a Fast R-CNN model 89 | def init_train_fast_rcnn(image_height, image_width, num_classes, num_rois, mb_size, max_epochs, cntk_lr_per_image, l2_reg_weight, 90 | momentum_time_constant, base_path, boSkipTraining = False, debug_output=False, tensorboardLogDir = None): 91 | 92 | #make sure we use GPU for training 93 | if use_default_device().type() == 0: 94 | print("WARNING: using CPU for training.") 95 | else: 96 | print("Using GPU for training.") 97 | 98 | # Instantiate the Fast R-CNN prediction model 99 | image_input = input_variable((3, image_height, image_width)) 100 | roi_input = input_variable((num_rois, 4)) 101 | label_input = input_variable((num_rois, num_classes)) 102 | frcn_output, frcn_penultimateLayer = frcn_predictor(image_input, roi_input, num_classes, base_path) 103 | 104 | if boSkipTraining: 105 | print("Using pre-trained DNN without refinement") 106 | return frcn_penultimateLayer 107 | 108 | # Create the minibatch 
source and define mapping from reader streams to network inputs 109 | minibatch_source, epoch_size = create_mb_source("train", image_height, image_width, num_classes, num_rois, 110 | base_path, randomize=True) 111 | input_map = { 112 | image_input: minibatch_source.streams.features, 113 | roi_input: minibatch_source.streams.rois, 114 | label_input: minibatch_source.streams.roiLabels 115 | } 116 | 117 | # set loss / error functions 118 | ce = cross_entropy_with_softmax(frcn_output, label_input, axis=1) 119 | pe = classification_error(frcn_output, label_input, axis=1) 120 | if debug_output: 121 | plot(frcn_output, "graph_frcn.png") 122 | 123 | # set the progress printer(s) 124 | progress_writers = [ProgressPrinter(tag='Training', num_epochs=max_epochs)] 125 | if tensorboardLogDir != None: 126 | tensorboard_writer = TensorBoardProgressWriter(freq=10, log_dir=tensorboardLogDir, model=frcn_output) 127 | progress_writers.append(tensorboard_writer) 128 | 129 | # Set learning parameters and instantiate the trainer object 130 | lr_per_sample = [f/float(num_rois) for f in cntk_lr_per_image] 131 | lr_schedule = learning_rate_schedule(lr_per_sample, unit=UnitType.sample) 132 | mm_schedule = momentum_as_time_constant_schedule(momentum_time_constant) 133 | learner = momentum_sgd(frcn_output.parameters, lr_schedule, mm_schedule, l2_regularization_weight=l2_reg_weight) 134 | trainer = Trainer(frcn_output, (ce, pe), learner, progress_writers) 135 | 136 | # Get minibatches of images and perform model training 137 | print("Training Fast R-CNN model for %s epochs." % max_epochs) 138 | log_number_of_parameters(frcn_output) 139 | for epoch in range(max_epochs): 140 | sample_count = 0 141 | 142 | # loop over minibatches in the epoch 143 | while sample_count < epoch_size: 144 | data = minibatch_source.next_minibatch(min(mb_size, epoch_size - sample_count), input_map=input_map) 145 | if sample_count % 100 == 1: 146 | print("Training in progress: epoch {} of {}, sample count {} of {}".format(epoch, max_epochs, sample_count, epoch_size)) 147 | trainer.train_minibatch(data) 148 | sample_count += trainer.previous_minibatch_sample_count # count samples processed so far 149 | trainer.summarize_training_progress() 150 | 151 | # Log mean of each parameter tensor, so that we can confirm that the parameters change indeed. 
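# Each statistic below is written as a TensorBoard scalar once per epoch; a curve that
# stays flat across epochs indicates a parameter that is not being updated. To inspect
# the curves, point TensorBoard at the log directory passed to this function, e.g.:
#   tensorboard --logdir <tensorboardLogDir>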
152 | if tensorboardLogDir != None: 153 | for parameter in frcn_output.parameters: 154 | tensorboard_writer.write_value(parameter.uid + "/mean", np.mean(parameter.value), epoch) 155 | tensorboard_writer.write_value(parameter.uid + "/std", np.std(parameter.value), epoch) 156 | tensorboard_writer.write_value(parameter.uid + "/absSum", np.sum(np.abs(parameter.value)), epoch) 157 | 158 | if debug_output: 159 | frcn_output.save_model("frcn_py_%s.model" % (epoch + 1)) 160 | return frcn_output 161 | 162 | 163 | def run_fast_rcnn(model, data_set, image_height, image_width, num_classes, num_rois, base_path, outDir): 164 | # Create the minibatch source and define mapping from reader streams to network inputs 165 | minibatch_source, num_images = create_mb_source(data_set, image_height, image_width, num_classes, num_rois, base_path, randomize=False) 166 | input_map = { 167 | model.arguments[0]: minibatch_source['features'], 168 | model.arguments[1]: minibatch_source['rois'] 169 | } 170 | 171 | # evaluate test images and write to file 172 | for imgIndex in range(0, num_images): 173 | if imgIndex % 100 == 1: 174 | print("Evaluating images {} of {}".format(imgIndex, num_images)) 175 | data = minibatch_source.next_minibatch(1, input_map=input_map) 176 | output = model.eval(data)[0] 177 | output = np.array(output, np.float32) 178 | 179 | # write to disk 180 | if imgIndex % 100 == 1: 181 | print("Writing DNN output of dimension {} to disk".format(output.shape)) 182 | outPath = outDir + str(imgIndex) + ".dat" 183 | np.savez_compressed(outPath, output) -------------------------------------------------------------------------------- /imdb_data.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | from __future__ import print_function 9 | from builtins import range 10 | import sys, os 11 | from helpers import * 12 | import scipy.sparse 13 | import scipy.io as sio 14 | import pickle as cp 15 | import numpy as np 16 | import fastRCNN 17 | 18 | 19 | class imdb_data(fastRCNN.imdb): 20 | def __init__(self, image_set, classes, maxNrRois, imgDir, roiDir, cacheDir, boAddGroundTruthRois): 21 | fastRCNN.imdb.__init__(self, image_set + ".cache") #'data_' + image_set) 22 | self._image_set = image_set 23 | self._maxNrRois = maxNrRois 24 | self._imgDir = imgDir 25 | self._roiDir = roiDir 26 | self._cacheDir = cacheDir #cache_path 27 | self._imgSubdirs ={'train': ['positive', 'negative'], 'test': ['testImages']} 28 | self._classes = classes 29 | self._class_to_ind = dict(zip(self.classes, range(self.num_classes))) 30 | self._image_ext = '.jpg' 31 | self._image_index, self._image_subdirs = self._load_image_set_index() 32 | self._roidb_handler = self.selective_search_roidb 33 | self._boAddGroundTruthRois = boAddGroundTruthRois 34 | 35 | 36 | #overwrite parent definition 37 | @property 38 | def cache_path(self): 39 | return self._cacheDir 40 | 41 | def image_path_at(self, i): 42 | """ 43 | Return the absolute path to image i in the image sequence. 44 | """ 45 | return self.image_path_from_index(self._image_subdirs[i], self._image_index[i]) 46 | 47 | def image_path_from_index(self, subdir, fname): 48 | """ 49 | Construct an image path from the image's "index" identifier. 
50 | """ 51 | image_path = os.path.join(self._imgDir, subdir, fname) 52 | assert os.path.exists(image_path), \ 53 | 'Path does not exist: {}'.format(image_path) 54 | return image_path 55 | 56 | def _load_image_set_index(self): 57 | """ 58 | Compile list of image indices and the subdirectories they are in. 59 | """ 60 | image_index = [] 61 | image_subdirs = [] 62 | for subdir in self._imgSubdirs[self._image_set]: 63 | imgFilenames = getFilesInDirectory(os.path.join(self._imgDir,subdir), self._image_ext) 64 | image_index += imgFilenames 65 | image_subdirs += [subdir] * len(imgFilenames) 66 | return image_index, image_subdirs 67 | 68 | def gt_roidb(self): 69 | """ 70 | Return the database of ground-truth regions of interest. 71 | 72 | This function loads/saves from/to a cache file to speed up future calls. 73 | """ 74 | cache_file = os.path.join(self.cache_path, self.name + '_gt_roidb.pkl') 75 | if os.path.exists(cache_file): 76 | with open(cache_file, 'rb') as fid: 77 | roidb = cp.load(fid) 78 | print ('{} gt roidb loaded from {}'.format(self.name, cache_file)) 79 | return roidb 80 | 81 | gt_roidb = [self._load_annotation(i) for i in range(self.num_images)] 82 | with open(cache_file, 'wb') as fid: 83 | cp.dump(gt_roidb, fid, cp.HIGHEST_PROTOCOL) 84 | print ('wrote gt roidb to {}'.format(cache_file)) 85 | 86 | return gt_roidb 87 | 88 | def selective_search_roidb(self): 89 | """ 90 | Return the database of selective search regions of interest. 91 | Ground-truth ROIs are also included. 92 | 93 | This function loads/saves from/to a cache file to speed up future calls. 94 | """ 95 | cache_file = os.path.join(self.cache_path, 96 | self.name + '_selective_search_roidb.pkl') 97 | 98 | if os.path.exists(cache_file): 99 | with open(cache_file, 'rb') as fid: 100 | if sys.version_info[0] < 3: 101 | roidb = cp.load(fid) 102 | else: 103 | roidb = cp.load(fid, encoding='latin1') 104 | print ('{} ss roidb loaded from {}'.format(self.name, cache_file)) 105 | return roidb 106 | 107 | gt_roidb = self.gt_roidb() 108 | ss_roidb = self._load_selective_search_roidb(gt_roidb) 109 | 110 | #add ground truth ROIs 111 | if self._boAddGroundTruthRois: 112 | roidb = self.merge_roidbs(gt_roidb, ss_roidb) 113 | else: 114 | roidb = ss_roidb 115 | 116 | #Keep max of e.g. 2000 rois 117 | if self._maxNrRois and self._maxNrRois > 0: 118 | print ("Only keeping the first %d ROIs.." 
% self._maxNrRois)
119 | for i in range(self.num_images):
120 | gt_overlaps = roidb[i]['gt_overlaps']
121 | gt_overlaps = gt_overlaps.todense()[:self._maxNrRois]
122 | gt_overlaps = scipy.sparse.csr_matrix(gt_overlaps)
123 | roidb[i]['gt_overlaps'] = gt_overlaps
124 | roidb[i]['boxes'] = roidb[i]['boxes'][:self._maxNrRois,:]
125 | roidb[i]['gt_classes'] = roidb[i]['gt_classes'][:self._maxNrRois]
126 | 
127 | with open(cache_file, 'wb') as fid:
128 | cp.dump(roidb, fid, cp.HIGHEST_PROTOCOL)
129 | print ('wrote ss roidb to {}'.format(cache_file))
130 | 
131 | return roidb
132 | 
133 | def _load_selective_search_roidb(self, gt_roidb):
134 | # box_list = nrImages x nrBoxes x 4
135 | box_list = []
136 | for imgFilename, subdir in zip(self._image_index, self._image_subdirs):
137 | roiPath = "{}/{}/{}.roi.txt".format(self._roiDir, subdir, imgFilename[:-4])
138 | assert os.path.exists(roiPath), "Error: rois file not found: " + roiPath
139 | rois = np.loadtxt(roiPath, np.int32)
140 | box_list.append(rois)
141 | return self.create_roidb_from_box_list(box_list, gt_roidb)
142 | 
143 | def _load_annotation(self, imgIndex):
144 | """
145 | Load image and bounding boxes info from human annotations.
146 | """
147 | #negative images do not have any ground truth annotations
148 | if self._image_subdirs[imgIndex].lower() == "negative":
149 | return None
150 | 
151 | imgPath = self.image_path_at(imgIndex)
152 | bboxesPaths = imgPath[:-4] + ".bboxes.tsv"
153 | labelsPaths = imgPath[:-4] + ".bboxes.labels.tsv"
154 | assert os.path.exists(bboxesPaths), "Error: ground truth bounding boxes file not found: " + bboxesPaths
155 | assert os.path.exists(labelsPaths), "Error: ground truth labels file not found: " + labelsPaths
156 | bboxes = np.loadtxt(bboxesPaths, np.float32)
157 | labels = readFile(labelsPaths)
158 | 
159 | # if the file contains only a single annotation, np.loadtxt returns a 1D array;
160 | # reshape it to 2D so that downstream code can always index rows
161 | #if len(bboxes.shape) == 1:
162 | if len(bboxes) > 0 and type(bboxes[0]) == np.float32:
163 | bboxes = np.array([bboxes])
164 | 
165 | #remove boxes marked as 'undecided' or 'exclude'
166 | indicesToKeep = find(labels, lambda x: x!='EXCLUDE' and x!='UNDECIDED')
167 | bboxes = [bboxes[i] for i in indicesToKeep]
168 | labels = [labels[i] for i in indicesToKeep]
169 | 
170 | # Load object bounding boxes into a data frame.
171 | num_objs = len(bboxes)
172 | boxes = np.zeros((num_objs,4), dtype=np.uint16)
173 | gt_classes = np.zeros(num_objs, dtype=np.int32)
174 | overlaps = np.zeros((num_objs, self.num_classes), dtype=np.float32)
175 | for bboxIndex,(bbox,label) in enumerate(zip(bboxes,labels)):
176 | cls = self._class_to_ind[label] #.decode('utf-8')]
177 | boxes[bboxIndex, :] = bbox
178 | gt_classes[bboxIndex] = cls
179 | overlaps[bboxIndex, cls] = 1.0
180 | 
181 | overlaps = scipy.sparse.csr_matrix(overlaps)
182 | 
183 | return {'boxes' : boxes,
184 | 'gt_classes': gt_classes,
185 | 'gt_overlaps' : overlaps,
186 | 'flipped' : False}
187 | 
188 | # main call to compute per-class average precision
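# Illustrative access pattern (hypothetical indices): all_boxes[classIndex][imgIndex] is
# an (nDets x 5) array whose rows are [x1, y1, x2, y2, score] detections of that class in
# that image, so e.g. all_boxes[1][0][0, -1] is the confidence of the first detection of
# class 1 in image 0.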
189 | # shape of all_boxes: e.g. 21 classes x 4952 images x 58 rois x 5 coords+score
190 | # (see also test_net() in fastRCNN\test.py)
191 | def evaluate_detections(self, all_boxes, output_dir, use_07_metric=False, overlapThreshold = 0.5):
192 | aps = []
193 | for classIndex, className in enumerate(self._classes):
194 | if className != '__background__':
195 | rec, prec, ap = self._evaluate_detections(classIndex, all_boxes, use_07_metric, overlapThreshold)
196 | aps += [[className,ap]]
197 | print('AP for {:>15} = {:.4f}'.format(className, ap))
198 | print('Mean AP = {:.4f}'.format(np.nanmean(getColumn(aps,1))))
199 | return aps
200 | 
201 | def _evaluate_detections(self, classIndex, all_boxes, use_07_metric = False, overlapThreshold = 0.5):
202 | """
203 | Top level function that does the PASCAL VOC evaluation.
204 | 
205 | [overlapThreshold]: Overlap threshold (default = 0.5)
206 | [use_07_metric]: Whether to use VOC07's 11 point AP computation (default False)
207 | """
208 | assert (len(all_boxes) == self.num_classes)
209 | assert (len(all_boxes[0]) == self.num_images)
210 | 
211 | # load ground truth annotations for this class
212 | gtInfos = []
213 | for imgIndex in range(self.num_images):
214 | imgPath = self.image_path_at(imgIndex)
215 | imgSubdir = os.path.normpath(imgPath).split(os.path.sep)[-2]
216 | if imgSubdir != 'negative':
217 | gtBoxes, gtLabels = readGtAnnotation(imgPath)
218 | gtBoxes = [box for box, label in zip(gtBoxes, gtLabels) if label == self.classes[classIndex]] #.decode('utf-8')
219 | else:
220 | gtBoxes = []
221 | gtInfos.append({'bbox': np.array(gtBoxes),
222 | 'difficult': [False] * len(gtBoxes),
223 | 'det': [False] * len(gtBoxes)})
224 | 
225 | # parse detections for this class
226 | # shape of all_boxes: e.g. 21 classes x 4952 images x 58 rois x 5 coords+score
227 | detBboxes = []
228 | detImgIndices = []
229 | detConfidences = []
230 | for imgIndex in range(self.num_images):
231 | dets = all_boxes[classIndex][imgIndex]
232 | if len(dets) > 0:  # dets is either an (nDets x 5) array or an empty list
233 | for k in range(dets.shape[0]):
234 | detImgIndices.append(imgIndex)
235 | detConfidences.append(dets[k, -1])
236 | # the VOCdevkit expects 1-based indices
237 | detBboxes.append([dets[k, 0] + 1, dets[k, 1] + 1, dets[k, 2] + 1, dets[k, 3] + 1])
238 | detBboxes = np.array(detBboxes)
239 | detConfidences = np.array(detConfidences)
240 | 
241 | # debug: visualize GT and detections
242 | # if classIndex == 15: # and imgPath.endswith("WIN_20160803_11_42_36_Pro.jpg"):
243 | # imgIndex = 6
244 | # imgPath = self.image_path_at(imgIndex)
245 | # img = imread(imgPath)
246 | # tmp_gtBoxes = gtInfos[imgIndex]['bbox']
247 | # inds = np.where(np.array(detImgIndices) == 1)[0]
248 | # tmp_detBoxes = detBboxes[inds]
249 | # print(detConfidences[inds])
250 | # drawRectangles(img, tmp_gtBoxes, color = (255, 0, 0)) #thickness=thickness)
251 | # drawRectangles(img, tmp_detBoxes, color= (0, 255, 0)) # thickness=thickness)
252 | # imshow(img, maxDim=800)
253 | 
254 | # compute precision / recall / ap
255 | rec, prec, ap = self._voc_computePrecisionRecallAp(
256 | class_recs=gtInfos,
257 | confidence=detConfidences,
258 | image_ids=detImgIndices,
259 | BB=detBboxes,
260 | ovthresh=overlapThreshold,
261 | use_07_metric=use_07_metric)
262 | 
263 | return rec, prec, ap
264 | 
265 | 
266 | ##########################################################################
267 | # Python evaluation functions (copied/refactored from faster-RCNN)
268 | ##########################################################################
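# Worked IoU example for the overlap test used below (hypothetical boxes): a detection
# bb = [0, 0, 9, 9] and a ground-truth box [5, 5, 14, 14] each cover 10 * 10 = 100 pixels
# under the +1 convention; the intersection is 5 * 5 = 25, so the overlap is
# 25 / (100 + 100 - 25) ~= 0.14, below the default ovthresh of 0.5, hence a false positive.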
269 | def _voc_computePrecisionRecallAp(self, class_recs, confidence, image_ids, BB, ovthresh=0.5, use_07_metric=False):
270 | # sort by confidence
271 | sorted_ind = np.argsort(-confidence)
272 | BB = BB[sorted_ind, :]
273 | image_ids = [image_ids[x] for x in sorted_ind]
274 | 
275 | # go down dets and mark TPs and FPs
276 | nd = len(image_ids)
277 | tp = np.zeros(nd)
278 | fp = np.zeros(nd)
279 | for d in range(nd):
280 | R = class_recs[image_ids[d]]
281 | bb = BB[d, :].astype(float)
282 | ovmax = -np.inf
283 | BBGT = R['bbox'].astype(float)
284 | 
285 | if BBGT.size > 0:
286 | # compute overlaps
287 | ixmin = np.maximum(BBGT[:, 0], bb[0])
288 | iymin = np.maximum(BBGT[:, 1], bb[1])
289 | ixmax = np.minimum(BBGT[:, 2], bb[2])
290 | iymax = np.minimum(BBGT[:, 3], bb[3])
291 | iw = np.maximum(ixmax - ixmin + 1., 0.)
292 | ih = np.maximum(iymax - iymin + 1., 0.)
293 | inters = iw * ih
294 | 
295 | # union
296 | uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) +
297 | (BBGT[:, 2] - BBGT[:, 0] + 1.) *
298 | (BBGT[:, 3] - BBGT[:, 1] + 1.) - inters)
299 | 
300 | overlaps = inters / uni
301 | ovmax = np.max(overlaps)
302 | jmax = np.argmax(overlaps)
303 | 
304 | if ovmax > ovthresh:
305 | if not R['difficult'][jmax]:
306 | if not R['det'][jmax]:
307 | tp[d] = 1.
308 | R['det'][jmax] = 1
309 | else:
310 | fp[d] = 1.
311 | else:
312 | fp[d] = 1.
313 | 
314 | # compute precision recall
315 | npos = sum([len(cr['bbox']) for cr in class_recs])
316 | fp = np.cumsum(fp)
317 | tp = np.cumsum(tp)
318 | rec = tp / float(npos)
319 | # avoid divide by zero in case the first detection matches a difficult
320 | # ground truth
321 | prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)
322 | ap = computeAveragePrecision(rec, prec, use_07_metric)
323 | return rec, prec, ap
--------------------------------------------------------------------------------
/resources/cntk/config.cntk:
--------------------------------------------------------------------------------
1 | # Fast-RCNN configuration for CNTK
2 | # For algorithm and details see http://arxiv.org/abs/1504.08083
3 | # Overview:
4 | # The Fast-RCNN algorithm uses a DNN that takes as inputs a set of images
5 | # and for each image a set of ROIs (Regions of interest). It first computes
6 | # a convolutional feature map for the entire image using a series
7 | # of convolutional layers (usually from a pretrained network). Then it
8 | # employs ROI pooling to crop out the part of the conv feature map
9 | # that corresponds to an ROI and resizes it to the input size expected
10 | # by the following layer (usually a set of pretrained fully connected layers).
11 | # Classification error and evaluation criterion are computed for each ROI.
12 | 
13 | #makeMode = false
14 | command = Train:WriteTest:WriteTrain
15 | 
16 | deviceId = "Auto"
17 | precision = "float"
18 | parallelTrain = "false"
19 | traceLevel = 1
20 | 
21 | rootDir = "."
22 | dataDir = "$rootDir$"
23 | outputDir = "$rootDir$/tmp"
24 | 
25 | modelPath = "$outputDir$/Fast-RCNN"
26 | #stderr = "$outputDir$/Fast-RCNN.log"
27 | 
28 | ImageH = 1000
29 | ImageW = 1000
30 | ImageC = 3
31 | 
32 | NumLabels = 22
33 | NumTrainROIs = 200
34 | NumTestROIs = 1000
35 | 
36 | TrainROIDim = 800 # $NumTrainROIs$ * 4
37 | TrainROILabelDim = 4400 # $NumTrainROIs$ * $NumLabels$
38 | TestROIDim = 4000 # $NumTestROIs$ * 4
39 | TestROILabelDim = 22000 # $NumTestROIs$ * $NumLabels$
40 | 
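# The four dimensions above are derived quantities and must be kept in sync by hand:
# e.g. TrainROIDim = NumTrainROIs * 4 = 200 * 4 = 800 and
# TrainROILabelDim = NumTrainROIs * NumLabels = 200 * 22 = 4400, and analogously
# TestROIDim = 1000 * 4 = 4000 and TestROILabelDim = 1000 * 22 = 22000.
41 | # For training we load a pretrained AlexNet model (AlexNet.model) and clone three parts of it.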
42 | # For the first part (up to pool1) we keep the weights fixed. The middle part contains the
43 | # remaining convolutional and pooling layers, and the last part contains the FC layers.
44 | # In the model we apply the first two cloned parts, then an ROI pooling layer and
45 | # finally the pretrained FC layers followed by a new FC layer that maps to the new
46 | # label dimensionality of NumLabels (= 22) classes.
47 | # The inputs are images (1000 x 1000 x 3), ROIs (NumTrainROIs x 4 coordinates (x, y, w, h))
48 | # and ground truth labels per ROI (NumTrainROIs x NumLabels classes).
49 | Train = {
50 | action = "train"
51 | 
52 | BrainScriptNetworkBuilder = {
53 | imageShape = $ImageH$:$ImageW$:$ImageC$ # 1000:1000:3
54 | labelShape = $NumLabels$:$NumTrainROIs$ # 22:200
55 | ROIShape = 4:$NumTrainROIs$ # 4:200
56 | 
57 | network = BS.Network.Load ("../../../resources/cntk/AlexNet.model")
58 | convLayers = BS.Network.CloneFunction(network.features, network.conv5_y, parameters = "constant")
59 | fcLayers = BS.Network.CloneFunction(network.pool3, network.h2_d)
60 | 
61 | 
62 | model (features, rois) = {
63 | featNorm = features - 114
64 | convOut = convLayers (featNorm)
65 | roiOut = ROIPooling (convOut, rois, (6:6))
66 | fcOut = fcLayers (roiOut)
67 | W = ParameterTensor{($NumLabels$:4096), init="glorotUniform"}
68 | b = ParameterTensor{$NumLabels$, init = 'zero'}
69 | z = W * fcOut + b
70 | }.z
71 | 
72 | features = Input {imageShape}
73 | roiLabels = Input {labelShape}
74 | rois = Input {ROIShape}
75 | 
76 | z = model (features, rois)
77 | 
78 | ce = CrossEntropyWithSoftmax(roiLabels, z, axis = 1)
79 | errs = ClassificationError(roiLabels, z, axis = 1)
80 | 
81 | featureNodes = (features:rois)
82 | labelNodes = (roiLabels)
83 | criterionNodes = (ce)
84 | evaluationNodes = (errs)
85 | outputNodes = (z)
86 | }
87 | 
88 | SGD = {
89 | epochSize = 0
90 | minibatchSize = 1
91 | maxEpochs = 17
92 | 
93 | #learningRatesPerSample = 0.00001
94 | #momentumAsTimeConstant = 0*5:10
95 | #dropoutRate = 0
96 | 
97 | learningRatesPerMB=0.00001*10:0.000001*5:0.0000001
98 | momentumPerMB=0.9
99 | gradUpdateType=None
100 | L2RegWeight=0.0005
101 | dropoutRate=0.5 #0*5:0.5
102 | 
103 | numMBsToShowResult = 50
104 | }
105 | 
106 | reader = {
107 | randomize = true
108 | verbosity = 2
109 | deserializers = ({
110 | type = "CNTKTextFormatDeserializer" ; module = "CNTKTextFormatReader"
111 | file = $dataDir$/train.rois.txt
112 | input = { rois = { dim = $TrainROIDim$ ; format = "dense" } }
113 | }:{
114 | type = "CNTKTextFormatDeserializer" ; module = "CNTKTextFormatReader"
115 | file = $dataDir$/train.roilabels.txt
116 | input = { roiLabels = { dim = $TrainROILabelDim$ ; format = "dense" } }
117 | }:{
118 | type = "ImageDeserializer" ; module = "ImageReader"
119 | file = $dataDir$/train.txt
120 | input = {
121 | features = { transforms = (
122 | { type = "Scale" ; width = $ImageW$ ; height = $ImageH$ ; channels = $ImageC$ ; scaleMode = "pad" ; padValue = 114 }:
123 | { type = "Transpose" }
124 | )}
125 | ignored = {labelDim = 1000}
126 | }
127 | })
128 | }
129 | }
130 | 
131 | # Write network output for entire test data set
132 | WriteTest = {
133 | action = "write"
134 | minibatchSize = 1
135 | 
136 | # outputPath = "$OutputDir$/fastrcnnNetOutput"
137 | outputPath=test
138 | 
139 | reader = {
140 | randomize = false
141 | verbosity = 2
142 | deserializers = ({
143 | type = "CNTKTextFormatDeserializer" ; module = "CNTKTextFormatReader"
144 | file = $dataDir$/test.rois.txt
145 | input = { rois = { dim = $TestROIDim$ ; format = "dense" } }
146 | }:{
147 | type = "CNTKTextFormatDeserializer" ; module = "CNTKTextFormatReader"
148 | file = $dataDir$/test.roilabels.txt
149 | input = { roiLabels = { dim = $TestROILabelDim$ ; format = "dense" } }
150 | }:{
151 | type = "ImageDeserializer" ; module = "ImageReader"
152 | file = $dataDir$/test.txt
153 | input = {
154 | features = { transforms = (
155 | { type = "Scale" ; width = $ImageW$ ; height = $ImageH$ ; channels = $ImageC$ ; scaleMode = "pad" ; padValue = 114 }:
156 | { type = "Transpose" }
157 | )}
158 | ignored = {labelDim = 1000}
159 | }
160 | })
161 | }
162 | }
163 | 
164 | # Write network output for entire train data set
165 | WriteTrain = {
166 | action = "write"
167 | minibatchSize = 1
168 | 
169 | # outputPath = "$OutputDir$/fastrcnnNetOutput"
170 | outputPath=train
171 | 
172 | reader = {
173 | randomize = false
174 | verbosity = 2
175 | deserializers = ({
176 | type = "CNTKTextFormatDeserializer" ; module = "CNTKTextFormatReader"
177 | file = $dataDir$/train.rois.txt
178 | input = { rois = { dim = $TestROIDim$ ; format = "dense" } }
179 | }:{
180 | type = "CNTKTextFormatDeserializer" ; module = "CNTKTextFormatReader"
181 | file = $dataDir$/train.roilabels.txt
182 | input = { roiLabels = { dim = $TestROILabelDim$ ; format = "dense" } }
183 | }:{
184 | type = "ImageDeserializer" ; module = "ImageReader"
185 | file = $dataDir$/train.txt
186 | input = {
187 | features = { transforms = (
188 | { type = "Scale" ; width = $ImageW$ ; height = $ImageH$ ; channels = $ImageC$ ; scaleMode = "pad" ; padValue = 114 }:
189 | { type = "Transpose" }
190 | )}
191 | ignored = {labelDim = 1000}
192 | }
193 | })
194 | }
195 | }
196 | 
197 | 
--------------------------------------------------------------------------------
/resources/cntk/model.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Azure/ObjectDetectionUsingCntk/9114d38f2fd7370ebb4c33268afa0d908f0116f6/resources/cntk/model.pdf
--------------------------------------------------------------------------------
/resources/python35_64bit_requirements/opencv_python-3.2.0-cp35-cp35m-win_amd64.whl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Azure/ObjectDetectionUsingCntk/9114d38f2fd7370ebb4c33268afa0d908f0116f6/resources/python35_64bit_requirements/opencv_python-3.2.0-cp35-cp35m-win_amd64.whl
--------------------------------------------------------------------------------
/resources/python35_64bit_requirements/requirements.txt:
--------------------------------------------------------------------------------
1 | ./opencv_python-3.2.0-cp35-cp35m-win_amd64.whl
2 | scikit-learn
3 | Pillow
4 | future
5 | dlib
6 | EasyDict
--------------------------------------------------------------------------------
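Installation note (a sketch, not part of the original repository): requirements.txt references the bundled OpenCV wheel by a relative path, so it is assumed that pip is invoked from inside the requirements directory:

cd resources/python35_64bit_requirements
pip install -r requirements.txt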