├── .gitignore ├── README.md └── project ├── proj1 ├── Assignment 1.docx ├── code │ ├── __init__.py │ ├── proj1.ipynb │ ├── proj1_test_filtering.ipynb │ ├── student_code.py │ └── utils.py ├── data │ ├── bicycle.bmp │ ├── bird.bmp │ ├── cat.bmp │ ├── dog.bmp │ ├── einstein.bmp │ ├── fish.bmp │ ├── marilyn.bmp │ ├── motorcycle.bmp │ ├── plane.bmp │ └── submarine.bmp ├── results │ ├── blur_image.jpg │ ├── high_frequencies.jpg │ ├── high_pass_image.jpg │ ├── hybrid_image.jpg │ ├── hybrid_image_scales.jpg │ ├── identity_image.jpg │ ├── laplacian_image.jpg │ ├── large_blur_image.jpg │ ├── low_frequencies.jpg │ └── sobel_image.jpg └── zip_submission.py ├── proj2 ├── Assignment 2.pdf ├── annotate_correspondences │ ├── collect_ground_truth_corr.py │ ├── eval_file.pkl │ ├── sydney_opera_house1.jpg │ └── sydney_opera_house2.jpg ├── code │ ├── __init__.py │ ├── examples.py │ ├── proj2.ipynb │ ├── student_feature_matching.py │ ├── student_harris.py │ ├── student_sift.py │ └── utils.py ├── data │ ├── Episcopal Gaudi │ │ ├── 3743214471_1b5bbfda98_o.jpg │ │ └── 4386465943_8cf9776378_o.jpg │ ├── Mount Rushmore │ │ ├── 9021235130_7c2acd9554_o.jpg │ │ └── 9318872612_a255c874fb_o.jpg │ └── Notre Dame │ │ ├── 4191453057_c86028ce1f_o.jpg │ │ └── 921919841_a30df938f2_o.jpg └── results │ ├── circles0.jpg │ ├── circles1.jpg │ ├── circles2.jpg │ ├── eval.jpg │ ├── lines0.jpg │ ├── lines1.jpg │ ├── lines2.jpg │ ├── vis_circles.jpg │ └── vis_lines.jpg ├── proj3 ├── Assigment3.pdf └── code │ ├── __init__.py │ ├── assignment3.ipynb │ ├── student_code.py │ ├── utils.py │ └── vocab.pkl ├── proj4 ├── Assignment4.pdf └── code │ ├── __init__.py │ ├── proj5.ipynb │ ├── student_code.py │ └── utils.py └── proj5 ├── Assigment5.pdf └── code ├── Assignment5.ipynb ├── __init__.py ├── student_code.py ├── utils.py └── utils_gpu.py /.gitignore: -------------------------------------------------------------------------------- 1 | project/proj2/data/ 2 | project/proj3/data/ 3 | project/proj4/data/ 4 | project/proj5/data/ 5 | .ipynb_checkpoints/ 6 | *.pyc 7 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # CS308-Computer-Vision 2 | 3 | ## Image Channels 4 | 5 | RGB -> HSV (Hue, Saturation, Value) 6 | 7 | - V = max 8 | - S = max - min / max 9 | - H = 10 | - if max = R, 60 * (G - B) / (max - min) 11 | - if max = G, 120 * (B - R) / (max - min) 12 | - if max = B, 240 * (R - G) / (max - min) 13 | 14 | ## Histogram Equalization 15 | 16 | **T(k) = floor((L - 1) sum(p0..k))** 17 | 18 | - L = intensity 19 | - pn = number of pixels with intensity n / total number of pixels 20 | 21 | ## Convolution 22 | 23 | ```python 24 | h, w, channels = image.shape 25 | fh, fw = kernel.shape[:2] 26 | pad_h = (fh - 1) // 2 27 | pad_w = (fw - 1) // 2 28 | image = np.pad(image, [(pad_h, pad_h), (pad_w, pad_w), (0, 0)], 'symmetric') 29 | 30 | kernel = kernel[..., None] # [fH, fW] -> [fH, fW, 1] 31 | output = np.zeros((h, w, channels), dtype='float32') 32 | for r in range(h): 33 | for c in range(w): 34 | # image patch: [fH, fW, 3] 35 | # `filter`: [fH, fW, 1] -> [fH, fW, 3] 36 | result = image[r:r+fh, c:c+fw] * kernel 37 | output[r, c, :] = np.sum(np.sum(result, axis=0), axis=0) 38 | ``` 39 | 40 | ## Filtering 41 | 42 | **Gaussian filter** 43 | 44 | - weights = 1/(2 * pi * std^2) * exp(-(x^2 + y^2) / (2 * std^2)) / max 45 | 46 | **Sobel filter** 47 | 48 | - mean = [1, 2, 1] 49 | - gradient = [1 0 -1] 50 | - horizontal sobel = [[1, 2, 1], 
[0, 0, 0], [-1, -2, -1]]
51 | - vertical sobel = [[-1, 0, 1], [-2, 0, 2], [-1, 0, 1]]
52 | 
53 | Laplacian of Gaussian (LoG) filter
54 | 
55 | - 2nd derivative = ((x^2 + y^2) / std^4 - 2 / std^2) * G
56 | 
57 | Low pass
58 | 
59 | - Gaussian
60 | 
61 | High pass
62 | 
63 | - 1 - Gaussian
64 | 
65 | ## Fourier
66 | 
67 | DFT
68 | 
69 | - X(k) = sum(n=0..N-1, x(n) * exp(-2pi * j * k * n / N))
70 | 
71 | IDFT
72 | 
73 | - x(n) = 1/N * sum(k=0..N-1, X(k) * exp(2pi * j * k * n / N))
74 | 
75 | 2D DFT
76 | 
77 | - F(u, v) = sum(x=0..M-1, y=0..N-1, f(x, y) * exp(-2pi * j * (ux / M + vy / N)))
78 | 
79 | Gabor
80 | 
81 | ## Pyramid
82 | 
83 | Gaussian Pyramid
84 | 
85 | - scales (2^k)
86 | 
87 | - Gaussian filter with (2^k) stds
88 | 
89 | **Laplacian pyramid**
90 | 
91 | - interpolate
92 | 
93 | - DoG: G(k) - G(k-1)
94 | 
95 | Image Blending
96 | 
97 | - G(k) + L(k-1) + ... + L(1)
98 | 
99 | ## Transformation
100 | 
101 | Affine Transform
102 | 
103 | - I -> aI + b
104 | 
105 | Rotation Matrix
106 | 
107 | - R = [[cos, -sin], [sin, cos]]
108 | - [x', y'] = R * [x, y]
109 | 
110 | Warping
111 | 
112 | ## Keypoint Matching
113 | 
114 | Interest points
115 | 
116 | - repeatability
117 | - distinctiveness
118 | 
119 | **Harris Corner Detection**
120 | 
121 | - Change in appearance of window w(x,y) for the shift [u,v]:
122 | - E(u,v) = ∑ w(x, y)*[I(x+u, y+v) - I(x,y)]^2
123 | - w(x, y) = (1 or Gaussian) in window
124 | - Second-order Taylor expansion of E(u,v) about (0,0):
125 | - E(u,v) = [u, v] M [u, v].T
126 | - M = ∑ w(x, y) [[Ix^2, Ix * Iy], [Ix * Iy, Iy^2]]
127 | - M = [[grad(x)^2, grad(x) * grad(y)], [grad(x) * grad(y), grad(y)^2]]
128 | - Gaussian filtering
129 | - Corner response function
130 | - R = det(M) − a * trace(M)^2
131 | - R = grad(x)^2 * grad(y)^2 - [grad(x) * grad(y)]^2 - a * [grad(x)^2 + grad(y)^2]^2
132 | - R > threshold
133 | - Take the points of local maxima (non-maximum suppression)
134 | 
135 | - Invariance:
136 | - invariant to translation and rotation
137 | - not invariant to scaling
138 | 
139 | **SIFT Descriptor**
140 | 
141 | - gradient orientation histogram
142 | - 4x4x8=128 array weighted by gradient magnitude
143 | - define feature width for each keypoint
144 | - normalize, threshold, normalize
145 | 
146 | **Matching**
147 | 
148 | - Nearest Neighbor Distance Ratio
149 | - NNDR = d(NN1) / d(NN2) (distance to the nearest neighbor over distance to the second nearest)
150 | 
151 | ## RANSAC
152 | 
153 | - fit a model to random minimal samples, count inliers, keep the model with the most inliers
154 | 
155 | ## Hough Transform
156 | 
157 | shape detection
158 | 
159 | voting scheme
160 | 
161 | rho = x * cos(theta) + y * sin(theta)
162 | 
163 | ## Manifold Learning
164 | 
165 | Dimensionality Reduction
166 | 
167 | Unsupervised learning + continuous
168 | 
169 | Linear methods
170 | 
171 | - **Principal component analysis (PCA)**
172 | - the principal axes are the orthonormal axes onto which the variance retained under projection is maximal
173 | - cov(X, Y) = 1/n * ∑ (Xi - Xm) * (Yi - Ym)
174 | - Multidimensional scaling (MDS)
175 | 
176 | Nonlinear methods
177 | 
178 | - Kernel PCA
179 | - Locally linear embedding (LLE)
180 | - Isomap
181 | - Laplacian eigenmaps (LE)
182 | - **t-distributed stochastic neighbor embedding (t-SNE)**
183 | 
184 | ## Classification
185 | 
186 | Supervised learning + discrete
187 | 
188 | **Support Vector Machine**
189 | 
190 | - 2 classes
191 | 
192 | - for support vectors x+, x-: wx + b = +-1 -> w * (x+ - x-) = 2 -> Margin = 2 / |w|
193 | 
194 | - min{1/2 * |w|^2}, s.t. y(wx+b) >= 1
195 | 
196 | - Lagrange: a >= 0, L = 1/2 * |w|^2 - ∑ a * (y * (wx + b) - 1)
197 | 
198 | - dual problem: min(w,b) max(a>=0) L -> max(a>=0) min(w,b) L
199 | - derivatives = 0 -> w = ∑ a * y * x, ∑ a * y = 0
200 | - 
L = max(a>=0) ∑ a - 1/2 * ∑∑ {ai * aj * yi * yj * xi * xj} 201 | - KKT -> a(y(wx+b) - 1) = 0 (complementary slackness) 202 | - f = ∑ a * yi * xi * x + b 203 | - SMO algorithm 204 | 205 | - Soft margin 206 | 207 | - slack variables ξ 208 | 209 | - min{1/2 * |w|^2 + C * ∑ξ}, y(wx+b) >= 1 - ξ, ξ >=0 210 | - 0 <= a <= C 211 | 212 | - Non-linear classification 213 | 214 | - K(x, z) = Φ(x) * Φ(z), x = input space, z = feature space, Φ = feature map 215 | - K is semi-positive definite symmetric function 216 | - L = max(a>=0) ∑ a - 1/2 * ∑∑ {ai * aj * yi * yj * K(xi, xj)} 217 | - f = ∑ a * yi * K(xi, x) + b 218 | 219 | - Multi-class classification 220 | 221 | - m class -> m SVMs 222 | 223 | ## Clustering 224 | 225 | unsupervised learning + discrete 226 | 227 | - Applications 228 | - summary 229 | - counting 230 | - segmentation 231 | - prediction 232 | - K-means 233 | - Mean-shift -------------------------------------------------------------------------------- /project/proj1/Assignment 1.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kemo-Huang/CS308-Computer-Vision/bc83936aa04bf0dca86294595632a9e1d0092f9a/project/proj1/Assignment 1.docx -------------------------------------------------------------------------------- /project/proj1/code/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kemo-Huang/CS308-Computer-Vision/bc83936aa04bf0dca86294595632a9e1d0092f9a/project/proj1/code/__init__.py -------------------------------------------------------------------------------- /project/proj1/code/student_code.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | import utils 4 | 5 | def conv2d(img, krnl): 6 | krnl_h, krnl_w = krnl.shape[:2] 7 | pad_h = (krnl_h - 1) // 2 8 | pad_w = (krnl_w - 1) // 2 9 | img = np.pad(img, [(pad_h, pad_h), (pad_w, pad_w)], 'constant') # zero-padding 10 | img_h, img_w = img.shape[:2] 11 | shape = (img_h - krnl_h + 1, img_w - krnl_w + 1, krnl_h, krnl_w) 12 | strides = np.array([img_w, 1, img_w, 1]) * img.itemsize 13 | img = np.lib.stride_tricks.as_strided(img, shape, strides) 14 | return np.tensordot(img, krnl, axes=[(2, 3), (0, 1)]) 15 | 16 | 17 | def my_imfilter(image, filter): 18 | """ 19 | Apply a filter to an image. Return the filtered image. 20 | 21 | Args 22 | - image: numpy nd-array of dim (m, n, c) 23 | - filter: numpy nd-array of dim (k, k) 24 | Returns 25 | - filtered_image: numpy nd-array of dim (m, n, c) 26 | 27 | HINTS: 28 | - You may not use any libraries that do the work for you. Using numpy to work 29 | with matrices is fine and encouraged. Using opencv or similar to do the 30 | filtering for you is not allowed. 31 | - I encourage you to try implementing this naively first, just be aware that 32 | it may take an absurdly long time to run. You will need to get a function 33 | that takes a reasonable amount of time to run so that the TAs can verify 34 | your code works. 35 | - Remember these are RGB images, accounting for the final image dimension. 
36 | """ 37 | 38 | assert filter.shape[0] % 2 == 1 39 | assert filter.shape[1] % 2 == 1 40 | 41 | filtered_image = np.zeros_like(image) 42 | 43 | if len(image.shape) == 2: 44 | filtered_image = conv2d(image, filter) 45 | else: 46 | for i in range(image.shape[2]): 47 | filtered_image[:,:,i] = conv2d(image[:,:,i], filter) 48 | 49 | return filtered_image 50 | 51 | 52 | def create_hybrid_image(image1, image2, filter): 53 | """ 54 | Takes two images and creates a hybrid image. Returns the low 55 | frequency content of image1, the high frequency content of 56 | image 2, and the hybrid image. 57 | 58 | Args 59 | - image1: numpy nd-array of dim (m, n, c) 60 | - image2: numpy nd-array of dim (m, n, c) 61 | Returns 62 | - low_frequencies: numpy nd-array of dim (m, n, c) 63 | - high_frequencies: numpy nd-array of dim (m, n, c) 64 | - hybrid_image: numpy nd-array of dim (m, n, c) 65 | 66 | HINTS: 67 | - You will use your my_imfilter function in this function. 68 | - You can get just the high frequency content of an image by removing its low 69 | frequency content. Think about how to do this in mathematical terms. 70 | - Don't forget to make sure the pixel values are >= 0 and <= 1. This is known 71 | as 'clipping'. 72 | - If you want to use images with different dimensions, you should resize them 73 | in the notebook code. 74 | """ 75 | 76 | assert image1.shape[0] == image2.shape[0] 77 | assert image1.shape[1] == image2.shape[1] 78 | assert image1.shape[2] == image2.shape[2] 79 | 80 | low_frequencies = my_imfilter(image1, filter) 81 | high_frequencies = image2 - my_imfilter(image2, filter) 82 | hybrid_image = low_frequencies + high_frequencies 83 | 84 | # visualization 85 | high_frequencies += 1 - high_frequencies.max() 86 | 87 | return np.clip(low_frequencies, 0, 1), np.clip(high_frequencies, 0, 1), np.clip(hybrid_image, 0, 1) 88 | -------------------------------------------------------------------------------- /project/proj1/code/utils.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | 4 | def vis_hybrid_image(hybrid_image): 5 | scales = 5 6 | scale_factor = 0.5 7 | padding = 5 8 | original_height = hybrid_image.shape[0] 9 | num_colors = 1 if hybrid_image.ndim == 2 else 3 10 | 11 | output = np.copy(hybrid_image) 12 | cur_image = np.copy(hybrid_image) 13 | for scale in range(2, scales+1): 14 | # add padding 15 | output = np.hstack((output, np.ones((original_height, padding, num_colors), 16 | dtype=np.float32))) 17 | 18 | # downsample image 19 | cur_image = cv2.resize(cur_image, (0, 0), fx=scale_factor, fy=scale_factor) 20 | 21 | # pad the top to append to the output 22 | pad = np.ones((original_height-cur_image.shape[0], cur_image.shape[1], 23 | num_colors), dtype=np.float32) 24 | tmp = np.vstack((pad, cur_image)) 25 | output = np.hstack((output, tmp)) 26 | 27 | return output 28 | 29 | def im2single(im): 30 | im = im.astype(np.float32) / 255 31 | return im 32 | 33 | def single2im(im): 34 | im *= 255 35 | im = im.astype(np.uint8) 36 | return im 37 | 38 | def load_image(path): 39 | return im2single(cv2.imread(path))[:, :, ::-1] 40 | 41 | def save_image(path, im): 42 | return cv2.imwrite(path, single2im(im.copy())[:, :, ::-1]) 43 | 44 | def im_range(im): 45 | im -= im.min() 46 | im = im / im.max() 47 | return im 48 | 49 | -------------------------------------------------------------------------------- /project/proj1/data/bicycle.bmp: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Kemo-Huang/CS308-Computer-Vision/bc83936aa04bf0dca86294595632a9e1d0092f9a/project/proj1/data/bicycle.bmp -------------------------------------------------------------------------------- /project/proj1/data/bird.bmp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kemo-Huang/CS308-Computer-Vision/bc83936aa04bf0dca86294595632a9e1d0092f9a/project/proj1/data/bird.bmp -------------------------------------------------------------------------------- /project/proj1/data/cat.bmp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kemo-Huang/CS308-Computer-Vision/bc83936aa04bf0dca86294595632a9e1d0092f9a/project/proj1/data/cat.bmp -------------------------------------------------------------------------------- /project/proj1/data/dog.bmp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kemo-Huang/CS308-Computer-Vision/bc83936aa04bf0dca86294595632a9e1d0092f9a/project/proj1/data/dog.bmp -------------------------------------------------------------------------------- /project/proj1/data/einstein.bmp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kemo-Huang/CS308-Computer-Vision/bc83936aa04bf0dca86294595632a9e1d0092f9a/project/proj1/data/einstein.bmp -------------------------------------------------------------------------------- /project/proj1/data/fish.bmp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kemo-Huang/CS308-Computer-Vision/bc83936aa04bf0dca86294595632a9e1d0092f9a/project/proj1/data/fish.bmp -------------------------------------------------------------------------------- /project/proj1/data/marilyn.bmp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kemo-Huang/CS308-Computer-Vision/bc83936aa04bf0dca86294595632a9e1d0092f9a/project/proj1/data/marilyn.bmp -------------------------------------------------------------------------------- /project/proj1/data/motorcycle.bmp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kemo-Huang/CS308-Computer-Vision/bc83936aa04bf0dca86294595632a9e1d0092f9a/project/proj1/data/motorcycle.bmp -------------------------------------------------------------------------------- /project/proj1/data/plane.bmp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kemo-Huang/CS308-Computer-Vision/bc83936aa04bf0dca86294595632a9e1d0092f9a/project/proj1/data/plane.bmp -------------------------------------------------------------------------------- /project/proj1/data/submarine.bmp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kemo-Huang/CS308-Computer-Vision/bc83936aa04bf0dca86294595632a9e1d0092f9a/project/proj1/data/submarine.bmp -------------------------------------------------------------------------------- /project/proj1/results/blur_image.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kemo-Huang/CS308-Computer-Vision/bc83936aa04bf0dca86294595632a9e1d0092f9a/project/proj1/results/blur_image.jpg -------------------------------------------------------------------------------- 
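The result images in this folder are produced by the proj1 filtering / hybrid-image code above (student_code.py and utils.py), driven by proj1.ipynb (not shown in this excerpt). Below is a minimal usage sketch; the dog/cat image pair, the cutoff frequency of 7, and running from inside project/proj1/code are illustrative assumptions, not values taken from the notebook.

```python
# Minimal sketch: build a Gaussian kernel, blur one image, and make a hybrid image.
# Assumes the chosen image pair has matching dimensions (resize otherwise, as the
# create_hybrid_image docstring notes).
import cv2
import numpy as np
from student_code import my_imfilter, create_hybrid_image
from utils import load_image, save_image, vis_hybrid_image

image1 = load_image('../data/dog.bmp')   # low-frequency source
image2 = load_image('../data/cat.bmp')   # high-frequency source

cutoff_frequency = 7                     # assumed value
k = cv2.getGaussianKernel(ksize=cutoff_frequency * 4 + 1, sigma=cutoff_frequency)
kernel = np.dot(k, k.T)                  # outer product of the 1D kernel -> 2D Gaussian

blur_image = my_imfilter(image2, kernel)
save_image('../results/blur_image.jpg', blur_image)

low_freq, high_freq, hybrid = create_hybrid_image(image1, image2, kernel)
save_image('../results/low_frequencies.jpg', low_freq)
save_image('../results/high_frequencies.jpg', high_freq)
save_image('../results/hybrid_image.jpg', hybrid)
save_image('../results/hybrid_image_scales.jpg', vis_hybrid_image(hybrid))
```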
/project/proj1/results/high_frequencies.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kemo-Huang/CS308-Computer-Vision/bc83936aa04bf0dca86294595632a9e1d0092f9a/project/proj1/results/high_frequencies.jpg -------------------------------------------------------------------------------- /project/proj1/results/high_pass_image.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kemo-Huang/CS308-Computer-Vision/bc83936aa04bf0dca86294595632a9e1d0092f9a/project/proj1/results/high_pass_image.jpg -------------------------------------------------------------------------------- /project/proj1/results/hybrid_image.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kemo-Huang/CS308-Computer-Vision/bc83936aa04bf0dca86294595632a9e1d0092f9a/project/proj1/results/hybrid_image.jpg -------------------------------------------------------------------------------- /project/proj1/results/hybrid_image_scales.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kemo-Huang/CS308-Computer-Vision/bc83936aa04bf0dca86294595632a9e1d0092f9a/project/proj1/results/hybrid_image_scales.jpg -------------------------------------------------------------------------------- /project/proj1/results/identity_image.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kemo-Huang/CS308-Computer-Vision/bc83936aa04bf0dca86294595632a9e1d0092f9a/project/proj1/results/identity_image.jpg -------------------------------------------------------------------------------- /project/proj1/results/laplacian_image.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kemo-Huang/CS308-Computer-Vision/bc83936aa04bf0dca86294595632a9e1d0092f9a/project/proj1/results/laplacian_image.jpg -------------------------------------------------------------------------------- /project/proj1/results/large_blur_image.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kemo-Huang/CS308-Computer-Vision/bc83936aa04bf0dca86294595632a9e1d0092f9a/project/proj1/results/large_blur_image.jpg -------------------------------------------------------------------------------- /project/proj1/results/low_frequencies.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kemo-Huang/CS308-Computer-Vision/bc83936aa04bf0dca86294595632a9e1d0092f9a/project/proj1/results/low_frequencies.jpg -------------------------------------------------------------------------------- /project/proj1/results/sobel_image.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kemo-Huang/CS308-Computer-Vision/bc83936aa04bf0dca86294595632a9e1d0092f9a/project/proj1/results/sobel_image.jpg -------------------------------------------------------------------------------- /project/proj1/zip_submission.py: -------------------------------------------------------------------------------- 1 | import os 2 | import shutil 3 | 4 | def copy_directory(src, dest): 5 | try: 6 | shutil.copytree(src, dest) 7 | except shutil.Error as e: 8 | print('Directory not copied. 
Error: %s' % e) 9 | except OSError as e: 10 | print('Directory not copied. Error: %s' % e) 11 | 12 | shutil.rmtree('temp_submission', ignore_errors=True) 13 | os.mkdir('temp_submission') 14 | for dir_name in ['code', 'results']: 15 | copy_directory(dir_name, '/'.join(['temp_submission', dir_name])) 16 | shutil.make_archive('submission', 'zip', 'temp_submission') 17 | shutil.rmtree('temp_submission', ignore_errors=True) -------------------------------------------------------------------------------- /project/proj2/Assignment 2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kemo-Huang/CS308-Computer-Vision/bc83936aa04bf0dca86294595632a9e1d0092f9a/project/proj2/Assignment 2.pdf -------------------------------------------------------------------------------- /project/proj2/annotate_correspondences/collect_ground_truth_corr.py: -------------------------------------------------------------------------------- 1 | 2 | # CS 6476 Computer Vision, Georgia Tech 3 | # Written by James Hays, John Lambert 4 | 5 | # This file is completely optional for the assignment, but is a way to provide 6 | # helpful service. 7 | 8 | # An interactive method to specify and then save many point correspondences 9 | # between two photographs, which will be used to generate a projective 10 | # transformation. 11 | 12 | # Pick a dozen corresponding points throughout the images, although more is 13 | # better. 14 | 15 | import pickle 16 | import cv2 17 | import matplotlib.pyplot as plt 18 | import numpy as np 19 | import sys 20 | import pdb 21 | FIGURE_HEIGHT = 6 22 | FIGURE_WIDTH = 10 23 | plt.rcParams["figure.figsize"] = (FIGURE_WIDTH,FIGURE_HEIGHT) 24 | 25 | sys.path.append('../code') 26 | 27 | from utils import load_image, show_correspondence_lines 28 | from pathlib import Path 29 | 30 | class CorrespondenceAnnotator(object): 31 | def __init__(self): 32 | 33 | self.image1 = load_image('./sydney_opera_house1.jpg') 34 | self.image2 = load_image('./sydney_opera_house2.jpg') 35 | self.corr_file = Path('./sydney_opera_house_correspondences.pkl') 36 | f, (ax1, ax2) = plt.subplots(1, 2, figsize=(FIGURE_WIDTH,FIGURE_HEIGHT)) 37 | self.ax1 = ax1 38 | self.ax2 = ax2 39 | self.x1 = [] # x locations in image 1 40 | self.y1 = [] # y locations in image 1 41 | self.x2 = [] # corresponding x locations in image 2 42 | self.y2 = [] # corresponding y locations in image 2 43 | 44 | def collect_ground_truth_corr(self): 45 | """ 46 | Collect ground truth image-to-image correspondences by manually annotating them. 47 | 48 | This function checks if some corresponding points are already saved, and 49 | if so, resumes work from there. 
50 | """ 51 | if self.corr_file.exists(): 52 | self.load_pkl_correspondences() 53 | 54 | # The correspondences that already exist 55 | corr_image = show_correspondence_lines( self.image1, self.image2, 56 | np.array(self.x1), np.array(self.y1), 57 | np.array(self.x2), np.array(self.y2)) 58 | else: 59 | self.x1 = [] 60 | self.y1 = [] 61 | self.x2 = [] 62 | self.y2 = [] 63 | 64 | self.ax1.imshow(self.image1) 65 | self.ax2.imshow(self.image2) 66 | 67 | self.mark_corrs_with_clicks() 68 | self.dump_pkl_correspondences() 69 | 70 | corr_image = show_correspondence_lines( self.image1, self.image2, 71 | np.array(self.x1), np.array(self.y1), 72 | np.array(self.x2), np.array(self.y2)) 73 | plt.gcf().clear() 74 | plt.imshow(corr_image) 75 | plt.show() 76 | 77 | def load_pkl_correspondences(self): 78 | with open(str(self.corr_file), 'rb') as f: 79 | d = pickle.load(f) 80 | 81 | self.x1 = d['x1'] 82 | self.y1 = d['y1'] 83 | self.x2 = d['x2'] 84 | self.y2 = d['y2'] 85 | 86 | def dump_pkl_correspondences(self): 87 | print('saving matched points') 88 | data_dict = {} 89 | data_dict['x1'] = self.x1 90 | data_dict['y1'] = self.y1 91 | data_dict['x2'] = self.x2 92 | data_dict['y2'] = self.y2 93 | 94 | with open(str(self.corr_file), 'wb') as f: 95 | pickle.dump(data_dict,f) 96 | 97 | def mark_corrs_with_clicks(self): 98 | """ 99 | Mark correspondences with clicks 100 | """ 101 | print('Exit the matplotlib window to stop annotation.') 102 | title = 'Click on a point in the left window\n' 103 | title += 'then on a point in the right window.\n' 104 | title += 'Exit the matplotlib window to stop annotation.\n' 105 | title += 'Afterwards, you will see the plotted correspondences.' 106 | self.ax1.set_title(title) 107 | while(1): 108 | pt = plt.ginput(1) 109 | if len(pt) == 0: 110 | break 111 | x = pt[0][0] 112 | y = pt[0][1] 113 | 114 | self.ax1.scatter(x,y,30,color='r', marker='o') 115 | self.x1 += [x] 116 | self.y1 += [y] 117 | 118 | pt = plt.ginput(1) 119 | if len(pt) == 0: 120 | break 121 | x = pt[0][0] 122 | y = pt[0][1] 123 | 124 | self.ax2.scatter(x,y,30,color='r', marker='o') 125 | self.x2 += [x] 126 | self.y2 += [y] 127 | 128 | print('({}, {}) matches to ({},{})'.format( self.x1[-1], 129 | self.y1[-1], 130 | self.x2[-1], 131 | self.y2[-1])) 132 | print('{} total points corresponded'.format(len(self.x1))) 133 | 134 | if __name__ == '__main__': 135 | ca = CorrespondenceAnnotator() 136 | ca.collect_ground_truth_corr() 137 | -------------------------------------------------------------------------------- /project/proj2/annotate_correspondences/eval_file.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kemo-Huang/CS308-Computer-Vision/bc83936aa04bf0dca86294595632a9e1d0092f9a/project/proj2/annotate_correspondences/eval_file.pkl -------------------------------------------------------------------------------- /project/proj2/annotate_correspondences/sydney_opera_house1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kemo-Huang/CS308-Computer-Vision/bc83936aa04bf0dca86294595632a9e1d0092f9a/project/proj2/annotate_correspondences/sydney_opera_house1.jpg -------------------------------------------------------------------------------- /project/proj2/annotate_correspondences/sydney_opera_house2.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Kemo-Huang/CS308-Computer-Vision/bc83936aa04bf0dca86294595632a9e1d0092f9a/project/proj2/annotate_correspondences/sydney_opera_house2.jpg -------------------------------------------------------------------------------- /project/proj2/code/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kemo-Huang/CS308-Computer-Vision/bc83936aa04bf0dca86294595632a9e1d0092f9a/project/proj2/code/__init__.py -------------------------------------------------------------------------------- /project/proj2/code/examples.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | import time 5 | from utils import * 6 | from student_feature_matching import match_features 7 | from student_sift import get_features 8 | from student_harris import get_interest_points 9 | 10 | 11 | scale_factor = 0.5 12 | feature_width = 16 # width and height of each local feature, in pixels. 13 | 14 | images = [ 15 | ['../data/Mount Rushmore/9021235130_7c2acd9554_o.jpg', 16 | '../data/Mount Rushmore/9318872612_a255c874fb_o.jpg'], 17 | ['../data/Episcopal Gaudi/4386465943_8cf9776378_o.jpg', 18 | '../data/Episcopal Gaudi/3743214471_1b5bbfda98_o.jpg'], 19 | ['../data/Capricho Gaudi/36185369_1dcbb23308_o.jpg', 20 | '../data/Capricho Gaudi/6727732233_4564516d61_o.jpg'] 21 | ] 22 | 23 | for i, pair in enumerate(images): 24 | image1 = load_image(pair[0]) 25 | image2 = load_image(pair[1]) 26 | 27 | print(f"\nstart matching image pair {i}") 28 | start_time = time.time() 29 | 30 | image1 = cv2.resize(image1, (0, 0), fx=scale_factor, fy=scale_factor) 31 | image2 = cv2.resize(image2, (0, 0), fx=scale_factor, fy=scale_factor) 32 | image1_bw = cv2.cvtColor(image1, cv2.COLOR_RGB2GRAY) 33 | image2_bw = cv2.cvtColor(image2, cv2.COLOR_RGB2GRAY) 34 | 35 | x1, y1, _, _, _ = get_interest_points(image1_bw, feature_width) 36 | x2, y2, _, _, _ = get_interest_points(image2_bw, feature_width) 37 | 38 | print('{:d} corners in image 1, {:d} corners in image 2'.format(len(x1), len(x2))) 39 | 40 | image1_features = get_features(image1_bw, x1, y1, feature_width) 41 | image2_features = get_features(image2_bw, x2, y2, feature_width) 42 | 43 | matches, _ = match_features( 44 | image1_features, image2_features, x1, y1, x2, y2) 45 | print('{:d} matches from {:d} corners'.format(len(matches), len(x1))) 46 | 47 | print(f"time cost: {time.time() - start_time}") 48 | 49 | num_pts_to_visualize = 100 50 | c1 = show_correspondence_circles(image1, image2, 51 | x1[matches[:num_pts_to_visualize, 0]], 52 | y1[matches[:num_pts_to_visualize, 0]], 53 | x2[matches[:num_pts_to_visualize, 1]], 54 | y2[matches[:num_pts_to_visualize, 1]]) 55 | plt.figure() 56 | plt.imshow(c1) 57 | plt.savefig(f'../results/circles{i}.jpg', dpi=1000) 58 | c2 = show_correspondence_lines(image1, image2, 59 | x1[matches[:num_pts_to_visualize, 0]], 60 | y1[matches[:num_pts_to_visualize, 0]], 61 | x2[matches[:num_pts_to_visualize, 1]], 62 | y2[matches[:num_pts_to_visualize, 1]]) 63 | plt.figure() 64 | plt.imshow(c2) 65 | plt.savefig(f'../results/lines{i}.jpg', dpi=1000) 66 | print("result saved") 67 | -------------------------------------------------------------------------------- /project/proj2/code/student_feature_matching.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def match_features(features1, features2, x1, y1, x2, y2): 5 
| """ 6 | This function does not need to be symmetric (e.g. it can produce 7 | different numbers of matches depending on the order of the arguments). 8 | 9 | To start with, simply implement the "ratio test", equation 4.18 in 10 | section 4.1.3 of Szeliski. There are a lot of repetitive features in 11 | these images, and all of their descriptors will look similar. The 12 | ratio test helps us resolve this issue (also see Figure 11 of David 13 | Lowe's IJCV paper). 14 | 15 | For extra credit you can implement various forms of spatial/geometric 16 | verification of matches, e.g. using the x and y locations of the features. 17 | 18 | Args: 19 | - features1: A numpy array of shape (n,feat_dim) representing one set of 20 | features, where feat_dim denotes the feature dimensionality 21 | - features2: A numpy array of shape (m,feat_dim) representing a second set 22 | features (m not necessarily equal to n) 23 | - x1: A numpy array of shape (n,) containing the x-locations of features1 24 | - y1: A numpy array of shape (n,) containing the y-locations of features1 25 | - x2: A numpy array of shape (m,) containing the x-locations of features2 26 | - y2: A numpy array of shape (m,) containing the y-locations of features2 27 | 28 | Returns: 29 | - matches: A numpy array of shape (k,2), where k is the number of matches. 30 | The first column is an index in features1, and the second column is 31 | an index in features2 32 | - confidences: A numpy array of shape (k,) with the real valued confidence for 33 | every match 34 | 35 | 'matches' and 'confidences' can be empty e.g. (0x2) and (0x1) 36 | """ 37 | threshold = 0.6 38 | max_matches = 100 39 | n = len(x1) 40 | m = len(x2) 41 | 42 | match_points = [] 43 | for i in range(n): 44 | distance = np.sum( 45 | np.square(np.tile(features1[i, :], (m, 1)) - features2), 1) 46 | argnn = np.argpartition(distance, 2)[:2] 47 | ratio = distance[argnn[0]] / distance[argnn[1]] 48 | if ratio < threshold: 49 | match_points.append([ratio, i, argnn[0]]) 50 | 51 | sorted_match_points = sorted(match_points, key=lambda x: x[0]) 52 | matches = np.array([[i[1], i[2]] 53 | for i in sorted_match_points[:max_matches]]).astype(int) 54 | confidences = np.array(i[0] for i in sorted_match_points[:max_matches]) 55 | 56 | return matches, confidences 57 | -------------------------------------------------------------------------------- /project/proj2/code/student_harris.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | 5 | 6 | def get_interest_points(image, feature_width): 7 | """ 8 | Implement the Harris corner detector (See Szeliski 4.1.1) to start with. 9 | You can create additional interest point detector functions (e.g. MSER) 10 | for extra credit. 11 | 12 | If you're finding spurious interest point detections near the boundaries, 13 | it is safe to simply suppress the gradients / corners near the edges of 14 | the image. 15 | 16 | Useful in this function in order to (a) suppress boundary interest 17 | points (where a feature wouldn't fit entirely in the image, anyway) 18 | or (b) scale the image filters being used. Or you can ignore it. 19 | 20 | By default you do not need to make scale and orientation invariant 21 | local features. 22 | 23 | The lecture slides and textbook are a bit vague on how to do the 24 | non-maximum suppression once you've thresholded the cornerness score. 25 | You are free to experiment. 
For example, you could compute connected 26 | components and take the maximum value within each component. 27 | Alternatively, you could run a max() operator on each sliding window. You 28 | could use this to ensure that every interest point is at a local maximum 29 | of cornerness. 30 | 31 | Args: 32 | - image: A numpy array of shape (m,n,c), 33 | image may be grayscale of color (your choice) 34 | - feature_width: integer representing the local feature width in pixels. 35 | 36 | Returns: 37 | - x: A numpy array of shape (N,) containing x-coordinates of interest points 38 | - y: A numpy array of shape (N,) containing y-coordinates of interest points 39 | - confidences (optional): numpy nd-array of dim (N,) containing the strength 40 | of each interest point 41 | - scales (optional): A numpy array of shape (N,) containing the scale at each 42 | interest point 43 | - orientations (optional): A numpy array of shape (N,) containing the orientation 44 | at each interest point 45 | """ 46 | confidences, scales, orientations = None, None, None 47 | 48 | # dest = cv2.cornerHarris(image, 5, 3, 0.06) 49 | # ind = np.argwhere(dest > 0.01 * dest.max()) 50 | # x = ind[:, 0] 51 | # y = ind[:, 1] 52 | 53 | ksize1 = 3 54 | ksize2 = 5 55 | sigma = 1.5 56 | k = 0.06 57 | threshold = 10000 58 | n = 1500 59 | 60 | # Compute the horizontal and vertical derivatives of the image I x and I y 61 | # by convolving the original image with derivatives of Gaussians 62 | ix = cv2.Sobel(image, -1, 1, 0, ksize=ksize1) 63 | iy = cv2.Sobel(image, -1, 0, 1, ksize=ksize1) 64 | 65 | # Compute the three images corresponding to the outer products of these gradients. 66 | # (The matrix A is symmetric, so only three entries are needed.) 67 | ixx = np.square(ix) 68 | iyy = np.square(iy) 69 | ixy = np.multiply(ix, iy) 70 | 71 | # Convolve each of these images with a larger Gaussian. 72 | gaussian = cv2.getGaussianKernel(ksize2, sigma) 73 | gxx = cv2.filter2D(ixx, -1, gaussian) 74 | gyy = cv2.filter2D(iyy, -1, gaussian) 75 | gxy = cv2.filter2D(ixy, -1, gaussian) 76 | 77 | # Compute a scalar interest measure using one of the formulas discussed above. 78 | # np.linalg.det(A) - k * (np.trace(A) ** 2) 79 | R = np.multiply(gxx, gyy) - np.square(gxy) - k * np.square(gxx + gyy) 80 | 81 | # Find local maxima above a certain threshold and report them as detected feature point locations. 
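    # Note on the block below: it ranks every pixel by its response R, keeps the
    # top `threshold` (=10000) candidates, and then applies adaptive non-maximum
    # suppression (each survivor's radius is its squared distance to the nearest
    # higher-ranked point; the `n` points with the largest radii are returned).
    # A commented-out, vectorized sketch of the simpler "local maxima above a
    # threshold" variant mentioned above (the 3x3 window and the relative
    # threshold of 0.01 * R.max() are illustrative assumptions):
    #
    #   local_max = (R == cv2.dilate(R, np.ones((3, 3), np.uint8)))
    #   candidates = np.argwhere(local_max & (R > 0.01 * R.max()))
    #   ys, xs = candidates[:, 0], candidates[:, 1]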
82 | corners = [] 83 | for row in range(R.shape[0]): 84 | for col in range(R.shape[1]): 85 | corners.append([R[row, col], col, row]) 86 | # threshold 87 | corners = np.array(sorted(corners, key=lambda x:x[0], reverse=True)[:threshold]) 88 | 89 | responses = corners[:, 0] 90 | x = corners[:, 1] 91 | y = corners[:, 2] 92 | 93 | # non-maxima suppress 94 | points = np.vstack([y, x]).T 95 | size = len(x) 96 | radii = np.zeros(size) 97 | radii[0] = np.inf 98 | for i in range(1, size): 99 | curr_response = responses[i] 100 | idx = i 101 | # while idx < size - 1: 102 | # if responses[idx+1] * 1.1 > curr_response: 103 | # idx += 1 104 | # else: 105 | # break 106 | radii[i] = np.min(np.sum(np.square(points[:idx] - points[i]), 1)) 107 | 108 | x = np.array(x[np.argpartition(radii, -n)[-n:]]) 109 | y = np.array(y[np.argpartition(radii, -n)[-n:]]) 110 | 111 | return x, y, confidences, scales, orientations 112 | -------------------------------------------------------------------------------- /project/proj2/code/student_sift.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | 4 | 5 | def get_features(image, x, y, feature_width, scales=None): 6 | """ 7 | To start with, you might want to simply use normalized patches as your 8 | local feature. This is very simple to code and works OK. However, to get 9 | full credit you will need to implement the more effective SIFT descriptor 10 | (See Szeliski 4.1.2 or the original publications at 11 | http://www.cs.ubc.ca/~lowe/keypoints/) 12 | 13 | Your implementation does not need to exactly match the SIFT reference. 14 | Here are the key properties your (baseline) descriptor should have: 15 | (1) a 4x4 grid of cells, each feature_width/4. It is simply the 16 | terminology used in the feature literature to describe the spatial 17 | bins where gradient distributions will be described. 18 | (2) each cell should have a histogram of the local distribution of 19 | gradients in 8 orientations. Appending these histograms together will 20 | give you 4x4 x 8 = 128 dimensions. 21 | (3) Each feature should be normalized to unit length. 22 | 23 | You do not need to perform the interpolation in which each gradient 24 | measurement contributes to multiple orientation bins in multiple cells 25 | As described in Szeliski, a single gradient measurement creates a 26 | weighted contribution to the 4 nearest cells and the 2 nearest 27 | orientation bins within each cell, for 8 total contributions. This type 28 | of interpolation probably will help, though. 29 | 30 | You do not have to explicitly compute the gradient orientation at each 31 | pixel (although you are free to do so). You can instead filter with 32 | oriented filters (e.g. a filter that responds to edges with a specific 33 | orientation). All of your SIFT-like feature can be constructed entirely 34 | from filtering fairly quickly in this way. 35 | 36 | You do not need to do the normalize -> threshold -> normalize again 37 | operation as detailed in Szeliski and the SIFT paper. It can help, though. 38 | 39 | Another simple trick which can help is to raise each element of the final 40 | feature vector to some power that is less than one. 41 | 42 | Args: 43 | - image: A numpy array of shape (m,n) or (m,n,c). 
can be grayscale or color, your choice 44 | - x: A numpy array of shape (k,), the x-coordinates of interest points 45 | - y: A numpy array of shape (k,), the y-coordinates of interest points 46 | - feature_width: integer representing the local feature width in pixels. 47 | You can assume that feature_width will be a multiple of 4 (i.e. every 48 | cell of your local SIFT-like feature will have an integer width 49 | and height). This is the initial window size we examine around 50 | each keypoint. 51 | - scales: Python list or tuple if you want to detect and describe features 52 | at multiple scales 53 | 54 | You may also detect and describe features at particular orientations. 55 | 56 | Returns: 57 | - fv: A numpy array of shape (k, feat_dim) representing a feature vector. 58 | "feat_dim" is the feature_dimensionality (e.g. 128 for standard SIFT). 59 | These are the computed features. 60 | """ 61 | assert image.ndim == 2, 'Image must be grayscale' 62 | ############################################################################# 63 | # TODO: YOUR CODE HERE # 64 | # If you choose to implement rotation invariance, enabling it should not # 65 | # decrease your matching accuracy. # 66 | ############################################################################# 67 | 68 | # features = 0 69 | # octave_layers = 3 70 | # contrast_threshold = 0.04 71 | # edge_threshold = 10 72 | # sigma = 1.6 73 | # sift_init_sigma = 0.5 74 | 75 | # # init Gaussian 76 | # sig_diff = np.sqrt( 77 | # max(sigma * sigma - sift_init_sigma * sift_init_sigma * 4, 0.01)) 78 | # resized = cv2.resize( 79 | # image, (2 * image.shape[1], 2 * image.shape[0]), interpolation=cv2.INTER_LINEAR) 80 | # base = cv2.GaussianBlur(resized, 0, sig_diff) 81 | 82 | # # number of octaves 83 | # # for( size_t i = 0; i < keypoints.size(); i++ ) 84 | # # { 85 | # # KeyPoint& kpt = keypoints[i]; 86 | # # float scale = 1.f/(float)(1 << -firstOctave); 87 | # # kpt.octave = (kpt.octave & ~255) | ((kpt.octave + firstOctave) & 255); 88 | # # kpt.pt *= scale; 89 | # # kpt.size *= scale; 90 | # # } 91 | # for i in range(len(x)): 92 | # scale = 0.5 93 | # octave = 0 94 | 95 | # octaves = int(round(np.log(min(base.shape)) / np.log(2) - 2)) + 1 96 | 97 | # # build Gaussian pyramid 98 | # sig_len = octave_layers + 3 99 | # sig = [sigma] * sig_len 100 | # gpyr = [None] * octaves * sig_len 101 | # k = np.pow(2, 1 / octave_layers) 102 | # for i in range(1, octave_layers + 3): 103 | # sig_prev = np.pow(k, i-1) * sigma 104 | # sig_total = sig_prev * k 105 | # sig[i] = np.sqrt(sig_total * sig_total - sig_prev * sig_prev) 106 | # for o in range(octaves): 107 | # for i in range(sig_len): 108 | # if o == 0 and i == 0: 109 | # gpyr[o * sig_len + i] = base 110 | # elif i == 0: 111 | # src = gpyr[(o - 1) * sig_len + octave_layers] 112 | # cv2.resize(src, (src.shape[1] // 2, src.shape[0] // 2), 113 | # gpyr[o * sig_len + i], interpolation=cv2.INTER_NEAREST) 114 | # else: 115 | # src = gpyr[o * sig_len + i - 1] 116 | # gpyr[o * sig_len + i] = cv2.GaussianBlur(src, 0, sig[i]) 117 | 118 | # # build DoG pyramid 119 | # dogpyr = [None] * octaves * (octave_layers + 2) 120 | # for a in range(len(dogpyr)): 121 | # o = a // (octave_layers + 2) 122 | # i = a % (octave_layers + 2) 123 | # src1 = gpyr[o*(octave_layers + 3) + i] 124 | # src2 = gpyr[o*(octave_layers + 3) + i + 1] 125 | # cv2.subtract(src1, src2, dogpyr[o*(octave_layers + 2) + i]) 126 | 127 | # SIFT descriptor 128 | ksize = 3 129 | n_angles = 8 130 | n_bins = 4 131 | n_samples = n_bins * n_bins 132 | n_pts = len(x) 
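    # Descriptor layout used below: a 4x4 grid of spatial cells (n_bins x n_bins)
    # over each feature_width x feature_width window, with n_angles = 8 orientation
    # bins per cell, giving the 4*4*8 = 128-D vector described in the docstring.
    # Orientations are quantized coarsely: theta = arctan2(iy, ix) has values above
    # 1 mapped to 2 and values below -1 mapped to -1, so ceil(theta + 1) picks one
    # of 4 bins, and the test on the sign of ix selects the first or second group
    # of 4 bins (8 bins total). The image is padded by feature_width // 2 so the
    # window indexed from each (y, x) keypoint stays in bounds. Finally each vector
    # is L2-normalized, clipped at `threshold`, and re-normalized
    # ("normalize, threshold, normalize").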
133 | threshold = 0.2 134 | 135 | fv = np.zeros((n_pts, n_angles * n_samples)) 136 | 137 | # padding 138 | image = np.pad(image, feature_width // 2) 139 | 140 | # histogram of oriented gradients 141 | ix = cv2.Sobel(image, cv2.CV_64F, 1, 0, ksize=ksize) 142 | iy = cv2.Sobel(image, cv2.CV_64F, 0, 1, ksize=ksize) 143 | magnitude = np.sqrt(np.square(ix) + np.square(iy)) 144 | theta = np.arctan2(iy, ix) 145 | 146 | theta[theta > 1] = 2 147 | theta[theta < -1] = -1 148 | x = x.astype(int) 149 | y = y.astype(int) 150 | 151 | for k in range(n_pts): 152 | histogram = np.zeros((n_bins, n_bins, n_angles)) 153 | for j in range(feature_width): 154 | for i in range(feature_width): 155 | curr = (y[k] + j, x[k] + i) 156 | if ix[curr] > 0: 157 | histogram[j // n_bins, i // n_bins, 158 | int(np.ceil(theta[curr] + 1))] += magnitude[curr] 159 | else: 160 | histogram[j // n_bins, i // n_bins, 161 | int(np.ceil(theta[curr] + 1 + n_angles//2))] += magnitude[curr] 162 | fv[k, :] = np.reshape(histogram, (1, n_angles * n_samples)) 163 | 164 | # normalize, threshold, normalize 165 | tmp = np.sqrt(np.sum(np.power(fv, 2), 1)) 166 | fv_norm = np.divide(fv, np.tile(tmp, (fv.shape[1], 1)).T) 167 | fv_norm[fv_norm > threshold] = threshold 168 | tmp = np.sqrt(np.sum(np.power(fv_norm, 2), 1)) 169 | fv = np.divide(fv_norm, np.tile(tmp, (fv.shape[1], 1)).T) 170 | return fv 171 | -------------------------------------------------------------------------------- /project/proj2/code/utils.py: -------------------------------------------------------------------------------- 1 | # Please do not modify this file. 2 | 3 | import numpy as np 4 | import cv2 5 | import pickle 6 | 7 | 8 | def im2single(im): 9 | im = im.astype(np.float32) / 255 10 | 11 | return im 12 | 13 | def single2im(im): 14 | im *= 255 15 | im = im.astype(np.uint8) 16 | 17 | return im 18 | 19 | def rgb2gray(rgb): 20 | """Convert RGB image to grayscale 21 | Args: 22 | - rgb: A numpy array of shape (m,n,c) representing an RGB image 23 | Returns: 24 | - gray: A numpy array of shape (m,n) representing the corresponding grayscale image 25 | """ 26 | return np.dot(rgb[...,:3], [0.299, 0.587, 0.144]) 27 | 28 | 29 | def load_image(path): 30 | """ 31 | Args: 32 | - path: string representing a filepath to an image 33 | """ 34 | return im2single(cv2.imread(path))[:, :, ::-1] 35 | 36 | def save_image(path, im): 37 | """ 38 | Args: 39 | - path: 40 | - im: A numpy array of shape 41 | """ 42 | return cv2.imwrite(path, single2im(im.copy())[:, :, ::-1]) 43 | 44 | def cheat_interest_points(eval_file, scale_factor): 45 | """ 46 | This function is provided for development and debugging but cannot be used in 47 | the final handin. It 'cheats' by generating interest points from known 48 | correspondences. It will only work for the 3 image pairs with known 49 | correspondences. 50 | 51 | Args: 52 | - eval_file: string representing the file path to the list of known correspondences 53 | - scale_factor: Python float representing the scale needed to map from the original 54 | image coordinates to the resolution being used for the current experiment. 
55 | 56 | Returns: 57 | - x1: A numpy array of shape (k,) containing ground truth x-coordinates of imgA correspondence pts 58 | - y1: A numpy array of shape (k,) containing ground truth y-coordinates of imgA correspondence pts 59 | - x2: A numpy array of shape (k,) containing ground truth x-coordinates of imgB correspondence pts 60 | - y2: A numpy array of shape (k,) containing ground truth y-coordinates of imgB correspondence pts 61 | """ 62 | with open(eval_file, 'rb') as f: 63 | d = pickle.load(f, encoding='latin1') 64 | 65 | return d['x1'] * scale_factor, d['y1'] * scale_factor, d['x2'] * scale_factor,\ 66 | d['y2'] * scale_factor 67 | 68 | def hstack_images(imgA, imgB): 69 | """ 70 | Stacks 2 images side-by-side and creates one combined image. 71 | 72 | Args: 73 | - imgA: A numpy array of shape (M,N,3) representing rgb image 74 | - imgB: A numpy array of shape (D,E,3) representing rgb image 75 | 76 | Returns: 77 | - newImg: A numpy array of shape (max(M,D), N+E, 3) 78 | """ 79 | Height = max(imgA.shape[0], imgB.shape[0]) 80 | Width = imgA.shape[1] + imgB.shape[1] 81 | 82 | newImg = np.zeros((Height, Width, 3), dtype=imgA.dtype) 83 | newImg[:imgA.shape[0], :imgA.shape[1], :] = imgA 84 | newImg[:imgB.shape[0], imgA.shape[1]:, :] = imgB 85 | 86 | return newImg 87 | 88 | def show_interest_points(img, X, Y): 89 | """ 90 | Visualized interest points on an image with random colors 91 | 92 | Args: 93 | - img: A numpy array of shape (M,N,C) 94 | - X: A numpy array of shape (k,) containing x-locations of interest points 95 | - Y: A numpy array of shape (k,) containing y-locations of interest points 96 | 97 | Returns: 98 | - newImg: A numpy array of shape (M,N,C) showing the original image with 99 | colored circles at keypoints plotted on top of it 100 | """ 101 | newImg = img.copy() 102 | for x, y in zip(X.astype(int), Y.astype(int)): 103 | cur_color = np.random.rand(3) 104 | newImg = cv2.circle(newImg, (x, y), 10, cur_color, -1, cv2.LINE_AA) 105 | 106 | return newImg 107 | 108 | def show_correspondence_circles(imgA, imgB, X1, Y1, X2, Y2): 109 | """ 110 | Visualizes corresponding points between two images by plotting circles at 111 | each correspondence location. Corresponding points will have the same random color. 
112 | 113 | Args: 114 | - imgA: A numpy array of shape (M,N,3) 115 | - imgB: A numpy array of shape (D,E,3) 116 | - x1: A numpy array of shape (k,) containing x-locations of keypoints in imgA 117 | - y1: A numpy array of shape (k,) containing y-locations of keypoints in imgA 118 | - x2: A numpy array of shape (j,) containing x-locations of keypoints in imgB 119 | - y2: A numpy array of shape (j,) containing y-locations of keypoints in imgB 120 | 121 | Returns: 122 | - newImg: A numpy array of shape (max(M,D), N+E, 3) 123 | """ 124 | newImg = hstack_images(imgA, imgB) 125 | shiftX = imgA.shape[1] 126 | X1 = X1.astype(np.int) 127 | Y1 = Y1.astype(np.int) 128 | X2 = X2.astype(np.int) 129 | Y2 = Y2.astype(np.int) 130 | 131 | for x1, y1, x2, y2 in zip(X1, Y1, X2, Y2): 132 | cur_color = np.random.rand(3) 133 | green = (0, 1, 0) 134 | newImg = cv2.circle(newImg, (x1, y1), 10, cur_color, -1, cv2.LINE_AA) 135 | newImg = cv2.circle(newImg, (x1, y1), 10, green, 2, cv2.LINE_AA) 136 | newImg = cv2.circle(newImg, (x2+shiftX, y2), 10, cur_color, -1, cv2.LINE_AA) 137 | newImg = cv2.circle(newImg, (x2+shiftX, y2), 10, green, 2, cv2.LINE_AA) 138 | 139 | return newImg 140 | 141 | def show_correspondence_lines(imgA, imgB, X1, Y1, X2, Y2, line_colors=None): 142 | """ 143 | Visualizes corresponding points between two images by drawing a line segment 144 | between the two images for each (x1,y1) (x2,y2) pair. 145 | 146 | Args: 147 | - imgA: A numpy array of shape (M,N,3) 148 | - imgB: A numpy array of shape (D,E,3) 149 | - x1: A numpy array of shape (k,) containing x-locations of keypoints in imgA 150 | - y1: A numpy array of shape (k,) containing y-locations of keypoints in imgA 151 | - x2: A numpy array of shape (j,) containing x-locations of keypoints in imgB 152 | - y2: A numpy array of shape (j,) containing y-locations of keypoints in imgB 153 | - line_colors: A numpy array of shape (N x 3) with colors of correspondence lines (optional) 154 | 155 | Returns: 156 | - newImg: A numpy array of shape (max(M,D), N+E, 3) 157 | """ 158 | newImg = hstack_images(imgA, imgB) 159 | shiftX = imgA.shape[1] 160 | X1 = X1.astype(np.int) 161 | Y1 = Y1.astype(np.int) 162 | X2 = X2.astype(np.int) 163 | Y2 = Y2.astype(np.int) 164 | 165 | dot_colors = np.random.rand(len(X1), 3) 166 | if line_colors is None: 167 | line_colors = dot_colors 168 | 169 | for x1, y1, x2, y2, dot_color, line_color in zip(X1, Y1, X2, Y2, dot_colors, 170 | line_colors): 171 | newImg = cv2.circle(newImg, (x1, y1), 5, dot_color, -1) 172 | newImg = cv2.circle(newImg, (x2+shiftX, y2), 5, dot_color, -1) 173 | newImg = cv2.line(newImg, (x1, y1), (x2+shiftX, y2), line_color, 2, 174 | cv2.LINE_AA) 175 | return newImg 176 | 177 | def show_ground_truth_corr(imgA, imgB, corr_file, show_lines=True): 178 | """ 179 | Show the ground truth correspondeces 180 | 181 | Args: 182 | - imgA: string, representing the filepath to the first image 183 | - imgB: string, representing the filepath to the second image 184 | - corr_file: filepath to pickle (.pkl) file containing the correspondences 185 | - show_lines: boolean, whether to visualize the correspondences as line segments 186 | """ 187 | imgA = load_image(imgA) 188 | imgB = load_image(imgB) 189 | with open(corr_file, 'rb') as f: 190 | d = pickle.load(f) 191 | if show_lines: 192 | return show_correspondence_lines(imgA, imgB, d['x1'], d['y1'], d['x2'], d['y2']) 193 | else: 194 | # show circles 195 | return show_correspondence_circles(imgA, imgB, d['x1'], d['y1'], d['x2'], d['y2']) 196 | 197 | def 
load_corr_pkl_file(corr_fpath): 198 | """ Load ground truth correspondences from a pickle (.pkl) file. """ 199 | with open(corr_fpath, 'rb') as f: 200 | d = pickle.load(f, encoding='latin1') 201 | x1 = d['x1'].squeeze() 202 | y1 = d['y1'].squeeze() 203 | x2 = d['x2'].squeeze() 204 | y2 = d['y2'].squeeze() 205 | 206 | return x1,y1,x2,y2 207 | 208 | 209 | def evaluate_correspondence(imgA, imgB, corr_fpath, scale_factor, x1_est, y1_est, 210 | x2_est, y2_est, confidences=None, num_req_matches=100): 211 | """ 212 | Function to evaluate estimated correspondences against ground truth. 213 | 214 | The evaluation requires 100 matches to receive full credit 215 | when num_req_matches=100 because we define accuracy as: 216 | 217 | Accuracy = (true_pos)/(true_pos+false_pos) * min(num_matches,num_req_matches)/num_req_matches 218 | 219 | Args: 220 | - imgA: A numpy array of shape (M,N,C) representing a first image 221 | - imgB: A numpy array of shape (M,N,C) representing a second image 222 | - corr_fpath: string, representing a filepath to a .pkl file containing ground truth correspondences 223 | - scale_factor: scale factor on the size of the images 224 | - x1_est: A numpy array of shape (k,) containing estimated x-coordinates of imgA correspondence pts 225 | - y1_est: A numpy array of shape (k,) containing estimated y-coordinates of imgA correspondence pts 226 | - x2_est: A numpy array of shape (k,) containing estimated x-coordinates of imgB correspondence pts 227 | - y2_est: A numpy array of shape (k,) containing estimated y-coordinates of imgB correspondence pts 228 | - confidences: (optional) confidence values in the matches 229 | """ 230 | if confidences is None: 231 | confidences = np.random.rand(len(x1_est)) 232 | confidences /= np.max(confidences) 233 | 234 | x1_est = x1_est.squeeze() / scale_factor 235 | y1_est = y1_est.squeeze() / scale_factor 236 | x2_est = x2_est.squeeze() / scale_factor 237 | y2_est = y2_est.squeeze() / scale_factor 238 | 239 | num_matches = x1_est.shape[0] 240 | 241 | x1,y1,x2,y2 = load_corr_pkl_file(corr_fpath) 242 | 243 | good_matches = [False for _ in range(len(x1_est))] 244 | # array marking which GT pairs are already matched 245 | matched = [False for _ in range(len(x1))] 246 | 247 | # iterate through estimated pairs in decreasing order of confidence 248 | priority = np.argsort(-confidences) 249 | for i in priority: 250 | # print('Examining ({:4.0f}, {:4.0f}) to ({:4.0f}, {:4.0f})'.format( 251 | # x1_est[i], y1_est[i], x2_est[i], y2_est[i])) 252 | cur_offset = np.asarray([x1_est[i]-x2_est[i], y1_est[i]-y2_est[i]]) 253 | # for each x1_est find nearest ground truth point in x1 254 | dists = np.linalg.norm(np.vstack((x1_est[i]-x1, y1_est[i]-y1)), axis=0) 255 | best_matches = np.argsort(dists) 256 | 257 | # find the best match that is not taken yet 258 | for match_idx in best_matches: 259 | if not matched[match_idx]: 260 | break 261 | else: 262 | continue 263 | 264 | # A match is good only if 265 | # (1) An unmatched GT point exists within 150 pixels, and 266 | # (2) GT correspondence offset is within 25 pixels of estimated 267 | # correspondence offset 268 | gt_offset = np.asarray([x1[match_idx]-x2[match_idx], 269 | y1[match_idx]-y2[match_idx]]) 270 | offset_dist = np.linalg.norm(cur_offset-gt_offset) 271 | if (dists[match_idx] < 150.0) and (offset_dist < 25): 272 | good_matches[i] = True 273 | print('Correct') 274 | else: 275 | print('Incorrect') 276 | 277 | print('You found {}/{} required matches'.format(num_matches, num_req_matches)) 278 | accuracy = 
np.mean(good_matches) * min(num_matches, num_req_matches)*1./num_req_matches 279 | print('Accuracy = {:f}'.format(accuracy)) 280 | green = np.asarray([0, 1, 0], dtype=float) 281 | red = np.asarray([1, 0, 0], dtype=float) 282 | line_colors = np.asarray([green if m else red for m in good_matches]) 283 | 284 | return accuracy, show_correspondence_lines(imgA, imgB, 285 | x1_est*scale_factor, y1_est*scale_factor, 286 | x2_est*scale_factor, y2_est*scale_factor, 287 | line_colors) 288 | -------------------------------------------------------------------------------- /project/proj2/data/Episcopal Gaudi/3743214471_1b5bbfda98_o.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kemo-Huang/CS308-Computer-Vision/bc83936aa04bf0dca86294595632a9e1d0092f9a/project/proj2/data/Episcopal Gaudi/3743214471_1b5bbfda98_o.jpg -------------------------------------------------------------------------------- /project/proj2/data/Episcopal Gaudi/4386465943_8cf9776378_o.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kemo-Huang/CS308-Computer-Vision/bc83936aa04bf0dca86294595632a9e1d0092f9a/project/proj2/data/Episcopal Gaudi/4386465943_8cf9776378_o.jpg -------------------------------------------------------------------------------- /project/proj2/data/Mount Rushmore/9021235130_7c2acd9554_o.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kemo-Huang/CS308-Computer-Vision/bc83936aa04bf0dca86294595632a9e1d0092f9a/project/proj2/data/Mount Rushmore/9021235130_7c2acd9554_o.jpg -------------------------------------------------------------------------------- /project/proj2/data/Mount Rushmore/9318872612_a255c874fb_o.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kemo-Huang/CS308-Computer-Vision/bc83936aa04bf0dca86294595632a9e1d0092f9a/project/proj2/data/Mount Rushmore/9318872612_a255c874fb_o.jpg -------------------------------------------------------------------------------- /project/proj2/data/Notre Dame/4191453057_c86028ce1f_o.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kemo-Huang/CS308-Computer-Vision/bc83936aa04bf0dca86294595632a9e1d0092f9a/project/proj2/data/Notre Dame/4191453057_c86028ce1f_o.jpg -------------------------------------------------------------------------------- /project/proj2/data/Notre Dame/921919841_a30df938f2_o.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kemo-Huang/CS308-Computer-Vision/bc83936aa04bf0dca86294595632a9e1d0092f9a/project/proj2/data/Notre Dame/921919841_a30df938f2_o.jpg -------------------------------------------------------------------------------- /project/proj2/results/circles0.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kemo-Huang/CS308-Computer-Vision/bc83936aa04bf0dca86294595632a9e1d0092f9a/project/proj2/results/circles0.jpg -------------------------------------------------------------------------------- /project/proj2/results/circles1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kemo-Huang/CS308-Computer-Vision/bc83936aa04bf0dca86294595632a9e1d0092f9a/project/proj2/results/circles1.jpg 
-------------------------------------------------------------------------------- /project/proj2/results/circles2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kemo-Huang/CS308-Computer-Vision/bc83936aa04bf0dca86294595632a9e1d0092f9a/project/proj2/results/circles2.jpg -------------------------------------------------------------------------------- /project/proj2/results/eval.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kemo-Huang/CS308-Computer-Vision/bc83936aa04bf0dca86294595632a9e1d0092f9a/project/proj2/results/eval.jpg -------------------------------------------------------------------------------- /project/proj2/results/lines0.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kemo-Huang/CS308-Computer-Vision/bc83936aa04bf0dca86294595632a9e1d0092f9a/project/proj2/results/lines0.jpg -------------------------------------------------------------------------------- /project/proj2/results/lines1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kemo-Huang/CS308-Computer-Vision/bc83936aa04bf0dca86294595632a9e1d0092f9a/project/proj2/results/lines1.jpg -------------------------------------------------------------------------------- /project/proj2/results/lines2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kemo-Huang/CS308-Computer-Vision/bc83936aa04bf0dca86294595632a9e1d0092f9a/project/proj2/results/lines2.jpg -------------------------------------------------------------------------------- /project/proj2/results/vis_circles.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kemo-Huang/CS308-Computer-Vision/bc83936aa04bf0dca86294595632a9e1d0092f9a/project/proj2/results/vis_circles.jpg -------------------------------------------------------------------------------- /project/proj2/results/vis_lines.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kemo-Huang/CS308-Computer-Vision/bc83936aa04bf0dca86294595632a9e1d0092f9a/project/proj2/results/vis_lines.jpg -------------------------------------------------------------------------------- /project/proj3/Assigment3.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kemo-Huang/CS308-Computer-Vision/bc83936aa04bf0dca86294595632a9e1d0092f9a/project/proj3/Assigment3.pdf -------------------------------------------------------------------------------- /project/proj3/code/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /project/proj3/code/student_code.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | import pickle 4 | from utils import load_image, load_image_gray 5 | import cyvlfeat as vlfeat 6 | import sklearn.metrics.pairwise as sklearn_pairwise 7 | from sklearn.svm import LinearSVC 8 | from IPython.core.debugger import set_trace 9 | from cyvlfeat.sift.dsift import dsift 10 | from cyvlfeat.kmeans import kmeans 11 | from time import time 12 | from joblib import Parallel, delayed, 
parallel_backend 13 | 14 | 15 | def get_tiny_images(image_paths): 16 | """ 17 | This feature is inspired by the simple tiny images used as features in 18 | 80 million tiny images: a large dataset for non-parametric object and 19 | scene recognition. A. Torralba, R. Fergus, W. T. Freeman. IEEE 20 | Transactions on Pattern Analysis and Machine Intelligence, vol.30(11), 21 | pp. 1958-1970, 2008. http://groups.csail.mit.edu/vision/TinyImages/ 22 | 23 | To build a tiny image feature, simply resize the original image to a very 24 | small square resolution, e.g. 16x16. You can either resize the images to 25 | square while ignoring their aspect ratio or you can crop the center 26 | square portion out of each image. Making the tiny images zero mean and 27 | unit length (normalizing them) will increase performance modestly. 28 | 29 | Useful functions: 30 | - cv2.resize 31 | - use load_image(path) to load a RGB images and load_image_gray(path) to 32 | load grayscale images 33 | 34 | Args: 35 | - image_paths: list of N elements containing image paths 36 | 37 | Returns: 38 | - feats: N x d numpy array of resized and then vectorized tiny images 39 | e.g. if the images are resized to 16x16, d would be 256 40 | """ 41 | # parameter 42 | width = 16 43 | 44 | N = len(image_paths) 45 | d = width * width 46 | # dummy feats variable 47 | feats = np.zeros((N, d)) 48 | for i in range(N): 49 | image = load_image_gray(image_paths[i]) 50 | image = cv2.resize(image, (width, width), 51 | interpolation=cv2.INTER_LINEAR) 52 | image = np.reshape(image, (1, d)) 53 | image -= np.mean(image) 54 | image_normalized = image / np.std(image) 55 | feats[i, :] = image_normalized 56 | return feats 57 | 58 | 59 | def build_vocabulary(image_paths, vocab_size): 60 | """ 61 | This function will sample SIFT descriptors from the training images, 62 | cluster them with kmeans, and then return the cluster centers. 63 | 64 | Useful functions: 65 | - Use load_image(path) to load RGB images and load_image_gray(path) to load 66 | grayscale images 67 | - frames, descriptors = vlfeat.sift.dsift(img) 68 | http://www.vlfeat.org/matlab/vl_dsift.html 69 | - frames is a N x 2 matrix of locations, which can be thrown away 70 | here (but possibly used for extra credit in get_bags_of_sifts if 71 | you're making a "spatial pyramid"). 72 | - descriptors is a N x 128 matrix of SIFT features 73 | Note: there are step, bin size, and smoothing parameters you can 74 | manipulate for dsift(). We recommend debugging with the 'fast' 75 | parameter. This approximate version of SIFT is about 20 times faster to 76 | compute. Also, be sure not to use the default value of step size. It 77 | will be very slow and you'll see relatively little performance gain 78 | from extremely dense sampling. You are welcome to use your own SIFT 79 | feature code! It will probably be slower, though. 80 | - cluster_centers = vlfeat.kmeans.kmeans(X, K) 81 | http://www.vlfeat.org/matlab/vl_kmeans.html 82 | - X is a N x d numpy array of sampled SIFT features, where N is 83 | the number of features sampled. N should be pretty large! 84 | - K is the number of clusters desired (vocab_size) 85 | cluster_centers is a K x d matrix of cluster centers. This is 86 | your vocabulary. 87 | 88 | Args: 89 | - image_paths: list of image paths. 90 | - vocab_size: size of vocabulary 91 | 92 | Returns: 93 | - vocab: This is a vocab_size x d numpy array (vocabulary). Each row is a 94 | cluster center / visual word 95 | """ 96 | # Load images from the training set. 
To save computation time, you don't 97 | # necessarily need to sample from all images, although it would be better 98 | # to do so. You can randomly sample the descriptors from each image to save 99 | # memory and speed up the clustering. Or you can simply call vl_dsift with 100 | # a large step size here, but a smaller step size in get_bags_of_sifts. 101 | # 102 | # For each loaded image, get some SIFT features. You don't have to get as 103 | # many SIFT features as you will in get_bags_of_sift, because you're only 104 | # trying to get a representative sample here. 105 | # 106 | # Once you have tens of thousands of SIFT features from many training 107 | # images, cluster them with kmeans. The resulting centroids are now your 108 | # visual word vocabulary. 109 | 110 | # length of the SIFT descriptors that you are going to compute. 111 | dim = 128 112 | vocab = np.zeros((vocab_size, dim)) 113 | 114 | # parameters 115 | step = 10 116 | sample = 200 117 | 118 | N = len(image_paths) 119 | features = np.zeros((sample * N, dim)) 120 | 121 | def parallel_func(image_path, sample): 122 | image = load_image_gray(image_path) 123 | _, descriptors = vlfeat.sift.dsift(image, fast=True, step=step) 124 | sample_idx = np.random.permutation(descriptors.shape[0]) 125 | return descriptors[sample_idx[:sample], :] 126 | 127 | results = Parallel(n_jobs=-1)(delayed(parallel_func)(image_path, sample) 128 | for image_path in image_paths) 129 | 130 | idx = 0 131 | for result in results: 132 | features[idx:sample + idx, :] = result 133 | idx += sample 134 | 135 | vocab = vlfeat.kmeans.kmeans(features, vocab_size) 136 | 137 | return vocab 138 | 139 | 140 | def get_bags_of_sifts(image_paths, vocab_filename): 141 | """ 142 | This feature representation is described in the handout, lecture 143 | materials, and Szeliski chapter 14. 144 | You will want to construct SIFT features here in the same way you 145 | did in build_vocabulary() (except for possibly changing the sampling 146 | rate) and then assign each local feature to its nearest cluster center 147 | and build a histogram indicating how many times each cluster was used. 148 | Don't forget to normalize the histogram, or else a larger image with more 149 | SIFT features will look very different from a smaller version of the same 150 | image. 151 | 152 | Useful functions: 153 | - Use load_image(path) to load RGB images and load_image_gray(path) to load 154 | grayscale images 155 | - frames, descriptors = vlfeat.sift.dsift(img) 156 | http://www.vlfeat.org/matlab/vl_dsift.html 157 | frames is a M x 2 matrix of locations, which can be thrown away here 158 | (but possibly used for extra credit in get_bags_of_sifts if you're 159 | making a "spatial pyramid"). 160 | descriptors is a M x 128 matrix of SIFT features 161 | note: there are step, bin size, and smoothing parameters you can 162 | manipulate for dsift(). We recommend debugging with the 'fast' 163 | parameter. This approximate version of SIFT is about 20 times faster 164 | to compute. Also, be sure not to use the default value of step size. 165 | It will be very slow and you'll see relatively little performance 166 | gain from extremely dense sampling. You are welcome to use your own 167 | SIFT feature code! It will probably be slower, though. 
168 | - assignments = vlfeat.kmeans.kmeans_quantize(data, vocab) 169 | finds the cluster assigments for features in data 170 | - data is a M x d matrix of image features 171 | - vocab is the vocab_size x d matrix of cluster centers 172 | (vocabulary) 173 | - assignments is a Mx1 array of assignments of feature vectors to 174 | nearest cluster centers, each element is an integer in 175 | [0, vocab_size) 176 | 177 | Args: 178 | - image_paths: paths to N images 179 | - vocab_filename: Path to the precomputed vocabulary. 180 | This function assumes that vocab_filename exists and contains an 181 | vocab_size x 128 ndarray 'vocab' where each row is a kmeans centroid 182 | or visual word. This ndarray is saved to disk rather than passed in 183 | as a parameter to avoid recomputing the vocabulary every run. 184 | 185 | Returns: 186 | - image_feats: N x d matrix, where d is the dimensionality of the 187 | feature representation. In this case, d will equal the number of 188 | clusters or equivalently the number of entries in each image's 189 | histogram (vocab_size) below. 190 | """ 191 | # load vocabulary 192 | with open(vocab_filename, 'rb') as f: 193 | vocab = pickle.load(f) 194 | 195 | # parameter 196 | step = 10 197 | 198 | N = len(image_paths) 199 | vocab_size = vocab.shape[0] 200 | 201 | # dummy features variable 202 | feats = np.zeros((N, vocab_size)) 203 | 204 | def parallel_func(i, image_paths, step, vocab, vocab_size): 205 | image = load_image_gray(image_paths[i]) 206 | _, descriptors = vlfeat.sift.dsift(image, fast=True, step=step) 207 | assignments = vlfeat.kmeans.kmeans_quantize( 208 | descriptors.astype('float64'), vocab) 209 | bags_of_sifts = np.zeros((1, vocab_size)) 210 | for assignment in assignments: 211 | bags_of_sifts[0, assignment] += 1 212 | return bags_of_sifts / np.linalg.norm(bags_of_sifts) 213 | 214 | result = Parallel(n_jobs=-1)(delayed(parallel_func)(i, image_paths, step, vocab, vocab_size) 215 | for i in range(N)) 216 | 217 | for i in range(N): 218 | feats[i, :] = result[i] 219 | 220 | return feats 221 | 222 | 223 | def nearest_neighbor_classify(train_image_feats, train_labels, test_image_feats, 224 | metric='euclidean'): 225 | """ 226 | This function will predict the category for every test image by finding 227 | the training image with most similar features. Instead of 1 nearest 228 | neighbor, you can vote based on k nearest neighbors which will increase 229 | performance (although you need to pick a reasonable value for k). 230 | 231 | Useful functions: 232 | - D = sklearn_pairwise.pairwise_distances(X, Y) 233 | computes the distance matrix D between all pairs of rows in X and Y. 234 | - X is a N x d numpy array of d-dimensional features arranged along 235 | N rows 236 | - Y is a M x d numpy array of d-dimensional features arranged along 237 | N rows 238 | - D is a N x M numpy array where d(i, j) is the distance between row 239 | i of X and row j of Y 240 | 241 | Args: 242 | - train_image_feats: N x d numpy array, where d is the dimensionality of 243 | the feature representation 244 | - train_labels: N element list, where each entry is a string indicating 245 | the ground truth category for each training image 246 | - test_image_feats: M x d numpy array, where d is the dimensionality of the 247 | feature representation. You can assume N = M, unless you have changed 248 | the starter code 249 | - metric: (optional) metric to be used for nearest neighbor. 250 | Can be used to select different distance functions. 
The default 251 | metric, 'euclidean' is fine for tiny images. 'chi2' tends to work 252 | well for histograms 253 | 254 | Returns: 255 | - test_labels: M element list, where each entry is a string indicating the 256 | predicted category for each testing image 257 | """ 258 | 259 | # parameter 260 | k = 5 261 | 262 | N = len(train_labels) 263 | M = test_image_feats.shape[0] 264 | test_labels = [None] * M 265 | 266 | # distance 267 | D = sklearn_pairwise.pairwise_distances( 268 | test_image_feats, train_image_feats, metric, n_jobs=2) 269 | 270 | string_int_dict = {} 271 | int_to_string_dict = {} 272 | int_train_labels = [0] * N 273 | counter = 0 274 | for i in range(N): 275 | key = train_labels[i] 276 | if key in string_int_dict: 277 | _id = string_int_dict[key] 278 | else: 279 | _id = counter 280 | counter += 1 281 | string_int_dict[key] = _id 282 | int_to_string_dict[_id] = train_labels[i] 283 | int_train_labels[i] = _id 284 | 285 | # find knn 286 | for i in range(M): 287 | knn = np.argpartition(D[i, :], k, axis=0)[:k] 288 | knn_labels = np.take(int_train_labels, knn) 289 | test_labels[i] = int_to_string_dict[np.argmax(np.bincount(knn_labels))] 290 | 291 | return test_labels 292 | 293 | 294 | def svm_classify(train_image_feats, train_labels, test_image_feats): 295 | """ 296 | This function will train a linear SVM for every category (i.e. one vs all) 297 | and then use the learned linear classifiers to predict the category of 298 | every test image. Every test feature will be evaluated with all 15 SVMs 299 | and the most confident SVM will "win". Confidence, or distance from the 300 | margin, is W*X + B where '*' is the inner product or dot product and W and 301 | B are the learned hyperplane parameters. 302 | 303 | Useful functions: 304 | - sklearn LinearSVC 305 | http://scikit-learn.org/stable/modules/generated/sklearn.svm.LinearSVC.html 306 | - svm.fit(X, y) 307 | - set(l) 308 | 309 | Args: 310 | - train_image_feats: N x d numpy array, where d is the dimensionality of 311 | the feature representation 312 | - train_labels: N element list, where each entry is a string indicating the 313 | ground truth category for each training image 314 | - test_image_feats: M x d numpy array, where d is the dimensionality of the 315 | feature representation. 
You can assume N = M, unless you have changed 316 | the starter code 317 | Returns: 318 | - test_labels: M element list, where each entry is a string indicating the 319 | predicted category for each testing image 320 | """ 321 | # categories 322 | categories = list(set(train_labels)) 323 | 324 | # construct 1 vs all SVMs for each category 325 | svms = {cat: LinearSVC(random_state=0, tol=1e-3, loss='hinge', C=5) 326 | for cat in categories} 327 | 328 | test_labels = [] 329 | 330 | N = train_image_feats.shape[0] 331 | M = test_image_feats.shape[0] 332 | C = len(categories) 333 | confidence_scores = np.zeros((M, C)) 334 | 335 | for i in range(C): 336 | cat = categories[i] 337 | y_train = np.zeros(N) 338 | for j in range(N): 339 | if cat == train_labels[j]: 340 | y_train[j] = 1 341 | svms[cat].fit(train_image_feats, y_train) 342 | confidence_scores[:, i] = svms[cat].decision_function(test_image_feats) 343 | 344 | for i in range(M): 345 | test_labels.append(categories[np.argmax(confidence_scores[i, :])]) 346 | 347 | return test_labels 348 | -------------------------------------------------------------------------------- /project/proj3/code/utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | from sklearn.metrics import confusion_matrix 4 | import matplotlib.pyplot as plt 5 | import os.path as osp 6 | from glob import glob 7 | from random import shuffle 8 | 9 | 10 | def im2single(im): 11 | im = im.astype(np.float32) / 255 12 | return im 13 | 14 | 15 | def single2im(im): 16 | im *= 255 17 | im = im.astype(np.uint8) 18 | return im 19 | 20 | 21 | def load_image(path): 22 | return im2single(cv2.imread(path))[:, :, ::-1] 23 | 24 | 25 | def load_image_gray(path): 26 | img = load_image(path) 27 | return cv2.cvtColor(img, cv2.COLOR_RGB2GRAY) 28 | 29 | 30 | def get_image_paths(data_path, categories, num_train_per_cat=100, fmt='jpg'): 31 | """ 32 | This function returns lists containing the file path for each train 33 | and test image, as well as listss with the label of each train and 34 | test image. By default all four of these arrays will have 1500 35 | elements where each element is a string. 
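    Editor's note (added for clarity; not part of the original docstring): the
    implementation below expects images laid out as
    <data_path>/train/<category>/*.<fmt> and <data_path>/test/<category>/*.<fmt>,
    and it draws at most num_train_per_cat shuffled image paths per category
    from each of the two splits.
    Example (editor's sketch; the path and category names are placeholders,
    not values taken from this repository):
        train_paths, test_paths, train_labels, test_labels = get_image_paths(
            '../data', ['Kitchen', 'Bedroom'], num_train_per_cat=100)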
36 | :param data_path: path to the 'test' and 'train' directories 37 | :param categories: list of category names 38 | :param num_train_per_cat: max number of training images to use (per category) 39 | :param fmt: file extension of the images 40 | :return: lists: train_image_paths, test_image_paths, train_labels, test_labels 41 | """ 42 | train_image_paths = [] 43 | test_image_paths = [] 44 | train_labels = [] 45 | test_labels = [] 46 | 47 | for cat in categories: 48 | # train 49 | pth = osp.join(data_path, 'train', cat, '*.{:s}'.format(fmt)) 50 | pth = glob(pth) 51 | shuffle(pth) 52 | pth = pth[:num_train_per_cat] 53 | train_image_paths.extend(pth) 54 | train_labels.extend([cat]*len(pth)) 55 | 56 | # test 57 | pth = osp.join(data_path, 'test', cat, '*.{:s}'.format(fmt)) 58 | pth = glob(pth) 59 | shuffle(pth) 60 | pth = pth[:num_train_per_cat] 61 | test_image_paths.extend(pth) 62 | test_labels.extend([cat]*len(pth)) 63 | 64 | return train_image_paths, test_image_paths, train_labels, test_labels 65 | 66 | 67 | def show_results(train_image_paths, test_image_paths, train_labels, test_labels, 68 | categories, abbr_categories, predicted_categories): 69 | """ 70 | shows the results 71 | :param train_image_paths: 72 | :param test_image_paths: 73 | :param train_labels: 74 | :param test_labels: 75 | :param categories: 76 | :param abbr_categories: 77 | :param predicted_categories: 78 | :return: 79 | """ 80 | cat2idx = {cat: idx for idx, cat in enumerate(categories)} 81 | 82 | # confusion matrix 83 | y_true = [cat2idx[cat] for cat in test_labels] 84 | y_pred = [cat2idx[cat] for cat in predicted_categories] 85 | cm = confusion_matrix(y_true, y_pred) 86 | cm = cm.astype(np.float) / cm.sum(axis=1)[:, np.newaxis] 87 | acc = np.mean(np.diag(cm)) 88 | plt.figure() 89 | plt.imshow(cm, interpolation='nearest', cmap=plt.cm.get_cmap('jet')) 90 | plt.title('Confusion matrix. 
Mean of diagonal = {:4.2f}%'.format(acc*100)) 91 | tick_marks = np.arange(len(categories)) 92 | plt.tight_layout() 93 | plt.xticks(tick_marks, abbr_categories, rotation=45) 94 | plt.yticks(tick_marks, categories) 95 | -------------------------------------------------------------------------------- /project/proj3/code/vocab.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kemo-Huang/CS308-Computer-Vision/bc83936aa04bf0dca86294595632a9e1d0092f9a/project/proj3/code/vocab.pkl -------------------------------------------------------------------------------- /project/proj4/Assignment4.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kemo-Huang/CS308-Computer-Vision/bc83936aa04bf0dca86294595632a9e1d0092f9a/project/proj4/Assignment4.pdf -------------------------------------------------------------------------------- /project/proj4/code/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kemo-Huang/CS308-Computer-Vision/bc83936aa04bf0dca86294595632a9e1d0092f9a/project/proj4/code/__init__.py -------------------------------------------------------------------------------- /project/proj4/code/student_code.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cyvlfeat as vlfeat 3 | from utils import * 4 | import os.path as osp 5 | from glob import glob 6 | from random import shuffle 7 | from sklearn.svm import LinearSVC 8 | 9 | 10 | def get_positive_features(train_path_pos, feature_params): 11 | """ 12 | This function should return all positive training examples (faces) from 13 | 36x36 images in 'train_path_pos'. Each face should be converted into a 14 | HoG template according to 'feature_params'. 15 | 16 | Useful functions: 17 | - vlfeat.hog.hog(im, cell_size): computes HoG features 18 | 19 | Args: 20 | - train_path_pos: (string) This directory contains 36x36 face images 21 | - feature_params: dictionary of HoG feature computation parameters. 22 | You can include various parameters in it. Two defaults are: 23 | - template_size: (default 36) The number of pixels spanned by 24 | each train/test template. 25 | - hog_cell_size: (default 6) The number of pixels in each HoG 26 | cell. template size should be evenly divisible by hog_cell_size. 27 | Smaller HoG cell sizes tend to work better, but they make things 28 | slower because the feature dimensionality increases and more 29 | importantly the step size of the classifier decreases at test time 30 | (although you don't have to make the detector step size equal a 31 | single HoG cell). 32 | 33 | Returns: 34 | - feats: N x D matrix where N is the number of faces and D is the template 35 | dimensionality, which would be (feature_params['template_size'] / 36 | feature_params['hog_cell_size'])^2 * 31 if you're using the default 37 | hog parameters. 
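    Editor's note (an illustrative calculation, not part of the original
    starter code): with the default parameters the template dimensionality is
    D = (template_size / hog_cell_size)^2 * 31 = (36 / 6)^2 * 31 = 1116,
    so the returned feats array has shape (num_face_images, 1116).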
38 | """ 39 | # params for HOG computation 40 | win_size = feature_params.get('template_size', 36) 41 | cell_size = feature_params.get('hog_cell_size', 6) 42 | 43 | positive_files = glob(osp.join(train_path_pos, '*.jpg')) 44 | 45 | ########################################################################### 46 | # TODO: YOUR CODE HERE # 47 | ########################################################################### 48 | 49 | n_cell = np.ceil(win_size/cell_size).astype('int') 50 | feats = np.zeros((len(positive_files), n_cell*n_cell*31)) 51 | 52 | for i in range(len(positive_files)): 53 | im = load_image_gray(positive_files[i]) 54 | feats[i, :] = vlfeat.hog.hog(im, cell_size).ravel() 55 | 56 | ########################################################################### 57 | # END OF YOUR CODE # 58 | ########################################################################### 59 | 60 | return feats 61 | 62 | 63 | def get_random_negative_features(non_face_scn_path, feature_params, num_samples): 64 | """ 65 | This function should return negative training examples (non-faces) from any 66 | images in 'non_face_scn_path'. Images should be loaded in grayscale because 67 | the positive training data is only available in grayscale (use 68 | load_image_gray()). 69 | 70 | Useful functions: 71 | - vlfeat.hog.hog(im, cell_size): computes HoG features 72 | 73 | Args: 74 | - non_face_scn_path: string. This directory contains many images which 75 | have no faces in them. 76 | - feature_params: dictionary of HoG feature computation parameters. See 77 | the documentation for get_positive_features() for more information. 78 | - num_samples: number of negatives to be mined. It is not important for 79 | the function to find exactly 'num_samples' non-face features. For 80 | example, you might try to sample some number from each image, but 81 | some images might be too small to find enough. 82 | 83 | Returns: 84 | - N x D matrix where N is the number of non-faces and D is the feature 85 | dimensionality, which would be (feature_params['template_size'] / 86 | feature_params['hog_cell_size'])^2 * 31 if you're using the default 87 | hog parameters. 
88 | """ 89 | # params for HOG computation 90 | win_size = feature_params.get('template_size', 36) 91 | cell_size = feature_params.get('hog_cell_size', 6) 92 | 93 | negative_files = glob(osp.join(non_face_scn_path, '*.jpg')) 94 | 95 | ########################################################################### 96 | # TODO: YOUR CODE HERE # 97 | ########################################################################### 98 | 99 | n_cell = np.ceil(win_size/cell_size).astype('int') 100 | scales = [1, 0.9, 0.8, 0.7, 0.6, 0.5] 101 | feats = [] 102 | samples_per_image = int(num_samples / len(negative_files) / len(scales)) 103 | for i in range(len(negative_files)): 104 | im = load_image_gray(negative_files[i]) 105 | for scale in scales: 106 | hog = vlfeat.hog.hog(cv2.resize( 107 | im, None, fx=scale, fy=scale), cell_size) 108 | for j in range(samples_per_image): 109 | r1 = np.random.randint(hog.shape[0]) 110 | r2 = np.random.randint(hog.shape[1]) 111 | if (r1 + n_cell < hog.shape[0]) and (r2 + n_cell < hog.shape[1]): 112 | feats.append( 113 | hog[r1:r1+n_cell, r2:r2+n_cell, :].ravel()) 114 | 115 | feats = np.array(feats) 116 | print(feats.shape) 117 | 118 | ########################################################################### 119 | # END OF YOUR CODE # 120 | ########################################################################### 121 | 122 | return feats 123 | 124 | 125 | def train_classifier(features_pos, features_neg, C): 126 | """ 127 | This function trains a linear SVM classifier on the positive and negative 128 | features obtained from the previous steps. We fit a model to the features 129 | and return the svm object. 130 | 131 | Args: 132 | - features_pos: N X D array. This contains an array of positive features 133 | extracted from get_positive_feats(). 134 | - features_neg: M X D array. This contains an array of negative features 135 | extracted from get_negative_feats(). 136 | 137 | Returns: 138 | - svm: LinearSVC object. This returns a SVM classifier object trained 139 | on the positive and negative features. 140 | """ 141 | ########################################################################### 142 | # TODO: YOUR CODE HERE # 143 | ########################################################################### 144 | 145 | svm = LinearSVC(tol=1e-5, loss='hinge', C=C) 146 | feats = np.vstack((features_pos, features_neg)) 147 | labels = np.hstack( 148 | (np.ones(len(features_pos)), -np.ones(len(features_neg)))) 149 | svm.fit(feats, labels) 150 | 151 | ########################################################################### 152 | # END OF YOUR CODE # 153 | ########################################################################### 154 | 155 | return svm 156 | 157 | 158 | def mine_hard_negs(non_face_scn_path, svm, feature_params): 159 | """ 160 | This function is pretty similar to get_random_negative_features(). The only 161 | difference is that instead of returning all the extracted features, you only 162 | return the features with false-positive prediction. 163 | 164 | Useful functions: 165 | - vlfeat.hog.hog(im, cell_size): computes HoG features 166 | - svm.predict(feat): predict features 167 | 168 | Args: 169 | - non_face_scn_path: string. This directory contains many images which 170 | have no faces in them. 171 | - feature_params: dictionary of HoG feature computation parameters. See 172 | the documentation for get_positive_features() for more information. 
173 | - svm: LinearSVC object 174 | 175 | Returns: 176 | - N x D matrix where N is the number of non-faces which are 177 | false-positive and D is the feature dimensionality. 178 | """ 179 | 180 | # params for HOG computation 181 | win_size = feature_params.get('template_size', 36) 182 | cell_size = feature_params.get('hog_cell_size', 6) 183 | 184 | negative_files = glob(osp.join(non_face_scn_path, '*.jpg')) 185 | 186 | ########################################################################### 187 | # TODO: YOUR CODE HERE # 188 | ########################################################################### 189 | 190 | num_samples = 5000 191 | feats = get_random_negative_features( 192 | non_face_scn_path, feature_params, num_samples) 193 | feats = feats[svm.predict(feats) > 0] 194 | 195 | ########################################################################### 196 | # END OF YOUR CODE # 197 | ########################################################################### 198 | 199 | return feats 200 | 201 | 202 | def run_detector(test_scn_path, svm, feature_params, verbose=False): 203 | """ 204 | This function returns detections on all of the images in a given path. You 205 | will want to use non-maximum suppression on your detections or your 206 | performance will be poor (the evaluation counts a duplicate detection as 207 | wrong). The non-maximum suppression is done on a per-image basis. The 208 | starter code includes a call to a provided non-max suppression function. 209 | 210 | The placeholder version of this code will return random bounding boxes in 211 | each test image. It will even do non-maximum suppression on the random 212 | bounding boxes to give you an example of how to call the function. 213 | 214 | Your actual code should convert each test image to HoG feature space with 215 | a _single_ call to vlfeat.hog.hog() for each scale. Then step over the HoG 216 | cells, taking groups of cells that are the same size as your learned 217 | template, and classifying them. If the classification is above some 218 | confidence, keep the detection and then pass all the detections for an 219 | image to non-maximum suppression. For your initial debugging, you can 220 | operate only at a single scale and you can skip calling non-maximum 221 | suppression. Err on the side of having a low confidence threshold (even 222 | less than zero) to achieve high enough recall. 223 | 224 | Args: 225 | - test_scn_path: (string) This directory contains images which may or 226 | may not have faces in them. This function should work for the 227 | MIT+CMU test set but also for any other images (e.g. class photos). 228 | - svm: A trained sklearn.svm.LinearSVC object 229 | - feature_params: dictionary of HoG feature computation parameters. 230 | You can include various parameters in it. Two defaults are: 231 | - template_size: (default 36) The number of pixels spanned by 232 | each train/test template. 233 | - hog_cell_size: (default 6) The number of pixels in each HoG 234 | cell. template size should be evenly divisible by hog_cell_size. 235 | Smaller HoG cell sizes tend to work better, but they make things 236 | slower because the feature dimensionality increases and more 237 | importantly the step size of the classifier decreases at test time. 238 | - verbose: prints out debug information if True 239 | 240 | Returns: 241 | - bboxes: N x 4 numpy array. N is the number of detections. 242 | bboxes(i,:) is [x_min, y_min, x_max, y_max] for detection i. 243 | - confidences: (N, ) size numpy array. 
confidences(i) is the real-valued 244 | confidence of detection i. 245 | - image_ids: List with N elements. image_ids[i] is the image file name 246 | for detection i. (not the full path, just 'albert.jpg') 247 | """ 248 | im_filenames = sorted(glob(osp.join(test_scn_path, '*.jpg'))) 249 | bboxes = np.empty((0, 4)) 250 | confidences = np.empty(0) 251 | image_ids = [] 252 | 253 | # number of top detections to feed to NMS 254 | topk = 20 255 | 256 | # params for HOG computation 257 | win_size = feature_params.get('template_size', 36) 258 | cell_size = feature_params.get('hog_cell_size', 6) 259 | template_size = int(win_size / cell_size) 260 | 261 | for idx, im_filename in enumerate(im_filenames): 262 | print('Detecting faces in {:s}'.format(im_filename)) 263 | im = load_image_gray(im_filename) 264 | im_id = osp.split(im_filename)[-1] 265 | im_shape = im.shape 266 | # create scale space HOG pyramid and return scores for prediction 267 | 268 | ####################################################################### 269 | # TODO: YOUR CODE HERE # 270 | ####################################################################### 271 | 272 | cur_bboxes = [] 273 | cur_confidences = [] 274 | 275 | min_dim = min(im_shape[0], im_shape[1]) 276 | iteration = 0 277 | max_iter = 100 278 | scale_factor = 1 279 | step_size = 1 280 | 281 | while scale_factor * min_dim > win_size and iteration < max_iter: 282 | resized_im = cv2.resize(im, None, fx=scale_factor, fy=scale_factor) 283 | hog_feats = vlfeat.hog.hog(resized_im, cell_size) 284 | for r in range(0, hog_feats.shape[0]-template_size, step_size): 285 | for c in range(0, hog_feats.shape[1]-template_size, step_size): 286 | hog = hog_feats[r:r+template_size, c:c+template_size, :] 287 | score = svm.decision_function( 288 | hog.ravel().reshape(1, -1))[0] 289 | if score >= 0: 290 | cur_bboxes.append(cell_size/scale_factor * np.array( 291 | [c, r, c+template_size, r+template_size]).astype(int)) 292 | cur_confidences.append(score) 293 | scale_factor *= 0.85 294 | iteration += 1 295 | 296 | if len(cur_bboxes) == 0: 297 | cur_bboxes = np.zeros((1, 4)) 298 | cur_confidences = np.array([0]) 299 | else: 300 | cur_bboxes = np.array(cur_bboxes) 301 | cur_confidences = np.array(cur_confidences) 302 | 303 | ####################################################################### 304 | # END OF YOUR CODE # 305 | ####################################################################### 306 | 307 | ### non-maximum suppression ### 308 | # non_max_supr_bbox() can actually get somewhat slow with thousands of 309 | # initial detections. You could pre-filter the detections by confidence, 310 | # e.g. a detection with confidence -1.1 will probably never be 311 | # meaningful. You probably _don't_ want to threshold at 0.0, though. You 312 | # can get higher recall with a lower threshold. You should not modify 313 | # anything in non_max_supr_bbox(). If you want to try your own NMS methods, 314 | # please create another function. 
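    # Editor's note (descriptive comment, not part of the original code): the
    # lines below keep only the topk most confident detections for this image,
    # run the provided non-maximum suppression, and append the surviving boxes,
    # confidences and image ids to the outputs returned by run_detector.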
315 | 316 | idsort = np.argsort(-cur_confidences)[:topk] 317 | cur_bboxes = cur_bboxes[idsort] 318 | cur_confidences = cur_confidences[idsort] 319 | is_valid_bbox = non_max_suppression_bbox(cur_bboxes, cur_confidences, 320 | im_shape, verbose=verbose) 321 | 322 | print('NMS done, {:d} detections passed'.format(sum(is_valid_bbox))) 323 | cur_bboxes = cur_bboxes[is_valid_bbox] 324 | cur_confidences = cur_confidences[is_valid_bbox] 325 | 326 | bboxes = np.vstack((bboxes, cur_bboxes)) 327 | confidences = np.hstack((confidences, cur_confidences)) 328 | image_ids.extend([im_id] * len(cur_confidences)) 329 | 330 | return bboxes, confidences, image_ids 331 | -------------------------------------------------------------------------------- /project/proj4/code/utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | import os.path as osp 4 | from glob import glob 5 | import matplotlib.pyplot as plt 6 | from skimage import draw 7 | 8 | plt.rcParams.update({'figure.max_open_warning': 0}) 9 | 10 | 11 | def load_image(path): 12 | im = cv2.imread(path) 13 | im = im[:, :, ::-1] # BGR -> RGB 14 | im = im.astype(np.float32) # for vlfeat functions 15 | return im 16 | 17 | 18 | def load_image_gray(path): 19 | im = cv2.imread(path, cv2.IMREAD_GRAYSCALE) 20 | im = im.astype(np.float32) # for vlfeat functions 21 | return im 22 | 23 | 24 | def report_accuracy(confidences, label_vector): 25 | """ 26 | Calculates various accuracy metrics on the given predictions 27 | :param confidences: 1D numpy array holding predicted confidence scores 28 | :param label_vector: 1D numpy array holding ground truth labels (same size 29 | as confidences 30 | :return: tp_rate, fp_rate, tn_rate, fn_rate 31 | """ 32 | preds = confidences.copy() 33 | preds[preds >= 0] = 1 34 | preds[preds < 0] = -1 35 | 36 | tp = np.logical_and(preds > 0, preds == label_vector) 37 | fp = np.logical_and(preds > 0, preds != label_vector) 38 | tn = np.logical_and(preds < 0, preds == label_vector) 39 | fn = np.logical_and(preds < 0, preds != label_vector) 40 | 41 | N = len(label_vector) 42 | 43 | tp_rate = sum(tp) / (sum(tp) + sum(fn)) * 100 44 | fp_rate = sum(fp) / (sum(fp) + sum(tn)) * 100 45 | tn_rate = 100 - fp_rate 46 | fn_rate = 100 - tp_rate 47 | accuracy = (sum(tp) + sum(tn)) / N * 100 48 | 49 | print('Accuracy = {:4.3f}%\n' 50 | 'True Positive rate = {:4.3f}%\nFalse Positive rate = {:4.3f}%\n' 51 | 'True Negative rate = {:4.3f}%\nFalse Negative rate = {:4.3f}%'. 52 | format(accuracy, tp_rate, fp_rate, tn_rate, fn_rate)) 53 | 54 | return tp_rate, fp_rate, tn_rate, fn_rate 55 | 56 | 57 | def non_max_suppression_bbox(bboxes, confidences, img_size, verbose=False): 58 | """ 59 | high confidence detections suppress all overlapping detections (including 60 | detections at other scales). Detections can partially overlap, but the 61 | center of one detection can not be within another detection. 62 | 63 | :param bboxes: Nx4 numpy array, where N is the number of bounding boxes. Each 64 | row is [xmin, ymin, xmax, ymax] 65 | :param confidences: size (N, ) numpy array, holding the final confidence of 66 | each detection 67 | :param img_size: the [height, width] of the image 68 | :param verbose: boolean 69 | :return: size (N, ) numpy logical array. Element i indicates if the i'th 70 | bounding box survives non-maximum suppression. 
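    Editor's note (summarises the logic below; not part of the original
    docstring): detections are visited in decreasing order of confidence, and
    a detection is suppressed when its overlap with an already accepted box
    exceeds 0.3, where overlap = intersection_area / (area_a + area_b -
    intersection_area), or when its centre lies inside an already accepted box.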
71 | """ 72 | # truncate the bounding boxes to image dimensions 73 | bboxes[:, 2] = np.minimum(bboxes[:, 2], img_size[1]) 74 | bboxes[:, 3] = np.minimum(bboxes[:, 3], img_size[0]) 75 | 76 | # higher confidence detections get priority 77 | order = np.argsort(-confidences) 78 | confidences = confidences[order] 79 | bboxes = bboxes[order] 80 | 81 | # output indicator vector 82 | is_valid_bbox = np.asarray([False] * len(confidences)) 83 | 84 | # overlap threshold above which the less confident detection is suppressed 85 | overlap_thresh = 0.3 86 | 87 | for i in range(len(confidences)): 88 | cur_bb = bboxes[i] 89 | cur_bb_is_valid = True 90 | 91 | for j in np.where(is_valid_bbox)[0]: 92 | prev_bb = bboxes[j] 93 | bi = [max(cur_bb[0], prev_bb[0]), max(cur_bb[1], prev_bb[1]), 94 | min(cur_bb[2], prev_bb[2]), min(cur_bb[3], prev_bb[3])] 95 | iw = bi[2] - bi[0] + 1 96 | ih = bi[3] - bi[1] + 1 97 | if (iw > 0) and (ih > 0): 98 | # overlap = area of intersection / area of union 99 | ua = (cur_bb[2] - cur_bb[0] + 1) * (cur_bb[3] - cur_bb[1] + 1) + \ 100 | (prev_bb[2] - prev_bb[0] + 1) * (prev_bb[3] - prev_bb[1] + 1) - \ 101 | iw * ih 102 | ov = (iw * ih) / ua 103 | 104 | if ov > overlap_thresh: 105 | cur_bb_is_valid = False 106 | 107 | # special case: center coordinate of current bbox is inside the previous 108 | # bbox 109 | cx = (cur_bb[0] + cur_bb[2]) / 2 110 | cy = (cur_bb[1] + cur_bb[3]) / 2 111 | if (cx > prev_bb[0]) and (cx < prev_bb[2]) and (cy > prev_bb[1]) and \ 112 | (cy < prev_bb[3]): 113 | cur_bb_is_valid = False 114 | 115 | if verbose: 116 | print('Detection {:d}, bbox = [{:d}, {:d}, {:d}, {:d}], {:f} overlap ' 117 | 'with detection {:d} [{:d}, {:d}, {:d}, {:d}]' 118 | .format(i, cur_bb[0], cur_bb[1], cur_bb[2], cur_bb[3], ov, j, 119 | prev_bb[0], prev_bb[1], prev_bb[2], prev_bb[3])) 120 | 121 | if not cur_bb_is_valid: 122 | break 123 | 124 | is_valid_bbox[i] = cur_bb_is_valid 125 | 126 | # return back to the original order 127 | order = np.argsort(order) 128 | is_valid_bbox = is_valid_bbox[order] 129 | 130 | return is_valid_bbox 131 | 132 | 133 | def voc_ap(rec, prec): 134 | mrec = np.hstack((0, rec, 1)) 135 | mpre = np.hstack((0, prec, 0)) 136 | 137 | for i in reversed(range(len(mpre) - 1)): 138 | mpre[i] = max(mpre[i], mpre[i + 1]) 139 | 140 | i = np.where(mrec[1:] != mrec[:-1])[0] + 1 141 | ap = sum((mrec[i] - mrec[i - 1]) * mpre[i]) 142 | return ap 143 | 144 | 145 | def visualize_hog(svm, feature_params): 146 | win_size = feature_params.get('template_size', 36) 147 | cell_size = feature_params.get('hog_cell_size', 6) 148 | n_cell = np.ceil(win_size / cell_size).astype('int') 149 | 150 | test_feat = svm.coef_ - np.min(svm.coef_) 151 | test_feat = np.reshape(test_feat, [n_cell, n_cell, 31]) 152 | 153 | radius = 22 154 | orientations = 9 155 | 156 | cx, cy = 48, 48 157 | sy, sx = cy * n_cell, cx * n_cell 158 | 159 | n_cellsx = n_cell 160 | n_cellsy = n_cell 161 | 162 | orientation_histogram = test_feat 163 | 164 | orientations_arr = np.arange(orientations) 165 | dx_arr = radius * np.cos(orientations_arr / orientations * np.pi) 166 | dy_arr = radius * np.sin(orientations_arr / orientations * np.pi) 167 | hog_image = np.zeros((sy, sx), dtype=float) 168 | 169 | for x in range(n_cellsx): 170 | for y in range(n_cellsy): 171 | for o, dx, dy in zip(orientations_arr, dx_arr, dy_arr): 172 | centre = tuple([y * cy + cy // 2, x * cx + cx // 2]) 173 | wt = (orientation_histogram[y, x, 18 + o]) * 2.5 174 | 175 | xmin = int(centre[0] - dx) 176 | xmax = int(centre[0] + dx) 177 | ymin = int(centre[1] 
+ dy) 178 | ymax = int(centre[1] - dy) 179 | 180 | rr, cc = draw.line(xmin, ymin, xmax, ymax) 181 | 182 | hog_image[rr, cc] = np.maximum(hog_image[rr, cc], wt) 183 | hog_image[rr + 1, cc] = np.maximum(hog_image[rr + 1, cc], wt) 184 | hog_image[rr, cc + 1] = np.maximum(hog_image[rr, cc + 1], wt) 185 | hog_image[rr - 1, cc] = np.maximum(hog_image[rr - 1, cc], wt) 186 | hog_image[rr, cc - 1] = np.maximum(hog_image[rr, cc - 1], wt) 187 | 188 | hog_image_2 = hog_image.copy() 189 | 190 | hog_image = hog_image ** 3 / np.max(hog_image ** 3) 191 | hog_image = hog_image * 255 192 | 193 | hog_image_2[hog_image_2 == 0] = 0.5 * np.max(hog_image_2) 194 | hog_image_2 = hog_image_2 / np.max(hog_image_2) 195 | hog_image_2 = hog_image_2 * 255 196 | 197 | fig = plt.figure(figsize=[8, 4]) 198 | ax = fig.add_subplot(121) 199 | ax.imshow((hog_image).astype("uint8"), cmap="gray") 200 | ax.axis("off") 201 | 202 | ax = fig.add_subplot(122) 203 | ax.imshow((hog_image_2).astype("uint8"), cmap="gray") 204 | ax.axis("off") 205 | 206 | 207 | def evaluate_detections(bboxes, confidences, image_ids, label_path, draw=True): 208 | """ 209 | :param bboxes: 210 | :param confidences: 211 | :param image_ids: 212 | :param label_path: 213 | :param draw: 214 | :return: 215 | """ 216 | gt_ids = [] 217 | gt_bboxes = [] 218 | with open(label_path, 'r') as f: 219 | for line in f: 220 | gt_id, xmin, ymin, xmax, ymax = line.split(' ') 221 | gt_ids.append(gt_id) 222 | gt_bboxes.append([float(xmin), float(ymin), float(xmax), float(ymax)]) 223 | gt_bboxes = np.vstack(gt_bboxes) 224 | 225 | npos = len(gt_ids) 226 | gt_isclaimed = np.asarray([False] * len(gt_ids)) 227 | 228 | # sort detections by decreasing confidence 229 | order = np.argsort(-confidences) 230 | confidences = confidences[order] 231 | image_ids = [image_ids[i] for i in order] 232 | bboxes = bboxes[order] 233 | 234 | # assign detections to GT objects 235 | nd = len(confidences) 236 | tp = np.asarray([False] * nd) 237 | fp = np.asarray([False] * nd) 238 | duplicate_detections = np.asarray([False] * nd) 239 | 240 | for d in range(nd): 241 | cur_gt_ids = [i for i, gt_id in enumerate(gt_ids) if gt_id == image_ids[d]] 242 | 243 | bb = bboxes[d] 244 | ovmax = -float('inf') 245 | 246 | for j in cur_gt_ids: 247 | bbgt = gt_bboxes[j] 248 | bi = [max(bb[0], bbgt[0]), max(bb[1], bbgt[1]), min(bb[2], bbgt[2]), 249 | min(bb[3], bbgt[3])] 250 | iw = bi[2] - bi[0] + 1 251 | ih = bi[3] - bi[1] + 1 252 | 253 | if (iw > 0) and (ih > 0): 254 | ua = (bb[2] - bb[0] + 1) * (bb[3] - bb[1] + 1) + \ 255 | (bbgt[2] - bbgt[0] + 1) * (bbgt[3] - bbgt[1] + 1) - \ 256 | iw * ih 257 | ov = iw * ih / ua 258 | if ov > ovmax: 259 | ovmax = ov 260 | jmax = j 261 | 262 | if ovmax >= 0.3: 263 | if not gt_isclaimed[jmax]: 264 | tp[d] = True 265 | gt_isclaimed[jmax] = True 266 | else: 267 | fp[d] = True 268 | duplicate_detections[d] = True 269 | else: 270 | fp[d] = True 271 | 272 | cum_fp = np.cumsum(fp) 273 | cum_tp = np.cumsum(tp) 274 | rec = cum_tp / npos 275 | prec = cum_tp / (cum_tp + cum_fp) 276 | ap = voc_ap(rec, prec) 277 | 278 | if draw: 279 | plt.figure() 280 | plt.plot(rec, prec, '-') 281 | plt.xlim(0, 1) 282 | plt.ylim(0, 1) 283 | plt.xlabel('Recall') 284 | plt.ylabel('Precision') 285 | plt.title('Average precision = {:4.3f}'.format(ap)) 286 | 287 | order = np.argsort(order) 288 | tp = tp[order] 289 | fp = fp[order] 290 | duplicate_detections = duplicate_detections[order] 291 | 292 | return gt_ids, gt_bboxes, gt_isclaimed, tp, fp, duplicate_detections 293 | 294 | 295 | def 
visualize_detections_by_image(bboxes, confidences, image_ids, tp, fp, 296 | test_scn_path, label_filename, onlytp=False): 297 | """ 298 | Visuaize the detection bounding boxes and ground truth on images 299 | :param bboxes: N x 4 numpy matrix, where N is the number of detections. Each 300 | row is [xmin, ymin, xmax, ymax] 301 | :param confidences: size (N, ) numpy array of detection confidences 302 | :param image_ids: N-element list of image names for each detection 303 | :param tp: size (N, ) numpy array of true positive indicator variables 304 | :param fp: size (N, ) numpy array of false positive indicator variables 305 | :param test_scn_path: path to directory holding test images (in .jpg format) 306 | :param label_filename: path to .txt file containing labels. Format is 307 | image_id xmin ymin xmax ymax for each row 308 | :param onlytp: show only true positives 309 | :return: 310 | """ 311 | gt_ids = [] 312 | gt_bboxes = [] 313 | with open(label_filename, 'r') as f: 314 | for line in f: 315 | gt_id, xmin, ymin, xmax, ymax = line.split(' ') 316 | gt_ids.append(gt_id) 317 | gt_bboxes.append([float(xmin), float(ymin), float(xmax), float(ymax)]) 318 | gt_bboxes = np.vstack(gt_bboxes) 319 | 320 | gt_file_list = list(set(gt_ids)) 321 | 322 | for gt_file in gt_file_list: 323 | cur_test_image = load_image(osp.join(test_scn_path, gt_file)) 324 | 325 | cur_gt_detections = [i for i, gt_id in enumerate(gt_ids) if gt_id == gt_file] 326 | cur_gt_bboxes = gt_bboxes[cur_gt_detections] 327 | 328 | cur_detections = [i for i, gt_id in enumerate(image_ids) if gt_id == gt_file] 329 | cur_bboxes = bboxes[cur_detections] 330 | cur_confidences = confidences[cur_detections] 331 | cur_tp = tp[cur_detections] 332 | cur_fp = fp[cur_detections] 333 | 334 | plt.figure() 335 | plt.imshow(cur_test_image.astype(np.uint8)) 336 | 337 | for i, bb in enumerate(cur_bboxes): 338 | if cur_tp[i]: # true positive 339 | plt.plot(bb[[0, 2, 2, 0, 0]], bb[[1, 1, 3, 3, 1]], 'g') 340 | elif cur_fp[i]: # false positive 341 | if not onlytp: 342 | plt.plot(bb[[0, 2, 2, 0, 0]], bb[[1, 1, 3, 3, 1]], 'r') 343 | else: 344 | raise AssertionError 345 | 346 | for bb in cur_gt_bboxes: 347 | plt.plot(bb[[0, 2, 2, 0, 0]], bb[[1, 1, 3, 3, 1]], 'y') 348 | 349 | plt.axis("off") 350 | plt.title('{:s} (green=true pos, red=false pos, yellow=ground truth), ' 351 | '{:d}/{:d} found'.format(gt_file, sum(cur_tp), len(cur_gt_bboxes))) 352 | 353 | 354 | def visualize_detections_by_confidence(bboxes, confidences, image_ids, 355 | test_scn_path, label_filename, onlytp=False): 356 | """ 357 | Visuaize the detection bounding boxes and ground truth on images, sorted by 358 | confidence 359 | :param bboxes: N x 4 numpy matrix, where N is the number of detections. Each 360 | row is [xmin, ymin, xmax, ymax] 361 | :param confidences: size (N, ) numpy array of detection confidences 362 | :param image_ids: N-element list of image names for each detection 363 | :param test_scn_path: path to directory holding test images (in .jpg format) 364 | :param label_filename: path to .txt file containing labels. 
Format is 365 | image_id xmin ymin xmax ymax for each row 366 | :param onlytp: show only true positives 367 | :return: 368 | """ 369 | gt_ids = [] 370 | gt_bboxes = [] 371 | with open(label_filename, 'r') as f: 372 | for line in f: 373 | gt_id, xmin, ymin, xmax, ymax = line.split(' ') 374 | gt_ids.append(gt_id) 375 | gt_bboxes.append([float(xmin), float(ymin), float(xmax), float(ymax)]) 376 | gt_bboxes = np.vstack(gt_bboxes) 377 | 378 | # sort detections by decreasing confidence 379 | order = np.argsort(-confidences) 380 | image_ids = [image_ids[i] for i in order] 381 | bboxes = bboxes[order] 382 | confidences = confidences[order] 383 | 384 | for d in range(len(confidences)): 385 | cur_gt_idxs = [i for i, gt_id in enumerate(gt_ids) if gt_id == image_ids[d]] 386 | bb = bboxes[d] 387 | ovmax = -float('inf') 388 | 389 | for j in cur_gt_idxs: 390 | bbgt = gt_bboxes[j] 391 | bi = [max(bb[0], bbgt[0]), max(bb[1], bbgt[1]), min(bb[2], bbgt[2]), 392 | min(bb[3], bbgt[3])] 393 | iw = bi[2] - bi[0] + 1 394 | ih = bi[3] - bi[1] + 1 395 | 396 | if (iw > 0) and (ih > 0): 397 | ua = (bb[2] - bb[0] + 1) * (bb[3] - bb[1] + 1) + \ 398 | (bbgt[2] - bbgt[0] + 1) * (bbgt[3] - bbgt[1] + 1) - \ 399 | iw * ih 400 | ov = iw * ih / ua 401 | if ov > ovmax: 402 | ovmax = ov 403 | jmax = j 404 | 405 | if onlytp and ovmax < 0.3: 406 | continue 407 | 408 | im = load_image(osp.join(test_scn_path, image_ids[d])) 409 | plt.figure() 410 | plt.imshow(im.astype(np.uint8)) 411 | if ovmax >= 0.3: 412 | bbgt = gt_bboxes[jmax] 413 | plt.plot(bbgt[[0, 2, 2, 0, 0]], bbgt[[1, 1, 3, 3, 1]], 'y') 414 | plt.plot(bb[[0, 2, 2, 0, 0]], bb[[1, 1, 3, 3, 1]], 'g') 415 | else: 416 | plt.plot(bb[[0, 2, 2, 0, 0]], bb[[1, 1, 3, 3, 1]], 'r') 417 | plt.title('Image {:s} [{:d}/{:d}], (green=true pos, red=false pos, ' 418 | 'yellow=ground truth)'.format(image_ids[d], d, len(confidences))) 419 | 420 | 421 | def visualize_detections_by_image_no_gt(bboxes, confidences, image_ids, 422 | test_scn_path): 423 | """ 424 | Visualize detection bounding boxes on images that don't have ground truth 425 | labels 426 | :param bboxes: N x 4 numpy matrix, where N is the number of detections. 
Each 427 | row is [xmin, ymin, xmax, ymax] 428 | :param confidences: size (N, ) numpy array of detection confidences 429 | :param image_ids: N-element list of image names for each detection 430 | :param test_scn_path: path to directory holding test images (in .jpg format) 431 | :return: 432 | """ 433 | test_filenames = glob(osp.join(test_scn_path, '*.jpg')) 434 | 435 | for im_filename in test_filenames: 436 | test_id = im_filename.split('/')[-1] 437 | test_id = test_id.split('\\')[-1] # in case the file path use backslash 438 | cur_test_image = load_image(im_filename) 439 | cur_detections = [i for i, im_id in enumerate(image_ids) if im_id == test_id] 440 | cur_bboxes = bboxes[cur_detections] 441 | cur_confidences = confidences[cur_detections] 442 | 443 | plt.figure() 444 | plt.imshow(cur_test_image.astype(np.uint8)) 445 | 446 | for bb in cur_bboxes: 447 | plt.plot(bb[[0, 2, 2, 0, 0]], bb[[1, 1, 3, 3, 1]], 'g') 448 | plt.title('{:s} green=detection'.format(test_id)) 449 | 450 | 451 | class PseudoSVM(): 452 | 453 | def __init__(self, C=10, dim=1116): 454 | self.C = C 455 | self.coef_ = np.random.rand(dim, 1) 456 | 457 | def decision_function(self, feats): 458 | return np.random.rand(len(feats)) 459 | -------------------------------------------------------------------------------- /project/proj5/Assigment5.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kemo-Huang/CS308-Computer-Vision/bc83936aa04bf0dca86294595632a9e1d0092f9a/project/proj5/Assigment5.pdf -------------------------------------------------------------------------------- /project/proj5/code/Assignment5.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# [Deep Learning](https://www.cc.gatech.edu/~hays/compvision/proj6/)" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "## Setup" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 1, 20 | "metadata": { 21 | "pycharm": { 22 | "is_executing": false 23 | } 24 | }, 25 | "outputs": [], 26 | "source": [ 27 | "%matplotlib notebook\n", 28 | "%load_ext autoreload\n", 29 | "%autoreload 2\n", 30 | "import cv2\n", 31 | "import numpy as np\n", 32 | "import random\n", 33 | "import torch.nn as nn\n", 34 | "import torch.optim as optim\n", 35 | "import os.path as osp\n", 36 | "import matplotlib.pyplot as plt\n", 37 | "from utils import *\n", 38 | "import student_code as sc\n", 39 | "from torchvision.models import alexnet\n", 40 | "\n", 41 | "data_path = osp.join('../data', '15SceneData')\n", 42 | "num_classes = 15\n", 43 | "\n", 44 | "# If you have a good Nvidia GPU with an appropriate environment, \n", 45 | "# try setting the use_GPU flag to True (the environment provided does\n", 46 | "# not support GPUs and we will not provide any support for GPU\n", 47 | "# computation in this project). Please note that \n", 48 | "# we will evaluate your implementations only using CPU mode so even if\n", 49 | "# you use a GPU, make sure your code runs in the CPU mode with the\n", 50 | "# environment we provided. \n", 51 | "use_GPU = True\n", 52 | "if use_GPU:\n", 53 | " from utils_gpu import *" 54 | ] 55 | }, 56 | { 57 | "cell_type": "markdown", 58 | "metadata": {}, 59 | "source": [ 60 | "To train a network in PyTorch, we need 4 components:\n", 61 | "1. **Dataset** - an object which can load the data and labels given an index.\n", 62 | "2. 
**Model** - an object that contains the network architecture definition.\n", 63 | "3. **Loss function** - a function that measures how far the network output is from the ground truth label.\n", 64 | "4. **Optimizer** - an object that optimizes the network parameters to reduce the loss value." 65 | ] 66 | }, 67 | { 68 | "cell_type": "markdown", 69 | "metadata": {}, 70 | "source": [ 71 | "This project has two main parts. In Part 1, you will train a deep network from scratch. In Part 2, you will \"fine-tune\" a trained network. " 72 | ] 73 | }, 74 | { 75 | "cell_type": "markdown", 76 | "metadata": {}, 77 | "source": [ 78 | "## Part 1: Modifying the Dataloaders and the Simple Network create_datasets" 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": 2, 84 | "metadata": { 85 | "pycharm": { 86 | "is_executing": false 87 | } 88 | }, 89 | "outputs": [], 90 | "source": [ 91 | "# Fix random seeds so that results will be reproducible\n", 92 | "set_seed(0, use_GPU)" 93 | ] 94 | }, 95 | { 96 | "cell_type": "code", 97 | "execution_count": 3, 98 | "metadata": { 99 | "pycharm": { 100 | "is_executing": false 101 | } 102 | }, 103 | "outputs": [], 104 | "source": [ 105 | "# Training parameters.\n", 106 | "input_size = (64, 64)\n", 107 | "RGB = False \n", 108 | "base_lr = 1e-2 # may try a smaller lr if not using batch norm\n", 109 | "weight_decay = 5e-4\n", 110 | "momentum = 0.9" 111 | ] 112 | }, 113 | { 114 | "cell_type": "markdown", 115 | "metadata": {}, 116 | "source": [ 117 | "Now you will modify the create_datasets function from student_code. You will add random left-right mirroring and normalization to the transformations applied to the training dataset. You will also add normalization to the transformations applied to the testing dataset. " 118 | ] 119 | }, 120 | { 121 | "cell_type": "code", 122 | "execution_count": 4, 123 | "metadata": { 124 | "pycharm": { 125 | "is_executing": false 126 | } 127 | }, 128 | "outputs": [ 129 | { 130 | "name": "stdout", 131 | "output_type": "stream", 132 | "text": [ 133 | "Computing pixel mean and stdev...\n", 134 | "Batch 0 / 30\n", 135 | "Batch 20 / 30\n", 136 | "Done, mean = \n", 137 | "[0.45579668]\n", 138 | "std = \n", 139 | "[0.23624939]\n", 140 | "Computing pixel mean and stdev...\n", 141 | "Batch 0 / 60\n", 142 | "Batch 20 / 60\n", 143 | "Batch 40 / 60\n", 144 | "Done, mean = \n", 145 | "[0.45517009]\n", 146 | "std = \n", 147 | "[0.2350788]\n" 148 | ] 149 | } 150 | ], 151 | "source": [ 152 | "# Create the training and testing datasets.\n", 153 | "train_dataset, test_dataset = sc.create_datasets(data_path=data_path, input_size=input_size, rgb=RGB)\n", 154 | "assert test_dataset.classes == train_dataset.classes" 155 | ] 156 | }, 157 | { 158 | "cell_type": "markdown", 159 | "metadata": {}, 160 | "source": [ 161 | "Now you will modify SimpleNet by adding droppout, batch normalization, and additional convolution/maxpool/relu layers. You should achieve an accuracy of at least **50%**. Make sure your network passes this threshold--it is required for full credit on this section!\n", 162 | "\n", 163 | "You can also use the following two blocks to determine the stucture of your network." 
164 | ] 165 | }, 166 | { 167 | "cell_type": "code", 168 | "execution_count": 5, 169 | "metadata": { 170 | "pycharm": { 171 | "is_executing": false 172 | }, 173 | "scrolled": true 174 | }, 175 | "outputs": [ 176 | { 177 | "name": "stdout", 178 | "output_type": "stream", 179 | "text": [ 180 | "SimpleNet(\n", 181 | " (features): Sequential(\n", 182 | " (0): Conv2d(1, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", 183 | " (1): BatchNorm2d(12, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", 184 | " (2): ReLU(inplace=True)\n", 185 | " (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)\n", 186 | " (4): Conv2d(12, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", 187 | " (5): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", 188 | " (6): ReLU(inplace=True)\n", 189 | " (7): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)\n", 190 | " (8): Conv2d(24, 48, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", 191 | " (9): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", 192 | " (10): ReLU(inplace=True)\n", 193 | " (11): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)\n", 194 | " (12): Conv2d(48, 96, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", 195 | " (13): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", 196 | " (14): ReLU(inplace=True)\n", 197 | " (15): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)\n", 198 | " )\n", 199 | " (classifier): Sequential(\n", 200 | " (0): Linear(in_features=1536, out_features=1024, bias=True)\n", 201 | " (1): ReLU(inplace=True)\n", 202 | " (2): Dropout(p=0.8, inplace=False)\n", 203 | " (3): Linear(in_features=1024, out_features=15, bias=True)\n", 204 | " )\n", 205 | ")\n" 206 | ] 207 | } 208 | ], 209 | "source": [ 210 | "# create the network model\n", 211 | "model = sc.SimpleNet(num_classes=num_classes, rgb=False, verbose=False)\n", 212 | "if use_GPU:\n", 213 | " model = model.cuda()\n", 214 | "print(model)" 215 | ] 216 | }, 217 | { 218 | "cell_type": "code", 219 | "execution_count": 6, 220 | "metadata": { 221 | "pycharm": { 222 | "is_executing": false 223 | } 224 | }, 225 | "outputs": [ 226 | { 227 | "name": "stdout", 228 | "output_type": "stream", 229 | "text": [ 230 | "Network output size is torch.Size([15])\n" 231 | ] 232 | } 233 | ], 234 | "source": [ 235 | "# Use this block to determine the kernel size of the conv2d layer in the classifier\n", 236 | "# first, set the kernel size of that conv2d layer to 1, and run this block\n", 237 | "# then, use that size of input to the classifier printed by this block to\n", 238 | "# go back and update the kernel size of the conv2d layer in the classifier\n", 239 | "# Finally, run this block again and verify that the network output size is a scalar\n", 240 | "# Don't forget to re-run the block above every time you update the SimpleNet class!\n", 241 | "from torch.autograd import Variable\n", 242 | "data, _ = train_dataset[0]\n", 243 | "s = data.size()\n", 244 | "data = Variable(data.view(1, *s))\n", 245 | "if use_GPU:\n", 246 | " data = data.cuda()\n", 247 | "out = model(data)\n", 248 | "print('Network output size is ', out.size())" 249 | ] 250 | }, 251 | { 252 | "cell_type": "markdown", 253 | "metadata": {}, 254 | "source": [ 255 | "Next we will create the loss function and the optimizer. 
You do not have to modify the custom_part1_trainer in student_code if you use the same loss_function, optimizer, scheduler and parameters (n_epochs, batch_size, etc.) as provided in this notebook to hit the required threshold of 50% accuracy. If you change any of these values, you must modify this function in student_code, since we will not use the notebook you submit for evaluation. " 256 | ] 257 | }, 258 | { 259 | "cell_type": "code", 260 | "execution_count": 7, 261 | "metadata": { 262 | "pycharm": { 263 | "is_executing": false 264 | } 265 | }, 266 | "outputs": [], 267 | "source": [ 268 | "# Set up the trainer. You can modify custom_part1_trainer in\n", 269 | "# student_code.py if you want to try different learning settings.\n", 270 | "custom_part1_trainer = sc.custom_part1_trainer(model)\n", 271 | "\n", 272 | "if custom_part1_trainer is None:\n", 273 | " # Create the loss function.\n", 274 | " # see http://pytorch.org/docs/0.3.0/nn.html#loss-functions for a list of available loss functions\n", 275 | " loss_function = nn.CrossEntropyLoss()\n", 276 | "\n", 277 | " # Create the optimizer and a learning rate scheduler.\n", 278 | " optimizer = optim.SGD(params=model.parameters(), lr=base_lr, weight_decay=weight_decay, momentum=momentum)\n", 279 | " # Currently a simple step scheduler, but you can get creative.\n", 280 | " # See http://pytorch.org/docs/0.3.0/optim.html#how-to-adjust-learning-rate for various LR schedulers\n", 281 | " # and how to use them\n", 282 | " lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=60, gamma=0.1)\n", 283 | "\n", 284 | " params = {'n_epochs': 100, 'batch_size': 50, 'experiment': 'part1'}\n", 285 | " \n", 286 | "else:\n", 287 | " if 'loss_function' in custom_part1_trainer:\n", 288 | " loss_function = custom_part1_trainer['loss_function']\n", 289 | " if 'optimizer' in custom_part1_trainer:\n", 290 | " optimizer = custom_part1_trainer['optimizer']\n", 291 | " if 'lr_scheduler' in custom_part1_trainer:\n", 292 | " lr_scheduler = custom_part1_trainer['lr_scheduler']\n", 293 | " if 'params' in custom_part1_trainer:\n", 294 | " params = custom_part1_trainer['params']" 295 | ] 296 | }, 297 | { 298 | "cell_type": "markdown", 299 | "metadata": {}, 300 | "source": [ 301 | "We are ready to train our network! As before, we will start a local server to see the training progress of our network (if your server is already running, you should not start another one). Open a new terminal and activate the environment for this project. Then run the following command: **python -m visdom.server**. This will start a local server. The terminal output should print a link like: \"http://localhost:8097\". Open this link in your browser. After you run the following block, visit this link again, and you will be able to see graphs showing the progress of your training! If you do not see any graphs, select Part 1 on the top left bar where it says Environment (only select Part 1, do not check main or Part 2)."
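Before training, one note on custom_part1_trainer: if you override the defaults, it should return a dictionary containing some or all of the four keys read by the setup cell above. A hypothetical example of its shape (the values here are placeholders, not the settings used in this run):

```python
import torch.nn as nn
import torch.optim as optim

def custom_part1_trainer(model):
    # Hypothetical override: only needed if you deviate from the notebook's defaults.
    optimizer = optim.SGD(model.parameters(), lr=5e-3, weight_decay=5e-4, momentum=0.9)
    return {
        'loss_function': nn.CrossEntropyLoss(),
        'optimizer': optimizer,
        'lr_scheduler': optim.lr_scheduler.StepLR(optimizer, step_size=40, gamma=0.1),
        'params': {'n_epochs': 120, 'batch_size': 50, 'experiment': 'part1'},
    }
```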
302 | ] 303 | }, 304 | { 305 | "cell_type": "code", 306 | "execution_count": 8, 307 | "metadata": { 308 | "pycharm": { 309 | "is_executing": false 310 | } 311 | }, 312 | "outputs": [ 313 | { 314 | "name": "stderr", 315 | "output_type": "stream", 316 | "text": [ 317 | "Setting up a new session...\n" 318 | ] 319 | }, 320 | { 321 | "name": "stdout", 322 | "output_type": "stream", 323 | "text": [ 324 | "---------------------------------------\n", 325 | "Experiment: part1\n", 326 | "n_epochs: 100\n", 327 | "batch_size: 50\n", 328 | "do_val: True\n", 329 | "shuffle: True\n", 330 | "num_workers: 4\n", 331 | "val_freq: 1\n", 332 | "print_freq: 100\n", 333 | "experiment: part1\n", 334 | "checkpoint_file: None\n", 335 | "resume_optim: True\n", 336 | "---------------------------------------\n", 337 | "part1 Epoch 0 / 100\n", 338 | "train part1: batch 0/29, loss 2.716, top-1 accuracy 10.000, top-5 accuracy 24.000\n", 339 | "train part1: loss 2.660641\n", 340 | "val part1: batch 0/59, loss 2.453, top-1 accuracy 10.000, top-5 accuracy 48.000\n", 341 | "val part1: loss 2.674087\n", 342 | "Checkpoint saved\n", 343 | "BEST TOP1 ACCURACY SO FAR\n", 344 | "part1 Epoch 1 / 100\n", 345 | "train part1: batch 0/29, loss 2.418, top-1 accuracy 16.000, top-5 accuracy 64.000\n", 346 | "train part1: loss 2.474717\n", 347 | "val part1: batch 0/59, loss 2.351, top-1 accuracy 22.000, top-5 accuracy 70.000\n", 348 | "val part1: loss 2.311414\n", 349 | "Checkpoint saved\n", 350 | "BEST TOP1 ACCURACY SO FAR\n", 351 | "part1 Epoch 2 / 100\n", 352 | "train part1: batch 0/29, loss 2.262, top-1 accuracy 28.000, top-5 accuracy 68.000\n", 353 | "train part1: loss 2.332324\n", 354 | "val part1: batch 0/59, loss 2.624, top-1 accuracy 22.000, top-5 accuracy 48.000\n", 355 | "val part1: loss 2.108762\n", 356 | "Checkpoint saved\n", 357 | "BEST TOP1 ACCURACY SO FAR\n", 358 | "part1 Epoch 3 / 100\n", 359 | "train part1: batch 0/29, loss 2.374, top-1 accuracy 24.000, top-5 accuracy 66.000\n", 360 | "train part1: loss 2.277820\n", 361 | "val part1: batch 0/59, loss 2.299, top-1 accuracy 30.000, top-5 accuracy 66.000\n", 362 | "val part1: loss 2.034218\n", 363 | "Checkpoint saved\n", 364 | "BEST TOP1 ACCURACY SO FAR\n", 365 | "part1 Epoch 4 / 100\n", 366 | "train part1: batch 0/29, loss 2.404, top-1 accuracy 28.000, top-5 accuracy 62.000\n", 367 | "train part1: loss 2.246646\n", 368 | "val part1: batch 0/59, loss 2.203, top-1 accuracy 2.000, top-5 accuracy 90.000\n", 369 | "val part1: loss 2.223471\n", 370 | "Checkpoint saved\n", 371 | "part1 Epoch 5 / 100\n", 372 | "train part1: batch 0/29, loss 2.088, top-1 accuracy 38.000, top-5 accuracy 82.000\n", 373 | "train part1: loss 2.194817\n", 374 | "val part1: batch 0/59, loss 2.883, top-1 accuracy 6.000, top-5 accuracy 40.000\n", 375 | "val part1: loss 2.443704\n", 376 | "Checkpoint saved\n", 377 | "part1 Epoch 6 / 100\n", 378 | "train part1: batch 0/29, loss 2.317, top-1 accuracy 24.000, top-5 accuracy 70.000\n", 379 | "train part1: loss 2.188513\n", 380 | "val part1: batch 0/59, loss 2.388, top-1 accuracy 26.000, top-5 accuracy 76.000\n", 381 | "val part1: loss 2.040629\n", 382 | "Checkpoint saved\n", 383 | "BEST TOP1 ACCURACY SO FAR\n", 384 | "part1 Epoch 7 / 100\n", 385 | "train part1: batch 0/29, loss 2.052, top-1 accuracy 30.000, top-5 accuracy 78.000\n", 386 | "train part1: loss 2.090109\n", 387 | "val part1: batch 0/59, loss 2.429, top-1 accuracy 10.000, top-5 accuracy 66.000\n", 388 | "val part1: loss 1.852616\n", 389 | "Checkpoint saved\n", 390 | "BEST TOP1 ACCURACY SO FAR\n", 391 
| "part1 Epoch 8 / 100\n", 392 | "train part1: batch 0/29, loss 2.074, top-1 accuracy 34.000, top-5 accuracy 72.000\n", 393 | "train part1: loss 2.022772\n", 394 | "val part1: batch 0/59, loss 2.426, top-1 accuracy 16.000, top-5 accuracy 64.000\n", 395 | "val part1: loss 1.945282\n", 396 | "Checkpoint saved\n", 397 | "part1 Epoch 9 / 100\n", 398 | "train part1: batch 0/29, loss 2.030, top-1 accuracy 34.000, top-5 accuracy 78.000\n", 399 | "train part1: loss 2.049587\n", 400 | "val part1: batch 0/59, loss 1.840, top-1 accuracy 46.000, top-5 accuracy 90.000\n", 401 | "val part1: loss 1.799102\n", 402 | "Checkpoint saved\n", 403 | "BEST TOP1 ACCURACY SO FAR\n", 404 | "part1 Epoch 10 / 100\n", 405 | "train part1: batch 0/29, loss 2.037, top-1 accuracy 38.000, top-5 accuracy 72.000\n", 406 | "train part1: loss 2.012847\n", 407 | "val part1: batch 0/59, loss 2.321, top-1 accuracy 16.000, top-5 accuracy 72.000\n", 408 | "val part1: loss 1.853063\n", 409 | "Checkpoint saved\n", 410 | "part1 Epoch 11 / 100\n", 411 | "train part1: batch 0/29, loss 1.784, top-1 accuracy 40.000, top-5 accuracy 84.000\n", 412 | "train part1: loss 1.918819\n", 413 | "val part1: batch 0/59, loss 2.004, top-1 accuracy 20.000, top-5 accuracy 90.000\n", 414 | "val part1: loss 1.740404\n", 415 | "Checkpoint saved\n", 416 | "BEST TOP1 ACCURACY SO FAR\n", 417 | "part1 Epoch 12 / 100\n", 418 | "train part1: batch 0/29, loss 1.655, top-1 accuracy 54.000, top-5 accuracy 86.000\n", 419 | "train part1: loss 1.881280\n", 420 | "val part1: batch 0/59, loss 2.029, top-1 accuracy 26.000, top-5 accuracy 92.000\n", 421 | "val part1: loss 1.778705\n", 422 | "Checkpoint saved\n", 423 | "part1 Epoch 13 / 100\n", 424 | "train part1: batch 0/29, loss 1.871, top-1 accuracy 42.000, top-5 accuracy 78.000\n", 425 | "train part1: loss 1.858144\n", 426 | "val part1: batch 0/59, loss 2.073, top-1 accuracy 24.000, top-5 accuracy 78.000\n", 427 | "val part1: loss 1.753376\n", 428 | "Checkpoint saved\n", 429 | "part1 Epoch 14 / 100\n", 430 | "train part1: batch 0/29, loss 1.662, top-1 accuracy 48.000, top-5 accuracy 90.000\n", 431 | "train part1: loss 1.856867\n", 432 | "val part1: batch 0/59, loss 2.117, top-1 accuracy 28.000, top-5 accuracy 82.000\n", 433 | "val part1: loss 1.657026\n", 434 | "Checkpoint saved\n", 435 | "BEST TOP1 ACCURACY SO FAR\n", 436 | "part1 Epoch 15 / 100\n", 437 | "train part1: batch 0/29, loss 1.631, top-1 accuracy 40.000, top-5 accuracy 94.000\n", 438 | "train part1: loss 1.873407\n", 439 | "val part1: batch 0/59, loss 2.191, top-1 accuracy 18.000, top-5 accuracy 82.000\n", 440 | "val part1: loss 1.590518\n", 441 | "Checkpoint saved\n", 442 | "BEST TOP1 ACCURACY SO FAR\n", 443 | "part1 Epoch 16 / 100\n", 444 | "train part1: batch 0/29, loss 1.556, top-1 accuracy 50.000, top-5 accuracy 84.000\n", 445 | "train part1: loss 1.713181\n", 446 | "val part1: batch 0/59, loss 2.037, top-1 accuracy 26.000, top-5 accuracy 84.000\n", 447 | "val part1: loss 1.500988\n", 448 | "Checkpoint saved\n", 449 | "BEST TOP1 ACCURACY SO FAR\n", 450 | "part1 Epoch 17 / 100\n", 451 | "train part1: batch 0/29, loss 1.738, top-1 accuracy 40.000, top-5 accuracy 86.000\n", 452 | "train part1: loss 1.687592\n", 453 | "val part1: batch 0/59, loss 1.510, top-1 accuracy 48.000, top-5 accuracy 98.000\n", 454 | "val part1: loss 1.448492\n", 455 | "Checkpoint saved\n", 456 | "BEST TOP1 ACCURACY SO FAR\n", 457 | "part1 Epoch 18 / 100\n", 458 | "train part1: batch 0/29, loss 1.599, top-1 accuracy 48.000, top-5 accuracy 86.000\n", 459 | "train part1: loss 
1.680492\n", 460 | "val part1: batch 0/59, loss 1.719, top-1 accuracy 34.000, top-5 accuracy 92.000\n", 461 | "val part1: loss 1.454456\n", 462 | "Checkpoint saved\n", 463 | "BEST TOP1 ACCURACY SO FAR\n", 464 | "part1 Epoch 19 / 100\n", 465 | "train part1: batch 0/29, loss 1.789, top-1 accuracy 48.000, top-5 accuracy 78.000\n", 466 | "train part1: loss 1.646169\n", 467 | "val part1: batch 0/59, loss 2.208, top-1 accuracy 26.000, top-5 accuracy 72.000\n", 468 | "val part1: loss 1.490147\n", 469 | "Checkpoint saved\n", 470 | "part1 Epoch 20 / 100\n", 471 | "train part1: batch 0/29, loss 1.923, top-1 accuracy 36.000, top-5 accuracy 86.000\n", 472 | "train part1: loss 1.595892\n", 473 | "val part1: batch 0/59, loss 2.060, top-1 accuracy 24.000, top-5 accuracy 82.000\n", 474 | "val part1: loss 1.519088\n", 475 | "Checkpoint saved\n", 476 | "part1 Epoch 21 / 100\n", 477 | "train part1: batch 0/29, loss 1.250, top-1 accuracy 62.000, top-5 accuracy 94.000\n", 478 | "train part1: loss 1.565675\n", 479 | "val part1: batch 0/59, loss 2.194, top-1 accuracy 14.000, top-5 accuracy 78.000\n", 480 | "val part1: loss 1.879126\n", 481 | "Checkpoint saved\n", 482 | "part1 Epoch 22 / 100\n", 483 | "train part1: batch 0/29, loss 1.585, top-1 accuracy 54.000, top-5 accuracy 90.000\n", 484 | "train part1: loss 1.534763\n", 485 | "val part1: batch 0/59, loss 1.503, top-1 accuracy 60.000, top-5 accuracy 90.000\n", 486 | "val part1: loss 1.521252\n", 487 | "Checkpoint saved\n", 488 | "part1 Epoch 23 / 100\n", 489 | "train part1: batch 0/29, loss 1.447, top-1 accuracy 58.000, top-5 accuracy 84.000\n", 490 | "train part1: loss 1.452563\n", 491 | "val part1: batch 0/59, loss 1.673, top-1 accuracy 42.000, top-5 accuracy 90.000\n", 492 | "val part1: loss 1.344078\n", 493 | "Checkpoint saved\n", 494 | "BEST TOP1 ACCURACY SO FAR\n", 495 | "part1 Epoch 24 / 100\n", 496 | "train part1: batch 0/29, loss 1.142, top-1 accuracy 64.000, top-5 accuracy 96.000\n", 497 | "train part1: loss 1.432810\n", 498 | "val part1: batch 0/59, loss 1.908, top-1 accuracy 28.000, top-5 accuracy 88.000\n", 499 | "val part1: loss 1.289051\n", 500 | "Checkpoint saved\n", 501 | "BEST TOP1 ACCURACY SO FAR\n", 502 | "part1 Epoch 25 / 100\n", 503 | "train part1: batch 0/29, loss 1.658, top-1 accuracy 42.000, top-5 accuracy 86.000\n", 504 | "train part1: loss 1.420957\n", 505 | "val part1: batch 0/59, loss 1.851, top-1 accuracy 38.000, top-5 accuracy 84.000\n", 506 | "val part1: loss 1.310904\n", 507 | "Checkpoint saved\n", 508 | "part1 Epoch 26 / 100\n", 509 | "train part1: batch 0/29, loss 1.580, top-1 accuracy 48.000, top-5 accuracy 86.000\n", 510 | "train part1: loss 1.440826\n", 511 | "val part1: batch 0/59, loss 1.717, top-1 accuracy 36.000, top-5 accuracy 92.000\n", 512 | "val part1: loss 1.312577\n", 513 | "Checkpoint saved\n", 514 | "part1 Epoch 27 / 100\n", 515 | "train part1: batch 0/29, loss 1.277, top-1 accuracy 54.000, top-5 accuracy 94.000\n", 516 | "train part1: loss 1.349694\n", 517 | "val part1: batch 0/59, loss 1.985, top-1 accuracy 18.000, top-5 accuracy 84.000\n", 518 | "val part1: loss 1.180214\n", 519 | "Checkpoint saved\n", 520 | "BEST TOP1 ACCURACY SO FAR\n", 521 | "part1 Epoch 28 / 100\n", 522 | "train part1: batch 0/29, loss 1.234, top-1 accuracy 64.000, top-5 accuracy 92.000\n", 523 | "train part1: loss 1.313300\n", 524 | "val part1: batch 0/59, loss 1.840, top-1 accuracy 30.000, top-5 accuracy 82.000\n", 525 | "val part1: loss 1.273597\n", 526 | "Checkpoint saved\n", 527 | "part1 Epoch 29 / 100\n", 528 | "train part1: batch 
0/29, loss 1.282, top-1 accuracy 60.000, top-5 accuracy 90.000\n", 529 | "train part1: loss 1.291305\n", 530 | "val part1: batch 0/59, loss 1.711, top-1 accuracy 38.000, top-5 accuracy 92.000\n", 531 | "val part1: loss 1.377620\n", 532 | "Checkpoint saved\n", 533 | "part1 Epoch 30 / 100\n" 534 | ] 535 | }, 536 | { 537 | "name": "stdout", 538 | "output_type": "stream", 539 | "text": [ 540 | "train part1: batch 0/29, loss 1.031, top-1 accuracy 64.000, top-5 accuracy 98.000\n", 541 | "train part1: loss 1.258324\n", 542 | "val part1: batch 0/59, loss 2.020, top-1 accuracy 24.000, top-5 accuracy 90.000\n", 543 | "val part1: loss 1.195394\n", 544 | "Checkpoint saved\n", 545 | "part1 Epoch 31 / 100\n", 546 | "train part1: batch 0/29, loss 1.137, top-1 accuracy 56.000, top-5 accuracy 96.000\n", 547 | "train part1: loss 1.216911\n", 548 | "val part1: batch 0/59, loss 1.532, top-1 accuracy 44.000, top-5 accuracy 92.000\n", 549 | "val part1: loss 1.414781\n", 550 | "Checkpoint saved\n", 551 | "part1 Epoch 32 / 100\n", 552 | "train part1: batch 0/29, loss 0.987, top-1 accuracy 66.000, top-5 accuracy 98.000\n", 553 | "train part1: loss 1.111287\n", 554 | "val part1: batch 0/59, loss 1.819, top-1 accuracy 28.000, top-5 accuracy 90.000\n", 555 | "val part1: loss 1.145157\n", 556 | "Checkpoint saved\n", 557 | "BEST TOP1 ACCURACY SO FAR\n", 558 | "part1 Epoch 33 / 100\n", 559 | "train part1: batch 0/29, loss 1.042, top-1 accuracy 64.000, top-5 accuracy 98.000\n", 560 | "train part1: loss 1.222887\n", 561 | "val part1: batch 0/59, loss 1.851, top-1 accuracy 28.000, top-5 accuracy 86.000\n", 562 | "val part1: loss 1.380957\n", 563 | "Checkpoint saved\n", 564 | "part1 Epoch 34 / 100\n", 565 | "train part1: batch 0/29, loss 1.042, top-1 accuracy 68.000, top-5 accuracy 96.000\n", 566 | "train part1: loss 1.174929\n", 567 | "val part1: batch 0/59, loss 0.674, top-1 accuracy 82.000, top-5 accuracy 100.000\n", 568 | "val part1: loss 1.449894\n", 569 | "Checkpoint saved\n", 570 | "part1 Epoch 35 / 100\n", 571 | "train part1: batch 0/29, loss 0.938, top-1 accuracy 72.000, top-5 accuracy 98.000\n", 572 | "train part1: loss 1.063462\n", 573 | "val part1: batch 0/59, loss 1.240, top-1 accuracy 62.000, top-5 accuracy 94.000\n", 574 | "val part1: loss 1.201662\n", 575 | "Checkpoint saved\n", 576 | "part1 Epoch 36 / 100\n", 577 | "train part1: batch 0/29, loss 0.953, top-1 accuracy 66.000, top-5 accuracy 98.000\n", 578 | "train part1: loss 1.067647\n", 579 | "val part1: batch 0/59, loss 1.356, top-1 accuracy 44.000, top-5 accuracy 100.000\n", 580 | "val part1: loss 1.169855\n", 581 | "Checkpoint saved\n", 582 | "part1 Epoch 37 / 100\n", 583 | "train part1: batch 0/29, loss 0.820, top-1 accuracy 74.000, top-5 accuracy 100.000\n", 584 | "train part1: loss 1.034231\n", 585 | "val part1: batch 0/59, loss 1.767, top-1 accuracy 36.000, top-5 accuracy 88.000\n", 586 | "val part1: loss 1.176197\n", 587 | "Checkpoint saved\n", 588 | "part1 Epoch 38 / 100\n", 589 | "train part1: batch 0/29, loss 1.085, top-1 accuracy 60.000, top-5 accuracy 98.000\n", 590 | "train part1: loss 1.075923\n", 591 | "val part1: batch 0/59, loss 1.035, top-1 accuracy 58.000, top-5 accuracy 100.000\n", 592 | "val part1: loss 1.250081\n", 593 | "Checkpoint saved\n", 594 | "part1 Epoch 39 / 100\n", 595 | "train part1: batch 0/29, loss 0.700, top-1 accuracy 76.000, top-5 accuracy 98.000\n", 596 | "train part1: loss 0.958608\n", 597 | "val part1: batch 0/59, loss 1.734, top-1 accuracy 30.000, top-5 accuracy 90.000\n", 598 | "val part1: loss 1.146434\n", 599 | 
"Checkpoint saved\n", 600 | "BEST TOP1 ACCURACY SO FAR\n", 601 | "part1 Epoch 40 / 100\n", 602 | "train part1: batch 0/29, loss 0.917, top-1 accuracy 72.000, top-5 accuracy 90.000\n", 603 | "train part1: loss 0.954836\n", 604 | "val part1: batch 0/59, loss 1.747, top-1 accuracy 28.000, top-5 accuracy 92.000\n", 605 | "val part1: loss 1.254555\n", 606 | "Checkpoint saved\n", 607 | "part1 Epoch 41 / 100\n", 608 | "train part1: batch 0/29, loss 0.920, top-1 accuracy 72.000, top-5 accuracy 98.000\n", 609 | "train part1: loss 1.014113\n", 610 | "val part1: batch 0/59, loss 1.905, top-1 accuracy 28.000, top-5 accuracy 84.000\n", 611 | "val part1: loss 1.315546\n", 612 | "Checkpoint saved\n", 613 | "part1 Epoch 42 / 100\n", 614 | "train part1: batch 0/29, loss 0.859, top-1 accuracy 66.000, top-5 accuracy 96.000\n", 615 | "train part1: loss 0.964082\n", 616 | "val part1: batch 0/59, loss 1.278, top-1 accuracy 48.000, top-5 accuracy 96.000\n", 617 | "val part1: loss 1.213332\n", 618 | "Checkpoint saved\n", 619 | "part1 Epoch 43 / 100\n", 620 | "train part1: batch 0/29, loss 1.106, top-1 accuracy 68.000, top-5 accuracy 100.000\n", 621 | "train part1: loss 0.900427\n", 622 | "val part1: batch 0/59, loss 2.251, top-1 accuracy 14.000, top-5 accuracy 80.000\n", 623 | "val part1: loss 1.178361\n", 624 | "Checkpoint saved\n", 625 | "part1 Epoch 44 / 100\n", 626 | "train part1: batch 0/29, loss 1.066, top-1 accuracy 66.000, top-5 accuracy 92.000\n", 627 | "train part1: loss 0.850803\n", 628 | "val part1: batch 0/59, loss 1.310, top-1 accuracy 50.000, top-5 accuracy 96.000\n", 629 | "val part1: loss 1.108495\n", 630 | "Checkpoint saved\n", 631 | "BEST TOP1 ACCURACY SO FAR\n", 632 | "part1 Epoch 45 / 100\n", 633 | "train part1: batch 0/29, loss 0.912, top-1 accuracy 68.000, top-5 accuracy 100.000\n", 634 | "train part1: loss 0.851264\n", 635 | "val part1: batch 0/59, loss 1.556, top-1 accuracy 40.000, top-5 accuracy 86.000\n", 636 | "val part1: loss 1.169370\n", 637 | "Checkpoint saved\n", 638 | "part1 Epoch 46 / 100\n", 639 | "train part1: batch 0/29, loss 0.781, top-1 accuracy 78.000, top-5 accuracy 98.000\n", 640 | "train part1: loss 0.841260\n", 641 | "val part1: batch 0/59, loss 1.468, top-1 accuracy 48.000, top-5 accuracy 92.000\n", 642 | "val part1: loss 1.093216\n", 643 | "Checkpoint saved\n", 644 | "BEST TOP1 ACCURACY SO FAR\n", 645 | "part1 Epoch 47 / 100\n", 646 | "train part1: batch 0/29, loss 0.596, top-1 accuracy 76.000, top-5 accuracy 100.000\n", 647 | "train part1: loss 0.827810\n", 648 | "val part1: batch 0/59, loss 1.288, top-1 accuracy 50.000, top-5 accuracy 96.000\n", 649 | "val part1: loss 1.070031\n", 650 | "Checkpoint saved\n", 651 | "BEST TOP1 ACCURACY SO FAR\n", 652 | "part1 Epoch 48 / 100\n", 653 | "train part1: batch 0/29, loss 0.810, top-1 accuracy 72.000, top-5 accuracy 100.000\n", 654 | "train part1: loss 0.775189\n", 655 | "val part1: batch 0/59, loss 0.921, top-1 accuracy 66.000, top-5 accuracy 98.000\n", 656 | "val part1: loss 1.524725\n", 657 | "Checkpoint saved\n", 658 | "part1 Epoch 49 / 100\n", 659 | "train part1: batch 0/29, loss 0.775, top-1 accuracy 70.000, top-5 accuracy 96.000\n", 660 | "train part1: loss 0.709857\n", 661 | "val part1: batch 0/59, loss 0.957, top-1 accuracy 62.000, top-5 accuracy 96.000\n", 662 | "val part1: loss 1.261353\n", 663 | "Checkpoint saved\n", 664 | "part1 Epoch 50 / 100\n", 665 | "train part1: batch 0/29, loss 0.730, top-1 accuracy 78.000, top-5 accuracy 98.000\n", 666 | "train part1: loss 0.777712\n", 667 | "val part1: batch 0/59, loss 
1.595, top-1 accuracy 42.000, top-5 accuracy 88.000\n", 668 | "val part1: loss 1.394617\n", 669 | "Checkpoint saved\n", 670 | "part1 Epoch 51 / 100\n", 671 | "train part1: batch 0/29, loss 0.627, top-1 accuracy 76.000, top-5 accuracy 100.000\n", 672 | "train part1: loss 0.717591\n", 673 | "val part1: batch 0/59, loss 1.419, top-1 accuracy 46.000, top-5 accuracy 88.000\n", 674 | "val part1: loss 1.157158\n", 675 | "Checkpoint saved\n", 676 | "part1 Epoch 52 / 100\n", 677 | "train part1: batch 0/29, loss 0.915, top-1 accuracy 68.000, top-5 accuracy 96.000\n", 678 | "train part1: loss 0.684333\n", 679 | "val part1: batch 0/59, loss 1.151, top-1 accuracy 62.000, top-5 accuracy 92.000\n", 680 | "val part1: loss 1.355746\n", 681 | "Checkpoint saved\n", 682 | "part1 Epoch 53 / 100\n", 683 | "train part1: batch 0/29, loss 0.533, top-1 accuracy 82.000, top-5 accuracy 100.000\n", 684 | "train part1: loss 0.650421\n", 685 | "val part1: batch 0/59, loss 0.721, top-1 accuracy 76.000, top-5 accuracy 98.000\n", 686 | "val part1: loss 1.270329\n", 687 | "Checkpoint saved\n", 688 | "part1 Epoch 54 / 100\n", 689 | "train part1: batch 0/29, loss 0.678, top-1 accuracy 78.000, top-5 accuracy 100.000\n", 690 | "train part1: loss 0.660190\n", 691 | "val part1: batch 0/59, loss 2.038, top-1 accuracy 20.000, top-5 accuracy 86.000\n", 692 | "val part1: loss 1.179264\n", 693 | "Checkpoint saved\n", 694 | "part1 Epoch 55 / 100\n", 695 | "train part1: batch 0/29, loss 0.483, top-1 accuracy 90.000, top-5 accuracy 100.000\n", 696 | "train part1: loss 0.721452\n", 697 | "val part1: batch 0/59, loss 2.422, top-1 accuracy 20.000, top-5 accuracy 84.000\n", 698 | "val part1: loss 1.309595\n", 699 | "Checkpoint saved\n", 700 | "part1 Epoch 56 / 100\n", 701 | "train part1: batch 0/29, loss 0.583, top-1 accuracy 84.000, top-5 accuracy 96.000\n", 702 | "train part1: loss 0.681370\n", 703 | "val part1: batch 0/59, loss 1.417, top-1 accuracy 48.000, top-5 accuracy 86.000\n", 704 | "val part1: loss 1.156318\n", 705 | "Checkpoint saved\n", 706 | "part1 Epoch 57 / 100\n", 707 | "train part1: batch 0/29, loss 0.540, top-1 accuracy 80.000, top-5 accuracy 98.000\n", 708 | "train part1: loss 0.670764\n", 709 | "val part1: batch 0/59, loss 1.354, top-1 accuracy 50.000, top-5 accuracy 92.000\n", 710 | "val part1: loss 1.246445\n", 711 | "Checkpoint saved\n", 712 | "part1 Epoch 58 / 100\n", 713 | "train part1: batch 0/29, loss 0.406, top-1 accuracy 84.000, top-5 accuracy 100.000\n", 714 | "train part1: loss 0.616679\n", 715 | "val part1: batch 0/59, loss 1.581, top-1 accuracy 48.000, top-5 accuracy 88.000\n", 716 | "val part1: loss 1.153193\n", 717 | "Checkpoint saved\n", 718 | "part1 Epoch 59 / 100\n", 719 | "train part1: batch 0/29, loss 0.546, top-1 accuracy 84.000, top-5 accuracy 96.000\n", 720 | "train part1: loss 0.588246\n", 721 | "val part1: batch 0/59, loss 1.056, top-1 accuracy 58.000, top-5 accuracy 98.000\n", 722 | "val part1: loss 1.489484\n", 723 | "Checkpoint saved\n", 724 | "part1 Epoch 60 / 100\n", 725 | "train part1: batch 0/29, loss 0.505, top-1 accuracy 84.000, top-5 accuracy 98.000\n", 726 | "train part1: loss 0.497765\n", 727 | "val part1: batch 0/59, loss 1.450, top-1 accuracy 48.000, top-5 accuracy 92.000\n", 728 | "val part1: loss 1.059363\n", 729 | "Checkpoint saved\n", 730 | "BEST TOP1 ACCURACY SO FAR\n", 731 | "part1 Epoch 61 / 100\n", 732 | "train part1: batch 0/29, loss 0.479, top-1 accuracy 88.000, top-5 accuracy 96.000\n", 733 | "train part1: loss 0.475674\n", 734 | "val part1: batch 0/59, loss 1.289, top-1 
accuracy 50.000, top-5 accuracy 94.000\n", 735 | "val part1: loss 1.053961\n", 736 | "Checkpoint saved\n", 737 | "BEST TOP1 ACCURACY SO FAR\n", 738 | "part1 Epoch 62 / 100\n" 739 | ] 740 | }, 741 | { 742 | "name": "stdout", 743 | "output_type": "stream", 744 | "text": [ 745 | "train part1: batch 0/29, loss 0.450, top-1 accuracy 90.000, top-5 accuracy 98.000\n", 746 | "train part1: loss 0.456607\n", 747 | "val part1: batch 0/59, loss 1.264, top-1 accuracy 50.000, top-5 accuracy 96.000\n", 748 | "val part1: loss 1.069520\n", 749 | "Checkpoint saved\n", 750 | "part1 Epoch 63 / 100\n", 751 | "train part1: batch 0/29, loss 0.425, top-1 accuracy 88.000, top-5 accuracy 100.000\n", 752 | "train part1: loss 0.444092\n", 753 | "val part1: batch 0/59, loss 1.291, top-1 accuracy 50.000, top-5 accuracy 92.000\n", 754 | "val part1: loss 1.065126\n", 755 | "Checkpoint saved\n", 756 | "part1 Epoch 64 / 100\n", 757 | "train part1: batch 0/29, loss 0.348, top-1 accuracy 92.000, top-5 accuracy 100.000\n", 758 | "train part1: loss 0.411879\n", 759 | "val part1: batch 0/59, loss 1.152, top-1 accuracy 50.000, top-5 accuracy 96.000\n", 760 | "val part1: loss 1.063277\n", 761 | "Checkpoint saved\n", 762 | "part1 Epoch 65 / 100\n", 763 | "train part1: batch 0/29, loss 0.652, top-1 accuracy 80.000, top-5 accuracy 98.000\n", 764 | "train part1: loss 0.439077\n", 765 | "val part1: batch 0/59, loss 1.234, top-1 accuracy 50.000, top-5 accuracy 94.000\n", 766 | "val part1: loss 1.065789\n", 767 | "Checkpoint saved\n", 768 | "part1 Epoch 66 / 100\n", 769 | "train part1: batch 0/29, loss 0.430, top-1 accuracy 90.000, top-5 accuracy 98.000\n", 770 | "train part1: loss 0.384757\n", 771 | "val part1: batch 0/59, loss 1.307, top-1 accuracy 50.000, top-5 accuracy 92.000\n", 772 | "val part1: loss 1.048810\n", 773 | "Checkpoint saved\n", 774 | "BEST TOP1 ACCURACY SO FAR\n", 775 | "part1 Epoch 67 / 100\n", 776 | "train part1: batch 0/29, loss 0.330, top-1 accuracy 92.000, top-5 accuracy 100.000\n", 777 | "train part1: loss 0.396477\n", 778 | "val part1: batch 0/59, loss 1.360, top-1 accuracy 50.000, top-5 accuracy 90.000\n", 779 | "val part1: loss 1.063634\n", 780 | "Checkpoint saved\n", 781 | "part1 Epoch 68 / 100\n", 782 | "train part1: batch 0/29, loss 0.282, top-1 accuracy 90.000, top-5 accuracy 100.000\n", 783 | "train part1: loss 0.399306\n", 784 | "val part1: batch 0/59, loss 1.261, top-1 accuracy 50.000, top-5 accuracy 96.000\n", 785 | "val part1: loss 1.075185\n", 786 | "Checkpoint saved\n", 787 | "part1 Epoch 69 / 100\n", 788 | "train part1: batch 0/29, loss 0.375, top-1 accuracy 90.000, top-5 accuracy 100.000\n", 789 | "train part1: loss 0.397953\n", 790 | "val part1: batch 0/59, loss 1.339, top-1 accuracy 48.000, top-5 accuracy 90.000\n", 791 | "val part1: loss 1.067044\n", 792 | "Checkpoint saved\n", 793 | "part1 Epoch 70 / 100\n", 794 | "train part1: batch 0/29, loss 0.575, top-1 accuracy 78.000, top-5 accuracy 100.000\n", 795 | "train part1: loss 0.385016\n", 796 | "val part1: batch 0/59, loss 1.199, top-1 accuracy 50.000, top-5 accuracy 96.000\n", 797 | "val part1: loss 1.080763\n", 798 | "Checkpoint saved\n", 799 | "part1 Epoch 71 / 100\n", 800 | "train part1: batch 0/29, loss 0.439, top-1 accuracy 82.000, top-5 accuracy 100.000\n", 801 | "train part1: loss 0.378764\n", 802 | "val part1: batch 0/59, loss 1.228, top-1 accuracy 50.000, top-5 accuracy 94.000\n", 803 | "val part1: loss 1.090090\n", 804 | "Checkpoint saved\n", 805 | "part1 Epoch 72 / 100\n", 806 | "train part1: batch 0/29, loss 0.336, top-1 accuracy 
90.000, top-5 accuracy 100.000\n", 807 | "train part1: loss 0.343431\n", 808 | "val part1: batch 0/59, loss 1.270, top-1 accuracy 50.000, top-5 accuracy 92.000\n", 809 | "val part1: loss 1.083456\n", 810 | "Checkpoint saved\n", 811 | "part1 Epoch 73 / 100\n", 812 | "train part1: batch 0/29, loss 0.366, top-1 accuracy 90.000, top-5 accuracy 100.000\n", 813 | "train part1: loss 0.372473\n", 814 | "val part1: batch 0/59, loss 1.341, top-1 accuracy 50.000, top-5 accuracy 90.000\n", 815 | "val part1: loss 1.078837\n", 816 | "Checkpoint saved\n", 817 | "BEST TOP1 ACCURACY SO FAR\n", 818 | "part1 Epoch 74 / 100\n", 819 | "train part1: batch 0/29, loss 0.340, top-1 accuracy 86.000, top-5 accuracy 100.000\n", 820 | "train part1: loss 0.366458\n", 821 | "val part1: batch 0/59, loss 1.193, top-1 accuracy 50.000, top-5 accuracy 94.000\n", 822 | "val part1: loss 1.083876\n", 823 | "Checkpoint saved\n", 824 | "part1 Epoch 75 / 100\n", 825 | "train part1: batch 0/29, loss 0.323, top-1 accuracy 90.000, top-5 accuracy 100.000\n", 826 | "train part1: loss 0.358552\n", 827 | "val part1: batch 0/59, loss 1.314, top-1 accuracy 50.000, top-5 accuracy 94.000\n", 828 | "val part1: loss 1.082528\n", 829 | "Checkpoint saved\n", 830 | "part1 Epoch 76 / 100\n", 831 | "train part1: batch 0/29, loss 0.336, top-1 accuracy 90.000, top-5 accuracy 100.000\n", 832 | "train part1: loss 0.370824\n", 833 | "val part1: batch 0/59, loss 1.288, top-1 accuracy 50.000, top-5 accuracy 90.000\n", 834 | "val part1: loss 1.092666\n", 835 | "Checkpoint saved\n", 836 | "part1 Epoch 77 / 100\n", 837 | "train part1: batch 0/29, loss 0.232, top-1 accuracy 94.000, top-5 accuracy 100.000\n", 838 | "train part1: loss 0.357634\n", 839 | "val part1: batch 0/59, loss 1.152, top-1 accuracy 52.000, top-5 accuracy 96.000\n", 840 | "val part1: loss 1.100708\n", 841 | "Checkpoint saved\n", 842 | "part1 Epoch 78 / 100\n", 843 | "train part1: batch 0/29, loss 0.308, top-1 accuracy 90.000, top-5 accuracy 100.000\n", 844 | "train part1: loss 0.343843\n", 845 | "val part1: batch 0/59, loss 1.376, top-1 accuracy 50.000, top-5 accuracy 90.000\n", 846 | "val part1: loss 1.085893\n", 847 | "Checkpoint saved\n", 848 | "part1 Epoch 79 / 100\n", 849 | "train part1: batch 0/29, loss 0.320, top-1 accuracy 90.000, top-5 accuracy 98.000\n", 850 | "train part1: loss 0.328905\n", 851 | "val part1: batch 0/59, loss 1.325, top-1 accuracy 50.000, top-5 accuracy 92.000\n", 852 | "val part1: loss 1.093788\n", 853 | "Checkpoint saved\n", 854 | "part1 Epoch 80 / 100\n", 855 | "train part1: batch 0/29, loss 0.201, top-1 accuracy 94.000, top-5 accuracy 100.000\n", 856 | "train part1: loss 0.318215\n", 857 | "val part1: batch 0/59, loss 1.261, top-1 accuracy 50.000, top-5 accuracy 92.000\n", 858 | "val part1: loss 1.092609\n", 859 | "Checkpoint saved\n", 860 | "part1 Epoch 81 / 100\n", 861 | "train part1: batch 0/29, loss 0.369, top-1 accuracy 92.000, top-5 accuracy 100.000\n", 862 | "train part1: loss 0.322468\n", 863 | "val part1: batch 0/59, loss 1.316, top-1 accuracy 50.000, top-5 accuracy 92.000\n", 864 | "val part1: loss 1.091750\n", 865 | "Checkpoint saved\n", 866 | "part1 Epoch 82 / 100\n", 867 | "train part1: batch 0/29, loss 0.309, top-1 accuracy 88.000, top-5 accuracy 98.000\n", 868 | "train part1: loss 0.329091\n", 869 | "val part1: batch 0/59, loss 1.280, top-1 accuracy 50.000, top-5 accuracy 94.000\n", 870 | "val part1: loss 1.110126\n", 871 | "Checkpoint saved\n", 872 | "part1 Epoch 83 / 100\n", 873 | "train part1: batch 0/29, loss 0.400, top-1 accuracy 84.000, 
top-5 accuracy 100.000\n", 874 | "train part1: loss 0.299104\n", 875 | "val part1: batch 0/59, loss 1.431, top-1 accuracy 50.000, top-5 accuracy 92.000\n", 876 | "val part1: loss 1.106963\n", 877 | "Checkpoint saved\n", 878 | "part1 Epoch 84 / 100\n", 879 | "train part1: batch 0/29, loss 0.287, top-1 accuracy 88.000, top-5 accuracy 100.000\n", 880 | "train part1: loss 0.319058\n", 881 | "val part1: batch 0/59, loss 1.241, top-1 accuracy 50.000, top-5 accuracy 92.000\n", 882 | "val part1: loss 1.117356\n", 883 | "Checkpoint saved\n", 884 | "part1 Epoch 85 / 100\n", 885 | "train part1: batch 0/29, loss 0.307, top-1 accuracy 90.000, top-5 accuracy 100.000\n", 886 | "train part1: loss 0.344792\n", 887 | "val part1: batch 0/59, loss 1.448, top-1 accuracy 50.000, top-5 accuracy 92.000\n", 888 | "val part1: loss 1.112332\n", 889 | "Checkpoint saved\n", 890 | "part1 Epoch 86 / 100\n", 891 | "train part1: batch 0/29, loss 0.312, top-1 accuracy 88.000, top-5 accuracy 100.000\n", 892 | "train part1: loss 0.310987\n", 893 | "val part1: batch 0/59, loss 1.231, top-1 accuracy 52.000, top-5 accuracy 92.000\n", 894 | "val part1: loss 1.104170\n", 895 | "Checkpoint saved\n", 896 | "part1 Epoch 87 / 100\n", 897 | "train part1: batch 0/29, loss 0.398, top-1 accuracy 92.000, top-5 accuracy 98.000\n", 898 | "train part1: loss 0.331794\n", 899 | "val part1: batch 0/59, loss 1.287, top-1 accuracy 50.000, top-5 accuracy 90.000\n", 900 | "val part1: loss 1.106151\n", 901 | "Checkpoint saved\n", 902 | "part1 Epoch 88 / 100\n", 903 | "train part1: batch 0/29, loss 0.208, top-1 accuracy 94.000, top-5 accuracy 100.000\n", 904 | "train part1: loss 0.305059\n", 905 | "val part1: batch 0/59, loss 1.418, top-1 accuracy 50.000, top-5 accuracy 90.000\n", 906 | "val part1: loss 1.113987\n", 907 | "Checkpoint saved\n", 908 | "part1 Epoch 89 / 100\n", 909 | "train part1: batch 0/29, loss 0.387, top-1 accuracy 86.000, top-5 accuracy 100.000\n", 910 | "train part1: loss 0.329037\n", 911 | "val part1: batch 0/59, loss 1.205, top-1 accuracy 52.000, top-5 accuracy 94.000\n", 912 | "val part1: loss 1.121447\n", 913 | "Checkpoint saved\n", 914 | "part1 Epoch 90 / 100\n", 915 | "train part1: batch 0/29, loss 0.536, top-1 accuracy 82.000, top-5 accuracy 98.000\n", 916 | "train part1: loss 0.317368\n", 917 | "val part1: batch 0/59, loss 1.574, top-1 accuracy 48.000, top-5 accuracy 90.000\n", 918 | "val part1: loss 1.114163\n", 919 | "Checkpoint saved\n", 920 | "part1 Epoch 91 / 100\n", 921 | "train part1: batch 0/29, loss 0.359, top-1 accuracy 90.000, top-5 accuracy 100.000\n", 922 | "train part1: loss 0.299202\n", 923 | "val part1: batch 0/59, loss 1.324, top-1 accuracy 50.000, top-5 accuracy 92.000\n", 924 | "val part1: loss 1.111513\n", 925 | "Checkpoint saved\n", 926 | "part1 Epoch 92 / 100\n", 927 | "train part1: batch 0/29, loss 0.584, top-1 accuracy 76.000, top-5 accuracy 100.000\n", 928 | "train part1: loss 0.292010\n", 929 | "val part1: batch 0/59, loss 1.230, top-1 accuracy 52.000, top-5 accuracy 92.000\n", 930 | "val part1: loss 1.121627\n", 931 | "Checkpoint saved\n", 932 | "part1 Epoch 93 / 100\n", 933 | "train part1: batch 0/29, loss 0.291, top-1 accuracy 88.000, top-5 accuracy 100.000\n", 934 | "train part1: loss 0.299472\n", 935 | "val part1: batch 0/59, loss 1.316, top-1 accuracy 50.000, top-5 accuracy 92.000\n", 936 | "val part1: loss 1.112493\n", 937 | "Checkpoint saved\n", 938 | "part1 Epoch 94 / 100\n", 939 | "train part1: batch 0/29, loss 0.269, top-1 accuracy 90.000, top-5 accuracy 100.000\n" 940 | ] 941 | }, 942 
| { 943 | "name": "stdout", 944 | "output_type": "stream", 945 | "text": [ 946 | "train part1: loss 0.301035\n", 947 | "val part1: batch 0/59, loss 1.374, top-1 accuracy 50.000, top-5 accuracy 90.000\n", 948 | "val part1: loss 1.111464\n", 949 | "Checkpoint saved\n", 950 | "part1 Epoch 95 / 100\n", 951 | "train part1: batch 0/29, loss 0.404, top-1 accuracy 92.000, top-5 accuracy 96.000\n", 952 | "train part1: loss 0.297892\n", 953 | "val part1: batch 0/59, loss 1.367, top-1 accuracy 50.000, top-5 accuracy 92.000\n", 954 | "val part1: loss 1.120183\n", 955 | "Checkpoint saved\n", 956 | "part1 Epoch 96 / 100\n", 957 | "train part1: batch 0/29, loss 0.249, top-1 accuracy 94.000, top-5 accuracy 100.000\n", 958 | "train part1: loss 0.303044\n", 959 | "val part1: batch 0/59, loss 1.392, top-1 accuracy 50.000, top-5 accuracy 92.000\n", 960 | "val part1: loss 1.126989\n", 961 | "Checkpoint saved\n", 962 | "part1 Epoch 97 / 100\n", 963 | "train part1: batch 0/29, loss 0.165, top-1 accuracy 96.000, top-5 accuracy 100.000\n", 964 | "train part1: loss 0.276733\n", 965 | "val part1: batch 0/59, loss 1.391, top-1 accuracy 52.000, top-5 accuracy 92.000\n", 966 | "val part1: loss 1.127524\n", 967 | "Checkpoint saved\n", 968 | "part1 Epoch 98 / 100\n", 969 | "train part1: batch 0/29, loss 0.160, top-1 accuracy 94.000, top-5 accuracy 100.000\n", 970 | "train part1: loss 0.280525\n", 971 | "val part1: batch 0/59, loss 1.274, top-1 accuracy 52.000, top-5 accuracy 92.000\n", 972 | "val part1: loss 1.138553\n", 973 | "Checkpoint saved\n", 974 | "BEST TOP1 ACCURACY SO FAR\n", 975 | "part1 Epoch 99 / 100\n", 976 | "train part1: batch 0/29, loss 0.119, top-1 accuracy 98.000, top-5 accuracy 100.000\n", 977 | "train part1: loss 0.270406\n", 978 | "val part1: batch 0/59, loss 1.260, top-1 accuracy 52.000, top-5 accuracy 92.000\n", 979 | "val part1: loss 1.146958\n", 980 | "Checkpoint saved\n", 981 | "Best top-1 Accuracy = 69.916\n" 982 | ] 983 | } 984 | ], 985 | "source": [ 986 | "# Train the network!\n", 987 | "trainer = Trainer(train_dataset, test_dataset, model, loss_function, optimizer, lr_scheduler, params)\n", 988 | "best_prec1 = trainer.train_val()\n", 989 | "print('Best top-1 Accuracy = {:4.3f}'.format(best_prec1))" 990 | ] 991 | }, 992 | { 993 | "cell_type": "markdown", 994 | "metadata": {}, 995 | "source": [ 996 | "Make sure you get at least 50% accuracy in this section! If you used different settings from the ones provided to reach 50%, you should modify custom_part1_trainer in student_code to return a dictionary with your changed settings. " 997 | ] 998 | }, 999 | { 1000 | "cell_type": "markdown", 1001 | "metadata": {}, 1002 | "source": [ 1003 | "## Part 2. Fine-Tuning a Pre-Trained Network" 1004 | ] 1005 | }, 1006 | { 1007 | "cell_type": "code", 1008 | "execution_count": 9, 1009 | "metadata": { 1010 | "pycharm": { 1011 | "is_executing": false 1012 | } 1013 | }, 1014 | "outputs": [], 1015 | "source": [ 1016 | "# Fix random seeds so that results will be reproducible\n", 1017 | "set_seed(0, use_GPU)" 1018 | ] 1019 | }, 1020 | { 1021 | "cell_type": "markdown", 1022 | "metadata": {}, 1023 | "source": [ 1024 | "Training a network from scratch takes a lot of time. Instead of training from scratch, we can take a pre-trained model and fine-tune it for our purposes. This is the goal of Part 2--you will fine-tune a pre-trained network and achieve at least 80% accuracy. 
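As a hedged sketch of what fine-tuning means in PyTorch (one common pattern, not the exact approach used in this notebook, which instead replaces classifier layers inside create_part2_model and hands only selected parameters to the optimizer):

```python
import torch.nn as nn
from torchvision.models import alexnet

# Sketch of the fine-tuning idea: reuse pretrained features, retrain only a new head.
net = alexnet(pretrained=True)
for p in net.parameters():
    p.requires_grad = False              # freeze the pretrained weights
net.classifier[6] = nn.Linear(4096, 15)  # new, trainable 15-way scene classifier
```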
" 1025 | ] 1026 | }, 1027 | { 1028 | "cell_type": "code", 1029 | "execution_count": 10, 1030 | "metadata": { 1031 | "pycharm": { 1032 | "is_executing": false 1033 | } 1034 | }, 1035 | "outputs": [], 1036 | "source": [ 1037 | "# training parameters\n", 1038 | "input_size = (224, 224)\n", 1039 | "RGB = True\n", 1040 | "base_lr = 1e-3\n", 1041 | "weight_decay = 5e-4\n", 1042 | "momentum = 0.9\n", 1043 | "backprop_depth = 3" 1044 | ] 1045 | }, 1046 | { 1047 | "cell_type": "code", 1048 | "execution_count": 11, 1049 | "metadata": { 1050 | "pycharm": { 1051 | "is_executing": false 1052 | }, 1053 | "scrolled": true 1054 | }, 1055 | "outputs": [ 1056 | { 1057 | "name": "stdout", 1058 | "output_type": "stream", 1059 | "text": [ 1060 | "Computing pixel mean and stdev...\n", 1061 | "Batch 0 / 30\n", 1062 | "Batch 20 / 30\n", 1063 | "Done, mean = \n", 1064 | "[0.45586014 0.45586014 0.45586014]\n", 1065 | "std = \n", 1066 | "[0.24808612 0.24808612 0.24808612]\n", 1067 | "Computing pixel mean and stdev...\n", 1068 | "Batch 0 / 60\n", 1069 | "Batch 20 / 60\n", 1070 | "Batch 40 / 60\n", 1071 | "Done, mean = \n", 1072 | "[0.45524448 0.45524448 0.45524448]\n", 1073 | "std = \n", 1074 | "[0.24719196 0.24719196 0.24719196]\n" 1075 | ] 1076 | } 1077 | ], 1078 | "source": [ 1079 | "# Create the training and testing datasets.\n", 1080 | "train_dataset, test_dataset = sc.create_datasets(data_path=data_path, input_size=input_size, rgb=RGB)\n", 1081 | "assert test_dataset.classes == train_dataset.classes" 1082 | ] 1083 | }, 1084 | { 1085 | "cell_type": "markdown", 1086 | "metadata": {}, 1087 | "source": [ 1088 | "Following block loads a pretrained AlexNet." 1089 | ] 1090 | }, 1091 | { 1092 | "cell_type": "code", 1093 | "execution_count": 12, 1094 | "metadata": { 1095 | "pycharm": { 1096 | "is_executing": false 1097 | } 1098 | }, 1099 | "outputs": [ 1100 | { 1101 | "name": "stdout", 1102 | "output_type": "stream", 1103 | "text": [ 1104 | "AlexNet(\n", 1105 | " (features): Sequential(\n", 1106 | " (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))\n", 1107 | " (1): ReLU(inplace=True)\n", 1108 | " (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)\n", 1109 | " (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))\n", 1110 | " (4): ReLU(inplace=True)\n", 1111 | " (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)\n", 1112 | " (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", 1113 | " (7): ReLU(inplace=True)\n", 1114 | " (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", 1115 | " (9): ReLU(inplace=True)\n", 1116 | " (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", 1117 | " (11): ReLU(inplace=True)\n", 1118 | " (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)\n", 1119 | " )\n", 1120 | " (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))\n", 1121 | " (classifier): Sequential(\n", 1122 | " (0): Dropout(p=0.5, inplace=False)\n", 1123 | " (1): Linear(in_features=9216, out_features=4096, bias=True)\n", 1124 | " (2): ReLU(inplace=True)\n", 1125 | " (3): Dropout(p=0.5, inplace=False)\n", 1126 | " (4): Linear(in_features=4096, out_features=4096, bias=True)\n", 1127 | " (5): ReLU(inplace=True)\n", 1128 | " (6): Linear(in_features=4096, out_features=1000, bias=True)\n", 1129 | " )\n", 1130 | ")\n" 1131 | ] 1132 | } 1133 | ], 1134 | "source": [ 1135 | "# Create the network model.\n", 1136 | "model = alexnet(pretrained=True)\n", 
1137 | "print(model)" 1138 | ] 1139 | }, 1140 | { 1141 | "cell_type": "markdown", 1142 | "metadata": {}, 1143 | "source": [ 1144 | "Now, you modify create_part2_model from student code in order to fine-tune AlexNet. As you can see in the docs (https://github.com/pytorch/vision/blob/master/torchvision/models/alexnet.py) and in the model printout above, AlexNet has 2 parts: 'features', which constists of conv layers that extract feature maps from the image, and 'classifier' which consists of FC layers that classify the features. We want to replace the last Linear layer in model.classifier. " 1145 | ] 1146 | }, 1147 | { 1148 | "cell_type": "code", 1149 | "execution_count": 13, 1150 | "metadata": { 1151 | "pycharm": { 1152 | "is_executing": false 1153 | }, 1154 | "scrolled": true 1155 | }, 1156 | "outputs": [ 1157 | { 1158 | "name": "stdout", 1159 | "output_type": "stream", 1160 | "text": [ 1161 | "Linear(in_features=4096, out_features=1000, bias=True)\n", 1162 | "AlexNet(\n", 1163 | " (features): Sequential(\n", 1164 | " (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))\n", 1165 | " (1): ReLU(inplace=True)\n", 1166 | " (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)\n", 1167 | " (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))\n", 1168 | " (4): ReLU(inplace=True)\n", 1169 | " (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)\n", 1170 | " (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", 1171 | " (7): ReLU(inplace=True)\n", 1172 | " (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", 1173 | " (9): ReLU(inplace=True)\n", 1174 | " (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", 1175 | " (11): ReLU(inplace=True)\n", 1176 | " (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)\n", 1177 | " )\n", 1178 | " (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))\n", 1179 | " (classifier): Sequential(\n", 1180 | " (0): Dropout(p=0.5, inplace=False)\n", 1181 | " (1): Linear(in_features=9216, out_features=4096, bias=True)\n", 1182 | " (2): ReLU(inplace=True)\n", 1183 | " (3): Dropout(p=0.5, inplace=False)\n", 1184 | " (4): Linear(in_features=4096, out_features=4096, bias=True)\n", 1185 | " (5): ReLU(inplace=True)\n", 1186 | " (6): Linear(in_features=4096, out_features=128, bias=True)\n", 1187 | " (7): ReLU(inplace=True)\n", 1188 | " (8): Linear(in_features=128, out_features=15, bias=True)\n", 1189 | " )\n", 1190 | ")\n" 1191 | ] 1192 | } 1193 | ], 1194 | "source": [ 1195 | "model = sc.create_part2_model(model, num_classes)\n", 1196 | "if use_GPU:\n", 1197 | " model = model.cuda()\n", 1198 | "print(model)" 1199 | ] 1200 | }, 1201 | { 1202 | "cell_type": "markdown", 1203 | "metadata": {}, 1204 | "source": [ 1205 | "Next we will create the loss function and the optimizer. Just as with part 1, if you modify any of the setttings to hit the required accuracy, you must modify custom_part2_trainer function to return a dictionary containing your changes. " 1206 | ] 1207 | }, 1208 | { 1209 | "cell_type": "code", 1210 | "execution_count": 14, 1211 | "metadata": { 1212 | "pycharm": { 1213 | "is_executing": false 1214 | } 1215 | }, 1216 | "outputs": [], 1217 | "source": [ 1218 | "# Set up the trainer. 
You can modify custom_part2_trainer in\n", 1219 | "# student_copy.py if you want to try different learning settings.\n", 1220 | "custom_part2_trainer = sc.custom_part2_trainer(model)\n", 1221 | "\n", 1222 | "if custom_part2_trainer is None:\n", 1223 | " # Create the loss function\n", 1224 | " # see http://pytorch.org/docs/0.3.0/nn.html#loss-functions for a list of available loss functions\n", 1225 | " loss_function = nn.CrossEntropyLoss()\n", 1226 | "\n", 1227 | " # Since we do not want to optimize the whole network, we must extract a list of parameters of interest that will be\n", 1228 | " # optimized by the optimizer.\n", 1229 | " params_to_optimize = []\n", 1230 | "\n", 1231 | " # List of modules in the network\n", 1232 | " mods = list(model.features.children()) + list(model.classifier.children())\n", 1233 | "\n", 1234 | " # Extract parameters from the last `backprop_depth` modules in the network and collect them in\n", 1235 | " # the params_to_optimize list.\n", 1236 | " for m in mods[::-1][:backprop_depth]:\n", 1237 | " params_to_optimize.extend(list(m.parameters()))\n", 1238 | "\n", 1239 | " # Construct the optimizer \n", 1240 | " optimizer = optim.SGD(params=params_to_optimize, lr=base_lr, weight_decay=weight_decay, momentum=momentum)\n", 1241 | "\n", 1242 | " # Create a scheduler, currently a simple step scheduler, but you can get creative.\n", 1243 | " # See http://pytorch.org/docs/0.3.0/optim.html#how-to-adjust-learning-rate for various LR schedulers\n", 1244 | " # and how to use them\n", 1245 | " lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)\n", 1246 | " \n", 1247 | " params = {'n_epochs': 4, 'batch_size': 10, 'experiment': 'part2'} \n", 1248 | " \n", 1249 | "else:\n", 1250 | " if 'loss_function' in custom_part2_trainer:\n", 1251 | " loss_function = custom_part2_trainer['loss_function']\n", 1252 | " if 'optimizer' in custom_part2_trainer:\n", 1253 | " optimizer = custom_part2_trainer['optimizer']\n", 1254 | " if 'lr_scheduler' in custom_part2_trainer:\n", 1255 | " lr_scheduler = custom_part2_trainer['lr_scheduler']\n", 1256 | " if 'params' in custom_part2_trainer:\n", 1257 | " params = custom_part2_trainer['params']" 1258 | ] 1259 | }, 1260 | { 1261 | "cell_type": "markdown", 1262 | "metadata": {}, 1263 | "source": [ 1264 | "We are ready to fine tune our network! Just like before, we will start a local server to see the training progress of our network. Open a new terminal and activate the environment for this project. Then run the following command: **python -m visdom.server**. This will start a local server. The terminal output should give out a link like: \"http://localhost:8097\". Open this link in your browser. After you run the following block, visit this link again, and you will be able to see graphs showing the progress of your training! If you do not see any graphs, select Part 2 on the top left bar where is says Environment (only select Part 2, do not check main or Part 1)." 
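To make the parameter selection above concrete: with backprop_depth = 3, the slice mods[::-1][:backprop_depth] walks the module list backwards and keeps the last three classifier modules. A small illustration (module contents assume the modified AlexNet printed earlier; this is not part of the assignment code):

```python
mods = list(model.features.children()) + list(model.classifier.children())
last_three = mods[::-1][:3]  # Linear(128, 15), ReLU, Linear(4096, 128) for the printed model
params_to_optimize = [p for m in last_three for p in m.parameters()]  # the ReLU contributes none
```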
1265 | ] 1266 | }, 1267 | { 1268 | "cell_type": "code", 1269 | "execution_count": 15, 1270 | "metadata": { 1271 | "pycharm": { 1272 | "is_executing": false 1273 | } 1274 | }, 1275 | "outputs": [ 1276 | { 1277 | "name": "stderr", 1278 | "output_type": "stream", 1279 | "text": [ 1280 | "Setting up a new session...\n" 1281 | ] 1282 | }, 1283 | { 1284 | "name": "stdout", 1285 | "output_type": "stream", 1286 | "text": [ 1287 | "---------------------------------------\n", 1288 | "Experiment: part2\n", 1289 | "n_epochs: 4\n", 1290 | "batch_size: 10\n", 1291 | "do_val: True\n", 1292 | "shuffle: True\n", 1293 | "num_workers: 4\n", 1294 | "val_freq: 1\n", 1295 | "print_freq: 100\n", 1296 | "experiment: part2\n", 1297 | "checkpoint_file: None\n", 1298 | "resume_optim: True\n", 1299 | "---------------------------------------\n", 1300 | "part2 Epoch 0 / 4\n", 1301 | "train part2: batch 0/149, loss 2.682, top-1 accuracy 0.000, top-5 accuracy 60.000\n", 1302 | "train part2: batch 100/149, loss 0.757, top-1 accuracy 80.000, top-5 accuracy 90.000\n", 1303 | "train part2: loss 1.222762\n", 1304 | "val part2: batch 0/298, loss 1.443, top-1 accuracy 30.000, top-5 accuracy 100.000\n", 1305 | "val part2: batch 100/298, loss 0.736, top-1 accuracy 80.000, top-5 accuracy 100.000\n", 1306 | "val part2: batch 200/298, loss 0.562, top-1 accuracy 70.000, top-5 accuracy 100.000\n", 1307 | "val part2: loss 0.561482\n", 1308 | "Checkpoint saved\n", 1309 | "BEST TOP1 ACCURACY SO FAR\n", 1310 | "part2 Epoch 1 / 4\n", 1311 | "train part2: batch 0/149, loss 0.417, top-1 accuracy 80.000, top-5 accuracy 100.000\n", 1312 | "train part2: batch 100/149, loss 0.132, top-1 accuracy 100.000, top-5 accuracy 100.000\n", 1313 | "train part2: loss 0.493316\n", 1314 | "val part2: batch 0/298, loss 1.460, top-1 accuracy 30.000, top-5 accuracy 100.000\n", 1315 | "val part2: batch 100/298, loss 0.349, top-1 accuracy 90.000, top-5 accuracy 100.000\n", 1316 | "val part2: batch 200/298, loss 0.410, top-1 accuracy 80.000, top-5 accuracy 100.000\n", 1317 | "val part2: loss 0.458179\n", 1318 | "Checkpoint saved\n", 1319 | "BEST TOP1 ACCURACY SO FAR\n", 1320 | "part2 Epoch 2 / 4\n", 1321 | "train part2: batch 0/149, loss 0.278, top-1 accuracy 90.000, top-5 accuracy 100.000\n", 1322 | "train part2: batch 100/149, loss 0.428, top-1 accuracy 80.000, top-5 accuracy 100.000\n", 1323 | "train part2: loss 0.374604\n", 1324 | "val part2: batch 0/298, loss 1.957, top-1 accuracy 20.000, top-5 accuracy 100.000\n", 1325 | "val part2: batch 100/298, loss 0.265, top-1 accuracy 90.000, top-5 accuracy 100.000\n", 1326 | "val part2: batch 200/298, loss 1.368, top-1 accuracy 60.000, top-5 accuracy 100.000\n", 1327 | "val part2: loss 0.506881\n", 1328 | "Checkpoint saved\n", 1329 | "part2 Epoch 3 / 4\n", 1330 | "train part2: batch 0/149, loss 0.123, top-1 accuracy 100.000, top-5 accuracy 100.000\n", 1331 | "train part2: batch 100/149, loss 0.152, top-1 accuracy 90.000, top-5 accuracy 100.000\n", 1332 | "train part2: loss 0.335377\n", 1333 | "val part2: batch 0/298, loss 0.604, top-1 accuracy 80.000, top-5 accuracy 100.000\n", 1334 | "val part2: batch 100/298, loss 0.329, top-1 accuracy 90.000, top-5 accuracy 100.000\n", 1335 | "val part2: batch 200/298, loss 0.570, top-1 accuracy 70.000, top-5 accuracy 100.000\n", 1336 | "val part2: loss 0.387761\n", 1337 | "Checkpoint saved\n", 1338 | "BEST TOP1 ACCURACY SO FAR\n", 1339 | "Best top-1 Accuracy = 86.533\n" 1340 | ] 1341 | } 1342 | ], 1343 | "source": [ 1344 | "# Train the network!\n", 1345 | "trainer = 
Trainer(train_dataset, test_dataset, model, loss_function, optimizer, lr_scheduler, params)\n", 1346 | "best_prec1 = trainer.train_val()\n", 1347 | "print('Best top-1 Accuracy = {:4.3f}'.format(best_prec1))" 1348 | ] 1349 | }, 1350 | { 1351 | "cell_type": "markdown", 1352 | "metadata": {}, 1353 | "source": [ 1354 | "Expect this code to take around 10 minutes on CPU or 30 seconds on GPU. You should hit 80% accuracy. " 1355 | ] 1356 | } 1357 | ], 1358 | "metadata": { 1359 | "kernelspec": { 1360 | "display_name": "Python 3", 1361 | "language": "python", 1362 | "name": "python3" 1363 | }, 1364 | "language_info": { 1365 | "codemirror_mode": { 1366 | "name": "ipython", 1367 | "version": 3 1368 | }, 1369 | "file_extension": ".py", 1370 | "mimetype": "text/x-python", 1371 | "name": "python", 1372 | "nbconvert_exporter": "python", 1373 | "pygments_lexer": "ipython3", 1374 | "version": "3.7.4" 1375 | }, 1376 | "pycharm": { 1377 | "stem_cell": { 1378 | "cell_type": "raw", 1379 | "metadata": { 1380 | "collapsed": false 1381 | }, 1382 | "source": [] 1383 | } 1384 | }, 1385 | "widgets": { 1386 | "state": {}, 1387 | "version": "1.1.2" 1388 | } 1389 | }, 1390 | "nbformat": 4, 1391 | "nbformat_minor": 2 1392 | } 1393 | -------------------------------------------------------------------------------- /project/proj5/code/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kemo-Huang/CS308-Computer-Vision/bc83936aa04bf0dca86294595632a9e1d0092f9a/project/proj5/code/__init__.py -------------------------------------------------------------------------------- /project/proj5/code/student_code.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torchvision.datasets as datasets 4 | import torchvision.transforms as transforms 5 | import os.path as osp 6 | import utils 7 | 8 | 9 | def create_datasets(data_path, input_size, rgb=False): 10 | """ 11 | This function creates and returns a training data loader and a 12 | testing/validation data loader. The dataloader should also perform some 13 | pre-processing steps on each of the datasets. Most of this function is 14 | implemented for you, you will only need to add a few additional lines. 15 | A data loader in pyTorch is a class inherited from the 16 | torch.utils.data.Dataset abstract class. In this project we will use the 17 | ImageFolder data loader. See 18 | http://pytorch.org/docs/master/torchvision/datasets.html#imagefolder for 19 | details. Although you don't need to for this project, you can also create your 20 | own custom data loader by inheriting from the abstract class and implementing 21 | the __len__() and __getitem__() methods as described in 22 | http://pytorch.org/tutorials/beginner/data_loading_tutorial.html 23 | As mentioned, the data loader should perform any necessary pre-processing 24 | steps on the data (images) and targets (labels). In pyTorch, this is done 25 | with 'transforms', which can be composed (chained together) as shown in 26 | http://pytorch.org/tutorials/beginner/data_loading_tutorial.html#transforms. 27 | While that example implements its own transforms, for this project the 28 | built-in transforms in torchvision.transforms should suffice. See 29 | http://pytorch.org/docs/master/torchvision/transforms.html for the list of 30 | available built-in transforms. 31 | Args: 32 | - data_path: (string) Path to the directory that contains the 'test' and 33 | 'train' data directories. 
34 | - input_size: (w, h) Size of input image. The images will be resized to 35 | this size. 36 | - rgb: (boolean) Flag indicating if input images are RGB or grayscale. If 37 | False, images will be converted to grayscale. 38 | Returns: 39 | - train_dataloader: Dataloader for the training dataset. 40 | - test_dataloader: Dataloader for the testing/validation dataset. 41 | """ 42 | train_data_path = osp.join(data_path, 'train') 43 | test_data_path = osp.join(data_path, 'test') 44 | # Below variables are provided for your convenience. You may or may not need 45 | # all of them. 46 | train_mean, train_std = utils.get_mean_std(train_data_path, input_size, rgb) 47 | test_mean, test_std = utils.get_mean_std(test_data_path, input_size, rgb) 48 | 49 | """ TRAIN DATA TRANSFORMS """ 50 | train_data_tforms = [] 51 | train_data_tforms.append(transforms.Resize(size=max(input_size))) 52 | train_data_tforms.append(transforms.CenterCrop(size=input_size)) 53 | if not rgb: 54 | train_data_tforms.append(transforms.Grayscale()) 55 | 56 | ####################################################################### 57 | # TODO: YOUR CODE HERE # 58 | ####################################################################### 59 | # TODO Add a transformation to you train_data_tforms that left-right mirrors 60 | # the image randomly. Which transformation should you add? 61 | # pass 62 | train_data_tforms.append(transforms.RandomHorizontalFlip(p=0.5)) 63 | # train_data_tforms.append(transforms.RandomAffine(degrees=(-30, 30))) 64 | # Do not move the position of the below line (leave it between the left-right 65 | # mirroring and normalization transformations. 66 | train_data_tforms.append(transforms.ToTensor()) 67 | 68 | # TODO Add a transformation to your train_data_tforms that normalizes the 69 | # tensor by subtracting mean and dividing by std. You may use train_mean, 70 | # test_mean, train_std, or test_std values that are already calculated for 71 | # you. Which mean and std should you use to normalize the data? 72 | # pass 73 | train_data_tforms.append(transforms.Normalize(train_mean, train_std)) 74 | ####################################################################### 75 | # END OF YOUR CODE # 76 | ####################################################################### 77 | train_data_tforms = transforms.Compose(train_data_tforms) 78 | 79 | """ TEST/VALIDATION DATA TRANSFORMS """ 80 | 81 | test_data_tforms = [] 82 | test_data_tforms.append(transforms.Resize(size=max(input_size))) 83 | test_data_tforms.append(transforms.CenterCrop(size=input_size)) 84 | if not rgb: 85 | test_data_tforms.append(transforms.Grayscale()) 86 | test_data_tforms.append(transforms.ToTensor()) 87 | ####################################################################### 88 | # TODO: YOUR CODE HERE # 89 | ####################################################################### 90 | # TODO Add a transformation to your test_data_tforms that normalizes the 91 | # tensor by subtracting mean and dividing by std. You may use train_mean, 92 | # test_mean, train_std, or test_std values that are already calculated for 93 | # you. Which mean and std should you use to normalize the data? 
94 | # pass 95 | test_data_tforms.append(transforms.Normalize(test_mean, test_std)) 96 | ####################################################################### 97 | # END OF YOUR CODE # 98 | ####################################################################### 99 | test_data_tforms = transforms.Compose(test_data_tforms) 100 | 101 | """ DATASET LOADERS """ 102 | # Creating dataset loaders using the transformations specified above. 103 | train_dset = datasets.ImageFolder(root=osp.join(data_path, 'train'), 104 | transform=train_data_tforms) 105 | test_dset = datasets.ImageFolder(root=osp.join(data_path, 'test'), 106 | transform=test_data_tforms) 107 | return train_dset, test_dset 108 | 109 | 110 | class SimpleNet(nn.Module): 111 | """ 112 | This class implements the network model needed for part 1. Network models in 113 | pyTorch are inherited from torch.nn.Module, only require implementing the 114 | __init__() and forward() methods. The backpropagation is handled automatically 115 | by pyTorch. 116 | The __init__() function defines the various operators needed for 117 | the forward pass e.g. conv, batch norm, fully connected, etc. 118 | The forward() defines how these blocks act on the input data to produce the 119 | network output. For hints on how to implement your network model, see the 120 | AlexNet example at 121 | https://github.com/pytorch/vision/blob/master/torchvision/models/alexnet.py 122 | """ 123 | 124 | def __init__(self, num_classes, droprate=0.8, rgb=False, verbose=False): 125 | """ 126 | This is where you set up and initialize your network. A basic network is 127 | already set up for you. You will need to add a few more layers to it as 128 | described. You can refer to https://pytorch.org/docs/stable/nn.html for 129 | documentation. 130 | Args: 131 | - num_classes: (int) Number of output classes. 132 | - droprate: (float) Droprate of the network (used for droppout). 133 | - rgb: (boolean) Flag indicating if input images are RGB or grayscale, used 134 | to set the number of input channels. 135 | - verbose: (boolean) If True a hook is registered to print the size of input 136 | to classifier everytime the forward function is called. 137 | """ 138 | super(SimpleNet, self).__init__() # initialize the parent class, a must 139 | in_channels = 3 if rgb else 1 140 | 141 | """ NETWORK SETUP """ 142 | ##################################################################### 143 | # TODO: YOUR CODE HERE # 144 | ##################################################################### 145 | # TODO modify the simple network 146 | # 1) add one dropout layer after the last relu layer. 147 | # 2) add more convolution, maxpool and relu layers. 148 | # 3) add one batch normalization layer after each convolution/linear layer 149 | # except the last convolution/linear layer of the WHOLE model (meaning 150 | # including the classifier). 
151 | 152 | self.features = nn.Sequential( 153 | # 64 * 64 * 1 154 | nn.Conv2d(in_channels=in_channels, out_channels=12, kernel_size=3, 155 | stride=1, padding=1), 156 | nn.BatchNorm2d(12), 157 | nn.ReLU(inplace=True), 158 | 159 | nn.MaxPool2d(kernel_size=2, stride=2, padding=0), 160 | 161 | nn.Conv2d(in_channels=12, out_channels=24, kernel_size=3, 162 | stride=1, padding=1), 163 | nn.BatchNorm2d(24), 164 | nn.ReLU(inplace=True), 165 | 166 | nn.MaxPool2d(kernel_size=2, stride=2, padding=0), 167 | 168 | nn.Conv2d(in_channels=24, out_channels=48, kernel_size=3, 169 | stride=1, padding=1), 170 | nn.BatchNorm2d(48), 171 | nn.ReLU(inplace=True), 172 | 173 | nn.MaxPool2d(kernel_size=2, stride=2, padding=0), 174 | 175 | nn.Conv2d(in_channels=48, out_channels=96, kernel_size=3, 176 | stride=1, padding=1), 177 | nn.BatchNorm2d(96), 178 | nn.ReLU(inplace=True), 179 | 180 | nn.MaxPool2d(kernel_size=2, stride=2, padding=0), 181 | ) 182 | 183 | self.classifier = nn.Sequential( 184 | nn.Linear(4 * 4 * 96, 1024), 185 | nn.ReLU(True), 186 | nn.Dropout(droprate), 187 | nn.Linear(1024, num_classes), 188 | ) 189 | 190 | ##################################################################### 191 | # END OF YOUR CODE # 192 | ##################################################################### 193 | 194 | """ NETWORK INITIALIZATION """ 195 | for name, m in self.named_modules(): 196 | if isinstance(m, nn.Conv2d) or isinstance(m, nn.Linear): 197 | # Initializing weights with randomly sampled numbers from a normal 198 | # distribution. 199 | m.weight.data.normal_(0, 1) 200 | m.weight.data.mul_(1e-2) 201 | if m.bias is not None: 202 | # Initializing biases with zeros. 203 | nn.init.constant_(m.bias.data, 0) 204 | elif isinstance(m, nn.BatchNorm2d): 205 | ################################################################# 206 | # TODO: YOUR CODE HERE # 207 | ################################################################# 208 | # TODO How should you initialize the weights and biases for BatchNorm? 209 | # Initialize them here. 210 | m.weight.data.normal_(0, 1) 211 | # m.weight.data.mul_(1e-2) 212 | if m.bias is not None: 213 | # Initializing biases with zeros. 214 | nn.init.constant_(m.bias.data, 0) 215 | 216 | ################################################################# 217 | # END OF YOUR CODE # 218 | ################################################################# 219 | 220 | if verbose: 221 | # Hook that prints the size of input to classifier every time the forward 222 | # function is called. 223 | self.classifier.register_forward_hook(utils.print_input_size_hook) 224 | 225 | def forward(self, x): 226 | """ 227 | Forward step of the network. 228 | Args: 229 | - x: input data. 230 | Returns: 231 | - x: output of the classifier. 232 | """ 233 | x = self.features(x) 234 | x = self.classifier(torch.flatten(x, 1)) 235 | return x.squeeze() 236 | 237 | 238 | def custom_part1_trainer(model): 239 | # return a dict that contains your customized learning settings. 240 | pass 241 | return None 242 | 243 | 244 | def create_part2_model(model, num_classes): 245 | """ 246 | Take the passed in model and prepare it for finetuning by following the 247 | instructions. 248 | Args: 249 | - model: The model you need to prepare for finetuning. For the purposes of 250 | this project, you will pass in AlexNet. 251 | - num_classes: number of classes the model should output. 252 | Returns: 253 | - model: The model ready to be fine tuned. 254 | """ 255 | # Getting all layers from the input model's classifier. 
256 | new_classifier = list(model.classifier.children()) 257 | print(new_classifier[-1]) 258 | new_classifier = new_classifier[:-1] 259 | ####################################################################### 260 | # TODO: YOUR CODE HERE # 261 | ####################################################################### 262 | # TODO modify the classifier of the model for finetuning. new_classifier is 263 | # now a list containing the layers of the classifier network, the last element 264 | # being the last layer of the classifier. 265 | # 1) Create a linear layer with correct in_features and out_features. What 266 | # should these values be? 267 | # 2) Initialize the weights and the bias in the new linear layer. Look at how 268 | # is the linear layer initialized in SimpleNetPart1. 269 | # 3) Append your new layer to your new_classifier. 270 | 271 | new_linear = nn.Linear(in_features=4096, out_features=128) 272 | if new_linear.bias is not None: 273 | nn.init.constant_(new_linear.bias.data, 0) 274 | new_classifier.append(new_linear) 275 | 276 | new_classifier.append(nn.ReLU(True)) 277 | 278 | new_linear = nn.Linear(in_features=128, out_features=num_classes) 279 | if new_linear.bias is not None: 280 | nn.init.constant_(new_linear.bias.data, 0) 281 | new_classifier.append(new_linear) 282 | 283 | ####################################################################### 284 | # END OF YOUR CODE # 285 | ####################################################################### 286 | # Connecting all layers to form a new classifier. 287 | model.classifier = nn.Sequential(*new_classifier) 288 | 289 | return model 290 | 291 | 292 | def custom_part2_trainer(model): 293 | # return a dict that contains your customized learning settings. 294 | pass 295 | return None 296 | -------------------------------------------------------------------------------- /project/proj5/code/utils.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | import os.path as osp 4 | import shutil 5 | import time 6 | import random 7 | import numpy as np 8 | from visdom import Visdom 9 | 10 | import torch 11 | import torch.utils.data 12 | from torch.autograd import Variable 13 | from IPython.core.debugger import set_trace 14 | 15 | import torchvision.transforms as transforms 16 | import torchvision.datasets as datasets 17 | from torch.utils.data import DataLoader 18 | from sklearn.preprocessing import StandardScaler 19 | 20 | 21 | def set_seed(seed, use_GPU=False): 22 | torch.manual_seed(seed) 23 | np.random.seed(seed) 24 | random.seed(seed) 25 | if use_GPU: 26 | torch.cuda.manual_seed(seed) 27 | torch.cuda.manual_seed_all(seed) 28 | torch.backends.cudnn.deterministic = True 29 | 30 | 31 | def print_input_size_hook(self, input, output): 32 | print('Input size to classifier is', input[0].size()) 33 | 34 | 35 | class AverageMeter(object): 36 | """Computes and stores the average and current value""" 37 | 38 | def __init__(self): 39 | self.reset() 40 | 41 | def reset(self): 42 | self.val = 0 43 | self.avg = 0 44 | self.sum = 0 45 | self.count = 0 46 | 47 | def update(self, val, n=1): 48 | self.val = val 49 | self.sum += val * n 50 | self.count += n 51 | self.avg = self.sum / self.count 52 | 53 | 54 | def accuracy(output, target, topk=(1,)): 55 | """Computes the precision@k for the specified values of k""" 56 | maxk = max(topk) 57 | batch_size = target.size(0) 58 | 59 | _, pred = output.topk(maxk, 1, True, True) 60 | pred = pred.t() 61 | correct = pred.eq(target.view(1, 
-1).expand_as(pred)) 62 | 63 | res = [] 64 | for k in topk: 65 | correct_k = correct[:k].view(-1).float().sum(0, keepdim=True) 66 | res.append(correct_k.mul_(100.0 / batch_size)) 67 | return res 68 | 69 | 70 | class Trainer(object): 71 | def __init__(self, train_dataset, val_dataset, model, loss_fn, optimizer, 72 | lr_scheduler, params): 73 | """ 74 | General purpose training script 75 | :param train_dataset: PyTorch dataset that loads training images 76 | :param val_dataset: PyTorch dataset that loads testing / validation images 77 | :param model: Network model 78 | :param optimizer: PyTorch optimizer object 79 | :param lr_scheduler: PyTorch learning rate scheduler object 80 | :param loss_fn: loss function 81 | :param params: dictionary containing parameters for the training process 82 | It can contain the following fields (fields with no default value mentioned 83 | are mandatory): 84 | n_epochs: number of epochs of training 85 | batch_size: batch size for one iteration 86 | do_val: perform validation? (default: True) 87 | shuffle: shuffle training data? (default: True) 88 | num_workers: number of CPU threads for loading data (default: 4) 89 | val_freq: frequency of validation (in number of epochs) (default: 1) 90 | print_freq: progress printing frequency (in number of iterations 91 | (default: 20) 92 | experiment: name of the experiment, used to create logs and checkpoints 93 | checkpoint_file: Name of file with saved weights. Loaded at before 94 | start of training if provided (default: None) 95 | resume_optim: whether to resume optimization from loaded weights 96 | (default: True) 97 | """ 98 | self.model = model 99 | self.loss_fn = loss_fn 100 | self.optimizer = optimizer 101 | self.lr_scheduler = lr_scheduler 102 | self.best_prec1 = -float('inf') 103 | 104 | # parse params with default values 105 | self.config = { 106 | 'n_epochs': params['n_epochs'], 107 | 'batch_size': params['batch_size'], 108 | 'do_val': params.get('do_val', True), 109 | 'shuffle': params.get('shuffle', True), 110 | 'num_workers': params.get('num_workers', 4), 111 | 'val_freq': params.get('val_freq', 1), 112 | 'print_freq': params.get('print_freq', 100), 113 | 'experiment': params['experiment'], 114 | 'checkpoint_file': params.get('checkpoint_file'), 115 | 'resume_optim': params.get('resume_optim', True) 116 | } 117 | 118 | self.logdir = osp.join(os.getcwd(), 'logs', self.config['experiment']) 119 | if not osp.isdir(self.logdir): 120 | os.makedirs(self.logdir) 121 | 122 | # visdom plots 123 | self.vis_env = self.config['experiment'] 124 | self.loss_win = 'loss_win' 125 | self.vis = Visdom() 126 | self.vis.line(X=np.zeros((1, 2)), Y=np.zeros((1, 2)), win=self.loss_win, 127 | opts={'legend': ['train_loss', 'val_loss'], 'xlabel': 'epochs', 128 | 'ylabel': 'loss'}, env=self.vis_env) 129 | self.lr_win = 'lr_win' 130 | self.vis.line(X=np.zeros(1), Y=np.zeros(1), win=self.lr_win, 131 | opts={'legend': ['learning_rate'], 'xlabel': 'epochs', 132 | 'ylabel': 'log(lr)'}, env=self.vis_env) 133 | self.top1_win = 'top1_win' 134 | self.vis.line(X=np.zeros((1, 2)), Y=np.zeros((1, 2)), win=self.top1_win, 135 | opts={'legend': ['train_top1_prec', 'val_top1_prec'], 'xlabel': 'epochs', 136 | 'ylabel': 'top1_prec (%)'}, env=self.vis_env) 137 | self.top5_win = 'top5_win' 138 | self.vis.line(X=np.zeros((1, 2)), Y=np.zeros((1, 2)), win=self.top5_win, 139 | opts={'legend': ['train_top5_prec', 'val_top5_prec'], 'xlabel': 'epochs', 140 | 'ylabel': 'top5_prec (%)'}, env=self.vis_env) 141 | 142 | # log all the command line options 143 | 
print('---------------------------------------') 144 | print('Experiment: {:s}'.format(self.config['experiment'])) 145 | for k, v in self.config.items(): 146 | print('{:s}: {:s}'.format(k, str(v))) 147 | print('---------------------------------------') 148 | 149 | self.start_epoch = int(0) 150 | checkpoint_file = self.config['checkpoint_file'] 151 | if checkpoint_file: 152 | if osp.isfile(checkpoint_file): 153 | checkpoint = torch.load(checkpoint_file) 154 | self.model.load_state_dict(checkpoint['model_state_dict']) 155 | self.best_prec1 = checkpoint['best_prec1'] 156 | if self.config['resume_optim']: 157 | self.optimizer.load_state_dict(checkpoint['optim_state_dict']) 158 | self.start_epoch = checkpoint['epoch'] 159 | print('Loaded checkpoint {:s} epoch {:d}'.format(checkpoint_file, 160 | checkpoint['epoch'])) 161 | 162 | self.train_loader = torch.utils.data.DataLoader(train_dataset, 163 | batch_size=self.config['batch_size'], 164 | shuffle=self.config['shuffle'], 165 | num_workers=self.config['num_workers']) 166 | if self.config['do_val']: 167 | self.val_loader = torch.utils.data.DataLoader(val_dataset, 168 | batch_size=self.config['batch_size'], shuffle=False, 169 | num_workers=self.config['num_workers']) 170 | else: 171 | self.val_loader = None 172 | 173 | def save_checkpoint(self, epoch, is_best): 174 | filename = osp.join(self.logdir, 'checkpoint.pth.tar') 175 | checkpoint_dict = \ 176 | {'epoch': epoch, 'model_state_dict': self.model.state_dict(), 177 | 'optim_state_dict': self.optimizer.state_dict(), 178 | 'best_prec1': self.best_prec1} 179 | torch.save(checkpoint_dict, filename) 180 | if is_best: 181 | shutil.copyfile(filename, osp.join(self.logdir, 'best_model.pth.tar')) 182 | 183 | def step_func(self, train): 184 | batch_time = AverageMeter() 185 | data_time = AverageMeter() 186 | losses = AverageMeter() 187 | top1 = AverageMeter() 188 | top5 = AverageMeter() 189 | 190 | if train: 191 | self.model.train() 192 | status = 'train' 193 | loader = self.train_loader 194 | else: 195 | self.model.eval() 196 | status = 'val' 197 | loader = self.val_loader 198 | 199 | end = time.time() 200 | 201 | for batch_idx, (data, target) in enumerate(loader): 202 | data_time.update(time.time() - end) 203 | 204 | kwargs = dict(target=target, loss_fn=self.loss_fn, 205 | optim=self.optimizer, train=train) 206 | loss, output = step_feedfwd(data, self.model, **kwargs) 207 | 208 | # measure accuracy and calculate loss 209 | prec1, prec5 = accuracy(output.data, target, topk=(1, 5)) 210 | losses.update(loss, data.size(0)) 211 | top1.update(prec1[0], data.size(0)) 212 | top5.update(prec5[0], data.size(0)) 213 | 214 | # measure batch time 215 | batch_time.update(time.time() - end) 216 | end = time.time() 217 | 218 | if batch_idx % self.config['print_freq'] == 0: 219 | print('{:s} {:s}: batch {:d}/{:d}, loss {:4.3f}, top-1 accuracy {:4.3f},' 220 | ' top-5 accuracy {:4.3f}'.format(status, self.config['experiment'], 221 | batch_idx, len(loader) - 1, loss, prec1[0], prec5[0])) 222 | 223 | print('{:s} {:s}: loss {:f}'.format(status, self.config['experiment'], 224 | losses.avg)) 225 | 226 | return losses.avg, top1.avg, top5.avg 227 | 228 | def train_val(self): 229 | for epoch in range(self.start_epoch, self.config['n_epochs']): 230 | print('{:s} Epoch {:d} / {:d}'.format(self.config['experiment'], epoch, 231 | self.config['n_epochs'])) 232 | 233 | # ADJUST LR 234 | self.lr_scheduler.step() 235 | lr = self.lr_scheduler.get_lr()[0] 236 | self.vis.line(X=np.asarray([epoch]), Y=np.asarray([np.log10(lr)]), 237 | 
win=self.lr_win, name='learning_rate', update='append', env=self.vis_env) 238 | 239 | # TRAIN 240 | loss, top1_prec, top5_prec = self.step_func(train=True) 241 | self.vis.line(X=np.asarray([epoch]), Y=np.asarray([loss]), 242 | win=self.loss_win, name='train_loss', update='append', env=self.vis_env) 243 | self.vis.line(X=np.asarray([epoch]), Y=np.asarray([top1_prec]), 244 | win=self.top1_win, name='train_top1_prec', update='append', env=self.vis_env) 245 | self.vis.line(X=np.asarray([epoch]), Y=np.asarray([top5_prec]), 246 | win=self.top5_win, name='train_top5_prec', update='append', env=self.vis_env) 247 | self.vis.save(envs=[self.vis_env]) 248 | 249 | # VALIDATION 250 | if self.config['do_val'] and ((epoch % self.config['val_freq'] == 0) or 251 | (epoch == self.config['n_epochs'] - 1)): 252 | loss, top1_prec, top5_prec = self.step_func(train=False) 253 | self.vis.line(X=np.asarray([epoch]), Y=np.asarray([loss]), 254 | win=self.loss_win, name='val_loss', update='append', env=self.vis_env) 255 | self.vis.line(X=np.asarray([epoch]), Y=np.asarray([top1_prec]), 256 | win=self.top1_win, name='val_top1_prec', update='append', env=self.vis_env) 257 | self.vis.line(X=np.asarray([epoch]), Y=np.asarray([top5_prec]), 258 | win=self.top5_win, name='val_top5_prec', update='append', env=self.vis_env) 259 | self.vis.save(envs=[self.vis_env]) 260 | 261 | # SAVE CHECKPOINT 262 | is_best = top1_prec > self.best_prec1 263 | self.best_prec1 = max(self.best_prec1, top1_prec) 264 | self.save_checkpoint(epoch, is_best) 265 | print('Checkpoint saved') 266 | if is_best: 267 | print('BEST TOP1 ACCURACY SO FAR') 268 | 269 | return self.best_prec1 270 | 271 | 272 | def step_feedfwd(data, model, target=None, loss_fn=None, optim=None, 273 | train=True): 274 | """ 275 | training/validation step for a feedforward NN 276 | :param data: 277 | :param target: 278 | :param model: 279 | :param loss_fn: 280 | :param optim: 281 | :param train: training / val stage 282 | :return: 283 | """ 284 | if train: 285 | assert loss_fn is not None 286 | 287 | with torch.no_grad(): 288 | data_var = Variable(data, requires_grad=train) 289 | output = model(data_var) 290 | 291 | if loss_fn is not None: 292 | with torch.no_grad(): 293 | target_var = Variable(target, requires_grad=False) 294 | loss = loss_fn(output, target_var) 295 | if train: 296 | # SGD step 297 | optim.zero_grad() 298 | loss.backward() 299 | optim.step() 300 | 301 | return loss.item(), output 302 | else: 303 | return 0, output 304 | 305 | 306 | def get_mean_std(data_path, input_size, rgb): 307 | tform = [] 308 | tform.append(transforms.Resize(size=input_size)) 309 | if not rgb: 310 | tform.append(transforms.Grayscale()) 311 | tform.append(transforms.ToTensor()) 312 | tform = transforms.Compose(tform) 313 | dset = datasets.ImageFolder(root=data_path, transform=tform) 314 | train_loader = DataLoader(dataset=dset, batch_size=50) 315 | scaler = StandardScaler(with_mean=True, with_std=True) 316 | print('Computing pixel mean and stdev...') 317 | for idx, (data, labels) in enumerate(train_loader): 318 | if idx % 20 == 0: 319 | print("Batch {:d} / {:d}".format(idx, len(train_loader))) 320 | data = data.numpy() 321 | n_channels = data.shape[1] 322 | # reshape into [n_pixels x 3] 323 | data = data.transpose((0, 2, 3, 1)).reshape((-1, n_channels)) 324 | # pass batch to incremental mean and stdev calculator 325 | scaler.partial_fit(data) 326 | print('Done, mean = ') 327 | pixel_mean = scaler.mean_ 328 | pixel_std = scaler.scale_ 329 | print(pixel_mean) 330 | print('std = ') 331 | 
print(pixel_std) 332 | return pixel_mean, pixel_std 333 | -------------------------------------------------------------------------------- /project/proj5/code/utils_gpu.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | import os.path as osp 4 | import shutil 5 | import time 6 | import random 7 | import numpy as np 8 | from visdom import Visdom 9 | 10 | import torch 11 | import torch.utils.data 12 | from torch.autograd import Variable 13 | from IPython.core.debugger import set_trace 14 | 15 | import torchvision.transforms as transforms 16 | import torchvision.datasets as datasets 17 | from torch.utils.data import DataLoader 18 | from sklearn.preprocessing import StandardScaler 19 | 20 | 21 | def set_seed(seed, use_GPU=False): 22 | torch.manual_seed(seed) 23 | np.random.seed(seed) 24 | random.seed(seed) 25 | if use_GPU: 26 | torch.cuda.manual_seed(seed) 27 | torch.cuda.manual_seed_all(seed) 28 | torch.backends.cudnn.deterministic = True 29 | 30 | 31 | def print_input_size_hook(self, input, output): 32 | print('Input size to classifier is', input[0].size()) 33 | 34 | 35 | class AverageMeter(object): 36 | """Computes and stores the average and current value""" 37 | 38 | def __init__(self): 39 | self.reset() 40 | 41 | def reset(self): 42 | self.val = 0 43 | self.avg = 0 44 | self.sum = 0 45 | self.count = 0 46 | 47 | def update(self, val, n=1): 48 | self.val = val 49 | self.sum += val * n 50 | self.count += n 51 | self.avg = self.sum / self.count 52 | 53 | 54 | def accuracy(output, target, topk=(1,)): 55 | """Computes the precision@k for the specified values of k""" 56 | maxk = max(topk) 57 | batch_size = target.size(0) 58 | 59 | _, pred = output.topk(maxk, 1, True, True) 60 | pred = pred.t() 61 | correct = pred.eq(target.view(1, -1).expand_as(pred)) 62 | 63 | res = [] 64 | for k in topk: 65 | correct_k = correct[:k].view(-1).float().sum(0, keepdim=True) 66 | res.append(correct_k.mul_(100.0 / batch_size)) 67 | return res 68 | 69 | 70 | class Trainer(object): 71 | def __init__(self, train_dataset, val_dataset, model, loss_fn, optimizer, 72 | lr_scheduler, params): 73 | """ 74 | General purpose training script 75 | :param train_dataset: PyTorch dataset that loads training images 76 | :param val_dataset: PyTorch dataset that loads testing / validation images 77 | :param model: Network model 78 | :param optimizer: PyTorch optimizer object 79 | :param lr_scheduler: PyTorch learning rate scheduler object 80 | :param loss_fn: loss function 81 | :param params: dictionary containing parameters for the training process 82 | It can contain the following fields (fields with no default value mentioned 83 | are mandatory): 84 | n_epochs: number of epochs of training 85 | batch_size: batch size for one iteration 86 | do_val: perform validation? (default: True) 87 | shuffle: shuffle training data? (default: True) 88 | num_workers: number of CPU threads for loading data (default: 4) 89 | val_freq: frequency of validation (in number of epochs) (default: 1) 90 | print_freq: progress printing frequency (in number of iterations 91 | (default: 20) 92 | experiment: name of the experiment, used to create logs and checkpoints 93 | checkpoint_file: Name of file with saved weights. 
Loaded at before 94 | start of training if provided (default: None) 95 | resume_optim: whether to resume optimization from loaded weights 96 | (default: True) 97 | """ 98 | self.model = model 99 | self.loss_fn = loss_fn 100 | self.optimizer = optimizer 101 | self.lr_scheduler = lr_scheduler 102 | self.best_prec1 = -float('inf') 103 | 104 | # parse params with default values 105 | self.config = { 106 | 'n_epochs': params['n_epochs'], 107 | 'batch_size': params['batch_size'], 108 | 'do_val': params.get('do_val', True), 109 | 'shuffle': params.get('shuffle', True), 110 | 'num_workers': params.get('num_workers', 4), 111 | 'val_freq': params.get('val_freq', 1), 112 | 'print_freq': params.get('print_freq', 100), 113 | 'experiment': params['experiment'], 114 | 'checkpoint_file': params.get('checkpoint_file'), 115 | 'resume_optim': params.get('resume_optim', True) 116 | } 117 | 118 | self.logdir = osp.join(os.getcwd(), 'logs', self.config['experiment']) 119 | if not osp.isdir(self.logdir): 120 | os.makedirs(self.logdir) 121 | 122 | # visdom plots 123 | self.vis_env = self.config['experiment'] 124 | self.loss_win = 'loss_win' 125 | self.vis = Visdom() 126 | self.vis.line(X=np.zeros((1, 2)), Y=np.zeros((1, 2)), win=self.loss_win, 127 | opts={'legend': ['train_loss', 'val_loss'], 'xlabel': 'epochs', 128 | 'ylabel': 'loss'}, env=self.vis_env) 129 | self.lr_win = 'lr_win' 130 | self.vis.line(X=np.zeros(1), Y=np.zeros(1), win=self.lr_win, 131 | opts={'legend': ['learning_rate'], 'xlabel': 'epochs', 132 | 'ylabel': 'log(lr)'}, env=self.vis_env) 133 | self.top1_win = 'top1_win' 134 | self.vis.line(X=np.zeros((1, 2)), Y=np.zeros((1, 2)), win=self.top1_win, 135 | opts={'legend': ['train_top1_prec', 'val_top1_prec'], 'xlabel': 'epochs', 136 | 'ylabel': 'top1_prec (%)'}, env=self.vis_env) 137 | self.top5_win = 'top5_win' 138 | self.vis.line(X=np.zeros((1, 2)), Y=np.zeros((1, 2)), win=self.top5_win, 139 | opts={'legend': ['train_top5_prec', 'val_top5_prec'], 'xlabel': 'epochs', 140 | 'ylabel': 'top5_prec (%)'}, env=self.vis_env) 141 | 142 | # log all the command line options 143 | print('---------------------------------------') 144 | print('Experiment: {:s}'.format(self.config['experiment'])) 145 | for k, v in self.config.items(): 146 | print('{:s}: {:s}'.format(k, str(v))) 147 | print('---------------------------------------') 148 | 149 | self.start_epoch = int(0) 150 | checkpoint_file = self.config['checkpoint_file'] 151 | if checkpoint_file: 152 | if osp.isfile(checkpoint_file): 153 | checkpoint = torch.load(checkpoint_file) 154 | self.model.load_state_dict(checkpoint['model_state_dict']) 155 | self.best_prec1 = checkpoint['best_prec1'] 156 | if self.config['resume_optim']: 157 | self.optimizer.load_state_dict(checkpoint['optim_state_dict']) 158 | self.start_epoch = checkpoint['epoch'] 159 | print('Loaded checkpoint {:s} epoch {:d}'.format(checkpoint_file, 160 | checkpoint['epoch'])) 161 | 162 | self.train_loader = torch.utils.data.DataLoader(train_dataset, 163 | batch_size=self.config['batch_size'], 164 | shuffle=self.config['shuffle'], 165 | num_workers=self.config['num_workers']) 166 | if self.config['do_val']: 167 | self.val_loader = torch.utils.data.DataLoader(val_dataset, 168 | batch_size=self.config['batch_size'], shuffle=False, 169 | num_workers=self.config['num_workers']) 170 | else: 171 | self.val_loader = None 172 | 173 | def save_checkpoint(self, epoch, is_best): 174 | filename = osp.join(self.logdir, 'checkpoint.pth.tar') 175 | checkpoint_dict = \ 176 | {'epoch': epoch, 'model_state_dict': 
self.model.state_dict(), 177 | 'optim_state_dict': self.optimizer.state_dict(), 178 | 'best_prec1': self.best_prec1} 179 | torch.save(checkpoint_dict, filename) 180 | if is_best: 181 | shutil.copyfile(filename, osp.join(self.logdir, 'best_model.pth.tar')) 182 | 183 | def step_func(self, train): 184 | batch_time = AverageMeter() 185 | data_time = AverageMeter() 186 | losses = AverageMeter() 187 | top1 = AverageMeter() 188 | top5 = AverageMeter() 189 | 190 | if train: 191 | self.model.train() 192 | status = 'train' 193 | loader = self.train_loader 194 | else: 195 | self.model.eval() 196 | status = 'val' 197 | loader = self.val_loader 198 | 199 | end = time.time() 200 | 201 | for batch_idx, (data, target) in enumerate(loader): 202 | target = target.cuda() 203 | data_time.update(time.time() - end) 204 | 205 | kwargs = dict(target=target, loss_fn=self.loss_fn, 206 | optim=self.optimizer, train=train) 207 | loss, output = step_feedfwd(data, self.model, **kwargs) 208 | 209 | # measure accuracy and calculate loss 210 | prec1, prec5 = accuracy(output.data, target, topk=(1, 5)) 211 | losses.update(loss, data.size(0)) 212 | top1.update(prec1[0], data.size(0)) 213 | top5.update(prec5[0], data.size(0)) 214 | 215 | # measure batch time 216 | batch_time.update(time.time() - end) 217 | end = time.time() 218 | 219 | if batch_idx % self.config['print_freq'] == 0: 220 | print('{:s} {:s}: batch {:d}/{:d}, loss {:4.3f}, top-1 accuracy {:4.3f},' 221 | ' top-5 accuracy {:4.3f}'.format(status, self.config['experiment'], 222 | batch_idx, len(loader) - 1, loss, prec1[0], prec5[0])) 223 | 224 | print('{:s} {:s}: loss {:f}'.format(status, self.config['experiment'], 225 | losses.avg)) 226 | 227 | return losses.avg, top1.avg, top5.avg 228 | 229 | def train_val(self): 230 | for epoch in range(self.start_epoch, self.config['n_epochs']): 231 | print('{:s} Epoch {:d} / {:d}'.format(self.config['experiment'], epoch, 232 | self.config['n_epochs'])) 233 | # TRAIN 234 | loss, top1_prec, top5_prec = self.step_func(train=True) 235 | # self.vis.line(X=np.asarray([epoch]), Y=np.asarray([loss]), 236 | # win=self.loss_win, name='train_loss', update='append', env=self.vis_env) 237 | # self.vis.line(X=np.asarray([epoch]), Y=np.asarray([top1_prec]), 238 | # win=self.top1_win, name='train_top1_prec', update='append', env=self.vis_env) 239 | # self.vis.line(X=np.asarray([epoch]), Y=np.asarray([top5_prec]), 240 | # win=self.top5_win, name='train_top5_prec', update='append', env=self.vis_env) 241 | # self.vis.save(envs=[self.vis_env]) 242 | 243 | # VALIDATION 244 | if self.config['do_val'] and ((epoch % self.config['val_freq'] == 0) or 245 | (epoch == self.config['n_epochs'] - 1)): 246 | loss, top1_prec, top5_prec = self.step_func(train=False) 247 | # self.vis.line(X=np.asarray([epoch]), Y=np.asarray([loss]), 248 | # win=self.loss_win, name='val_loss', update='append', env=self.vis_env) 249 | # self.vis.line(X=np.asarray([epoch]), Y=np.asarray([top1_prec]), 250 | # win=self.top1_win, name='val_top1_prec', update='append', env=self.vis_env) 251 | # self.vis.line(X=np.asarray([epoch]), Y=np.asarray([top5_prec]), 252 | # win=self.top5_win, name='val_top5_prec', update='append', env=self.vis_env) 253 | # self.vis.save(envs=[self.vis_env]) 254 | 255 | # ADJUST LR 256 | self.lr_scheduler.step() 257 | lr = self.lr_scheduler.get_lr()[0] 258 | self.vis.line(X=np.asarray([epoch]), Y=np.asarray([np.log10(lr)]), 259 | win=self.lr_win, name='learning_rate', update='append', env=self.vis_env) 260 | 261 | # SAVE CHECKPOINT 262 | is_best = top1_prec > 
self.best_prec1 263 | self.best_prec1 = max(self.best_prec1, top1_prec) 264 | self.save_checkpoint(epoch, is_best) 265 | print('Checkpoint saved') 266 | if is_best: 267 | print('BEST TOP1 ACCURACY SO FAR') 268 | 269 | return self.best_prec1 270 | 271 | 272 | def step_feedfwd(data, model, target=None, loss_fn=None, optim=None, 273 | train=True): 274 | """ 275 | training/validation step for a feedforward NN 276 | :param data: 277 | :param target: 278 | :param model: 279 | :param loss_fn: 280 | :param optim: 281 | :param train: training / val stage 282 | :return: 283 | """ 284 | if train: 285 | assert loss_fn is not None 286 | 287 | with torch.no_grad(): 288 | data_var = Variable(data, requires_grad=train).cuda() 289 | output = model(data_var) 290 | 291 | if loss_fn is not None: 292 | with torch.no_grad(): 293 | target_var = Variable(target, requires_grad=False).cuda() 294 | loss = loss_fn(output, target_var) 295 | if train: 296 | # SGD step 297 | optim.zero_grad() 298 | loss.backward() 299 | optim.step() 300 | 301 | return loss.item(), output 302 | else: 303 | return 0, output 304 | 305 | 306 | def get_mean_std(data_path, input_size, rgb): 307 | tform = [] 308 | tform.append(transforms.Resize(size=input_size)) 309 | if not rgb: 310 | tform.append(transforms.Grayscale()) 311 | tform.append(transforms.ToTensor()) 312 | tform = transforms.Compose(tform) 313 | dset = datasets.ImageFolder(root=data_path, transform=tform) 314 | train_loader = DataLoader(dataset=dset, batch_size=50) 315 | scaler = StandardScaler(with_mean=True, with_std=True) 316 | print('Computing pixel mean and stdev...') 317 | for idx, (data, labels) in enumerate(train_loader): 318 | if idx % 20 == 0: 319 | print("Batch {:d} / {:d}".format(idx, len(train_loader))) 320 | data = data.numpy() 321 | n_channels = data.shape[1] 322 | # reshape into [n_pixels x 3] 323 | data = data.transpose((0, 2, 3, 1)).reshape((-1, n_channels)) 324 | # pass batch to incremental mean and stdev calculator 325 | scaler.partial_fit(data) 326 | print('Done, mean = ') 327 | pixel_mean = scaler.mean_ 328 | pixel_std = scaler.scale_ 329 | print(pixel_mean) 330 | print('std = ') 331 | print(pixel_std) 332 | return pixel_mean, pixel_std 333 | --------------------------------------------------------------------------------
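
The notebook cell at the top of this section builds a `Trainer` from the datasets, the model, a loss function, an optimizer and an LR scheduler, then calls `train_val()` to get the best top-1 accuracy. For reference, below is a minimal sketch of how the pieces defined in `student_code.py` and `utils.py` could be wired together for that part-1 run. The data path, class count, loss, optimizer, scheduler and the values inside `params` are illustrative assumptions, not the settings used by the assignment notebook; the sketch also assumes it is executed from `project/proj5/code/` with a Visdom server already running, since `utils.Trainer` plots to Visdom in its constructor.

```python
# Minimal wiring sketch (assumed hyperparameters; run from project/proj5/code/).
import torch.nn as nn
import torch.optim as optim

import utils                                   # Trainer, set_seed (CPU version above)
from student_code import create_datasets, SimpleNet

utils.set_seed(0)                              # seed value is arbitrary here

input_size = (64, 64)                          # matches the 64x64x1 input SimpleNet expects
data_path = '../data'                          # assumed location of the 'train'/'test' folders
num_classes = 15                               # assumed number of scene categories

# create_datasets returns two ImageFolder datasets (not DataLoaders);
# utils.Trainer wraps them in DataLoaders itself.
train_dataset, test_dataset = create_datasets(data_path, input_size, rgb=False)

model = SimpleNet(num_classes=num_classes, rgb=False)
loss_function = nn.CrossEntropyLoss()                         # assumed loss
optimizer = optim.SGD(model.parameters(), lr=1e-2,            # assumed optimizer
                      momentum=0.9, weight_decay=5e-4)
lr_scheduler = optim.lr_scheduler.StepLR(optimizer,           # assumed schedule
                                         step_size=30, gamma=0.1)

params = {
    'n_epochs': 50,                            # assumed; 'n_epochs', 'batch_size' and
    'batch_size': 64,                          # 'experiment' are the mandatory Trainer fields
    'experiment': 'part1_simplenet',
}

trainer = utils.Trainer(train_dataset, test_dataset, model, loss_function,
                        optimizer, lr_scheduler, params)
best_prec1 = trainer.train_val()
print('Best top-1 Accuracy = {:4.3f}'.format(best_prec1))
```

For the GPU variant, the same wiring applies with `utils_gpu.Trainer`, `utils_gpu.set_seed(seed, use_GPU=True)` and the model moved to CUDA before training; `utils_gpu.py` handles moving the batches itself.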