├── .DS_Store ├── .gitignore ├── Advanced_Topics_In_Machine_Learning ├── Introduction_to_Unsupervised_Learning │ ├── ML4ES_UnsupervisedLearning.ipynb │ ├── SOM_animation.gif │ ├── autoencoder.png │ ├── dendrogram.gif │ ├── elbow_method.png │ ├── hierarchical_gif.gif │ ├── kmeans_bad.gif │ ├── kmeans_good.gif │ └── nonlinear_PCA.png ├── ML_Model_Interpretation │ ├── breiman_permutation.gif │ ├── cnn_architecture.jpg │ ├── evaluation │ │ ├── __init__.py │ │ ├── attributes_diagrams.py │ │ ├── keras_metrics.py │ │ ├── performance_diagrams.py │ │ └── roc_curves.py │ ├── interpretation │ │ ├── __init__.py │ │ ├── backwards_optimization.py │ │ ├── binarization.py │ │ ├── class_activation.py │ │ ├── cnn.py │ │ ├── normalization.py │ │ ├── novelty_detection.py │ │ ├── permutation.py │ │ ├── plotting.py │ │ ├── saliency.py │ │ └── utils.py │ ├── lak_permutation.gif │ ├── model_components.png │ ├── notebook.ipynb │ ├── notebook.py │ ├── pretrained_cnn │ │ ├── pretrained_cnn.h5 │ │ ├── pretrained_cnn_metadata.json │ │ ├── pretrained_ucn.h5 │ │ └── pretrained_ucn_metadata.json │ └── wind_barb_explainer.png └── README.md ├── Introduction_To_Machine_Learning ├── .DS_Store ├── Data_Science_Fundamentals │ ├── .DS_Store │ ├── README.md │ ├── __pycache__ │ │ ├── attributes_diagrams.cpython-36.pyc │ │ ├── performance_diagrams.cpython-36.pyc │ │ ├── roc_curves.cpython-36.pyc │ │ └── utils.cpython-36.pyc │ ├── attributes_diagram.png │ ├── attributes_diagrams.py │ ├── contingency_table.png │ ├── ct_scores.png │ ├── ml_short_course_module_2_data_science.ipynb │ ├── ml_short_course_module_2_data_science.py │ ├── overfitting.png │ ├── performance_diagram.png │ ├── performance_diagrams.py │ ├── roc.png │ ├── roc_curves.py │ └── utils.py ├── Introduction_to_ML_and_AI │ ├── Images │ │ ├── AI-vs-ML-vs-Deep-Learning.png │ │ ├── PCAexample.png │ │ ├── SVD_example.png │ │ ├── ml_comic.png │ │ └── pca.gif │ ├── Introduction.ipynb │ └── README.md ├── README.md └── Supervised_Learning_Algorithims │ ├── BP.png │ ├── FP.png │ ├── Kernel.png │ ├── LC.png │ ├── LR.png │ ├── Models.png │ ├── NN.png │ ├── README.md │ ├── SK.png │ ├── SML.png │ ├── SVM.png │ ├── SupervisedML.png │ ├── Supervised_ML_Lecture_3.ipynb │ ├── Supervised_ML_Lecture_3.py │ ├── attr_diagrams.py │ ├── contingency_table.png │ ├── download_data.py │ ├── extract_data.py │ ├── performance_diagrams.py │ ├── roc_curves.py │ ├── tree_schematic.jpg │ └── utils.py ├── README.md ├── download_data.py ├── setup.py └── util └── extract_data.py /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alburke/ams-2020-ml-python-course/997f93303f01956685c49ecdfc010ebb8dfc9b4e/.DS_Store -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled, optimized, and DLL files. 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # Distribution and packaging. 7 | build/ 8 | dist/ 9 | *.egg-info/ 10 | 11 | # IntelliJ things. 
12 | .idea/ 13 | *.iml 14 | 15 | # Other files 16 | Advanced_Topics_In_Machine_Learning/.ipynb_checkpoints/ 17 | data/ 18 | -------------------------------------------------------------------------------- /Advanced_Topics_In_Machine_Learning/Introduction_to_Unsupervised_Learning/SOM_animation.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alburke/ams-2020-ml-python-course/997f93303f01956685c49ecdfc010ebb8dfc9b4e/Advanced_Topics_In_Machine_Learning/Introduction_to_Unsupervised_Learning/SOM_animation.gif -------------------------------------------------------------------------------- /Advanced_Topics_In_Machine_Learning/Introduction_to_Unsupervised_Learning/autoencoder.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alburke/ams-2020-ml-python-course/997f93303f01956685c49ecdfc010ebb8dfc9b4e/Advanced_Topics_In_Machine_Learning/Introduction_to_Unsupervised_Learning/autoencoder.png -------------------------------------------------------------------------------- /Advanced_Topics_In_Machine_Learning/Introduction_to_Unsupervised_Learning/dendrogram.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alburke/ams-2020-ml-python-course/997f93303f01956685c49ecdfc010ebb8dfc9b4e/Advanced_Topics_In_Machine_Learning/Introduction_to_Unsupervised_Learning/dendrogram.gif -------------------------------------------------------------------------------- /Advanced_Topics_In_Machine_Learning/Introduction_to_Unsupervised_Learning/elbow_method.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alburke/ams-2020-ml-python-course/997f93303f01956685c49ecdfc010ebb8dfc9b4e/Advanced_Topics_In_Machine_Learning/Introduction_to_Unsupervised_Learning/elbow_method.png -------------------------------------------------------------------------------- /Advanced_Topics_In_Machine_Learning/Introduction_to_Unsupervised_Learning/hierarchical_gif.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alburke/ams-2020-ml-python-course/997f93303f01956685c49ecdfc010ebb8dfc9b4e/Advanced_Topics_In_Machine_Learning/Introduction_to_Unsupervised_Learning/hierarchical_gif.gif -------------------------------------------------------------------------------- /Advanced_Topics_In_Machine_Learning/Introduction_to_Unsupervised_Learning/kmeans_bad.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alburke/ams-2020-ml-python-course/997f93303f01956685c49ecdfc010ebb8dfc9b4e/Advanced_Topics_In_Machine_Learning/Introduction_to_Unsupervised_Learning/kmeans_bad.gif -------------------------------------------------------------------------------- /Advanced_Topics_In_Machine_Learning/Introduction_to_Unsupervised_Learning/kmeans_good.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alburke/ams-2020-ml-python-course/997f93303f01956685c49ecdfc010ebb8dfc9b4e/Advanced_Topics_In_Machine_Learning/Introduction_to_Unsupervised_Learning/kmeans_good.gif -------------------------------------------------------------------------------- /Advanced_Topics_In_Machine_Learning/Introduction_to_Unsupervised_Learning/nonlinear_PCA.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/alburke/ams-2020-ml-python-course/997f93303f01956685c49ecdfc010ebb8dfc9b4e/Advanced_Topics_In_Machine_Learning/Introduction_to_Unsupervised_Learning/nonlinear_PCA.png -------------------------------------------------------------------------------- /Advanced_Topics_In_Machine_Learning/ML_Model_Interpretation/breiman_permutation.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alburke/ams-2020-ml-python-course/997f93303f01956685c49ecdfc010ebb8dfc9b4e/Advanced_Topics_In_Machine_Learning/ML_Model_Interpretation/breiman_permutation.gif -------------------------------------------------------------------------------- /Advanced_Topics_In_Machine_Learning/ML_Model_Interpretation/cnn_architecture.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alburke/ams-2020-ml-python-course/997f93303f01956685c49ecdfc010ebb8dfc9b4e/Advanced_Topics_In_Machine_Learning/ML_Model_Interpretation/cnn_architecture.jpg -------------------------------------------------------------------------------- /Advanced_Topics_In_Machine_Learning/ML_Model_Interpretation/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /Advanced_Topics_In_Machine_Learning/ML_Model_Interpretation/evaluation/attributes_diagrams.py: -------------------------------------------------------------------------------- 1 | """Methods for plotting attributes diagram.""" 2 | 3 | import numpy 4 | from descartes import PolygonPatch 5 | import shapely.geometry 6 | import matplotlib.colors 7 | import matplotlib.pyplot as pyplot 8 | 9 | DEFAULT_NUM_BINS = 20 10 | RELIABILITY_LINE_COLOUR = numpy.array([228, 26, 28], dtype=float) / 255 11 | RELIABILITY_LINE_WIDTH = 3 12 | PERFECT_LINE_COLOUR = numpy.full(3, 152. / 255) 13 | PERFECT_LINE_WIDTH = 2 14 | 15 | NO_SKILL_LINE_COLOUR = numpy.array([31, 120, 180], dtype=float) / 255 16 | NO_SKILL_LINE_WIDTH = 2 17 | SKILL_AREA_TRANSPARENCY = 0.2 18 | CLIMATOLOGY_LINE_COLOUR = numpy.full(3, 152. / 255) 19 | CLIMATOLOGY_LINE_WIDTH = 2 20 | 21 | HISTOGRAM_FACE_COLOUR = numpy.array([228, 26, 28], dtype=float) / 255 22 | HISTOGRAM_EDGE_COLOUR = numpy.full(3, 0.) 23 | HISTOGRAM_EDGE_WIDTH = 2 24 | 25 | HISTOGRAM_LEFT_EDGE_COORD = 0.575 26 | HISTOGRAM_BOTTOM_EDGE_COORD = 0.175 27 | HISTOGRAM_WIDTH = 0.3 28 | HISTOGRAM_HEIGHT = 0.3 29 | 30 | HISTOGRAM_X_TICK_VALUES = numpy.linspace(0, 1, num=6, dtype=float) 31 | HISTOGRAM_Y_TICK_SPACING = 0.1 32 | 33 | FIGURE_WIDTH_INCHES = 15 34 | FIGURE_HEIGHT_INCHES = 15 35 | 36 | FONT_SIZE = 30 37 | pyplot.rc('font', size=FONT_SIZE) 38 | pyplot.rc('axes', titlesize=FONT_SIZE) 39 | pyplot.rc('axes', labelsize=FONT_SIZE) 40 | pyplot.rc('xtick', labelsize=FONT_SIZE) 41 | pyplot.rc('ytick', labelsize=FONT_SIZE) 42 | pyplot.rc('legend', fontsize=FONT_SIZE) 43 | pyplot.rc('figure', titlesize=FONT_SIZE) 44 | 45 | 46 | def _get_histogram(input_values, num_bins, min_value, max_value): 47 | """Creates histogram with uniform bin-spacing. 48 | 49 | E = number of input values 50 | B = number of bins 51 | 52 | :param input_values: length-E numpy array of values to bin. 53 | :param num_bins: Number of bins (B). 54 | :param min_value: Minimum value. Any input value < `min_value` will be 55 | assigned to the first bin. 
56 | :param max_value: Max value. Any input value > `max_value` will be 57 | assigned to the last bin. 58 | :return: inputs_to_bins: length-E numpy array of bin indices (integers). 59 | """ 60 | 61 | bin_cutoffs = numpy.linspace(min_value, max_value, num=num_bins + 1) 62 | inputs_to_bins = numpy.digitize( 63 | input_values, bin_cutoffs, right=False) - 1 64 | 65 | inputs_to_bins[inputs_to_bins < 0] = 0 66 | inputs_to_bins[inputs_to_bins > num_bins - 1] = num_bins - 1 67 | 68 | return inputs_to_bins 69 | 70 | 71 | def _get_points_in_relia_curve( 72 | observed_labels, forecast_probabilities, num_bins): 73 | """Creates points for reliability curve. 74 | 75 | The reliability curve is the main component of the attributes diagram. 76 | 77 | E = number of examples 78 | B = number of bins 79 | 80 | :param observed_labels: length-E numpy array of class labels (integers in 81 | 0...1). 82 | :param forecast_probabilities: length-E numpy array with forecast 83 | probabilities of label = 1. 84 | :param num_bins: Number of bins for forecast probability. 85 | :return: mean_forecast_probs: length-B numpy array of mean forecast 86 | probabilities. 87 | :return: mean_event_frequencies: length-B numpy array of conditional mean 88 | event frequencies. mean_event_frequencies[j] = frequency of label 1 89 | when forecast probability is in the [j]th bin. 90 | :return: num_examples_by_bin: length-B numpy array with number of examples 91 | in each forecast bin. 92 | """ 93 | 94 | assert numpy.all(numpy.logical_or( 95 | observed_labels == 0, observed_labels == 1 96 | )) 97 | 98 | assert numpy.all(numpy.logical_and( 99 | forecast_probabilities >= 0, forecast_probabilities <= 1 100 | )) 101 | 102 | assert num_bins > 1 103 | 104 | inputs_to_bins = _get_histogram( 105 | input_values=forecast_probabilities, num_bins=num_bins, min_value=0., 106 | max_value=1.) 107 | 108 | mean_forecast_probs = numpy.full(num_bins, numpy.nan) 109 | mean_event_frequencies = numpy.full(num_bins, numpy.nan) 110 | num_examples_by_bin = numpy.full(num_bins, -1, dtype=int) 111 | 112 | for k in range(num_bins): 113 | these_example_indices = numpy.where(inputs_to_bins == k)[0] 114 | num_examples_by_bin[k] = len(these_example_indices) 115 | 116 | mean_forecast_probs[k] = numpy.mean( 117 | forecast_probabilities[these_example_indices]) 118 | 119 | mean_event_frequencies[k] = numpy.mean( 120 | observed_labels[these_example_indices].astype(float) 121 | ) 122 | 123 | return mean_forecast_probs, mean_event_frequencies, num_examples_by_bin 124 | 125 | 126 | def _vertices_to_polygon_object(x_vertices, y_vertices): 127 | """Converts two arrays of vertices to `shapely.geometry.Polygon` object. 128 | 129 | V = number of vertices 130 | 131 | This method allows for simple polygons only (no disjoint polygons, no 132 | holes). 133 | 134 | :param x_vertices: length-V numpy array of x-coordinates. 135 | :param y_vertices: length-V numpy array of y-coordinates. 136 | :return: polygon_object: Instance of `shapely.geometry.Polygon`. 137 | """ 138 | 139 | list_of_vertices = [] 140 | for i in range(len(x_vertices)): 141 | list_of_vertices.append((x_vertices[i], y_vertices[i])) 142 | 143 | return shapely.geometry.Polygon(shell=list_of_vertices) 144 | 145 | 146 | def _plot_background(axes_object, observed_labels): 147 | """Plots background of attributes diagram. 148 | 149 | E = number of examples 150 | 151 | :param axes_object: Instance of `matplotlib.axes._subplots.AxesSubplot`. 152 | Will plot on these axes. 
153 | :param observed_labels: length-E numpy array of class labels (integers in 154 | 0...1). 155 | """ 156 | 157 | # Plot positive-skill area. 158 | climatology = numpy.mean(observed_labels.astype(float)) 159 | skill_area_colour = matplotlib.colors.to_rgba( 160 | NO_SKILL_LINE_COLOUR, SKILL_AREA_TRANSPARENCY) 161 | 162 | x_vertices_left = numpy.array([0, climatology, climatology, 0, 0]) 163 | y_vertices_left = numpy.array([0, 0, climatology, climatology / 2, 0]) 164 | 165 | left_polygon_object = _vertices_to_polygon_object( 166 | x_vertices=x_vertices_left, y_vertices=y_vertices_left) 167 | left_polygon_patch = PolygonPatch( 168 | left_polygon_object, lw=0, ec=skill_area_colour, fc=skill_area_colour) 169 | axes_object.add_patch(left_polygon_patch) 170 | 171 | x_vertices_right = numpy.array( 172 | [climatology, 1, 1, climatology, climatology]) 173 | y_vertices_right = numpy.array( 174 | [climatology, (1 + climatology) / 2, 1, 1, climatology]) 175 | 176 | right_polygon_object = _vertices_to_polygon_object( 177 | x_vertices=x_vertices_right, y_vertices=y_vertices_right) 178 | right_polygon_patch = PolygonPatch( 179 | right_polygon_object, lw=0, ec=skill_area_colour, fc=skill_area_colour) 180 | axes_object.add_patch(right_polygon_patch) 181 | 182 | # Plot no-skill line (at edge of positive-skill area). 183 | no_skill_x_coords = numpy.array([0, 1], dtype=float) 184 | no_skill_y_coords = numpy.array([climatology, 1 + climatology]) / 2 185 | axes_object.plot( 186 | no_skill_x_coords, no_skill_y_coords, color=NO_SKILL_LINE_COLOUR, 187 | linestyle='solid', linewidth=NO_SKILL_LINE_WIDTH) 188 | 189 | # Plot climatology line (vertical). 190 | climo_line_x_coords = numpy.full(2, climatology) 191 | climo_line_y_coords = numpy.array([0, 1], dtype=float) 192 | axes_object.plot( 193 | climo_line_x_coords, climo_line_y_coords, color=CLIMATOLOGY_LINE_COLOUR, 194 | linestyle='dashed', linewidth=CLIMATOLOGY_LINE_WIDTH) 195 | 196 | # Plot no-resolution line (horizontal). 197 | no_resolution_x_coords = climo_line_y_coords + 0. 198 | no_resolution_y_coords = climo_line_x_coords + 0. 199 | axes_object.plot( 200 | no_resolution_x_coords, no_resolution_y_coords, 201 | color=CLIMATOLOGY_LINE_COLOUR, linestyle='dashed', 202 | linewidth=CLIMATOLOGY_LINE_WIDTH) 203 | 204 | 205 | def _floor_to_nearest(input_value_or_array, increment): 206 | """Rounds number(s) down to the nearest multiple of `increment`. 207 | 208 | :param input_value_or_array: Input (either scalar or numpy array). 209 | :param increment: Increment (or rounding base -- whatever you want to call 210 | it). 211 | :return: output_value_or_array: Rounded version of `input_value_or_array`. 212 | """ 213 | 214 | return increment * numpy.floor(input_value_or_array / increment) 215 | 216 | 217 | def _plot_forecast_histogram(figure_object, num_examples_by_bin): 218 | """Plots forecast histogram as inset in the attributes diagram. 219 | 220 | B = number of bins 221 | 222 | :param figure_object: Instance of `matplotlib.figure.Figure`. Will plot in 223 | this figure. 224 | :param num_examples_by_bin: length-B numpy array, where 225 | num_examples_by_bin[j] = number of examples in [j]th forecast bin. 
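Example (illustrative, with made-up counts): num_examples_by_bin = numpy.array([60, 25, 10, 4, 1]) yields bin frequencies [0.60, 0.25, 0.10, 0.04, 0.01], which are drawn as bars in the inset.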
226 | """ 227 | 228 | num_bins = len(num_examples_by_bin) 229 | bin_frequencies = ( 230 | num_examples_by_bin.astype(float) / numpy.sum(num_examples_by_bin) 231 | ) 232 | 233 | forecast_bin_edges = numpy.linspace(0, 1, num=num_bins + 1, dtype=float) 234 | forecast_bin_width = forecast_bin_edges[1] - forecast_bin_edges[0] 235 | forecast_bin_centers = forecast_bin_edges[:-1] + forecast_bin_width / 2 236 | 237 | inset_axes_object = figure_object.add_axes( 238 | [HISTOGRAM_LEFT_EDGE_COORD, HISTOGRAM_BOTTOM_EDGE_COORD, 239 | HISTOGRAM_WIDTH, HISTOGRAM_HEIGHT] 240 | ) 241 | 242 | inset_axes_object.bar( 243 | forecast_bin_centers, bin_frequencies, forecast_bin_width, 244 | color=HISTOGRAM_FACE_COLOUR, edgecolor=HISTOGRAM_EDGE_COLOUR, 245 | linewidth=HISTOGRAM_EDGE_WIDTH) 246 | 247 | max_y_tick_value = _floor_to_nearest( 248 | 1.05 * numpy.max(bin_frequencies), HISTOGRAM_Y_TICK_SPACING) 249 | num_y_ticks = 1 + int(numpy.round( 250 | max_y_tick_value / HISTOGRAM_Y_TICK_SPACING 251 | )) 252 | 253 | y_tick_values = numpy.linspace(0, max_y_tick_value, num=num_y_ticks) 254 | pyplot.yticks(y_tick_values, axes=inset_axes_object) 255 | pyplot.xticks(HISTOGRAM_X_TICK_VALUES, axes=inset_axes_object) 256 | 257 | inset_axes_object.set_xlim(0, 1) 258 | inset_axes_object.set_ylim(0, 1.05 * numpy.max(bin_frequencies)) 259 | 260 | 261 | def plot_reliability_curve( 262 | observed_labels, forecast_probabilities, num_bins=DEFAULT_NUM_BINS, 263 | axes_object=None): 264 | """Plots reliability curve. 265 | 266 | E = number of examples 267 | 268 | :param observed_labels: length-E numpy array of class labels (integers in 269 | 0...1). 270 | :param forecast_probabilities: length-E numpy array with forecast 271 | probabilities of label = 1. 272 | :param num_bins: Number of bins for forecast probability. 273 | :param axes_object: Instance of `matplotlib.axes._subplots.AxesSubplot`. 274 | Will plot on these axes. 275 | :return: mean_forecast_probs: See doc for `_get_points_in_relia_curve`. 276 | :return: mean_event_frequencies: Same. 277 | :return: num_examples_by_bin: Same. 278 | """ 279 | 280 | mean_forecast_probs, mean_event_frequencies, num_examples_by_bin = ( 281 | _get_points_in_relia_curve( 282 | observed_labels=observed_labels, 283 | forecast_probabilities=forecast_probabilities, num_bins=num_bins) 284 | ) 285 | 286 | if axes_object is None: 287 | _, axes_object = pyplot.subplots( 288 | 1, 1, figsize=(FIGURE_WIDTH_INCHES, FIGURE_HEIGHT_INCHES) 289 | ) 290 | 291 | perfect_x_coords = numpy.array([0, 1], dtype=float) 292 | perfect_y_coords = perfect_x_coords + 0. 293 | axes_object.plot( 294 | perfect_x_coords, perfect_y_coords, color=PERFECT_LINE_COLOUR, 295 | linestyle='dashed', linewidth=PERFECT_LINE_WIDTH) 296 | 297 | real_indices = numpy.where(numpy.invert(numpy.logical_or( 298 | numpy.isnan(mean_forecast_probs), numpy.isnan(mean_event_frequencies) 299 | )))[0] 300 | 301 | axes_object.plot( 302 | mean_forecast_probs[real_indices], mean_event_frequencies[real_indices], 303 | color=RELIABILITY_LINE_COLOUR, 304 | linestyle='solid', linewidth=RELIABILITY_LINE_WIDTH) 305 | 306 | axes_object.set_xlabel('Forecast probability') 307 | axes_object.set_ylabel('Conditional event frequency') 308 | axes_object.set_xlim(0., 1.) 309 | axes_object.set_ylim(0., 1.) 310 | 311 | return mean_forecast_probs, mean_event_frequencies, num_examples_by_bin 312 | 313 | 314 | def plot_attributes_diagram( 315 | observed_labels, forecast_probabilities, num_bins=DEFAULT_NUM_BINS): 316 | """Plots attributes diagram. 
317 | 318 | :param observed_labels: See doc for `plot_reliability_curve`. 319 | :param forecast_probabilities: Same. 320 | :param num_bins: Same. 321 | :return: mean_forecast_probs: See doc for `_get_points_in_relia_curve`. 322 | :return: mean_event_frequencies: Same. 323 | :return: num_examples_by_bin: Same. 324 | """ 325 | 326 | mean_forecast_probs, mean_event_frequencies, num_examples_by_bin = ( 327 | _get_points_in_relia_curve( 328 | observed_labels=observed_labels, 329 | forecast_probabilities=forecast_probabilities, num_bins=num_bins) 330 | ) 331 | 332 | figure_object, axes_object = pyplot.subplots( 333 | 1, 1, figsize=(FIGURE_WIDTH_INCHES, FIGURE_HEIGHT_INCHES) 334 | ) 335 | 336 | _plot_background(axes_object=axes_object, observed_labels=observed_labels) 337 | _plot_forecast_histogram(figure_object=figure_object, 338 | num_examples_by_bin=num_examples_by_bin) 339 | 340 | plot_reliability_curve( 341 | observed_labels=observed_labels, 342 | forecast_probabilities=forecast_probabilities, num_bins=num_bins, 343 | axes_object=axes_object) 344 | 345 | return mean_forecast_probs, mean_event_frequencies, num_examples_by_bin 346 | -------------------------------------------------------------------------------- /Advanced_Topics_In_Machine_Learning/ML_Model_Interpretation/evaluation/keras_metrics.py: -------------------------------------------------------------------------------- 1 | """Performance metrics used to monitor Keras model while training. 2 | 3 | WARNING: these metrics have the following properties, which some users may find 4 | undesirable. 5 | 6 | [1] Used only for monitoring, not to serve as loss functions. 7 | [2] Binary metrics treat the highest class as the positive class, all others as 8 | the negative class. In other words, binary metrics are for "highest class 9 | vs. all". 10 | [3] Metrics are usually based on a contingency table, which contains 11 | deterministic forecasts. However, metrics in this module are based only on 12 | probabilistic forecasts (it would take too long to compute metrics at 13 | various probability thresholds during training). 14 | 15 | --- NOTATION --- 16 | 17 | Throughout this module, I will use the following letters to denote elements of 18 | the contingency table (even though, as mentioned above, there are no actual 19 | contingency tables). 20 | 21 | a = number of true positives ("hits") 22 | b = number of false positives ("false alarms") 23 | c = number of false negatives ("misses") 24 | d = number of true negatives ("correct nulls") 25 | 26 | E = number of examples 27 | K = number of classes (possible values of target variable) 28 | 29 | --- FORMAT 1: BINARY CLASSIFICATION --- 30 | 31 | target_tensor: length-E tensor of target values (observed classes). If 32 | target_tensor[i] = k, the [i]th example belongs to the [k]th class. 33 | 34 | forecast_probability_tensor: length-E tensor of forecast probabilities. 35 | forecast_probability_tensor[i] = forecast probability that the [i]th example 36 | belongs to class 1 (as opposed to 0). 37 | 38 | --- FORMAT 2: NON-BINARY CLASSIFICATION --- 39 | 40 | target_tensor: E-by-K tensor of target values (observed classes). If 41 | target_tensor[i, k] = 1, the [i]th example belongs to the [k]th class. 42 | 43 | forecast_probability_tensor: E-by-K tensor of forecast probabilities. 44 | forecast_probability_tensor[i, k] = forecast probability that the [i]th 45 | example belongs to the [k]th class. 
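Example of the two formats (illustrative, with E = 3 examples and K = 2 classes): in format 1, target_tensor = [0, 1, 1] and forecast_probability_tensor = [0.2, 0.9, 0.6]; in format 2, the same examples become target_tensor = [[1, 0], [0, 1], [0, 1]] and forecast_probability_tensor = [[0.8, 0.2], [0.1, 0.9], [0.4, 0.6]].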
46 | """ 47 | 48 | import keras.backend as K 49 | 50 | 51 | def _get_num_tensor_dimensions(input_tensor): 52 | """Returns number of dimensions in tensor. 53 | 54 | :param input_tensor: Keras tensor. 55 | :return: num_dimensions: Number of dimensions. 56 | """ 57 | 58 | return len(input_tensor.get_shape().as_list()) 59 | 60 | 61 | def _get_num_true_positives(target_tensor, forecast_probability_tensor): 62 | """Returns number of true positives ("a" in the docstring). 63 | 64 | :param target_tensor: See docstring for the 2 possible formats. 65 | :param forecast_probability_tensor: Same. 66 | :return: num_true_positives: Number of true positives. 67 | """ 68 | 69 | num_dimensions = _get_num_tensor_dimensions(target_tensor) 70 | if num_dimensions == 1: 71 | return K.sum(K.clip( 72 | target_tensor * forecast_probability_tensor, 0., 1.)) 73 | 74 | if num_dimensions == 2: 75 | return K.sum(K.clip( 76 | target_tensor[..., -1] * forecast_probability_tensor[..., -1], 77 | 0., 1.)) 78 | 79 | return None 80 | 81 | 82 | def _get_num_false_positives(target_tensor, forecast_probability_tensor): 83 | """Returns number of false positives ("b" in the docstring). 84 | 85 | :param target_tensor: See docstring for the 2 possible formats. 86 | :param forecast_probability_tensor: Same. 87 | :return: num_false_positives: Number of false positives. 88 | """ 89 | 90 | num_dimensions = _get_num_tensor_dimensions(target_tensor) 91 | if num_dimensions == 1: 92 | return K.sum(K.clip( 93 | (1. - target_tensor) * forecast_probability_tensor, 0., 1.)) 94 | 95 | if num_dimensions == 2: 96 | return K.sum(K.clip( 97 | (1. - target_tensor[..., -1]) * 98 | forecast_probability_tensor[..., -1], 99 | 0., 1.)) 100 | 101 | return None 102 | 103 | 104 | def _get_num_false_negatives(target_tensor, forecast_probability_tensor): 105 | """Returns number of false negatives ("c" in the docstring). 106 | 107 | :param target_tensor: See docstring for the 2 possible formats. 108 | :param forecast_probability_tensor: Same. 109 | :return: num_false_negatives: Number of false negatives. 110 | """ 111 | 112 | num_dimensions = _get_num_tensor_dimensions(target_tensor) 113 | if num_dimensions == 1: 114 | return K.sum(K.clip( 115 | target_tensor * (1. - forecast_probability_tensor), 0., 1.)) 116 | 117 | if num_dimensions == 2: 118 | return K.sum(K.clip( 119 | target_tensor[..., -1] * 120 | (1. - forecast_probability_tensor[..., -1]), 121 | 0., 1.)) 122 | 123 | return None 124 | 125 | 126 | def _get_num_true_negatives(target_tensor, forecast_probability_tensor): 127 | """Returns number of false negatives ("d" in the docstring). 128 | 129 | :param target_tensor: See docstring for the 2 possible formats. 130 | :param forecast_probability_tensor: Same. 131 | :return: num_true_negatives: Number of true negatives. 132 | """ 133 | 134 | num_dimensions = _get_num_tensor_dimensions(target_tensor) 135 | if num_dimensions == 1: 136 | return K.sum(K.clip( 137 | (1. - target_tensor) * (1. - forecast_probability_tensor), 0., 1.)) 138 | 139 | if num_dimensions == 2: 140 | return K.sum(K.clip( 141 | (1. - target_tensor[..., -1]) * 142 | (1. - forecast_probability_tensor[..., -1]), 143 | 0., 1.)) 144 | 145 | return None 146 | 147 | 148 | def accuracy(target_tensor, forecast_probability_tensor): 149 | """Returns accuracy. 150 | 151 | :param target_tensor: See docstring for the 2 possible formats. 152 | :param forecast_probability_tensor: Same. 153 | :return: accuracy: Accuracy. 
154 | """ 155 | 156 | return K.mean(K.clip(target_tensor * forecast_probability_tensor, 0., 1.)) 157 | 158 | 159 | def binary_accuracy(target_tensor, forecast_probability_tensor): 160 | """Returns binary accuracy ([a + d] / [a + b + c + d]). 161 | 162 | :param target_tensor: See docstring for the 2 possible formats. 163 | :param forecast_probability_tensor: Same. 164 | :return: binary_accuracy: Binary accuracy. 165 | """ 166 | 167 | a = _get_num_true_positives(target_tensor, forecast_probability_tensor) 168 | b = _get_num_false_positives(target_tensor, forecast_probability_tensor) 169 | c = _get_num_false_negatives(target_tensor, forecast_probability_tensor) 170 | d = _get_num_true_negatives(target_tensor, forecast_probability_tensor) 171 | 172 | return (a + d) / (a + b + c + d + K.epsilon()) 173 | 174 | 175 | def binary_csi(target_tensor, forecast_probability_tensor): 176 | """Returns binary critical success index (a / [a + b + c]). 177 | 178 | :param target_tensor: See docstring for the 2 possible formats. 179 | :param forecast_probability_tensor: Same. 180 | :return: binary_csi: Binary CSI. 181 | """ 182 | 183 | a = _get_num_true_positives(target_tensor, forecast_probability_tensor) 184 | b = _get_num_false_positives(target_tensor, forecast_probability_tensor) 185 | c = _get_num_false_negatives(target_tensor, forecast_probability_tensor) 186 | 187 | return a / (a + b + c + K.epsilon()) 188 | 189 | 190 | def binary_frequency_bias(target_tensor, forecast_probability_tensor): 191 | """Returns binary frequency bias ([a + b] / [a + c]). 192 | 193 | :param target_tensor: See docstring for the 2 possible formats. 194 | :param forecast_probability_tensor: Same. 195 | :return: binary_frequency_bias: Binary frequency bias. 196 | """ 197 | 198 | a = _get_num_true_positives(target_tensor, forecast_probability_tensor) 199 | b = _get_num_false_positives(target_tensor, forecast_probability_tensor) 200 | c = _get_num_false_negatives(target_tensor, forecast_probability_tensor) 201 | 202 | return (a + b) / (a + c + K.epsilon()) 203 | 204 | 205 | def binary_pod(target_tensor, forecast_probability_tensor): 206 | """Returns binary probability of detection (a / [a + c]). 207 | 208 | :param target_tensor: See docstring for the 2 possible formats. 209 | :param forecast_probability_tensor: Same. 210 | :return: binary_pod: Binary POD. 211 | """ 212 | 213 | a = _get_num_true_positives(target_tensor, forecast_probability_tensor) 214 | c = _get_num_false_negatives(target_tensor, forecast_probability_tensor) 215 | 216 | return a / (a + c + K.epsilon()) 217 | 218 | 219 | def binary_fom(target_tensor, forecast_probability_tensor): 220 | """Returns binary frequency of misses (c / [a + c]). 221 | 222 | :param target_tensor: See docstring for the 2 possible formats. 223 | :param forecast_probability_tensor: Same. 224 | :return: binary_fom: Binary FOM. 225 | """ 226 | 227 | return 1. - binary_pod(target_tensor, forecast_probability_tensor) 228 | 229 | 230 | def binary_pofd(target_tensor, forecast_probability_tensor): 231 | """Returns binary probability of false detection (b / [b + d]). 232 | 233 | :param target_tensor: See docstring for the 2 possible formats. 234 | :param forecast_probability_tensor: Same. 235 | :return: binary_pofd: Binary POFD. 
236 | """ 237 | 238 | b = _get_num_false_positives(target_tensor, forecast_probability_tensor) 239 | d = _get_num_true_negatives(target_tensor, forecast_probability_tensor) 240 | 241 | return b / (b + d + K.epsilon()) 242 | 243 | 244 | def binary_peirce_score(target_tensor, forecast_probability_tensor): 245 | """Returns binary Peirce score. 246 | 247 | :param target_tensor: See docstring for the 2 possible formats. 248 | :param forecast_probability_tensor: Same. 249 | :return: binary_peirce_score: Binary Peirce score. 250 | """ 251 | 252 | return binary_pod(target_tensor, forecast_probability_tensor) - binary_pofd( 253 | target_tensor, forecast_probability_tensor) 254 | 255 | 256 | def binary_npv(target_tensor, forecast_probability_tensor): 257 | """Returns binary negative predictive value (d / [b + d]). 258 | 259 | :param target_tensor: See docstring for the 2 possible formats. 260 | :param forecast_probability_tensor: Same. 261 | :return: binary_npv: Binary NPV. 262 | """ 263 | 264 | return 1. - binary_pofd(target_tensor, forecast_probability_tensor) 265 | 266 | 267 | def binary_success_ratio(target_tensor, forecast_probability_tensor): 268 | """Returns binary success ratio (a / [a + b]). 269 | 270 | :param target_tensor: See docstring for the 2 possible formats. 271 | :param forecast_probability_tensor: Same. 272 | :return: binary_success_ratio: Binary success ratio. 273 | """ 274 | 275 | a = _get_num_true_positives(target_tensor, forecast_probability_tensor) 276 | b = _get_num_false_positives(target_tensor, forecast_probability_tensor) 277 | 278 | return a / (a + b + K.epsilon()) 279 | 280 | 281 | def binary_far(target_tensor, forecast_probability_tensor): 282 | """Returns binary false-alarm rate (b / [a + b]). 283 | 284 | :param target_tensor: See docstring for the 2 possible formats. 285 | :param forecast_probability_tensor: Same. 286 | :return: binary_far: Binary false-alarm rate. 287 | """ 288 | 289 | return 1. - binary_success_ratio(target_tensor, forecast_probability_tensor) 290 | 291 | 292 | def binary_dfr(target_tensor, forecast_probability_tensor): 293 | """Returns binary detection-failure ratio (c / [c + d]). 294 | 295 | :param target_tensor: See docstring for the 2 possible formats. 296 | :param forecast_probability_tensor: Same. 297 | :return: binary_dfr: Binary DFR. 298 | """ 299 | 300 | c = _get_num_false_negatives(target_tensor, forecast_probability_tensor) 301 | d = _get_num_true_negatives(target_tensor, forecast_probability_tensor) 302 | 303 | return c / (c + d + K.epsilon()) 304 | 305 | 306 | def binary_focn(target_tensor, forecast_probability_tensor): 307 | """Returns binary frequency of correct nulls (d / [c + d]). 308 | 309 | :param target_tensor: See docstring for the 2 possible formats. 310 | :param forecast_probability_tensor: Same. 311 | :return: binary_focn: Binary FOCN. 312 | """ 313 | 314 | return 1. - binary_dfr(target_tensor, forecast_probability_tensor) 315 | -------------------------------------------------------------------------------- /Advanced_Topics_In_Machine_Learning/ML_Model_Interpretation/evaluation/performance_diagrams.py: -------------------------------------------------------------------------------- 1 | """Methods for plotting performance diagram.""" 2 | 3 | import numpy 4 | import matplotlib.colors 5 | import matplotlib.pyplot as pyplot 6 | 7 | DEFAULT_LINE_COLOUR = numpy.array([228, 26, 28], dtype=float) / 255 8 | DEFAULT_LINE_WIDTH = 3 9 | DEFAULT_BIAS_LINE_COLOUR = numpy.full(3, 152. 
/ 255) 10 | DEFAULT_BIAS_LINE_WIDTH = 2 11 | 12 | LEVELS_FOR_CSI_CONTOURS = numpy.linspace(0, 1, num=11, dtype=float) 13 | LEVELS_FOR_BIAS_CONTOURS = numpy.array( 14 | [0.25, 0.5, 0.75, 1., 1.5, 2., 3., 5.]) 15 | 16 | BIAS_STRING_FORMAT = '%.2f' 17 | BIAS_LABEL_PADDING_PX = 10 18 | 19 | FIGURE_WIDTH_INCHES = 15 20 | FIGURE_HEIGHT_INCHES = 15 21 | 22 | FONT_SIZE = 30 23 | pyplot.rc('font', size=FONT_SIZE) 24 | pyplot.rc('axes', titlesize=FONT_SIZE) 25 | pyplot.rc('axes', labelsize=FONT_SIZE) 26 | pyplot.rc('xtick', labelsize=FONT_SIZE) 27 | pyplot.rc('ytick', labelsize=FONT_SIZE) 28 | pyplot.rc('legend', fontsize=FONT_SIZE) 29 | pyplot.rc('figure', titlesize=FONT_SIZE) 30 | 31 | 32 | def _get_sr_pod_grid(success_ratio_spacing=0.01, pod_spacing=0.01): 33 | """Creates grid in SR-POD (success ratio / probability of detection) space. 34 | 35 | M = number of rows (unique POD values) in grid 36 | N = number of columns (unique success ratios) in grid 37 | 38 | :param success_ratio_spacing: Spacing between grid cells in adjacent 39 | columns. 40 | :param pod_spacing: Spacing between grid cells in adjacent rows. 41 | :return: success_ratio_matrix: M-by-N numpy array of success ratios. 42 | Success ratio increases with column index. 43 | :return: pod_matrix: M-by-N numpy array of POD values. POD decreases with 44 | row index. 45 | """ 46 | 47 | num_success_ratios = 1 + int(numpy.ceil(1. / success_ratio_spacing)) 48 | num_pod_values = 1 + int(numpy.ceil(1. / pod_spacing)) 49 | 50 | unique_success_ratios = numpy.linspace(0., 1., num=num_success_ratios) 51 | unique_pod_values = numpy.linspace(0., 1., num=num_pod_values)[::-1] 52 | return numpy.meshgrid(unique_success_ratios, unique_pod_values) 53 | 54 | 55 | def _csi_from_sr_and_pod(success_ratio_array, pod_array): 56 | """Computes CSI (critical success index) from success ratio and POD. 57 | 58 | POD = probability of detection 59 | 60 | :param success_ratio_array: numpy array (any shape) of success ratios. 61 | :param pod_array: numpy array (same shape) of POD values. 62 | :return: csi_array: numpy array (same shape) of CSI values. 63 | """ 64 | 65 | return (success_ratio_array ** -1 + pod_array ** -1 - 1.) ** -1 66 | 67 | 68 | def _bias_from_sr_and_pod(success_ratio_array, pod_array): 69 | """Computes frequency bias from success ratio and POD. 70 | 71 | POD = probability of detection 72 | 73 | :param success_ratio_array: numpy array (any shape) of success ratios. 74 | :param pod_array: numpy array (same shape) of POD values. 75 | :return: frequency_bias_array: numpy array (same shape) of frequency biases. 76 | """ 77 | 78 | return pod_array / success_ratio_array 79 | 80 | 81 | def _get_csi_colour_scheme(): 82 | """Returns colour scheme for CSI (critical success index). 83 | 84 | :return: colour_map_object: Colour scheme (instance of 85 | `matplotlib.colors.ListedColormap`). 86 | :return: colour_norm_object: Instance of `matplotlib.colors.BoundaryNorm`, 87 | defining the scale of the colour map. 
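The returned pair is meant to be passed as the `cmap` and `norm` arguments of `pyplot.contourf`, as done in `plot_performance_diagram`.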
88 | """ 89 | 90 | this_colour_map_object = pyplot.cm.Blues 91 | this_colour_norm_object = matplotlib.colors.BoundaryNorm( 92 | LEVELS_FOR_CSI_CONTOURS, this_colour_map_object.N) 93 | 94 | rgba_matrix = this_colour_map_object(this_colour_norm_object( 95 | LEVELS_FOR_CSI_CONTOURS)) 96 | colour_list = [ 97 | rgba_matrix[i, ..., :-1] for i in range(rgba_matrix.shape[0]) 98 | ] 99 | 100 | colour_map_object = matplotlib.colors.ListedColormap(colour_list) 101 | colour_map_object.set_under(numpy.array([1, 1, 1])) 102 | colour_norm_object = matplotlib.colors.BoundaryNorm( 103 | LEVELS_FOR_CSI_CONTOURS, colour_map_object.N) 104 | 105 | return colour_map_object, colour_norm_object 106 | 107 | 108 | def _add_colour_bar( 109 | axes_object, colour_map_object, values_to_colour, min_colour_value, 110 | max_colour_value, colour_norm_object=None, 111 | orientation_string='vertical', extend_min=True, extend_max=True, 112 | fraction_of_axis_length=1., font_size=FONT_SIZE): 113 | """Adds colour bar to existing axes. 114 | 115 | :param axes_object: Existing axes (instance of 116 | `matplotlib.axes._subplots.AxesSubplot`). 117 | :param colour_map_object: Colour scheme (instance of 118 | `matplotlib.pyplot.cm`). 119 | :param values_to_colour: numpy array of values to colour. 120 | :param min_colour_value: Minimum value in colour map. 121 | :param max_colour_value: Max value in colour map. 122 | :param colour_norm_object: Instance of `matplotlib.colors.BoundaryNorm`, 123 | defining the scale of the colour map. If `colour_norm_object is None`, 124 | will assume that scale is linear. 125 | :param orientation_string: Orientation of colour bar ("vertical" or 126 | "horizontal"). 127 | :param extend_min: Boolean flag. If True, the bottom of the colour bar will 128 | have an arrow. If False, it will be a flat line, suggesting that lower 129 | values are not possible. 130 | :param extend_max: Same but for top of colour bar. 131 | :param fraction_of_axis_length: Fraction of axis length (y-axis if 132 | orientation is "vertical", x-axis if orientation is "horizontal") 133 | occupied by colour bar. 134 | :param font_size: Font size for labels on colour bar. 135 | :return: colour_bar_object: Colour bar (instance of 136 | `matplotlib.pyplot.colorbar`) created by this method. 137 | """ 138 | 139 | if colour_norm_object is None: 140 | colour_norm_object = matplotlib.colors.Normalize( 141 | vmin=min_colour_value, vmax=max_colour_value, clip=False) 142 | 143 | scalar_mappable_object = pyplot.cm.ScalarMappable( 144 | cmap=colour_map_object, norm=colour_norm_object) 145 | scalar_mappable_object.set_array(values_to_colour) 146 | 147 | if extend_min and extend_max: 148 | extend_string = 'both' 149 | elif extend_min: 150 | extend_string = 'min' 151 | elif extend_max: 152 | extend_string = 'max' 153 | else: 154 | extend_string = 'neither' 155 | 156 | if orientation_string == 'horizontal': 157 | padding = 0.075 158 | else: 159 | padding = 0.05 160 | 161 | colour_bar_object = pyplot.colorbar( 162 | ax=axes_object, mappable=scalar_mappable_object, 163 | orientation=orientation_string, pad=padding, extend=extend_string, 164 | shrink=fraction_of_axis_length) 165 | 166 | colour_bar_object.ax.tick_params(labelsize=font_size) 167 | return colour_bar_object 168 | 169 | 170 | def _get_points_in_perf_diagram(observed_labels, forecast_probabilities): 171 | """Creates points for performance diagram. 
172 | 173 | E = number of examples 174 | T = number of binarization thresholds 175 | 176 | :param observed_labels: length-E numpy array of class labels (integers in 177 | 0...1). 178 | :param forecast_probabilities: length-E numpy array with forecast 179 | probabilities of label = 1. 180 | :return: pod_by_threshold: length-T numpy array of POD (probability of 181 | detection) values. 182 | :return: success_ratio_by_threshold: length-T numpy array of success ratios. 183 | """ 184 | 185 | assert numpy.all(numpy.logical_or( 186 | observed_labels == 0, observed_labels == 1 187 | )) 188 | 189 | assert numpy.all(numpy.logical_and( 190 | forecast_probabilities >= 0, forecast_probabilities <= 1 191 | )) 192 | 193 | observed_labels = observed_labels.astype(int) 194 | binarization_thresholds = numpy.linspace(0, 1, num=1001, dtype=float) 195 | 196 | num_thresholds = len(binarization_thresholds) 197 | pod_by_threshold = numpy.full(num_thresholds, numpy.nan) 198 | success_ratio_by_threshold = numpy.full(num_thresholds, numpy.nan) 199 | 200 | for k in range(num_thresholds): 201 | these_forecast_labels = ( 202 | forecast_probabilities >= binarization_thresholds[k] 203 | ).astype(int) 204 | 205 | this_num_hits = numpy.sum(numpy.logical_and( 206 | these_forecast_labels == 1, observed_labels == 1 207 | )) 208 | 209 | this_num_false_alarms = numpy.sum(numpy.logical_and( 210 | these_forecast_labels == 1, observed_labels == 0 211 | )) 212 | 213 | this_num_misses = numpy.sum(numpy.logical_and( 214 | these_forecast_labels == 0, observed_labels == 1 215 | )) 216 | 217 | try: 218 | pod_by_threshold[k] = ( 219 | float(this_num_hits) / (this_num_hits + this_num_misses) 220 | ) 221 | except ZeroDivisionError: 222 | pass 223 | 224 | try: 225 | success_ratio_by_threshold[k] = ( 226 | float(this_num_hits) / (this_num_hits + this_num_false_alarms) 227 | ) 228 | except ZeroDivisionError: 229 | pass 230 | 231 | pod_by_threshold = numpy.array([1.] + pod_by_threshold.tolist() + [0.]) 232 | success_ratio_by_threshold = numpy.array( 233 | [0.] + success_ratio_by_threshold.tolist() + [1.] 234 | ) 235 | 236 | return pod_by_threshold, success_ratio_by_threshold 237 | 238 | 239 | def plot_performance_diagram( 240 | observed_labels, forecast_probabilities, 241 | line_colour=DEFAULT_LINE_COLOUR, line_width=DEFAULT_LINE_WIDTH, 242 | bias_line_colour=DEFAULT_BIAS_LINE_COLOUR, 243 | bias_line_width=DEFAULT_BIAS_LINE_WIDTH): 244 | """Plots performance diagram. 245 | 246 | E = number of examples 247 | 248 | :param observed_labels: length-E numpy array of class labels (integers in 249 | 0...1). 250 | :param forecast_probabilities: length-E numpy array with forecast 251 | probabilities of label = 1. 252 | :param line_colour: Colour (in any format accepted by `matplotlib.colors`). 253 | :param line_width: Line width (real positive number). 254 | :param bias_line_colour: Colour of contour lines for frequency bias. 255 | :param bias_line_width: Width of contour lines for frequency bias. 256 | :return: pod_by_threshold: See doc for 257 | `_get_points_in_perf_diagram`. 258 | :return: success_ratio_by_threshold: Same.
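Example (illustrative, with made-up inputs):

    observed_labels = numpy.array([0, 0, 1, 1, 0, 1])
    forecast_probabilities = numpy.array([0.1, 0.3, 0.7, 0.9, 0.2, 0.6])
    pod_by_threshold, success_ratio_by_threshold = plot_performance_diagram(
        observed_labels=observed_labels,
        forecast_probabilities=forecast_probabilities)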
259 | """ 260 | 261 | pod_by_threshold, success_ratio_by_threshold = _get_points_in_perf_diagram( 262 | observed_labels=observed_labels, 263 | forecast_probabilities=forecast_probabilities) 264 | 265 | _, axes_object = pyplot.subplots( 266 | 1, 1, figsize=(FIGURE_WIDTH_INCHES, FIGURE_HEIGHT_INCHES) 267 | ) 268 | 269 | success_ratio_matrix, pod_matrix = _get_sr_pod_grid() 270 | csi_matrix = _csi_from_sr_and_pod(success_ratio_matrix, pod_matrix) 271 | frequency_bias_matrix = _bias_from_sr_and_pod( 272 | success_ratio_matrix, pod_matrix) 273 | 274 | this_colour_map_object, this_colour_norm_object = _get_csi_colour_scheme() 275 | 276 | pyplot.contourf( 277 | success_ratio_matrix, pod_matrix, csi_matrix, LEVELS_FOR_CSI_CONTOURS, 278 | cmap=this_colour_map_object, norm=this_colour_norm_object, vmin=0., 279 | vmax=1., axes=axes_object) 280 | 281 | colour_bar_object = _add_colour_bar( 282 | axes_object=axes_object, colour_map_object=this_colour_map_object, 283 | colour_norm_object=this_colour_norm_object, 284 | values_to_colour=csi_matrix, min_colour_value=0., 285 | max_colour_value=1., orientation_string='vertical', 286 | extend_min=False, extend_max=False) 287 | colour_bar_object.set_label('CSI (critical success index)') 288 | 289 | bias_colour_tuple = () 290 | for _ in range(len(LEVELS_FOR_BIAS_CONTOURS)): 291 | bias_colour_tuple += (bias_line_colour,) 292 | 293 | bias_contour_object = pyplot.contour( 294 | success_ratio_matrix, pod_matrix, frequency_bias_matrix, 295 | LEVELS_FOR_BIAS_CONTOURS, colors=bias_colour_tuple, 296 | linewidths=bias_line_width, linestyles='dashed', axes=axes_object) 297 | pyplot.clabel( 298 | bias_contour_object, inline=True, inline_spacing=BIAS_LABEL_PADDING_PX, 299 | fmt=BIAS_STRING_FORMAT, fontsize=FONT_SIZE) 300 | 301 | nan_flags = numpy.logical_or( 302 | numpy.isnan(success_ratio_by_threshold), numpy.isnan(pod_by_threshold) 303 | ) 304 | 305 | if not numpy.all(nan_flags): 306 | real_indices = numpy.where(numpy.invert(nan_flags))[0] 307 | axes_object.plot( 308 | success_ratio_by_threshold[real_indices], 309 | pod_by_threshold[real_indices], color=line_colour, 310 | linestyle='solid', linewidth=line_width) 311 | 312 | axes_object.set_xlabel('Success ratio (1 - FAR)') 313 | axes_object.set_ylabel('POD (probability of detection)') 314 | axes_object.set_xlim(0., 1.) 315 | axes_object.set_ylim(0., 1.) 316 | 317 | return pod_by_threshold, success_ratio_by_threshold 318 | -------------------------------------------------------------------------------- /Advanced_Topics_In_Machine_Learning/ML_Model_Interpretation/evaluation/roc_curves.py: -------------------------------------------------------------------------------- 1 | """Methods for plotting ROC (receiver operating characteristic) curve.""" 2 | 3 | import numpy 4 | import matplotlib.pyplot as pyplot 5 | 6 | DEFAULT_LINE_COLOUR = numpy.array([228, 26, 28], dtype=float) / 255 7 | DEFAULT_LINE_WIDTH = 3 8 | DEFAULT_RANDOM_LINE_COLOUR = numpy.full(3, 152. / 255) 9 | DEFAULT_RANDOM_LINE_WIDTH = 2 10 | 11 | FIGURE_WIDTH_INCHES = 15 12 | FIGURE_HEIGHT_INCHES = 15 13 | 14 | FONT_SIZE = 30 15 | pyplot.rc('font', size=FONT_SIZE) 16 | pyplot.rc('axes', titlesize=FONT_SIZE) 17 | pyplot.rc('axes', labelsize=FONT_SIZE) 18 | pyplot.rc('xtick', labelsize=FONT_SIZE) 19 | pyplot.rc('ytick', labelsize=FONT_SIZE) 20 | pyplot.rc('legend', fontsize=FONT_SIZE) 21 | pyplot.rc('figure', titlesize=FONT_SIZE) 22 | 23 | 24 | def _get_points_in_roc_curve(observed_labels, forecast_probabilities): 25 | """Creates points for ROC curve. 
26 | 27 | E = number of examples 28 | T = number of binarization thresholds 29 | 30 | :param observed_labels: length-E numpy array of class labels (integers in 31 | 0...1). 32 | :param forecast_probabilities: length-E numpy array with forecast 33 | probabilities of label = 1. 34 | :return: pofd_by_threshold: length-T numpy array of POFD (probability of 35 | false detection) values. 36 | :return: pod_by_threshold: length-T numpy array of POD (probability of 37 | detection) values. 38 | """ 39 | 40 | assert numpy.all(numpy.logical_or( 41 | observed_labels == 0, observed_labels == 1 42 | )) 43 | 44 | assert numpy.all(numpy.logical_and( 45 | forecast_probabilities >= 0, forecast_probabilities <= 1 46 | )) 47 | 48 | observed_labels = observed_labels.astype(int) 49 | binarization_thresholds = numpy.linspace(0, 1, num=1001, dtype=float) 50 | 51 | num_thresholds = len(binarization_thresholds) 52 | pofd_by_threshold = numpy.full(num_thresholds, numpy.nan) 53 | pod_by_threshold = numpy.full(num_thresholds, numpy.nan) 54 | 55 | for k in range(num_thresholds): 56 | these_forecast_labels = ( 57 | forecast_probabilities >= binarization_thresholds[k] 58 | ).astype(int) 59 | 60 | this_num_hits = numpy.sum(numpy.logical_and( 61 | these_forecast_labels == 1, observed_labels == 1 62 | )) 63 | 64 | this_num_false_alarms = numpy.sum(numpy.logical_and( 65 | these_forecast_labels == 1, observed_labels == 0 66 | )) 67 | 68 | this_num_misses = numpy.sum(numpy.logical_and( 69 | these_forecast_labels == 0, observed_labels == 1 70 | )) 71 | 72 | this_num_correct_nulls = numpy.sum(numpy.logical_and( 73 | these_forecast_labels == 0, observed_labels == 0 74 | )) 75 | 76 | try: 77 | pofd_by_threshold[k] = ( 78 | float(this_num_false_alarms) / 79 | (this_num_false_alarms + this_num_correct_nulls) 80 | ) 81 | except ZeroDivisionError: 82 | pass 83 | 84 | try: 85 | pod_by_threshold[k] = ( 86 | float(this_num_hits) / (this_num_hits + this_num_misses) 87 | ) 88 | except ZeroDivisionError: 89 | pass 90 | 91 | pod_by_threshold = numpy.array([1.] + pod_by_threshold.tolist() + [0.]) 92 | pofd_by_threshold = numpy.array([1.] + pofd_by_threshold.tolist() + [0.]) 93 | 94 | return pofd_by_threshold, pod_by_threshold 95 | 96 | 97 | def plot_roc_curve( 98 | observed_labels, forecast_probabilities, 99 | line_colour=DEFAULT_LINE_COLOUR, line_width=DEFAULT_LINE_WIDTH, 100 | random_line_colour=DEFAULT_RANDOM_LINE_COLOUR, 101 | random_line_width=DEFAULT_RANDOM_LINE_WIDTH): 102 | """Plots ROC curve. 103 | 104 | E = number of examples 105 | 106 | :param observed_labels: length-E numpy array of class labels (integers in 107 | 0...1). 108 | :param forecast_probabilities: length-E numpy array with forecast 109 | probabilities of label = 1. 110 | :param line_colour: Colour (in any format accepted by `matplotlib.colors`). 111 | :param line_width: Line width (real positive number). 112 | :param random_line_colour: Colour of reference line (ROC curve for random 113 | predictor). 114 | :param random_line_width: Width of reference line (ROC curve for random 115 | predictor). 116 | :return: pofd_by_threshold: See doc for `_get_points_in_roc_curve`. 117 | :return: pod_by_threshold: Same. 
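Example (illustrative, with made-up inputs):

    observed_labels = numpy.array([0, 1, 1, 0, 1])
    forecast_probabilities = numpy.array([0.2, 0.8, 0.6, 0.4, 0.9])
    pofd_by_threshold, pod_by_threshold = plot_roc_curve(
        observed_labels=observed_labels,
        forecast_probabilities=forecast_probabilities)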
118 | """ 119 | 120 | pofd_by_threshold, pod_by_threshold = _get_points_in_roc_curve( 121 | observed_labels=observed_labels, 122 | forecast_probabilities=forecast_probabilities) 123 | 124 | _, axes_object = pyplot.subplots( 125 | 1, 1, figsize=(FIGURE_WIDTH_INCHES, FIGURE_HEIGHT_INCHES) 126 | ) 127 | 128 | random_x_coords = numpy.array([0., 1.]) 129 | random_y_coords = numpy.array([0., 1.]) 130 | axes_object.plot( 131 | random_x_coords, random_y_coords, color=random_line_colour, 132 | linestyle='dashed', linewidth=random_line_width) 133 | 134 | nan_flags = numpy.logical_or( 135 | numpy.isnan(pofd_by_threshold), numpy.isnan(pod_by_threshold) 136 | ) 137 | 138 | if not numpy.all(nan_flags): 139 | real_indices = numpy.where(numpy.invert(nan_flags))[0] 140 | axes_object.plot( 141 | pofd_by_threshold[real_indices], pod_by_threshold[real_indices], 142 | color=line_colour, linestyle='solid', linewidth=line_width) 143 | 144 | axes_object.set_xlabel('POFD (probability of false detection)') 145 | axes_object.set_ylabel('POD (probability of detection)') 146 | axes_object.set_xlim(0., 1.) 147 | axes_object.set_ylim(0., 1.) 148 | 149 | return pofd_by_threshold, pod_by_threshold 150 | -------------------------------------------------------------------------------- /Advanced_Topics_In_Machine_Learning/ML_Model_Interpretation/interpretation/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /Advanced_Topics_In_Machine_Learning/ML_Model_Interpretation/interpretation/backwards_optimization.py: -------------------------------------------------------------------------------- 1 | """Helper methods for backwards optimization.""" 2 | 3 | import numpy 4 | from keras import backend as K 5 | 6 | DEFAULT_LEARNING_RATE = 0.001 7 | DEFAULT_NUM_ITERATIONS = 1000 8 | DEFAULT_L2_WEIGHT = 1. 9 | 10 | 11 | def _optimize_input_one_example( 12 | model_object, input_matrix, activation_tensor, loss_tensor, 13 | num_iterations, learning_rate, l2_weight): 14 | """Optimizes inputs (predictors) for one example. 15 | 16 | :param model_object: See doc for `optimize_example_for_class`. 17 | :param input_matrix: Same. 18 | :param activation_tensor: Keras tensor defining activation of relevant model 19 | component. 20 | :param loss_tensor: Keras tensor defining loss (difference between actual 21 | and desired activation). 22 | :param num_iterations: See doc for `optimize_example_for_class`. 23 | :param learning_rate: Same. 24 | :param l2_weight: Same. 25 | :return: optimized_input_matrix: Same. 26 | :return: initial_activation: Same. 27 | :return: final_activation: Same. 28 | """ 29 | 30 | if isinstance(model_object.input, list): 31 | input_tensor = model_object.input[0] 32 | else: 33 | input_tensor = model_object.input 34 | 35 | optimized_input_matrix = input_matrix + 0. 36 | 37 | if l2_weight is not None: 38 | difference_tensor = ( 39 | input_tensor[0, ...] - optimized_input_matrix[0, ...] 
40 | ) 41 | 42 | loss_tensor += l2_weight * K.sum(difference_tensor ** 2) 43 | 44 | gradient_tensor = K.gradients(loss_tensor, [input_tensor])[0] 45 | gradient_tensor /= K.maximum( 46 | K.sqrt(K.mean(gradient_tensor ** 2)), 47 | K.epsilon() 48 | ) 49 | 50 | grad_descent_function = K.function( 51 | [input_tensor, K.learning_phase()], 52 | [activation_tensor, loss_tensor, gradient_tensor] 53 | ) 54 | 55 | initial_activation = None 56 | current_loss = None 57 | current_activation = None 58 | 59 | for j in range(num_iterations): 60 | vals = grad_descent_function([optimized_input_matrix, 0]) 61 | current_loss = vals[1] 62 | current_activation = vals[0][0] 63 | current_gradient = vals[2] 64 | 65 | if j == 0: 66 | initial_activation = current_activation 67 | 68 | if numpy.mod(j, 100) == 0: 69 | print(( 70 | 'Loss after {0:d} of {1:d} iterations = {2:.2e} ... ' 71 | 'activation = {3:.2e}' 72 | ).format( 73 | j, num_iterations, current_loss, current_activation 74 | )) 75 | 76 | optimized_input_matrix -= current_gradient * learning_rate 77 | 78 | final_activation = current_activation 79 | 80 | print(( 81 | 'Loss after {0:d} iterations = {1:.2e} ... activation = {2:.2e}' 82 | ).format( 83 | num_iterations, current_loss, final_activation 84 | )) 85 | 86 | return optimized_input_matrix, initial_activation, final_activation 87 | 88 | 89 | def optimize_example_for_class( 90 | model_object, input_matrix, target_class, 91 | num_iterations=DEFAULT_NUM_ITERATIONS, 92 | learning_rate=DEFAULT_LEARNING_RATE, 93 | l2_weight=DEFAULT_L2_WEIGHT): 94 | """Optimizes one example to maximize probability of target class. 95 | 96 | :param model_object: Trained model (instance of `keras.models.Model` or 97 | `keras.models.Sequential`). 98 | :param input_matrix: numpy array with inputs (predictors) for one example. 99 | :param target_class: Target class. Must be an integer in 0...(K - 1), where 100 | K = number of classes. 101 | :param num_iterations: Number of iterations for gradient descent. 102 | :param learning_rate: Learning rate for gradient descent. 103 | :param l2_weight: Strength of L_2 penalty (on difference between original 104 | and optimized input matrices). If you do not want an L_2 penalty, make 105 | this None. 106 | :return: optimized_input_matrix: Same as input matrix but with different 107 | values. 108 | :return: initial_activation: Initial activation of relevant model component 109 | (before any backwards optimization). 110 | :return: final_activation: Final activation (after backwards optimization). 111 | """ 112 | 113 | # Check input args. 114 | target_class = int(numpy.round(target_class)) 115 | num_iterations = int(numpy.round(num_iterations)) 116 | 117 | assert not numpy.any(numpy.isnan(input_matrix)) 118 | assert target_class >= 0 119 | assert num_iterations > 0 120 | assert learning_rate > 0. 
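# A None or non-positive L2 weight disables the penalty:
# _optimize_input_one_example only adds the L2 term when l2_weight is not None.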
121 | if l2_weight is None or l2_weight <= 0: 122 | l2_weight = None 123 | 124 | num_output_neurons = ( 125 | model_object.layers[-1].output.get_shape().as_list()[-1] 126 | ) 127 | 128 | if num_output_neurons == 1: 129 | assert target_class <= 1 130 | 131 | activation_tensor = model_object.layers[-1].output[..., 0] 132 | 133 | if target_class == 1: 134 | loss_tensor = K.mean((activation_tensor - 1) ** 2) 135 | else: 136 | loss_tensor = K.mean(activation_tensor ** 2) 137 | else: 138 | assert target_class < num_output_neurons 139 | 140 | activation_tensor = model_object.layers[-1].output[..., target_class] 141 | loss_tensor = K.mean((activation_tensor - 1) ** 2) 142 | 143 | return _optimize_input_one_example( 144 | model_object=model_object, input_matrix=input_matrix, 145 | activation_tensor=activation_tensor, loss_tensor=loss_tensor, 146 | num_iterations=num_iterations, learning_rate=learning_rate, 147 | l2_weight=l2_weight 148 | ) 149 | -------------------------------------------------------------------------------- /Advanced_Topics_In_Machine_Learning/ML_Model_Interpretation/interpretation/binarization.py: -------------------------------------------------------------------------------- 1 | """Helper methods for binarization of target variable.""" 2 | 3 | import numpy 4 | from interpretation import utils 5 | 6 | 7 | def get_binarization_threshold(image_file_names, percentile_level): 8 | """Computes binarization threshold for target variable. 9 | 10 | Binarization threshold will be [q]th percentile of all image maxima, where 11 | q = `percentile_level`. 12 | 13 | :param image_file_names: 1-D list of paths to input files. 14 | :param percentile_level: q in the above discussion. 15 | :return: binarization_threshold: Binarization threshold (used to turn each 16 | target image into a yes-or-no label). 17 | """ 18 | 19 | max_target_values = numpy.array([]) 20 | 21 | for this_file_name in image_file_names: 22 | print('Reading data from: "{0:s}"...'.format(this_file_name)) 23 | this_image_dict = utils.read_image_file(this_file_name) 24 | 25 | this_target_matrix = this_image_dict[utils.TARGET_MATRIX_KEY] 26 | this_num_examples = this_target_matrix.shape[0] 27 | these_max_target_values = numpy.full(this_num_examples, numpy.nan) 28 | 29 | for i in range(this_num_examples): 30 | these_max_target_values[i] = numpy.max(this_target_matrix[i, ...]) 31 | 32 | max_target_values = numpy.concatenate(( 33 | max_target_values, these_max_target_values 34 | )) 35 | 36 | binarization_threshold = numpy.percentile( 37 | max_target_values, percentile_level 38 | ) 39 | 40 | print('\nBinarization threshold for "{0:s}" = {1:.4e}'.format( 41 | utils.TARGET_NAME, binarization_threshold 42 | )) 43 | 44 | return binarization_threshold 45 | 46 | 47 | def binarize_target_images(target_matrix, binarization_threshold): 48 | """Binarizes target images. 49 | 50 | Specifically, this method turns each target image into a binary label, 51 | depending on whether or not (max value in image) >= binarization_threshold. 52 | 53 | E = number of examples (storm objects) in file 54 | M = number of rows in each storm-centered grid 55 | N = number of columns in each storm-centered grid 56 | 57 | :param target_matrix: E-by-M-by-N numpy array of floats. 58 | :param binarization_threshold: Binarization threshold. 59 | :return: target_values: length-E numpy array of target values (integers in 60 | 0...1).
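Example (illustrative): with binarization_threshold = 10., an image whose max value is 12.5 gets target value 1 and an image whose max value is 7.3 gets target value 0.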
61 | """ 62 | 63 | num_examples = target_matrix.shape[0] 64 | target_values = numpy.full(num_examples, -1, dtype=int) 65 | 66 | for i in range(num_examples): 67 | target_values[i] = ( 68 | numpy.max(target_matrix[i, ...]) >= binarization_threshold 69 | ) 70 | 71 | return target_values 72 | -------------------------------------------------------------------------------- /Advanced_Topics_In_Machine_Learning/ML_Model_Interpretation/interpretation/class_activation.py: -------------------------------------------------------------------------------- 1 | """Helper methods for class-activation maps.""" 2 | 3 | import numpy 4 | from keras import backend as K 5 | import tensorflow 6 | from scipy.interpolate import ( 7 | UnivariateSpline, RectBivariateSpline, RegularGridInterpolator 8 | ) 9 | from interpretation import utils 10 | from interpretation.saliency import _get_grid_points 11 | 12 | DEFAULT_LINE_WIDTH = 2. 13 | 14 | 15 | def _compute_gradients(loss_tensor, list_of_input_tensors): 16 | """Computes gradient of each input tensor with respect to loss tensor. 17 | 18 | T = number of tensors 19 | 20 | :param loss_tensor: Loss tensor. 21 | :param list_of_input_tensors: length-T list of input tensors. 22 | :return: list_of_gradient_tensors: length-T list of gradient tensors. 23 | """ 24 | 25 | list_of_gradient_tensors = tensorflow.gradients( 26 | loss_tensor, list_of_input_tensors 27 | ) 28 | 29 | for i in range(len(list_of_gradient_tensors)): 30 | if list_of_gradient_tensors[i] is not None: 31 | continue 32 | 33 | list_of_gradient_tensors[i] = tensorflow.zeros_like( 34 | list_of_input_tensors[i] 35 | ) 36 | 37 | return list_of_gradient_tensors 38 | 39 | 40 | def _normalize_tensor(input_tensor): 41 | """Normalizes tensor to Euclidean magnitude (or "L_2 norm") of 1.0. 42 | 43 | :param input_tensor: Input tensor. 44 | :return: output_tensor: Same as input but with Euclidean magnitude of 1.0. 45 | """ 46 | 47 | rms_tensor = K.sqrt(K.mean(K.square(input_tensor))) 48 | return input_tensor / (rms_tensor + K.epsilon()) 49 | 50 | 51 | def _upsample_cam(class_activation_matrix, new_dimensions): 52 | """Upsamples class-activation map (CAM). 53 | 54 | The CAM may be 1-, 2-, or 3-dimensional. 55 | 56 | :param class_activation_matrix: numpy array of class activations. 57 | :param new_dimensions: numpy array of new dimensions. If 58 | `class_activation_matrix` is N-dimensional, this array must be length-N. 59 | :return: class_activation_matrix: Upsampled version of input. 
60 | """ 61 | 62 | num_rows_new = new_dimensions[0] 63 | row_indices_new = numpy.linspace( 64 | 1, num_rows_new, num=num_rows_new, dtype=float 65 | ) 66 | row_indices_orig = numpy.linspace( 67 | 1, num_rows_new, num=class_activation_matrix.shape[0], dtype=float 68 | ) 69 | 70 | if len(new_dimensions) == 1: 71 | interp_object = UnivariateSpline( 72 | x=row_indices_orig, y=numpy.ravel(class_activation_matrix), 73 | k=3, s=0 74 | ) 75 | 76 | return interp_object(row_indices_new) 77 | 78 | num_columns_new = new_dimensions[1] 79 | column_indices_new = numpy.linspace( 80 | 1, num_columns_new, num=num_columns_new, dtype=float 81 | ) 82 | column_indices_orig = numpy.linspace( 83 | 1, num_columns_new, num=class_activation_matrix.shape[1], dtype=float 84 | ) 85 | 86 | if len(new_dimensions) == 2: 87 | interp_object = RectBivariateSpline( 88 | x=row_indices_orig, y=column_indices_orig, 89 | z=class_activation_matrix, kx=3, ky=3, s=0 90 | ) 91 | 92 | return interp_object(x=row_indices_new, y=column_indices_new, grid=True) 93 | 94 | num_heights_new = new_dimensions[2] 95 | height_indices_new = numpy.linspace( 96 | 1, num_heights_new, num=num_heights_new, dtype=float 97 | ) 98 | height_indices_orig = numpy.linspace( 99 | 1, num_heights_new, num=class_activation_matrix.shape[2], dtype=float 100 | ) 101 | 102 | interp_object = RegularGridInterpolator( 103 | points=(row_indices_orig, column_indices_orig, height_indices_orig), 104 | values=class_activation_matrix, method='linear' 105 | ) 106 | 107 | column_index_matrix, row_index_matrix, height_index_matrix = ( 108 | numpy.meshgrid(column_indices_new, row_indices_new, height_indices_new) 109 | ) 110 | query_point_matrix = numpy.stack( 111 | (row_index_matrix, column_index_matrix, height_index_matrix), axis=-1 112 | ) 113 | 114 | return interp_object(query_point_matrix) 115 | 116 | 117 | def _plot_cam_one_channel( 118 | class_activation_matrix_2d, axes_object, colour_map_object, 119 | min_contour_value, max_contour_value, contour_interval, 120 | line_width=DEFAULT_LINE_WIDTH): 121 | """Plots 2-D class-activation map with line contours. 122 | 123 | M = number of rows in grid 124 | N = number of columns in grid 125 | 126 | :param class_activation_matrix_2d: M-by-N numpy array of class activations. 127 | :param axes_object: Will plot on these axes (instance of 128 | `matplotlib.axes._subplots.AxesSubplot`). 129 | :param colour_map_object: Colour scheme (instance of `matplotlib.pyplot.cm` 130 | or similar). 131 | :param min_contour_value: Minimum contour value. 132 | :param max_contour_value: Max contour value. 133 | :param contour_interval: Interval between successive contours. 134 | :param line_width: Line width for contours. 135 | """ 136 | 137 | # Check input args. 138 | assert not numpy.any(numpy.isnan(class_activation_matrix_2d)) 139 | assert len(class_activation_matrix_2d.shape) == 2 140 | 141 | max_contour_value = max([ 142 | min_contour_value + 1e-6, max_contour_value 143 | ]) 144 | 145 | contour_interval = max([contour_interval, 1e-7]) 146 | contour_interval = min([ 147 | contour_interval, max_contour_value - min_contour_value 148 | ]) 149 | 150 | num_contours = 1 + int(numpy.round( 151 | (max_contour_value - min_contour_value) / contour_interval 152 | )) 153 | contour_values = numpy.linspace( 154 | min_contour_value, max_contour_value, num=num_contours, dtype=float 155 | ) 156 | 157 | # Find grid coordinates. 
158 | num_grid_rows = class_activation_matrix_2d.shape[0] 159 | num_grid_columns = class_activation_matrix_2d.shape[1] 160 | x_coord_spacing = num_grid_columns ** -1 161 | y_coord_spacing = num_grid_rows ** -1 162 | 163 | # TODO(thunderhoser): Calling private method here is a HACK. 164 | x_coords, y_coords = _get_grid_points( 165 | x_min=x_coord_spacing / 2, y_min=y_coord_spacing / 2, 166 | x_spacing=x_coord_spacing, y_spacing=y_coord_spacing, 167 | num_rows=num_grid_rows, num_columns=num_grid_columns 168 | ) 169 | 170 | x_coord_matrix, y_coord_matrix = numpy.meshgrid(x_coords, y_coords) 171 | 172 | # Plot contours. 173 | axes_object.contour( 174 | x_coord_matrix, y_coord_matrix, class_activation_matrix_2d, 175 | contour_values, cmap=colour_map_object, 176 | vmin=numpy.min(contour_values), vmax=numpy.max(contour_values), 177 | linewidths=line_width, linestyles='solid', zorder=1e6, 178 | transform=axes_object.transAxes 179 | ) 180 | 181 | 182 | def run_gradcam(model_object, input_matrix, target_class, target_layer_name): 183 | """Runs Grad-CAM (gradient-weighted class-activation-mapping). 184 | 185 | :param model_object: Trained model (instance of `keras.models.Model` or 186 | `keras.models.Sequential`). 187 | :param input_matrix: numpy array of inputs (predictors) for one example. 188 | :param target_class: Target class. Class-activation maps will be created 189 | for the [k + 1]th class, where k = `target_class`. 190 | :param target_layer_name: Name of target layer. Neuron-importance weights 191 | will be based on activations in this layer. 192 | :return: class_activation_matrix: numpy array of class activations. This 193 | array will have the same dimensions as `input_matrix` but without the 194 | final axis. For example, if `input_matrix` is 32 x 32 x 4 195 | (32 rows x 32 columns x 4 channels), `class_activation_matrix` will be 196 | 32 x 32. 197 | """ 198 | 199 | # Check input args. 200 | target_class = int(numpy.round(target_class)) 201 | assert target_class >= 0 202 | 203 | assert not numpy.any(numpy.isnan(input_matrix)) 204 | num_spatial_dim = len(input_matrix.shape) - 1 205 | assert 1 <= num_spatial_dim <= 3 206 | 207 | # Create loss tensor. 208 | output_layer_object = model_object.layers[-1].output 209 | num_output_neurons = output_layer_object.get_shape().as_list()[-1] 210 | 211 | if num_output_neurons == 1: 212 | assert target_class <= 1 213 | 214 | if target_class == 1: 215 | loss_tensor = model_object.layers[-1].input[..., 0] 216 | else: 217 | loss_tensor = -1 * model_object.layers[-1].input[..., 0] 218 | else: 219 | assert target_class < num_output_neurons 220 | loss_tensor = model_object.layers[-1].input[..., target_class] 221 | 222 | # Create gradient function. 223 | target_layer_activation_tensor = model_object.get_layer( 224 | name=target_layer_name 225 | ).output 226 | 227 | gradient_tensor = _compute_gradients( 228 | loss_tensor, [target_layer_activation_tensor] 229 | )[0] 230 | gradient_tensor = _normalize_tensor(gradient_tensor) 231 | 232 | if isinstance(model_object.input, list): 233 | input_tensor = model_object.input[0] 234 | else: 235 | input_tensor = model_object.input 236 | 237 | gradient_function = K.function( 238 | [input_tensor], 239 | [target_layer_activation_tensor, gradient_tensor] 240 | ) 241 | 242 | # Evaluate gradient function. 
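    # The model expects an example axis, so add a length-1 axis before calling
    # the gradient function, then strip it from the outputs.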
243 |     input_matrix_with_example_axis = numpy.expand_dims(input_matrix, axis=0)
244 |     target_layer_activation_matrix, gradient_matrix = gradient_function(
245 |         [input_matrix_with_example_axis]
246 |     )
247 | 
248 |     target_layer_activation_matrix = target_layer_activation_matrix[0, ...]
249 |     gradient_matrix = gradient_matrix[0, ...]
250 | 
251 |     # Compute class-activation map.
252 |     these_axes = [i for i in range(num_spatial_dim)]
253 |     mean_weight_by_filter = numpy.mean(gradient_matrix, axis=tuple(these_axes))
254 | 
255 |     class_activation_matrix = numpy.zeros(  # zero-init, per standard Grad-CAM
256 |         target_layer_activation_matrix.shape[:-1]
257 |     )
258 |     num_filters = len(mean_weight_by_filter)
259 | 
260 |     for k in range(num_filters):
261 |         class_activation_matrix += (
262 |             mean_weight_by_filter[k] * target_layer_activation_matrix[..., k]
263 |         )
264 | 
265 |     # Upsample class-activation map to input space.
266 |     input_spatial_dim = numpy.array(input_matrix.shape[:-1], dtype=int)
267 |     class_activation_matrix = _upsample_cam(
268 |         class_activation_matrix=class_activation_matrix,
269 |         new_dimensions=input_spatial_dim
270 |     )
271 | 
272 |     return numpy.maximum(class_activation_matrix, 0.)
273 | 
274 | 
275 | def smooth_cams(class_activation_matrix, smoothing_radius_grid_cells):
276 |     """Smooths class-activation maps for many examples.
277 | 
278 |     E = number of examples
279 |     D = number of spatial dimensions
280 | 
281 |     :param class_activation_matrix: numpy array with class-activation maps for
282 |         one or more examples. Should have D + 1 dimensions, and the first axis
283 |         should have length E.
284 |     :param smoothing_radius_grid_cells: e-folding radius (number of grid cells).
285 |     :return: class_activation_matrix: Smoothed version of input.
286 |     """
287 | 
288 |     num_examples = class_activation_matrix.shape[0]
289 | 
290 |     for i in range(num_examples):
291 |         class_activation_matrix[i, ...] = utils.apply_gaussian_filter(
292 |             input_matrix=class_activation_matrix[i, ...],
293 |             e_folding_radius_grid_cells=smoothing_radius_grid_cells
294 |         )
295 | 
296 |     return class_activation_matrix
297 | 
298 | 
299 | def plot_2d_cam(
300 |         class_activation_matrix_2d, axes_object_matrix, num_channels,
301 |         colour_map_object, min_contour_value, max_contour_value,
302 |         contour_interval, line_width=DEFAULT_LINE_WIDTH):
303 |     """Plots 2-D class-activation map for one example.
304 | 
305 |     :param class_activation_matrix_2d: See doc for `_plot_cam_one_channel`.
306 |     :param axes_object_matrix: 2-D numpy array of axes (each an instance of
307 |         `matplotlib.axes._subplots.AxesSubplot`).
308 |     :param num_channels: Number of channels (the same CAM will be plotted on top
309 |         of each channel).
310 |     :param colour_map_object: See doc for `_plot_cam_one_channel`.
311 |     :param min_contour_value: Same.
312 |     :param max_contour_value: Same.
313 |     :param contour_interval: Same.
314 |     :param line_width: Same.
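
    Channels are assigned to panels in row-major order, so channel k is drawn
    on the panel in row k // num_panel_columns and column k % num_panel_columns.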
315 | """ 316 | 317 | num_panel_rows = axes_object_matrix.shape[0] 318 | num_panel_columns = axes_object_matrix.shape[1] 319 | 320 | for k in range(num_channels): 321 | i, j = numpy.unravel_index(k, (num_panel_rows, num_panel_columns)) 322 | this_axes_object = axes_object_matrix[i, j] 323 | 324 | _plot_cam_one_channel( 325 | class_activation_matrix_2d=class_activation_matrix_2d, 326 | axes_object=this_axes_object, 327 | colour_map_object=colour_map_object, 328 | min_contour_value=min_contour_value, 329 | max_contour_value=max_contour_value, 330 | contour_interval=contour_interval, line_width=line_width 331 | ) 332 | -------------------------------------------------------------------------------- /Advanced_Topics_In_Machine_Learning/ML_Model_Interpretation/interpretation/normalization.py: -------------------------------------------------------------------------------- 1 | """Helper methods for normalization of predictors.""" 2 | 3 | import numpy 4 | from interpretation import utils 5 | 6 | NUM_VALUES_KEY = 'num_values' 7 | MEAN_VALUE_KEY = 'mean_value' 8 | MEAN_OF_SQUARES_KEY = 'mean_of_squares' 9 | 10 | 11 | def _update_normalization_params(intermediate_normalization_dict, new_values): 12 | """Updates normalization params for one predictor. 13 | 14 | :param intermediate_normalization_dict: Dictionary with the following keys. 15 | intermediate_normalization_dict['num_values']: Number of values on which 16 | current estimates are based. 17 | intermediate_normalization_dict['mean_value']: Current estimate for mean. 18 | intermediate_normalization_dict['mean_of_squares']: Current mean of squared 19 | values. 20 | 21 | :param new_values: numpy array of new values (will be used to update 22 | `intermediate_normalization_dict`). 23 | :return: intermediate_normalization_dict: Same as input but with updated 24 | values. 25 | """ 26 | 27 | if MEAN_VALUE_KEY not in intermediate_normalization_dict: 28 | intermediate_normalization_dict = { 29 | NUM_VALUES_KEY: 0, 30 | MEAN_VALUE_KEY: 0., 31 | MEAN_OF_SQUARES_KEY: 0. 32 | } 33 | 34 | # Update mean value. 35 | these_means = numpy.array([ 36 | intermediate_normalization_dict[MEAN_VALUE_KEY], numpy.mean(new_values) 37 | ]) 38 | these_weights = numpy.array([ 39 | intermediate_normalization_dict[NUM_VALUES_KEY], new_values.size 40 | ]) 41 | intermediate_normalization_dict[MEAN_VALUE_KEY] = numpy.average( 42 | these_means, weights=these_weights 43 | ) 44 | 45 | # Update mean of squares. 46 | these_means = numpy.array([ 47 | intermediate_normalization_dict[MEAN_OF_SQUARES_KEY], 48 | numpy.mean(new_values ** 2) 49 | ]) 50 | intermediate_normalization_dict[MEAN_OF_SQUARES_KEY] = numpy.average( 51 | these_means, weights=these_weights 52 | ) 53 | 54 | # Update number of values. 55 | intermediate_normalization_dict[NUM_VALUES_KEY] += new_values.size 56 | 57 | return intermediate_normalization_dict 58 | 59 | 60 | def _get_standard_deviation(intermediate_normalization_dict): 61 | """Computes stdev from intermediate normalization params. 62 | 63 | :param intermediate_normalization_dict: See doc for 64 | `_update_normalization_params`. 65 | :return: standard_deviation: Standard deviation. 
66 | """ 67 | 68 | num_values = float(intermediate_normalization_dict[NUM_VALUES_KEY]) 69 | multiplier = num_values / (num_values - 1) 70 | 71 | return numpy.sqrt(multiplier * ( 72 | intermediate_normalization_dict[MEAN_OF_SQUARES_KEY] - 73 | intermediate_normalization_dict[MEAN_VALUE_KEY] ** 2 74 | )) 75 | 76 | 77 | def get_image_normalization_params(image_file_names): 78 | """Computes normalization params (mean and stdev) for each predictor. 79 | 80 | :param image_file_names: 1-D list of paths to input files. 81 | :return: normalization_dict: See input doc for `normalize_images`. 82 | """ 83 | 84 | predictor_names = None 85 | norm_dict_by_predictor = None 86 | 87 | for this_file_name in image_file_names: 88 | print('Reading data from: "{0:s}"...'.format(this_file_name)) 89 | this_image_dict = utils.read_image_file(this_file_name) 90 | 91 | if predictor_names is None: 92 | predictor_names = this_image_dict[utils.PREDICTOR_NAMES_KEY] 93 | norm_dict_by_predictor = [{}] * len(predictor_names) 94 | 95 | for k in range(len(predictor_names)): 96 | norm_dict_by_predictor[k] = _update_normalization_params( 97 | intermediate_normalization_dict=norm_dict_by_predictor[k], 98 | new_values=this_image_dict[utils.PREDICTOR_MATRIX_KEY][..., k] 99 | ) 100 | 101 | print('\n') 102 | normalization_dict = {} 103 | 104 | for k in range(len(predictor_names)): 105 | this_mean = norm_dict_by_predictor[k][MEAN_VALUE_KEY] 106 | this_stdev = _get_standard_deviation(norm_dict_by_predictor[k]) 107 | 108 | normalization_dict[predictor_names[k]] = numpy.array([ 109 | this_mean, this_stdev 110 | ]) 111 | 112 | print(( 113 | 'Mean and standard deviation for "{0:s}" = {1:.4f}, {2:.4f}' 114 | ).format( 115 | predictor_names[k], this_mean, this_stdev 116 | )) 117 | 118 | return normalization_dict 119 | 120 | 121 | def normalize_images( 122 | predictor_matrix, predictor_names, normalization_dict=None): 123 | """Normalizes images to z-scores. 124 | 125 | E = number of examples (storm objects) in file 126 | M = number of rows in each storm-centered grid 127 | N = number of columns in each storm-centered grid 128 | C = number of channels (predictor variables) 129 | 130 | :param predictor_matrix: E-by-M-by-N-by-C numpy array of predictor values. 131 | :param predictor_names: length-C list of predictor names. 132 | :param normalization_dict: Dictionary. Each key is the name of a predictor 133 | value, and the corresponding value is a length-2 numpy array with 134 | [mean, standard deviation]. If `normalization_dict is None`, mean and 135 | standard deviation will be computed for each predictor. 136 | :return: predictor_matrix: Normalized version of input. 137 | :return: normalization_dict: See doc for input variable. If input was None, 138 | this will be a newly created dictionary. Otherwise, this will be the 139 | same dictionary passed as input. 
140 | """ 141 | 142 | num_predictors = len(predictor_names) 143 | 144 | if normalization_dict is None: 145 | normalization_dict = {} 146 | 147 | for k in range(num_predictors): 148 | this_mean = numpy.mean(predictor_matrix[..., k]) 149 | this_stdev = numpy.std(predictor_matrix[..., k], ddof=1) 150 | 151 | normalization_dict[predictor_names[k]] = numpy.array([ 152 | this_mean, this_stdev 153 | ]) 154 | 155 | for k in range(num_predictors): 156 | this_mean = normalization_dict[predictor_names[k]][0] 157 | this_stdev = normalization_dict[predictor_names[k]][1] 158 | 159 | predictor_matrix[..., k] = ( 160 | (predictor_matrix[..., k] - this_mean) / float(this_stdev) 161 | ) 162 | 163 | return predictor_matrix, normalization_dict 164 | 165 | 166 | def denormalize_images(predictor_matrix, predictor_names, normalization_dict): 167 | """Denormalizes images from z-scores back to original scales. 168 | 169 | :param predictor_matrix: See doc for `normalize_images`. 170 | :param predictor_names: Same. 171 | :param normalization_dict: Same. 172 | :return: predictor_matrix: Denormalized version of input. 173 | """ 174 | 175 | num_predictors = len(predictor_names) 176 | 177 | for k in range(num_predictors): 178 | this_mean = normalization_dict[predictor_names[k]][0] 179 | this_stdev = normalization_dict[predictor_names[k]][1] 180 | 181 | predictor_matrix[..., k] = ( 182 | this_mean + this_stdev * predictor_matrix[..., k] 183 | ) 184 | 185 | return predictor_matrix 186 | -------------------------------------------------------------------------------- /Advanced_Topics_In_Machine_Learning/ML_Model_Interpretation/interpretation/novelty_detection.py: -------------------------------------------------------------------------------- 1 | """Helper methods for novelty detection.""" 2 | 3 | import numpy 4 | from matplotlib import pyplot 5 | from interpretation import cnn, utils, plotting 6 | 7 | EOF_MATRIX_KEY = 'eof_matrix' 8 | FEATURE_MEANS_KEY = 'feature_means' 9 | FEATURE_STDEVS_KEY = 'feature_standard_deviations' 10 | 11 | NOVEL_MATRIX_KEY = 'novel_predictor_matrix' 12 | NOVEL_MATRIX_UPCONV_KEY = 'novel_matrix_upconv' 13 | NOVEL_MATRIX_UPCONV_SVD_KEY = 'novel_matrix_upconv_svd' 14 | 15 | REFL_COLOUR_MAP_OBJECT = pyplot.get_cmap('PuOr') 16 | TEMPERATURE_COLOUR_MAP_OBJECT = pyplot.get_cmap('bwr') 17 | 18 | 19 | def _normalize_features( 20 | feature_matrix, feature_means=None, feature_standard_deviations=None): 21 | """Normalizes scalar features to z-scores. 22 | 23 | E = number of examples (storm objects) 24 | Z = number of features 25 | 26 | :param feature_matrix: E-by-Z numpy array of features. 27 | :param feature_means: length-Z numpy array of mean values. If 28 | `feature_means is None`, these will be computed on the fly from 29 | `feature_matrix`. 30 | :param feature_standard_deviations: Same but with standard deviations. 31 | :return: feature_matrix: Normalized version of input. 32 | :return: feature_means: See input doc. 33 | :return: feature_standard_deviations: See input doc. 
34 | """ 35 | 36 | if feature_means is None or feature_standard_deviations is None: 37 | feature_means = numpy.mean(feature_matrix, axis=0) 38 | feature_standard_deviations = numpy.std(feature_matrix, axis=0, ddof=1) 39 | 40 | num_examples = feature_matrix.shape[0] 41 | num_features = feature_matrix.shape[1] 42 | 43 | mean_matrix = numpy.reshape(feature_means, (1, num_features)) 44 | mean_matrix = numpy.repeat(mean_matrix, repeats=num_examples, axis=0) 45 | 46 | stdev_matrix = numpy.reshape(feature_standard_deviations, (1, num_features)) 47 | stdev_matrix = numpy.repeat(stdev_matrix, repeats=num_examples, axis=0) 48 | 49 | feature_matrix = (feature_matrix - mean_matrix) / stdev_matrix 50 | return feature_matrix, feature_means, feature_standard_deviations 51 | 52 | 53 | def _fit_svd(baseline_feature_matrix, test_feature_matrix, 54 | percent_variance_to_keep): 55 | """Fits SVD (singular-value decomposition) model. 56 | 57 | B = number of baseline examples (storm objects) 58 | T = number of testing examples (storm objects) 59 | Z = number of scalar features (produced by dense layer of a CNN) 60 | K = number of modes (top eigenvectors) retained 61 | 62 | The SVD model will be fit only to the baseline set, but both the baseline 63 | and testing sets will be used to compute normalization parameters (means and 64 | standard deviations). Before, when only the baseline set was used to 65 | compute normalization params, the testing set had huge standard deviations, 66 | which caused the results of novelty detection to be physically unrealistic. 67 | 68 | :param baseline_feature_matrix: B-by-Z numpy array of features. 69 | :param test_feature_matrix: T-by-Z numpy array of features. 70 | :param percent_variance_to_keep: Percentage of variance to keep. Determines 71 | how many eigenvectors (K in the above discussion) will be used in the 72 | SVD model. 73 | 74 | :return: svd_dictionary: Dictionary with the following keys. 75 | svd_dictionary['eof_matrix']: Z-by-K numpy array, where each column is an 76 | EOF (empirical orthogonal function). 77 | svd_dictionary['feature_means']: length-Z numpy array with mean value of 78 | each feature (before transformation). 79 | svd_dictionary['feature_standard_deviations']: length-Z numpy array with 80 | standard deviation of each feature (before transformation). 81 | """ 82 | 83 | assert percent_variance_to_keep >= 50. 84 | assert percent_variance_to_keep <= 100. 85 | 86 | combined_feature_matrix = numpy.concatenate( 87 | (baseline_feature_matrix, test_feature_matrix), axis=0 88 | ) 89 | combined_feature_matrix, feature_means, feature_standard_deviations = ( 90 | _normalize_features(feature_matrix=combined_feature_matrix) 91 | ) 92 | 93 | num_features = baseline_feature_matrix.shape[1] 94 | num_baseline_examples = baseline_feature_matrix.shape[0] 95 | baseline_feature_matrix = ( 96 | combined_feature_matrix[:num_baseline_examples, ...] 
97 | ) 98 | 99 | eigenvalues, eof_matrix = numpy.linalg.svd(baseline_feature_matrix)[1:] 100 | eigenvalues = eigenvalues ** 2 101 | 102 | explained_variances = eigenvalues / numpy.sum(eigenvalues) 103 | cumulative_explained_variances = numpy.cumsum(explained_variances) 104 | 105 | fraction_of_variance_to_keep = 0.01 * percent_variance_to_keep 106 | these_indices = numpy.where( 107 | cumulative_explained_variances >= fraction_of_variance_to_keep 108 | )[0] 109 | 110 | if len(these_indices) == 0: 111 | these_indices = numpy.array([num_features - 1], dtype=int) 112 | 113 | num_modes_to_keep = 1 + these_indices[0] 114 | 115 | print(( 116 | 'Number of modes required to explain {0:f}% of variance: {1:d}' 117 | ).format( 118 | percent_variance_to_keep, num_modes_to_keep 119 | )) 120 | 121 | return { 122 | EOF_MATRIX_KEY: numpy.transpose(eof_matrix)[..., :num_modes_to_keep], 123 | FEATURE_MEANS_KEY: feature_means, 124 | FEATURE_STDEVS_KEY: feature_standard_deviations 125 | } 126 | 127 | 128 | def _apply_svd(feature_vector, svd_dictionary): 129 | """Applies SVD (singular-value decomposition) model to new example. 130 | 131 | Z = number of features 132 | 133 | :param feature_vector: length-Z numpy array with feature values for one 134 | example (storm object). 135 | :param svd_dictionary: Dictionary created by `_fit_svd`. 136 | :return: reconstructed_feature_vector: Reconstructed version of input. 137 | """ 138 | 139 | this_matrix = numpy.dot( 140 | svd_dictionary[EOF_MATRIX_KEY], 141 | numpy.transpose(svd_dictionary[EOF_MATRIX_KEY]) 142 | ) 143 | feature_vector_norm = ( 144 | (feature_vector - svd_dictionary[FEATURE_MEANS_KEY]) / 145 | svd_dictionary[FEATURE_STDEVS_KEY] 146 | ) 147 | reconstructed_feature_vector_norm = numpy.dot( 148 | this_matrix, feature_vector_norm 149 | ) 150 | 151 | return ( 152 | svd_dictionary[FEATURE_MEANS_KEY] + 153 | reconstructed_feature_vector_norm * svd_dictionary[FEATURE_STDEVS_KEY] 154 | ) 155 | 156 | 157 | def _plot_novelty_maps(novelty_matrix, predictor_names, max_temp_diff_kelvins, 158 | max_reflectivity_diff_dbz): 159 | """Plots novelty maps for one example. 160 | 161 | M = number of rows in grid 162 | N = number of columns in grid 163 | C = number of predictors 164 | 165 | :param novelty_matrix: M-by-N-by-C numpy array of denormalized novelty 166 | values (upconvnet reconstruction minus upconvnet/SVD reconstruction). 167 | :param predictor_names: length-C list of predictor names. 168 | :param max_temp_diff_kelvins: Max temperature difference in colour bar. 169 | :param max_reflectivity_diff_dbz: Max reflectivity difference in colour bar. 170 | :return: figure_object: Figure handle (instance of 171 | `matplotlib.figure.Figure`). 172 | :return: axes_object_matrix: 2-D numpy array of axes handles (instances 173 | of `matplotlib.axes._subplots.AxesSubplot`). 
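
    The u-wind and v-wind differences do not get their own panels; they are
    plotted as wind barbs on top of every non-wind predictor.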
174 | """ 175 | 176 | u_diff_matrix_m_s01 = novelty_matrix[ 177 | ..., predictor_names.index(utils.U_WIND_NAME) 178 | ] 179 | v_diff_matrix_m_s01 = novelty_matrix[ 180 | ..., predictor_names.index(utils.V_WIND_NAME) 181 | ] 182 | 183 | non_wind_predictor_names = [ 184 | p for p in predictor_names 185 | if p not in [utils.U_WIND_NAME, utils.V_WIND_NAME] 186 | ] 187 | 188 | figure_object, axes_object_matrix = plotting._create_paneled_figure( 189 | num_rows=1, num_columns=len(non_wind_predictor_names), 190 | ) 191 | 192 | for k in range(len(non_wind_predictor_names)): 193 | this_predictor_index = predictor_names.index( 194 | non_wind_predictor_names[k] 195 | ) 196 | 197 | if non_wind_predictor_names[k] == utils.REFLECTIVITY_NAME: 198 | this_max_colour_value = max_reflectivity_diff_dbz 199 | this_colour_map_object = REFL_COLOUR_MAP_OBJECT 200 | else: 201 | this_max_colour_value = max_temp_diff_kelvins 202 | this_colour_map_object = TEMPERATURE_COLOUR_MAP_OBJECT 203 | 204 | plotting.plot_scalar_field_2d( 205 | predictor_matrix=novelty_matrix[..., this_predictor_index], 206 | colour_map_object=this_colour_map_object, 207 | min_colour_value=-this_max_colour_value, 208 | max_colour_value=this_max_colour_value, 209 | axes_object=axes_object_matrix[0, k] 210 | ) 211 | 212 | this_colour_bar_object = plotting.plot_linear_colour_bar( 213 | axes_object_or_matrix=axes_object_matrix[0, k], 214 | data_values=novelty_matrix[..., this_predictor_index], 215 | colour_map_object=this_colour_map_object, 216 | min_value=-this_max_colour_value, max_value=this_max_colour_value, 217 | plot_horizontal=True, plot_min_arrow=True, plot_max_arrow=True 218 | ) 219 | 220 | plotting.plot_wind_2d( 221 | u_wind_matrix_m_s01=u_diff_matrix_m_s01, 222 | v_wind_matrix_m_s01=v_diff_matrix_m_s01, 223 | axes_object=axes_object_matrix[0, k] 224 | ) 225 | 226 | this_colour_bar_object.set_label( 227 | non_wind_predictor_names[k], 228 | fontsize=plotting.DEFAULT_CBAR_FONT_SIZE 229 | ) 230 | 231 | return figure_object, axes_object_matrix 232 | 233 | 234 | def run_novelty_detection( 235 | baseline_predictor_matrix_norm, trial_predictor_matrix_norm, 236 | cnn_model_object, cnn_feature_layer_name, upconvnet_model_object, 237 | num_novel_examples, multipass=False, percent_variance_to_keep=97.5): 238 | """Runs novelty detection. 239 | 240 | B = number of baseline examples 241 | T = number of trial examples 242 | Q = number of novel trial examples to find 243 | 244 | :param baseline_predictor_matrix_norm: numpy array with normalized predictor 245 | values for baseline set. The first axis should have length B. 246 | :param trial_predictor_matrix_norm: numpy array with normalized predictor 247 | values for trial set. The first axis should have length T. 248 | :param cnn_model_object: Trained CNN (instance of `keras.models.Model` or 249 | `keras.models.Sequential`). 250 | :param cnn_feature_layer_name: Name of feature layer in CNN. Outputs from 251 | this layer will be inputs to the upconvnet. 252 | :param upconvnet_model_object: Trained upconvnet (instance of 253 | `keras.models.Model` or `keras.models.Sequential`). 254 | :param num_novel_examples: Q in the above discussion. 255 | :param multipass: Boolean flag. If True, will run multi-pass version. If 256 | False, will run single-pass version. In the multi-pass version, 257 | whenever the next-most novel trial example is found, it is used to fit a 258 | new SVD model. 
In other words, after finding the [i]th-most novel trial
259 |         example, a new SVD model is fit on all baseline examples and the i most
260 |         novel trial examples.
261 |     :param percent_variance_to_keep: Percentage of variance to keep in SVD
262 |         (singular-value decomposition) from image space to feature space.
263 |     :return: novelty_dict: Dictionary with the following keys.
264 |     novelty_dict['novel_predictor_matrix']: numpy array with most novel examples
265 |         in trial set. The first axis has length Q.
266 |     novelty_dict['novel_matrix_upconv']: numpy array with upconvnet
267 |         reconstructions of the most novel examples. Same dimensions as
268 |         `novel_predictor_matrix`.
269 |     novelty_dict['novel_matrix_upconv_svd']: numpy array with upconvnet
270 |         reconstructions of SVD reconstructions of the most novel examples.
271 |         Same dimensions as `novel_predictor_matrix`.
272 |     """
273 | 
274 |     multipass = bool(multipass)
275 | 
276 |     num_trial_examples = trial_predictor_matrix_norm.shape[0]
277 |     num_novel_examples = int(numpy.round(num_novel_examples))
278 |     num_novel_examples = min([num_novel_examples, num_trial_examples])
279 | 
280 |     assert num_novel_examples > 1
281 | 
282 |     baseline_feature_matrix = cnn.apply_cnn(
283 |         model_object=cnn_model_object,
284 |         predictor_matrix=baseline_predictor_matrix_norm,
285 |         output_layer_name=cnn_feature_layer_name, verbose=True
286 |     )
287 |     print('\n')
288 | 
289 |     trial_feature_matrix = cnn.apply_cnn(
290 |         model_object=cnn_model_object,
291 |         predictor_matrix=trial_predictor_matrix_norm,
292 |         output_layer_name=cnn_feature_layer_name, verbose=True
293 |     )
294 |     print('\n')
295 | 
296 |     svd_dictionary = None
297 |     novel_indices = numpy.array([], dtype=int)
298 |     novel_matrix_upconv = None
299 |     novel_matrix_upconv_svd = None
300 | 
301 |     for k in range(num_novel_examples):
302 |         print('Finding {0:d}th of {1:d} most-novel trial examples...'.format(
303 |             k + 1, num_novel_examples
304 |         ))
305 | 
306 |         fit_new_svd = multipass or k == 0
307 | 
308 |         if fit_new_svd:
309 |             this_baseline_feature_matrix = numpy.concatenate((
310 |                 baseline_feature_matrix,
311 |                 trial_feature_matrix[novel_indices, ...]
312 |             ), axis=0)
313 | 
314 |             this_trial_feature_matrix = numpy.delete(
315 |                 trial_feature_matrix, obj=novel_indices, axis=0
316 |             )
317 | 
318 |             svd_dictionary = _fit_svd(
319 |                 baseline_feature_matrix=this_baseline_feature_matrix,
320 |                 test_feature_matrix=this_trial_feature_matrix,
321 |                 percent_variance_to_keep=percent_variance_to_keep
322 |             )
323 | 
324 |         trial_svd_errors = numpy.full(num_trial_examples, numpy.nan)
325 |         trial_feature_matrix_svd = numpy.full(
326 |             trial_feature_matrix.shape, numpy.nan
327 |         )
328 | 
329 |         for i in range(num_trial_examples):
330 |             if i in novel_indices:
331 |                 continue
332 | 
333 |             # Reconstruct this example's feature vector with the SVD model.
334 | 
335 |             trial_feature_matrix_svd[i, ...] = _apply_svd(
336 |                 feature_vector=trial_feature_matrix[i, ...],
337 |                 svd_dictionary=svd_dictionary
338 |             )
339 | 
340 |             trial_svd_errors[i] = numpy.linalg.norm(
341 |                 trial_feature_matrix_svd[i, ...] - trial_feature_matrix[i, ...]
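                # Novelty score for this example = Euclidean distance between
                # original and SVD-reconstructed feature vectors.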
342 | ) 343 | 344 | this_novel_index = numpy.nanargmax(trial_svd_errors) 345 | this_novel_index_array = numpy.array([this_novel_index], dtype=int) 346 | novel_indices = numpy.concatenate(( 347 | novel_indices, this_novel_index_array 348 | )) 349 | 350 | this_image_matrix_upconv = upconvnet_model_object.predict( 351 | trial_feature_matrix[this_novel_index_array, ...], batch_size=1 352 | ) 353 | 354 | this_image_matrix_upconv_svd = upconvnet_model_object.predict( 355 | trial_feature_matrix_svd[this_novel_index_array, ...], batch_size=1 356 | ) 357 | 358 | if novel_matrix_upconv is None: 359 | these_dim = ( 360 | (num_novel_examples,) + this_image_matrix_upconv.shape[1:] 361 | ) 362 | novel_matrix_upconv = numpy.full(these_dim, numpy.nan) 363 | novel_matrix_upconv_svd = numpy.full(these_dim, numpy.nan) 364 | 365 | novel_matrix_upconv[k, ...] = this_image_matrix_upconv 366 | novel_matrix_upconv_svd[k, ...] = this_image_matrix_upconv_svd 367 | 368 | return { 369 | NOVEL_MATRIX_KEY: trial_predictor_matrix_norm[novel_indices, ...], 370 | NOVEL_MATRIX_UPCONV_KEY: novel_matrix_upconv, 371 | NOVEL_MATRIX_UPCONV_SVD_KEY: novel_matrix_upconv_svd 372 | } 373 | 374 | 375 | def plot_results(novelty_dict_denorm, plot_index, predictor_names): 376 | """Plots results of novelty detection. 377 | 378 | :param novelty_dict_denorm: Dictionary created by `run_novelty_detection`, 379 | except with denormalized predictor values. 380 | :param plot_index: Will plot the [k]th most novel trial example, where 381 | k = `plot_index`. 382 | :param predictor_names: 1-D list of predictor names. 383 | """ 384 | 385 | temperature_index = predictor_names.index(utils.TEMPERATURE_NAME) 386 | reflectivity_index = predictor_names.index(utils.REFLECTIVITY_NAME) 387 | 388 | actual_predictor_matrix = ( 389 | novelty_dict_denorm[NOVEL_MATRIX_KEY][plot_index, ...] 390 | ) 391 | predictor_matrix_upconv = ( 392 | novelty_dict_denorm[NOVEL_MATRIX_UPCONV_KEY][plot_index, ...] 393 | ) 394 | predictor_matrix_upconv_svd = ( 395 | novelty_dict_denorm[NOVEL_MATRIX_UPCONV_SVD_KEY][plot_index, ...] 
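        # The novelty map (computed just below) is the upconvnet
        # reconstruction minus the upconvnet/SVD reconstruction.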
396 | ) 397 | novelty_matrix = predictor_matrix_upconv - predictor_matrix_upconv_svd 398 | 399 | concat_temp_matrix_kelvins = numpy.concatenate(( 400 | actual_predictor_matrix[..., temperature_index], 401 | predictor_matrix_upconv[..., temperature_index] 402 | ), axis=0) 403 | 404 | min_colour_temp_kelvins = numpy.percentile(concat_temp_matrix_kelvins, 1) 405 | max_colour_temp_kelvins = numpy.percentile(concat_temp_matrix_kelvins, 99) 406 | 407 | _, axes_object_matrix = plotting.plot_many_predictors_with_barbs( 408 | predictor_matrix=actual_predictor_matrix, 409 | predictor_names=predictor_names, 410 | min_colour_temp_kelvins=min_colour_temp_kelvins, 411 | max_colour_temp_kelvins=max_colour_temp_kelvins 412 | ) 413 | 414 | for i in range(axes_object_matrix.shape[0]): 415 | for j in range(axes_object_matrix.shape[1]): 416 | axes_object_matrix[i, j].set_title('Actual example') 417 | 418 | _, axes_object_matrix = plotting.plot_many_predictors_with_barbs( 419 | predictor_matrix=predictor_matrix_upconv, 420 | predictor_names=predictor_names, 421 | min_colour_temp_kelvins=min_colour_temp_kelvins, 422 | max_colour_temp_kelvins=max_colour_temp_kelvins 423 | ) 424 | 425 | for i in range(axes_object_matrix.shape[0]): 426 | for j in range(axes_object_matrix.shape[1]): 427 | axes_object_matrix[i, j].set_title('Upconvnet reconstruction') 428 | 429 | max_temp_diff_kelvins = numpy.percentile( 430 | numpy.absolute(novelty_matrix[..., temperature_index]), 99 431 | ) 432 | max_reflectivity_diff_dbz = numpy.percentile( 433 | numpy.absolute(novelty_matrix[..., reflectivity_index]), 99 434 | ) 435 | 436 | _, axes_object_matrix = _plot_novelty_maps( 437 | novelty_matrix=novelty_matrix, predictor_names=predictor_names, 438 | max_temp_diff_kelvins=max_temp_diff_kelvins, 439 | max_reflectivity_diff_dbz=max_reflectivity_diff_dbz 440 | ) 441 | 442 | title_string = 'Novelty (unexpected part)' 443 | for i in range(axes_object_matrix.shape[0]): 444 | for j in range(axes_object_matrix.shape[1]): 445 | axes_object_matrix[i, j].set_title(title_string) 446 | -------------------------------------------------------------------------------- /Advanced_Topics_In_Machine_Learning/ML_Model_Interpretation/interpretation/saliency.py: -------------------------------------------------------------------------------- 1 | """Helper methods for saliency.""" 2 | 3 | import numpy 4 | from keras import backend as K 5 | from interpretation import utils 6 | 7 | DEFAULT_LINE_WIDTH = 2. 8 | 9 | 10 | def _do_saliency_calculations( 11 | model_object, loss_tensor, list_of_input_matrices): 12 | """Does saliency calculations. 13 | 14 | T = number of input tensors to the model 15 | E = number of examples (storm objects) 16 | 17 | :param model_object: Instance of `keras.models.Model`. 18 | :param loss_tensor: Keras tensor defining the loss function. 19 | :param list_of_input_matrices: length-T list of numpy arrays, comprising one 20 | or more examples (storm objects). list_of_input_matrices[i] must have 21 | the same dimensions as the [i]th input tensor to the model. 22 | :return: list_of_saliency_matrices: length-T list of numpy arrays, 23 | comprising the saliency map for each example. 24 | list_of_saliency_matrices[i] has the same dimensions as 25 | list_of_input_matrices[i] and defines the "saliency" of each value x, 26 | which is the gradient of the loss function with respect to x. 
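
    Note that the gradients are multiplied by -1 before being returned, so
    positive saliency means that increasing x decreases the loss.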
27 | """ 28 | 29 | if isinstance(model_object.input, list): 30 | list_of_input_tensors = model_object.input 31 | else: 32 | list_of_input_tensors = [model_object.input] 33 | 34 | list_of_gradient_tensors = K.gradients(loss_tensor, list_of_input_tensors) 35 | num_input_tensors = len(list_of_input_tensors) 36 | 37 | for i in range(num_input_tensors): 38 | list_of_gradient_tensors[i] /= K.maximum( 39 | K.std(list_of_gradient_tensors[i]), 40 | K.epsilon() 41 | ) 42 | 43 | inputs_to_gradients_function = K.function( 44 | list_of_input_tensors + [K.learning_phase()], 45 | list_of_gradient_tensors 46 | ) 47 | 48 | list_of_saliency_matrices = inputs_to_gradients_function( 49 | list_of_input_matrices + [0] 50 | ) 51 | 52 | for i in range(num_input_tensors): 53 | list_of_saliency_matrices[i] *= -1 54 | 55 | return list_of_saliency_matrices 56 | 57 | 58 | def _get_grid_points(x_min, x_spacing, num_columns, y_min, y_spacing, num_rows): 59 | """Returns grid points in regular x-y grid. 60 | 61 | M = number of rows in grid 62 | N = number of columns in grid 63 | 64 | :param x_min: Minimum x-coordinate over all grid points. 65 | :param x_spacing: Spacing between adjacent grid points in x-direction. 66 | :param num_columns: N in the above definition. 67 | :param y_min: Minimum y-coordinate over all grid points. 68 | :param y_spacing: Spacing between adjacent grid points in y-direction. 69 | :param num_rows: M in the above definition. 70 | :return: x_coords: length-N numpy array with x-coordinates at grid points. 71 | :return: y_coords: length-M numpy array with y-coordinates at grid points. 72 | """ 73 | 74 | # TODO(thunderhoser): Put this in utils.py. 75 | 76 | x_max = x_min + (num_columns - 1) * x_spacing 77 | y_max = y_min + (num_rows - 1) * y_spacing 78 | 79 | x_coords = numpy.linspace(x_min, x_max, num=num_columns) 80 | y_coords = numpy.linspace(y_min, y_max, num=num_rows) 81 | 82 | return x_coords, y_coords 83 | 84 | 85 | def _plot_2d_saliency_map( 86 | saliency_matrix_2d, axes_object, colour_map_object, max_contour_value, 87 | contour_interval, line_width=DEFAULT_LINE_WIDTH): 88 | """Plots 2-D saliency map with line contours. 89 | 90 | M = number of rows in grid 91 | N = number of columns in grid 92 | 93 | :param saliency_matrix_2d: M-by-N numpy array of saliency values. 94 | :param axes_object: Will plot on these axes (instance of 95 | `matplotlib.axes._subplots.AxesSubplot`). 96 | :param colour_map_object: Colour scheme (instance of `matplotlib.pyplot.cm` 97 | or similar). 98 | :param max_contour_value: Max contour value. Contour values will range from 99 | -v...v, where v = `max_contour_value`. 100 | :param contour_interval: Interval between successive contours. 101 | :param line_width: Line width for contours. 102 | """ 103 | 104 | # Check input args. 105 | assert max_contour_value >= 0. 106 | max_contour_value = max([max_contour_value, 1e-6]) 107 | 108 | assert contour_interval >= 0. 109 | contour_interval = max([contour_interval, 1e-7]) 110 | 111 | assert not numpy.any(numpy.isnan(saliency_matrix_2d)) 112 | assert len(saliency_matrix_2d.shape) == 2 113 | assert contour_interval < max_contour_value 114 | 115 | half_num_contours = int(numpy.round( 116 | 1 + max_contour_value / contour_interval 117 | )) 118 | 119 | # Find grid coordinates. 
120 | num_grid_rows = saliency_matrix_2d.shape[0] 121 | num_grid_columns = saliency_matrix_2d.shape[1] 122 | x_coord_spacing = num_grid_columns ** -1 123 | y_coord_spacing = num_grid_rows ** -1 124 | 125 | x_coords, y_coords = _get_grid_points( 126 | x_min=x_coord_spacing / 2, y_min=y_coord_spacing / 2, 127 | x_spacing=x_coord_spacing, y_spacing=y_coord_spacing, 128 | num_rows=num_grid_rows, num_columns=num_grid_columns 129 | ) 130 | 131 | x_coord_matrix, y_coord_matrix = numpy.meshgrid(x_coords, y_coords) 132 | 133 | # Plot positive contours. 134 | positive_contour_values = numpy.linspace( 135 | 0., max_contour_value, num=half_num_contours 136 | ) 137 | 138 | axes_object.contour( 139 | x_coord_matrix, y_coord_matrix, saliency_matrix_2d, 140 | positive_contour_values, cmap=colour_map_object, 141 | vmin=numpy.min(positive_contour_values), 142 | vmax=numpy.max(positive_contour_values), 143 | linewidths=line_width, linestyles='solid', zorder=1e6, 144 | transform=axes_object.transAxes 145 | ) 146 | 147 | # Plot negative contours. 148 | negative_contour_values = positive_contour_values[1:] 149 | 150 | axes_object.contour( 151 | x_coord_matrix, y_coord_matrix, -saliency_matrix_2d, 152 | negative_contour_values, cmap=colour_map_object, 153 | vmin=numpy.min(negative_contour_values), 154 | vmax=numpy.max(negative_contour_values), 155 | linewidths=line_width, linestyles='dashed', zorder=1e6, 156 | transform=axes_object.transAxes 157 | ) 158 | 159 | 160 | def get_saliency_maps_for_class( 161 | model_object, target_class, list_of_input_matrices): 162 | """For each input example, creates saliency map for prob of target class. 163 | 164 | :param model_object: Trained model (instance of `keras.models.Model` or 165 | `keras.models.Sequential`). 166 | :param target_class: Saliency maps will be created for this class. Must be 167 | an integer in 0...(K - 1), where K = number of classes. 168 | :param list_of_input_matrices: See doc for `_do_saliency_calculations`. 169 | :return: list_of_saliency_matrices: See doc for `_do_saliency_calculations`. 170 | """ 171 | 172 | # TODO(thunderhoser): Create example axis. 173 | 174 | target_class = int(numpy.round(target_class)) 175 | assert target_class >= 0 176 | 177 | num_output_neurons = ( 178 | model_object.layers[-1].output.get_shape().as_list()[-1] 179 | ) 180 | 181 | if num_output_neurons == 1: 182 | assert target_class <= 1 183 | 184 | if target_class == 1: 185 | loss_tensor = K.mean( 186 | (model_object.layers[-1].output[..., 0] - 1) ** 2 187 | ) 188 | else: 189 | loss_tensor = K.mean(model_object.layers[-1].output[..., 0] ** 2) 190 | else: 191 | assert target_class < num_output_neurons 192 | 193 | loss_tensor = K.mean( 194 | (model_object.layers[-1].output[..., target_class] - 1) ** 2 195 | ) 196 | 197 | return _do_saliency_calculations( 198 | model_object=model_object, loss_tensor=loss_tensor, 199 | list_of_input_matrices=list_of_input_matrices) 200 | 201 | 202 | def smooth_saliency_maps(saliency_matrices, smoothing_radius_grid_cells): 203 | """Smooths saliency maps via Gaussian filter. 204 | 205 | T = number of input tensors to the model 206 | 207 | :param saliency_matrices: length-T list of numpy arrays. 208 | :param smoothing_radius_grid_cells: e-folding radius (number of grid cells). 209 | :return: saliency_matrices: Smoothed version of input. 
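
    Smoothing is applied independently to each example and each channel.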
210 | """ 211 | 212 | num_matrices = len(saliency_matrices) 213 | num_examples = saliency_matrices[0].shape[0] 214 | 215 | for j in range(num_matrices): 216 | this_num_channels = saliency_matrices[j].shape[-1] 217 | 218 | for i in range(num_examples): 219 | for k in range(this_num_channels): 220 | saliency_matrices[j][i, ..., k] = utils.apply_gaussian_filter( 221 | input_matrix=saliency_matrices[j][i, ..., k], 222 | e_folding_radius_grid_cells=smoothing_radius_grid_cells 223 | ) 224 | 225 | return saliency_matrices 226 | 227 | 228 | def plot_saliency_maps( 229 | saliency_matrix_3d, axes_object_matrix, colour_map_object, 230 | max_contour_value, contour_interval, 231 | line_width=DEFAULT_LINE_WIDTH): 232 | """Plots many saliency maps (one for each channel). 233 | 234 | M = number of rows in grid 235 | N = number of columns in grid 236 | C = number of channels 237 | 238 | :param saliency_matrix_3d: M-by-N-by-C numpy array of saliency values. 239 | :param axes_object_matrix: 2-D numpy array of axes (each an instance of 240 | `matplotlib.axes._subplots.AxesSubplot`). 241 | :param colour_map_object: See doc for `_plot_2d_saliency_map`. 242 | :param max_contour_value: Same. 243 | :param contour_interval: Same. 244 | :param line_width: Same. 245 | """ 246 | 247 | assert len(saliency_matrix_3d.shape) == 3 248 | 249 | num_channels = saliency_matrix_3d.shape[-1] 250 | num_panel_rows = axes_object_matrix.shape[0] 251 | num_panel_columns = axes_object_matrix.shape[1] 252 | 253 | for k in range(num_channels): 254 | i, j = numpy.unravel_index(k, (num_panel_rows, num_panel_columns)) 255 | this_axes_object = axes_object_matrix[i, j] 256 | 257 | _plot_2d_saliency_map( 258 | saliency_matrix_2d=saliency_matrix_3d[..., k], 259 | axes_object=this_axes_object, 260 | colour_map_object=colour_map_object, 261 | max_contour_value=max_contour_value, 262 | contour_interval=contour_interval, line_width=line_width 263 | ) 264 | 265 | # colour_bar_object = utils.plot_linear_colour_bar( 266 | # axes_object_or_matrix=axes_object_matrix, 267 | # data_values=saliency_matrix_3d, colour_map_object=colour_map_object, 268 | # min_value=0., max_value=max_contour_value, plot_horizontal=False, 269 | # plot_min_arrow=False, plot_max_arrow=True, fraction_of_axis_length=0.9 270 | # ) 271 | # 272 | # colour_bar_object.set_label('Absolute saliency') 273 | -------------------------------------------------------------------------------- /Advanced_Topics_In_Machine_Learning/ML_Model_Interpretation/interpretation/utils.py: -------------------------------------------------------------------------------- 1 | """Helper methods for model interpretation in general.""" 2 | 3 | import copy 4 | import glob 5 | import errno 6 | import time 7 | import calendar 8 | import os.path 9 | import numpy 10 | import netCDF4 11 | from scipy.interpolate import interp1d 12 | from scipy.ndimage.filters import gaussian_filter 13 | 14 | DATE_FORMAT = '%Y%m%d' 15 | DATE_FORMAT_REGEX = '[0-9][0-9][0-9][0-9][0-1][0-9][0-3][0-9]' 16 | 17 | CSV_TARGET_NAME = 'RVORT1_MAX-future_max' 18 | TARGET_NAME = 'max_future_vorticity_s01' 19 | 20 | NETCDF_REFL_NAME = 'REFL_COM_curr' 21 | NETCDF_TEMP_NAME = 'T2_curr' 22 | NETCDF_U_WIND_NAME = 'U10_curr' 23 | NETCDF_V_WIND_NAME = 'V10_curr' 24 | NETCDF_PREDICTOR_NAMES = [ 25 | NETCDF_REFL_NAME, NETCDF_TEMP_NAME, NETCDF_U_WIND_NAME, NETCDF_V_WIND_NAME 26 | ] 27 | 28 | REFLECTIVITY_NAME = 'reflectivity_dbz' 29 | TEMPERATURE_NAME = 'temperature_kelvins' 30 | U_WIND_NAME = 'u_wind_m_s01' 31 | V_WIND_NAME = 'v_wind_m_s01' 32 | 
PREDICTOR_NAMES = [ 33 | REFLECTIVITY_NAME, TEMPERATURE_NAME, U_WIND_NAME, V_WIND_NAME 34 | ] 35 | 36 | NETCDF_TRACK_ID_NAME = 'track_id' 37 | NETCDF_TRACK_STEP_NAME = 'track_step' 38 | NETCDF_TARGET_NAME = 'RVORT1_MAX_future' 39 | 40 | STORM_IDS_KEY = 'storm_ids' 41 | STORM_STEPS_KEY = 'storm_steps' 42 | PREDICTOR_NAMES_KEY = 'predictor_names' 43 | PREDICTOR_MATRIX_KEY = 'predictor_matrix' 44 | TARGET_NAME_KEY = 'target_name' 45 | TARGET_MATRIX_KEY = 'target_matrix' 46 | 47 | HIT_INDICES_KEY = 'hit_indices' 48 | MISS_INDICES_KEY = 'miss_indices' 49 | FALSE_ALARM_INDICES_KEY = 'false_alarm_indices' 50 | CORRECT_NULL_INDICES_KEY = 'correct_null_indices' 51 | 52 | 53 | def _image_file_name_to_date(netcdf_file_name): 54 | """Parses date from name of image (NetCDF) file. 55 | 56 | :param netcdf_file_name: Path to input file. 57 | :return: date_string: Date (format "yyyymmdd"). 58 | """ 59 | 60 | pathless_file_name = os.path.split(netcdf_file_name)[-1] 61 | 62 | date_string = pathless_file_name.replace( 63 | 'NCARSTORM_', '' 64 | ).replace('-0000_d01_model_patches.nc', '') 65 | 66 | # Verify. 67 | time_string_to_unix(time_string=date_string, time_format=DATE_FORMAT) 68 | return date_string 69 | 70 | 71 | def create_directory(directory_name=None, file_name=None): 72 | """Creates directory if necessary (i.e., doesn't already exist). 73 | 74 | This method checks for the argument `directory_name` first. If 75 | `directory_name` is None, this method checks for `file_name` and extracts 76 | the directory. 77 | 78 | :param directory_name: Path to local directory. 79 | :param file_name: Path to local file. 80 | """ 81 | 82 | if directory_name is None: 83 | directory_name = os.path.dirname(file_name) 84 | 85 | if directory_name == '': 86 | return 87 | 88 | try: 89 | os.makedirs(directory_name) 90 | except OSError as this_error: 91 | if this_error.errno == errno.EEXIST and os.path.isdir(directory_name): 92 | pass 93 | else: 94 | raise 95 | 96 | 97 | def apply_gaussian_filter(input_matrix, e_folding_radius_grid_cells): 98 | """Applies Gaussian filter to any-dimensional grid. 99 | 100 | :param input_matrix: numpy array with any dimensions. 101 | :param e_folding_radius_grid_cells: e-folding radius (num grid cells). 102 | :return: output_matrix: numpy array after smoothing (same dimensions as 103 | input). 104 | """ 105 | 106 | assert e_folding_radius_grid_cells >= 0. 107 | return gaussian_filter( 108 | input_matrix, sigma=e_folding_radius_grid_cells, order=0, mode='nearest' 109 | ) 110 | 111 | 112 | def time_string_to_unix(time_string, time_format): 113 | """Converts time from string to Unix format. 114 | 115 | Unix format = seconds since 0000 UTC 1 Jan 1970. 116 | 117 | :param time_string: Time string. 118 | :param time_format: Format of time string (example: "%Y%m%d" or 119 | "%Y-%m-%d-%H%M%S"). 120 | :return: unix_time_sec: Time in Unix format. 121 | """ 122 | 123 | return calendar.timegm(time.strptime(time_string, time_format)) 124 | 125 | 126 | def time_unix_to_string(unix_time_sec, time_format): 127 | """Converts time from Unix format to string. 128 | 129 | Unix format = seconds since 0000 UTC 1 Jan 1970. 130 | 131 | :param unix_time_sec: Time in Unix format. 132 | :param time_format: Desired format of time string (example: "%Y%m%d" or 133 | "%Y-%m-%d-%H%M%S"). 134 | :return: time_string: Time string. 
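
    Example: unix_time_sec = 0 and time_format = "%Y%m%d" yield "19700101".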
135 | """ 136 | 137 | return time.strftime(time_format, time.gmtime(unix_time_sec)) 138 | 139 | 140 | def find_many_image_files(first_date_string, last_date_string, image_dir_name): 141 | """Finds image (NetCDF) files in the given date range. 142 | 143 | :param first_date_string: First date ("yyyymmdd") in range. 144 | :param last_date_string: Last date ("yyyymmdd") in range. 145 | :param image_dir_name: Name of directory with image (NetCDF) files. 146 | :return: netcdf_file_names: 1-D list of paths to image files. 147 | """ 148 | 149 | first_time_unix_sec = time_string_to_unix( 150 | time_string=first_date_string, time_format=DATE_FORMAT 151 | ) 152 | last_time_unix_sec = time_string_to_unix( 153 | time_string=last_date_string, time_format=DATE_FORMAT 154 | ) 155 | 156 | netcdf_file_pattern = ( 157 | '{0:s}/NCARSTORM_{1:s}-0000_d01_model_patches.nc' 158 | ).format(image_dir_name, DATE_FORMAT_REGEX) 159 | 160 | netcdf_file_names = glob.glob(netcdf_file_pattern) 161 | netcdf_file_names.sort() 162 | 163 | file_date_strings = [_image_file_name_to_date(f) for f in netcdf_file_names] 164 | file_times_unix_sec = numpy.array([ 165 | time_string_to_unix(time_string=d, time_format=DATE_FORMAT) 166 | for d in file_date_strings 167 | ], dtype=int) 168 | 169 | good_indices = numpy.where(numpy.logical_and( 170 | file_times_unix_sec >= first_time_unix_sec, 171 | file_times_unix_sec <= last_time_unix_sec 172 | ))[0] 173 | 174 | return [netcdf_file_names[k] for k in good_indices] 175 | 176 | 177 | def read_image_file(netcdf_file_name): 178 | """Reads storm-centered images from NetCDF file. 179 | 180 | E = number of examples (storm objects) in file 181 | M = number of rows in each storm-centered grid 182 | N = number of columns in each storm-centered grid 183 | C = number of channels (predictor variables) 184 | 185 | :param netcdf_file_name: Path to input file. 186 | :return: image_dict: Dictionary with the following keys. 187 | image_dict['storm_ids']: length-E list of storm IDs (integers). 188 | image_dict['storm_steps']: length-E numpy array of storm steps (integers). 189 | image_dict['predictor_names']: length-C list of predictor names. 190 | image_dict['predictor_matrix']: E-by-M-by-N-by-C numpy array of predictor 191 | values. 192 | image_dict['target_name']: Name of target variable. 193 | image_dict['target_matrix']: E-by-M-by-N numpy array of target values. 194 | """ 195 | 196 | dataset_object = netCDF4.Dataset(netcdf_file_name) 197 | 198 | storm_ids = numpy.array( 199 | dataset_object.variables[NETCDF_TRACK_ID_NAME][:], dtype=int 200 | ) 201 | storm_steps = numpy.array( 202 | dataset_object.variables[NETCDF_TRACK_STEP_NAME][:], dtype=int 203 | ) 204 | 205 | predictor_matrix = None 206 | 207 | for this_predictor_name in NETCDF_PREDICTOR_NAMES: 208 | this_predictor_matrix = numpy.array( 209 | dataset_object.variables[this_predictor_name][:], dtype=float 210 | ) 211 | this_predictor_matrix = numpy.expand_dims( 212 | this_predictor_matrix, axis=-1 213 | ) 214 | 215 | if predictor_matrix is None: 216 | predictor_matrix = this_predictor_matrix + 0. 
217 | else: 218 | predictor_matrix = numpy.concatenate( 219 | (predictor_matrix, this_predictor_matrix), axis=-1 220 | ) 221 | 222 | target_matrix = numpy.array( 223 | dataset_object.variables[NETCDF_TARGET_NAME][:], dtype=float 224 | ) 225 | 226 | return { 227 | STORM_IDS_KEY: storm_ids, 228 | STORM_STEPS_KEY: storm_steps, 229 | PREDICTOR_NAMES_KEY: PREDICTOR_NAMES, 230 | PREDICTOR_MATRIX_KEY: predictor_matrix, 231 | TARGET_NAME_KEY: TARGET_NAME, 232 | TARGET_MATRIX_KEY: target_matrix 233 | } 234 | 235 | 236 | def read_many_image_files(netcdf_file_names): 237 | """Reads storm-centered images from many NetCDF files. 238 | 239 | :param netcdf_file_names: 1-D list of paths to input files. 240 | :return: image_dict: See doc for `read_image_file`. 241 | """ 242 | 243 | image_dict = None 244 | keys_to_concat = [ 245 | STORM_IDS_KEY, STORM_STEPS_KEY, PREDICTOR_MATRIX_KEY, TARGET_MATRIX_KEY 246 | ] 247 | 248 | for this_file_name in netcdf_file_names: 249 | print('Reading data from: "{0:s}"...'.format(this_file_name)) 250 | this_image_dict = read_image_file(this_file_name) 251 | 252 | if image_dict is None: 253 | image_dict = copy.deepcopy(this_image_dict) 254 | continue 255 | 256 | for this_key in keys_to_concat: 257 | image_dict[this_key] = numpy.concatenate(( 258 | image_dict[this_key], this_image_dict[this_key] 259 | ), axis=0) 260 | 261 | return image_dict 262 | 263 | 264 | def find_extreme_examples( 265 | class_labels, event_probabilities, num_examples_per_set): 266 | """Finds extreme examples. 267 | 268 | There are four sets of examples: 269 | 270 | - best hits 271 | - worst false alarms 272 | - worst misses 273 | - best correct nulls 274 | 275 | E = total number of examples 276 | e = number of examples per set 277 | 278 | :param class_labels: length-E numpy array of class labels (1 for event, 0 279 | for non-event). 280 | :param event_probabilities: length-E numpy array of event probabilities. 281 | :param num_examples_per_set: Number of examples in each set. 282 | 283 | :return: extreme_dict: Dictionary with the following keys. 284 | extreme_dict['hit_indices']: length-e numpy array with indices of best hits. 285 | extreme_dict['miss_indices']: length-e numpy array with indices of worst 286 | misses. 287 | extreme_dict['false_alarm_indices']: length-e numpy array with indices of 288 | worst false alarms. 289 | extreme_dict['correct_null_indices']: length-e numpy array with indices of 290 | best correct nulls. 291 | """ 292 | 293 | # Check input args. 294 | class_labels = numpy.round(class_labels).astype(int) 295 | assert numpy.all(class_labels >= 0) 296 | assert numpy.all(class_labels <= 1) 297 | assert len(class_labels.shape) == 1 298 | 299 | num_examples_total = len(class_labels) 300 | 301 | assert numpy.all(event_probabilities >= 0.) 302 | assert numpy.all(event_probabilities <= 1.) 
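    # Labels and probabilities must be 1-D arrays of the same length.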
303 | assert len(event_probabilities.shape) == 1 304 | assert len(event_probabilities) == num_examples_total 305 | 306 | num_examples_per_set = int(numpy.round(num_examples_per_set)) 307 | assert num_examples_per_set > 0 308 | 309 | positive_indices = numpy.where(class_labels == 1)[0] 310 | negative_indices = numpy.where(class_labels == 0)[0] 311 | 312 | num_hits = min([ 313 | num_examples_per_set, len(positive_indices) 314 | ]) 315 | num_misses = min([ 316 | num_examples_per_set, len(positive_indices) 317 | ]) 318 | num_false_alarms = min([ 319 | num_examples_per_set, len(negative_indices) 320 | ]) 321 | num_correct_nulls = min([ 322 | num_examples_per_set, len(negative_indices) 323 | ]) 324 | 325 | these_indices = numpy.argsort(-1 * event_probabilities[positive_indices]) 326 | hit_indices = positive_indices[these_indices][:num_hits] 327 | print('Average event probability for {0:d} best hits = {1:.4f}'.format( 328 | num_hits, numpy.mean(event_probabilities[hit_indices]) 329 | )) 330 | 331 | these_indices = numpy.argsort(event_probabilities[positive_indices]) 332 | miss_indices = positive_indices[these_indices][:num_misses] 333 | print('Average event probability for {0:d} worst misses = {1:.4f}'.format( 334 | num_misses, numpy.mean(event_probabilities[miss_indices]) 335 | )) 336 | 337 | these_indices = numpy.argsort(-1 * event_probabilities[negative_indices]) 338 | false_alarm_indices = negative_indices[these_indices][:num_false_alarms] 339 | print(( 340 | 'Average event probability for {0:d} worst false alarms = {1:.4f}' 341 | ).format( 342 | num_false_alarms, numpy.mean(event_probabilities[false_alarm_indices]) 343 | )) 344 | 345 | these_indices = numpy.argsort(event_probabilities[negative_indices]) 346 | correct_null_indices = negative_indices[these_indices][:num_correct_nulls] 347 | print(( 348 | 'Average event probability for {0:d} best correct nulls = {1:.4f}' 349 | ).format( 350 | num_correct_nulls, numpy.mean(event_probabilities[correct_null_indices]) 351 | )) 352 | 353 | return { 354 | HIT_INDICES_KEY: hit_indices, 355 | MISS_INDICES_KEY: miss_indices, 356 | FALSE_ALARM_INDICES_KEY: false_alarm_indices, 357 | CORRECT_NULL_INDICES_KEY: correct_null_indices 358 | } 359 | 360 | 361 | def run_pmm_one_variable(field_matrix, max_percentile_level=99.): 362 | """Applies PMM (probability-matched means) to one variable. 363 | 364 | :param field_matrix: numpy array with data to be averaged. The first axis 365 | should represent examples, and remaining axes should represent spatial 366 | dimensions. 367 | :param max_percentile_level: Maximum percentile. No output value will 368 | exceed the [q]th percentile of `field_matrix`, where q = 369 | `max_percentile_level`. Similarly, no output value will be less than 370 | the [100 - q]th percentile of `field_matrix`. 371 | :return: mean_field_matrix: numpy array with average spatial field. 372 | Dimensions are the same as `field_matrix`, except that the first axis is 373 | gone. For instance, if `field_matrix` is 1000 x 32 x 32 (1000 examples 374 | x 32 rows x 32 columns), `mean_field_matrix` will be 32 x 32. 375 | """ 376 | 377 | assert not numpy.any(numpy.isnan(field_matrix)) 378 | assert len(field_matrix.shape) > 1 379 | assert max_percentile_level >= 90. 380 | assert max_percentile_level < 100. 381 | 382 | # Pool values over all dimensions and remove extremes. 
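    # Trimming both tails at `max_percentile_level` (default 99) keeps a few
    # extreme values from stretching the probability-matched mean: values
    # above the q-th percentile and below the (100 - q)-th percentile of the
    # pooled array are discarded before the percentile mapping below.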
383 | pooled_values = numpy.sort(numpy.ravel(field_matrix)) 384 | max_pooled_value = numpy.percentile(pooled_values, max_percentile_level) 385 | pooled_values = pooled_values[pooled_values <= max_pooled_value] 386 | 387 | min_pooled_value = numpy.percentile( 388 | pooled_values, 100 - max_percentile_level 389 | ) 390 | pooled_values = pooled_values[pooled_values >= min_pooled_value] 391 | 392 | # Find ensemble mean at each location (e.g., grid point). 393 | mean_field_matrix = numpy.mean(field_matrix, axis=0) 394 | mean_field_flattened = numpy.ravel(mean_field_matrix) 395 | 396 | # At each location, replace ensemble mean with the same percentile from the 397 | # pooled array. 398 | pooled_value_percentiles = numpy.linspace( 399 | 0, 100, num=len(pooled_values), dtype=float 400 | ) 401 | mean_value_percentiles = numpy.linspace( 402 | 0, 100, num=len(mean_field_flattened), dtype=float 403 | ) 404 | 405 | sort_indices = numpy.argsort(mean_field_flattened) 406 | unsort_indices = numpy.argsort(sort_indices) 407 | 408 | interp_object = interp1d( 409 | pooled_value_percentiles, pooled_values, kind='linear', 410 | bounds_error=True, assume_sorted=True 411 | ) 412 | 413 | mean_field_flattened = interp_object(mean_value_percentiles) 414 | mean_field_flattened = mean_field_flattened[unsort_indices] 415 | mean_field_matrix = numpy.reshape( 416 | mean_field_flattened, mean_field_matrix.shape 417 | ) 418 | 419 | return mean_field_matrix 420 | 421 | 422 | def run_pmm_many_variables(field_matrix, max_percentile_level=99.): 423 | """Applies PMM (probability-matched means) to each variable. 424 | 425 | :param field_matrix: numpy array with data to be averaged. The first axis 426 | should represent examples; the last axis should represent variables; and 427 | remaining axes should represent spatial dimensions. 428 | :param max_percentile_level: See doc for `run_pmm_one_variable`. 429 | :return: mean_field_matrix: numpy array with average spatial fields. 430 | Dimensions are the same as `field_matrix`, except that the first axis is 431 | gone. For instance, if `field_matrix` is 1000 x 32 x 32 x 4 432 | (1000 examples x 32 rows x 32 columns x 4 variables), 433 | `mean_field_matrix` will be 32 x 32 x 4. 
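
    Example (a minimal sketch with random data; the shapes are illustrative
    only):

        field_matrix = numpy.random.normal(size=(1000, 32, 32, 4))
        mean_field_matrix = run_pmm_many_variables(field_matrix)
        # mean_field_matrix.shape == (32, 32, 4)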
434 | """ 435 | 436 | assert len(field_matrix.shape) > 2 437 | 438 | num_variables = field_matrix.shape[-1] 439 | mean_field_matrix = numpy.full(field_matrix.shape[1:], numpy.nan) 440 | 441 | for k in range(num_variables): 442 | mean_field_matrix[..., k] = run_pmm_one_variable( 443 | field_matrix=field_matrix[..., k], 444 | max_percentile_level=max_percentile_level 445 | ) 446 | 447 | return mean_field_matrix 448 | -------------------------------------------------------------------------------- /Advanced_Topics_In_Machine_Learning/ML_Model_Interpretation/lak_permutation.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alburke/ams-2020-ml-python-course/997f93303f01956685c49ecdfc010ebb8dfc9b4e/Advanced_Topics_In_Machine_Learning/ML_Model_Interpretation/lak_permutation.gif -------------------------------------------------------------------------------- /Advanced_Topics_In_Machine_Learning/ML_Model_Interpretation/model_components.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alburke/ams-2020-ml-python-course/997f93303f01956685c49ecdfc010ebb8dfc9b4e/Advanced_Topics_In_Machine_Learning/ML_Model_Interpretation/model_components.png -------------------------------------------------------------------------------- /Advanced_Topics_In_Machine_Learning/ML_Model_Interpretation/pretrained_cnn/pretrained_cnn.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alburke/ams-2020-ml-python-course/997f93303f01956685c49ecdfc010ebb8dfc9b4e/Advanced_Topics_In_Machine_Learning/ML_Model_Interpretation/pretrained_cnn/pretrained_cnn.h5 -------------------------------------------------------------------------------- /Advanced_Topics_In_Machine_Learning/ML_Model_Interpretation/pretrained_cnn/pretrained_cnn_metadata.json: -------------------------------------------------------------------------------- 1 | {"normalization_dict": {"v_wind_m_s01": [0.8430999384858744, 5.02621590127866], "u_wind_m_s01": [-0.2901194096210985, 4.6688756920528895], "temperature_kelvins": [290.34299022259927, 7.613423606954989], "reflectivity_dbz": [22.68552537091767, 15.761682079862304]}, "validation_file_names": ["/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20150331-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20150416-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20150422-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20150505-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20150510-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20150523-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20150528-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20150605-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20150612-0000_d01_model_patches.nc", 
"/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20150620-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20150625-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20150630-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20150706-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20150712-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20151031-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20151227-0000_d01_model_patches.nc"], "training_file_names": ["/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20101024-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20101122-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20110201-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20110308-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20110326-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20110404-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20110414-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20110420-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20110425-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20110509-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20110522-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20110527-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20110605-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20110610-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20110615-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20110620-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20110625-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20110704-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20110712-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20111116-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20120218-0000_d01_model_patches.nc", 
"/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20120315-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20120323-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20120401-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20120409-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20120426-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20120503-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20120510-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20120529-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20120606-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20120622-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20120701-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20120706-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20120715-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20121225-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20130318-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20130331-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20130411-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20130429-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20130513-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20130519-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20130527-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20130602-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20130613-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20130619-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20130625-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20130701-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20130708-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20130715-0000_d01_model_patches.nc", 
"/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20140220-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20140328-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20140407-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20140425-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20140508-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20140514-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20140526-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20140604-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20140609-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20140617-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20140622-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20140628-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20140705-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20140710-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20141123-0000_d01_model_patches.nc"], "num_training_batches_per_epoch": 32, "binarization_threshold": 0.005431033764034511, "num_validation_batches_per_epoch": 16, "num_examples_per_batch": 1024} -------------------------------------------------------------------------------- /Advanced_Topics_In_Machine_Learning/ML_Model_Interpretation/pretrained_cnn/pretrained_ucn.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alburke/ams-2020-ml-python-course/997f93303f01956685c49ecdfc010ebb8dfc9b4e/Advanced_Topics_In_Machine_Learning/ML_Model_Interpretation/pretrained_cnn/pretrained_ucn.h5 -------------------------------------------------------------------------------- /Advanced_Topics_In_Machine_Learning/ML_Model_Interpretation/pretrained_cnn/pretrained_ucn_metadata.json: -------------------------------------------------------------------------------- 1 | {"normalization_dict": {"v_wind_m_s01": [0.8430999384858744, 5.02621590127866], "u_wind_m_s01": [-0.2901194096210985, 4.6688756920528895], "temperature_kelvins": [290.34299022259927, 7.613423606954989], "reflectivity_dbz": [22.68552537091767, 15.761682079862304]}, "validation_file_names": ["/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20150331-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20150416-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20150422-0000_d01_model_patches.nc", 
"/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20150505-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20150510-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20150523-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20150528-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20150605-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20150612-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20150620-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20150625-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20150630-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20150706-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20150712-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20151031-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20151227-0000_d01_model_patches.nc"], "training_file_names": ["/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20101024-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20101122-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20110201-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20110308-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20110326-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20110404-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20110414-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20110420-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20110425-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20110509-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20110522-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20110527-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20110605-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20110610-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20110615-0000_d01_model_patches.nc", 
"/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20110620-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20110625-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20110704-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20110712-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20111116-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20120218-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20120315-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20120323-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20120401-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20120409-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20120426-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20120503-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20120510-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20120529-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20120606-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20120622-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20120701-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20120706-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20120715-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20121225-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20130318-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20130331-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20130411-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20130429-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20130513-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20130519-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20130527-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20130602-0000_d01_model_patches.nc", 
"/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20130613-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20130619-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20130625-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20130701-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20130708-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20130715-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20140220-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20140328-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20140407-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20140425-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20140508-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20140514-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20140526-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20140604-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20140609-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20140617-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20140622-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20140628-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20140705-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20140710-0000_d01_model_patches.nc", "/condo/swatwork/ralager/ams2019_short_course/track_data_ncar_ams_3km_nc_small/NCARSTORM_20141123-0000_d01_model_patches.nc"], "cnn_file_name": "/condo/swatwork/ralager/ams2019_short_course/pretrained_cnn/pretrained_cnn.h5", "num_training_batches_per_epoch": 32, "num_validation_batches_per_epoch": 16, "cnn_feature_layer_name": "flatten_1", "num_examples_per_batch": 1024} -------------------------------------------------------------------------------- /Advanced_Topics_In_Machine_Learning/ML_Model_Interpretation/wind_barb_explainer.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alburke/ams-2020-ml-python-course/997f93303f01956685c49ecdfc010ebb8dfc9b4e/Advanced_Topics_In_Machine_Learning/ML_Model_Interpretation/wind_barb_explainer.png -------------------------------------------------------------------------------- /Advanced_Topics_In_Machine_Learning/README.md: -------------------------------------------------------------------------------- 1 | 
Machine Learning In Python For Environmental Science Problems Short Course: Advanced Topics In Machine Learning 2 | 3 | -------------------------------------------------------------------------------- /Introduction_To_Machine_Learning/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alburke/ams-2020-ml-python-course/997f93303f01956685c49ecdfc010ebb8dfc9b4e/Introduction_To_Machine_Learning/.DS_Store -------------------------------------------------------------------------------- /Introduction_To_Machine_Learning/Data_Science_Fundamentals/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alburke/ams-2020-ml-python-course/997f93303f01956685c49ecdfc010ebb8dfc9b4e/Introduction_To_Machine_Learning/Data_Science_Fundamentals/.DS_Store -------------------------------------------------------------------------------- /Introduction_To_Machine_Learning/Data_Science_Fundamentals/README.md: -------------------------------------------------------------------------------- 1 | # Data Science Fundamentals 2 | 3 | -------------------------------------------------------------------------------- /Introduction_To_Machine_Learning/Data_Science_Fundamentals/__pycache__/attributes_diagrams.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alburke/ams-2020-ml-python-course/997f93303f01956685c49ecdfc010ebb8dfc9b4e/Introduction_To_Machine_Learning/Data_Science_Fundamentals/__pycache__/attributes_diagrams.cpython-36.pyc -------------------------------------------------------------------------------- /Introduction_To_Machine_Learning/Data_Science_Fundamentals/__pycache__/performance_diagrams.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alburke/ams-2020-ml-python-course/997f93303f01956685c49ecdfc010ebb8dfc9b4e/Introduction_To_Machine_Learning/Data_Science_Fundamentals/__pycache__/performance_diagrams.cpython-36.pyc -------------------------------------------------------------------------------- /Introduction_To_Machine_Learning/Data_Science_Fundamentals/__pycache__/roc_curves.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alburke/ams-2020-ml-python-course/997f93303f01956685c49ecdfc010ebb8dfc9b4e/Introduction_To_Machine_Learning/Data_Science_Fundamentals/__pycache__/roc_curves.cpython-36.pyc -------------------------------------------------------------------------------- /Introduction_To_Machine_Learning/Data_Science_Fundamentals/__pycache__/utils.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alburke/ams-2020-ml-python-course/997f93303f01956685c49ecdfc010ebb8dfc9b4e/Introduction_To_Machine_Learning/Data_Science_Fundamentals/__pycache__/utils.cpython-36.pyc -------------------------------------------------------------------------------- /Introduction_To_Machine_Learning/Data_Science_Fundamentals/attributes_diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alburke/ams-2020-ml-python-course/997f93303f01956685c49ecdfc010ebb8dfc9b4e/Introduction_To_Machine_Learning/Data_Science_Fundamentals/attributes_diagram.png 
-------------------------------------------------------------------------------- /Introduction_To_Machine_Learning/Data_Science_Fundamentals/contingency_table.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alburke/ams-2020-ml-python-course/997f93303f01956685c49ecdfc010ebb8dfc9b4e/Introduction_To_Machine_Learning/Data_Science_Fundamentals/contingency_table.png -------------------------------------------------------------------------------- /Introduction_To_Machine_Learning/Data_Science_Fundamentals/ct_scores.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alburke/ams-2020-ml-python-course/997f93303f01956685c49ecdfc010ebb8dfc9b4e/Introduction_To_Machine_Learning/Data_Science_Fundamentals/ct_scores.png -------------------------------------------------------------------------------- /Introduction_To_Machine_Learning/Data_Science_Fundamentals/overfitting.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alburke/ams-2020-ml-python-course/997f93303f01956685c49ecdfc010ebb8dfc9b4e/Introduction_To_Machine_Learning/Data_Science_Fundamentals/overfitting.png -------------------------------------------------------------------------------- /Introduction_To_Machine_Learning/Data_Science_Fundamentals/performance_diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alburke/ams-2020-ml-python-course/997f93303f01956685c49ecdfc010ebb8dfc9b4e/Introduction_To_Machine_Learning/Data_Science_Fundamentals/performance_diagram.png -------------------------------------------------------------------------------- /Introduction_To_Machine_Learning/Data_Science_Fundamentals/performance_diagrams.py: -------------------------------------------------------------------------------- 1 | """Methods for plotting performance diagram.""" 2 | 3 | import numpy 4 | import matplotlib.colors 5 | import matplotlib.pyplot as pyplot 6 | 7 | DEFAULT_LINE_COLOUR = numpy.array([228, 26, 28], dtype=float) / 255 8 | DEFAULT_LINE_WIDTH = 3 9 | DEFAULT_BIAS_LINE_COLOUR = numpy.full(3, 152. / 255) 10 | DEFAULT_BIAS_LINE_WIDTH = 2 11 | 12 | LEVELS_FOR_CSI_CONTOURS = numpy.linspace(0, 1, num=11, dtype=float) 13 | LEVELS_FOR_BIAS_CONTOURS = numpy.array( 14 | [0.25, 0.5, 0.75, 1., 1.5, 2., 3., 5.]) 15 | 16 | BIAS_STRING_FORMAT = '%.2f' 17 | BIAS_LABEL_PADDING_PX = 10 18 | 19 | FIGURE_WIDTH_INCHES = 10 20 | FIGURE_HEIGHT_INCHES = 10 21 | 22 | FONT_SIZE = 20 23 | pyplot.rc('font', size=FONT_SIZE) 24 | pyplot.rc('axes', titlesize=FONT_SIZE) 25 | pyplot.rc('axes', labelsize=FONT_SIZE) 26 | pyplot.rc('xtick', labelsize=FONT_SIZE) 27 | pyplot.rc('ytick', labelsize=FONT_SIZE) 28 | pyplot.rc('legend', fontsize=FONT_SIZE) 29 | pyplot.rc('figure', titlesize=FONT_SIZE) 30 | 31 | 32 | def _get_sr_pod_grid(success_ratio_spacing=0.01, pod_spacing=0.01): 33 | """Creates grid in SR-POD (success ratio / probability of detection) space. 34 | 35 | M = number of rows (unique POD values) in grid 36 | N = number of columns (unique success ratios) in grid 37 | 38 | :param success_ratio_spacing: Spacing between grid cells in adjacent 39 | columns. 40 | :param pod_spacing: Spacing between grid cells in adjacent rows. 41 | :return: success_ratio_matrix: M-by-N numpy array of success ratios. 42 | Success ratio increases with column index. 
43 | :return: pod_matrix: M-by-N numpy array of POD values. POD decreases with 44 | row index. 45 | """ 46 | 47 | num_success_ratios = 1 + int(numpy.ceil(1. / success_ratio_spacing)) 48 | num_pod_values = 1 + int(numpy.ceil(1. / pod_spacing)) 49 | 50 | unique_success_ratios = numpy.linspace(0., 1., num=num_success_ratios) 51 | unique_pod_values = numpy.linspace(0., 1., num=num_pod_values)[::-1] 52 | return numpy.meshgrid(unique_success_ratios, unique_pod_values) 53 | 54 | 55 | def _csi_from_sr_and_pod(success_ratio_array, pod_array): 56 | """Computes CSI (critical success index) from success ratio and POD. 57 | 58 | POD = probability of detection 59 | 60 | :param success_ratio_array: numpy array (any shape) of success ratios. 61 | :param pod_array: numpy array (same shape) of POD values. 62 | :return: csi_array: numpy array (same shape) of CSI values. 63 | """ 64 | 65 | return (success_ratio_array ** -1 + pod_array ** -1 - 1.) ** -1 66 | 67 | 68 | def _bias_from_sr_and_pod(success_ratio_array, pod_array): 69 | """Computes frequency bias from success ratio and POD. 70 | 71 | POD = probability of detection 72 | 73 | :param success_ratio_array: numpy array (any shape) of success ratios. 74 | :param pod_array: numpy array (same shape) of POD values. 75 | :return: frequency_bias_array: numpy array (same shape) of frequency biases. 76 | """ 77 | 78 | return pod_array / success_ratio_array 79 | 80 | 81 | def _get_csi_colour_scheme(): 82 | """Returns colour scheme for CSI (critical success index). 83 | 84 | :return: colour_map_object: Colour scheme (instance of 85 | `matplotlib.colors.ListedColormap`). 86 | :return: colour_norm_object: Instance of `matplotlib.colors.BoundaryNorm`, 87 | defining the scale of the colour map. 88 | """ 89 | 90 | this_colour_map_object = pyplot.cm.Blues 91 | this_colour_norm_object = matplotlib.colors.BoundaryNorm( 92 | LEVELS_FOR_CSI_CONTOURS, this_colour_map_object.N) 93 | 94 | rgba_matrix = this_colour_map_object(this_colour_norm_object( 95 | LEVELS_FOR_CSI_CONTOURS)) 96 | colour_list = [ 97 | rgba_matrix[i, ..., :-1] for i in range(rgba_matrix.shape[0]) 98 | ] 99 | 100 | colour_map_object = matplotlib.colors.ListedColormap(colour_list) 101 | colour_map_object.set_under(numpy.array([1, 1, 1])) 102 | colour_norm_object = matplotlib.colors.BoundaryNorm( 103 | LEVELS_FOR_CSI_CONTOURS, colour_map_object.N) 104 | 105 | return colour_map_object, colour_norm_object 106 | 107 | 108 | def _add_colour_bar( 109 | axes_object, colour_map_object, values_to_colour, min_colour_value, 110 | max_colour_value, colour_norm_object=None, 111 | orientation_string='vertical', extend_min=True, extend_max=True, 112 | fraction_of_axis_length=1., font_size=FONT_SIZE): 113 | """Adds colour bar to existing axes. 114 | 115 | :param axes_object: Existing axes (instance of 116 | `matplotlib.axes._subplots.AxesSubplot`). 117 | :param colour_map_object: Colour scheme (instance of 118 | `matplotlib.pyplot.cm`). 119 | :param values_to_colour: numpy array of values to colour. 120 | :param min_colour_value: Minimum value in colour map. 121 | :param max_colour_value: Max value in colour map. 122 | :param colour_norm_object: Instance of `matplotlib.colors.BoundaryNorm`, 123 | defining the scale of the colour map. If `colour_norm_object is None`, 124 | will assume that scale is linear. 125 | :param orientation_string: Orientation of colour bar ("vertical" or 126 | "horizontal"). 127 | :param extend_min: Boolean flag. If True, the bottom of the colour bar will 128 | have an arrow. 
If False, it will be a flat line, suggesting that lower 129 | values are not possible. 130 | :param extend_max: Same but for top of colour bar. 131 | :param fraction_of_axis_length: Fraction of axis length (y-axis if 132 | orientation is "vertical", x-axis if orientation is "horizontal") 133 | occupied by colour bar. 134 | :param font_size: Font size for labels on colour bar. 135 | :return: colour_bar_object: Colour bar (instance of 136 | `matplotlib.pyplot.colorbar`) created by this method. 137 | """ 138 | 139 | if colour_norm_object is None: 140 | colour_norm_object = matplotlib.colors.Normalize( 141 | vmin=min_colour_value, vmax=max_colour_value, clip=False) 142 | 143 | scalar_mappable_object = pyplot.cm.ScalarMappable( 144 | cmap=colour_map_object, norm=colour_norm_object) 145 | scalar_mappable_object.set_array(values_to_colour) 146 | 147 | if extend_min and extend_max: 148 | extend_string = 'both' 149 | elif extend_min: 150 | extend_string = 'min' 151 | elif extend_max: 152 | extend_string = 'max' 153 | else: 154 | extend_string = 'neither' 155 | 156 | if orientation_string == 'horizontal': 157 | padding = 0.075 158 | else: 159 | padding = 0.05 160 | 161 | colour_bar_object = pyplot.colorbar( 162 | ax=axes_object, mappable=scalar_mappable_object, 163 | orientation=orientation_string, pad=padding, extend=extend_string, 164 | shrink=fraction_of_axis_length) 165 | 166 | colour_bar_object.ax.tick_params(labelsize=font_size) 167 | return colour_bar_object 168 | 169 | 170 | def get_points_in_perf_diagram(observed_labels, forecast_probabilities): 171 | """Creates points for performance diagram. 172 | 173 | E = number of examples 174 | T = number of binarization thresholds 175 | 176 | :param observed_labels: length-E numpy array of class labels (integers in 177 | 0...1). 178 | :param forecast_probabilities: length-E numpy array with forecast 179 | probabilities of label = 1. 180 | :return: pod_by_threshold: length-T numpy array of POD (probability of 181 | detection) values. 182 | :return: success_ratio_by_threshold: length-T numpy array of success ratios. 
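
    Example (a minimal sketch; the labels and probabilities are made up):

        observed_labels = numpy.array([0, 1, 1, 0, 1], dtype=int)
        forecast_probabilities = numpy.array([0.1, 0.9, 0.6, 0.4, 0.2])
        pod_by_threshold, success_ratio_by_threshold = (
            get_points_in_perf_diagram(
                observed_labels=observed_labels,
                forecast_probabilities=forecast_probabilities)
        )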
183 | """ 184 | 185 | assert numpy.all(numpy.logical_or( 186 | observed_labels == 0, observed_labels == 1 187 | )) 188 | 189 | assert numpy.all(numpy.logical_and( 190 | forecast_probabilities >= 0, forecast_probabilities <= 1 191 | )) 192 | 193 | observed_labels = observed_labels.astype(int) 194 | binarization_thresholds = numpy.linspace(0, 1, num=1001, dtype=float) 195 | 196 | num_thresholds = len(binarization_thresholds) 197 | pod_by_threshold = numpy.full(num_thresholds, numpy.nan) 198 | success_ratio_by_threshold = numpy.full(num_thresholds, numpy.nan) 199 | 200 | for k in range(num_thresholds): 201 | these_forecast_labels = ( 202 | forecast_probabilities >= binarization_thresholds[k] 203 | ).astype(int) 204 | 205 | this_num_hits = numpy.sum(numpy.logical_and( 206 | these_forecast_labels == 1, observed_labels == 1 207 | )) 208 | 209 | this_num_false_alarms = numpy.sum(numpy.logical_and( 210 | these_forecast_labels == 1, observed_labels == 0 211 | )) 212 | 213 | this_num_misses = numpy.sum(numpy.logical_and( 214 | these_forecast_labels == 0, observed_labels == 1 215 | )) 216 | 217 | try: 218 | pod_by_threshold[k] = ( 219 | float(this_num_hits) / (this_num_hits + this_num_misses) 220 | ) 221 | except ZeroDivisionError: 222 | pass 223 | 224 | try: 225 | success_ratio_by_threshold[k] = ( 226 | float(this_num_hits) / (this_num_hits + this_num_false_alarms) 227 | ) 228 | except ZeroDivisionError: 229 | pass 230 | 231 | pod_by_threshold = numpy.array([1.] + pod_by_threshold.tolist() + [0.]) 232 | success_ratio_by_threshold = numpy.array( 233 | [0.] + success_ratio_by_threshold.tolist() + [1.] 234 | ) 235 | 236 | return pod_by_threshold, success_ratio_by_threshold 237 | 238 | 239 | def plot_performance_diagram( 240 | observed_labels, forecast_probabilities, 241 | line_colour=DEFAULT_LINE_COLOUR, line_width=DEFAULT_LINE_WIDTH, 242 | bias_line_colour=DEFAULT_BIAS_LINE_COLOUR, 243 | bias_line_width=DEFAULT_BIAS_LINE_WIDTH, axes_object=None): 244 | """Plots performance diagram. 245 | 246 | E = number of examples 247 | 248 | :param observed_labels: length-E numpy array of class labels (integers in 249 | 0...1). 250 | :param forecast_probabilities: length-E numpy array with forecast 251 | probabilities of label = 1. 252 | :param line_colour: Colour (in any format accepted by `matplotlib.colors`). 253 | :param line_width: Line width (real positive number). 254 | :param bias_line_colour: Colour of contour lines for frequency bias. 255 | :param bias_line_width: Width of contour lines for frequency bias. 256 | :param axes_object: Will plot on these axes (instance of 257 | `matplotlib.axes._subplots.AxesSubplot`). If `axes_object is None`, 258 | will create new axes. 259 | :return: pod_by_threshold: See doc for `get_points_in_perf_diagram`. 260 | detection) values. 261 | :return: success_ratio_by_threshold: Same. 
262 | """ 263 | 264 | pod_by_threshold, success_ratio_by_threshold = get_points_in_perf_diagram( 265 | observed_labels=observed_labels, 266 | forecast_probabilities=forecast_probabilities) 267 | 268 | if axes_object is None: 269 | _, axes_object = pyplot.subplots( 270 | 1, 1, figsize=(FIGURE_WIDTH_INCHES, FIGURE_HEIGHT_INCHES) 271 | ) 272 | 273 | success_ratio_matrix, pod_matrix = _get_sr_pod_grid() 274 | csi_matrix = _csi_from_sr_and_pod(success_ratio_matrix, pod_matrix) 275 | frequency_bias_matrix = _bias_from_sr_and_pod( 276 | success_ratio_matrix, pod_matrix) 277 | 278 | this_colour_map_object, this_colour_norm_object = _get_csi_colour_scheme() 279 | 280 | pyplot.contourf( 281 | success_ratio_matrix, pod_matrix, csi_matrix, LEVELS_FOR_CSI_CONTOURS, 282 | cmap=this_colour_map_object, norm=this_colour_norm_object, vmin=0., 283 | vmax=1., axes=axes_object) 284 | 285 | colour_bar_object = _add_colour_bar( 286 | axes_object=axes_object, colour_map_object=this_colour_map_object, 287 | colour_norm_object=this_colour_norm_object, 288 | values_to_colour=csi_matrix, min_colour_value=0., 289 | max_colour_value=1., orientation_string='vertical', 290 | extend_min=False, extend_max=False) 291 | colour_bar_object.set_label('CSI (critical success index)') 292 | 293 | bias_colour_tuple = () 294 | for _ in range(len(LEVELS_FOR_BIAS_CONTOURS)): 295 | bias_colour_tuple += (bias_line_colour,) 296 | 297 | bias_contour_object = pyplot.contour( 298 | success_ratio_matrix, pod_matrix, frequency_bias_matrix, 299 | LEVELS_FOR_BIAS_CONTOURS, colors=bias_colour_tuple, 300 | linewidths=bias_line_width, linestyles='dashed', axes=axes_object) 301 | pyplot.clabel( 302 | bias_contour_object, inline=True, inline_spacing=BIAS_LABEL_PADDING_PX, 303 | fmt=BIAS_STRING_FORMAT, fontsize=FONT_SIZE) 304 | 305 | nan_flags = numpy.logical_or( 306 | numpy.isnan(success_ratio_by_threshold), numpy.isnan(pod_by_threshold) 307 | ) 308 | 309 | if not numpy.all(nan_flags): 310 | real_indices = numpy.where(numpy.invert(nan_flags))[0] 311 | axes_object.plot( 312 | success_ratio_by_threshold[real_indices], 313 | pod_by_threshold[real_indices], color=line_colour, 314 | linestyle='solid', linewidth=line_width) 315 | 316 | axes_object.set_xlabel('Success ratio (1 - FAR)') 317 | axes_object.set_ylabel('POD (probability of detection)') 318 | axes_object.set_xlim(0., 1.) 319 | axes_object.set_ylim(0., 1.) 
320 | 321 | return pod_by_threshold, success_ratio_by_threshold 322 | -------------------------------------------------------------------------------- /Introduction_To_Machine_Learning/Data_Science_Fundamentals/roc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alburke/ams-2020-ml-python-course/997f93303f01956685c49ecdfc010ebb8dfc9b4e/Introduction_To_Machine_Learning/Data_Science_Fundamentals/roc.png -------------------------------------------------------------------------------- /Introduction_To_Machine_Learning/Data_Science_Fundamentals/roc_curves.py: -------------------------------------------------------------------------------- 1 | """Methods for plotting ROC (receiver operating characteristic) curve.""" 2 | 3 | import numpy 4 | import matplotlib.colors 5 | import matplotlib.pyplot as pyplot 6 | import Introduction_To_Machine_Learning.Data_Science_Fundamentals.performance_diagrams as performance_diagrams 7 | 8 | DEFAULT_LINE_COLOUR = numpy.array([228, 26, 28], dtype=float) / 255 9 | DEFAULT_LINE_WIDTH = 3 10 | DEFAULT_RANDOM_LINE_COLOUR = numpy.full(3, 152.0 / 255) 11 | DEFAULT_RANDOM_LINE_WIDTH = 2 12 | 13 | LEVELS_FOR_PEIRCE_CONTOURS = numpy.linspace(0, 1, num=11, dtype=float) 14 | 15 | FIGURE_WIDTH_INCHES = 10 16 | FIGURE_HEIGHT_INCHES = 10 17 | 18 | FONT_SIZE = 20 19 | pyplot.rc("font", size=FONT_SIZE) 20 | pyplot.rc("axes", titlesize=FONT_SIZE) 21 | pyplot.rc("axes", labelsize=FONT_SIZE) 22 | pyplot.rc("xtick", labelsize=FONT_SIZE) 23 | pyplot.rc("ytick", labelsize=FONT_SIZE) 24 | pyplot.rc("legend", fontsize=FONT_SIZE) 25 | pyplot.rc("figure", titlesize=FONT_SIZE) 26 | 27 | 28 | def _get_pofd_pod_grid(pofd_spacing=0.01, pod_spacing=0.01): 29 | """Creates grid in POFD-POD space. 30 | 31 | M = number of rows (unique POD values) in grid 32 | N = number of columns (unique POFD values) in grid 33 | 34 | :param pofd_spacing: Spacing between grid cells in adjacent columns. 35 | :param pod_spacing: Spacing between grid cells in adjacent rows. 36 | :return: pofd_matrix: M-by-N numpy array of POFD values. 37 | :return: pod_matrix: M-by-N numpy array of POD values. 38 | """ 39 | 40 | num_pofd_values = 1 + int(numpy.ceil(1.0 / pofd_spacing)) 41 | num_pod_values = 1 + int(numpy.ceil(1.0 / pod_spacing)) 42 | 43 | unique_pofd_values = numpy.linspace(0.0, 1.0, num=num_pofd_values) 44 | unique_pod_values = numpy.linspace(0.0, 1.0, num=num_pod_values)[::-1] 45 | return numpy.meshgrid(unique_pofd_values, unique_pod_values) 46 | 47 | 48 | def _get_peirce_colour_scheme(): 49 | """Returns colour scheme for Peirce score. 50 | 51 | :return: colour_map_object: Colour scheme (instance of 52 | `matplotlib.colors.ListedColormap`). 53 | :return: colour_norm_object: Instance of `matplotlib.colors.BoundaryNorm`, 54 | defining the scale of the colour map. 
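
    (The Peirce score, also called the true skill statistic, is POD minus
    POFD; values below the lowest contour level are drawn in white via
    `set_under`.)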
55 | """ 56 | 57 | this_colour_map_object = pyplot.cm.Blues 58 | this_colour_norm_object = matplotlib.colors.BoundaryNorm( 59 | LEVELS_FOR_PEIRCE_CONTOURS, this_colour_map_object.N 60 | ) 61 | 62 | rgba_matrix = this_colour_map_object( 63 | this_colour_norm_object(LEVELS_FOR_PEIRCE_CONTOURS) 64 | ) 65 | 66 | colour_list = [rgba_matrix[i, ..., :-1] for i in range(rgba_matrix.shape[0])] 67 | 68 | colour_map_object = matplotlib.colors.ListedColormap(colour_list) 69 | colour_map_object.set_under(numpy.array([1, 1, 1])) 70 | colour_norm_object = matplotlib.colors.BoundaryNorm( 71 | LEVELS_FOR_PEIRCE_CONTOURS, colour_map_object.N 72 | ) 73 | 74 | return colour_map_object, colour_norm_object 75 | 76 | 77 | def get_points_in_roc_curve(observed_labels, forecast_probabilities): 78 | """Creates points for ROC curve. 79 | 80 | E = number of examples 81 | T = number of binarization thresholds 82 | 83 | :param observed_labels: length-E numpy array of class labels (integers in 84 | 0...1). 85 | :param forecast_probabilities: length-E numpy array with forecast 86 | probabilities of label = 1. 87 | :return: pofd_by_threshold: length-T numpy array of POFD (probability of 88 | false detection) values. 89 | :return: pod_by_threshold: length-T numpy array of POD (probability of 90 | detection) values. 91 | """ 92 | 93 | assert numpy.all(numpy.logical_or(observed_labels == 0, observed_labels == 1)) 94 | 95 | assert numpy.all( 96 | numpy.logical_and(forecast_probabilities >= 0, forecast_probabilities <= 1) 97 | ) 98 | 99 | observed_labels = observed_labels.astype(int) 100 | binarization_thresholds = numpy.linspace(0, 1, num=1001, dtype=float) 101 | 102 | num_thresholds = len(binarization_thresholds) 103 | pofd_by_threshold = numpy.full(num_thresholds, numpy.nan) 104 | pod_by_threshold = numpy.full(num_thresholds, numpy.nan) 105 | 106 | for k in range(num_thresholds): 107 | these_forecast_labels = ( 108 | forecast_probabilities >= binarization_thresholds[k] 109 | ).astype(int) 110 | 111 | this_num_hits = numpy.sum( 112 | numpy.logical_and(these_forecast_labels == 1, observed_labels == 1) 113 | ) 114 | 115 | this_num_false_alarms = numpy.sum( 116 | numpy.logical_and(these_forecast_labels == 1, observed_labels == 0) 117 | ) 118 | 119 | this_num_misses = numpy.sum( 120 | numpy.logical_and(these_forecast_labels == 0, observed_labels == 1) 121 | ) 122 | 123 | this_num_correct_nulls = numpy.sum( 124 | numpy.logical_and(these_forecast_labels == 0, observed_labels == 0) 125 | ) 126 | 127 | try: 128 | pofd_by_threshold[k] = float(this_num_false_alarms) / ( 129 | this_num_false_alarms + this_num_correct_nulls 130 | ) 131 | except ZeroDivisionError: 132 | pass 133 | 134 | try: 135 | pod_by_threshold[k] = float(this_num_hits) / ( 136 | this_num_hits + this_num_misses 137 | ) 138 | except ZeroDivisionError: 139 | pass 140 | 141 | pod_by_threshold = numpy.array([1.0] + pod_by_threshold.tolist() + [0.0]) 142 | pofd_by_threshold = numpy.array([1.0] + pofd_by_threshold.tolist() + [0.0]) 143 | 144 | return pofd_by_threshold, pod_by_threshold 145 | 146 | 147 | def plot_roc_curve( 148 | observed_labels, 149 | forecast_probabilities, 150 | line_colour=DEFAULT_LINE_COLOUR, 151 | line_width=DEFAULT_LINE_WIDTH, 152 | random_line_colour=DEFAULT_RANDOM_LINE_COLOUR, 153 | random_line_width=DEFAULT_RANDOM_LINE_WIDTH, 154 | axes_object=None, 155 | ): 156 | """Plots ROC curve. 157 | 158 | E = number of examples 159 | 160 | :param observed_labels: length-E numpy array of class labels (integers in 161 | 0...1). 
162 | :param forecast_probabilities: length-E numpy array with forecast 163 | probabilities of label = 1. 164 | :param line_colour: Colour (in any format accepted by `matplotlib.colors`). 165 | :param line_width: Line width (real positive number). 166 | :param random_line_colour: Colour of reference line (ROC curve for random 167 | predictor). 168 | :param random_line_width: Width of reference line (ROC curve for random 169 | predictor). 170 | :param axes_object: Will plot on these axes (instance of 171 | `matplotlib.axes._subplots.AxesSubplot`). If `axes_object is None`, 172 | will create new axes. 173 | :return: pofd_by_threshold: See doc for `get_points_in_roc_curve`. 174 | :return: pod_by_threshold: Same. 175 | """ 176 | 177 | pofd_by_threshold, pod_by_threshold = get_points_in_roc_curve( 178 | observed_labels=observed_labels, forecast_probabilities=forecast_probabilities 179 | ) 180 | 181 | if axes_object is None: 182 | _, axes_object = pyplot.subplots( 183 | 1, 1, figsize=(FIGURE_WIDTH_INCHES, FIGURE_HEIGHT_INCHES) 184 | ) 185 | 186 | pofd_matrix, pod_matrix = _get_pofd_pod_grid() 187 | peirce_score_matrix = pod_matrix - pofd_matrix 188 | 189 | colour_map_object, colour_norm_object = _get_peirce_colour_scheme() 190 | 191 | pyplot.contourf( 192 | pofd_matrix, 193 | pod_matrix, 194 | peirce_score_matrix, 195 | LEVELS_FOR_PEIRCE_CONTOURS, 196 | cmap=colour_map_object, 197 | norm=colour_norm_object, 198 | vmin=0.0, 199 | vmax=1.0, 200 | axes=axes_object, 201 | ) 202 | 203 | # TODO(thunderhoser): Calling private method is a HACK. 204 | colour_bar_object = performance_diagrams._add_colour_bar( 205 | axes_object=axes_object, 206 | colour_map_object=colour_map_object, 207 | colour_norm_object=colour_norm_object, 208 | values_to_colour=peirce_score_matrix, 209 | min_colour_value=0.0, 210 | max_colour_value=1.0, 211 | orientation_string="vertical", 212 | extend_min=False, 213 | extend_max=False, 214 | ) 215 | 216 | print(colour_bar_object) 217 | colour_bar_object.set_label("Peirce score") 218 | 219 | random_x_coords = numpy.array([0.0, 1.0]) 220 | random_y_coords = numpy.array([0.0, 1.0]) 221 | axes_object.plot( 222 | random_x_coords, 223 | random_y_coords, 224 | color=random_line_colour, 225 | linestyle="dashed", 226 | linewidth=random_line_width, 227 | ) 228 | 229 | nan_flags = numpy.logical_or( 230 | numpy.isnan(pofd_by_threshold), numpy.isnan(pod_by_threshold) 231 | ) 232 | 233 | if not numpy.all(nan_flags): 234 | real_indices = numpy.where(numpy.invert(nan_flags))[0] 235 | axes_object.plot( 236 | pofd_by_threshold[real_indices], 237 | pod_by_threshold[real_indices], 238 | color=line_colour, 239 | linestyle="solid", 240 | linewidth=line_width, 241 | ) 242 | 243 | axes_object.set_xlabel("POFD (probability of false detection)") 244 | axes_object.set_ylabel("POD (probability of detection)") 245 | axes_object.set_xlim(0.0, 1.0) 246 | axes_object.set_ylim(0.0, 1.0) 247 | 248 | return pofd_by_threshold, pod_by_threshold 249 | -------------------------------------------------------------------------------- /Introduction_To_Machine_Learning/Introduction_to_ML_and_AI/Images/AI-vs-ML-vs-Deep-Learning.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alburke/ams-2020-ml-python-course/997f93303f01956685c49ecdfc010ebb8dfc9b4e/Introduction_To_Machine_Learning/Introduction_to_ML_and_AI/Images/AI-vs-ML-vs-Deep-Learning.png -------------------------------------------------------------------------------- 
/Introduction_To_Machine_Learning/Introduction_to_ML_and_AI/Images/PCAexample.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alburke/ams-2020-ml-python-course/997f93303f01956685c49ecdfc010ebb8dfc9b4e/Introduction_To_Machine_Learning/Introduction_to_ML_and_AI/Images/PCAexample.png -------------------------------------------------------------------------------- /Introduction_To_Machine_Learning/Introduction_to_ML_and_AI/Images/SVD_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alburke/ams-2020-ml-python-course/997f93303f01956685c49ecdfc010ebb8dfc9b4e/Introduction_To_Machine_Learning/Introduction_to_ML_and_AI/Images/SVD_example.png -------------------------------------------------------------------------------- /Introduction_To_Machine_Learning/Introduction_to_ML_and_AI/Images/ml_comic.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alburke/ams-2020-ml-python-course/997f93303f01956685c49ecdfc010ebb8dfc9b4e/Introduction_To_Machine_Learning/Introduction_to_ML_and_AI/Images/ml_comic.png -------------------------------------------------------------------------------- /Introduction_To_Machine_Learning/Introduction_to_ML_and_AI/Images/pca.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alburke/ams-2020-ml-python-course/997f93303f01956685c49ecdfc010ebb8dfc9b4e/Introduction_To_Machine_Learning/Introduction_to_ML_and_AI/Images/pca.gif -------------------------------------------------------------------------------- /Introduction_To_Machine_Learning/Introduction_to_ML_and_AI/README.md: -------------------------------------------------------------------------------- 1 | Introduction To Machine Learning And Artificial Intelligence 2 | 3 | In this lecture, we will explore different pre-processing techniques applied to the input data for machine learning models, including scaling, imputation, and others. We will also introduce the weather dataset to be used throughout the course.
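As a taste of the first step, here is a minimal sketch of the two named techniques using scikit-learn; the toy feature matrix below is a placeholder, not the course dataset:

```python
import numpy as np
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler

# Toy feature matrix (e.g., temperature and wind speed) with one missing value.
features = np.array([
    [290.0, 5.2],
    [np.nan, 4.8],
    [288.5, 6.1],
])

# Imputation: fill missing entries with the column mean.
features = SimpleImputer(strategy="mean").fit_transform(features)

# Scaling: standardize each column to zero mean and unit variance.
features = StandardScaler().fit_transform(features)
print(features)
```

Fitting the imputer and scaler on training data only, then reusing them on validation and test data, avoids leaking information across splits.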
4 | -------------------------------------------------------------------------------- /Introduction_To_Machine_Learning/README.md: -------------------------------------------------------------------------------- 1 | Machine Learning In Python For Environmental Science Problems Short Course: Introduction to Machine Learning 2 | -------------------------------------------------------------------------------- /Introduction_To_Machine_Learning/Supervised_Learning_Algorithims/BP.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alburke/ams-2020-ml-python-course/997f93303f01956685c49ecdfc010ebb8dfc9b4e/Introduction_To_Machine_Learning/Supervised_Learning_Algorithims/BP.png -------------------------------------------------------------------------------- /Introduction_To_Machine_Learning/Supervised_Learning_Algorithims/FP.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alburke/ams-2020-ml-python-course/997f93303f01956685c49ecdfc010ebb8dfc9b4e/Introduction_To_Machine_Learning/Supervised_Learning_Algorithims/FP.png -------------------------------------------------------------------------------- /Introduction_To_Machine_Learning/Supervised_Learning_Algorithims/Kernel.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alburke/ams-2020-ml-python-course/997f93303f01956685c49ecdfc010ebb8dfc9b4e/Introduction_To_Machine_Learning/Supervised_Learning_Algorithims/Kernel.png -------------------------------------------------------------------------------- /Introduction_To_Machine_Learning/Supervised_Learning_Algorithims/LC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alburke/ams-2020-ml-python-course/997f93303f01956685c49ecdfc010ebb8dfc9b4e/Introduction_To_Machine_Learning/Supervised_Learning_Algorithims/LC.png -------------------------------------------------------------------------------- /Introduction_To_Machine_Learning/Supervised_Learning_Algorithims/LR.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alburke/ams-2020-ml-python-course/997f93303f01956685c49ecdfc010ebb8dfc9b4e/Introduction_To_Machine_Learning/Supervised_Learning_Algorithims/LR.png -------------------------------------------------------------------------------- /Introduction_To_Machine_Learning/Supervised_Learning_Algorithims/Models.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alburke/ams-2020-ml-python-course/997f93303f01956685c49ecdfc010ebb8dfc9b4e/Introduction_To_Machine_Learning/Supervised_Learning_Algorithims/Models.png -------------------------------------------------------------------------------- /Introduction_To_Machine_Learning/Supervised_Learning_Algorithims/NN.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alburke/ams-2020-ml-python-course/997f93303f01956685c49ecdfc010ebb8dfc9b4e/Introduction_To_Machine_Learning/Supervised_Learning_Algorithims/NN.png -------------------------------------------------------------------------------- /Introduction_To_Machine_Learning/Supervised_Learning_Algorithims/README.md: -------------------------------------------------------------------------------- 1 | Machine Learning In Python For Environmental 
Science Problems Short Course: Introduction to Machine Learning 2 | -------------------------------------------------------------------------------- /Introduction_To_Machine_Learning/Supervised_Learning_Algorithims/SK.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alburke/ams-2020-ml-python-course/997f93303f01956685c49ecdfc010ebb8dfc9b4e/Introduction_To_Machine_Learning/Supervised_Learning_Algorithims/SK.png -------------------------------------------------------------------------------- /Introduction_To_Machine_Learning/Supervised_Learning_Algorithims/SML.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alburke/ams-2020-ml-python-course/997f93303f01956685c49ecdfc010ebb8dfc9b4e/Introduction_To_Machine_Learning/Supervised_Learning_Algorithims/SML.png -------------------------------------------------------------------------------- /Introduction_To_Machine_Learning/Supervised_Learning_Algorithims/SVM.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alburke/ams-2020-ml-python-course/997f93303f01956685c49ecdfc010ebb8dfc9b4e/Introduction_To_Machine_Learning/Supervised_Learning_Algorithims/SVM.png -------------------------------------------------------------------------------- /Introduction_To_Machine_Learning/Supervised_Learning_Algorithims/SupervisedML.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alburke/ams-2020-ml-python-course/997f93303f01956685c49ecdfc010ebb8dfc9b4e/Introduction_To_Machine_Learning/Supervised_Learning_Algorithims/SupervisedML.png -------------------------------------------------------------------------------- /Introduction_To_Machine_Learning/Supervised_Learning_Algorithims/contingency_table.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alburke/ams-2020-ml-python-course/997f93303f01956685c49ecdfc010ebb8dfc9b4e/Introduction_To_Machine_Learning/Supervised_Learning_Algorithims/contingency_table.png -------------------------------------------------------------------------------- /Introduction_To_Machine_Learning/Supervised_Learning_Algorithims/download_data.py: -------------------------------------------------------------------------------- 1 | """ 2 | Developed by David John Gagne II 3 | AMS 2019 Short Course 4 | """ 5 | 6 | from urllib.request import urlretrieve 7 | import os 8 | from os.path import exists, join 9 | import tarfile 10 | 11 | if not exists("data"): 12 | os.mkdir("data") 13 | csv_tar_file = "https://storage.googleapis.com/track_data_ncar_ams_3km_csv_small/track_data_ncar_ams_3km_csv_small.tar.gz" 14 | nc_tar_file = "https://storage.googleapis.com/track_data_ncar_ams_3km_nc_small/track_data_ncar_ams_3km_nc_small.tar.gz" 15 | print("Get csv files") 16 | urlretrieve(csv_tar_file, join("data", csv_tar_file.split("/")[-1])) 17 | print("Get nc files") 18 | urlretrieve(nc_tar_file, join("data", nc_tar_file.split("/")[-1])) 19 | print("Extract csv tar file") 20 | csv_tar = tarfile.open(join("data", csv_tar_file.split("/")[-1])) 21 | csv_tar.extractall("data/") 22 | csv_tar.close() 23 | print("Extract nc tar file") 24 | nc_tar = tarfile.open(join("data", nc_tar_file.split("/")[-1])) 25 | nc_tar.extractall("data/") 26 | nc_tar.close() 27 | 28 | 29 | 30 | 
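# A quick sanity check after the script above finishes might count the
# extracted files. The directory names below are assumptions based on the
# archive names, not verified against the tarballs:
#
#     from glob import glob
#     print(len(glob("data/track_data_ncar_ams_3km_csv_small/*.csv")))
#     print(len(glob("data/track_data_ncar_ams_3km_nc_small/*.nc")))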
-------------------------------------------------------------------------------- /Introduction_To_Machine_Learning/Supervised_Learning_Algorithims/extract_data.py: -------------------------------------------------------------------------------- 1 | """ 2 | Developed by Amanda Burke 3 | Based off methods by Sheri Mickelson, AMS 2019 4 | AMS 2020 Short Course 5 | """ 6 | 7 | import numpy as np 8 | import pandas as pd 9 | import xarray as xr 10 | from glob import glob 11 | 12 | # Input variables for the extract_csv_data() function 13 | csv_input_variables = ['REFL_COM_mean', 'REFL_COM_max', 'REFL_COM_min', 'REFL_COM_std', 'REFL_COM_percentile_10', 14 | 'REFL_COM_percentile_25', 'REFL_COM_percentile_50', 'REFL_COM_percentile_75', 'REFL_COM_percentile_90', 15 | 'U10_mean', 'U10_max', 'U10_min', 'U10_std', 'U10_percentile_10', 'U10_percentile_25', 'U10_percentile_50', 16 | 'U10_percentile_75', 'U10_percentile_90', 'V10_mean', 'V10_max', 'V10_min', 'V10_std', 'V10_percentile_10', 17 | 'V10_percentile_25', 'V10_percentile_50', 'V10_percentile_75', 'V10_percentile_90', 'T2_mean', 'T2_max', 18 | 'T2_min', 'T2_std', 'T2_percentile_10', 'T2_percentile_25', 'T2_percentile_50', 'T2_percentile_75', 19 | 'T2_percentile_90', 'area', 'eccentricity', 'major_axis_length', 'minor_axis_length', 'orientation'] 20 | # Label variable for the extract_csv_data() function 21 | csv_label_variable = ['RVORT1_MAX-future_max'] 22 | 23 | # Input variables for the extract_nc_data() function 24 | nc_input_variables = ["REFL_COM_curr", "U10_curr", "V10_curr"] 25 | # Label variable for the extract_nc_data() function 26 | nc_label_variable = ["RVORT1_MAX_future"] 27 | 28 | 29 | def extract_csv_data(input_data_path): 30 | """ 31 | Extracts csv data from a given set of files. Returns datasets 32 | containing the predictor and label variables. 33 | 34 | Args: 35 | input_data_path (str): path to dataset directory 36 | returns: Predictor, label, and valid date data (# of datafiles,). 37 | 38 | """ 39 | # Find all csv files from given directory 40 | data_files = sorted(glob(input_data_path + "*.csv")) 41 | 42 | in_data = [] 43 | out_data = [] 44 | valid_times = [] 45 | 46 | for files in data_files: 47 | # Read in csv data 48 | data = pd.read_csv(files) 49 | #Append the predictor and label variables 50 | in_data.append(data.loc[:,csv_input_variables].values) 51 | out_data.append(data.loc[:,csv_label_variable].values) 52 | #Append daily timestamps 53 | valid_24_hour_date = data.loc[:,"Valid_Date"].values 54 | valid_times.append(pd.Timestamp(valid_24_hour_date[0][:10])) 55 | 56 | return in_data, out_data, valid_times 57 | 58 | 59 | def extract_nc_data(input_data_path): 60 | """ 61 | Extracts netcdf data from a given set of files. Returns datasets 62 | containing the input variables and output variables. 63 | 64 | Args: 65 | input_data_path (str): path to dataset directory 66 | returns: Predictor and label data (examples, 32, 32, number of variables), 67 | valid dates (examples,). 
68 | """ 69 | # Find all netcdf files from given directory 70 | data_files = sorted(glob(input_data_path + "*.nc")) 71 | 72 | in_data = [] 73 | out_data = [] 74 | valid_times = [] 75 | 76 | for files in data_files: 77 | # Read in netcdf data 78 | data = xr.open_dataset(files) 79 | #Append the daily predictor and label variables 80 | in_data.append(np.stack([data[v].values for v in nc_input_variables], axis=-1)) 81 | out_data.append(np.stack([data[v].values for v in nc_label_variable], axis=-1)) 82 | #Append daily timestamps 83 | date = pd.Timestamp(files.split("/")[-1].split("_")[1]) 84 | valid_times.append([date] * in_data[-1].shape[0]) 85 | data.close() 86 | 87 | # Concatenate/stack data from lists of arrays to a single array 88 | all_in_data = np.vstack(in_data) 89 | all_out_data = np.vstack(out_data) 90 | all_valid_times = np.concatenate(valid_times) 91 | 92 | # Delete lists to save memory 93 | del in_data[:], out_data[:],valid_times[:] 94 | del in_data, out_data, valid_times 95 | 96 | return all_in_data, all_out_data, all_valid_times -------------------------------------------------------------------------------- /Introduction_To_Machine_Learning/Supervised_Learning_Algorithims/performance_diagrams.py: -------------------------------------------------------------------------------- 1 | """Methods for plotting performance diagram.""" 2 | 3 | import numpy 4 | import matplotlib.colors 5 | import matplotlib.pyplot as pyplot 6 | 7 | DEFAULT_LINE_COLOUR = numpy.array([228, 26, 28], dtype=float) / 255 8 | DEFAULT_LINE_WIDTH = 3 9 | DEFAULT_BIAS_LINE_COLOUR = numpy.full(3, 152. / 255) 10 | DEFAULT_BIAS_LINE_WIDTH = 2 11 | 12 | LEVELS_FOR_CSI_CONTOURS = numpy.linspace(0, 1, num=11, dtype=float) 13 | LEVELS_FOR_BIAS_CONTOURS = numpy.array( 14 | [0.25, 0.5, 0.75, 1., 1.5, 2., 3., 5.]) 15 | 16 | BIAS_STRING_FORMAT = '%.2f' 17 | BIAS_LABEL_PADDING_PX = 10 18 | 19 | FIGURE_WIDTH_INCHES = 10 20 | FIGURE_HEIGHT_INCHES = 10 21 | 22 | FONT_SIZE = 20 23 | pyplot.rc('font', size=FONT_SIZE) 24 | pyplot.rc('axes', titlesize=FONT_SIZE) 25 | pyplot.rc('axes', labelsize=FONT_SIZE) 26 | pyplot.rc('xtick', labelsize=FONT_SIZE) 27 | pyplot.rc('ytick', labelsize=FONT_SIZE) 28 | pyplot.rc('legend', fontsize=FONT_SIZE) 29 | pyplot.rc('figure', titlesize=FONT_SIZE) 30 | 31 | 32 | def _get_sr_pod_grid(success_ratio_spacing=0.01, pod_spacing=0.01): 33 | """Creates grid in SR-POD (success ratio / probability of detection) space. 34 | 35 | M = number of rows (unique POD values) in grid 36 | N = number of columns (unique success ratios) in grid 37 | 38 | :param success_ratio_spacing: Spacing between grid cells in adjacent 39 | columns. 40 | :param pod_spacing: Spacing between grid cells in adjacent rows. 41 | :return: success_ratio_matrix: M-by-N numpy array of success ratios. 42 | Success ratio increases with column index. 43 | :return: pod_matrix: M-by-N numpy array of POD values. POD decreases with 44 | row index. 45 | """ 46 | 47 | num_success_ratios = 1 + int(numpy.ceil(1. / success_ratio_spacing)) 48 | num_pod_values = 1 + int(numpy.ceil(1. / pod_spacing)) 49 | 50 | unique_success_ratios = numpy.linspace(0., 1., num=num_success_ratios) 51 | unique_pod_values = numpy.linspace(0., 1., num=num_pod_values)[::-1] 52 | return numpy.meshgrid(unique_success_ratios, unique_pod_values) 53 | 54 | 55 | def _csi_from_sr_and_pod(success_ratio_array, pod_array): 56 | """Computes CSI (critical success index) from success ratio and POD. 
57 | 58 | POD = probability of detection 59 | 60 | :param success_ratio_array: numpy array (any shape) of success ratios. 61 | :param pod_array: numpy array (same shape) of POD values. 62 | :return: csi_array: numpy array (same shape) of CSI values. 63 | """ 64 | 65 | return (success_ratio_array ** -1 + pod_array ** -1 - 1.) ** -1 66 | 67 | 68 | def _bias_from_sr_and_pod(success_ratio_array, pod_array): 69 | """Computes frequency bias from success ratio and POD. 70 | 71 | POD = probability of detection 72 | 73 | :param success_ratio_array: numpy array (any shape) of success ratios. 74 | :param pod_array: numpy array (same shape) of POD values. 75 | :return: frequency_bias_array: numpy array (same shape) of frequency biases. 76 | """ 77 | 78 | return pod_array / success_ratio_array 79 | 80 | 81 | def _get_csi_colour_scheme(): 82 | """Returns colour scheme for CSI (critical success index). 83 | 84 | :return: colour_map_object: Colour scheme (instance of 85 | `matplotlib.colors.ListedColormap`). 86 | :return: colour_norm_object: Instance of `matplotlib.colors.BoundaryNorm`, 87 | defining the scale of the colour map. 88 | """ 89 | 90 | this_colour_map_object = pyplot.cm.Blues 91 | this_colour_norm_object = matplotlib.colors.BoundaryNorm( 92 | LEVELS_FOR_CSI_CONTOURS, this_colour_map_object.N) 93 | 94 | rgba_matrix = this_colour_map_object(this_colour_norm_object( 95 | LEVELS_FOR_CSI_CONTOURS)) 96 | colour_list = [ 97 | rgba_matrix[i, ..., :-1] for i in range(rgba_matrix.shape[0]) 98 | ] 99 | 100 | colour_map_object = matplotlib.colors.ListedColormap(colour_list) 101 | colour_map_object.set_under(numpy.array([1, 1, 1])) 102 | colour_norm_object = matplotlib.colors.BoundaryNorm( 103 | LEVELS_FOR_CSI_CONTOURS, colour_map_object.N) 104 | 105 | return colour_map_object, colour_norm_object 106 | 107 | 108 | def _add_colour_bar( 109 | axes_object, colour_map_object, values_to_colour, min_colour_value, 110 | max_colour_value, colour_norm_object=None, 111 | orientation_string='vertical', extend_min=True, extend_max=True, 112 | fraction_of_axis_length=1., font_size=FONT_SIZE): 113 | """Adds colour bar to existing axes. 114 | 115 | :param axes_object: Existing axes (instance of 116 | `matplotlib.axes._subplots.AxesSubplot`). 117 | :param colour_map_object: Colour scheme (instance of 118 | `matplotlib.pyplot.cm`). 119 | :param values_to_colour: numpy array of values to colour. 120 | :param min_colour_value: Minimum value in colour map. 121 | :param max_colour_value: Max value in colour map. 122 | :param colour_norm_object: Instance of `matplotlib.colors.BoundaryNorm`, 123 | defining the scale of the colour map. If `colour_norm_object is None`, 124 | will assume that scale is linear. 125 | :param orientation_string: Orientation of colour bar ("vertical" or 126 | "horizontal"). 127 | :param extend_min: Boolean flag. If True, the bottom of the colour bar will 128 | have an arrow. If False, it will be a flat line, suggesting that lower 129 | values are not possible. 130 | :param extend_max: Same but for top of colour bar. 131 | :param fraction_of_axis_length: Fraction of axis length (y-axis if 132 | orientation is "vertical", x-axis if orientation is "horizontal") 133 | occupied by colour bar. 134 | :param font_size: Font size for labels on colour bar. 135 | :return: colour_bar_object: Colour bar (instance of 136 | `matplotlib.pyplot.colorbar`) created by this method. 
137 | """ 138 | 139 | if colour_norm_object is None: 140 | colour_norm_object = matplotlib.colors.Normalize( 141 | vmin=min_colour_value, vmax=max_colour_value, clip=False) 142 | 143 | scalar_mappable_object = pyplot.cm.ScalarMappable( 144 | cmap=colour_map_object, norm=colour_norm_object) 145 | scalar_mappable_object.set_array(values_to_colour) 146 | 147 | if extend_min and extend_max: 148 | extend_string = 'both' 149 | elif extend_min: 150 | extend_string = 'min' 151 | elif extend_max: 152 | extend_string = 'max' 153 | else: 154 | extend_string = 'neither' 155 | 156 | if orientation_string == 'horizontal': 157 | padding = 0.075 158 | else: 159 | padding = 0.05 160 | 161 | colour_bar_object = pyplot.colorbar( 162 | ax=axes_object, mappable=scalar_mappable_object, 163 | orientation=orientation_string, pad=padding, extend=extend_string, 164 | shrink=fraction_of_axis_length) 165 | 166 | colour_bar_object.ax.tick_params(labelsize=font_size) 167 | return colour_bar_object 168 | 169 | 170 | def get_points_in_perf_diagram(observed_labels, forecast_probabilities): 171 | """Creates points for performance diagram. 172 | 173 | E = number of examples 174 | T = number of binarization thresholds 175 | 176 | :param observed_labels: length-E numpy array of class labels (integers in 177 | 0...1). 178 | :param forecast_probabilities: length-E numpy array with forecast 179 | probabilities of label = 1. 180 | :return: pod_by_threshold: length-T numpy array of POD (probability of 181 | detection) values. 182 | :return: success_ratio_by_threshold: length-T numpy array of success ratios. 183 | """ 184 | 185 | assert numpy.all(numpy.logical_or( 186 | observed_labels == 0, observed_labels == 1 187 | )) 188 | 189 | assert numpy.all(numpy.logical_and( 190 | forecast_probabilities >= 0, forecast_probabilities <= 1 191 | )) 192 | 193 | observed_labels = observed_labels.astype(int) 194 | binarization_thresholds = numpy.linspace(0, 1, num=1001, dtype=float) 195 | 196 | num_thresholds = len(binarization_thresholds) 197 | pod_by_threshold = numpy.full(num_thresholds, numpy.nan) 198 | success_ratio_by_threshold = numpy.full(num_thresholds, numpy.nan) 199 | 200 | for k in range(num_thresholds): 201 | these_forecast_labels = ( 202 | forecast_probabilities >= binarization_thresholds[k] 203 | ).astype(int) 204 | 205 | this_num_hits = numpy.sum(numpy.logical_and( 206 | these_forecast_labels == 1, observed_labels == 1 207 | )) 208 | 209 | this_num_false_alarms = numpy.sum(numpy.logical_and( 210 | these_forecast_labels == 1, observed_labels == 0 211 | )) 212 | 213 | this_num_misses = numpy.sum(numpy.logical_and( 214 | these_forecast_labels == 0, observed_labels == 1 215 | )) 216 | 217 | try: 218 | pod_by_threshold[k] = ( 219 | float(this_num_hits) / (this_num_hits + this_num_misses) 220 | ) 221 | except ZeroDivisionError: 222 | pass 223 | 224 | try: 225 | success_ratio_by_threshold[k] = ( 226 | float(this_num_hits) / (this_num_hits + this_num_false_alarms) 227 | ) 228 | except ZeroDivisionError: 229 | pass 230 | 231 | pod_by_threshold = numpy.array([1.] + pod_by_threshold.tolist() + [0.]) 232 | success_ratio_by_threshold = numpy.array( 233 | [0.] + success_ratio_by_threshold.tolist() + [1.] 
234 | ) 235 | 236 | return pod_by_threshold, success_ratio_by_threshold 237 | 238 | 239 | def plot_performance_diagram( 240 | observed_labels, forecast_probabilities, 241 | line_colour=DEFAULT_LINE_COLOUR, line_width=DEFAULT_LINE_WIDTH, 242 | bias_line_colour=DEFAULT_BIAS_LINE_COLOUR, 243 | bias_line_width=DEFAULT_BIAS_LINE_WIDTH, axes_object=None): 244 | """Plots performance diagram. 245 | 246 | E = number of examples 247 | 248 | :param observed_labels: length-E numpy array of class labels (integers in 249 | 0...1). 250 | :param forecast_probabilities: length-E numpy array with forecast 251 | probabilities of label = 1. 252 | :param line_colour: Colour (in any format accepted by `matplotlib.colors`). 253 | :param line_width: Line width (real positive number). 254 | :param bias_line_colour: Colour of contour lines for frequency bias. 255 | :param bias_line_width: Width of contour lines for frequency bias. 256 | :param axes_object: Will plot on these axes (instance of 257 | `matplotlib.axes._subplots.AxesSubplot`). If `axes_object is None`, 258 | will create new axes. 259 | :return: pod_by_threshold: POD (probability of detection) values. See doc 260 | for `get_points_in_perf_diagram`. 261 | :return: success_ratio_by_threshold: Same. 262 | """ 263 | 264 | pod_by_threshold, success_ratio_by_threshold = get_points_in_perf_diagram( 265 | observed_labels=observed_labels, 266 | forecast_probabilities=forecast_probabilities) 267 | 268 | if axes_object is None: 269 | _, axes_object = pyplot.subplots( 270 | 1, 1, figsize=(FIGURE_WIDTH_INCHES, FIGURE_HEIGHT_INCHES) 271 | ) 272 | 273 | success_ratio_matrix, pod_matrix = _get_sr_pod_grid() 274 | csi_matrix = _csi_from_sr_and_pod(success_ratio_matrix, pod_matrix) 275 | frequency_bias_matrix = _bias_from_sr_and_pod( 276 | success_ratio_matrix, pod_matrix) 277 | 278 | this_colour_map_object, this_colour_norm_object = _get_csi_colour_scheme() 279 | 280 | pyplot.contourf( 281 | success_ratio_matrix, pod_matrix, csi_matrix, LEVELS_FOR_CSI_CONTOURS, 282 | cmap=this_colour_map_object, norm=this_colour_norm_object, vmin=0., 283 | vmax=1., axes=axes_object) 284 | 285 | colour_bar_object = _add_colour_bar( 286 | axes_object=axes_object, colour_map_object=this_colour_map_object, 287 | colour_norm_object=this_colour_norm_object, 288 | values_to_colour=csi_matrix, min_colour_value=0., 289 | max_colour_value=1., orientation_string='vertical', 290 | extend_min=False, extend_max=False) 291 | colour_bar_object.set_label('CSI (critical success index)') 292 | 293 | bias_colour_tuple = () 294 | for _ in range(len(LEVELS_FOR_BIAS_CONTOURS)): 295 | bias_colour_tuple += (bias_line_colour,) 296 | 297 | bias_contour_object = pyplot.contour( 298 | success_ratio_matrix, pod_matrix, frequency_bias_matrix, 299 | LEVELS_FOR_BIAS_CONTOURS, colors=bias_colour_tuple, 300 | linewidths=bias_line_width, linestyles='dashed', axes=axes_object) 301 | pyplot.clabel( 302 | bias_contour_object, inline=True, inline_spacing=BIAS_LABEL_PADDING_PX, 303 | fmt=BIAS_STRING_FORMAT, fontsize=FONT_SIZE) 304 | 305 | nan_flags = numpy.logical_or( 306 | numpy.isnan(success_ratio_by_threshold), numpy.isnan(pod_by_threshold) 307 | ) 308 | 309 | if not numpy.all(nan_flags): 310 | real_indices = numpy.where(numpy.invert(nan_flags))[0] 311 | axes_object.plot( 312 | success_ratio_by_threshold[real_indices], 313 | pod_by_threshold[real_indices], color=line_colour, 314 | linestyle='solid', linewidth=line_width) 315 | 316 | axes_object.set_xlabel('Success ratio (1 - FAR)') 317 | axes_object.set_ylabel('POD (probability of detection)')
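# Both axes are probabilities, so the performance diagram lives on the unit square.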
318 | axes_object.set_xlim(0., 1.) 319 | axes_object.set_ylim(0., 1.) 320 | 321 | return pod_by_threshold, success_ratio_by_threshold 322 | -------------------------------------------------------------------------------- /Introduction_To_Machine_Learning/Supervised_Learning_Algorithims/roc_curves.py: -------------------------------------------------------------------------------- 1 | """Methods for plotting ROC (receiver operating characteristic) curve.""" 2 | 3 | import numpy 4 | import matplotlib.colors 5 | import matplotlib.pyplot as pyplot 6 | import performance_diagrams 7 | 8 | DEFAULT_LINE_COLOUR = numpy.array([228, 26, 28], dtype=float) / 255 9 | DEFAULT_LINE_WIDTH = 3 10 | DEFAULT_RANDOM_LINE_COLOUR = numpy.full(3, 152. / 255) 11 | DEFAULT_RANDOM_LINE_WIDTH = 2 12 | 13 | LEVELS_FOR_PEIRCE_CONTOURS = numpy.linspace(0, 1, num=11, dtype=float) 14 | 15 | FIGURE_WIDTH_INCHES = 10 16 | FIGURE_HEIGHT_INCHES = 10 17 | 18 | FONT_SIZE = 20 19 | pyplot.rc('font', size=FONT_SIZE) 20 | pyplot.rc('axes', titlesize=FONT_SIZE) 21 | pyplot.rc('axes', labelsize=FONT_SIZE) 22 | pyplot.rc('xtick', labelsize=FONT_SIZE) 23 | pyplot.rc('ytick', labelsize=FONT_SIZE) 24 | pyplot.rc('legend', fontsize=FONT_SIZE) 25 | pyplot.rc('figure', titlesize=FONT_SIZE) 26 | 27 | 28 | def _get_pofd_pod_grid(pofd_spacing=0.01, pod_spacing=0.01): 29 | """Creates grid in POFD-POD space. 30 | 31 | M = number of rows (unique POD values) in grid 32 | N = number of columns (unique POFD values) in grid 33 | 34 | :param pofd_spacing: Spacing between grid cells in adjacent columns. 35 | :param pod_spacing: Spacing between grid cells in adjacent rows. 36 | :return: pofd_matrix: M-by-N numpy array of POFD values. 37 | :return: pod_matrix: M-by-N numpy array of POD values. 38 | """ 39 | 40 | num_pofd_values = 1 + int(numpy.ceil(1. / pofd_spacing)) 41 | num_pod_values = 1 + int(numpy.ceil(1. / pod_spacing)) 42 | 43 | unique_pofd_values = numpy.linspace(0., 1., num=num_pofd_values) 44 | unique_pod_values = numpy.linspace(0., 1., num=num_pod_values)[::-1] 45 | return numpy.meshgrid(unique_pofd_values, unique_pod_values) 46 | 47 | 48 | def _get_peirce_colour_scheme(): 49 | """Returns colour scheme for Peirce score. 50 | 51 | :return: colour_map_object: Colour scheme (instance of 52 | `matplotlib.colors.ListedColormap`). 53 | :return: colour_norm_object: Instance of `matplotlib.colors.BoundaryNorm`, 54 | defining the scale of the colour map. 55 | """ 56 | 57 | this_colour_map_object = pyplot.cm.Blues 58 | this_colour_norm_object = matplotlib.colors.BoundaryNorm( 59 | LEVELS_FOR_PEIRCE_CONTOURS, this_colour_map_object.N) 60 | 61 | rgba_matrix = this_colour_map_object(this_colour_norm_object( 62 | LEVELS_FOR_PEIRCE_CONTOURS 63 | )) 64 | 65 | colour_list = [ 66 | rgba_matrix[i, ..., :-1] for i in range(rgba_matrix.shape[0]) 67 | ] 68 | 69 | colour_map_object = matplotlib.colors.ListedColormap(colour_list) 70 | colour_map_object.set_under(numpy.array([1, 1, 1])) 71 | colour_norm_object = matplotlib.colors.BoundaryNorm( 72 | LEVELS_FOR_PEIRCE_CONTOURS, colour_map_object.N) 73 | 74 | return colour_map_object, colour_norm_object 75 | 76 | 77 | def get_points_in_roc_curve(observed_labels, forecast_probabilities): 78 | """Creates points for ROC curve. 79 | 80 | E = number of examples 81 | T = number of binarization thresholds 82 | 83 | :param observed_labels: length-E numpy array of class labels (integers in 84 | 0...1). 85 | :param forecast_probabilities: length-E numpy array with forecast 86 | probabilities of label = 1. 
87 | :return: pofd_by_threshold: length-T numpy array of POFD (probability of 88 | false detection) values. 89 | :return: pod_by_threshold: length-T numpy array of POD (probability of 90 | detection) values. 91 | """ 92 | 93 | assert numpy.all(numpy.logical_or( 94 | observed_labels == 0, observed_labels == 1 95 | )) 96 | 97 | assert numpy.all(numpy.logical_and( 98 | forecast_probabilities >= 0, forecast_probabilities <= 1 99 | )) 100 | 101 | observed_labels = observed_labels.astype(int) 102 | binarization_thresholds = numpy.linspace(0, 1, num=1001, dtype=float) 103 | 104 | num_thresholds = len(binarization_thresholds) 105 | pofd_by_threshold = numpy.full(num_thresholds, numpy.nan) 106 | pod_by_threshold = numpy.full(num_thresholds, numpy.nan) 107 | 108 | for k in range(num_thresholds): 109 | these_forecast_labels = ( 110 | forecast_probabilities >= binarization_thresholds[k] 111 | ).astype(int) 112 | 113 | this_num_hits = numpy.sum(numpy.logical_and( 114 | these_forecast_labels == 1, observed_labels == 1 115 | )) 116 | 117 | this_num_false_alarms = numpy.sum(numpy.logical_and( 118 | these_forecast_labels == 1, observed_labels == 0 119 | )) 120 | 121 | this_num_misses = numpy.sum(numpy.logical_and( 122 | these_forecast_labels == 0, observed_labels == 1 123 | )) 124 | 125 | this_num_correct_nulls = numpy.sum(numpy.logical_and( 126 | these_forecast_labels == 0, observed_labels == 0 127 | )) 128 | 129 | try: 130 | pofd_by_threshold[k] = ( 131 | float(this_num_false_alarms) / 132 | (this_num_false_alarms + this_num_correct_nulls) 133 | ) 134 | except ZeroDivisionError: 135 | pass 136 | 137 | try: 138 | pod_by_threshold[k] = ( 139 | float(this_num_hits) / (this_num_hits + this_num_misses) 140 | ) 141 | except ZeroDivisionError: 142 | pass 143 | 144 | pod_by_threshold = numpy.array([1.] + pod_by_threshold.tolist() + [0.]) 145 | pofd_by_threshold = numpy.array([1.] + pofd_by_threshold.tolist() + [0.]) 146 | 147 | return pofd_by_threshold, pod_by_threshold 148 | 149 | 150 | def plot_roc_curve( 151 | observed_labels, forecast_probabilities, 152 | line_colour=DEFAULT_LINE_COLOUR, line_width=DEFAULT_LINE_WIDTH, 153 | random_line_colour=DEFAULT_RANDOM_LINE_COLOUR, 154 | random_line_width=DEFAULT_RANDOM_LINE_WIDTH, axes_object=None): 155 | """Plots ROC curve. 156 | 157 | E = number of examples 158 | 159 | :param observed_labels: length-E numpy array of class labels (integers in 160 | 0...1). 161 | :param forecast_probabilities: length-E numpy array with forecast 162 | probabilities of label = 1. 163 | :param line_colour: Colour (in any format accepted by `matplotlib.colors`). 164 | :param line_width: Line width (real positive number). 165 | :param random_line_colour: Colour of reference line (ROC curve for random 166 | predictor). 167 | :param random_line_width: Width of reference line (ROC curve for random 168 | predictor). 169 | :param axes_object: Will plot on these axes (instance of 170 | `matplotlib.axes._subplots.AxesSubplot`). If `axes_object is None`, 171 | will create new axes. 172 | :return: pofd_by_threshold: See doc for `get_points_in_roc_curve`. 173 | :return: pod_by_threshold: Same. 
174 | """ 175 | 176 | pofd_by_threshold, pod_by_threshold = get_points_in_roc_curve( 177 | observed_labels=observed_labels, 178 | forecast_probabilities=forecast_probabilities) 179 | 180 | if axes_object is None: 181 | _, axes_object = pyplot.subplots( 182 | 1, 1, figsize=(FIGURE_WIDTH_INCHES, FIGURE_HEIGHT_INCHES) 183 | ) 184 | 185 | pofd_matrix, pod_matrix = _get_pofd_pod_grid() 186 | peirce_score_matrix = pod_matrix - pofd_matrix 187 | 188 | colour_map_object, colour_norm_object = _get_peirce_colour_scheme() 189 | 190 | pyplot.contourf( 191 | pofd_matrix, pod_matrix, peirce_score_matrix, 192 | LEVELS_FOR_PEIRCE_CONTOURS, cmap=colour_map_object, 193 | norm=colour_norm_object, vmin=0., vmax=1., axes=axes_object) 194 | 195 | # TODO(thunderhoser): Calling private method is a HACK. 196 | colour_bar_object = performance_diagrams._add_colour_bar( 197 | axes_object=axes_object, colour_map_object=colour_map_object, 198 | colour_norm_object=colour_norm_object, 199 | values_to_colour=peirce_score_matrix, min_colour_value=0., 200 | max_colour_value=1., orientation_string='vertical', 201 | extend_min=False, extend_max=False) 202 | 203 | print(colour_bar_object) 204 | colour_bar_object.set_label('Peirce score') 205 | 206 | random_x_coords = numpy.array([0., 1.]) 207 | random_y_coords = numpy.array([0., 1.]) 208 | axes_object.plot( 209 | random_x_coords, random_y_coords, color=random_line_colour, 210 | linestyle='dashed', linewidth=random_line_width) 211 | 212 | nan_flags = numpy.logical_or( 213 | numpy.isnan(pofd_by_threshold), numpy.isnan(pod_by_threshold) 214 | ) 215 | 216 | if not numpy.all(nan_flags): 217 | real_indices = numpy.where(numpy.invert(nan_flags))[0] 218 | axes_object.plot( 219 | pofd_by_threshold[real_indices], pod_by_threshold[real_indices], 220 | color=line_colour, linestyle='solid', linewidth=line_width) 221 | 222 | axes_object.set_xlabel('POFD (probability of false detection)') 223 | axes_object.set_ylabel('POD (probability of detection)') 224 | axes_object.set_xlim(0., 1.) 225 | axes_object.set_ylim(0., 1.) 226 | 227 | return pofd_by_threshold, pod_by_threshold 228 | -------------------------------------------------------------------------------- /Introduction_To_Machine_Learning/Supervised_Learning_Algorithims/tree_schematic.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alburke/ams-2020-ml-python-course/997f93303f01956685c49ecdfc010ebb8dfc9b4e/Introduction_To_Machine_Learning/Supervised_Learning_Algorithims/tree_schematic.jpg -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ams-2020-ml-python-course 2 | 3 | Machine Learning in Python for Environmental Science Problems AMS 2020 Short Course 4 | 5 | ## Authors 6 | * Amanda Burke, University of Oklahoma (aburke1@ou.edu) 7 | * Benjamin Toms, Colorado State University (benatoms@rams.colostate.edu) 8 | * Katherine Avery, University of Oklahoma (katherine.avery@ou.edu) 9 | * Hamid Kamangir, Texas A&M Corpus Christi (hkamangir@islander.tamucc.edu) 10 | * Karthik Kashinath, Lawrence Berkeley National Laboratory (kkashinath@lbl.gov) 11 | * Ryan Lagerquist, University of Oklahoma (ryan.lagerquist@ou.edu) 12 | 13 | ## Modules 14 | ### Introduction to Machine Learning 15 | 1. Introduction to Machine Learning and AI 16 | 2. Data Science Fundamentals 17 | 3. Supervised Learning Algorithms 18 | 4. 
Introduction to Deep Learning 19 | 20 | ### Advanced Topics in Machine Learning 21 | 1. Unsupervised Learning Overview 22 | 2. Machine Learning Model Interpretation 23 | 24 | ## Requirements 25 | The modules for this short course require Python 3.6 and the following Python libraries: 26 | * numpy 27 | * scipy 28 | * matplotlib 29 | * xarray 30 | * netcdf4 31 | * pandas 32 | * scikit-learn 33 | * tensorflow-gpu or tensorflow 34 | * keras 35 | * jupyter 36 | * ipython 37 | * jupyterlab 38 | * ipywidgets 39 | 40 | ## Data Access 41 | The data for the course are stored online. The `download_data.py` script will download the data to the appropriate location and extract all files. The netCDF data is contained in a 2GB tar file, so make sure you have at least 4GB of storage available and a fast internet connection. 42 | 43 | ## Course Website 44 | To run the notebooks in the cloud rather than a local installation, see the short course website 45 | [Machine Learning in Python for Environmental Science](https://sites.google.com/rams.colostate.edu/ams-ml4es/agenda-and-code). 46 | 47 | 48 | 49 | -------------------------------------------------------------------------------- /download_data.py: -------------------------------------------------------------------------------- 1 | """ 2 | Developed by David John Gagne II 3 | AMS 2019 Short Course 4 | """ 5 | 6 | from urllib.request import urlretrieve 7 | import os 8 | from os.path import exists, join 9 | import tarfile 10 | 11 | if not exists("data"): 12 | os.mkdir("data") 13 | csv_tar_file = "https://storage.googleapis.com/track_data_ncar_ams_3km_csv_small/track_data_ncar_ams_3km_csv_small.tar.gz" 14 | nc_tar_file = "https://storage.googleapis.com/track_data_ncar_ams_3km_nc_small/track_data_ncar_ams_3km_nc_small.tar.gz" 15 | print("Get csv files") 16 | urlretrieve(csv_tar_file, join("data", csv_tar_file.split("/")[-1])) 17 | print("Get nc files") 18 | urlretrieve(nc_tar_file, join("data", nc_tar_file.split("/")[-1])) 19 | print("Extract csv tar file") 20 | csv_tar = tarfile.open(join("data", csv_tar_file.split("/")[-1])) 21 | csv_tar.extractall("data/") 22 | csv_tar.close() 23 | print("Extract nc tar file") 24 | nc_tar = tarfile.open(join("data", nc_tar_file.split("/")[-1])) 25 | nc_tar.extractall("data/") 26 | nc_tar.close() 27 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | """Setup file for ams-2020-ml-python-course.""" 2 | 3 | from setuptools import setup 4 | 5 | PACKAGE_NAMES = ['interpretation', 'evaluation'] 6 | 7 | KEYWORDS = [ 8 | 'machine learning', 'deep learning', 'artificial intelligence', 9 | 'data mining', 'weather', 'meteorology', 'atmospheric science', 10 | 'thunderstorm', 'tornado' 11 | ] 12 | 13 | SHORT_DESCRIPTION = ( 14 | 'Python library for machine-learning short course at AMS 2020.' 15 | ) 16 | 17 | LONG_DESCRIPTION = ( 18 | 'Python library for short course on machine learning at AMS (American ' 19 | 'Meteorological Society) 2020 Annual Meeting.'
20 | ) 21 | 22 | CLASSIFIERS = [ 23 | 'Development Status :: 2 - Pre-Alpha', 24 | 'Intended Audience :: Science/Research', 25 | 'License :: OSI Approved :: MIT License', 26 | 'Programming Language :: Python :: 3.6' 27 | ] 28 | 29 | if __name__ == '__main__': 30 | setup( 31 | name='ams-2020-ml-python-course', 32 | version='0.1', 33 | description=SHORT_DESCRIPTION, 34 | long_description=LONG_DESCRIPTION, 35 | license='MIT', 36 | author='Amanda Burke', 37 | author_email='aburke1@ou.edu', 38 | url='https://github.com/alburke/ams-2020-ml-python-course', 39 | packages=PACKAGE_NAMES, 40 | scripts=[], 41 | keywords=KEYWORDS, 42 | classifiers=CLASSIFIERS, 43 | include_package_data=True, 44 | zip_safe=False 45 | ) 46 | -------------------------------------------------------------------------------- /util/extract_data.py: -------------------------------------------------------------------------------- 1 | """ 2 | Developed by Amanda Burke 3 | Based off methods by Sheri Mickelson, AMS 2019 4 | 5 | AMS 2020 Short Course 6 | """ 7 | 8 | import numpy as np 9 | import pandas as pd 10 | import xarray as xr 11 | from glob import glob 12 | 13 | # Input variables for the extract_csv_data() function 14 | csv_input_variables = ['REFL_COM_mean', 'REFL_COM_max', 'REFL_COM_min', 'REFL_COM_std', 'REFL_COM_percentile_10', 15 | 'REFL_COM_percentile_25', 'REFL_COM_percentile_50', 'REFL_COM_percentile_75', 'REFL_COM_percentile_90', 16 | 'U10_mean', 'U10_max', 'U10_min', 'U10_std', 'U10_percentile_10', 'U10_percentile_25', 'U10_percentile_50', 17 | 'U10_percentile_75', 'U10_percentile_90', 'V10_mean', 'V10_max', 'V10_min', 'V10_std', 'V10_percentile_10', 18 | 'V10_percentile_25', 'V10_percentile_50', 'V10_percentile_75', 'V10_percentile_90', 'T2_mean', 'T2_max', 19 | 'T2_min', 'T2_std', 'T2_percentile_10', 'T2_percentile_25', 'T2_percentile_50', 'T2_percentile_75', 20 | 'T2_percentile_90', 'area', 'eccentricity', 'major_axis_length', 'minor_axis_length', 'orientation'] 21 | # Label variable for the extract_csv_data() function 22 | csv_label_variable = ['RVORT1_MAX-future_max'] 23 | 24 | # Input variables for the extract_nc_data() function 25 | nc_input_variables = ["REFL_COM_curr", "U10_curr", "V10_curr"] 26 | # Label variable for the extract_nc_data() function 27 | nc_label_variable = ["RVORT1_MAX_future"] 28 | 29 | 30 | def extract_csv_data(input_data_path): 31 | """ 32 | Extracts csv data from a given set of files. Returns datasets 33 | containing the predictor and label variables. 34 | 35 | Args: 36 | input_data_path (str): path to dataset directory 37 | 38 | returns: Predictor, label, and valid date data (# of datafiles,). 39 | 40 | """ 41 | # Find all csv files from given directory 42 | data_files = sorted(glob(input_data_path + "*.csv")) 43 | 44 | in_data = [] 45 | out_data = [] 46 | valid_times = [] 47 | 48 | for files in data_files: 49 | # Read in csv data 50 | data = pd.read_csv(files) 51 | #Append the predictor and label variables 52 | in_data.append(data.loc[:,csv_input_variables].values) 53 | out_data.append(data.loc[:,csv_label_variable].values) 54 | #Append daily timestamps 55 | valid_24_hour_date = data.loc[:,"Valid_Date"].values 56 | valid_times.append(pd.Timestamp(valid_24_hour_date[0][:10])) 57 | 58 | return in_data, out_data, valid_times 59 | 60 | 61 | def extract_nc_data(input_data_path): 62 | """ 63 | Extracts netcdf data from a given set of files. Returns datasets 64 | containing the input variables and output variables.
65 | 66 | Args: 67 | input_data_path (str): path to dataset directory 68 | 69 | returns: Predictor and label data (examples, 32, 32, number of variables), 70 | valid dates (examples,). 71 | """ 72 | # Find all netcdf files from given directory 73 | data_files = sorted(glob(input_data_path + "*.nc")) 74 | 75 | in_data = [] 76 | out_data = [] 77 | valid_times = [] 78 | 79 | for files in data_files: 80 | # Read in netcdf data 81 | data = xr.open_dataset(files) 82 | #Append the daily predictor and label variables 83 | in_data.append(np.stack([data[v].values for v in nc_input_variables], axis=-1)) 84 | out_data.append(np.stack([data[v].values for v in nc_label_variable], axis=-1)) 85 | #Append daily timestamps 86 | date = pd.Timestamp(files.split("/")[-1].split("_")[1]) 87 | valid_times.append([date] * in_data[-1].shape[0]) 88 | data.close() 89 | 90 | # Concatenate/stack data from lists of arrays to a single array 91 | all_in_data = np.vstack(in_data) 92 | all_out_data = np.vstack(out_data) 93 | all_valid_times = np.concatenate(valid_times) 94 | 95 | # Delete lists to save memory 96 | del in_data[:], out_data[:],valid_times[:] 97 | del in_data, out_data, valid_times 98 | 99 | return all_in_data, all_out_data, all_valid_times 100 | --------------------------------------------------------------------------------
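Taken together, the utilities above form a small end-to-end pipeline: `extract_csv_data` loads per-storm predictors and labels, a scikit-learn model turns them into forecast probabilities, and the `roc_curves` / `performance_diagrams` modules evaluate the result. The sketch below is illustrative, not part of the repository: the data path, the vorticity threshold used to binarize the labels, and the choice of random forest are all assumptions.

```python
import numpy as np
import matplotlib.pyplot as pyplot
from sklearn.ensemble import RandomForestClassifier

import roc_curves
import performance_diagrams
from extract_data import extract_csv_data  # assumes these modules are on the path

# Assumed location of the extracted CSV files; extract_csv_data appends the
# glob pattern directly, so the trailing slash matters.
in_data, out_data, _ = extract_csv_data("data/track_data_ncar_ams_3km_csv_small/")

# Stack the per-file arrays and binarize the labels at an illustrative
# future-vorticity threshold (0.02 s^-1 is a placeholder, not course-defined).
features = np.vstack(in_data)
labels = (np.concatenate(out_data).ravel() >= 0.02).astype(int)

# Files are read in sorted order, so a leading split is roughly chronological.
split = int(0.8 * features.shape[0])
model = RandomForestClassifier(n_estimators=100, random_state=0)
model.fit(features[:split], labels[:split])
probabilities = model.predict_proba(features[split:])[:, 1]

# Evaluate with the plotting utilities defined in this repository.
roc_curves.plot_roc_curve(
    observed_labels=labels[split:], forecast_probabilities=probabilities
)
performance_diagrams.plot_performance_diagram(
    observed_labels=labels[split:], forecast_probabilities=probabilities
)
pyplot.show()
```

A chronological split is preferable to a random one here, because storms from the same day share environments and would otherwise leak between training and test sets.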