├── .gitignore
├── LICENSE
├── README.md
├── data
└── Create_Higgs_CSV.ipynb
├── notebooks
├── Big_Data
│ └── Zeppelin
│ │ ├── 00_Whirlwind_Tour_of_Zeppelin.json
│ │ ├── 01_File_Based_Data_Sources.json
│ │ ├── 03_Spark_Fundamental_Structured_Operations.json
│ │ ├── Machine_Learning_Zeppelin.json
│ │ ├── Spark_Machine_Learning.json
│ │ ├── Spark_Tutorial.json
│ │ └── Supervised_Machine_Learning.json
├── Checksum
│ └── Longitudinal_Redundancy_Check.ipynb
├── Cryptography
│ └── 1_Caesar_Shift.ipynb
├── Hashing
│ ├── Birthday_Attack.ipynb
│ └── Intro_to_Hashing.ipynb
├── Information_Theory
│ ├── Hamming_Distance.ipynb
│ └── Stirlings_Approximation_Factorial.ipynb
├── Legacy_Code
│ └── Bernoulli_Naive_Bayes.ipynb
├── Machine_Learning
│ ├── Algorithms_From_Scratch
│ │ ├── Bagging_and_Bootstrapping.ipynb
│ │ ├── Bernoulli_Naive_Bayes.ipynb
│ │ ├── Cross-Validation.ipynb
│ │ ├── Gradient_Descent.ipynb
│ │ ├── K-means.ipynb
│ │ ├── KNN.ipynb
│ │ ├── PCA.ipynb
│ │ ├── Train_Test_Split.ipynb
│ │ └── Train_Validation_Test_Split.ipynb
│ ├── Fourier_Transforms
│ │ └── FFT.ipynb
│ ├── Supervised_Learning
│ │ ├── Advanced_Techniques
│ │ │ └── Bagging
│ │ │ │ └── Bagging_Simulation.ipynb
│ │ ├── Classification
│ │ │ ├── EDA
│ │ │ │ └── Classification_EDA.ipynb
│ │ │ ├── Logistic_Regression
│ │ │ │ ├── Logistic_Regression_Sklearn_Example.ipynb
│ │ │ │ └── Sigmoid_Function.ipynb
│ │ │ └── Support_Vector_Machines
│ │ │ │ └── Basics_of_SVMs.ipynb
│ │ ├── Model_Selection
│ │ │ ├── Feature_Selection.ipynb
│ │ │ ├── Model_Tuning_and_Cross_Validation.ipynb
│ │ │ └── Train_Test_Split.ipynb
│ │ ├── Numerical_Methods
│ │ │ └── Gradient_Descent
│ │ │ │ └── Linear_Regression_&_Intro_to_GD.ipynb
│ │ └── Regression
│ │ │ └── Linear_Regression
│ │ │ ├── 1_Linear_Regression_101.ipynb
│ │ │ ├── 2_Linear_Regression_Metrics.ipynb
│ │ │ ├── 3_Linear_Regression_Assumptions_and_Evaluation.ipynb
│ │ │ └── 4_Linear_Regression_EDA_and_Residual_Plots.ipynb
│ └── Unsupervised_Learning
│ │ ├── Clustering
│ │ └── Kmeans
│ │ │ └── Kmeans_Clustering_w_Sklearn.ipynb
│ │ └── SVD
│ │ ├── SVD_to_image.ipynb
│ │ └── SVD_with_Iris_and_Images.ipynb
├── OS_library
│ └── OS_Sandbox.ipynb
├── PyTorch
│ └── PyTorch_Intro.ipynb
├── Python
│ ├── Coding_Best_Practices
│ │ ├── Complexity_&_Big_O.ipynb
│ │ ├── Complexity_Practice.ipynb
│ │ └── OOP_How_to_Write_a_Class.ipynb
│ ├── Demos
│ │ ├── ML_101.ipynb
│ │ ├── Python_101_Instructor.ipynb
│ │ └── Python_101_Student.ipynb
│ ├── NumPy
│ │ ├── NP_argwhere_isin.ipynb
│ │ └── Numpy_concat_append_ravel_mgrid.ipynb
│ ├── Programming_Problems
│ │ ├── Alphabet_Magnets.ipynb
│ │ ├── Bubble_Sort.ipynb
│ │ ├── Cryptogram_Puzzle.ipynb
│ │ ├── Dot_Product.ipynb
│ │ ├── Factorial.ipynb
│ │ ├── Fibonacci.ipynb
│ │ ├── GCD_Problem.ipynb
│ │ ├── Guess_a_Number.ipynb
│ │ ├── Moving_Average_Problem.ipynb
│ │ ├── OLS_R2_and_adjR2.ipynb
│ │ ├── Project_Euler_Problem_8_with_Deque.ipynb
│ │ ├── Reverse_String.ipynb
│ │ ├── Rotation_Problem.ipynb
│ │ └── SSE.ipynb
│ ├── Python_Internals
│ │ ├── Arrays_vs_Lists.ipynb
│ │ ├── Deep_vs_Shallow_Copying.ipynb
│ │ ├── Pickling.ipynb
│ │ ├── Python_Data_Structure_Comparison.ipynb
│ │ ├── Working_with_Bits.ipynb
│ │ └── listexp_vs_genexp.ipynb
│ ├── Recursion
│ │ └── Factorial_&_Fibonacci.ipynb
│ └── Visualizations
│ │ ├── Boxplots.ipynb
│ │ ├── Matplotlib_Tutorial.ipynb
│ │ └── Seaborn_Visualizations_&_Data.ipynb
├── Random_Number_Generators
│ ├── 1_PRNG_Middle_Square_Method.ipynb
│ ├── 2_PRNG_Linear_Congruential_Generator.ipynb
│ ├── 3_PRNG_Linear_Feedback_Shift_Register.ipynb
│ └── 4_PRNG_Dev_Random_Directory.ipynb
└── Statistics
│ └── Central_Limit_Theorem.ipynb
└── pkl_files
├── subset_df.pkl
└── subset_df2.pkl
/.gitignore:
--------------------------------------------------------------------------------
1 | .ipynb_checkpoints
2 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2017 David Ziganto
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Data_Science_Fundamentals
2 |
3 | **Objective:** to introduce Data Science concepts from both a theoretical and practical standpoint, all in Python.
4 |
5 | **Audience:** ranges from beginner to intermediate.
6 |
7 | **Note:** this is a living repository that will be updated quite frequently.
8 |
--------------------------------------------------------------------------------
/data/Create_Higgs_CSV.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import pandas as pd"
10 | ]
11 | },
12 | {
13 | "cell_type": "code",
14 | "execution_count": 2,
15 | "metadata": {},
16 | "outputs": [],
17 | "source": [
18 | "file_path = 'https://archive.ics.uci.edu/ml/machine-learning-databases/00280/HIGGS.csv.gz'\n",
19 | "columns = ['target',\n",
20 | " ' lepton_pT', \n",
21 | " 'lepton_eta', \n",
22 | " 'lepton_phi', \n",
23 | " 'missing_energy_magnitude', \n",
24 | " 'missing_energy_phi', \n",
25 | " 'jet_1_pt', \n",
26 | " 'jet_1_eta', \n",
27 | " 'jet_1_phi', \n",
28 | " 'jet_1_b-tag', \n",
29 | " 'jet_2_pt', \n",
30 | " 'jet_2_eta', \n",
31 | " 'jet_2_phi', \n",
32 | " 'jet_2_b-tag', \n",
33 | " 'jet_3_pt', \n",
34 | " 'jet_3_eta', \n",
35 | " 'jet_3_phi', \n",
36 | " 'jet_3_b-tag', \n",
37 | " 'jet_4_pt', \n",
38 | " 'jet_4_eta', \n",
39 | " 'jet_4_phi', \n",
40 | " 'jet_4_b-tag', \n",
41 | " 'm_jj', \n",
42 | " 'm_jjj', \n",
43 | " 'm_lv', \n",
44 | " 'm_jlv', \n",
45 | " 'm_bb', \n",
46 | " 'm_wbb', \n",
47 | " 'm_wwbb']"
48 | ]
49 | },
50 | {
51 | "cell_type": "code",
52 | "execution_count": 3,
53 | "metadata": {},
54 | "outputs": [],
55 | "source": [
56 | "df = pd.read_csv(file_path, names=columns)"
57 | ]
58 | },
59 | {
60 | "cell_type": "code",
61 | "execution_count": 4,
62 | "metadata": {},
63 | "outputs": [
64 | {
65 | "data": {
66 | "text/html": [
67 | "
\n",
68 | "\n",
81 | "
\n",
82 | " \n",
83 | " \n",
84 | " | \n",
85 | " target | \n",
86 | " lepton_pT | \n",
87 | " lepton_eta | \n",
88 | " lepton_phi | \n",
89 | " missing_energy_magnitude | \n",
90 | " missing_energy_phi | \n",
91 | " jet_1_pt | \n",
92 | " jet_1_eta | \n",
93 | " jet_1_phi | \n",
94 | " jet_1_b-tag | \n",
95 | " ... | \n",
96 | " jet_4_eta | \n",
97 | " jet_4_phi | \n",
98 | " jet_4_b-tag | \n",
99 | " m_jj | \n",
100 | " m_jjj | \n",
101 | " m_lv | \n",
102 | " m_jlv | \n",
103 | " m_bb | \n",
104 | " m_wbb | \n",
105 | " m_wwbb | \n",
106 | "
\n",
107 | " \n",
108 | " \n",
109 | " \n",
110 | " 0 | \n",
111 | " 1.0 | \n",
112 | " 0.869293 | \n",
113 | " -0.635082 | \n",
114 | " 0.225690 | \n",
115 | " 0.327470 | \n",
116 | " -0.689993 | \n",
117 | " 0.754202 | \n",
118 | " -0.248573 | \n",
119 | " -1.092064 | \n",
120 | " 0.000000 | \n",
121 | " ... | \n",
122 | " -0.010455 | \n",
123 | " -0.045767 | \n",
124 | " 3.101961 | \n",
125 | " 1.353760 | \n",
126 | " 0.979563 | \n",
127 | " 0.978076 | \n",
128 | " 0.920005 | \n",
129 | " 0.721657 | \n",
130 | " 0.988751 | \n",
131 | " 0.876678 | \n",
132 | "
\n",
133 | " \n",
134 | " 1 | \n",
135 | " 1.0 | \n",
136 | " 0.907542 | \n",
137 | " 0.329147 | \n",
138 | " 0.359412 | \n",
139 | " 1.497970 | \n",
140 | " -0.313010 | \n",
141 | " 1.095531 | \n",
142 | " -0.557525 | \n",
143 | " -1.588230 | \n",
144 | " 2.173076 | \n",
145 | " ... | \n",
146 | " -1.138930 | \n",
147 | " -0.000819 | \n",
148 | " 0.000000 | \n",
149 | " 0.302220 | \n",
150 | " 0.833048 | \n",
151 | " 0.985700 | \n",
152 | " 0.978098 | \n",
153 | " 0.779732 | \n",
154 | " 0.992356 | \n",
155 | " 0.798343 | \n",
156 | "
\n",
157 | " \n",
158 | " 2 | \n",
159 | " 1.0 | \n",
160 | " 0.798835 | \n",
161 | " 1.470639 | \n",
162 | " -1.635975 | \n",
163 | " 0.453773 | \n",
164 | " 0.425629 | \n",
165 | " 1.104875 | \n",
166 | " 1.282322 | \n",
167 | " 1.381664 | \n",
168 | " 0.000000 | \n",
169 | " ... | \n",
170 | " 1.128848 | \n",
171 | " 0.900461 | \n",
172 | " 0.000000 | \n",
173 | " 0.909753 | \n",
174 | " 1.108330 | \n",
175 | " 0.985692 | \n",
176 | " 0.951331 | \n",
177 | " 0.803252 | \n",
178 | " 0.865924 | \n",
179 | " 0.780118 | \n",
180 | "
\n",
181 | " \n",
182 | " 3 | \n",
183 | " 0.0 | \n",
184 | " 1.344385 | \n",
185 | " -0.876626 | \n",
186 | " 0.935913 | \n",
187 | " 1.992050 | \n",
188 | " 0.882454 | \n",
189 | " 1.786066 | \n",
190 | " -1.646778 | \n",
191 | " -0.942383 | \n",
192 | " 0.000000 | \n",
193 | " ... | \n",
194 | " -0.678379 | \n",
195 | " -1.360356 | \n",
196 | " 0.000000 | \n",
197 | " 0.946652 | \n",
198 | " 1.028704 | \n",
199 | " 0.998656 | \n",
200 | " 0.728281 | \n",
201 | " 0.869200 | \n",
202 | " 1.026736 | \n",
203 | " 0.957904 | \n",
204 | "
\n",
205 | " \n",
206 | " 4 | \n",
207 | " 1.0 | \n",
208 | " 1.105009 | \n",
209 | " 0.321356 | \n",
210 | " 1.522401 | \n",
211 | " 0.882808 | \n",
212 | " -1.205349 | \n",
213 | " 0.681466 | \n",
214 | " -1.070464 | \n",
215 | " -0.921871 | \n",
216 | " 0.000000 | \n",
217 | " ... | \n",
218 | " -0.373566 | \n",
219 | " 0.113041 | \n",
220 | " 0.000000 | \n",
221 | " 0.755856 | \n",
222 | " 1.361057 | \n",
223 | " 0.986610 | \n",
224 | " 0.838085 | \n",
225 | " 1.133295 | \n",
226 | " 0.872245 | \n",
227 | " 0.808487 | \n",
228 | "
\n",
229 | " \n",
230 | "
\n",
231 | "
5 rows × 29 columns
\n",
232 | "
"
233 | ],
234 | "text/plain": [
235 | " target lepton_pT lepton_eta lepton_phi missing_energy_magnitude \\\n",
236 | "0 1.0 0.869293 -0.635082 0.225690 0.327470 \n",
237 | "1 1.0 0.907542 0.329147 0.359412 1.497970 \n",
238 | "2 1.0 0.798835 1.470639 -1.635975 0.453773 \n",
239 | "3 0.0 1.344385 -0.876626 0.935913 1.992050 \n",
240 | "4 1.0 1.105009 0.321356 1.522401 0.882808 \n",
241 | "\n",
242 | " missing_energy_phi jet_1_pt jet_1_eta jet_1_phi jet_1_b-tag ... \\\n",
243 | "0 -0.689993 0.754202 -0.248573 -1.092064 0.000000 ... \n",
244 | "1 -0.313010 1.095531 -0.557525 -1.588230 2.173076 ... \n",
245 | "2 0.425629 1.104875 1.282322 1.381664 0.000000 ... \n",
246 | "3 0.882454 1.786066 -1.646778 -0.942383 0.000000 ... \n",
247 | "4 -1.205349 0.681466 -1.070464 -0.921871 0.000000 ... \n",
248 | "\n",
249 | " jet_4_eta jet_4_phi jet_4_b-tag m_jj m_jjj m_lv m_jlv \\\n",
250 | "0 -0.010455 -0.045767 3.101961 1.353760 0.979563 0.978076 0.920005 \n",
251 | "1 -1.138930 -0.000819 0.000000 0.302220 0.833048 0.985700 0.978098 \n",
252 | "2 1.128848 0.900461 0.000000 0.909753 1.108330 0.985692 0.951331 \n",
253 | "3 -0.678379 -1.360356 0.000000 0.946652 1.028704 0.998656 0.728281 \n",
254 | "4 -0.373566 0.113041 0.000000 0.755856 1.361057 0.986610 0.838085 \n",
255 | "\n",
256 | " m_bb m_wbb m_wwbb \n",
257 | "0 0.721657 0.988751 0.876678 \n",
258 | "1 0.779732 0.992356 0.798343 \n",
259 | "2 0.803252 0.865924 0.780118 \n",
260 | "3 0.869200 1.026736 0.957904 \n",
261 | "4 1.133295 0.872245 0.808487 \n",
262 | "\n",
263 | "[5 rows x 29 columns]"
264 | ]
265 | },
266 | "execution_count": 4,
267 | "metadata": {},
268 | "output_type": "execute_result"
269 | }
270 | ],
271 | "source": [
272 | "df.head()"
273 | ]
274 | },
275 | {
276 | "cell_type": "code",
277 | "execution_count": 5,
278 | "metadata": {},
279 | "outputs": [
280 | {
281 | "name": "stdout",
282 | "output_type": "stream",
283 | "text": [
284 | "\n",
285 | "RangeIndex: 11000000 entries, 0 to 10999999\n",
286 | "Data columns (total 29 columns):\n",
287 | "target float64\n",
288 | " lepton_pT float64\n",
289 | "lepton_eta float64\n",
290 | "lepton_phi float64\n",
291 | "missing_energy_magnitude float64\n",
292 | "missing_energy_phi float64\n",
293 | "jet_1_pt float64\n",
294 | "jet_1_eta float64\n",
295 | "jet_1_phi float64\n",
296 | "jet_1_b-tag float64\n",
297 | "jet_2_pt float64\n",
298 | "jet_2_eta float64\n",
299 | "jet_2_phi float64\n",
300 | "jet_2_b-tag float64\n",
301 | "jet_3_pt float64\n",
302 | "jet_3_eta float64\n",
303 | "jet_3_phi float64\n",
304 | "jet_3_b-tag float64\n",
305 | "jet_4_pt float64\n",
306 | "jet_4_eta float64\n",
307 | "jet_4_phi float64\n",
308 | "jet_4_b-tag float64\n",
309 | "m_jj float64\n",
310 | "m_jjj float64\n",
311 | "m_lv float64\n",
312 | "m_jlv float64\n",
313 | "m_bb float64\n",
314 | "m_wbb float64\n",
315 | "m_wwbb float64\n",
316 | "dtypes: float64(29)\n",
317 | "memory usage: 2.4 GB\n"
318 | ]
319 | }
320 | ],
321 | "source": [
322 | "df.info()"
323 | ]
324 | },
325 | {
326 | "cell_type": "code",
327 | "execution_count": 11,
328 | "metadata": {},
329 | "outputs": [],
330 | "source": [
331 | "save_path = '/Users/davidziganto/Repositories/Data_Science_Fundamentals/data/'\n",
332 | "file_name = 'higgs.csv'"
333 | ]
334 | },
335 | {
336 | "cell_type": "code",
337 | "execution_count": 12,
338 | "metadata": {},
339 | "outputs": [],
340 | "source": [
341 | "df.to_csv(save_path + file_name)"
342 | ]
343 | }
344 | ],
345 | "metadata": {
346 | "kernelspec": {
347 | "display_name": "Python [conda root]",
348 | "language": "python",
349 | "name": "conda-root-py"
350 | },
351 | "language_info": {
352 | "codemirror_mode": {
353 | "name": "ipython",
354 | "version": 3
355 | },
356 | "file_extension": ".py",
357 | "mimetype": "text/x-python",
358 | "name": "python",
359 | "nbconvert_exporter": "python",
360 | "pygments_lexer": "ipython3",
361 | "version": "3.5.4"
362 | }
363 | },
364 | "nbformat": 4,
365 | "nbformat_minor": 2
366 | }
367 |
--------------------------------------------------------------------------------
/notebooks/Big_Data/Zeppelin/Machine_Learning_Zeppelin.json:
--------------------------------------------------------------------------------
1 | {"paragraphs":[{"title":"Introduction","text":"%md\nThis notebook is a simple tutorial on how to use numpy, pandas, and Spark's machine learning library to do fun stuff.","user":"admin","dateUpdated":"2017-11-09T14:04:30-0600","config":{"colWidth":12,"enabled":true,"results":{},"editorSetting":{"language":"markdown","editOnDblClick":true},"editorMode":"ace/mode/markdown","title":true,"editorHide":true,"tableHide":false},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1510255925569_1694489231","id":"20171109-133205_1077344504","dateCreated":"2017-11-09T13:32:05-0600","dateStarted":"2017-11-09T14:04:30-0600","dateFinished":"2017-11-09T14:04:32-0600","status":"FINISHED","errorMessage":"","progressUpdateIntervalMs":500,"focus":true,"$$hashKey":"object:1692"},{"title":"Imports","text":"%pyspark\n\nimport pandas as pd\nimport numpy as np","user":"admin","dateUpdated":"2017-11-09T14:04:04-0600","config":{"colWidth":12,"enabled":true,"results":{},"editorSetting":{"language":"python","editOnDblClick":false},"editorMode":"ace/mode/python","title":true},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1510255243163_715367343","id":"20171109-132043_872043916","dateCreated":"2017-11-09T13:20:43-0600","dateStarted":"2017-11-09T14:04:03-0600","dateFinished":"2017-11-09T14:04:03-0600","status":"FINISHED","errorMessage":"","progressUpdateIntervalMs":500,"$$hashKey":"object:1693"},{"text":"%pyspark\n\ndf = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-white.csv', 
delimiter=';')\ndf.head()","user":"admin","dateUpdated":"2017-11-09T13:25:33-0600","config":{"colWidth":12,"enabled":true,"results":{},"editorSetting":{"language":"python"},"editorMode":"ace/mode/python"},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1510255257120_2050091925","id":"20171109-132057_845151643","dateCreated":"2017-11-09T13:20:57-0600","dateStarted":"2017-11-09T13:25:33-0600","dateFinished":"2017-11-09T13:25:34-0600","status":"FINISHED","errorMessage":"","progressUpdateIntervalMs":500,"$$hashKey":"object:1694"},{"title":"Rearrange Rows","text":"%pyspark\ndf = df[['quality', 'fixed acidity', 'volatile acidity', 'citric acid', 'residual sugar', 'chlorides', 'free sulfur dioxide', 'total sulfur dioxide', 'density', 'pH', 'sulphates', 'alcohol']]","user":"admin","dateUpdated":"2017-11-09T14:53:15-0600","config":{"colWidth":12,"enabled":true,"results":{},"editorSetting":{"language":"python"},"editorMode":"ace/mode/python","title":true},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1510260622609_-2020735401","id":"20171109-145022_1858055191","dateCreated":"2017-11-09T14:50:22-0600","dateStarted":"2017-11-09T14:53:15-0600","dateFinished":"2017-11-09T14:53:15-0600","status":"FINISHED","errorMessage":"","progressUpdateIntervalMs":500,"$$hashKey":"object:1695"},{"text":"%pyspark\ntype(df)","user":"admin","dateUpdated":"2017-11-09T14:53:18-0600","config":{"colWidth":12,"enabled":true,"results":{},"editorSetting":{"language":"python"},"editorMode":"ace/mode/python"},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1510258090492_534582379","id":"20171109-140810_1298767575","dateCreated":"2017-11-09T14:08:10-0600","dateStarted":"2017-11-09T14:53:18-0600","dateFinished":"2017-11-09T14:53:18-0600","status":"FINISHED","errorMessage":"","progressUpdateIntervalMs":500,"$$hashKey":"object:1696"},{"title":"Create Spark DF","text":"%pyspark\nddf = 
spark.createDataFrame(df)","user":"admin","dateUpdated":"2017-11-09T15:20:53-0600","config":{"colWidth":12,"enabled":true,"results":{},"editorSetting":{"language":"python"},"editorMode":"ace/mode/python","title":true},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1510255281754_1175588324","id":"20171109-132121_1872338822","dateCreated":"2017-11-09T13:21:21-0600","dateStarted":"2017-11-09T15:20:53-0600","dateFinished":"2017-11-09T15:20:54-0600","status":"FINISHED","errorMessage":"","progressUpdateIntervalMs":500,"$$hashKey":"object:1697"},{"text":"%pyspark\ntype(ddf)","user":"admin","dateUpdated":"2017-11-09T15:20:55-0600","config":{"colWidth":12,"enabled":true,"results":{},"editorSetting":{"language":"python"},"editorMode":"ace/mode/python"},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1510258074988_2093954347","id":"20171109-140754_1396222594","dateCreated":"2017-11-09T14:07:54-0600","dateStarted":"2017-11-09T15:20:55-0600","dateFinished":"2017-11-09T15:20:55-0600","status":"FINISHED","errorMessage":"","progressUpdateIntervalMs":500,"$$hashKey":"object:1698"},{"title":"Number of Rows","text":"%pyspark\nddf.count()","user":"admin","dateUpdated":"2017-11-09T15:20:56-0600","config":{"colWidth":12,"enabled":true,"results":{},"editorSetting":{"language":"python"},"editorMode":"ace/mode/python","title":true},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1510259453749_-200329059","id":"20171109-143053_1622356792","dateCreated":"2017-11-09T14:30:53-0600","dateStarted":"2017-11-09T15:20:56-0600","dateFinished":"2017-11-09T15:20:56-0600","status":"FINISHED","errorMessage":"","progressUpdateIntervalMs":500,"$$hashKey":"object:1699"},{"title":"Quick 
Peek","text":"%pyspark\nddf.first()","user":"admin","dateUpdated":"2017-11-09T15:20:58-0600","config":{"colWidth":12,"enabled":true,"results":{},"editorSetting":{"language":"python"},"editorMode":"ace/mode/python","title":true},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1510255689961_508972245","id":"20171109-132809_629688564","dateCreated":"2017-11-09T13:28:09-0600","dateStarted":"2017-11-09T15:20:58-0600","dateFinished":"2017-11-09T15:20:58-0600","status":"FINISHED","errorMessage":"","progressUpdateIntervalMs":500,"$$hashKey":"object:1700"},{"title":"Quick Peek 2","text":"%pyspark\nddf.take(5)","user":"admin","dateUpdated":"2017-11-09T15:21:00-0600","config":{"colWidth":12,"enabled":true,"results":{},"editorSetting":{"language":"python"},"editorMode":"ace/mode/python","title":true},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1510255663529_-466628372","id":"20171109-132743_239942099","dateCreated":"2017-11-09T13:27:43-0600","dateStarted":"2017-11-09T15:21:00-0600","dateFinished":"2017-11-09T15:21:00-0600","status":"FINISHED","errorMessage":"","progressUpdateIntervalMs":500,"$$hashKey":"object:1701"},{"title":"Better 
Format","text":"%pyspark\nddf.show()","user":"admin","dateUpdated":"2017-11-09T15:21:02-0600","config":{"colWidth":12,"enabled":true,"results":{},"editorSetting":{"language":"python"},"editorMode":"ace/mode/python","title":true},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1510258522386_-981731649","id":"20171109-141522_1465708413","dateCreated":"2017-11-09T14:15:22-0600","dateStarted":"2017-11-09T15:21:02-0600","dateFinished":"2017-11-09T15:21:02-0600","status":"FINISHED","errorMessage":"","progressUpdateIntervalMs":500,"$$hashKey":"object:1702"},{"text":"%pyspark\nddf.columns","user":"admin","dateUpdated":"2017-11-09T15:21:08-0600","config":{"colWidth":12,"enabled":true,"results":{},"editorSetting":{"language":"python"},"editorMode":"ace/mode/python"},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1510258655977_689079582","id":"20171109-141735_398438825","dateCreated":"2017-11-09T14:17:35-0600","dateStarted":"2017-11-09T15:21:08-0600","dateFinished":"2017-11-09T15:21:08-0600","status":"FINISHED","errorMessage":"","progressUpdateIntervalMs":500,"$$hashKey":"object:1703"},{"text":"%pyspark\ntrain_data, test_data = ddf.randomSplit([.8,.2],seed=1234)","user":"admin","dateUpdated":"2017-11-09T15:21:15-0600","config":{"colWidth":12,"enabled":true,"results":{},"editorSetting":{"language":"python"},"editorMode":"ace/mode/python"},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1510258850889_-421449118","id":"20171109-142050_1844692829","dateCreated":"2017-11-09T14:20:50-0600","dateStarted":"2017-11-09T15:21:15-0600","dateFinished":"2017-11-09T15:21:15-0600","status":"FINISHED","errorMessage":"","progressUpdateIntervalMs":500,"$$hashKey":"object:1704"},{"text":"%pyspark\nprint('train:', train_data.count())\nprint('test:', 
test_data.count())","user":"admin","dateUpdated":"2017-11-09T15:21:17-0600","config":{"colWidth":12,"enabled":true,"results":{},"editorSetting":{"language":"python"},"editorMode":"ace/mode/python"},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1510259459848_159811236","id":"20171109-143059_355393844","dateCreated":"2017-11-09T14:30:59-0600","dateStarted":"2017-11-09T15:21:17-0600","dateFinished":"2017-11-09T15:21:17-0600","status":"FINISHED","errorMessage":"","progressUpdateIntervalMs":500,"$$hashKey":"object:1705"},{"text":"%pyspark\nfrom pyspark.ml.feature import VectorAssembler\nfeature_assembler = VectorAssembler(\n inputCols=['fixed acidity', 'volatile acidity', 'citric acid', 'residual sugar', 'chlorides', 'free sulfur dioxide', 'total sulfur dioxide', 'density', 'pH', 'sulphates', 'alcohol'],\n outputCol='features')","user":"admin","dateUpdated":"2017-11-09T15:29:54-0600","config":{"colWidth":12,"enabled":true,"results":{},"editorSetting":{"language":"python"},"editorMode":"ace/mode/python"},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1510259638946_984372457","id":"20171109-143358_1110597847","dateCreated":"2017-11-09T14:33:58-0600","dateStarted":"2017-11-09T15:29:54-0600","dateFinished":"2017-11-09T15:29:54-0600","status":"FINISHED","errorMessage":"","progressUpdateIntervalMs":500,"$$hashKey":"object:1706"},{"text":"%pyspark\ntrain.take(1)","user":"admin","dateUpdated":"2017-11-09T15:29:58-0600","config":{"colWidth":12,"enabled":true,"results":{},"editorSetting":{"language":"python"},"editorMode":"ace/mode/python"},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1510262666092_1835562309","id":"20171109-152426_2092064614","dateCreated":"2017-11-09T15:24:26-0600","dateStarted":"2017-11-09T15:29:58-0600","dateFinished":"2017-11-09T15:29:58-0600","status":"FINISHED","errorMessage":"","progressUpdateIntervalMs":500,"$$hashKey":"object:1707"},{"text":"%pyspark\nfrom pyspark.ml.classification import 
LogisticRegression\nlr = LogisticRegression(regParam=0.1, elasticNetParam=1.0, labelCol='quality', family=\"multinomial\")\nlr_model = lr.fit(train)","user":"admin","dateUpdated":"2017-11-09T15:35:53-0600","config":{"colWidth":12,"enabled":true,"results":{},"editorSetting":{"language":"python"},"editorMode":"ace/mode/python"},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1510261136989_1594497724","id":"20171109-145856_1022628746","dateCreated":"2017-11-09T14:58:56-0600","dateStarted":"2017-11-09T15:35:53-0600","dateFinished":"2017-11-09T15:35:59-0600","status":"FINISHED","errorMessage":"","progressUpdateIntervalMs":500,"$$hashKey":"object:1708"},{"text":"%pyspark\nprint(\"Multinomial coefficients:\\n \" + str(lr_model.coefficientMatrix))\nprint(\"Multinomial intercepts:\\n \" + str(lr_model.interceptVector))\n","user":"admin","dateUpdated":"2017-11-09T15:36:40-0600","config":{"colWidth":12,"enabled":true,"results":{},"editorSetting":{"language":"python"},"editorMode":"ace/mode/python"},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1510262678601_-2031148813","id":"20171109-152438_1562383421","dateCreated":"2017-11-09T15:24:38-0600","dateStarted":"2017-11-09T15:36:40-0600","dateFinished":"2017-11-09T15:36:40-0600","status":"FINISHED","errorMessage":"","progressUpdateIntervalMs":500,"$$hashKey":"object:1709"},{"text":"%pyspark\n","user":"admin","dateUpdated":"2017-11-09T15:33:22-0600","config":{"colWidth":12,"enabled":true,"results":{},"editorSetting":{"language":"python"},"editorMode":"ace/mode/python"},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1510263202433_2105064972","id":"20171109-153322_151101125","dateCreated":"2017-11-09T15:33:22-0600","status":"READY","errorMessage":"","progressUpdateIntervalMs":500,"$$hashKey":"object:1710"}],"name":"Machine 
Learning","id":"2CYTR3UEA","angularObjects":{"2CJRPMHPD:shared_process":[],"2CJSHBYC6:shared_process":[],"2CKSW7BWY:shared_process":[],"2CGWQ9B37:shared_process":[],"2CGR5K95Z:shared_process":[],"2CK2ZPKQ4:shared_process":[],"2CJJMSU71:shared_process":[],"2CGZPE6U6:shared_process":[],"2CG9T16R1:shared_process":[],"2CG86WNKX:shared_process":[],"2CKE7UE12:shared_process":[],"2CGJYANVS:shared_process":[],"2CJSRKFPY:shared_process":[],"2CHBR99H6:shared_process":[],"2CFTTN2E2:shared_process":[],"2CGADMUUE:shared_process":[],"2CKJKMCM2:shared_process":[],"2CKS133YC:shared_process":[],"2CHMME5HT:shared_process":[]},"config":{"looknfeel":"default","personalizedMode":"false"},"info":{}}
--------------------------------------------------------------------------------
/notebooks/Checksum/Longitudinal_Redundancy_Check.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "## Convert Int to Binary"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": 1,
13 | "metadata": {
14 | "collapsed": true
15 | },
16 | "outputs": [],
17 | "source": [
18 | "# decimal form\n",
19 | "a = 1\n",
20 | "b = 2\n",
21 | "c = 10\n",
22 | "d = 100"
23 | ]
24 | },
25 | {
26 | "cell_type": "code",
27 | "execution_count": 2,
28 | "metadata": {
29 | "collapsed": true
30 | },
31 | "outputs": [],
32 | "source": [
33 | "def dec2bin(num):\n",
34 | " '''Return the binary digits of the integer num as a string, without the 0b prefix.'''\n",
35 | " return format(num, 'b')"
36 | ]
37 | },
38 | {
39 | "cell_type": "code",
40 | "execution_count": 3,
41 | "metadata": {},
42 | "outputs": [
43 | {
44 | "name": "stdout",
45 | "output_type": "stream",
46 | "text": [
47 | "0 0\n",
48 | "1 1\n",
49 | "2 10\n",
50 | "3 11\n",
51 | "4 100\n",
52 | "5 101\n",
53 | "6 110\n",
54 | "7 111\n",
55 | "8 1000\n",
56 | "9 1001\n",
57 | "10 1010\n",
58 | "11 1011\n",
59 | "12 1100\n",
60 | "13 1101\n",
61 | "14 1110\n",
62 | "15 1111\n",
63 | "16 10000\n",
64 | "17 10001\n",
65 | "18 10010\n",
66 | "19 10011\n"
67 | ]
68 | }
69 | ],
70 | "source": [
71 | "# binary form\n",
72 | "for i in range(20):\n",
73 | " print(i, dec2bin(i))"
74 | ]
75 | },
76 | {
77 | "cell_type": "code",
78 | "execution_count": 4,
79 | "metadata": {},
80 | "outputs": [
81 | {
82 | "data": {
83 | "text/plain": [
84 | "'1'"
85 | ]
86 | },
87 | "execution_count": 4,
88 | "metadata": {},
89 | "output_type": "execute_result"
90 | }
91 | ],
92 | "source": [
93 | "dec2bin(a)"
94 | ]
95 | },
96 | {
97 | "cell_type": "code",
98 | "execution_count": 5,
99 | "metadata": {},
100 | "outputs": [
101 | {
102 | "data": {
103 | "text/plain": [
104 | "'11'"
105 | ]
106 | },
107 | "execution_count": 5,
108 | "metadata": {},
109 | "output_type": "execute_result"
110 | }
111 | ],
112 | "source": [
113 | "dec2bin(3) "
114 | ]
115 | },
116 | {
117 | "cell_type": "code",
118 | "execution_count": 6,
119 | "metadata": {},
120 | "outputs": [
121 | {
122 | "data": {
123 | "text/plain": [
124 | "'1010'"
125 | ]
126 | },
127 | "execution_count": 6,
128 | "metadata": {},
129 | "output_type": "execute_result"
130 | }
131 | ],
132 | "source": [
133 | "dec2bin(10)"
134 | ]
135 | },
136 | {
137 | "cell_type": "code",
138 | "execution_count": 7,
139 | "metadata": {},
140 | "outputs": [
141 | {
142 | "data": {
143 | "text/plain": [
144 | "'101000'"
145 | ]
146 | },
147 | "execution_count": 7,
148 | "metadata": {},
149 | "output_type": "execute_result"
150 | }
151 | ],
152 | "source": [
153 | "# Return x with the bits shifted to the left by y places: x << y\n",
154 | "dec2bin(10 << 2)"
155 | ]
156 | },
157 | {
158 | "cell_type": "code",
159 | "execution_count": 8,
160 | "metadata": {},
161 | "outputs": [
162 | {
163 | "data": {
164 | "text/plain": [
165 | "'101'"
166 | ]
167 | },
168 | "execution_count": 8,
169 | "metadata": {},
170 | "output_type": "execute_result"
171 | }
172 | ],
173 | "source": [
174 | "# Return x with the bits shifted to the right by y places: x >> y\n",
175 | "dec2bin(10 >> 1)"
176 | ]
177 | },
178 | {
179 | "cell_type": "code",
180 | "execution_count": 9,
181 | "metadata": {},
182 | "outputs": [
183 | {
184 | "name": "stdout",
185 | "output_type": "stream",
186 | "text": [
187 | "b1: 1010\n",
188 | "b2: 1100\n",
189 | "\n",
190 | "b1&b2: 1000\n"
191 | ]
192 | }
193 | ],
194 | "source": [
195 | "# Bitwise \"and\"\n",
196 | "b1 = dec2bin(10)\n",
197 | "b2 = dec2bin(12)\n",
198 | "print('b1: {}\\nb2: {}\\n'.format(b1, b2))\n",
199 | "print('b1&b2:', dec2bin(10 & 12))"
200 | ]
201 | },
202 | {
203 | "cell_type": "code",
204 | "execution_count": 10,
205 | "metadata": {},
206 | "outputs": [
207 | {
208 | "name": "stdout",
209 | "output_type": "stream",
210 | "text": [
211 | "b1: 1010\n",
212 | "b2: 1100\n",
213 | "\n",
214 | "b1|b2: 1110\n"
215 | ]
216 | }
217 | ],
218 | "source": [
219 | "# Bitwise \"or\"\n",
220 | "print('b1: {}\\nb2: {}\\n'.format(b1, b2))\n",
221 | "print('b1|b2:', dec2bin(10 | 12))"
222 | ]
223 | },
224 | {
225 | "cell_type": "code",
226 | "execution_count": 11,
227 | "metadata": {},
228 | "outputs": [
229 | {
230 | "data": {
231 | "text/plain": [
232 | "'1010'"
233 | ]
234 | },
235 | "execution_count": 11,
236 | "metadata": {},
237 | "output_type": "execute_result"
238 | }
239 | ],
240 | "source": [
241 | "# Complement of x -- FIX\n",
242 | "dec2bin(10)"
243 | ]
244 | },
245 | {
246 | "cell_type": "code",
247 | "execution_count": 12,
248 | "metadata": {},
249 | "outputs": [
250 | {
251 | "data": {
252 | "text/plain": [
253 | "'1010'"
254 | ]
255 | },
256 | "execution_count": 12,
257 | "metadata": {},
258 | "output_type": "execute_result"
259 | }
260 | ],
261 | "source": [
262 | "dec2bin(10)"
263 | ]
264 | },
265 | {
266 | "cell_type": "code",
267 | "execution_count": 13,
268 | "metadata": {},
269 | "outputs": [
270 | {
271 | "name": "stdout",
272 | "output_type": "stream",
273 | "text": [
274 | "b1: 10010\n",
275 | "b2: 10100\n",
276 | "\n",
277 | "b1^b2: 110\n"
278 | ]
279 | }
280 | ],
281 | "source": [
282 | "# Bitwise xor\n",
283 | "print('b1: {}\\nb2: {}\\n'.format(dec2bin(18), dec2bin(20)))\n",
284 | "print('b1^b2:', dec2bin(18 ^ 20))"
285 | ]
286 | },
287 | {
288 | "cell_type": "markdown",
289 | "metadata": {},
290 | "source": [
291 | "## Longitudinal Redundancy Check (LRC)"
292 | ]
293 | },
294 | {
295 | "cell_type": "code",
296 | "execution_count": 14,
297 | "metadata": {
298 | "collapsed": true
299 | },
300 | "outputs": [],
301 | "source": [
302 | "def lrc(block1, block2):\n",
303 | " '''LRC that returns the parity bit check for 8 bit block.\n",
304 | " \n",
305 | " Input: \n",
306 | " decimal (integer(s) w/option for bitwise operations)\n",
307 | " Output:\n",
308 | " binary representation where all zeros represents perfect parity match\n",
309 | " '''\n",
310 | " assert block1 < 255, \"block 1 must have int value less than 255.\"\n",
311 | " assert block2 < 255, \"block 2 must have int value less than 255.\"\n",
312 | " \n",
313 | " xor = format(block1 ^ block2, 'b')\n",
314 | " delta = 8 - len(xor)\n",
315 | " if delta < 8:\n",
316 | " return '0'*delta + xor\n",
317 | " else:\n",
318 | " return xor"
319 | ]
320 | },
321 | {
322 | "cell_type": "code",
323 | "execution_count": 15,
324 | "metadata": {},
325 | "outputs": [
326 | {
327 | "data": {
328 | "text/plain": [
329 | "'00000000'"
330 | ]
331 | },
332 | "execution_count": 15,
333 | "metadata": {},
334 | "output_type": "execute_result"
335 | }
336 | ],
337 | "source": [
338 | "# perfect match: should return 0's\n",
339 | "lrc(100, 100)"
340 | ]
341 | },
342 | {
343 | "cell_type": "code",
344 | "execution_count": 16,
345 | "metadata": {},
346 | "outputs": [
347 | {
348 | "data": {
349 | "text/plain": [
350 | "'00001101'"
351 | ]
352 | },
353 | "execution_count": 16,
354 | "metadata": {},
355 | "output_type": "execute_result"
356 | }
357 | ],
358 | "source": [
359 | "# imperfect match: should return 1's wherever parity mismatch\n",
360 | "lrc(100, 105)"
361 | ]
362 | },
363 | {
364 | "cell_type": "code",
365 | "execution_count": 17,
366 | "metadata": {},
367 | "outputs": [
368 | {
369 | "name": "stdout",
370 | "output_type": "stream",
371 | "text": [
372 | "item1: 1100100\n",
373 | "item2: 1101001\n",
374 | "------ -------\n",
375 | " xor: 0001101\n"
376 | ]
377 | }
378 | ],
379 | "source": [
380 | "tup = (('item1:', dec2bin(100)), \n",
381 | " ('item2:', dec2bin(105)),\n",
382 | " ('-'*6, '-'*7),\n",
383 | " (' xor:', lrc(100, 105)[1:]))\n",
384 | "for item in tup:\n",
385 | " print(item[0], item[1])"
386 | ]
387 | },
388 | {
389 | "cell_type": "code",
390 | "execution_count": 18,
391 | "metadata": {},
392 | "outputs": [
393 | {
394 | "data": {
395 | "text/plain": [
396 | "True"
397 | ]
398 | },
399 | "execution_count": 18,
400 | "metadata": {},
401 | "output_type": "execute_result"
402 | }
403 | ],
404 | "source": [
405 | "lrc(100, 100) == '00000000'"
406 | ]
407 | },
408 | {
409 | "cell_type": "code",
410 | "execution_count": 19,
411 | "metadata": {},
412 | "outputs": [
413 | {
414 | "data": {
415 | "text/plain": [
416 | "False"
417 | ]
418 | },
419 | "execution_count": 19,
420 | "metadata": {},
421 | "output_type": "execute_result"
422 | }
423 | ],
424 | "source": [
425 | "lrc(100, 105) == '00000000'"
426 | ]
427 | },
428 | {
429 | "cell_type": "markdown",
430 | "metadata": {},
431 | "source": [
432 | "## Problem: Bit Flips\n",
433 | "\n",
434 | "LRC will catch bit flips so long as they occur at different bit positions. However, if bits flip in the exact same position of two blocks, the flips cancel out in the XOR and LRC will not catch them, whereas some other checksums will."
435 | ]
436 | },
437 | {
438 | "cell_type": "code",
439 | "execution_count": 20,
440 | "metadata": {},
441 | "outputs": [
442 | {
443 | "data": {
444 | "text/plain": [
445 | "'1100100'"
446 | ]
447 | },
448 | "execution_count": 20,
449 | "metadata": {},
450 | "output_type": "execute_result"
451 | }
452 | ],
453 | "source": [
454 | "dec2bin(100)"
455 | ]
456 | },
457 | {
458 | "cell_type": "code",
459 | "execution_count": 21,
460 | "metadata": {},
461 | "outputs": [
462 | {
463 | "data": {
464 | "text/plain": [
465 | "'1100101'"
466 | ]
467 | },
468 | "execution_count": 21,
469 | "metadata": {},
470 | "output_type": "execute_result"
471 | }
472 | ],
473 | "source": [
474 | "dec2bin(101)"
475 | ]
476 | },
477 | {
478 | "cell_type": "code",
479 | "execution_count": 22,
480 | "metadata": {},
481 | "outputs": [
482 | {
483 | "data": {
484 | "text/plain": [
485 | "'1100100'"
486 | ]
487 | },
488 | "execution_count": 22,
489 | "metadata": {},
490 | "output_type": "execute_result"
491 | }
492 | ],
493 | "source": [
494 | "# simulate flipping least significant bit\n",
495 | "bitflip101 = dec2bin(101)[:6] + '0'\n",
496 | "bitflip101"
497 | ]
498 | },
499 | {
500 | "cell_type": "code",
501 | "execution_count": 23,
502 | "metadata": {},
503 | "outputs": [
504 | {
505 | "data": {
506 | "text/plain": [
507 | "100"
508 | ]
509 | },
510 | "execution_count": 23,
511 | "metadata": {},
512 | "output_type": "execute_result"
513 | }
514 | ],
515 | "source": [
516 | "# convert binary bit flip to decimal\n",
517 | "dec_bitflip101 = int(bitflip101, 2)\n",
518 | "dec_bitflip101"
519 | ]
520 | },
521 | {
522 | "cell_type": "code",
523 | "execution_count": 24,
524 | "metadata": {},
525 | "outputs": [
526 | {
527 | "data": {
528 | "text/plain": [
529 | "'00000000'"
530 | ]
531 | },
532 | "execution_count": 24,
533 | "metadata": {},
534 | "output_type": "execute_result"
535 | }
536 | ],
537 | "source": [
538 | "# check\n",
539 | "lrc(100, dec_bitflip101)"
540 | ]
541 | }
542 | ],
543 | "metadata": {
544 | "kernelspec": {
545 | "display_name": "Python [conda root]",
546 | "language": "python",
547 | "name": "conda-root-py"
548 | },
549 | "language_info": {
550 | "codemirror_mode": {
551 | "name": "ipython",
552 | "version": 3
553 | },
554 | "file_extension": ".py",
555 | "mimetype": "text/x-python",
556 | "name": "python",
557 | "nbconvert_exporter": "python",
558 | "pygments_lexer": "ipython3",
559 | "version": "3.5.4"
560 | }
561 | },
562 | "nbformat": 4,
563 | "nbformat_minor": 2
564 | }
565 |
--------------------------------------------------------------------------------
/notebooks/Cryptography/1_Caesar_Shift.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "## Caesar Cipher\n",
8 | "\n",
9 | "This notebook introduces the Caesar cipher. \n",
10 | "\n",
11 | "A Caesar cipher is one of the easiest ways to encrypt text. It's also extremely easy to crack. But for the purposes of introducing ciphers, it's a great place to start. \n",
12 | "\n",
13 | "The setup is pretty simple. You start with a message that you want to encrypt so someone else cannot read it. Say the message is **\"I hope you cannot read this.\"**. This is called the *plaintext*. Now we need to apply some algorithm to our text so the output is incoherent. For example, the output may be **\"O nuvk eua igttuz xkgj znoy.\"**. This is called the *ciphertext*. What we did in this case is shift every letter six places to the right: A becomes G, B becomes H, Z wraps around to F, and so on. \n",
14 | "\n",
15 | "> **plaintext:** I hope you cannot read this. \n",
16 | "> **ciphertext:** O nuvk eua igttuz xkgj znoy.\n",
17 | "\n",
18 | "Right away we can see this particular setup is very easy to crack. It includes capitalization and the words are clearly split. By converting all the text to lowercase and removing all spaces, we can make it a bit more difficult. However, there are only 25 different ways to shift the letters. That means a brute force attack, or trying all possibilities, is trivial.\n",
19 | "\n",
20 | "Let's see what this looks like in code. We'll create a class called *CaesarCipher* that can encrypt or decrypt text. "
21 | ]
22 | },
23 | {
24 | "cell_type": "markdown",
25 | "metadata": {},
26 | "source": [
27 | "---"
28 | ]
29 | },
30 | {
31 | "cell_type": "code",
32 | "execution_count": 1,
33 | "metadata": {},
34 | "outputs": [],
35 | "source": [
36 | "class CaesarCipher:\n",
37 | " \n",
38 | " \n",
39 | " \n",
40 | " def _clean_text(self, text):\n",
41 | " '''converts text to lowercase, removes spaces, and removes punctuation.'''\n",
42 | " import string\n",
43 | " assert type(text) == str, 'input needs to be a string!'\n",
44 | " text = text.lower()\n",
45 | " text = text.replace(' ', '')\n",
46 | " self.clean_text = \"\".join(character for character in text \n",
47 | " if character not in string.punctuation)\n",
48 | " return self.clean_text\n",
49 | " \n",
50 | " \n",
51 | " def _string2characters(self, text):\n",
52 | " '''converts a string to individual characters.'''\n",
53 | " assert type(text) == str, 'input needs to be a string!'\n",
54 | " self.str2char = list(text)\n",
55 | " return self.str2char\n",
56 | " \n",
57 | " \n",
58 | " def _chars2nums(self, characters):\n",
59 | " '''converts individual characters to integers.'''\n",
60 | " assert type(characters) == list, 'input needs to be a list of characters!'\n",
61 | " codebook = {'a':0, 'b':1, 'c':2, 'd':3, 'e':4, 'f':5, 'g':6, 'h':7, 'i':8, 'j':9,\n",
62 | " 'k':10, 'l':11, 'm':12, 'n':13, 'o':14, 'p':15, 'q':16, 'r':17, 's':18,\n",
63 | " 't':19, 'u':20, 'v':21, 'w':22, 'x':23, 'y':24, 'z':25}\n",
64 | " for i, char in enumerate(characters):\n",
65 | " try:\n",
66 | " characters[i] = codebook[char]\n",
67 | " except:\n",
68 | " pass\n",
69 | " self.char2num = characters\n",
70 | " return self.char2num\n",
71 | " \n",
72 | " \n",
73 | " def _nums2chars(self, numbers):\n",
74 | " '''converts individual integers to characters .'''\n",
75 | " assert type(numbers) == list, 'input needs to be a list of numbers!'\n",
76 | " codebook = {0:'a', 1:'b', 2:'c', 3:'d', 4:'e', 5:'f', 6:'g', 7:'h', 8:'i', 9:'j',\n",
77 | " 10:'k', 11:'l', 12:'m', 13:'n', 14:'o', 15:'p', 16:'q', 17:'r', 18:'s',\n",
78 | " 19:'t', 20:'u', 21:'v', 22:'w', 23:'x', 24:'y', 25:'z'}\n",
79 | " for i, num in enumerate(numbers):\n",
80 | " try:\n",
81 | " numbers[i] = codebook[num]\n",
82 | " except:\n",
83 | " pass\n",
84 | " self.num2chars = numbers\n",
85 | " return self.num2chars\n",
86 | " \n",
87 | " \n",
88 | " def _preprocessing(self, text):\n",
89 | " ''''''\n",
90 | " clean_text = self._clean_text(text)\n",
91 | " list_of_chars = self._string2characters(clean_text)\n",
92 | " list_of_nums = self._chars2nums(list_of_chars)\n",
93 | " return list_of_nums\n",
94 | " \n",
95 | " def encrypt(self, text, shift=3):\n",
96 | " '''return text that is shifted according to user's input.'''\n",
97 | " import numpy as np\n",
98 | " preprocess = self._preprocessing(text)\n",
99 | " nums_shifted = list((np.array(preprocess) + shift) % 26)\n",
100 | " return ''.join(self._nums2chars(nums_shifted))\n",
101 | " \n",
102 | " def decrypt(self, text, shift=3):\n",
103 | " '''returns text shifted by user-defined shift length.'''\n",
104 | " import numpy as np\n",
105 | " preprocess = self._preprocessing(text)\n",
106 | " nums = self._chars2nums(preprocess)\n",
107 | " num_shift = list((np.array(nums) - shift) % 26)\n",
108 | " return ''.join(self._nums2chars(num_shift))"
109 | ]
110 | },
111 | {
112 | "cell_type": "markdown",
113 | "metadata": {},
114 | "source": [
115 | "---"
116 | ]
117 | },
118 | {
119 | "cell_type": "code",
120 | "execution_count": 2,
121 | "metadata": {},
122 | "outputs": [],
123 | "source": [
124 | "cc = CaesarCipher()"
125 | ]
126 | },
127 | {
128 | "cell_type": "code",
129 | "execution_count": 3,
130 | "metadata": {},
131 | "outputs": [
132 | {
133 | "data": {
134 | "text/plain": [
135 | "'onuvkeuaigttuzxkgjznoy'"
136 | ]
137 | },
138 | "execution_count": 3,
139 | "metadata": {},
140 | "output_type": "execute_result"
141 | }
142 | ],
143 | "source": [
144 | "# example\n",
145 | "cc.encrypt('I hope you cannot read this.', shift=6)"
146 | ]
147 | },
148 | {
149 | "cell_type": "markdown",
150 | "metadata": {},
151 | "source": [
152 | "---"
153 | ]
154 | },
155 | {
156 | "cell_type": "code",
157 | "execution_count": 4,
158 | "metadata": {},
159 | "outputs": [
160 | {
161 | "data": {
162 | "text/plain": [
163 | "'the QuIcK brown fox jumps over the lazy dog!'"
164 | ]
165 | },
166 | "execution_count": 4,
167 | "metadata": {},
168 | "output_type": "execute_result"
169 | }
170 | ],
171 | "source": [
172 | "text = 'the QuIcK brown fox jumps over the lazy dog!'\n",
173 | "text"
174 | ]
175 | },
176 | {
177 | "cell_type": "code",
178 | "execution_count": 5,
179 | "metadata": {},
180 | "outputs": [
181 | {
182 | "data": {
183 | "text/plain": [
184 | "'ymjvznhpgwtbsktcozruxtajwymjqfeditl'"
185 | ]
186 | },
187 | "execution_count": 5,
188 | "metadata": {},
189 | "output_type": "execute_result"
190 | }
191 | ],
192 | "source": [
193 | "# show one encryption\n",
194 | "encrypted = cc.encrypt(text, shift=5)\n",
195 | "encrypted"
196 | ]
197 | },
198 | {
199 | "cell_type": "code",
200 | "execution_count": 6,
201 | "metadata": {},
202 | "outputs": [
203 | {
204 | "name": "stdout",
205 | "output_type": "stream",
206 | "text": [
207 | "uifrvjdlcspxogpykvnqtpwfsuifmbazeph\n",
208 | "vjgswkemdtqyphqzlworuqxgtvjgncbafqi\n",
209 | "wkhtxlfneurzqiramxpsvryhuwkhodcbgrj\n",
210 | "xliuymgofvsarjsbnyqtwszivxlipedchsk\n",
211 | "ymjvznhpgwtbsktcozruxtajwymjqfeditl\n",
212 | "znkwaoiqhxuctludpasvyubkxznkrgfejum\n",
213 | "aolxbpjriyvdumveqbtwzvclyaolshgfkvn\n",
214 | "bpmycqksjzwevnwfrcuxawdmzbpmtihglwo\n",
215 | "cqnzdrltkaxfwoxgsdvybxenacqnujihmxp\n",
216 | "droaesmulbygxpyhtewzcyfobdrovkjinyq\n",
217 | "espbftnvmczhyqziufxadzgpcespwlkjozr\n",
218 | "ftqcguowndaizrajvgybeahqdftqxmlkpas\n",
219 | "gurdhvpxoebjasbkwhzcfbiregurynmlqbt\n",
220 | "hvseiwqypfckbtclxiadgcjsfhvszonmrcu\n",
221 | "iwtfjxrzqgdlcudmyjbehdktgiwtaponsdv\n",
222 | "jxugkysarhemdvenzkcfieluhjxubqpotew\n",
223 | "kyvhlztbsifnewfoaldgjfmvikyvcrqpufx\n",
224 | "lzwimauctjgofxgpbmehkgnwjlzwdsrqvgy\n",
225 | "maxjnbvdukhpgyhqcnfilhoxkmaxetsrwhz\n",
226 | "nbykocwevliqhzirdogjmipylnbyfutsxia\n",
227 | "oczlpdxfwmjriajsephknjqzmoczgvutyjb\n",
228 | "pdamqeygxnksjbktfqilokranpdahwvuzkc\n",
229 | "qebnrfzhyoltkclugrjmplsboqebixwvald\n",
230 | "rfcosgaizpmuldmvhsknqmtcprfcjyxwbme\n",
231 | "sgdpthbjaqnvmenwitlornudqsgdkzyxcnf\n"
232 | ]
233 | }
234 | ],
235 | "source": [
236 | "# show all encryption possibilities\n",
237 | "for i in range(1,26):\n",
238 | " print(cc.encrypt(text, shift=i))"
239 | ]
240 | },
241 | {
242 | "cell_type": "code",
243 | "execution_count": 7,
244 | "metadata": {},
245 | "outputs": [
246 | {
247 | "name": "stdout",
248 | "output_type": "stream",
249 | "text": [
250 | "shift 1: xliuymgofvsarjsbnyqtwszivxlipedchsk\n",
251 | "\n",
252 | "shift 2: wkhtxlfneurzqiramxpsvryhuwkhodcbgrj\n",
253 | "\n",
254 | "shift 3: vjgswkemdtqyphqzlworuqxgtvjgncbafqi\n",
255 | "\n",
256 | "shift 4: uifrvjdlcspxogpykvnqtpwfsuifmbazeph\n",
257 | "\n",
258 | "shift 5: thequickbrownfoxjumpsoverthelazydog\n",
259 | "\n",
260 | "shift 6: sgdpthbjaqnvmenwitlornudqsgdkzyxcnf\n",
261 | "\n",
262 | "shift 7: rfcosgaizpmuldmvhsknqmtcprfcjyxwbme\n",
263 | "\n",
264 | "shift 8: qebnrfzhyoltkclugrjmplsboqebixwvald\n",
265 | "\n",
266 | "shift 9: pdamqeygxnksjbktfqilokranpdahwvuzkc\n",
267 | "\n",
268 | "shift10: oczlpdxfwmjriajsephknjqzmoczgvutyjb\n",
269 | "\n",
270 | "shift11: nbykocwevliqhzirdogjmipylnbyfutsxia\n",
271 | "\n",
272 | "shift12: maxjnbvdukhpgyhqcnfilhoxkmaxetsrwhz\n",
273 | "\n",
274 | "shift13: lzwimauctjgofxgpbmehkgnwjlzwdsrqvgy\n",
275 | "\n",
276 | "shift14: kyvhlztbsifnewfoaldgjfmvikyvcrqpufx\n",
277 | "\n",
278 | "shift15: jxugkysarhemdvenzkcfieluhjxubqpotew\n",
279 | "\n",
280 | "shift16: iwtfjxrzqgdlcudmyjbehdktgiwtaponsdv\n",
281 | "\n",
282 | "shift17: hvseiwqypfckbtclxiadgcjsfhvszonmrcu\n",
283 | "\n",
284 | "shift18: gurdhvpxoebjasbkwhzcfbiregurynmlqbt\n",
285 | "\n",
286 | "shift19: ftqcguowndaizrajvgybeahqdftqxmlkpas\n",
287 | "\n",
288 | "shift20: espbftnvmczhyqziufxadzgpcespwlkjozr\n",
289 | "\n",
290 | "shift21: droaesmulbygxpyhtewzcyfobdrovkjinyq\n",
291 | "\n",
292 | "shift22: cqnzdrltkaxfwoxgsdvybxenacqnujihmxp\n",
293 | "\n",
294 | "shift23: bpmycqksjzwevnwfrcuxawdmzbpmtihglwo\n",
295 | "\n",
296 | "shift24: aolxbpjriyvdumveqbtwzvclyaolshgfkvn\n",
297 | "\n",
298 | "shift25: znkwaoiqhxuctludpasvyubkxznkrgfejum\n",
299 | "\n"
300 | ]
301 | }
302 | ],
303 | "source": [
304 | "# show all decryption possibilities\n",
305 | "for i in range(1,26):\n",
306 | " print('shift{:2}: {}'.format(i, cc.decrypt(encrypted, shift=i)))\n",
307 | " print('')"
308 | ]
309 | }
310 | ],
311 | "metadata": {
312 | "kernelspec": {
313 | "display_name": "Python [conda root]",
314 | "language": "python",
315 | "name": "conda-root-py"
316 | },
317 | "language_info": {
318 | "codemirror_mode": {
319 | "name": "ipython",
320 | "version": 3
321 | },
322 | "file_extension": ".py",
323 | "mimetype": "text/x-python",
324 | "name": "python",
325 | "nbconvert_exporter": "python",
326 | "pygments_lexer": "ipython3",
327 | "version": "3.5.4"
328 | }
329 | },
330 | "nbformat": 4,
331 | "nbformat_minor": 2
332 | }
333 |
--------------------------------------------------------------------------------
/notebooks/Hashing/Birthday_Attack.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "According to the [Birthday attack](https://en.wikipedia.org/wiki/Birthday_attack) article on Wikipedia:\n",
8 | ">A birthday attack is a type of cryptographic attack that exploits the mathematics behind the birthday problem in probability theory. This attack can be used to abuse communication between two or more parties. The attack depends on the higher likelihood of collisions found between random attack attempts and a fixed degree of permutations (pigeonholes). With a birthday attack, it is possible to find a collision of a hash function in $\\sqrt{2^{n}}$=$2^{n/2}$ with $2^{n}$ being the classical preimage resistance security."
9 | ]
10 | },
11 | {
12 | "cell_type": "markdown",
13 | "metadata": {},
14 | "source": [
15 | "Given a function *f*, the goal of the attack is to find two different inputs *$x_{1}$*, *$x_{2}$* such that f($x_{1}$)=f($x_{2}$). Such a pair $x_{1}$, $x_{2}$ is called a collision. The method used to find a collision is simply to evaluate the function *f* for different input values that may be chosen randomly or pseudorandomly until the same result is found more than once. Because of the birthday problem, this method can be rather efficient. Specifically, if a function f(x) yields any of *H* different outputs with equal probability and *H* is sufficiently large, then we expect to obtain a pair of different arguments $x_{1}$ and $x_{2}$ with f($x_{1}$) = f($x_{2}$) after evaluating the function for about $1.25{\\sqrt {H}}$ different arguments on average."
16 | ]
17 | },
18 | {
19 | "cell_type": "code",
20 | "execution_count": 1,
21 | "metadata": {
22 | "collapsed": true
23 | },
24 | "outputs": [],
25 | "source": [
26 | "def hasher(number, modulus):\n",
27 | " '''simply hash function'''\n",
28 | " return number % modulus"
29 | ]
30 | },
31 | {
32 | "cell_type": "code",
33 | "execution_count": 2,
34 | "metadata": {
35 | "collapsed": true
36 | },
37 | "outputs": [],
38 | "source": [
39 | "from functools import partial\n",
40 | "\n",
41 | "# set modulus for all examples\n",
42 | "myhash = partial(hasher, modulus=47)"
43 | ]
44 | },
45 | {
46 | "cell_type": "code",
47 | "execution_count": 3,
48 | "metadata": {
49 | "collapsed": true
50 | },
51 | "outputs": [],
52 | "source": [
53 | "from math import log1p, sqrt\n",
54 | "\n",
55 | "def birthday(probability_exponent, bits):\n",
56 | " '''probability_exponent: desired probability of random collision'''\n",
57 | " probability = 10. ** probability_exponent\n",
58 | " outputs = 2. ** bits\n",
59 | " return sqrt(2. * outputs * -log1p(-probability))"
60 | ]
61 | },
62 | {
63 | "cell_type": "code",
64 | "execution_count": 4,
65 | "metadata": {},
66 | "outputs": [
67 | {
68 | "data": {
69 | "text/plain": [
70 | "4.536858806263531"
71 | ]
72 | },
73 | "execution_count": 4,
74 | "metadata": {},
75 | "output_type": "execute_result"
76 | }
77 | ],
78 | "source": [
79 | "birthday(-2, 10)"
80 | ]
81 | },
82 | {
83 | "cell_type": "code",
84 | "execution_count": 6,
85 | "metadata": {},
86 | "outputs": [
87 | {
88 | "data": {
89 | "text/plain": [
90 | "array([868, 988, 838, 285, 906, 485, 429, 386, 325, 923])"
91 | ]
92 | },
93 | "execution_count": 6,
94 | "metadata": {},
95 | "output_type": "execute_result"
96 | }
97 | ],
98 | "source": [
99 | "import numpy as np\n",
100 | "\n",
101 | "integers = np.random.randint(low=1, high=1000, size=10)\n",
102 | "integers"
103 | ]
104 | },
105 | {
106 | "cell_type": "code",
107 | "execution_count": 7,
108 | "metadata": {},
109 | "outputs": [
110 | {
111 | "data": {
112 | "text/plain": [
113 | "Counter({1: 1, 3: 1, 6: 1, 10: 1, 13: 1, 15: 1, 22: 1, 30: 1, 39: 1, 43: 1})"
114 | ]
115 | },
116 | "execution_count": 7,
117 | "metadata": {},
118 | "output_type": "execute_result"
119 | }
120 | ],
121 | "source": [
122 | "from collections import Counter\n",
123 | "\n",
124 | "hashes = [myhash(integer) for integer in integers]\n",
125 | "Counter(hashes)"
126 | ]
127 | }
128 | ],
129 | "metadata": {
130 | "kernelspec": {
131 | "display_name": "Python [conda root]",
132 | "language": "python",
133 | "name": "conda-root-py"
134 | },
135 | "language_info": {
136 | "codemirror_mode": {
137 | "name": "ipython",
138 | "version": 3
139 | },
140 | "file_extension": ".py",
141 | "mimetype": "text/x-python",
142 | "name": "python",
143 | "nbconvert_exporter": "python",
144 | "pygments_lexer": "ipython3",
145 | "version": "3.5.4"
146 | }
147 | },
148 | "nbformat": 4,
149 | "nbformat_minor": 2
150 | }
151 |
--------------------------------------------------------------------------------
/notebooks/Information_Theory/Hamming_Distance.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {
7 | "collapsed": true
8 | },
9 | "outputs": [],
10 | "source": [
11 | "s1 = 'string'\n",
12 | "s2 = 'stirng'\n",
13 | "s3 = 'stirna'"
14 | ]
15 | },
16 | {
17 | "cell_type": "markdown",
18 | "metadata": {},
19 | "source": [
20 | "---"
21 | ]
22 | },
23 | {
24 | "cell_type": "markdown",
25 | "metadata": {},
26 | "source": [
27 | "## Functions"
28 | ]
29 | },
30 | {
31 | "cell_type": "code",
32 | "execution_count": 2,
33 | "metadata": {
34 | "collapsed": true
35 | },
36 | "outputs": [],
37 | "source": [
38 | "def hamming_distance(string1, string2):\n",
39 | " '''calculate the hamming distance of two strings'''\n",
40 | " assert len(string1) == len(string2), \"strings must be the same length\"\n",
41 | " distance = 0\n",
42 | " for i, elem in enumerate(string1):\n",
43 | " if elem != string2[i]:\n",
44 | " distance += 1\n",
45 | " return distance"
46 | ]
47 | },
48 | {
49 | "cell_type": "code",
50 | "execution_count": 3,
51 | "metadata": {
52 | "collapsed": true
53 | },
54 | "outputs": [],
55 | "source": [
56 | "def hamming_distance2(string1, string2):\n",
57 | " '''calculate the hamming distance of two strings (more Pythonic)'''\n",
58 | " assert len(string1) == len(string2), \"strings must be the same length\"\n",
59 | " return sum(elem1 != elem2 for elem1, elem2 in zip(string1, string2))"
60 | ]
61 | },
62 | {
63 | "cell_type": "markdown",
64 | "metadata": {},
65 | "source": [
66 | "---"
67 | ]
68 | },
69 | {
70 | "cell_type": "code",
71 | "execution_count": 4,
72 | "metadata": {},
73 | "outputs": [
74 | {
75 | "data": {
76 | "text/plain": [
77 | "2"
78 | ]
79 | },
80 | "execution_count": 4,
81 | "metadata": {},
82 | "output_type": "execute_result"
83 | }
84 | ],
85 | "source": [
86 | "hamming_distance(s1, s2)"
87 | ]
88 | },
89 | {
90 | "cell_type": "code",
91 | "execution_count": 5,
92 | "metadata": {},
93 | "outputs": [
94 | {
95 | "data": {
96 | "text/plain": [
97 | "2"
98 | ]
99 | },
100 | "execution_count": 5,
101 | "metadata": {},
102 | "output_type": "execute_result"
103 | }
104 | ],
105 | "source": [
106 | "hamming_distance2(s1, s2)"
107 | ]
108 | },
109 | {
110 | "cell_type": "markdown",
111 | "metadata": {},
112 | "source": [
113 | "---"
114 | ]
115 | },
116 | {
117 | "cell_type": "code",
118 | "execution_count": 6,
119 | "metadata": {},
120 | "outputs": [
121 | {
122 | "data": {
123 | "text/plain": [
124 | "1"
125 | ]
126 | },
127 | "execution_count": 6,
128 | "metadata": {},
129 | "output_type": "execute_result"
130 | }
131 | ],
132 | "source": [
133 | "hamming_distance(s2, s3)"
134 | ]
135 | },
136 | {
137 | "cell_type": "code",
138 | "execution_count": 7,
139 | "metadata": {},
140 | "outputs": [
141 | {
142 | "data": {
143 | "text/plain": [
144 | "1"
145 | ]
146 | },
147 | "execution_count": 7,
148 | "metadata": {},
149 | "output_type": "execute_result"
150 | }
151 | ],
152 | "source": [
153 | "hamming_distance2(s2, s3)"
154 | ]
155 | },
156 | {
157 | "cell_type": "markdown",
158 | "metadata": {},
159 | "source": [
160 | "---"
161 | ]
162 | },
163 | {
164 | "cell_type": "code",
165 | "execution_count": 8,
166 | "metadata": {},
167 | "outputs": [
168 | {
169 | "data": {
170 | "text/plain": [
171 | "3"
172 | ]
173 | },
174 | "execution_count": 8,
175 | "metadata": {},
176 | "output_type": "execute_result"
177 | }
178 | ],
179 | "source": [
180 | "hamming_distance(s1, s3)"
181 | ]
182 | },
183 | {
184 | "cell_type": "code",
185 | "execution_count": 9,
186 | "metadata": {},
187 | "outputs": [
188 | {
189 | "data": {
190 | "text/plain": [
191 | "3"
192 | ]
193 | },
194 | "execution_count": 9,
195 | "metadata": {},
196 | "output_type": "execute_result"
197 | }
198 | ],
199 | "source": [
200 | "hamming_distance2(s1, s3)"
201 | ]
202 | }
203 | ],
204 | "metadata": {
205 | "kernelspec": {
206 | "display_name": "Python [conda root]",
207 | "language": "python",
208 | "name": "conda-root-py"
209 | },
210 | "language_info": {
211 | "codemirror_mode": {
212 | "name": "ipython",
213 | "version": 3
214 | },
215 | "file_extension": ".py",
216 | "mimetype": "text/x-python",
217 | "name": "python",
218 | "nbconvert_exporter": "python",
219 | "pygments_lexer": "ipython3",
220 | "version": "3.5.4"
221 | }
222 | },
223 | "nbformat": 4,
224 | "nbformat_minor": 2
225 | }
226 |
--------------------------------------------------------------------------------
/notebooks/Legacy_Code/Bernoulli_Naive_Bayes.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import numpy as np"
10 | ]
11 | },
12 | {
13 | "cell_type": "code",
14 | "execution_count": 2,
15 | "metadata": {},
16 | "outputs": [],
17 | "source": [
18 | "def create_dataset(n_rows=10, n_cols=2, prob=(0.5, 0.5), seed=None):\n",
19 | " '''\n",
20 | " creates a 2D numpy array with 0s and 1s for columns\n",
21 | " \n",
22 | " INPUT:\n",
23 | " n_rows = (int) number of rows in dataset\n",
24 | " n_cols = (int) number of columns starting with target followed by features\n",
25 | " prob = (tuple) probability of success for target, feature 1, feature 2, ..., feature n\n",
26 | " OUTPUT:\n",
27 | " dataset (numpy array)\n",
28 | " '''\n",
29 | " # error handling\n",
30 | " assert type(n_rows) == int, 'n_rows must be an integer'\n",
31 | " assert type(n_cols) == int, 'n_cols must be an integer'\n",
32 | " assert type(prob) == tuple, 'prob must be a tuple of probabilities'\n",
33 | " assert len(prob) == n_cols, 'tuple must contain probabilities for each n_col'\n",
34 | " assert type(seed) == int, 'seed must be an integer'\n",
35 | " \n",
36 | " # reproducibility\n",
37 | " if seed:\n",
38 | " np.random.seed(seed)\n",
39 | " \n",
40 | " # create dataset\n",
41 | " for i, p in enumerate(prob):\n",
42 | " if i < 1:\n",
43 | " dataset = np.random.binomial(n=1, p=prob[i], size=n_rows)\n",
44 | " else:\n",
45 | " column = np.random.binomial(n=1, p=prob[i], size=n_rows)\n",
46 | " dataset = np.c_[dataset, column]\n",
47 | " \n",
48 | " return dataset"
49 | ]
50 | },
51 | {
52 | "cell_type": "code",
53 | "execution_count": 3,
54 | "metadata": {},
55 | "outputs": [],
56 | "source": [
57 | "probabilities = (0.5, 0.5, 0.5, 0.5)\n",
58 | "data = create_dataset(n_rows=4, n_cols=4, prob=probabilities, seed=42)"
59 | ]
60 | },
61 | {
62 | "cell_type": "code",
63 | "execution_count": 4,
64 | "metadata": {},
65 | "outputs": [
66 | {
67 | "data": {
68 | "text/plain": [
69 | "array([[0, 0, 1, 1],\n",
70 | " [1, 0, 1, 0],\n",
71 | " [1, 0, 0, 0],\n",
72 | " [1, 1, 1, 0]])"
73 | ]
74 | },
75 | "execution_count": 4,
76 | "metadata": {},
77 | "output_type": "execute_result"
78 | }
79 | ],
80 | "source": [
81 | "data"
82 | ]
83 | },
84 | {
85 | "cell_type": "code",
86 | "execution_count": 5,
87 | "metadata": {},
88 | "outputs": [],
89 | "source": [
90 | "X = data[:, 1:]\n",
91 | "y = data[:, 0]"
92 | ]
93 | },
94 | {
95 | "cell_type": "code",
96 | "execution_count": 6,
97 | "metadata": {},
98 | "outputs": [
99 | {
100 | "data": {
101 | "text/plain": [
102 | "array([[0, 1, 1],\n",
103 | " [0, 1, 0],\n",
104 | " [0, 0, 0],\n",
105 | " [1, 1, 0]])"
106 | ]
107 | },
108 | "execution_count": 6,
109 | "metadata": {},
110 | "output_type": "execute_result"
111 | }
112 | ],
113 | "source": [
114 | "X"
115 | ]
116 | },
117 | {
118 | "cell_type": "code",
119 | "execution_count": 7,
120 | "metadata": {},
121 | "outputs": [],
122 | "source": [
123 | "class BernoulliNB:\n",
124 | " \n",
125 | " \n",
126 | " def __init__(self):\n",
127 | " self.prob_target_one_ = None\n",
128 | " self.prob_target_zero_ = None\n",
129 | " \n",
130 | " \n",
131 | " def _reshape(self, X):\n",
132 | " '''converts array to appropriate dimensions'''\n",
133 | " try:\n",
134 | " n_cols = X.shape[1]\n",
135 | " except:\n",
136 | " n_cols = X.reshape(-1,1)\n",
137 | " return n_cols\n",
138 | " \n",
139 | " \n",
140 | " def fit(self, X, y):\n",
141 | " '''calculate priors and likelihoods'''\n",
142 | " # setup\n",
143 | " n_classes = len(np.unique(y))\n",
144 | " n_cols = self._reshape(X)\n",
145 | " \n",
146 | " # priors (target)\n",
147 | " self.prob_target_one_ = np.mean(y)\n",
148 | " self.prob_target_zero_ = 1 - self.prob_target_one_\n",
149 | " \n",
150 | " # likelihoods (features)\n",
151 | " n_feature_bins = 2\n",
152 | " self.prob_features_ = np.zeros((n_feature_bins, n_cols), dtype=float)\n",
153 | " \n",
154 | " for i in range(n_feature_bins):\n",
155 | " for j in range(n_cols):\n",
156 | " column = X[:,j]\n",
157 | " intersection_y_and_col = sum(np.logical_and(y==i, column==i))\n",
158 | " sum_y = sum(y==i)\n",
159 | " self.prob_features_[i,j] = intersection_y_and_col / sum_y\n",
160 | " \n",
161 | " prob_complements = 1 - self.prob_features_\n",
162 | " self.prob_features_ = np.concatenate((nb.prob_features_, prob_complements), axis=0)\n",
163 | " self.prob_features_ = self.prob_features_[[0,2,3,1],:] # rearrange array\n",
164 | " \n",
165 | " \n",
166 | " def predict(self, X):\n",
167 | " '''return most likely class'''\n",
168 | " # NEED MULTIPLE PREDICTIONS\n",
169 | " \n",
170 | " # setup\n",
171 | " X = np.array(X)\n",
172 | " n_rows = X.shape[0]\n",
173 | " try: \n",
174 | " n_cols = X.shape[1] \n",
175 | " except: \n",
176 | " n_cols = X.shape[0]\n",
177 | " class_zero_probs, class_one_probs = np.split(self.prob_features_, 2)\n",
178 | " \n",
179 | " # likelihoods\n",
180 | " class_zero_likelihoods = class_zero_probs[X, np.arange(n_cols)]\n",
181 | " class_one_likelihoods = class_one_probs[X, np.arange(n_cols)]\n",
182 | " \n",
183 | " # posteriors\n",
184 | " class_zero_posterior = self.prob_target_zero_ * np.prod(class_zero_likelihoods)\n",
185 | " class_one_posterior = self.prob_target_one_ * np.prod(class_one_likelihoods)\n",
186 | "\n",
187 | " return (class_zero_posterior, class_one_posterior), np.argmax((class_zero_posterior, class_one_posterior))"
188 | ]
189 | },
190 | {
191 | "cell_type": "code",
192 | "execution_count": 8,
193 | "metadata": {},
194 | "outputs": [],
195 | "source": [
196 | "nb = BernoulliNB()\n",
197 | "nb.fit(X,y)"
198 | ]
199 | },
200 | {
201 | "cell_type": "code",
202 | "execution_count": 9,
203 | "metadata": {},
204 | "outputs": [
205 | {
206 | "data": {
207 | "text/plain": [
208 | "array([[ 1. , 0. , 0. ],\n",
209 | " [ 0. , 1. , 1. ],\n",
210 | " [ 0.66666667, 0.33333333, 1. ],\n",
211 | " [ 0.33333333, 0.66666667, 0. ]])"
212 | ]
213 | },
214 | "execution_count": 9,
215 | "metadata": {},
216 | "output_type": "execute_result"
217 | }
218 | ],
219 | "source": [
220 | "nb.prob_features_"
221 | ]
222 | },
223 | {
224 | "cell_type": "code",
225 | "execution_count": 10,
226 | "metadata": {},
227 | "outputs": [
228 | {
229 | "data": {
230 | "text/plain": [
231 | "((0.0, 0.083333333333333343), 1)"
232 | ]
233 | },
234 | "execution_count": 10,
235 | "metadata": {},
236 | "output_type": "execute_result"
237 | }
238 | ],
239 | "source": [
240 | "nb.predict(np.array([1,0,0]))"
241 | ]
242 | },
243 | {
244 | "cell_type": "code",
245 | "execution_count": null,
246 | "metadata": {},
247 | "outputs": [],
248 | "source": []
249 | }
250 | ],
251 | "metadata": {
252 | "kernelspec": {
253 | "display_name": "Python [conda root]",
254 | "language": "python",
255 | "name": "conda-root-py"
256 | },
257 | "language_info": {
258 | "codemirror_mode": {
259 | "name": "ipython",
260 | "version": 3
261 | },
262 | "file_extension": ".py",
263 | "mimetype": "text/x-python",
264 | "name": "python",
265 | "nbconvert_exporter": "python",
266 | "pygments_lexer": "ipython3",
267 | "version": "3.5.4"
268 | }
269 | },
270 | "nbformat": 4,
271 | "nbformat_minor": 2
272 | }
273 |
--------------------------------------------------------------------------------
/notebooks/Machine_Learning/Algorithms_From_Scratch/Bagging_and_Bootstrapping.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import numpy as np"
10 | ]
11 | },
12 | {
13 | "cell_type": "markdown",
14 | "metadata": {},
15 | "source": [
16 | "## Bootstrap"
17 | ]
18 | },
19 | {
20 | "cell_type": "code",
21 | "execution_count": 2,
22 | "metadata": {},
23 | "outputs": [],
24 | "source": [
25 | "class Bootstrap:\n",
26 | " \n",
27 | " def create_dataset(self, X, y, seed=None):\n",
28 | " '''creates bootstrapped dataset'''\n",
29 | " if seed:\n",
30 | " np.random.seed(seed)\n",
31 | " n_rows = len(X)\n",
32 | " indices = np.arange(n_rows)\n",
33 | " indices = np.random.choice(indices, size=n_rows, replace=True)\n",
34 | " return X[indices], y[indices]"
35 | ]
36 | },
37 | {
38 | "cell_type": "markdown",
39 | "metadata": {},
40 | "source": [
41 | "## Create Data"
42 | ]
43 | },
44 | {
45 | "cell_type": "code",
46 | "execution_count": 3,
47 | "metadata": {},
48 | "outputs": [],
49 | "source": [
50 | "np.random.seed(10)"
51 | ]
52 | },
53 | {
54 | "cell_type": "code",
55 | "execution_count": 4,
56 | "metadata": {},
57 | "outputs": [
58 | {
59 | "data": {
60 | "text/plain": [
61 | "array([[9, 4, 0, 1, 9],\n",
62 | " [0, 1, 8, 9, 0],\n",
63 | " [8, 6, 4, 3, 0],\n",
64 | " [4, 6, 8, 1, 8],\n",
65 | " [4, 1, 3, 6, 5],\n",
66 | " [3, 9, 6, 9, 1],\n",
67 | " [9, 4, 2, 6, 7],\n",
68 | " [8, 8, 9, 2, 0],\n",
69 | " [6, 7, 8, 1, 7],\n",
70 | " [1, 4, 0, 8, 5]])"
71 | ]
72 | },
73 | "execution_count": 4,
74 | "metadata": {},
75 | "output_type": "execute_result"
76 | }
77 | ],
78 | "source": [
79 | "X = np.random.randint(0,10,50).reshape(10,5)\n",
80 | "X"
81 | ]
82 | },
83 | {
84 | "cell_type": "code",
85 | "execution_count": 5,
86 | "metadata": {},
87 | "outputs": [
88 | {
89 | "data": {
90 | "text/plain": [
91 | "array([1, 0, 1, 1, 1, 0, 1, 0, 0, 1])"
92 | ]
93 | },
94 | "execution_count": 5,
95 | "metadata": {},
96 | "output_type": "execute_result"
97 | }
98 | ],
99 | "source": [
100 | "y = np.random.binomial(1, 0.5, 10)\n",
101 | "y"
102 | ]
103 | },
104 | {
105 | "cell_type": "markdown",
106 | "metadata": {},
107 | "source": [
108 | "### Bootstrap Example"
109 | ]
110 | },
111 | {
112 | "cell_type": "code",
113 | "execution_count": 6,
114 | "metadata": {},
115 | "outputs": [],
116 | "source": [
117 | "bs = Bootstrap()\n",
118 | "X, y = bs.create_dataset(X,y)"
119 | ]
120 | },
121 | {
122 | "cell_type": "code",
123 | "execution_count": 7,
124 | "metadata": {},
125 | "outputs": [
126 | {
127 | "data": {
128 | "text/plain": [
129 | "array([[9, 4, 0, 1, 9],\n",
130 | " [9, 4, 0, 1, 9],\n",
131 | " [9, 4, 2, 6, 7],\n",
132 | " [1, 4, 0, 8, 5],\n",
133 | " [0, 1, 8, 9, 0],\n",
134 | " [6, 7, 8, 1, 7],\n",
135 | " [1, 4, 0, 8, 5],\n",
136 | " [0, 1, 8, 9, 0],\n",
137 | " [8, 6, 4, 3, 0],\n",
138 | " [6, 7, 8, 1, 7]])"
139 | ]
140 | },
141 | "execution_count": 7,
142 | "metadata": {},
143 | "output_type": "execute_result"
144 | }
145 | ],
146 | "source": [
147 | "X"
148 | ]
149 | },
150 | {
151 | "cell_type": "code",
152 | "execution_count": 8,
153 | "metadata": {},
154 | "outputs": [
155 | {
156 | "data": {
157 | "text/plain": [
158 | "array([1, 1, 1, 1, 0, 0, 1, 0, 1, 0])"
159 | ]
160 | },
161 | "execution_count": 8,
162 | "metadata": {},
163 | "output_type": "execute_result"
164 | }
165 | ],
166 | "source": [
167 | "y"
168 | ]
169 | },
170 | {
171 | "cell_type": "markdown",
172 | "metadata": {},
173 | "source": [
174 | "## Bagging"
175 | ]
176 | },
177 | {
178 | "cell_type": "code",
179 | "execution_count": 9,
180 | "metadata": {},
181 | "outputs": [],
182 | "source": [
183 | "class Bagging(Bootstrap):\n",
184 | " \n",
185 | " def __init__(self):\n",
186 | " self.estimator = None\n",
187 | " \n",
188 | " def simulate(self, estimator, X, y, n_datasets, seed=None):\n",
189 | " if seed:\n",
190 | " self.seed=seed\n",
191 | " self.coefficients_ = []\n",
192 | " self.estimator = estimator\n",
193 | " for dataset in range(n_datasets):\n",
194 | " X_, y_ = self.create_dataset(X, y, seed=self.seed)\n",
195 | " self.estimator.fit(X_, y_)\n",
196 | " self.coefficients_.append(self.estimator.coef_)\n",
197 | " self.seed += 1"
198 | ]
199 | },
200 | {
201 | "cell_type": "markdown",
202 | "metadata": {},
203 | "source": [
204 | "### Example: Sklearn vs Bagging"
205 | ]
206 | },
207 | {
208 | "cell_type": "markdown",
209 | "metadata": {},
210 | "source": [
211 | "#### Sklearn"
212 | ]
213 | },
214 | {
215 | "cell_type": "code",
216 | "execution_count": 10,
217 | "metadata": {},
218 | "outputs": [],
219 | "source": [
220 | "from sklearn.linear_model import LinearRegression\n",
221 | "lr = LinearRegression()"
222 | ]
223 | },
224 | {
225 | "cell_type": "code",
226 | "execution_count": 11,
227 | "metadata": {},
228 | "outputs": [
229 | {
230 | "name": "stdout",
231 | "output_type": "stream",
232 | "text": [
233 | "[ 0.05415476 0.04757173 -0.13439324 0.03477829 -0.04744752]\n"
234 | ]
235 | }
236 | ],
237 | "source": [
238 | "lr.fit(X,y)\n",
239 | "print(lr.coef_)"
240 | ]
241 | },
242 | {
243 | "cell_type": "markdown",
244 | "metadata": {},
245 | "source": [
246 | "#### Bagging"
247 | ]
248 | },
249 | {
250 | "cell_type": "code",
251 | "execution_count": 12,
252 | "metadata": {},
253 | "outputs": [],
254 | "source": [
255 | "bg = Bagging()\n",
256 | "bg.simulate(lr, X, y, n_datasets=100, seed=42)"
257 | ]
258 | },
259 | {
260 | "cell_type": "code",
261 | "execution_count": 13,
262 | "metadata": {},
263 | "outputs": [
264 | {
265 | "data": {
266 | "text/plain": [
267 | "[array([ 0.03065716, -0.0095945 , -0.11071628, 0.04298948, 0.03107712]),\n",
268 | " array([ 0.03202357, 0.00323003, -0.10583687, 0.05571963, 0.03346221]),\n",
269 | " array([ 0.01504485, 0.09281765, -0.10585113, 0.0013086 , -0.02779966]),\n",
270 | " array([ 0.03890223, 0.04224749, -0.1397625 , 0.01358421, -0.05403209]),\n",
271 | " array([ 0.05415476, 0.04757173, -0.13439324, 0.03477829, -0.04744752]),\n",
272 | " array([ 0.01504485, 0.09281765, -0.10585113, 0.0013086 , -0.02779966]),\n",
273 | " array([ 0.0225789 , 0.04256523, -0.084117 , 0.0519456 , 0.04574701]),\n",
274 | " array([ 0.05415476, 0.04757173, -0.13439324, 0.03477829, -0.04744752]),\n",
275 | " array([ 0.03202357, 0.00323003, -0.10583687, 0.05571963, 0.03346221]),\n",
276 | " array([ 0.02997327, -0.04066438, -0.09418353, 0.0071274 , 0.01730941])]"
277 | ]
278 | },
279 | "execution_count": 13,
280 | "metadata": {},
281 | "output_type": "execute_result"
282 | }
283 | ],
284 | "source": [
285 | "bg.coefficients_[:10]"
286 | ]
287 | },
288 | {
289 | "cell_type": "markdown",
290 | "metadata": {},
291 | "source": [
292 | "#### Mean of Coefficients"
293 | ]
294 | },
295 | {
296 | "cell_type": "code",
297 | "execution_count": 14,
298 | "metadata": {},
299 | "outputs": [
300 | {
301 | "data": {
302 | "text/plain": [
303 | "array([ 0.03915596, 0.0205273 , -0.11609587, 0.02856614, -0.01729375])"
304 | ]
305 | },
306 | "execution_count": 14,
307 | "metadata": {},
308 | "output_type": "execute_result"
309 | }
310 | ],
311 | "source": [
312 | "np.mean(bg.coefficients_, axis=0)"
313 | ]
314 | },
315 | {
316 | "cell_type": "markdown",
317 | "metadata": {},
318 | "source": [
319 | "#### Standard Error of Coefficients"
320 | ]
321 | },
322 | {
323 | "cell_type": "code",
324 | "execution_count": 15,
325 | "metadata": {},
326 | "outputs": [
327 | {
328 | "data": {
329 | "text/plain": [
330 | "array([ 0.01944324, 0.03295672, 0.02520118, 0.01921173, 0.03869426])"
331 | ]
332 | },
333 | "execution_count": 15,
334 | "metadata": {},
335 | "output_type": "execute_result"
336 | }
337 | ],
338 | "source": [
339 | "np.std(bg.coefficients_, axis=0)"
340 | ]
341 | }
342 | ],
343 | "metadata": {
344 | "kernelspec": {
345 | "display_name": "Python [conda root]",
346 | "language": "python",
347 | "name": "conda-root-py"
348 | },
349 | "language_info": {
350 | "codemirror_mode": {
351 | "name": "ipython",
352 | "version": 3
353 | },
354 | "file_extension": ".py",
355 | "mimetype": "text/x-python",
356 | "name": "python",
357 | "nbconvert_exporter": "python",
358 | "pygments_lexer": "ipython3",
359 | "version": "3.5.4"
360 | }
361 | },
362 | "nbformat": 4,
363 | "nbformat_minor": 2
364 | }
365 |
--------------------------------------------------------------------------------
/notebooks/Machine_Learning/Algorithms_From_Scratch/Cross-Validation.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {
7 | "collapsed": true
8 | },
9 | "outputs": [],
10 | "source": [
11 | "import numpy as np"
12 | ]
13 | },
14 | {
15 | "cell_type": "code",
16 | "execution_count": 2,
17 | "metadata": {},
18 | "outputs": [
19 | {
20 | "data": {
21 | "text/plain": [
22 | "array([[ 4, 91, 9, 37, 28, 29, 3, 43, 71, 59],\n",
23 | " [55, 19, 38, 83, 21, 92, 85, 82, 91, 19],\n",
24 | " [ 1, 49, 31, 86, 43, 80, 44, 87, 73, 73],\n",
25 | " [29, 97, 6, 73, 40, 17, 16, 97, 50, 14],\n",
26 | " [13, 10, 56, 21, 24, 47, 43, 96, 57, 35],\n",
27 | " [21, 41, 3, 72, 34, 65, 89, 14, 83, 63],\n",
28 | " [ 0, 70, 39, 80, 83, 67, 81, 36, 26, 79],\n",
29 | " [65, 9, 17, 19, 58, 13, 3, 92, 11, 72],\n",
30 | " [93, 64, 84, 37, 23, 34, 15, 48, 9, 48],\n",
31 | " [13, 28, 93, 54, 77, 58, 75, 72, 37, 48]])"
32 | ]
33 | },
34 | "execution_count": 2,
35 | "metadata": {},
36 | "output_type": "execute_result"
37 | }
38 | ],
39 | "source": [
40 | "fake_data = np.random.randint(0,100,100).reshape(10,10)\n",
41 | "fake_data"
42 | ]
43 | },
44 | {
45 | "cell_type": "code",
46 | "execution_count": 211,
47 | "metadata": {},
48 | "outputs": [],
49 | "source": [
50 | "def cross_validation(data, shuffle=True, folds=2, random_state=None, verbose=True):\n",
51 | " '''return train and test sets'''\n",
52 | " \n",
53 | " # error checking\n",
54 | " assert type(shuffle) == bool, 'shuffle must be True or False!'\n",
55 | " assert type(folds) == int, 'must be an integer'\n",
56 | " assert folds > 1, 'must have 2 or more folds'\n",
57 | " assert folds <= len(data), 'can have no more than {} folds!'.format(len(data))\n",
58 | " if random_state != None:\n",
59 | " assert type(random_state) == int, 'random_state must be an integer!'\n",
60 | " \n",
61 | " # reproducibility\n",
62 | " if random_state:\n",
63 | " np.random.seed(random_state)\n",
64 | " \n",
65 | " # setup indices\n",
66 | " indices = np.arange(data.shape[0])\n",
67 | " if shuffle:\n",
68 | " np.random.shuffle(indices)\n",
69 | " \n",
70 | " # setup K-fold\n",
71 | " num_indices = len(indices)\n",
72 | " split_proportion = 1/folds\n",
73 | " split = int( np.floor(split_proportion * num_indices) )\n",
74 | " \n",
75 | " # main logic\n",
76 | " for fold in range(folds):\n",
77 | " mask = np.ones(num_indices, dtype=bool)\n",
78 | " offset = split*fold\n",
79 | " if fold < folds-1:\n",
80 | " test_indices = indices[offset:split+offset]\n",
81 | " else:\n",
82 | " test_indices = indices[offset:]\n",
83 | " mask[offset:offset+len(test_indices)] = False\n",
84 | " train_indices = indices[mask] \n",
85 | " train = data[train_indices]\n",
86 | " test = data[test_indices]\n",
87 | " if verbose:\n",
88 | " print('Fold:', fold)\n",
89 | " print('Train:\\n', train)\n",
90 | " print('Test:\\n', test, '\\n')\n",
91 | " \n",
92 | " #return train, test"
93 | ]
94 | },
95 | {
96 | "cell_type": "code",
97 | "execution_count": 216,
98 | "metadata": {},
99 | "outputs": [
100 | {
101 | "name": "stdout",
102 | "output_type": "stream",
103 | "text": [
104 | "Fold: 0\n",
105 | "Train:\n",
106 | " [[ 4 91 9 37 28 29 3 43 71 59]\n",
107 | " [65 9 17 19 58 13 3 92 11 72]\n",
108 | " [ 1 49 31 86 43 80 44 87 73 73]\n",
109 | " [13 28 93 54 77 58 75 72 37 48]\n",
110 | " [13 10 56 21 24 47 43 96 57 35]\n",
111 | " [29 97 6 73 40 17 16 97 50 14]\n",
112 | " [ 0 70 39 80 83 67 81 36 26 79]]\n",
113 | "Test:\n",
114 | " [[93 64 84 37 23 34 15 48 9 48]\n",
115 | " [55 19 38 83 21 92 85 82 91 19]\n",
116 | " [21 41 3 72 34 65 89 14 83 63]] \n",
117 | "\n",
118 | "Fold: 1\n",
119 | "Train:\n",
120 | " [[93 64 84 37 23 34 15 48 9 48]\n",
121 | " [55 19 38 83 21 92 85 82 91 19]\n",
122 | " [21 41 3 72 34 65 89 14 83 63]\n",
123 | " [13 28 93 54 77 58 75 72 37 48]\n",
124 | " [13 10 56 21 24 47 43 96 57 35]\n",
125 | " [29 97 6 73 40 17 16 97 50 14]\n",
126 | " [ 0 70 39 80 83 67 81 36 26 79]]\n",
127 | "Test:\n",
128 | " [[ 4 91 9 37 28 29 3 43 71 59]\n",
129 | " [65 9 17 19 58 13 3 92 11 72]\n",
130 | " [ 1 49 31 86 43 80 44 87 73 73]] \n",
131 | "\n",
132 | "Fold: 2\n",
133 | "Train:\n",
134 | " [[93 64 84 37 23 34 15 48 9 48]\n",
135 | " [55 19 38 83 21 92 85 82 91 19]\n",
136 | " [21 41 3 72 34 65 89 14 83 63]\n",
137 | " [ 4 91 9 37 28 29 3 43 71 59]\n",
138 | " [65 9 17 19 58 13 3 92 11 72]\n",
139 | " [ 1 49 31 86 43 80 44 87 73 73]\n",
140 | " [ 0 70 39 80 83 67 81 36 26 79]]\n",
141 | "Test:\n",
142 | " [[13 28 93 54 77 58 75 72 37 48]\n",
143 | " [13 10 56 21 24 47 43 96 57 35]\n",
144 | " [29 97 6 73 40 17 16 97 50 14]\n",
145 | " [ 0 70 39 80 83 67 81 36 26 79]] \n",
146 | "\n"
147 | ]
148 | }
149 | ],
150 | "source": [
151 | "cross_validation(fake_data, shuffle=True, folds=3, random_state=42, verbose=True)"
152 | ]
153 | }
154 | ],
155 | "metadata": {
156 | "kernelspec": {
157 | "display_name": "Python [conda root]",
158 | "language": "python",
159 | "name": "conda-root-py"
160 | },
161 | "language_info": {
162 | "codemirror_mode": {
163 | "name": "ipython",
164 | "version": 3
165 | },
166 | "file_extension": ".py",
167 | "mimetype": "text/x-python",
168 | "name": "python",
169 | "nbconvert_exporter": "python",
170 | "pygments_lexer": "ipython3",
171 | "version": "3.5.4"
172 | }
173 | },
174 | "nbformat": 4,
175 | "nbformat_minor": 2
176 | }
177 |
--------------------------------------------------------------------------------
/notebooks/Machine_Learning/Algorithms_From_Scratch/KNN.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 3,
6 | "metadata": {
7 | "collapsed": true
8 | },
9 | "outputs": [],
10 | "source": [
11 | "import numpy as np\n",
12 | "from scipy.stats import mode\n",
13 | "from scipy.spatial.distance import euclidean"
14 | ]
15 | },
16 | {
17 | "cell_type": "code",
18 | "execution_count": 14,
19 | "metadata": {
20 | "collapsed": true
21 | },
22 | "outputs": [],
23 | "source": [
24 | "class KNN:\n",
25 | " '''\n",
26 | " KNN is a lazy supervised machine learning algorithm. \n",
27 | " \n",
28 | " The user specifies the number of neighbors to find.'''\n",
29 | " \n",
30 | " \n",
31 | " def __init__(self, K=2, learning='classification'):\n",
32 | " '''\n",
33 | " K = number of neighbors\n",
34 | " learning = 'classification' or 'regression'\n",
35 | " '''\n",
36 | " assert learning == 'classification' or learning == 'regression', 'need to set learning.'\n",
37 | " self.K_ = K\n",
38 | " self.type_ = learning\n",
39 | " self._nrows = None\n",
40 | " self._indices = None\n",
41 | " self._X = None\n",
42 | " \n",
43 | " \n",
44 | " def _calc_distance(self):\n",
45 | " '''Calculate the distance between data points.\n",
46 | " Input:\n",
47 | " X = numpy data matrix\n",
48 | " Output:\n",
49 | " matrix of pairwise distances between data points (np.inf on the diagonal)\n",
50 | " '''\n",
51 | " self._nrows = len(self._X)\n",
52 | " distances = np.array([euclidean(self._X[i], self._X[j]) if i!=j else np.inf\n",
53 | " for i in range(self._nrows) \n",
54 | " for j in range(self._nrows)])\n",
55 | " return distances.reshape(self._nrows, self._nrows)\n",
56 | " \n",
57 | " \n",
58 | " def _sort_indices(self, X):\n",
59 | " '''Sort distance matrix indices by shortest distance. \n",
60 | " Input:\n",
61 | " X = numpy distance matrix from self._calc_distance\n",
62 | " Output:\n",
63 | " indices sorted by min distance\n",
64 | " '''\n",
65 | " return np.argsort(X, axis=1)\n",
66 | "\n",
67 | " \n",
68 | " def fit(self, X):\n",
69 | " '''Copy data.\n",
70 | " Input:\n",
71 | " X = numpy array of data points'''\n",
72 | " assert self.K_ < X.shape[0], 'You chose too many neighbors!'\n",
73 | " self._X = X\n",
74 | " \n",
75 | " \n",
76 | " def predict(self, y):\n",
77 | " '''Predict for each fitted point from the labels y of its K nearest neighbors.'''\n",
78 | " distances = self._calc_distance()\n",
79 | " self._indices = self._sort_indices(distances)\n",
80 | " self._indices = np.delete(self._indices, (self._indices.shape[1] - 1), axis=1)\n",
81 | " self._indices = self._indices[:,:self.K_]\n",
82 | " if self.type_ == 'classification':\n",
83 | " return mode(y[self._indices], axis=1)[0]\n",
84 | " else: \n",
85 | " return np.mean(y[self._indices], axis=1)"
86 | ]
87 | },
88 | {
89 | "cell_type": "markdown",
90 | "metadata": {
91 | "collapsed": true
92 | },
93 | "source": [
94 | "## Generate Data"
95 | ]
96 | },
97 | {
98 | "cell_type": "code",
99 | "execution_count": 4,
100 | "metadata": {},
101 | "outputs": [
102 | {
103 | "data": {
104 | "text/plain": [
105 | "array([[ 0, 0],\n",
106 | " [ 1, 2],\n",
107 | " [ 4, 9],\n",
108 | " [ 5, 10]])"
109 | ]
110 | },
111 | "execution_count": 4,
112 | "metadata": {},
113 | "output_type": "execute_result"
114 | }
115 | ],
116 | "source": [
117 | "X = np.array([0,0,1,2,4,9,5,10]).reshape(4,2)\n",
118 | "X"
119 | ]
120 | },
121 | {
122 | "cell_type": "code",
123 | "execution_count": 5,
124 | "metadata": {},
125 | "outputs": [
126 | {
127 | "data": {
128 | "text/plain": [
129 | "array([1, 1, 1, 0])"
130 | ]
131 | },
132 | "execution_count": 5,
133 | "metadata": {},
134 | "output_type": "execute_result"
135 | }
136 | ],
137 | "source": [
138 | "y = np.array([1,1,1,0])\n",
139 | "y"
140 | ]
141 | },
142 | {
143 | "cell_type": "markdown",
144 | "metadata": {
145 | "collapsed": true
146 | },
147 | "source": [
148 | "## Modeling"
149 | ]
150 | },
151 | {
152 | "cell_type": "code",
153 | "execution_count": 29,
154 | "metadata": {
155 | "collapsed": true
156 | },
157 | "outputs": [],
158 | "source": [
159 | "knn = KNN(K=3)"
160 | ]
161 | },
162 | {
163 | "cell_type": "code",
164 | "execution_count": 30,
165 | "metadata": {},
166 | "outputs": [
167 | {
168 | "data": {
169 | "text/plain": [
170 | "array([[1],\n",
171 | " [1],\n",
172 | " [1],\n",
173 | " [1]])"
174 | ]
175 | },
176 | "execution_count": 30,
177 | "metadata": {},
178 | "output_type": "execute_result"
179 | }
180 | ],
181 | "source": [
182 | "knn.fit(X)\n",
183 | "knn.predict(y)"
184 | ]
185 | }
186 | ],
187 | "metadata": {
188 | "kernelspec": {
189 | "display_name": "Python [conda root]",
190 | "language": "python",
191 | "name": "conda-root-py"
192 | },
193 | "language_info": {
194 | "codemirror_mode": {
195 | "name": "ipython",
196 | "version": 3
197 | },
198 | "file_extension": ".py",
199 | "mimetype": "text/x-python",
200 | "name": "python",
201 | "nbconvert_exporter": "python",
202 | "pygments_lexer": "ipython3",
203 | "version": "3.5.4"
204 | }
205 | },
206 | "nbformat": 4,
207 | "nbformat_minor": 2
208 | }
209 |
--------------------------------------------------------------------------------
/notebooks/Machine_Learning/Algorithms_From_Scratch/Train_Test_Split.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {
7 | "collapsed": true
8 | },
9 | "outputs": [],
10 | "source": [
11 | "import numpy as np"
12 | ]
13 | },
14 | {
15 | "cell_type": "code",
16 | "execution_count": 2,
17 | "metadata": {},
18 | "outputs": [
19 | {
20 | "data": {
21 | "text/plain": [
22 | "array([[44, 42, 47, 60, 78, 60, 26, 79, 43, 63],\n",
23 | " [38, 60, 16, 75, 44, 63, 80, 8, 21, 80],\n",
24 | " [14, 76, 22, 67, 5, 16, 11, 5, 26, 57],\n",
25 | " [32, 1, 22, 96, 0, 40, 23, 54, 24, 76],\n",
26 | " [83, 96, 34, 47, 15, 39, 85, 58, 29, 80],\n",
27 | " [43, 55, 36, 72, 77, 61, 4, 77, 38, 94],\n",
28 | " [49, 57, 58, 44, 91, 22, 95, 46, 11, 11],\n",
29 | " [ 0, 77, 60, 70, 98, 10, 50, 38, 13, 90],\n",
30 | " [72, 56, 71, 25, 25, 12, 79, 26, 81, 68],\n",
31 | " [63, 0, 47, 80, 78, 13, 32, 90, 50, 9]])"
32 | ]
33 | },
34 | "execution_count": 2,
35 | "metadata": {},
36 | "output_type": "execute_result"
37 | }
38 | ],
39 | "source": [
40 | "fake_data = np.random.randint(0,100,100).reshape(10,10)\n",
41 | "fake_data"
42 | ]
43 | },
44 | {
45 | "cell_type": "code",
46 | "execution_count": 3,
47 | "metadata": {
48 | "collapsed": true
49 | },
50 | "outputs": [],
51 | "source": [
52 | "def train_test_split(data, shuffle=True, test_proportion=0.2, random_state=None):\n",
53 | " '''return train and test sets'''\n",
54 | " \n",
55 | " # error checking\n",
56 | " assert type(shuffle) == bool, 'shuffle must be True or False!'\n",
57 | " if random_state != None:\n",
58 | " assert type(random_state) == int, 'random_state must be an integer!'\n",
59 | " \n",
60 | " # reproducibility\n",
61 | " if random_state:\n",
62 | " np.random.seed(random_state)\n",
63 | " \n",
64 | " # setup indices\n",
65 | " indices = np.arange(data.shape[0])\n",
66 | " if shuffle:\n",
67 | " np.random.shuffle(indices)\n",
68 | " \n",
69 | " # main logic\n",
70 | " num_indices = len(indices)\n",
71 | " split = int( np.floor(test_proportion * num_indices) )\n",
72 | " train = data[indices[split:]]\n",
73 | " test = data[indices[:split]]\n",
74 | " \n",
75 | " return train, test"
76 | ]
77 | },
78 | {
79 | "cell_type": "code",
80 | "execution_count": 4,
81 | "metadata": {
82 | "collapsed": true
83 | },
84 | "outputs": [],
85 | "source": [
86 | "train, test = train_test_split(fake_data, shuffle=True, test_proportion=0.4, random_state=43)"
87 | ]
88 | },
89 | {
90 | "cell_type": "code",
91 | "execution_count": 5,
92 | "metadata": {},
93 | "outputs": [
94 | {
95 | "data": {
96 | "text/plain": [
97 | "array([[14, 76, 22, 67, 5, 16, 11, 5, 26, 57],\n",
98 | " [43, 55, 36, 72, 77, 61, 4, 77, 38, 94],\n",
99 | " [38, 60, 16, 75, 44, 63, 80, 8, 21, 80],\n",
100 | " [ 0, 77, 60, 70, 98, 10, 50, 38, 13, 90],\n",
101 | " [44, 42, 47, 60, 78, 60, 26, 79, 43, 63],\n",
102 | " [83, 96, 34, 47, 15, 39, 85, 58, 29, 80]])"
103 | ]
104 | },
105 | "execution_count": 5,
106 | "metadata": {},
107 | "output_type": "execute_result"
108 | }
109 | ],
110 | "source": [
111 | "train"
112 | ]
113 | },
114 | {
115 | "cell_type": "code",
116 | "execution_count": 6,
117 | "metadata": {},
118 | "outputs": [
119 | {
120 | "data": {
121 | "text/plain": [
122 | "array([[32, 1, 22, 96, 0, 40, 23, 54, 24, 76],\n",
123 | " [63, 0, 47, 80, 78, 13, 32, 90, 50, 9],\n",
124 | " [49, 57, 58, 44, 91, 22, 95, 46, 11, 11],\n",
125 | " [72, 56, 71, 25, 25, 12, 79, 26, 81, 68]])"
126 | ]
127 | },
128 | "execution_count": 6,
129 | "metadata": {},
130 | "output_type": "execute_result"
131 | }
132 | ],
133 | "source": [
134 | "test"
135 | ]
136 | }
137 | ],
138 | "metadata": {
139 | "kernelspec": {
140 | "display_name": "Python [conda root]",
141 | "language": "python",
142 | "name": "conda-root-py"
143 | },
144 | "language_info": {
145 | "codemirror_mode": {
146 | "name": "ipython",
147 | "version": 3
148 | },
149 | "file_extension": ".py",
150 | "mimetype": "text/x-python",
151 | "name": "python",
152 | "nbconvert_exporter": "python",
153 | "pygments_lexer": "ipython3",
154 | "version": "3.5.4"
155 | }
156 | },
157 | "nbformat": 4,
158 | "nbformat_minor": 2
159 | }
160 |
--------------------------------------------------------------------------------
/notebooks/Machine_Learning/Algorithms_From_Scratch/Train_Validation_Test_Split.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {
7 | "collapsed": true
8 | },
9 | "outputs": [],
10 | "source": [
11 | "import numpy as np"
12 | ]
13 | },
14 | {
15 | "cell_type": "code",
16 | "execution_count": 2,
17 | "metadata": {},
18 | "outputs": [
19 | {
20 | "data": {
21 | "text/plain": [
22 | "array([[75, 50, 96, 10, 21, 46, 93, 1, 91, 7],\n",
23 | " [64, 92, 2, 21, 25, 53, 62, 82, 5, 58],\n",
24 | " [58, 26, 55, 33, 91, 47, 3, 14, 40, 23],\n",
25 | " [93, 67, 6, 0, 49, 13, 5, 61, 11, 16],\n",
26 | " [36, 41, 40, 58, 47, 55, 80, 98, 66, 94],\n",
27 | " [32, 69, 52, 87, 79, 50, 30, 68, 93, 65],\n",
28 | " [11, 73, 39, 59, 81, 55, 71, 2, 68, 61],\n",
29 | " [88, 25, 62, 63, 26, 57, 98, 74, 11, 25],\n",
30 | " [ 2, 34, 30, 10, 5, 18, 76, 43, 11, 88],\n",
31 | " [ 4, 17, 87, 87, 50, 78, 58, 31, 29, 89]])"
32 | ]
33 | },
34 | "execution_count": 2,
35 | "metadata": {},
36 | "output_type": "execute_result"
37 | }
38 | ],
39 | "source": [
40 | "fake_data = np.random.randint(0,100,100).reshape(10,10)\n",
41 | "fake_data"
42 | ]
43 | },
44 | {
45 | "cell_type": "code",
46 | "execution_count": 3,
47 | "metadata": {
48 | "collapsed": true
49 | },
50 | "outputs": [],
51 | "source": [
52 | "def train_validation_test_split(data, shuffle=True, validation_proportion=0.3,\n",
53 | " test_proportion=0.2, random_state=None):\n",
54 | " '''return train, validation, and test sets'''\n",
55 | " \n",
56 | " # error checking\n",
57 | " assert type(shuffle) == bool, 'shuffle must be True or False!'\n",
58 | " if random_state != None:\n",
59 | " assert type(random_state) == int, 'random_state must be an integer!'\n",
60 | " assert validation_proportion + test_proportion < 1, 'need some training data!'\n",
61 | " \n",
62 | " # reproducibility\n",
63 | " if random_state:\n",
64 | " np.random.seed(random_state)\n",
65 | " \n",
66 | " # setup indices\n",
67 | " indices = np.arange(data.shape[0])\n",
68 | " if shuffle:\n",
69 | " np.random.shuffle(indices)\n",
70 | " \n",
71 | " # MAIN LOGIC\n",
72 | " # calc num of indices to keep\n",
73 | " num_indices = len(indices)\n",
74 | " validation_split = int( np.floor(validation_proportion * num_indices) )\n",
75 | " test_split = int( np.floor(test_proportion * num_indices) )\n",
76 | " \n",
77 | " # parse indices\n",
78 | " training_indices = indices[test_split:]\n",
79 | " train_indices = training_indices[validation_split:]\n",
80 | " validation_indices = training_indices[:validation_split]\n",
81 | " test_indices = indices[:test_split]\n",
82 | " \n",
83 | " # create data sets\n",
84 | " train = data[train_indices]\n",
85 | " validation = data[validation_indices]\n",
86 | " test = data[test_indices]\n",
87 | " \n",
88 | " return train, validation, test"
89 | ]
90 | },
91 | {
92 | "cell_type": "code",
93 | "execution_count": 4,
94 | "metadata": {
95 | "collapsed": true
96 | },
97 | "outputs": [],
98 | "source": [
99 | "train, validation, test = train_validation_test_split(fake_data, \n",
100 | " shuffle=True, \n",
101 | " validation_proportion=0.3,\n",
102 | " test_proportion=0.2, \n",
103 | " random_state=43)"
104 | ]
105 | },
106 | {
107 | "cell_type": "code",
108 | "execution_count": 5,
109 | "metadata": {},
110 | "outputs": [
111 | {
112 | "data": {
113 | "text/plain": [
114 | "array([[32, 69, 52, 87, 79, 50, 30, 68, 93, 65],\n",
115 | " [64, 92, 2, 21, 25, 53, 62, 82, 5, 58],\n",
116 | " [88, 25, 62, 63, 26, 57, 98, 74, 11, 25],\n",
117 | " [75, 50, 96, 10, 21, 46, 93, 1, 91, 7],\n",
118 | " [36, 41, 40, 58, 47, 55, 80, 98, 66, 94]])"
119 | ]
120 | },
121 | "execution_count": 5,
122 | "metadata": {},
123 | "output_type": "execute_result"
124 | }
125 | ],
126 | "source": [
127 | "train"
128 | ]
129 | },
130 | {
131 | "cell_type": "code",
132 | "execution_count": 6,
133 | "metadata": {},
134 | "outputs": [
135 | {
136 | "data": {
137 | "text/plain": [
138 | "array([[11, 73, 39, 59, 81, 55, 71, 2, 68, 61],\n",
139 | " [ 2, 34, 30, 10, 5, 18, 76, 43, 11, 88],\n",
140 | " [58, 26, 55, 33, 91, 47, 3, 14, 40, 23]])"
141 | ]
142 | },
143 | "execution_count": 6,
144 | "metadata": {},
145 | "output_type": "execute_result"
146 | }
147 | ],
148 | "source": [
149 | "validation"
150 | ]
151 | },
152 | {
153 | "cell_type": "code",
154 | "execution_count": 7,
155 | "metadata": {},
156 | "outputs": [
157 | {
158 | "data": {
159 | "text/plain": [
160 | "array([[93, 67, 6, 0, 49, 13, 5, 61, 11, 16],\n",
161 | " [ 4, 17, 87, 87, 50, 78, 58, 31, 29, 89]])"
162 | ]
163 | },
164 | "execution_count": 7,
165 | "metadata": {},
166 | "output_type": "execute_result"
167 | }
168 | ],
169 | "source": [
170 | "test"
171 | ]
172 | }
173 | ],
174 | "metadata": {
175 | "kernelspec": {
176 | "display_name": "Python [conda root]",
177 | "language": "python",
178 | "name": "conda-root-py"
179 | },
180 | "language_info": {
181 | "codemirror_mode": {
182 | "name": "ipython",
183 | "version": 3
184 | },
185 | "file_extension": ".py",
186 | "mimetype": "text/x-python",
187 | "name": "python",
188 | "nbconvert_exporter": "python",
189 | "pygments_lexer": "ipython3",
190 | "version": "3.5.4"
191 | }
192 | },
193 | "nbformat": 4,
194 | "nbformat_minor": 2
195 | }
196 |
--------------------------------------------------------------------------------
/notebooks/Machine_Learning/Supervised_Learning/Classification/Logistic_Regression/Sigmoid_Function.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {
7 | "collapsed": true
8 | },
9 | "outputs": [],
10 | "source": [
11 | "%matplotlib inline\n",
12 | "import matplotlib.pyplot as plt\n",
13 | "import numpy as np"
14 | ]
15 | },
16 | {
17 | "cell_type": "code",
18 | "execution_count": 2,
19 | "metadata": {
20 | "collapsed": true
21 | },
22 | "outputs": [],
23 | "source": [
24 | "def sigmoid(x):\n",
25 | " return 1 / (1 + np.e ** -x)"
26 | ]
27 | },
28 | {
29 | "cell_type": "code",
30 | "execution_count": 3,
31 | "metadata": {
32 | "collapsed": true
33 | },
34 | "outputs": [],
35 | "source": [
36 | "x = np.linspace(-15, 15, 100)\n",
37 | "lst = [sigmoid(i) for i in x]"
38 | ]
39 | },
40 | {
41 | "cell_type": "code",
42 | "execution_count": 4,
43 | "metadata": {},
44 | "outputs": [
45 | {
46 | "data": {
47 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAAEWCAYAAACJ0YulAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3XmcXXV9//HXe7ZM9p2QfZGwhCWUDMimhFIFrEKtQlHL\nZimlBWkf9leX/vy5VH2Uam2tiiKlGPhZRH4ubdRUcAEEIZrFrJBAiBlmJoGsM1lnvZ/fH/dmvAyZ\nzM1k7py7vJ+Px2TuOed77/mczMx933O+53yPIgIzMzOAiqQLMDOzwuFQMDOzbg4FMzPr5lAwM7Nu\nDgUzM+vmUDAzs24OBStokt4n6bFCW6+kJyTdMpg1HQtJb5K0Mek6rPg4FCxxki6W9IykFkm7Jf1S\n0rkAEfGfEfHWwa7peNYr6ZOSOiTtz/r60EDX2GOdIemkw9MR8VREnJLPdVppqkq6ACtvkkYBPwT+\nEngEqAHeBLQlWdcA+HZE/GnSRZgdK+8pWNJOBoiIb0VEV0QciojHImINgKSbJD19uLGkt0ramNmr\n+KqkJw8fxsm0/aWkf5XULGmzpAsz8xskbZd0Y9ZrjZb0oKQdkuolfUxSRS/rfYukDZn1fgVQfzZW\n0hZJf5A1/UlJ38w8npX5xH+jpJcl7ZT0v7PaVkr6e0kvSdonaYWk6ZJ+kWmyOrNX8ieSFkpqzHru\naZlDXs2S1ku6KmvZIkl3S/pR5nV/JekN/dk+K34OBUvaC0CXpAckXSlpbG8NJU0AvgN8FBgPbAQu\n7NHsjcCazPKHgIeBc4GTgD8FviJpRKbtl4HRwBzgEuAG4OZe1vs94GPABOAl4KL+bGyOLgZOAS4D\nPi7ptMz8DwLvAd4GjALeDxyMiDdnls+PiBER8e0e9VcDPwAeA04APgD8p6Tsw0vXAZ8CxgKbgM/m\nY8Os8DkULFERsZf0m2AA/w7skLRY0qQjNH8bsD4ivhcRncCXgFd6tPltRHwjIrqAbwPTgX+IiLaI\neAxoB06SVEn6jfCjEbEvIrYAXwCuP8p6vxMRHcAXj7Denq7NfCo//DWl7/+Nbp/K7DGtBlYD8zPz\nbwE+FhEbI211ROzK4fXOB0YAd0VEe0T8nPQhu/dktfl+RPw68//6n8DZx1CvlRCHgiUuIp6PiJsi\nYhpwBjCF9BtvT1OAhqznBdDYo82rWY8PZdr1nDeC9Cf+aqA+a1k9MDXH9TYcoV22RyJiTNbX1j7a\nZ8sOnIOZeiEdcC8dw+scNgVoiIhU1rye29rbOq3MOBSsoETEBmAR6XDoaRsw7fCEJGVPH6OdQAcw\nM2veDKCpl/VO77He6Udol4sDwLCs6ROP4bkNQH+O9W8Fph/uL8nobVutzDkULFGSTpX0t5KmZaan\nkz6ssfQIzX8EnCnpjyRVAbdzbG+q3TKHlx4BPitppKSZpI/Zf7OX9Z4u6Y8z672zv+sFVgHXSaqW\nVAe8+xieex/waUlzlXaWpPGZZa+S7hs5kl+R/vT/ocx6FwLvIN3fYvYaDgVL2j7SncO/knSAdBis\nA/62Z8OI2AlcA3wO2AXMA5bT/9NXP0D6k/tm4GnSHdP3H2W9d2XWOxf4ZT/X+X9If9rfQ7pj96Fj\neO6/kA6yx4C9wH8AQzPLPgk8kOm/uLZH/e2kQ+BK0ntIXwVuyOyVmb2GfJMdK1aZwyGNwPsi4vGk\n6zErBd5TsKIi6XJJYyQNAf6e9PUCRzrUZGb94FCwYnMB6TNwdpI+JPJHEXEo2ZLMSocPH5mZWTfv\nKZiZWbeiGxBvwoQJMWvWrKTLMDMrKitWrNgZERP7ald0oTBr1iyWL1+edBlmZkVFUn3frXz4yMzM\nsjgUzMysm0PBzMy6ORTMzKybQ8HMzLrlLRQk3Z+5/eG6XpZL0pckbZK0RtI5+arFzMxyk889hUXA\nFUdZfiXp0SbnArcCX8tjLWZmloO8Xac
QEb+QNOsoTa4GHszcxWppZpCzyRGxLV81mdnA6uhKcbC9\ni4PtnbR2pGjt6KKtM0V7Z4qOrhTtXSk6OlN0pYKOVNCVStGVgq5UilRAVypIRZBKBQGkAiLS8yLI\nzEs/hvSy9Pf0ssOPDwt+N9HbCD6DMrBPnoYPqps1jjef3Of1Z8clyYvXpvLaWxo2Zua9LhQk3Up6\nb4IZM2YMSnFmx2rRokUA3HTTTYnWcbyaD7ZTv+sg21oOsbW5lVf3trJzfzu7D7Sx+2AH+w51sLe1\ng72tnbR3pvp+wTIlDfxr3nbJG0o6FHIWEfcC9wLU1dV5BD+zAdCVCl7cvo81jS2sbWzh+W172bzz\nALsPtL+mXU1VBROG1zBuRA1jh9UwY9wwRtZWMbK2ihE1VQwbUsWwmkpqqyuorapkSHUFNZWVVFeK\n6qoKaiorqKwQ1ZWisqKCSomKCqiqqKBCUFEhBFRWCElIUKH0vMOPyTwW6n6zTS9X9+PDst+MlY93\n5hKXZCg08dr73E7D94w1y6vte1v5+YbtPPXiTp7etJOWQx0AjBxSxWmTR3H56ZOYM2EEsyYMZ8qY\nWiaPHsrYYdV+cy0jSYbCYuAOSQ+Tvh1ji/sTzAbeofYuHnvuFb67somnX9xBKuDEUbW8dd4kLnjD\neOZPH8Ps8cOpqPAbv+UxFCR9C1gITJDUCHwCqAaIiHuAJcDbgE2kbyp+c75qMStH+1o7ePDZev7j\n6d+y+0A7U8cM5a8WnsQ75k/h5Ekj/OnfjiifZx+9p4/lAdyer/WblauOrhT3PfVb7nnyJVoOdXDJ\nyRP5i0vmcP7s8d4bsD4VRUezmeVmXVMLH/rOGp7btpfLTj2BOy+by/zpY5Iuy4qIQ8GsBKRSwRd/\n+gJ3P/ES44bX8PXrF3D56ScmXZYVIYeCWZE71N7F33z7Nzy6/lXedc40Pv72eYweVp10WVakHApm\nRWz7vlb+/IHlrGlq4eNvn8fNF81yB7IdF4eCWZHavreVd93zDDv3tXPv9XW8Zd6kpEuyEuBQMCtC\n+9s6uXnRMnbtb+ehP38jvzdjbNIlWYlwKJgVmfbOFH/5zRVseGUf/3FjnQPBBpRvsmNWRCKCj35v\nLU+9uJO7/vhMFp5yQtIlWYlxKJgVke+ubOK7Kxv568vmck3d9L6fYHaMHApmRaKp+RCfWrye82aN\n487L5iZdjpUoh4JZEUilgr/7f6tJRfDP18yn0sNVWJ44FMyKwIPPbuGZl3bxsbfPY8b4YUmXYyXM\noWBW4JqaD3HXjzew8JSJXHeu+xEsvxwKZgXuX3/yAqmAz77zTF+tbHnnUDArYBtf2cd3VzZyw/kz\nmTpmaNLlWBlwKJgVsM8/uoERNVXcfulJSZdiZcKhYFaglm3ZzU+f385tC9/A2OE1SZdjZcKhYFaA\nIoJ/+p8NTBw5hJsvmpV0OVZGHApmBejZl3axvH4Pd142l2E1HqLMBo9DwawAfeOZLYwbXsM1C6Yl\nXYqVGYeCWYFp2H2Qnz3/KtedO53a6sqky7Ey41AwKzDfXFqPJP70/JlJl2JlyKFgVkAOtXfx8LIG\nLj99ElN8XYIlwKFgVkD+a1UTLYc6uPGCWUmXYmXKoWBWICKCB57ZwqknjuS82eOSLsfKlEPBrECs\nqN/Dhlf2cdOFszzGkSXGoWBWIP5rVRO11RW8Y/6UpEuxMuZQMCsAHV0plqx9hctOm8TwIb5YzZLj\nUDArAL/ctJPdB9q5ynsJljCHglkB+MHqbYysrWLhKROTLsXKnEPBLGGtHV08tv4Vrjj9RIZU+Qpm\nS1ZeQ0HSFZI2Stok6SNHWD5a0g8krZa0XtLN+azHrBA9sXE7+9o6uepsHzqy5OUtFCRVAncDVwLz\ngPdImtej2e3AcxExH1gIfEGSB463srJ49VYmjKjhgjnjky7FLK97CucBmyJic0S0Aw8DV/doE8BI\npU/
KHgHsBjrzWJNZQdnX2sHPnt/OH545mapKH8215OXzt3Aq0JA13ZiZl+0rwGnAVmAt8NcRker5\nQpJulbRc0vIdO3bkq16zQffzDdtp60z52gQrGEl/NLkcWAVMAc4GviJpVM9GEXFvRNRFRN3EiT47\nw0rH4xu2M354DefMGJt0KWZAfkOhCZieNT0tMy/bzcD3Im0T8Fvg1DzWZFYwulLBky/s4JKTJ1JR\n4WEtrDDkMxSWAXMlzc50Hl8HLO7R5mXgMgBJk4BTgM15rMmsYKxqaGbPwQ4WnnpC0qWYdcvb9fQR\n0SnpDuBRoBK4PyLWS7ots/we4NPAIklrAQEfjoid+arJrJA8sXE7FYJL5vqQqBWOvA6yEhFLgCU9\n5t2T9Xgr8NZ81mBWqB7fuJ0FM8cyelh10qWYdUu6o9msLG3f28q6pr0sPMWHjqywOBTMEvDExvSp\n1Zc6FKzAOBTMEvD4xu2cOKqW0yaPTLoUs9dwKJgNsvbOFE+9uJNLT53oO6xZwXEomA2y5fW72d/W\n6f4EK0gOBbNB9stNO6msEBedNCHpUsxex6FgNsiWbt7NWdNGM8K33bQC5FAwG0QH2jpZ3dDM+R4m\n2wqUQ8FsEK2o30NnKhwKVrAcCmaDaOnmXVRViLqZHhXVCpNDwWwQLd28i7OmjWa4+xOsQDkUzAbJ\ngbZO1jS2+NCRFTSHgtkgWe7+BCsCDgWzQdLdnzDL/QlWuBwKZoPk2Zd2MX/6GIbVuD/BCpdDwWwQ\n7G/rZG1TC+fPGZd0KWZH5VAwGwTLt+ymKxVcMMdDW1hhcyiYDYJf/3Y3VRXinJljki7F7KgcCmaD\nYEX9HuZNGeX+BCt4DgWzPOvoSrGmsYVzZvisIyt8DgWzPNuwbR+HOrpY4KEtrAg4FMzybOXLewA4\nx6FgRcChYJZnK+r3cOKoWqaMrk26FLM+ORTM8mxF/R7OmTnG92O2ouBQMMujV/e20tR8yJ3MVjQc\nCmZ5tLI+3Z/gTmYrFg4Fszxa+fIeaqoqOH3K6KRLMcuJQ8Esj1bU7+GsqaOpqfKfmhUH/6aa5Ulb\nZxfrmvb6VFQrKjmFgqTKfBdiVmrWNe2lvSvlTmYrKrnuKbwo6fOS5h3Li0u6QtJGSZskfaSXNgsl\nrZK0XtKTx/L6ZoXsN90XrXkQPCseuYbCfOAF4D5JSyXdKmnU0Z6Q2bu4G7gSmAe8p2eoSBoDfBW4\nKiJOB6451g0wK1S/aWhm6pihnDDSF61Z8cgpFCJiX0T8e0RcCHwY+ASwTdIDkk7q5WnnAZsiYnNE\ntAMPA1f3aPNe4HsR8XJmPdv7tRVmBWh1QzNnz/BeghWXnPsUJF0l6fvAF4EvAHOAHwBLennaVKAh\na7oxMy/bycBYSU9IWiHphl7Wf6uk5ZKW79ixI5eSzRK1c38bjXsOcfY0h4IVl1wHd38ReBz4fEQ8\nkzX/O5LefJzrXwBcBgwFnpW0NCJeyG4UEfcC9wLU1dXFcazPbFCsaWwGYP50h4IVl1xD4YaIeDp7\nhqSLIuKXEXFnL89pAqZnTU/LzMvWCOyKiAPAAUm/4Hf9F2ZFa1VDCxWCM6YetevNrODk2tH8pSPM\n+3Ifz1kGzJU0W1INcB2wuEeb/wYullQlaRjwRuD5HGsyK1irG5o5edJI32nNis5Rf2MlXQBcCEyU\n9MGsRaOAo167EBGdku4AHs20vT8i1ku6LbP8noh4XtKPgTVACrgvItb1f3PMkhcRrG5s5vJ5JyZd\nitkx6+tjTA0wItNuZNb8vcC7+3rxiFhCj47oiLinx/Tngc/nUqxZMXh590GaD3a4P8GK0lFDISKe\nBJ6UtCgi6gepJrOitqrhcCezB8Gz4tPX4aMvRsTfAF+R9LqzfiLiqrxVZlakVje0UFtdwcmTRvbd\n2KzA9HX46P9mvv9zvgsxKxVrGps5Y8poqis93qQVn74OH63IfPeYR
GY56ApYt7WF971xZtKlmPVL\nX4eP1gK9XiwWEWcNeEVmRWx7WyWtHSl3MlvR6uvw0dsHpQqzEtHUWg3g4S2saPV1+MhnHJkdg6ZD\nVYwZVs30cUOTLsWsX47aEybp6cz3fZL29vw+OCWaFY+trdWcNW0MkpIuxaxf+tpTuDjz3efWmfWh\nPZXuU7h2mq9PsOKV88Asks4BLibd8fx0RPwmb1WZFaFXWqsIxFnuT7Ailuv9FD4OPACMByYAiyR9\nLJ+FmRWbrZlO5rO8p2BFLNc9hfcB8yOiFUDSXcAq4DP5Ksys2DS1VjGyqotJo3z7TSteuV5yuRXI\n/k0fwuvvjWBW1ra2VjGltjPpMsyOS18Xr32ZdB9CC7Be0k8y028Bfp3/8l5v165dLFq0KIlVm/Xq\nUEcX9SvWMXREG4sW+cQ8K159HT5anvm+Avh+1vwn8lKNWZFq3HMQgLHVXQlXYnZ8FFFctzyuq6uL\n5cuX993QbBDd8+RL3PU/G/i7k3Zy+y03Jl2O2etIWhERdX21y6mjWdJc4B+BeWT1LUTEnH5XaFZC\n1jQ2M6a6i+FVxfUhy6ynXDuavwF8DegELgUeBL6Zr6LMis2axham1nYkXYbZccs1FIZGxM9IH26q\nj4hPAn+Yv7LMiseu/W007jnkM4+sJOR6nUKbpArgRUl3kD4ddUT+yjIrHmuaWgAcClYSct1T+Gtg\nGHAnsAC4HnBvmhmwtrEFCSY7FKwE5LSnEBHLADJ7C3dGxL68VmVWRNY0NjNnwnBqK3ckXYrZcct1\n7KO6zF3Y1gBrJa2WtCC/pZkVvohgVUOL77RmJSPXw0f3A38VEbMiYhZwO+kzkszK2taWVnbub+Ns\nh4KViFxDoSsinjo8ERFPkz491aysrW5oBmC+h8u2EtHX2EfnZB4+KenrwLdIj330J3ioCzNWNzZT\nU1nBqZNH4huMWCnoq6P5Cz2mP5H12JduWtlb3dDMaZNHMqSqMulSzAZEX7fjvHSwCjErNl2pYG1j\nC+9aMC3pUswGTK5nH42W9C+Slme+viDJt5eysvbSjv0caO9yf4KVlGM5+2gfcG3may8++8jK3KrD\nncw+88hKSK7DXLwhIt6VNf0pSavyUZBZsVjd0MzIIVXMmTA86VLMBkyuewqHJF18eELSRcChvp4k\n6QpJGyVtkvSRo7Q7V1KnpHfnWI9Z4lY3NnPW9NFUVCjpUswGTK57CrcBD2b1I+yhj7GPJFUCd5O+\ndWcjsEzS4oh47gjt/gl47FgKN0tSa0cXG7bt49Y3+5YiVlr6DIXMeEenRMR8SaMAIiKXm9CeB2yK\niM2Z13kYuBp4rke7DwDfBc49lsLNkrR+6146U8FZ7mS2EtPn4aOISAEfyjzem2MgAEwFGrKmGzPz\nukmaCryT9A18eiXp1sNnPu3Y4UHHLHlrGtOdzB7ewkpNrn0KP5X0vyRNlzTu8NcArP+LwIczwdOr\niLg3Iuoiom7ixIkDsFqz47O6oZlJo4Zw4ujavhubFZFc+xT+hPQVzH/VY/7RDqg2AdOzpqdl5mWr\nAx6WBDABeJukzoj4rxzrMkvEbxqafX2ClaRcQ2Ee6UC4mHQ4PAXc08dzlgFzJc0mHQbXAe/NbhAR\nsw8/lrQI+KEDwQrdzv1t1O86yHvPm5F0KWYDLtdQeID0BWtfyky/NzPv2t6eEBGdmVt3PgpUAvdH\nxHpJt2WW9xUqZgVpZf0eABbMHJtwJWYDL9dQOCMi5mVNPy6p51lErxMRS4AlPeYdMQwi4qYcazFL\n1IqX91BdKc6Y6pFerPTk2tG8UtL5hyckvRFYnp+SzArbyvo9nD5lNLXVHhnVSk+uobAAeEbSFklb\ngGeBcyWtlbQmb9WZFZj2zhRrGlt86MhKVq6Hj67IaxVmReK5bXtp60xxzgyHgpWmnEIhIurzXYhZ\nMTjcyXzOTJ+OaqUp18NHZka6k
3nK6Fomjx6adClmeeFQMDsGK+v3cI77E6yEORTMcrS1+RDbWlrd\nyWwlzaFglqOVL2f6E9zJbCXMoWCWo5X1zdRWVzBvyqikSzHLG4eCWY5WvLyHs6aOobrSfzZWuvzb\nbZaD/W2drGtq4dzZPnRkpc2hYJaD5Vt205UKLpgzIelSzPLKoWCWg6Wbd1NdKV+0ZiXPoWCWg2c3\n72L+tDEMq8l1ZBiz4uRQMOvDvtYO1jW1cMEbxiddilneORTM+rC8fg9dqeD8OQ4FK30OBbM+LN28\nK92f4IvWrAw4FMz6sHTzbs6ePoahNb6pjpU+h4LZUXT3J/jQkZUJh4LZUSzf4v4EKy8OBbOjWLp5\nFzWVFR4u28qGQ8HsKJZu3sXZM8ZQW+3+BCsPDgWzXuw+0M6aphYu9PUJVkYcCma9+MULO4iA3z/1\nhKRLMRs0DgWzXvx8w3YmjKjhjCmjky7FbNA4FMyOoCsVPPnCDi45+QQqKpR0OWaDxqFgdgSrGvbQ\ncqjDh46s7DgUzI7g5xu2U1khLp7r+ydYeXEomB3B4xt2sGDmWEYPrU66FLNB5VAw6+GVllae27bX\nh46sLOU1FCRdIWmjpE2SPnKE5e+TtEbSWknPSJqfz3rMcvHExu0AXHqKQ8HKT95CQVIlcDdwJTAP\neI+keT2a/Ra4JCLOBD4N3Juvesxy9fjG7UwZXcvJk0YkXYrZoMvnnsJ5wKaI2BwR7cDDwNXZDSLi\nmYjYk5lcCkzLYz1mfWrt6OLpF3ey8NQTkHwqqpWffIbCVKAha7oxM683fwb8z5EWSLpV0nJJy3fs\n2DGAJZq91uMbtnOgvYu3nTE56VLMElEQHc2SLiUdCh8+0vKIuDci6iKibuLEiYNbnJWVxau3MmHE\nEN+P2cpWPkOhCZieNT0tM+81JJ0F3AdcHRG78liP2VHta+3gZxu284dnnkilr2K2MpXPUFgGzJU0\nW1INcB2wOLuBpBnA94DrI+KFPNZi1qefPPcq7Z0prjp7StKlmCWmKl8vHBGdku4AHgUqgfsjYr2k\n2zLL7wE+DowHvprp1OuMiLp81WR2NItXb2XqmKGcM8M31LHylbdQAIiIJcCSHvPuyXp8C3BLPmsw\ny8XuA+08/eJObnnTHJ91ZGWtIDqazZK2ZO02OlPBVfN96MjKm0PBjPSho5NOGMFpk0cmXYpZohwK\nVvYadh9k2ZbdXDV/ig8dWdlzKFjZe/DZLVRKXFs3vc+2ZqXOoWBl7WB7J99e1sAVZ5zIiaNrky7H\nLHEOBStr3/9NE3tbO7npwllJl2JWEBwKVrYiggee2cLpU0axYKavTTADh4KVsWc37+KFV/dz04Wz\n3MFsluFQsLK16JdbGDe8hnf42gSzbg4FK0tbdh7gp8+/ynXnTqe2ujLpcswKhkPBytI/P7aRIVWV\n3HTRrKRLMSsoDgUrO2sbW/jhmm3c8qbZnDDSp6GaZXMoWNn53KMbGDusmlvfPCfpUswKjkPBysrT\nL+7kqRd3cvulJzGytjrpcswKjkPBykYqFfzTjzcwdcxQrr9gZtLlmBUkh4KVjW8te5m1TS188C0n\nM6TKZxyZHYlDwcpC/a4DfPZHz/OmuRN45+9NTbocs4LlULCS15UK/vaR1VRWiM+9+ywqKnz1sllv\n8no7TrNCcN9Tm1lev4d/uXY+k0cPTbocs4LmPQUraWsam/nCYy9wxekn+rCRWQ4cClayXt51kPcv\nWsbEkUP47DvP8KB3Zjnw4SMrSbsPtHPjN35NZyp4+P3nMX7EkKRLMisK3lOwknOwvZM/e2AZW5sP\ncd8NdZx0woikSzIrGt5TsJKyfV8rf/7ActY2tfDV9y2gbta4pEsyKyoOBSsZG1/Zx/sXLWP3gXa+\nfn0db5k3KemSzIqOQ8FKwpK12/jwd9YwtKaSR/7iAs6cNjrpksyKkkPBitr2va18/L/X8+P1r3D
m\n1NF8/foFTBnjaxHM+suhYEXpUHsXD/36Zf7tpy/Q1pniI1eeyi0Xz6aq0udOmB0Ph4IVlb2tHTz0\nq5e576nN7NzfzkUnjefTV5/BnIk+w8hsIDgUrOB1dqV4atNOvreyicfWv0JbZ4o3zZ3AnZfN5Vyf\nXWQ2oBwKVnBSqWDzzv0s3bybp17cwTObdrGvrZMxw6q5tm4619RN46xpY5Iu06wk5TUUJF0B/BtQ\nCdwXEXf1WK7M8rcBB4GbImJlPmuywtHRleLVva3U7zrI5h37eWnHAZ7ftpd1TS0caO8CYOqYobx9\n/mQuPeUEFp5yAjVV7jMwy6e8hYKkSuBu4C1AI7BM0uKIeC6r2ZXA3MzXG4GvZb5bAYoIOlNBVyro\n6ErR0RW0d6bo6ErR2tFFa0eK1s4uDrZ3cai9kwNtXexr7WBfayd7WzvYfaCDXQfa2H2gnVdaWtmx\nv42I373+sJpKTp40knctmMaZU0dzzsyxzJkw3GMWmQ2ifO4pnAdsiojNAJIeBq4GskPhauDBiAhg\nqaQxkiZHxLaBLubJF3bwmR8+13fDXkTfTQZUxJHXGL1MZM8//NyA7jfdIIjImo4ggFRk5mfmpSJ9\n/4FUBKlU0BVBKgVdkQ6D/qqtrmD88CGMG17DuOE1nHriSCaPHsrk0bXMGD+MORNGMGnUEAeAWcLy\nGQpTgYas6UZevxdwpDZTgdeEgqRbgVsBZsyY0a9iRgypYu6k4ztDRQzyG1Yvq8uenf0m+tr5v5t3\nuI0y/wghpacrlHms9OP0F1RUpB9Xdn+HyooKqitEZaWorqigpqqC6soKqitFbXUlQ6oqqK2uZPiQ\nSoZWVzF8SCUja6sZWVtFdRmcKnrTTTclXYLZcSuKjuaIuBe4F6Curq5fH1cXzBzLgpkLBrQuM7NS\nk8+Pb03A9KzpaZl5x9rGzMwGST5DYRkwV9JsSTXAdcDiHm0WAzco7XygJR/9CWZmlpu8HT6KiE5J\ndwCPkj4l9f6IWC/ptszye4AlpE9H3UT6lNSb81WPmZn1La99ChGxhPQbf/a8e7IeB3B7PmswM7Pc\nlf4pIWZmljOHgpmZdXMomJlZN4eCmZl1U2/DKRQqSTuA+n4+fQKwcwDLSZK3pTCVyraUynaAt+Ww\nmRExsa9GRRcKx0PS8oioS7qOgeBtKUylsi2lsh3gbTlWPnxkZmbdHApmZtat3ELh3qQLGEDelsJU\nKttSKtsB3pZjUlZ9CmZmdnTltqdgZmZH4VAwM7NuZREKkq6RtF5SSlJd1vxZkg5JWpX5uudor1MI\netuWzLK9qDflAAADr0lEQVSPStokaaOky5OqsT8kfVJSU9bP4m1J13QsJF2R+X/fJOkjSddzPCRt\nkbQ283NYnnQ9x0LS/ZK2S1qXNW+cpJ9IejHzfWySNeaql23J+99JWYQCsA74Y+AXR1j2UkScnfm6\nbZDr6o8jboukeaTvWXE6cAXwVUmVg1/ecfnXrJ/Fkr6bF4bM//PdwJXAPOA9mZ9HMbs083MotvP7\nF5H+/c/2EeBnETEX+Flmuhgs4vXbAnn+OymLUIiI5yNiY9J1DISjbMvVwMMR0RYRvyV9j4rzBre6\nsnUesCkiNkdEO/Aw6Z+HDbKI+AWwu8fsq4EHMo8fAP5oUIvqp162Je/KIhT6MDuzG/akpDclXcxx\nmAo0ZE03ZuYVkw9IWpPZbS6KXfyMUvi/zxbATyWtkHRr0sUMgElZd3R8BZiUZDEDIK9/JyUTCpJ+\nKmndEb6O9oltGzAjIs4GPgg8JGnU4FTcu35uS8HrY7u+BswBzib9c/lCosWWt4szfxNXArdLenPS\nBQ2UzI29ivk8/Lz/neT1zmuDKSL+oB/PaQPaMo9XSHoJOBlItHOtP9sCNAHTs6anZeYVjFy3S9K/\nAz/MczkDqeD/749FRDRlvm+X9H3Sh8eO1B9XLF6VNDkitkm
aDGxPuqD+iohXDz/O199Jyewp9Iek\niYc7YyXNAeYCm5Otqt8WA9dJGiJpNult+XXCNeUs88d62DtJd6gXi2XAXEmzJdWQ7vBfnHBN/SJp\nuKSRhx8Db6W4fhZHshi4MfP4RuC/E6zluAzG30nJ7CkcjaR3Al8GJgI/krQqIi4H3gz8g6QOIAXc\nFhGD3rFzLHrblohYL+kR4DmgE7g9IrqSrPUYfU7S2aR37bcAf5FsObmLiE5JdwCPApXA/RGxPuGy\n+msS8H1JkH5/eCgifpxsSbmT9C1gITBBUiPwCeAu4BFJf0Z62P1rk6swd71sy8J8/514mAszM+tW\n1oePzMzstRwKZmbWzaFgZmbdHApmZtbNoWBmZt0cCmZm1s2hYGZm3RwKZsdJ0rmZAcpqM1cEr5d0\nRtJ1mfWHL14zGwCSPgPUAkOBxoj4x4RLMusXh4LZAMiMebQMaAUuLLIhRsy6+fCR2cAYD4wARpLe\nYzArSt5TMBsAkhaTvuPabGByRNyRcElm/VIWo6Sa5ZOkG4COiHgoMxT7M5J+PyJ+nnRtZsfKewpm\nZtbNfQpmZtbNoWBmZt0cCmZm1s2hYGZm3RwKZmbWzaFgZmbdHApmZtbt/wM0YVudODC3EQAAAABJ\nRU5ErkJggg==\n",
48 | "text/plain": [
49 | ""
50 | ]
51 | },
52 | "metadata": {},
53 | "output_type": "display_data"
54 | }
55 | ],
56 | "source": [
57 | "plt.plot(x, lst);\n",
58 | "plt.axhline(0.5, c='k', alpha=0.5)\n",
59 | "plt.vlines(0, 0, 1, alpha=0.5)\n",
60 | "plt.title('Sigmoid Function')\n",
61 | "plt.xlabel('x')\n",
62 | "plt.ylabel('probability');"
63 | ]
64 | }
65 | ],
66 | "metadata": {
67 | "kernelspec": {
68 | "display_name": "Python [conda root]",
69 | "language": "python",
70 | "name": "conda-root-py"
71 | },
72 | "language_info": {
73 | "codemirror_mode": {
74 | "name": "ipython",
75 | "version": 3
76 | },
77 | "file_extension": ".py",
78 | "mimetype": "text/x-python",
79 | "name": "python",
80 | "nbconvert_exporter": "python",
81 | "pygments_lexer": "ipython3",
82 | "version": "3.5.4"
83 | }
84 | },
85 | "nbformat": 4,
86 | "nbformat_minor": 2
87 | }
88 |
--------------------------------------------------------------------------------
/notebooks/Machine_Learning/Supervised_Learning/Model_Selection/Feature_Selection.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": null,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "alphas = [0.0001, 0.001, 0.01, 0.1, 1, 10, 100]\n",
10 | "print('All errors are RMSE')\n",
11 | "print('-'*74)\n",
12 | "for alpha in alphas:\n",
13 | " # instantiate and fit model\n",
14 | " lasso = Lasso(alpha=alpha, fit_intercept=True, random_state=99)\n",
15 | " lasso.fit(X_train, y_train)\n",
16 | " # calculate errors\n",
17 | " train_error = np.sqrt(mean_squared_error(y_train, lasso.predict(X_train)))\n",
18 | " validation_error = np.sqrt(mean_squared_error(y_validation, lasso.predict(X_validation)))\n",
19 | " test_error = np.sqrt(mean_squared_error(y_test, lasso.predict(X_test)))\n",
20 | " # print errors as report\n",
21 | " print('alpha: {:6} | train error: {:5} | val error: {:6} | test error: {}'.\n",
22 | " format(alpha,\n",
23 | " round(train_error,2),\n",
24 | " round(validation_error,2),\n",
25 | " round(test_error,2)))"
26 | ]
27 | }
28 | ],
29 | "metadata": {
30 | "kernelspec": {
31 | "display_name": "Python [conda root]",
32 | "language": "python",
33 | "name": "conda-root-py"
34 | },
35 | "language_info": {
36 | "codemirror_mode": {
37 | "name": "ipython",
38 | "version": 3
39 | },
40 | "file_extension": ".py",
41 | "mimetype": "text/x-python",
42 | "name": "python",
43 | "nbconvert_exporter": "python",
44 | "pygments_lexer": "ipython3",
45 | "version": "3.5.4"
46 | }
47 | },
48 | "nbformat": 4,
49 | "nbformat_minor": 2
50 | }
51 |
--------------------------------------------------------------------------------
/notebooks/OS_library/OS_Sandbox.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {
7 | "collapsed": true
8 | },
9 | "outputs": [],
10 | "source": [
11 | "import os"
12 | ]
13 | },
14 | {
15 | "cell_type": "code",
16 | "execution_count": 3,
17 | "metadata": {},
18 | "outputs": [
19 | {
20 | "data": {
21 | "text/plain": [
22 | "posix.uname_result(sysname='Darwin', nodename='metis-davidziganto.local', release='16.7.0', version='Darwin Kernel Version 16.7.0: Wed Oct 4 00:17:00 PDT 2017; root:xnu-3789.71.6~1/RELEASE_X86_64', machine='x86_64')"
23 | ]
24 | },
25 | "execution_count": 3,
26 | "metadata": {},
27 | "output_type": "execute_result"
28 | }
29 | ],
30 | "source": [
31 | "os.uname()"
32 | ]
33 | },
34 | {
35 | "cell_type": "code",
36 | "execution_count": 5,
37 | "metadata": {},
38 | "outputs": [
39 | {
40 | "data": {
41 | "text/plain": [
42 | "1333"
43 | ]
44 | },
45 | "execution_count": 5,
46 | "metadata": {},
47 | "output_type": "execute_result"
48 | }
49 | ],
50 | "source": [
51 | "os.getpid()"
52 | ]
53 | },
54 | {
55 | "cell_type": "code",
56 | "execution_count": 8,
57 | "metadata": {},
58 | "outputs": [
59 | {
60 | "ename": "AttributeError",
61 | "evalue": "module 'os' has no attribute 'getresuid'",
62 | "output_type": "error",
63 | "traceback": [
64 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
65 | "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)",
66 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mos\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgetresuid\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
67 | "\u001b[0;31mAttributeError\u001b[0m: module 'os' has no attribute 'getresuid'"
68 | ]
69 | }
70 | ],
71 | "source": [
72 | "os.getresuid()"
73 | ]
74 | },
75 | {
76 | "cell_type": "code",
77 | "execution_count": null,
78 | "metadata": {
79 | "collapsed": true
80 | },
81 | "outputs": [],
82 | "source": []
83 | }
84 | ],
85 | "metadata": {
86 | "kernelspec": {
87 | "display_name": "Python [conda root]",
88 | "language": "python",
89 | "name": "conda-root-py"
90 | },
91 | "language_info": {
92 | "codemirror_mode": {
93 | "name": "ipython",
94 | "version": 3
95 | },
96 | "file_extension": ".py",
97 | "mimetype": "text/x-python",
98 | "name": "python",
99 | "nbconvert_exporter": "python",
100 | "pygments_lexer": "ipython3",
101 | "version": "3.5.4"
102 | }
103 | },
104 | "nbformat": 4,
105 | "nbformat_minor": 2
106 | }
107 |
--------------------------------------------------------------------------------
/notebooks/Python/Coding_Best_Practices/Complexity_Practice.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import numpy as np"
10 | ]
11 | },
12 | {
13 | "cell_type": "code",
14 | "execution_count": 2,
15 | "metadata": {},
16 | "outputs": [],
17 | "source": [
18 | "X = np.array([12,6,3,4,5,6,7,8,9]).reshape(3,3)\n",
19 | "Y = np.array([5,8,1,2,6,7,3,0,4,5,9,1]).reshape(3,4)"
20 | ]
21 | },
22 | {
23 | "cell_type": "code",
24 | "execution_count": 3,
25 | "metadata": {},
26 | "outputs": [
27 | {
28 | "data": {
29 | "text/plain": [
30 | "array([[12, 6, 3],\n",
31 | " [ 4, 5, 6],\n",
32 | " [ 7, 8, 9]])"
33 | ]
34 | },
35 | "execution_count": 3,
36 | "metadata": {},
37 | "output_type": "execute_result"
38 | }
39 | ],
40 | "source": [
41 | "X"
42 | ]
43 | },
44 | {
45 | "cell_type": "code",
46 | "execution_count": 4,
47 | "metadata": {},
48 | "outputs": [
49 | {
50 | "data": {
51 | "text/plain": [
52 | "array([[5, 8, 1, 2],\n",
53 | " [6, 7, 3, 0],\n",
54 | " [4, 5, 9, 1]])"
55 | ]
56 | },
57 | "execution_count": 4,
58 | "metadata": {},
59 | "output_type": "execute_result"
60 | }
61 | ],
62 | "source": [
63 | "Y"
64 | ]
65 | },
66 | {
67 | "cell_type": "code",
68 | "execution_count": 5,
69 | "metadata": {},
70 | "outputs": [],
71 | "source": [
72 | "rows, cols = X.shape[0], Y.shape[1]\n",
73 | "result = np.zeros(rows * cols).reshape(rows, cols)\n",
74 | "\n",
75 | "# iterate through rows of X\n",
76 | "for i in range(rows):\n",
77 | " # iterate through columns of Y\n",
78 | " for j in range(cols):\n",
79 | " # iterate through rows of Y\n",
80 | " for k in range(len(Y)):\n",
81 | " result[i][j] += X[i][k] * Y[k][j]"
82 | ]
83 | },
84 | {
85 | "cell_type": "markdown",
86 | "metadata": {},
87 | "source": [
88 | "# Q1: What does this block of code do?"
89 | ]
90 | },
91 | {
92 | "cell_type": "code",
93 | "execution_count": null,
94 | "metadata": {},
95 | "outputs": [],
96 | "source": []
97 | },
98 | {
99 | "cell_type": "markdown",
100 | "metadata": {},
101 | "source": [
102 | "# Q2: What's the time complexity of that block?"
103 | ]
104 | },
105 | {
106 | "cell_type": "code",
107 | "execution_count": null,
108 | "metadata": {},
109 | "outputs": [],
110 | "source": []
111 | },
112 | {
113 | "cell_type": "markdown",
114 | "metadata": {},
115 | "source": [
116 | "## Show Results"
117 | ]
118 | },
119 | {
120 | "cell_type": "code",
121 | "execution_count": 6,
122 | "metadata": {},
123 | "outputs": [
124 | {
125 | "data": {
126 | "text/plain": [
127 | "array([[ 108., 153., 57., 27.],\n",
128 | " [ 74., 97., 73., 14.],\n",
129 | " [ 119., 157., 112., 23.]])"
130 | ]
131 | },
132 | "execution_count": 6,
133 | "metadata": {},
134 | "output_type": "execute_result"
135 | }
136 | ],
137 | "source": [
138 | "result"
139 | ]
140 | },
141 | {
142 | "cell_type": "code",
143 | "execution_count": 7,
144 | "metadata": {},
145 | "outputs": [
146 | {
147 | "data": {
148 | "text/plain": [
149 | "array([[108, 153, 57, 27],\n",
150 | " [ 74, 97, 73, 14],\n",
151 | " [119, 157, 112, 23]])"
152 | ]
153 | },
154 | "execution_count": 7,
155 | "metadata": {},
156 | "output_type": "execute_result"
157 | }
158 | ],
159 | "source": [
160 | "np_result = np.dot(X,Y)\n",
161 | "np_result"
162 | ]
163 | },
164 | {
165 | "cell_type": "code",
166 | "execution_count": 8,
167 | "metadata": {},
168 | "outputs": [
169 | {
170 | "data": {
171 | "text/plain": [
172 | "True"
173 | ]
174 | },
175 | "execution_count": 8,
176 | "metadata": {},
177 | "output_type": "execute_result"
178 | }
179 | ],
180 | "source": [
181 | "np.allclose(result, np_result)"
182 | ]
183 | },
184 | {
185 | "cell_type": "code",
186 | "execution_count": null,
187 | "metadata": {},
188 | "outputs": [],
189 | "source": []
190 | },
191 | {
192 | "cell_type": "code",
193 | "execution_count": null,
194 | "metadata": {},
195 | "outputs": [],
196 | "source": []
197 | },
198 | {
199 | "cell_type": "markdown",
200 | "metadata": {},
201 | "source": [
202 | "# Answers\n",
203 | "\n",
204 | "## Q1: Matrix-matrix multiplication (the nested loops compute the same result as np.dot(X, Y))\n",
205 | "## Q2: If m = rows of X, n = cols of Y, and p = rows of Y (= cols of X), the three nested loops give $O(mnp)$"
206 | ]
207 | }
208 | ],
209 | "metadata": {
210 | "kernelspec": {
211 | "display_name": "Python [conda root]",
212 | "language": "python",
213 | "name": "conda-root-py"
214 | },
215 | "language_info": {
216 | "codemirror_mode": {
217 | "name": "ipython",
218 | "version": 3
219 | },
220 | "file_extension": ".py",
221 | "mimetype": "text/x-python",
222 | "name": "python",
223 | "nbconvert_exporter": "python",
224 | "pygments_lexer": "ipython3",
225 | "version": "3.5.4"
226 | }
227 | },
228 | "nbformat": 4,
229 | "nbformat_minor": 2
230 | }
231 |
--------------------------------------------------------------------------------
/notebooks/Python/Coding_Best_Practices/OOP_How_to_Write_a_Class.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "## Imports"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": 1,
13 | "metadata": {},
14 | "outputs": [],
15 | "source": [
16 | "import numpy as np\n",
17 | "from sklearn.linear_model import LinearRegression\n",
18 | "from sklearn.datasets import load_boston\n",
19 | "from sklearn.metrics import mean_squared_error, r2_score"
20 | ]
21 | },
22 | {
23 | "cell_type": "markdown",
24 | "metadata": {},
25 | "source": [
26 | "## Classes"
27 | ]
28 | },
29 | {
30 | "cell_type": "code",
31 | "execution_count": 2,
32 | "metadata": {},
33 | "outputs": [],
34 | "source": [
35 | "class MyLinearRegression:\n",
36 | " \n",
37 | " def __init__(self, fit_intercept=True):\n",
38 | " self.coef_ = None\n",
39 | " self.intercept_ = None\n",
40 | " self._fit_intercept = fit_intercept\n",
41 | "\n",
42 | " \n",
43 | " def fit(self, X, y):\n",
44 | " \"\"\"Fit model coefficients.\n",
45 | "\n",
46 | " Arguments:\n",
47 | " X: 1D or 2D numpy array \n",
48 | " y: 1D numpy array\n",
49 | " \"\"\"\n",
50 | " \n",
51 | " # check if X is 1D or 2D array\n",
52 | " if len(X.shape) == 1:\n",
53 | " X = X.reshape(-1,1)\n",
54 | " \n",
55 | " # add bias if fit_intercept\n",
56 | " if self._fit_intercept:\n",
57 | " X = np.c_[np.ones(X.shape[0]), X]\n",
58 | " \n",
59 | " # closed form solution\n",
60 | " xTx = np.dot(X.T, X)\n",
61 | " inverse_xTx = np.linalg.inv(xTx)\n",
62 | " xTy = np.dot(X.T, y)\n",
63 | " coef = np.dot(inverse_xTx, xTy)\n",
64 | " \n",
65 | " # set attributes\n",
66 | " if self._fit_intercept:\n",
67 | " self.intercept_ = coef[0]\n",
68 | " self.coef_ = coef[1:]\n",
69 | " else:\n",
70 | " self.intercept_ = 0\n",
71 | " self.coef_ = coef\n",
72 | " \n",
73 | " def predict(self, X):\n",
74 | " \"\"\"Output model prediction.\n",
75 | "\n",
76 | " Arguments:\n",
77 | " X: 1D or 2D numpy array \n",
78 | " \"\"\"\n",
79 | " # check if X is 1D or 2D array\n",
80 | " if len(X.shape) == 1:\n",
81 | " X = X.reshape(-1,1) \n",
82 | " return np.dot(X, self.coef_) + self.intercept_"
83 | ]
84 | },
85 | {
86 | "cell_type": "code",
87 | "execution_count": 3,
88 | "metadata": {},
89 | "outputs": [],
90 | "source": [
91 | "class Metrics:\n",
92 | " \n",
93 | " def __init__(self, X, y, model):\n",
94 | " self.data = X\n",
95 | " self.target = y\n",
96 | " self.model = model\n",
97 | " # degrees of freedom population dep. variable variance\n",
98 | " self._dft = X.shape[0] - 1 \n",
99 | " # degrees of freedom population error variance\n",
100 | " self._dfe = X.shape[0] - X.shape[1] - 1 \n",
101 | " \n",
102 | " def sse(self):\n",
103 | " '''returns sum of squared errors (model vs actual)'''\n",
104 | " squared_errors = (self.target - self.model.predict(self.data)) ** 2\n",
105 | " self.sq_error_ = np.sum(squared_errors)\n",
106 | " return self.sq_error_\n",
107 | " \n",
108 | " def sst(self):\n",
109 | " '''returns total sum of squared errors (actual vs avg(actual))'''\n",
110 | " avg_y = np.mean(self.target)\n",
111 | " squared_errors = (self.target - avg_y) ** 2\n",
112 | " self.sst_ = np.sum(squared_errors)\n",
113 | " return self.sst_\n",
114 | " \n",
115 | " def r_squared(self):\n",
116 | " '''returns calculated value of r^2'''\n",
117 | " self.r_sq_ = 1 - self.sse()/self.sst()\n",
118 | " return self.r_sq_\n",
119 | " \n",
120 | " def adj_r_squared(self):\n",
121 | " '''returns calculated value of adjusted r^2'''\n",
122 | " self.adj_r_sq_ = 1 - (self.sse()/self._dfe) / (self.sst()/self._dft)\n",
123 | " return self.adj_r_sq_\n",
124 | " \n",
125 | " def mse(self):\n",
126 | " '''returns calculated value of mse'''\n",
127 | " self.mse_ = np.mean( (self.model.predict(self.data) - self.target) ** 2 )\n",
128 | " return self.mse_\n",
129 | " \n",
130 | " def pretty_print_stats(self):\n",
131 | " '''returns report of statistics for a given model object'''\n",
132 | " items = ( ('sse:', self.sse()), ('sst:', self.sst()), \n",
133 | " ('mse:', self.mse()), ('r^2:', self.r_squared()), \n",
134 | " ('adj_r^2:', self.adj_r_squared()))\n",
135 | " for item in items:\n",
136 | " print('{0:8} {1:.4f}'.format(item[0], item[1]))"
137 | ]
138 | },
139 | {
140 | "cell_type": "markdown",
141 | "metadata": {},
142 | "source": [
143 | "## Data"
144 | ]
145 | },
146 | {
147 | "cell_type": "code",
148 | "execution_count": 4,
149 | "metadata": {},
150 | "outputs": [],
151 | "source": [
152 | "boston = load_boston()\n",
153 | "data = boston.data\n",
154 | "target = boston.target"
155 | ]
156 | },
157 | {
158 | "cell_type": "markdown",
159 | "metadata": {},
160 | "source": [
161 | "## Model"
162 | ]
163 | },
164 | {
165 | "cell_type": "code",
166 | "execution_count": 5,
167 | "metadata": {},
168 | "outputs": [
169 | {
170 | "data": {
171 | "text/plain": [
172 | "LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)"
173 | ]
174 | },
175 | "execution_count": 5,
176 | "metadata": {},
177 | "output_type": "execute_result"
178 | }
179 | ],
180 | "source": [
181 | "# sklearn\n",
182 | "lr = LinearRegression()\n",
183 | "lr.fit(data, target)"
184 | ]
185 | },
186 | {
187 | "cell_type": "code",
188 | "execution_count": 6,
189 | "metadata": {},
190 | "outputs": [],
191 | "source": [
192 | "# from scratch\n",
193 | "mlr = MyLinearRegression()\n",
194 | "mlr.fit(data, target)"
195 | ]
196 | },
197 | {
198 | "cell_type": "markdown",
199 | "metadata": {},
200 | "source": [
201 | "## Sklearn Stats"
202 | ]
203 | },
204 | {
205 | "cell_type": "code",
206 | "execution_count": 7,
207 | "metadata": {},
208 | "outputs": [
209 | {
210 | "name": "stdout",
211 | "output_type": "stream",
212 | "text": [
213 | "mse: 21.8978\n",
214 | "r^2 0.7406\n"
215 | ]
216 | }
217 | ],
218 | "source": [
219 | "print('mse:', round(mean_squared_error(target, lr.predict(data)), 4))\n",
220 | "print('r^2', round(r2_score(target, lr.predict(data)), 4))"
221 | ]
222 | },
223 | {
224 | "cell_type": "markdown",
225 | "metadata": {},
226 | "source": [
227 | "## Metrics Stats"
228 | ]
229 | },
230 | {
231 | "cell_type": "code",
232 | "execution_count": 8,
233 | "metadata": {},
234 | "outputs": [
235 | {
236 | "name": "stdout",
237 | "output_type": "stream",
238 | "text": [
239 | "sse: 11080.2763\n",
240 | "sst: 42716.2954\n",
241 | "mse: 21.8978\n",
242 | "r^2: 0.7406\n",
243 | "adj_r^2: 0.7338\n"
244 | ]
245 | }
246 | ],
247 | "source": [
248 | "metrics = Metrics(data, target, mlr)\n",
249 | "metrics.pretty_print_stats()"
250 | ]
251 | },
252 | {
253 | "cell_type": "markdown",
254 | "metadata": {},
255 | "source": [
256 | "---"
257 | ]
258 | },
259 | {
260 | "cell_type": "markdown",
261 | "metadata": {},
262 | "source": [
263 | "## Right now we have two separate classes. We'd like to integrate them, but how do we do that?\n",
264 | "\n",
265 | "## -- INHERITANCE!"
266 | ]
267 | },
268 | {
269 | "cell_type": "code",
270 | "execution_count": 9,
271 | "metadata": {},
272 | "outputs": [],
273 | "source": [
274 | "class ModifiedMetrics:\n",
275 | " \n",
276 | " def sse(self):\n",
277 | " '''returns sum of squared errors (model vs actual)'''\n",
278 | " squared_errors = (self.target - self.predict(self.data)) ** 2\n",
279 | " self.sq_error_ = np.sum(squared_errors)\n",
280 | " return self.sq_error_\n",
281 | " \n",
282 | " def sst(self):\n",
283 | " '''returns total sum of squared errors (actual vs avg(actual))'''\n",
284 | " avg_y = np.mean(self.target)\n",
285 | " squared_errors = (self.target - avg_y) ** 2\n",
286 | " self.sst_ = np.sum(squared_errors)\n",
287 | " return self.sst_\n",
288 | " \n",
289 | " def r_squared(self):\n",
290 | " '''returns calculated value of r^2'''\n",
291 | " self.r_sq_ = 1 - self.sse()/self.sst()\n",
292 | " return self.r_sq_\n",
293 | " \n",
294 | " def adj_r_squared(self):\n",
295 | " '''returns calculated value of adjusted r^2'''\n",
296 | " self.adj_r_sq_ = 1 - (self.sse()/self._dfe) / (self.sst()/self._dft)\n",
297 | " return self.adj_r_sq_\n",
298 | " \n",
299 | " def mse(self):\n",
300 | " '''returns calculated value of mse'''\n",
301 | " self.mse_ = np.mean( (self.predict(self.data) - self.target) ** 2 )\n",
302 | " return self.mse_\n",
303 | " \n",
304 | " def pretty_print_stats(self):\n",
305 | " '''returns report of statistics for a given model object'''\n",
306 | " items = ( ('sse:', self.sse()), ('sst:', self.sst()), \n",
307 | " ('mse:', self.mse()), ('r^2:', self.r_squared()), \n",
308 | " ('adj_r^2:', self.adj_r_squared()))\n",
309 | " for item in items:\n",
310 | " print('{0:8} {1:.4f}'.format(item[0], item[1]))"
311 | ]
312 | },
313 | {
314 | "cell_type": "code",
315 | "execution_count": 10,
316 | "metadata": {},
317 | "outputs": [],
318 | "source": [
319 | "class MyLinearRegressionWithInheritance(ModifiedMetrics):\n",
320 | " \n",
321 | " \n",
322 | " def __init__(self, fit_intercept=True):\n",
323 | " self.coef_ = None\n",
324 | " self.intercept_ = None\n",
325 | " self._fit_intercept = fit_intercept\n",
326 | " \n",
327 | " \n",
328 | " def fit(self, X, y):\n",
329 | " \"\"\"\n",
330 | " Fit model coefficients.\n",
331 | "\n",
332 | " Arguments:\n",
333 | " X: 1D or 2D numpy array \n",
334 | " y: 1D numpy array\n",
335 | " \"\"\"\n",
336 | " \n",
337 | " # training data & ground truth data\n",
338 | " self.data = X\n",
339 | " self.target = y\n",
340 | " \n",
341 | " # degrees of freedom population dep. variable variance \n",
342 | " self._dft = X.shape[0] - 1 \n",
343 | " # degrees of freedom population error variance\n",
344 | " self._dfe = X.shape[0] - X.shape[1] - 1\n",
345 | " \n",
346 | " # check if X is 1D or 2D array\n",
347 | " if len(X.shape) == 1:\n",
348 | " X = X.reshape(-1,1)\n",
349 | " \n",
350 | " # add bias if fit_intercept\n",
351 | " if self._fit_intercept:\n",
352 | " X = np.c_[np.ones(X.shape[0]), X]\n",
353 | " \n",
354 | " # closed form solution\n",
355 | " xTx = np.dot(X.T, X)\n",
356 | " inverse_xTx = np.linalg.inv(xTx)\n",
357 | " xTy = np.dot(X.T, y)\n",
358 | " coef = np.dot(inverse_xTx, xTy)\n",
359 | " \n",
360 | " # set attributes\n",
361 | " if self._fit_intercept:\n",
362 | " self.intercept_ = coef[0]\n",
363 | " self.coef_ = coef[1:]\n",
364 | " else:\n",
365 | " self.intercept_ = 0\n",
366 | " self.coef_ = coef\n",
367 | " \n",
368 | " def predict(self, X):\n",
369 | " \"\"\"Output model prediction.\n",
370 | "\n",
371 | " Arguments:\n",
372 | " X: 1D or 2D numpy array \n",
373 | " \"\"\"\n",
374 | " # check if X is 1D or 2D array\n",
375 | " if len(X.shape) == 1:\n",
376 | " X = X.reshape(-1,1) \n",
377 | " return np.dot(X, self.coef_) + self.intercept_"
378 | ]
379 | },
380 | {
381 | "cell_type": "code",
382 | "execution_count": 11,
383 | "metadata": {},
384 | "outputs": [],
385 | "source": [
386 | "# from scratch w/inheritance\n",
387 | "mlri = MyLinearRegressionWithInheritance()\n",
388 | "mlri.fit(data, target)"
389 | ]
390 | },
391 | {
392 | "cell_type": "code",
393 | "execution_count": 12,
394 | "metadata": {},
395 | "outputs": [
396 | {
397 | "name": "stdout",
398 | "output_type": "stream",
399 | "text": [
400 | "sse: 11080.2763\n",
401 | "sst: 42716.2954\n",
402 | "mse: 21.8978\n",
403 | "r^2: 0.7406\n",
404 | "adj_r^2: 0.7338\n"
405 | ]
406 | }
407 | ],
408 | "source": [
409 | "mlri.pretty_print_stats()"
410 | ]
411 | }
412 | ],
413 | "metadata": {
414 | "kernelspec": {
415 | "display_name": "Python [conda root]",
416 | "language": "python",
417 | "name": "conda-root-py"
418 | },
419 | "language_info": {
420 | "codemirror_mode": {
421 | "name": "ipython",
422 | "version": 3
423 | },
424 | "file_extension": ".py",
425 | "mimetype": "text/x-python",
426 | "name": "python",
427 | "nbconvert_exporter": "python",
428 | "pygments_lexer": "ipython3",
429 | "version": "3.5.4"
430 | }
431 | },
432 | "nbformat": 4,
433 | "nbformat_minor": 2
434 | }
435 |
--------------------------------------------------------------------------------
/notebooks/Python/NumPy/NP_argwhere_isin.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": null,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "np.argwhere(X)"
10 | ]
11 | },
12 | {
13 | "cell_type": "code",
14 | "execution_count": null,
15 | "metadata": {},
16 | "outputs": [],
17 | "source": [
18 | "np.isin(X, 1)"
19 | ]
20 | }
21 | ],
22 | "metadata": {
23 | "kernelspec": {
24 | "display_name": "Python [conda root]",
25 | "language": "python",
26 | "name": "conda-root-py"
27 | },
28 | "language_info": {
29 | "codemirror_mode": {
30 | "name": "ipython",
31 | "version": 3
32 | },
33 | "file_extension": ".py",
34 | "mimetype": "text/x-python",
35 | "name": "python",
36 | "nbconvert_exporter": "python",
37 | "pygments_lexer": "ipython3",
38 | "version": "3.5.4"
39 | }
40 | },
41 | "nbformat": 4,
42 | "nbformat_minor": 2
43 | }
44 |
--------------------------------------------------------------------------------
/notebooks/Python/Programming_Problems/Alphabet_Magnets.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "### Sal's classroom has a bag of alphabet magnets. She wants to know if she can spell her friend's name using the letters in the bag. \n",
8 | "\n",
9 | "### Write a function called *CanYouSpell* that will take a list of letters and a name and print out yes if the name can be spelled and no otherwise.\n",
10 | "\n",
11 | "> CanYouSpell(['y','n','p','g','n','l'],\"lynn\") would print YES \n",
12 | "> CanYouSpell(['y','n','p','g','l'],\"lynn\") would print NO"
13 | ]
14 | },
15 | {
16 | "cell_type": "code",
17 | "execution_count": 1,
18 | "metadata": {
19 | "collapsed": true
20 | },
21 | "outputs": [],
22 | "source": [
23 | "def CanYouSpell(list_of_letters, name):\n",
24 | " '''takes list of letters and a name, then returns Boolean showing if name can be spelled'''\n",
25 | " \n",
26 | " # type checking\n",
27 | " assert type(list_of_letters) == list, 'list_of_letters input must be a list'\n",
28 | " assert type(name) == str, 'name input must be a string'\n",
29 | " \n",
30 | " # import\n",
31 | " from collections import defaultdict\n",
32 | " \n",
33 | " # defaultdict objects\n",
34 | " defdict1 = defaultdict(int)\n",
35 | " defdict2 = defaultdict(int)\n",
36 | " \n",
37 | " # main logic\n",
38 | " if len(list_of_letters) < len(name):\n",
39 | " return False\n",
40 | " else:\n",
41 | " # count letter occurrences in list_of_letters\n",
42 | " for letter in list_of_letters:\n",
43 | " defdict1[letter] += 1\n",
44 | " # count letter occurrences in name\n",
45 | " for letter in name:\n",
46 | " defdict2[letter] += 1\n",
47 | " # check if possible to spell name\n",
48 | " for k, _ in defdict2.items():\n",
49 | " if defdict1[k] < defdict2[k]:\n",
50 | " return False \n",
51 | " return True"
52 | ]
53 | },
54 | {
55 | "cell_type": "markdown",
56 | "metadata": {},
57 | "source": [
58 | "---"
59 | ]
60 | },
61 | {
62 | "cell_type": "code",
63 | "execution_count": 2,
64 | "metadata": {},
65 | "outputs": [
66 | {
67 | "data": {
68 | "text/plain": [
69 | "True"
70 | ]
71 | },
72 | "execution_count": 2,
73 | "metadata": {},
74 | "output_type": "execute_result"
75 | }
76 | ],
77 | "source": [
78 | "# example 1\n",
79 | "CanYouSpell(['y','n','p','g','n','l'],\"lynn\")"
80 | ]
81 | },
82 | {
83 | "cell_type": "code",
84 | "execution_count": 3,
85 | "metadata": {},
86 | "outputs": [
87 | {
88 | "data": {
89 | "text/plain": [
90 | "False"
91 | ]
92 | },
93 | "execution_count": 3,
94 | "metadata": {},
95 | "output_type": "execute_result"
96 | }
97 | ],
98 | "source": [
99 | "# example 2\n",
100 | "CanYouSpell(['y','n','p','g','l'],\"lynn\")"
101 | ]
102 | },
103 | {
104 | "cell_type": "code",
105 | "execution_count": 4,
106 | "metadata": {},
107 | "outputs": [
108 | {
109 | "data": {
110 | "text/plain": [
111 | "False"
112 | ]
113 | },
114 | "execution_count": 4,
115 | "metadata": {},
116 | "output_type": "execute_result"
117 | }
118 | ],
119 | "source": [
120 | "# example: too few letters\n",
121 | "CanYouSpell(['y','n','p','g'],\"lynn\")"
122 | ]
123 | },
124 | {
125 | "cell_type": "code",
126 | "execution_count": 5,
127 | "metadata": {},
128 | "outputs": [
129 | {
130 | "data": {
131 | "text/plain": [
132 | "True"
133 | ]
134 | },
135 | "execution_count": 5,
136 | "metadata": {},
137 | "output_type": "execute_result"
138 | }
139 | ],
140 | "source": [
141 | "# fun example 1\n",
142 | "CanYouSpell(list('oranges and apples'), 'apple')"
143 | ]
144 | },
145 | {
146 | "cell_type": "code",
147 | "execution_count": 6,
148 | "metadata": {},
149 | "outputs": [
150 | {
151 | "data": {
152 | "text/plain": [
153 | "False"
154 | ]
155 | },
156 | "execution_count": 6,
157 | "metadata": {},
158 | "output_type": "execute_result"
159 | }
160 | ],
161 | "source": [
162 | "# fun example 2\n",
163 | "CanYouSpell(list('oranges and apps'), 'apple')"
164 | ]
165 | }
166 | ],
167 | "metadata": {
168 | "kernelspec": {
169 | "display_name": "Python [conda root]",
170 | "language": "python",
171 | "name": "conda-root-py"
172 | },
173 | "language_info": {
174 | "codemirror_mode": {
175 | "name": "ipython",
176 | "version": 3
177 | },
178 | "file_extension": ".py",
179 | "mimetype": "text/x-python",
180 | "name": "python",
181 | "nbconvert_exporter": "python",
182 | "pygments_lexer": "ipython3",
183 | "version": "3.5.4"
184 | }
185 | },
186 | "nbformat": 4,
187 | "nbformat_minor": 2
188 | }
189 |
--------------------------------------------------------------------------------
/notebooks/Python/Programming_Problems/Bubble_Sort.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import numpy as np"
10 | ]
11 | },
12 | {
13 | "cell_type": "markdown",
14 | "metadata": {},
15 | "source": [
16 | "## Bubble Sort\n",
17 | "Bubble sort is described in detail on [Wikipedia](https://en.wikipedia.org/wiki/Bubble_sort).\n",
18 | "\n",
19 | "Its worst-case and average-case time complexity is $O(n^2)$."
20 | ]
21 | },
22 | {
23 | "cell_type": "code",
24 | "execution_count": 2,
25 | "metadata": {},
26 | "outputs": [],
27 | "source": [
28 | "def bubble_sort(array, verbose=False):\n",
29 | " '''The bubble sort algorithm.\n",
30 | " \n",
31 | " INPUT: array or list\n",
32 | " OUTPUT: sorted array or list\n",
33 | " '''\n",
34 | " \n",
35 | " # setup\n",
36 | " unsorted = True\n",
37 | " list_length = len(array) - 1\n",
38 | " \n",
39 | " # main logic\n",
40 | " while unsorted:\n",
41 | " changes = 0\n",
42 | " for i in range(list_length):\n",
43 | " if mylist[i] > mylist[i+1]:\n",
44 | " mylist[i], mylist[i+1] = mylist[i+1], mylist[i]\n",
45 | " changes += 1\n",
46 | " if not changes:\n",
47 | " unsorted = False\n",
48 | " if verbose:\n",
49 | " print(mylist)"
50 | ]
51 | },
52 | {
53 | "cell_type": "code",
54 | "execution_count": 3,
55 | "metadata": {},
56 | "outputs": [
57 | {
58 | "data": {
59 | "text/plain": [
60 | "array([39, 40, 35, 46, 11, 33, 23, 14, 38, 32, 1, 10, 16, 34, 26, 22, 23,\n",
61 | " 10, 12, 11, 10, 0, 6, 10, 31])"
62 | ]
63 | },
64 | "execution_count": 3,
65 | "metadata": {},
66 | "output_type": "execute_result"
67 | }
68 | ],
69 | "source": [
70 | "mylist = np.random.randint(0, 50, 25)\n",
71 | "mylist"
72 | ]
73 | },
74 | {
75 | "cell_type": "code",
76 | "execution_count": 4,
77 | "metadata": {},
78 | "outputs": [
79 | {
80 | "name": "stdout",
81 | "output_type": "stream",
82 | "text": [
83 | "[39 35 40 11 33 23 14 38 32 1 10 16 34 26 22 23 10 12 11 10 0 6 10 31 46]\n",
84 | "[35 39 11 33 23 14 38 32 1 10 16 34 26 22 23 10 12 11 10 0 6 10 31 40 46]\n",
85 | "[35 11 33 23 14 38 32 1 10 16 34 26 22 23 10 12 11 10 0 6 10 31 39 40 46]\n",
86 | "[11 33 23 14 35 32 1 10 16 34 26 22 23 10 12 11 10 0 6 10 31 38 39 40 46]\n",
87 | "[11 23 14 33 32 1 10 16 34 26 22 23 10 12 11 10 0 6 10 31 35 38 39 40 46]\n",
88 | "[11 14 23 32 1 10 16 33 26 22 23 10 12 11 10 0 6 10 31 34 35 38 39 40 46]\n",
89 | "[11 14 23 1 10 16 32 26 22 23 10 12 11 10 0 6 10 31 33 34 35 38 39 40 46]\n",
90 | "[11 14 1 10 16 23 26 22 23 10 12 11 10 0 6 10 31 32 33 34 35 38 39 40 46]\n",
91 | "[11 1 10 14 16 23 22 23 10 12 11 10 0 6 10 26 31 32 33 34 35 38 39 40 46]\n",
92 | "[ 1 10 11 14 16 22 23 10 12 11 10 0 6 10 23 26 31 32 33 34 35 38 39 40 46]\n",
93 | "[ 1 10 11 14 16 22 10 12 11 10 0 6 10 23 23 26 31 32 33 34 35 38 39 40 46]\n",
94 | "[ 1 10 11 14 16 10 12 11 10 0 6 10 22 23 23 26 31 32 33 34 35 38 39 40 46]\n",
95 | "[ 1 10 11 14 10 12 11 10 0 6 10 16 22 23 23 26 31 32 33 34 35 38 39 40 46]\n",
96 | "[ 1 10 11 10 12 11 10 0 6 10 14 16 22 23 23 26 31 32 33 34 35 38 39 40 46]\n",
97 | "[ 1 10 10 11 11 10 0 6 10 12 14 16 22 23 23 26 31 32 33 34 35 38 39 40 46]\n",
98 | "[ 1 10 10 11 10 0 6 10 11 12 14 16 22 23 23 26 31 32 33 34 35 38 39 40 46]\n",
99 | "[ 1 10 10 10 0 6 10 11 11 12 14 16 22 23 23 26 31 32 33 34 35 38 39 40 46]\n",
100 | "[ 1 10 10 0 6 10 10 11 11 12 14 16 22 23 23 26 31 32 33 34 35 38 39 40 46]\n",
101 | "[ 1 10 0 6 10 10 10 11 11 12 14 16 22 23 23 26 31 32 33 34 35 38 39 40 46]\n",
102 | "[ 1 0 6 10 10 10 10 11 11 12 14 16 22 23 23 26 31 32 33 34 35 38 39 40 46]\n",
103 | "[ 0 1 6 10 10 10 10 11 11 12 14 16 22 23 23 26 31 32 33 34 35 38 39 40 46]\n",
104 | "[ 0 1 6 10 10 10 10 11 11 12 14 16 22 23 23 26 31 32 33 34 35 38 39 40 46]\n"
105 | ]
106 | }
107 | ],
108 | "source": [
109 | "bubble_sort(mylist, verbose=True)"
110 | ]
111 | }
112 | ],
113 | "metadata": {
114 | "kernelspec": {
115 | "display_name": "Python [conda root]",
116 | "language": "python",
117 | "name": "conda-root-py"
118 | },
119 | "language_info": {
120 | "codemirror_mode": {
121 | "name": "ipython",
122 | "version": 3
123 | },
124 | "file_extension": ".py",
125 | "mimetype": "text/x-python",
126 | "name": "python",
127 | "nbconvert_exporter": "python",
128 | "pygments_lexer": "ipython3",
129 | "version": "3.5.4"
130 | }
131 | },
132 | "nbformat": 4,
133 | "nbformat_minor": 2
134 | }
135 |
--------------------------------------------------------------------------------
/notebooks/Python/Programming_Problems/Cryptogram_Puzzle.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "## Introduction\n",
8 | "\n",
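9 | "Objective #1: Find a solution for this problem, where each letter stands for a different digit from 1 to 9 and a pair such as $BC$ denotes the product $B \\times C$:\n",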
10 | "\n",
11 | "$\\frac{A}{BC} + \\frac{D}{EF} + \\frac{G}{HI} = 1$\n",
12 | "\n",
13 | "Objective #2: Enumerate all solutions to the problem."
14 | ]
15 | },
16 | {
17 | "cell_type": "markdown",
18 | "metadata": {},
19 | "source": [
20 | "## Setup"
21 | ]
22 | },
23 | {
24 | "cell_type": "code",
25 | "execution_count": 1,
26 | "metadata": {
27 | "collapsed": true
28 | },
29 | "outputs": [],
30 | "source": [
31 | "import numpy as np"
32 | ]
33 | },
34 | {
35 | "cell_type": "code",
36 | "execution_count": 4,
37 | "metadata": {},
38 | "outputs": [
39 | {
40 | "data": {
41 | "text/plain": [
42 | "array([1, 2, 3, 4, 5, 6, 7, 8, 9])"
43 | ]
44 | },
45 | "execution_count": 4,
46 | "metadata": {},
47 | "output_type": "execute_result"
48 | }
49 | ],
50 | "source": [
51 | "digits = np.arange(1,10)\n",
52 | "digits"
53 | ]
54 | },
55 | {
56 | "cell_type": "code",
57 | "execution_count": 7,
58 | "metadata": {
59 | "collapsed": true
60 | },
61 | "outputs": [],
62 | "source": [
63 | "def checker(arr):\n",
64 | " return arr[0]/(arr[1]*arr[2]) + arr[3]/(arr[4]*arr[5]) + arr[6]/(arr[7]*arr[8])"
65 | ]
66 | },
67 | {
68 | "cell_type": "code",
69 | "execution_count": 27,
70 | "metadata": {
71 | "collapsed": true
72 | },
73 | "outputs": [],
74 | "source": [
75 | "def pretty_print(count, permute):\n",
76 | " print('A={} B={} C={} D={} E={} F={} G={} H={} I={}'.format(permute[0], permute[1],\n",
77 | " permute[2], permute[3],\n",
78 | " permute[4], permute[5], \n",
79 | " permute[6], permute[7],\n",
80 | " permute[8]))\n",
81 | " print('\\nSearches:', count)"
82 | ]
83 | },
84 | {
85 | "cell_type": "markdown",
86 | "metadata": {},
87 | "source": [
88 | "## Objective #1: find a solution"
89 | ]
90 | },
91 | {
92 | "cell_type": "code",
93 | "execution_count": 12,
94 | "metadata": {
95 | "collapsed": true
96 | },
97 | "outputs": [],
98 | "source": [
99 | "def naive_search():\n",
100 | " '''random search using permutations - returns number of searches and solution.'''\n",
101 | " import numpy as np\n",
102 | " digits = np.arange(1,10)\n",
103 | " count = 0\n",
104 | " permute = np.random.permutation(digits)\n",
105 | " while checker(permute) != 1:\n",
106 | " count += 1\n",
107 | " permute = np.random.permutation(digits)\n",
108 | " return count, permute"
109 | ]
110 | },
111 | {
112 | "cell_type": "code",
113 | "execution_count": 21,
114 | "metadata": {},
115 | "outputs": [
116 | {
117 | "name": "stdout",
118 | "output_type": "stream",
119 | "text": [
120 | "9455\n",
121 | "[7 4 2 1 3 6 5 8 9]\n"
122 | ]
123 | }
124 | ],
125 | "source": [
126 | "searches, array = naive_search()\n",
127 | "print(searches)\n",
128 | "print(array)"
129 | ]
130 | },
131 | {
132 | "cell_type": "code",
133 | "execution_count": 28,
134 | "metadata": {},
135 | "outputs": [
136 | {
137 | "name": "stdout",
138 | "output_type": "stream",
139 | "text": [
140 | "A=7 B=4 C=2 D=1 E=3 F=6 G=5 H=8 I=9\n",
141 | "\n",
142 | "Searches: 9455\n"
143 | ]
144 | }
145 | ],
146 | "source": [
147 | "pretty_print(searches, array)"
148 | ]
149 | },
150 | {
151 | "cell_type": "markdown",
152 | "metadata": {},
153 | "source": [
154 | "## Monte Carlo Expectation"
155 | ]
156 | },
157 | {
158 | "cell_type": "code",
159 | "execution_count": 61,
160 | "metadata": {
161 | "collapsed": true
162 | },
163 | "outputs": [],
164 | "source": [
165 | "count = []\n",
166 | "solution = []\n",
167 | "for _ in range(int(1e3)):\n",
168 | " searches, array = naive_search()\n",
169 | " count.append(searches) \n",
170 | " solution.append(array)"
171 | ]
172 | },
173 | {
174 | "cell_type": "code",
175 | "execution_count": 62,
176 | "metadata": {},
177 | "outputs": [
178 | {
179 | "data": {
180 | "text/plain": [
181 | "7434.53"
182 | ]
183 | },
184 | "execution_count": 62,
185 | "metadata": {},
186 | "output_type": "execute_result"
187 | }
188 | ],
189 | "source": [
190 | "sum(count)/len(count)"
191 | ]
192 | },
193 | {
194 | "cell_type": "markdown",
195 | "metadata": {},
196 | "source": [
197 | "## Next Steps\n",
198 | "\n",
199 | "Can you find a brute force approach?\n",
200 | "\n",
201 | "Can you find a faster method?"
202 | ]
203 | }
204 | ],
205 | "metadata": {
206 | "kernelspec": {
207 | "display_name": "Python [conda root]",
208 | "language": "python",
209 | "name": "conda-root-py"
210 | },
211 | "language_info": {
212 | "codemirror_mode": {
213 | "name": "ipython",
214 | "version": 3
215 | },
216 | "file_extension": ".py",
217 | "mimetype": "text/x-python",
218 | "name": "python",
219 | "nbconvert_exporter": "python",
220 | "pygments_lexer": "ipython3",
221 | "version": "3.5.4"
222 | }
223 | },
224 | "nbformat": 4,
225 | "nbformat_minor": 2
226 | }
227 |
--------------------------------------------------------------------------------
/notebooks/Python/Programming_Problems/Dot_Product.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {
7 | "collapsed": true
8 | },
9 | "outputs": [],
10 | "source": [
11 | "import numpy as np"
12 | ]
13 | },
14 | {
15 | "cell_type": "code",
16 | "execution_count": 2,
17 | "metadata": {
18 | "collapsed": true
19 | },
20 | "outputs": [],
21 | "source": [
22 | "def find_w(a, b):\n",
23 | " '''function that returns solution to a*w=b, if it exists'''\n",
24 | " # check if 1D or 2D array\n",
25 | " if len(a.shape) != 2:\n",
26 | " # reshape if 1D array, else throws error\n",
27 | " a = a.reshape(-1,1)\n",
28 | " try:\n",
29 | " return np.linalg.solve(a,b)\n",
30 | " except:\n",
31 | " print('Warning: Singular Matrix') ## case where a=0 and b!=0\n",
32 | " else:\n",
33 | " # num rows < num columns = infinite solutions\n",
34 | " if a.shape[0] < a.shape[1]:\n",
35 | " print('Underdetermined System: infinite or no solutions')\n",
36 | " else:\n",
37 | " try:\n",
38 | " return np.linalg.solve(a,b)\n",
39 | " except:\n",
40 | " print('0 solutions')"
41 | ]
42 | },
43 | {
44 | "cell_type": "markdown",
45 | "metadata": {},
46 | "source": [
47 | "### Example Problem"
48 | ]
49 | },
50 | {
51 | "cell_type": "code",
52 | "execution_count": 3,
53 | "metadata": {},
54 | "outputs": [
55 | {
56 | "name": "stdout",
57 | "output_type": "stream",
58 | "text": [
59 | "[[5 3 7]\n",
60 | " [2 4 1]]\n",
61 | "\n",
62 | "[537 241]\n"
63 | ]
64 | }
65 | ],
66 | "source": [
67 | "X = np.array([5,3,7,2,4,1]).reshape(2,3)\n",
68 | "y = np.array([537, 241])\n",
69 | "print(X)\n",
70 | "print()\n",
71 | "print(y)"
72 | ]
73 | },
74 | {
75 | "cell_type": "code",
76 | "execution_count": 4,
77 | "metadata": {},
78 | "outputs": [
79 | {
80 | "name": "stdout",
81 | "output_type": "stream",
82 | "text": [
83 | "Underdetermined System: infinite or no solutions\n"
84 | ]
85 | }
86 | ],
87 | "source": [
88 | "find_w(X,y)"
89 | ]
90 | },
91 | {
92 | "cell_type": "markdown",
93 | "metadata": {},
94 | "source": [
95 | "***This is an underdetermined system, as there are 2 equations and 3 unknowns. Therefore, there are infinitely many solutions.***"
96 | ]
97 | },
98 | {
99 | "cell_type": "markdown",
100 | "metadata": {},
101 | "source": [
102 | "### Example A"
103 | ]
104 | },
105 | {
106 | "cell_type": "code",
107 | "execution_count": 5,
108 | "metadata": {},
109 | "outputs": [
110 | {
111 | "data": {
112 | "text/plain": [
113 | "array([ 4.])"
114 | ]
115 | },
116 | "execution_count": 5,
117 | "metadata": {},
118 | "output_type": "execute_result"
119 | }
120 | ],
121 | "source": [
122 | "X = np.array([2])\n",
123 | "y = np.array([8])\n",
124 | "find_w(X,y)"
125 | ]
126 | },
127 | {
128 | "cell_type": "markdown",
129 | "metadata": {},
130 | "source": [
131 | "### Example B"
132 | ]
133 | },
134 | {
135 | "cell_type": "code",
136 | "execution_count": 6,
137 | "metadata": {},
138 | "outputs": [
139 | {
140 | "name": "stdout",
141 | "output_type": "stream",
142 | "text": [
143 | "Warning: Singular Matrix\n"
144 | ]
145 | }
146 | ],
147 | "source": [
148 | "X = np.array([0])\n",
149 | "y = np.array([8])\n",
150 | "find_w(X,y)"
151 | ]
152 | },
153 | {
154 | "cell_type": "markdown",
155 | "metadata": {},
156 | "source": [
157 | "***No solution. Zero times anything is always zero, so it can never equal 8!***"
158 | ]
159 | },
160 | {
161 | "cell_type": "markdown",
162 | "metadata": {},
163 | "source": [
164 | "### Example C"
165 | ]
166 | },
167 | {
168 | "cell_type": "code",
169 | "execution_count": 7,
170 | "metadata": {},
171 | "outputs": [
172 | {
173 | "name": "stdout",
174 | "output_type": "stream",
175 | "text": [
176 | "Warning: Singular Matrix\n"
177 | ]
178 | }
179 | ],
180 | "source": [
181 | "X = np.array([2,4])\n",
182 | "y = np.array([8])\n",
183 | "find_w(X,y)"
184 | ]
185 | },
186 | {
187 | "cell_type": "markdown",
188 | "metadata": {},
189 | "source": [
190 | "`np.linalg.solve` expects a square coefficient matrix, so disregard the printed warning. This system is underdetermined (one equation, two unknowns) and therefore has infinitely many solutions."
191 | ]
192 | },
193 | {
194 | "cell_type": "markdown",
195 | "metadata": {},
196 | "source": [
197 | "### Example D"
198 | ]
199 | },
200 | {
201 | "cell_type": "code",
202 | "execution_count": 8,
203 | "metadata": {},
204 | "outputs": [
205 | {
206 | "data": {
207 | "text/plain": [
208 | "array([-2., 3.])"
209 | ]
210 | },
211 | "execution_count": 8,
212 | "metadata": {},
213 | "output_type": "execute_result"
214 | }
215 | ],
216 | "source": [
217 | "X = np.array([2,4,0,1]).reshape(2,2)\n",
218 | "y = np.array([8,3])\n",
219 | "find_w(X,y)"
220 | ]
221 | },
222 | {
223 | "cell_type": "markdown",
224 | "metadata": {},
225 | "source": [
226 | "### Example E"
227 | ]
228 | },
229 | {
230 | "cell_type": "code",
231 | "execution_count": 9,
232 | "metadata": {},
233 | "outputs": [
234 | {
235 | "name": "stdout",
236 | "output_type": "stream",
237 | "text": [
238 | "0 solutions\n"
239 | ]
240 | }
241 | ],
242 | "source": [
243 | "X = np.array([2,4,0,1,9,5]).reshape(3,2)\n",
244 | "y = np.array([8,3,1])\n",
245 | "find_w(X,y)"
246 | ]
247 | },
248 | {
249 | "cell_type": "markdown",
250 | "metadata": {},
251 | "source": [
252 | "### Example F"
253 | ]
254 | },
255 | {
256 | "cell_type": "code",
257 | "execution_count": 10,
258 | "metadata": {},
259 | "outputs": [
260 | {
261 | "data": {
262 | "text/plain": [
263 | "array([ 2., 0.])"
264 | ]
265 | },
266 | "execution_count": 10,
267 | "metadata": {},
268 | "output_type": "execute_result"
269 | }
270 | ],
271 | "source": [
272 | "X = np.array([2,2,3,3]).reshape(2,2)\n",
273 | "y = np.array([4,6])\n",
274 | "find_w(X,y)"
275 | ]
276 | },
277 | {
278 | "cell_type": "markdown",
279 | "metadata": {},
280 | "source": [
281 | "There are actually infinitely many solutions here: the second equation is just the first one scaled by a constant, so we really have only 1 unique equation with 2 unknowns. The system is underdetermined. This edge case still needs to be caught in the code."
282 | ]
283 | },
284 | {
285 | "cell_type": "markdown",
286 | "metadata": {},
287 | "source": [
288 | "### Example G"
289 | ]
290 | },
291 | {
292 | "cell_type": "markdown",
293 | "metadata": {},
294 | "source": [
295 | "Can't be done. Can't multiply text * numbers."
296 | ]
297 | },
298 | {
299 | "cell_type": "markdown",
300 | "metadata": {},
301 | "source": [
302 | "### Example H"
303 | ]
304 | },
305 | {
306 | "cell_type": "code",
307 | "execution_count": 11,
308 | "metadata": {},
309 | "outputs": [
310 | {
311 | "data": {
312 | "text/plain": [
313 | "array([ 8., 6.])"
314 | ]
315 | },
316 | "execution_count": 11,
317 | "metadata": {},
318 | "output_type": "execute_result"
319 | }
320 | ],
321 | "source": [
322 | "X = np.array([1,0,0,1]).reshape(2,2)\n",
323 | "y = np.array([8,6])\n",
324 | "find_w(X,y)"
325 | ]
326 | },
327 | {
328 | "cell_type": "markdown",
329 | "metadata": {},
330 | "source": [
331 | "### Example I"
332 | ]
333 | },
334 | {
335 | "cell_type": "code",
336 | "execution_count": 12,
337 | "metadata": {},
338 | "outputs": [
339 | {
340 | "name": "stdout",
341 | "output_type": "stream",
342 | "text": [
343 | "Underdetermined System: infinite or no solutions\n"
344 | ]
345 | }
346 | ],
347 | "source": [
348 | "X = np.array([1,1,0,1,0,1]).reshape(2,3)\n",
349 | "y = np.array([8,6])\n",
350 | "find_w(X,y)"
351 | ]
352 | },
353 | {
354 | "cell_type": "markdown",
355 | "metadata": {},
356 | "source": [
357 | "### Example J"
358 | ]
359 | },
360 | {
361 | "cell_type": "code",
362 | "execution_count": 13,
363 | "metadata": {},
364 | "outputs": [
365 | {
366 | "data": {
367 | "text/plain": [
368 | "array([ 8., -2.])"
369 | ]
370 | },
371 | "execution_count": 13,
372 | "metadata": {},
373 | "output_type": "execute_result"
374 | }
375 | ],
376 | "source": [
377 | "X = np.array([1,0,1,1]).reshape(2,2)\n",
378 | "y = np.array([8,6])\n",
379 | "find_w(X,y)"
380 | ]
381 | },
382 | {
383 | "cell_type": "markdown",
384 | "metadata": {},
385 | "source": [
386 | "### NOTES\n",
387 | "\n",
388 | "An **underdetermined** system is one in which there are fewer equations than unknowns. It has either 0 or infinitely many solutions. There are 0 solutions when the equations are inconsistent, for example x+y=1 and x+y=2: both cannot hold, because the same expression would have to equal two different values. Otherwise, there are infinitely many solutions.\n",
389 | "\n",
390 | "An **overdetermined** system is one in which there are more equations than unknowns. Be wary of inconsistent equations (x+y=1 and x+y=2) that will cause this system to have 0 solutions. Otherwise, a unique solution exists as long as the equations include as many independent equations as there are unknowns.\n",
391 | "\n",
392 | "**Takeaway:** *so long as the number of unique equations is at least the same as the number of variables AND there are no inconsistencies, a unique solution exists.*"
393 | ]
394 | }
395 | ],
396 | "metadata": {
397 | "kernelspec": {
398 | "display_name": "Python [conda root]",
399 | "language": "python",
400 | "name": "conda-root-py"
401 | },
402 | "language_info": {
403 | "codemirror_mode": {
404 | "name": "ipython",
405 | "version": 3
406 | },
407 | "file_extension": ".py",
408 | "mimetype": "text/x-python",
409 | "name": "python",
410 | "nbconvert_exporter": "python",
411 | "pygments_lexer": "ipython3",
412 | "version": "3.5.4"
413 | }
414 | },
415 | "nbformat": 4,
416 | "nbformat_minor": 2
417 | }
418 |
--------------------------------------------------------------------------------
/notebooks/Python/Programming_Problems/Factorial.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "## Introduction"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "Factorial(n), written as *n!*, is the product of all positive integers from 1 up to *n*; by convention, 0! = 1. \n",
15 | "\n",
16 | "8! = 8 x 7 x 6 x 5 x 4 x 3 x 2 x 1 = 40320"
17 | ]
18 | },
19 | {
20 | "cell_type": "markdown",
21 | "metadata": {},
22 | "source": [
23 | "## Instructions"
24 | ]
25 | },
26 | {
27 | "cell_type": "markdown",
28 | "metadata": {},
29 | "source": [
30 | "Write a function called **factorial** to calculate the factorial of a given number."
31 | ]
32 | },
33 | {
34 | "cell_type": "markdown",
35 | "metadata": {},
36 | "source": [
37 | "## Solutions"
38 | ]
39 | },
40 | {
41 | "cell_type": "code",
42 | "execution_count": 1,
43 | "metadata": {},
44 | "outputs": [],
45 | "source": [
46 | "import numpy as np"
47 | ]
48 | },
49 | {
50 | "cell_type": "markdown",
51 | "metadata": {},
52 | "source": [
53 | "#### No Recursion"
54 | ]
55 | },
56 | {
57 | "cell_type": "code",
58 | "execution_count": 11,
59 | "metadata": {},
60 | "outputs": [],
61 | "source": [
62 | "def factorial(n):\n",
63 | " '''returns factorial of number n w/out recursion'''\n",
64 | " \n",
65 | " # error checking\n",
66 | " assert n >= 0, 'n must be a nonnegative integer'\n",
67 | " assert type(n) == int, 'n must be a nonnegative integer'\n",
68 | " \n",
69 | " out = 1\n",
70 | " for value in range(1, n+1):\n",
71 | " out *= value\n",
72 | " return out"
73 | ]
74 | },
75 | {
76 | "cell_type": "code",
77 | "execution_count": 58,
78 | "metadata": {},
79 | "outputs": [
80 | {
81 | "name": "stdout",
82 | "output_type": "stream",
83 | "text": [
84 | "n: 0 | result: 1\n",
85 | "n: 1 | result: 1\n",
86 | "n: 2 | result: 2\n",
87 | "n: 3 | result: 6\n",
88 | "n: 4 | result: 24\n",
89 | "n: 5 | result: 120\n",
90 | "n: 6 | result: 720\n",
91 | "n: 7 | result: 5040\n",
92 | "n: 8 | result: 40320\n",
93 | "n: 9 | result: 362880\n"
94 | ]
95 | }
96 | ],
97 | "source": [
98 | "for i in range(10):\n",
99 | " print('n: {} | result: {}'.format(i, factorial(i)))"
100 | ]
101 | },
102 | {
103 | "cell_type": "markdown",
104 | "metadata": {},
105 | "source": [
106 | "#### Recursion"
107 | ]
108 | },
109 | {
110 | "cell_type": "code",
111 | "execution_count": 47,
112 | "metadata": {},
113 | "outputs": [],
114 | "source": [
115 | "def recursive_factorial(n):\n",
116 | " '''returns factorial of number n w/recursion'''\n",
117 | " \n",
118 | " # error checking\n",
119 | " assert n >= 0, 'n must be a nonnegative integer'\n",
120 | " assert type(n) == int, 'n must be a nonnegative integer'\n",
121 | " \n",
122 | " # main logic\n",
123 | " if n == 0: \n",
124 | " return 1\n",
125 | " else:\n",
126 | " output = n * recursive_factorial(n - 1) \n",
127 | " return output"
128 | ]
129 | },
130 | {
131 | "cell_type": "code",
132 | "execution_count": 57,
133 | "metadata": {},
134 | "outputs": [
135 | {
136 | "name": "stdout",
137 | "output_type": "stream",
138 | "text": [
139 | "n: 0 | result: 1\n",
140 | "n: 1 | result: 1\n",
141 | "n: 2 | result: 2\n",
142 | "n: 3 | result: 6\n",
143 | "n: 4 | result: 24\n",
144 | "n: 5 | result: 120\n",
145 | "n: 6 | result: 720\n",
146 | "n: 7 | result: 5040\n",
147 | "n: 8 | result: 40320\n",
148 | "n: 9 | result: 362880\n"
149 | ]
150 | }
151 | ],
152 | "source": [
153 | "for i in range(10):\n",
154 | " print('n: {} | result: {}'.format(i, recursive_factorial(i)))"
155 | ]
156 | }
157 | ],
158 | "metadata": {
159 | "kernelspec": {
160 | "display_name": "Python [conda root]",
161 | "language": "python",
162 | "name": "conda-root-py"
163 | },
164 | "language_info": {
165 | "codemirror_mode": {
166 | "name": "ipython",
167 | "version": 3
168 | },
169 | "file_extension": ".py",
170 | "mimetype": "text/x-python",
171 | "name": "python",
172 | "nbconvert_exporter": "python",
173 | "pygments_lexer": "ipython3",
174 | "version": "3.5.4"
175 | }
176 | },
177 | "nbformat": 4,
178 | "nbformat_minor": 2
179 | }
180 |
--------------------------------------------------------------------------------
/notebooks/Python/Programming_Problems/Fibonacci.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "## Introduction"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "From Wikipedia:\n",
15 | "\n",
16 | ">In mathematics, the Fibonacci numbers are the numbers in the following integer sequence, called the Fibonacci sequence, and characterized by the fact that every number after the first two is the sum of the two preceding ones:\n",
17 | "\n",
18 | ">0, 1, 1, 2, 3, 5, 8, 13, 21, 34, 55, 89, 144, ...\n",
19 | "\n",
20 | "\n",
21 | ">By definition, the first two numbers in the Fibonacci sequence are either 1 and 1, or 0 and 1, depending on the chosen starting point of the sequence, and each subsequent number is the sum of the previous two.\n",
22 | "\n",
23 | ">The sequence $F_n$ of Fibonacci numbers is defined by the recurrence relation:\n",
24 | "\n",
25 | ">**$F_{n}$ = $F_{n-1}$ + $F_{n-2}$**"
26 | ]
27 | },
28 | {
29 | "cell_type": "markdown",
30 | "metadata": {},
31 | "source": [
32 | "## Instructions"
33 | ]
34 | },
35 | {
36 | "cell_type": "markdown",
37 | "metadata": {},
38 | "source": [
39 | "Write a function called **fibonacci** that returns the $n$-th Fibonacci number $F_n$ for a given nonnegative integer $n$."
40 | ]
41 | },
42 | {
43 | "cell_type": "markdown",
44 | "metadata": {},
45 | "source": [
46 | "## Solution"
47 | ]
48 | },
49 | {
50 | "cell_type": "code",
51 | "execution_count": 1,
52 | "metadata": {},
53 | "outputs": [],
54 | "source": [
55 | "import numpy as np"
56 | ]
57 | },
58 | {
59 | "cell_type": "markdown",
60 | "metadata": {},
61 | "source": [
62 | "#### Recursion"
63 | ]
64 | },
65 | {
66 | "cell_type": "code",
67 | "execution_count": 2,
68 | "metadata": {},
69 | "outputs": [],
70 | "source": [
71 | "def fibonacci(n):\n",
72 | " '''with recursion'''\n",
73 | " \n",
74 | " # error checking\n",
75 | " assert n >= 0, 'n must be a nonnegative integer'\n",
76 | " assert type(n) == int, 'n must be a nonnegative integer'\n",
77 | " \n",
78 | " # main logic\n",
79 | " if n == 0: \n",
80 | " return 0\n",
81 | " elif n == 1: \n",
82 | " return 1\n",
83 | " else: \n",
84 | " return fibonacci(n-1) + fibonacci(n-2)"
85 | ]
86 | },
87 | {
88 | "cell_type": "code",
89 | "execution_count": 3,
90 | "metadata": {},
91 | "outputs": [
92 | {
93 | "name": "stdout",
94 | "output_type": "stream",
95 | "text": [
96 | "n: 0 | result: 0\n",
97 | "n: 1 | result: 1\n",
98 | "n: 2 | result: 1\n",
99 | "n: 3 | result: 2\n",
100 | "n: 4 | result: 3\n",
101 | "n: 5 | result: 5\n",
102 | "n: 6 | result: 8\n",
103 | "n: 7 | result: 13\n",
104 | "n: 8 | result: 21\n",
105 | "n: 9 | result: 34\n"
106 | ]
107 | }
108 | ],
109 | "source": [
110 | "for i in range(10):\n",
111 | " print('n: {} | result: {}'.format(i, fibonacci(i)))"
112 | ]
113 | },
114 | {
115 | "cell_type": "markdown",
116 | "metadata": {},
117 | "source": [
118 | "## What's going on here?"
119 | ]
120 | },
121 | {
122 | "cell_type": "markdown",
123 | "metadata": {},
124 | "source": [
125 | ""
126 | ]
127 | },
128 | {
129 | "cell_type": "markdown",
130 | "metadata": {},
131 | "source": [
132 | "## A little more detail..."
133 | ]
134 | },
135 | {
136 | "cell_type": "markdown",
137 | "metadata": {},
138 | "source": [
139 | ""
140 | ]
141 | }
142 | ],
143 | "metadata": {
144 | "kernelspec": {
145 | "display_name": "Python [conda root]",
146 | "language": "python",
147 | "name": "conda-root-py"
148 | },
149 | "language_info": {
150 | "codemirror_mode": {
151 | "name": "ipython",
152 | "version": 3
153 | },
154 | "file_extension": ".py",
155 | "mimetype": "text/x-python",
156 | "name": "python",
157 | "nbconvert_exporter": "python",
158 | "pygments_lexer": "ipython3",
159 | "version": "3.5.4"
160 | }
161 | },
162 | "nbformat": 4,
163 | "nbformat_minor": 2
164 | }
165 |
--------------------------------------------------------------------------------
/notebooks/Python/Programming_Problems/GCD_Problem.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "def naive_gcd(num1, num2):\n",
10 | " '''enumerate divisors and returns largest of intersection(num1, num2).'''\n",
11 | " \n",
12 | " assert num1 > 0, \"num1 must be positive.\"\n",
13 | " assert num2 > 0, \"num2 must be positive.\"\n",
14 | " \n",
15 | " divisors1 = []\n",
16 | " divisors2 = []\n",
17 | " \n",
18 | " count = 0\n",
19 | " \n",
20 | " for divisor in range(1, num1):\n",
21 | " count += 1 ## just to keep tally\n",
22 | " if num1 % divisor == 0:\n",
23 | " divisors1.append(divisor)\n",
24 | " \n",
25 | " for divisor in range(1, num2):\n",
26 | " count += 1 ## just to keep tally\n",
27 | " if num2 % divisor == 0:\n",
28 | " divisors2.append(divisor)\n",
29 | " \n",
30 | " gcd = max(set(divisors1) & set(divisors2))\n",
31 | " print(\"GCD: {}\\nCount: {}\".format(gcd, count))"
32 | ]
33 | },
34 | {
35 | "cell_type": "code",
36 | "execution_count": 2,
37 | "metadata": {},
38 | "outputs": [
39 | {
40 | "name": "stdout",
41 | "output_type": "stream",
42 | "text": [
43 | "GCD: 4\n",
44 | "Count: 16098\n"
45 | ]
46 | }
47 | ],
48 | "source": [
49 | "naive_gcd(1236, 14864)"
50 | ]
51 | },
52 | {
53 | "cell_type": "code",
54 | "execution_count": 3,
55 | "metadata": {},
56 | "outputs": [],
57 | "source": [
58 | "def gcd(num1, num2):\n",
59 | " '''returns the greatest common divisor'''\n",
60 | " \n",
61 | " assert num1 >= 0, \"num1 must be nonnegative.\"\n",
62 | " assert num2 >= 0, \"num2 must be nonnegative.\"\n",
63 | " \n",
64 | " count = 0\n",
65 | " \n",
66 | " while num1 != 0:\n",
67 | " count += 1 ## just to keep tally\n",
68 | " gcd = num1\n",
69 | " num1 = num2 % num1\n",
70 | " num2 = gcd\n",
71 | " print(\"GCD: {}\\nCount: {}\".format(gcd, count))"
72 | ]
73 | },
74 | {
75 | "cell_type": "code",
76 | "execution_count": 4,
77 | "metadata": {},
78 | "outputs": [
79 | {
80 | "name": "stdout",
81 | "output_type": "stream",
82 | "text": [
83 | "GCD: 4\n",
84 | "Count: 6\n"
85 | ]
86 | }
87 | ],
88 | "source": [
89 | "gcd(1236,14864)"
90 | ]
91 | },
92 | {
93 | "cell_type": "markdown",
94 | "metadata": {},
95 | "source": [
96 | "---"
97 | ]
98 | },
99 | {
100 | "cell_type": "code",
101 | "execution_count": 5,
102 | "metadata": {},
103 | "outputs": [
104 | {
105 | "name": "stdout",
106 | "output_type": "stream",
107 | "text": [
108 | "GCD: 1\n",
109 | "Count: 333101\n"
110 | ]
111 | }
112 | ],
113 | "source": [
114 | "naive_gcd(134526, 198577)"
115 | ]
116 | },
117 | {
118 | "cell_type": "code",
119 | "execution_count": 6,
120 | "metadata": {},
121 | "outputs": [
122 | {
123 | "name": "stdout",
124 | "output_type": "stream",
125 | "text": [
126 | "GCD: 1\n",
127 | "Count: 8\n"
128 | ]
129 | }
130 | ],
131 | "source": [
132 | "gcd(134526, 198577)"
133 | ]
134 | }
135 | ],
136 | "metadata": {
137 | "kernelspec": {
138 | "display_name": "Python [conda root]",
139 | "language": "python",
140 | "name": "conda-root-py"
141 | },
142 | "language_info": {
143 | "codemirror_mode": {
144 | "name": "ipython",
145 | "version": 3
146 | },
147 | "file_extension": ".py",
148 | "mimetype": "text/x-python",
149 | "name": "python",
150 | "nbconvert_exporter": "python",
151 | "pygments_lexer": "ipython3",
152 | "version": "3.5.4"
153 | }
154 | },
155 | "nbformat": 4,
156 | "nbformat_minor": 2
157 | }
158 |
--------------------------------------------------------------------------------
/notebooks/Python/Programming_Problems/Moving_Average_Problem.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {
7 | "collapsed": true
8 | },
9 | "outputs": [],
10 | "source": [
11 | "import numpy as np"
12 | ]
13 | },
14 | {
15 | "cell_type": "markdown",
16 | "metadata": {},
17 | "source": [
18 | "#### Show Step-by-Step"
19 | ]
20 | },
21 | {
22 | "cell_type": "code",
23 | "execution_count": 2,
24 | "metadata": {},
25 | "outputs": [
26 | {
27 | "data": {
28 | "text/plain": [
29 | "array([ 2, 4, 8, 16, 7, 99])"
30 | ]
31 | },
32 | "execution_count": 2,
33 | "metadata": {},
34 | "output_type": "execute_result"
35 | }
36 | ],
37 | "source": [
38 | "test = np.array([2,4,8,16,7,99])\n",
39 | "test"
40 | ]
41 | },
42 | {
43 | "cell_type": "code",
44 | "execution_count": 3,
45 | "metadata": {},
46 | "outputs": [
47 | {
48 | "name": "stdout",
49 | "output_type": "stream",
50 | "text": [
51 | "2.0\n",
52 | "3.0\n",
53 | "4.66666666667\n",
54 | "7.5\n",
55 | "7.4\n",
56 | "22.6666666667\n"
57 | ]
58 | }
59 | ],
60 | "source": [
61 | "for i in range(len(test)):\n",
62 | " if i == 0:\n",
63 | " print(np.mean(test[0]))\n",
64 | " else:\n",
65 | " print(np.mean(test[:i+1]))"
66 | ]
67 | },
68 | {
69 | "cell_type": "markdown",
70 | "metadata": {},
71 | "source": [
72 | "#### Function for Rolling Mean"
73 | ]
74 | },
75 | {
76 | "cell_type": "code",
77 | "execution_count": 4,
78 | "metadata": {
79 | "collapsed": true
80 | },
81 | "outputs": [],
82 | "source": [
83 | "def rolling_mean(array, verbose=False):\n",
84 | " '''computes the mean incrementally'''\n",
85 | " \n",
86 | " assert type(array) == np.ndarray, \"input must be of type np.ndarray\"\n",
87 | " \n",
88 | " mv_avg = 0\n",
89 | " num_elements = len(array)\n",
90 | " \n",
91 | " for i, elem in enumerate(array):\n",
92 | " mv_avg += (elem-mv_avg)/(i+1)\n",
93 | " if verbose:\n",
94 | " print(i+1, mv_avg)\n",
95 | " return mv_avg"
96 | ]
97 | },
98 | {
99 | "cell_type": "markdown",
100 | "metadata": {},
101 | "source": [
102 | "#### Example"
103 | ]
104 | },
105 | {
106 | "cell_type": "code",
107 | "execution_count": 5,
108 | "metadata": {},
109 | "outputs": [
110 | {
111 | "name": "stdout",
112 | "output_type": "stream",
113 | "text": [
114 | "1 2.0\n",
115 | "2 3.0\n",
116 | "3 4.66666666667\n",
117 | "4 7.5\n",
118 | "5 7.4\n",
119 | "6 22.6666666667\n"
120 | ]
121 | },
122 | {
123 | "data": {
124 | "text/plain": [
125 | "22.666666666666664"
126 | ]
127 | },
128 | "execution_count": 5,
129 | "metadata": {},
130 | "output_type": "execute_result"
131 | }
132 | ],
133 | "source": [
134 | "rolling_mean(test, verbose=True)"
135 | ]
136 | }
137 | ],
138 | "metadata": {
139 | "kernelspec": {
140 | "display_name": "Python [conda root]",
141 | "language": "python",
142 | "name": "conda-root-py"
143 | },
144 | "language_info": {
145 | "codemirror_mode": {
146 | "name": "ipython",
147 | "version": 3
148 | },
149 | "file_extension": ".py",
150 | "mimetype": "text/x-python",
151 | "name": "python",
152 | "nbconvert_exporter": "python",
153 | "pygments_lexer": "ipython3",
154 | "version": "3.5.4"
155 | }
156 | },
157 | "nbformat": 4,
158 | "nbformat_minor": 2
159 | }
160 |
--------------------------------------------------------------------------------
/notebooks/Python/Programming_Problems/Project_Euler_Problem_8_with_Deque.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "## Objective"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "This notebook shows one way to solve [Project Euler Problem #8](https://projecteuler.net/problem=8) using a great data structure called a deque."
15 | ]
16 | },
17 | {
18 | "cell_type": "markdown",
19 | "metadata": {},
20 | "source": [
21 | "## Imports"
22 | ]
23 | },
24 | {
25 | "cell_type": "code",
26 | "execution_count": 1,
27 | "metadata": {},
28 | "outputs": [],
29 | "source": [
30 | "from collections import deque\n",
31 | "import numpy as np"
32 | ]
33 | },
34 | {
35 | "cell_type": "markdown",
36 | "metadata": {},
37 | "source": [
38 | "## Data"
39 | ]
40 | },
41 | {
42 | "cell_type": "code",
43 | "execution_count": 2,
44 | "metadata": {},
45 | "outputs": [],
46 | "source": [
47 | "num = '''73167176531330624919225119674426574742355349194934\n",
48 | "96983520312774506326239578318016984801869478851843\n",
49 | "85861560789112949495459501737958331952853208805511\n",
50 | "12540698747158523863050715693290963295227443043557\n",
51 | "66896648950445244523161731856403098711121722383113\n",
52 | "62229893423380308135336276614282806444486645238749\n",
53 | "30358907296290491560440772390713810515859307960866\n",
54 | "70172427121883998797908792274921901699720888093776\n",
55 | "65727333001053367881220235421809751254540594752243\n",
56 | "52584907711670556013604839586446706324415722155397\n",
57 | "53697817977846174064955149290862569321978468622482\n",
58 | "83972241375657056057490261407972968652414535100474\n",
59 | "82166370484403199890008895243450658541227588666881\n",
60 | "16427171479924442928230863465674813919123162824586\n",
61 | "17866458359124566529476545682848912883142607690042\n",
62 | "24219022671055626321111109370544217506941658960408\n",
63 | "07198403850962455444362981230987879927244284909188\n",
64 | "84580156166097919133875499200524063689912560717606\n",
65 | "05886116467109405077541002256983155200055935729725\n",
66 | "71636269561882670428252483600823257530420752963450'''"
67 | ]
68 | },
69 | {
70 | "cell_type": "markdown",
71 | "metadata": {},
72 | "source": [
73 | "## Function"
74 | ]
75 | },
76 | {
77 | "cell_type": "code",
78 | "execution_count": 3,
79 | "metadata": {},
80 | "outputs": [],
81 | "source": [
82 | "def max_prod(numString, numDigits=4):\n",
83 | " '''takes string of numbers and outputs greatest product exactly numDigits long'''\n",
84 | " \n",
85 | " numString = numString.replace('\\n','') ## remove newline characters\n",
86 | " dq = deque(maxlen=numDigits)\n",
87 | " max_prod = 0\n",
88 | " \n",
89 | " for num in numString:\n",
90 | " dq.append(num)\n",
91 | " if len(dq) == dq.maxlen:\n",
92 | " current = np.prod([int(val) for val in dq])\n",
93 | " if current > max_prod:\n",
94 | " max_prod = current\n",
95 | " return max_prod"
96 | ]
97 | },
98 | {
99 | "cell_type": "markdown",
100 | "metadata": {},
101 | "source": [
102 | "## Example #1"
103 | ]
104 | },
105 | {
106 | "cell_type": "code",
107 | "execution_count": 4,
108 | "metadata": {},
109 | "outputs": [
110 | {
111 | "data": {
112 | "text/plain": [
113 | "5832"
114 | ]
115 | },
116 | "execution_count": 4,
117 | "metadata": {},
118 | "output_type": "execute_result"
119 | }
120 | ],
121 | "source": [
122 | "max_prod(num, numDigits=4)"
123 | ]
124 | },
125 | {
126 | "cell_type": "markdown",
127 | "metadata": {},
128 | "source": [
129 | "## Example #2"
130 | ]
131 | },
132 | {
133 | "cell_type": "code",
134 | "execution_count": 5,
135 | "metadata": {},
136 | "outputs": [
137 | {
138 | "data": {
139 | "text/plain": [
140 | "23514624000"
141 | ]
142 | },
143 | "execution_count": 5,
144 | "metadata": {},
145 | "output_type": "execute_result"
146 | }
147 | ],
148 | "source": [
149 | "max_prod(num, numDigits=13)"
150 | ]
151 | },
152 | {
153 | "cell_type": "code",
154 | "execution_count": 6,
155 | "metadata": {},
156 | "outputs": [
157 | {
158 | "name": "stdout",
159 | "output_type": "stream",
160 | "text": [
161 | "12.1 ms ± 802 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n"
162 | ]
163 | }
164 | ],
165 | "source": [
166 | "%timeit max_prod(num, numDigits=13)"
167 | ]
168 | }
169 | ],
170 | "metadata": {
171 | "kernelspec": {
172 | "display_name": "Python [conda root]",
173 | "language": "python",
174 | "name": "conda-root-py"
175 | },
176 | "language_info": {
177 | "codemirror_mode": {
178 | "name": "ipython",
179 | "version": 3
180 | },
181 | "file_extension": ".py",
182 | "mimetype": "text/x-python",
183 | "name": "python",
184 | "nbconvert_exporter": "python",
185 | "pygments_lexer": "ipython3",
186 | "version": "3.5.4"
187 | }
188 | },
189 | "nbformat": 4,
190 | "nbformat_minor": 2
191 | }
192 |
--------------------------------------------------------------------------------
/notebooks/Python/Programming_Problems/Reverse_String.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "### Given a string, write a function called *reverseString* to reverse it. Do this using a loop, if possible."
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": 1,
13 | "metadata": {},
14 | "outputs": [],
15 | "source": [
16 | "def reverseString(string):\n",
17 | " '''takes a string and returns it in reverse order (w/for loop)'''\n",
18 | " \n",
19 | " # type checking\n",
20 | " assert type(string) == str, 'input must be a string'\n",
21 | " \n",
22 | " # main logic\n",
23 | " string_length = len(string)-1\n",
24 | " new_string = ''\n",
25 | " for i in range(string_length, -1, -1):\n",
26 | " new_string += string[i]\n",
27 | " return new_string\n"
28 | ]
29 | },
30 | {
31 | "cell_type": "code",
32 | "execution_count": 2,
33 | "metadata": {
34 | "collapsed": true
35 | },
36 | "outputs": [],
37 | "source": [
38 | "def reverseString2(string):\n",
39 | " '''takes a string and returns it in reverse order (w/slicing)'''\n",
40 | " \n",
41 | " # type checking\n",
42 | " assert type(string) == str, 'input must be a string'\n",
43 | " \n",
44 | " # main logic\n",
45 | " return string[::-1]"
46 | ]
47 | },
48 | {
49 | "cell_type": "markdown",
50 | "metadata": {},
51 | "source": [
52 | "---"
53 | ]
54 | },
55 | {
56 | "cell_type": "code",
57 | "execution_count": 3,
58 | "metadata": {},
59 | "outputs": [
60 | {
61 | "data": {
62 | "text/plain": [
63 | "'this is an example string'"
64 | ]
65 | },
66 | "execution_count": 3,
67 | "metadata": {},
68 | "output_type": "execute_result"
69 | }
70 | ],
71 | "source": [
72 | "# example string\n",
73 | "s = 'this is an example string'\n",
74 | "s"
75 | ]
76 | },
77 | {
78 | "cell_type": "code",
79 | "execution_count": 4,
80 | "metadata": {},
81 | "outputs": [
82 | {
83 | "data": {
84 | "text/plain": [
85 | "'gnirts elpmaxe na si siht'"
86 | ]
87 | },
88 | "execution_count": 4,
89 | "metadata": {},
90 | "output_type": "execute_result"
91 | }
92 | ],
93 | "source": [
94 | "# 1st function\n",
95 | "reverseString(s)"
96 | ]
97 | },
98 | {
99 | "cell_type": "code",
100 | "execution_count": 5,
101 | "metadata": {},
102 | "outputs": [
103 | {
104 | "data": {
105 | "text/plain": [
106 | "'gnirts elpmaxe na si siht'"
107 | ]
108 | },
109 | "execution_count": 5,
110 | "metadata": {},
111 | "output_type": "execute_result"
112 | }
113 | ],
114 | "source": [
115 | "# 2nd function\n",
116 | "reverseString2(s)"
117 | ]
118 | },
119 | {
120 | "cell_type": "code",
121 | "execution_count": 6,
122 | "metadata": {},
123 | "outputs": [
124 | {
125 | "data": {
126 | "text/plain": [
127 | "True"
128 | ]
129 | },
130 | "execution_count": 6,
131 | "metadata": {},
132 | "output_type": "execute_result"
133 | }
134 | ],
135 | "source": [
136 | "# test for equality\n",
137 | "reverseString(s) == reverseString2(s)"
138 | ]
139 | }
140 | ],
141 | "metadata": {
142 | "kernelspec": {
143 | "display_name": "Python [conda root]",
144 | "language": "python",
145 | "name": "conda-root-py"
146 | },
147 | "language_info": {
148 | "codemirror_mode": {
149 | "name": "ipython",
150 | "version": 3
151 | },
152 | "file_extension": ".py",
153 | "mimetype": "text/x-python",
154 | "name": "python",
155 | "nbconvert_exporter": "python",
156 | "pygments_lexer": "ipython3",
157 | "version": "3.5.4"
158 | }
159 | },
160 | "nbformat": 4,
161 | "nbformat_minor": 2
162 | }
163 |
--------------------------------------------------------------------------------
/notebooks/Python/Programming_Problems/Rotation_Problem.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {
7 | "collapsed": true
8 | },
9 | "outputs": [],
10 | "source": [
11 | "def rotation(num1, num2, verbose=False):\n",
12 | " '''tests whether num2 is a rotated version of num1'''\n",
13 | " \n",
14 | " num1 = list(str(num1))\n",
15 | " num2 = list(str(num2))\n",
16 | " \n",
17 | " assert len(num1) == len(num2), \"numbers are different lengths\"\n",
18 | " \n",
19 | " for i, _ in enumerate(num1):\n",
20 | " if num1 == num2:\n",
21 | " print('{} rotations to the right'.format(i))\n",
22 | " return True\n",
23 | " else:\n",
24 | " pop = list(num2.pop())\n",
25 | " num2 = pop + num2\n",
26 | " if verbose:\n",
27 | " print(num1)\n",
28 | " print(num2)\n",
29 | " print('-'*25)"
30 | ]
31 | },
32 | {
33 | "cell_type": "code",
34 | "execution_count": 2,
35 | "metadata": {
36 | "collapsed": true
37 | },
38 | "outputs": [],
39 | "source": [
40 | "num1 = 12345\n",
41 | "num2 = 51234"
42 | ]
43 | },
44 | {
45 | "cell_type": "code",
46 | "execution_count": 3,
47 | "metadata": {},
48 | "outputs": [
49 | {
50 | "name": "stdout",
51 | "output_type": "stream",
52 | "text": [
53 | "['1', '2', '3', '4', '5']\n",
54 | "['4', '5', '1', '2', '3']\n",
55 | "-------------------------\n",
56 | "['1', '2', '3', '4', '5']\n",
57 | "['3', '4', '5', '1', '2']\n",
58 | "-------------------------\n",
59 | "['1', '2', '3', '4', '5']\n",
60 | "['2', '3', '4', '5', '1']\n",
61 | "-------------------------\n",
62 | "['1', '2', '3', '4', '5']\n",
63 | "['1', '2', '3', '4', '5']\n",
64 | "-------------------------\n",
65 | "4 rotations to the right\n"
66 | ]
67 | },
68 | {
69 | "data": {
70 | "text/plain": [
71 | "True"
72 | ]
73 | },
74 | "execution_count": 3,
75 | "metadata": {},
76 | "output_type": "execute_result"
77 | }
78 | ],
79 | "source": [
80 | "rotation(num1, num2, verbose=True)"
81 | ]
82 | }
83 | ],
84 | "metadata": {
85 | "kernelspec": {
86 | "display_name": "Python [conda root]",
87 | "language": "python",
88 | "name": "conda-root-py"
89 | },
90 | "language_info": {
91 | "codemirror_mode": {
92 | "name": "ipython",
93 | "version": 3
94 | },
95 | "file_extension": ".py",
96 | "mimetype": "text/x-python",
97 | "name": "python",
98 | "nbconvert_exporter": "python",
99 | "pygments_lexer": "ipython3",
100 | "version": "3.5.4"
101 | }
102 | },
103 | "nbformat": 4,
104 | "nbformat_minor": 2
105 | }
106 |
--------------------------------------------------------------------------------
/notebooks/Python/Python_Internals/Arrays_vs_Lists.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {
7 | "collapsed": true
8 | },
9 | "outputs": [],
10 | "source": [
11 | "import numpy as np"
12 | ]
13 | },
14 | {
15 | "cell_type": "code",
16 | "execution_count": 2,
17 | "metadata": {
18 | "collapsed": true
19 | },
20 | "outputs": [],
21 | "source": [
22 | "arr = np.arange(1e7)\n",
23 | "lst = arr.tolist()"
24 | ]
25 | },
26 | {
27 | "cell_type": "code",
28 | "execution_count": 3,
29 | "metadata": {},
30 | "outputs": [
31 | {
32 | "data": {
33 | "text/plain": [
34 | "numpy.ndarray"
35 | ]
36 | },
37 | "execution_count": 3,
38 | "metadata": {},
39 | "output_type": "execute_result"
40 | }
41 | ],
42 | "source": [
43 | "type(arr)"
44 | ]
45 | },
46 | {
47 | "cell_type": "code",
48 | "execution_count": 4,
49 | "metadata": {},
50 | "outputs": [
51 | {
52 | "data": {
53 | "text/plain": [
54 | "list"
55 | ]
56 | },
57 | "execution_count": 4,
58 | "metadata": {},
59 | "output_type": "execute_result"
60 | }
61 | ],
62 | "source": [
63 | "type(lst)"
64 | ]
65 | },
66 | {
67 | "cell_type": "markdown",
68 | "metadata": {},
69 | "source": [
70 | "## Empirical Testing (Simple)"
71 | ]
72 | },
73 | {
74 | "cell_type": "markdown",
75 | "metadata": {},
76 | "source": [
77 | "#### Show Equivalent (Simple)"
78 | ]
79 | },
80 | {
81 | "cell_type": "code",
82 | "execution_count": 5,
83 | "metadata": {},
84 | "outputs": [
85 | {
86 | "data": {
87 | "text/plain": [
88 | "0"
89 | ]
90 | },
91 | "execution_count": 5,
92 | "metadata": {},
93 | "output_type": "execute_result"
94 | }
95 | ],
96 | "source": [
97 | "sum(arr[arr > 1e5] != [el for el in lst if el > 1e5])"
98 | ]
99 | },
100 | {
101 | "cell_type": "markdown",
102 | "metadata": {},
103 | "source": [
104 | "#### Timing (Simple)"
105 | ]
106 | },
107 | {
108 | "cell_type": "code",
109 | "execution_count": 6,
110 | "metadata": {},
111 | "outputs": [
112 | {
113 | "name": "stdout",
114 | "output_type": "stream",
115 | "text": [
116 | "42 ms ± 4.72 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)\n"
117 | ]
118 | }
119 | ],
120 | "source": [
121 | "%timeit arr[arr > 1e5] "
122 | ]
123 | },
124 | {
125 | "cell_type": "code",
126 | "execution_count": 7,
127 | "metadata": {},
128 | "outputs": [
129 | {
130 | "name": "stdout",
131 | "output_type": "stream",
132 | "text": [
133 | "1.82 s ± 38.5 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
134 | ]
135 | }
136 | ],
137 | "source": [
138 | "%timeit [el for el in lst if el > 1e5]"
139 | ]
140 | },
141 | {
142 | "cell_type": "markdown",
143 | "metadata": {},
144 | "source": [
145 | "## Empirical Testing (Complex)"
146 | ]
147 | },
148 | {
149 | "cell_type": "markdown",
150 | "metadata": {},
151 | "source": [
152 | "#### Show Equivalent (Complex)"
153 | ]
154 | },
155 | {
156 | "cell_type": "code",
157 | "execution_count": 8,
158 | "metadata": {},
159 | "outputs": [
160 | {
161 | "data": {
162 | "text/plain": [
163 | "0"
164 | ]
165 | },
166 | "execution_count": 8,
167 | "metadata": {},
168 | "output_type": "execute_result"
169 | }
170 | ],
171 | "source": [
172 | "sum(arr[arr < 1e5][arr[arr < 1e5] > 1e4] != [el for el in lst if el > 1e4 and el < 1e5])"
173 | ]
174 | },
175 | {
176 | "cell_type": "markdown",
177 | "metadata": {},
178 | "source": [
179 | "#### Timing (Complex)"
180 | ]
181 | },
182 | {
183 | "cell_type": "code",
184 | "execution_count": 9,
185 | "metadata": {},
186 | "outputs": [
187 | {
188 | "name": "stdout",
189 | "output_type": "stream",
190 | "text": [
191 | "18.9 ms ± 1.31 ms per loop (mean ± std. dev. of 7 runs, 100 loops each)\n"
192 | ]
193 | }
194 | ],
195 | "source": [
196 | "%timeit arr[arr < 1e5][arr[arr < 1e5] > 1e4]"
197 | ]
198 | },
199 | {
200 | "cell_type": "code",
201 | "execution_count": 10,
202 | "metadata": {},
203 | "outputs": [
204 | {
205 | "name": "stdout",
206 | "output_type": "stream",
207 | "text": [
208 | "2.03 s ± 56.7 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
209 | ]
210 | }
211 | ],
212 | "source": [
213 | "%timeit [el for el in lst if el > 1e4 and el < 1e5]"
214 | ]
215 | }
216 | ],
217 | "metadata": {
218 | "kernelspec": {
219 | "display_name": "Python [conda root]",
220 | "language": "python",
221 | "name": "conda-root-py"
222 | },
223 | "language_info": {
224 | "codemirror_mode": {
225 | "name": "ipython",
226 | "version": 3
227 | },
228 | "file_extension": ".py",
229 | "mimetype": "text/x-python",
230 | "name": "python",
231 | "nbconvert_exporter": "python",
232 | "pygments_lexer": "ipython3",
233 | "version": "3.5.4"
234 | }
235 | },
236 | "nbformat": 4,
237 | "nbformat_minor": 2
238 | }
239 |
--------------------------------------------------------------------------------
/notebooks/Python/Python_Internals/Working_with_Bits.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {
7 | "collapsed": true
8 | },
9 | "outputs": [],
10 | "source": [
11 | "import numpy as np"
12 | ]
13 | },
14 | {
15 | "cell_type": "code",
16 | "execution_count": 2,
17 | "metadata": {},
18 | "outputs": [
19 | {
20 | "data": {
21 | "text/plain": [
22 | "116"
23 | ]
24 | },
25 | "execution_count": 2,
26 | "metadata": {},
27 | "output_type": "execute_result"
28 | }
29 | ],
30 | "source": [
31 | "np.bitwise_xor(23,99)"
32 | ]
33 | },
34 | {
35 | "cell_type": "code",
36 | "execution_count": 3,
37 | "metadata": {},
38 | "outputs": [
39 | {
40 | "data": {
41 | "text/plain": [
42 | "'1110100'"
43 | ]
44 | },
45 | "execution_count": 3,
46 | "metadata": {},
47 | "output_type": "execute_result"
48 | }
49 | ],
50 | "source": [
51 | "np.binary_repr(116)"
52 | ]
53 | },
54 | {
55 | "cell_type": "code",
56 | "execution_count": null,
57 | "metadata": {
58 | "collapsed": true
59 | },
60 | "outputs": [],
61 | "source": []
62 | }
63 | ],
64 | "metadata": {
65 | "kernelspec": {
66 | "display_name": "Python [conda root]",
67 | "language": "python",
68 | "name": "conda-root-py"
69 | },
70 | "language_info": {
71 | "codemirror_mode": {
72 | "name": "ipython",
73 | "version": 3
74 | },
75 | "file_extension": ".py",
76 | "mimetype": "text/x-python",
77 | "name": "python",
78 | "nbconvert_exporter": "python",
79 | "pygments_lexer": "ipython3",
80 | "version": "3.5.4"
81 | }
82 | },
83 | "nbformat": 4,
84 | "nbformat_minor": 2
85 | }
86 |
--------------------------------------------------------------------------------
/notebooks/Python/Python_Internals/listexp_vs_genexp.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {
7 | "collapsed": true
8 | },
9 | "outputs": [],
10 | "source": [
11 | "from itertools import cycle"
12 | ]
13 | },
14 | {
15 | "cell_type": "code",
16 | "execution_count": 2,
17 | "metadata": {
18 | "collapsed": true
19 | },
20 | "outputs": [],
21 | "source": [
22 | "lst = [x for x in range(int(1e6))]"
23 | ]
24 | },
25 | {
26 | "cell_type": "code",
27 | "execution_count": 3,
28 | "metadata": {},
29 | "outputs": [
30 | {
31 | "name": "stdout",
32 | "output_type": "stream",
33 | "text": [
34 | "499999500000\n",
35 | "499999500000\n",
36 | "499999500000\n",
37 | "499999500000\n",
38 | "499999500000\n",
39 | "499999500000\n",
40 | "499999500000\n",
41 | "499999500000\n",
42 | "499999500000\n",
43 | "499999500000\n",
44 | "499999500000\n",
45 | "499999500000\n",
46 | "499999500000\n",
47 | "499999500000\n",
48 | "499999500000\n",
49 | "499999500000\n",
50 | "499999500000\n",
51 | "499999500000\n",
52 | "499999500000\n",
53 | "499999500000\n",
54 | "499999500000\n",
55 | "499999500000\n",
56 | "499999500000\n",
57 | "499999500000\n",
58 | "499999500000\n",
59 | "499999500000\n",
60 | "499999500000\n",
61 | "499999500000\n",
62 | "499999500000\n",
63 | "499999500000\n",
64 | "499999500000\n",
65 | "499999500000\n",
66 | "499999500000\n",
67 | "499999500000\n",
68 | "499999500000\n",
69 | "499999500000\n",
70 | "499999500000\n",
71 | "499999500000\n",
72 | "499999500000\n",
73 | "499999500000\n",
74 | "499999500000\n",
75 | "499999500000\n",
76 | "499999500000\n",
77 | "499999500000\n",
78 | "499999500000\n",
79 | "499999500000\n",
80 | "499999500000\n",
81 | "499999500000\n",
82 | "499999500000\n",
83 | "499999500000\n",
84 | "499999500000\n",
85 | "499999500000\n",
86 | "499999500000\n",
87 | "499999500000\n",
88 | "499999500000\n",
89 | "499999500000\n",
90 | "499999500000\n",
91 | "499999500000\n",
92 | "499999500000\n",
93 | "499999500000\n",
94 | "499999500000\n",
95 | "499999500000\n",
96 | "499999500000\n",
97 | "499999500000\n",
98 | "499999500000\n",
99 | "499999500000\n",
100 | "499999500000\n",
101 | "499999500000\n",
102 | "499999500000\n",
103 | "499999500000\n",
104 | "499999500000\n",
105 | "499999500000\n",
106 | "499999500000\n",
107 | "499999500000\n",
108 | "499999500000\n",
109 | "499999500000\n",
110 | "499999500000\n",
111 | "499999500000\n",
112 | "499999500000\n",
113 | "499999500000\n",
114 | "499999500000\n",
115 | "148 ms ± 2.65 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)\n"
116 | ]
117 | }
118 | ],
119 | "source": [
120 | "%%timeit\n",
121 | "mysum = 0\n",
122 | "for el in lst:\n",
123 | " mysum += el\n",
124 | "print(mysum)"
125 | ]
126 | },
127 | {
128 | "cell_type": "code",
129 | "execution_count": 4,
130 | "metadata": {},
131 | "outputs": [],
132 | "source": [
133 | "gen = cycle((x for x in range(int(1e6))))"
134 | ]
135 | },
136 | {
137 | "cell_type": "code",
138 | "execution_count": 5,
139 | "metadata": {},
140 | "outputs": [
141 | {
142 | "name": "stdout",
143 | "output_type": "stream",
144 | "text": [
145 | "499999500000\n",
146 | "499999500000\n",
147 | "499999500000\n",
148 | "499999500000\n",
149 | "499999500000\n",
150 | "499999500000\n",
151 | "499999500000\n",
152 | "499999500000\n",
153 | "279 ms ± 17.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
154 | ]
155 | }
156 | ],
157 | "source": [
158 | "%%timeit\n",
159 | "mysum = 0\n",
160 | "for i in lst:\n",
161 | " mysum += next(gen)\n",
162 | "print(mysum)"
163 | ]
164 | },
165 | {
166 | "cell_type": "code",
167 | "execution_count": 6,
168 | "metadata": {
169 | "collapsed": true
170 | },
171 | "outputs": [],
172 | "source": [
173 | "def gen():\n",
174 | " while True:\n",
175 | " yield 1"
176 | ]
177 | },
178 | {
179 | "cell_type": "code",
180 | "execution_count": 7,
181 | "metadata": {},
182 | "outputs": [],
183 | "source": [
184 | "gen2 = gen()"
185 | ]
186 | },
187 | {
188 | "cell_type": "code",
189 | "execution_count": 8,
190 | "metadata": {},
191 | "outputs": [
192 | {
193 | "name": "stdout",
194 | "output_type": "stream",
195 | "text": [
196 | "1\n",
197 | "1\n",
198 | "1\n",
199 | "1\n",
200 | "1\n",
201 | "1\n",
202 | "1\n",
203 | "1\n",
204 | "1\n",
205 | "1\n"
206 | ]
207 | }
208 | ],
209 | "source": [
210 | "for i in range(10):\n",
211 | " print(next(gen2))"
212 | ]
213 | },
214 | {
215 | "cell_type": "code",
216 | "execution_count": 9,
217 | "metadata": {},
218 | "outputs": [
219 | {
220 | "name": "stdout",
221 | "output_type": "stream",
222 | "text": [
223 | "1000000\n",
224 | "1000000\n",
225 | "1000000\n",
226 | "1000000\n",
227 | "1000000\n",
228 | "1000000\n",
229 | "1000000\n",
230 | "1000000\n",
231 | "353 ms ± 14.6 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
232 | ]
233 | }
234 | ],
235 | "source": [
236 | "%%timeit\n",
237 | "mysum = 0\n",
238 | "for i in lst:\n",
239 | " mysum += next(gen2)\n",
240 | "print(mysum)"
241 | ]
242 | }
243 | ],
244 | "metadata": {
245 | "kernelspec": {
246 | "display_name": "Python [conda root]",
247 | "language": "python",
248 | "name": "conda-root-py"
249 | },
250 | "language_info": {
251 | "codemirror_mode": {
252 | "name": "ipython",
253 | "version": 3
254 | },
255 | "file_extension": ".py",
256 | "mimetype": "text/x-python",
257 | "name": "python",
258 | "nbconvert_exporter": "python",
259 | "pygments_lexer": "ipython3",
260 | "version": "3.5.4"
261 | }
262 | },
263 | "nbformat": 4,
264 | "nbformat_minor": 2
265 | }
266 |
--------------------------------------------------------------------------------
/notebooks/Python/Recursion/Factorial_&_Fibonacci.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 29,
6 | "metadata": {
7 | "collapsed": true
8 | },
9 | "outputs": [],
10 | "source": [
11 | "def factorial(n):\n",
12 | " '''no recursion'''\n",
13 | " val = 1\n",
14 | " while n > 0:\n",
15 | " val *= n\n",
16 | " n -= 1\n",
17 | " return val "
18 | ]
19 | },
20 | {
21 | "cell_type": "code",
22 | "execution_count": 33,
23 | "metadata": {},
24 | "outputs": [
25 | {
26 | "data": {
27 | "text/plain": [
28 | "40320"
29 | ]
30 | },
31 | "execution_count": 33,
32 | "metadata": {},
33 | "output_type": "execute_result"
34 | }
35 | ],
36 | "source": [
37 | "factorial(8)"
38 | ]
39 | },
40 | {
41 | "cell_type": "code",
42 | "execution_count": 13,
43 | "metadata": {
44 | "collapsed": true
45 | },
46 | "outputs": [],
47 | "source": [
48 | "def factorial2(n):\n",
49 | " '''with recursion'''\n",
50 | " if n < 1: \n",
51 | " return 1\n",
52 | " else:\n",
53 | " num = n * factorial2(n - 1) \n",
54 | " return num"
55 | ]
56 | },
57 | {
58 | "cell_type": "code",
59 | "execution_count": 19,
60 | "metadata": {},
61 | "outputs": [
62 | {
63 | "data": {
64 | "text/plain": [
65 | "40320"
66 | ]
67 | },
68 | "execution_count": 19,
69 | "metadata": {},
70 | "output_type": "execute_result"
71 | }
72 | ],
73 | "source": [
74 | "factorial2(8)"
75 | ]
76 | },
77 | {
78 | "cell_type": "markdown",
79 | "metadata": {},
80 | "source": [
81 | "---"
82 | ]
83 | },
84 | {
85 | "cell_type": "code",
86 | "execution_count": 3,
87 | "metadata": {
88 | "collapsed": true
89 | },
90 | "outputs": [],
91 | "source": [
92 | "def fibonacci(n):\n",
93 | "    '''Return the n-th Fibonacci number (F0 = 0, F1 = 1) with recursion; n must be a nonnegative int.'''\n",
94 | "    if n < 0:  # guard: a negative n never reaches a base case and recurses until RecursionError\n",
95 | "        raise ValueError('n must be a nonnegative integer')\n",
96 | "    elif n == 0 or n == 1:\n",
97 | "        return int(n)  # base cases: F0 = 0, F1 = 1\n",
98 | "    else:\n",
99 | "        return fibonacci(n-1)+fibonacci(n-2)"
100 | ]
101 | }
102 | ],
103 | "metadata": {
104 | "kernelspec": {
105 | "display_name": "Python [conda root]",
106 | "language": "python",
107 | "name": "conda-root-py"
108 | },
109 | "language_info": {
110 | "codemirror_mode": {
111 | "name": "ipython",
112 | "version": 3
113 | },
114 | "file_extension": ".py",
115 | "mimetype": "text/x-python",
116 | "name": "python",
117 | "nbconvert_exporter": "python",
118 | "pygments_lexer": "ipython3",
119 | "version": "3.5.4"
120 | }
121 | },
122 | "nbformat": 4,
123 | "nbformat_minor": 2
124 | }
125 |
--------------------------------------------------------------------------------
/notebooks/Random_Number_Generators/2_PRNG_Linear_Congruential_Generator.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "Wikipedia link: [linear congruential generator](https://en.wikipedia.org/wiki/Linear_congruential_generator)\n",
8 | "\n",
9 | "Wikipedia description: \n",
10 | "> A linear congruential generator (LCG) is an algorithm that yields a sequence of pseudo-randomized numbers calculated with a discontinuous piecewise linear equation. The method represents one of the oldest and best-known pseudorandom number generator algorithms. The theory behind them is relatively easy to understand, and they are easily implemented and fast, especially on computer hardware which can provide modulo arithmetic by storage-bit truncation.\n",
11 | "\n",
12 | "The generator is defined by the recurrence relation:\n",
13 | "\n",
14 | "X$_{n+1}$ = (aX$_{n}$+c) mod m\n",
15 | "\n",
16 | "where X is the sequence of pseudorandom values and\n",
17 | "- m is the modulus (0 $<$ m)\n",
18 | "- a is the multiplier (0 $<$ a $<$ m)\n",
19 | "- c is the increment (0 $\\leq$ c $<$ m)\n",
20 | "- X$_{0}$ is the seed value (0 $\\leq$ X$_{0}$ $<$ m)\n",
21 | "\n",
22 | "all of which are integer values."
23 | ]
24 | },
25 | {
26 | "cell_type": "code",
27 | "execution_count": 1,
28 | "metadata": {
29 | "collapsed": true
30 | },
31 | "outputs": [],
32 | "source": [
33 | "def lcg(m, a, c, seed):\n",
34 | "    '''Linear congruential generator: yields X_{n+1} = (a * X_n + c) % m forever.\n",
35 | "    \n",
36 | "    In:\n",
37 | "        m = (+ int) modulus\n",
38 | "        a = (+ int) multiplier\n",
39 | "        c = (nonnegative int) increment\n",
40 | "        seed = (nonnegative int) initial value\n",
41 | "    Out:\n",
42 | "        generator of pseudorandom ints in [0, m); the seed itself is not yielded\n",
43 | "    '''\n",
44 | "    \n",
45 | "    # type and value checks (they run on the first next() call, since this is a generator)\n",
46 | "    assert type(m) == int, \"'m' must be an integer\"\n",
47 | "    assert m > 0, \"'m' must be an int greater than 0\"\n",
48 | "    assert type(a) == int, \"'a' must be an integer\"\n",
49 | "    assert a > 0, \"'a' must be an int greater than 0\"\n",
50 | "    assert type(c) == int, \"'c' must be an integer\"\n",
51 | "    assert c >= 0, \"'c' must be a nonnegative int\"\n",
52 | "    assert type(seed) == int, \"'seed' must be an integer\"\n",
53 | "    assert seed >= 0, \"'seed' must be a nonnegative int\"\n",
54 | "    \n",
55 | "    # algorithm\n",
56 | "    X_n = seed\n",
57 | "    while True:\n",
58 | "        X_n = (a * X_n + c) % m\n",
59 | "        yield X_n"
60 | ]
61 | },
62 | {
63 | "cell_type": "code",
64 | "execution_count": 2,
65 | "metadata": {
66 | "collapsed": true
67 | },
68 | "outputs": [],
69 | "source": [
70 | "def auto_print(implementation, number):\n",
71 | " for _ in range(number):\n",
72 | " print(next(implementation)) "
73 | ]
74 | },
75 | {
76 | "cell_type": "markdown",
77 | "metadata": {},
78 | "source": [
79 | "#### Basic Implementation"
80 | ]
81 | },
82 | {
83 | "cell_type": "code",
84 | "execution_count": 3,
85 | "metadata": {},
86 | "outputs": [
87 | {
88 | "name": "stdout",
89 | "output_type": "stream",
90 | "text": [
91 | "12\n",
92 | "27\n",
93 | "8\n",
94 | "30\n",
95 | "29\n",
96 | "22\n",
97 | "4\n",
98 | "2\n",
99 | "19\n",
100 | "14\n",
101 | "10\n",
102 | "13\n",
103 | "3\n",
104 | "26\n",
105 | "1\n",
106 | "12\n",
107 | "27\n",
108 | "8\n",
109 | "30\n",
110 | "29\n"
111 | ]
112 | }
113 | ],
114 | "source": [
115 | "basic = lcg(m=31, a=7, c=5, seed=1)\n",
116 | "auto_print(basic, 20)"
117 | ]
118 | },
119 | {
120 | "cell_type": "markdown",
121 | "metadata": {},
122 | "source": [
123 | "#### There's a Problem: Parameter Choice Matters"
124 | ]
125 | },
126 | {
127 | "cell_type": "code",
128 | "execution_count": 4,
129 | "metadata": {},
130 | "outputs": [
131 | {
132 | "name": "stdout",
133 | "output_type": "stream",
134 | "text": [
135 | "1\n",
136 | "3\n",
137 | "7\n",
138 | "15\n",
139 | "0\n",
140 | "1\n",
141 | "3\n",
142 | "7\n",
143 | "15\n",
144 | "0\n",
145 | "1\n",
146 | "3\n",
147 | "7\n",
148 | "15\n",
149 | "0\n",
150 | "1\n",
151 | "3\n",
152 | "7\n",
153 | "15\n",
154 | "0\n"
155 | ]
156 | }
157 | ],
158 | "source": [
159 | "problem = lcg(m=31, a=2, c=1, seed=0)\n",
160 | "auto_print(problem, 20)"
161 | ]
162 | },
163 | {
164 | "cell_type": "markdown",
165 | "metadata": {},
166 | "source": [
167 | "**Explanation:** a poor choice of parameters leads to a short period length. Once the period length, or cycle, is known, it is trivial to make predictions. This means the numbers generated are no longer random."
168 | ]
169 | },
170 | {
171 | "cell_type": "markdown",
172 | "metadata": {},
173 | "source": [
174 | "#### Lehmer Implementation (c=0)"
175 | ]
176 | },
177 | {
178 | "cell_type": "code",
179 | "execution_count": 5,
180 | "metadata": {},
181 | "outputs": [
182 | {
183 | "name": "stdout",
184 | "output_type": "stream",
185 | "text": [
186 | "16807\n",
187 | "282475249\n",
188 | "1622650073\n",
189 | "984943658\n",
190 | "1144108930\n",
191 | "470211272\n",
192 | "101027544\n",
193 | "1457850878\n",
194 | "1458777923\n",
195 | "2007237709\n",
196 | "823564440\n",
197 | "1115438165\n",
198 | "1784484492\n",
199 | "74243042\n",
200 | "114807987\n",
201 | "1137522503\n",
202 | "1441282327\n",
203 | "16531729\n",
204 | "823378840\n",
205 | "143542612\n"
206 | ]
207 | }
208 | ],
209 | "source": [
210 | "lehmer = lcg(m=2147483647, a=16807, c=0, seed=1)\n",
211 | "auto_print(lehmer, 20)"
212 | ]
213 | },
214 | {
215 | "cell_type": "markdown",
216 | "metadata": {},
217 | "source": [
218 | "**Note:** this Lehmer implementation uses a Mersenne Prime (a prime that is 1 less than a power of two: 2$^n$ - 1)."
219 | ]
220 | },
221 | {
222 | "cell_type": "markdown",
223 | "metadata": {
224 | "collapsed": true
225 | },
226 | "source": [
227 | "## Advantages\n",
228 | "\n",
229 | "- Fast\n",
230 | "- Minimal memory to retain state\n",
231 | "- Great for embedded systems and video games \n",
232 | "\n",
233 | "## Disadvantages\n",
234 | "\n",
235 | "- Serial correlation of sequence (see Marsaglia's Theorem)\n",
236 | "- Short period of the low-order bits when m is chosen to be a power of 2\n",
237 | "- Not suitable for a Monte Carlo simulation\n",
238 | "- Must not be used for cryptographic applications\n",
239 | "\n",
240 | "## Final Notes\n",
241 | "\n",
242 | "The low-order bits of LCGs when m is a power of 2 should never be relied on for any degree of randomness whatsoever. Indeed, simply substituting 2$^n$ for the modulus term reveals that the low order bits go through very short cycles. In particular, any full-cycle LCG when m is a power of 2 will produce alternately odd and even results."
243 | ]
244 | }
245 | ],
246 | "metadata": {
247 | "kernelspec": {
248 | "display_name": "Python [conda root]",
249 | "language": "python",
250 | "name": "conda-root-py"
251 | },
252 | "language_info": {
253 | "codemirror_mode": {
254 | "name": "ipython",
255 | "version": 3
256 | },
257 | "file_extension": ".py",
258 | "mimetype": "text/x-python",
259 | "name": "python",
260 | "nbconvert_exporter": "python",
261 | "pygments_lexer": "ipython3",
262 | "version": "3.5.4"
263 | }
264 | },
265 | "nbformat": 4,
266 | "nbformat_minor": 2
267 | }
268 |
--------------------------------------------------------------------------------
/notebooks/Random_Number_Generators/3_PRNG_Linear_Feedback_Shift_Register.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 21,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "value = 0xabbacaddabba\n",
10 | "value &= (1<<10)"
11 | ]
12 | },
13 | {
14 | "cell_type": "code",
15 | "execution_count": 22,
16 | "metadata": {},
17 | "outputs": [
18 | {
19 | "data": {
20 | "text/plain": [
21 | "'0b101010111011101011001010110111011010101110111010'"
22 | ]
23 | },
24 | "execution_count": 22,
25 | "metadata": {},
26 | "output_type": "execute_result"
27 | }
28 | ],
29 | "source": [
30 | "bin(value)"
31 | ]
32 | },
33 | {
34 | "cell_type": "code",
35 | "execution_count": null,
36 | "metadata": {
37 | "collapsed": true
38 | },
39 | "outputs": [],
40 | "source": []
41 | },
42 | {
43 | "cell_type": "code",
44 | "execution_count": null,
45 | "metadata": {
46 | "collapsed": true
47 | },
48 | "outputs": [],
49 | "source": []
50 | },
51 | {
52 | "cell_type": "code",
53 | "execution_count": null,
54 | "metadata": {
55 | "collapsed": true
56 | },
57 | "outputs": [],
58 | "source": []
59 | },
60 | {
61 | "cell_type": "code",
62 | "execution_count": null,
63 | "metadata": {
64 | "collapsed": true
65 | },
66 | "outputs": [],
67 | "source": []
68 | },
69 | {
70 | "cell_type": "code",
71 | "execution_count": null,
72 | "metadata": {
73 | "collapsed": true
74 | },
75 | "outputs": [],
76 | "source": []
77 | }
78 | ],
79 | "metadata": {
80 | "kernelspec": {
81 | "display_name": "Python [conda root]",
82 | "language": "python",
83 | "name": "conda-root-py"
84 | },
85 | "language_info": {
86 | "codemirror_mode": {
87 | "name": "ipython",
88 | "version": 3
89 | },
90 | "file_extension": ".py",
91 | "mimetype": "text/x-python",
92 | "name": "python",
93 | "nbconvert_exporter": "python",
94 | "pygments_lexer": "ipython3",
95 | "version": "3.5.4"
96 | }
97 | },
98 | "nbformat": 4,
99 | "nbformat_minor": 2
100 | }
101 |
--------------------------------------------------------------------------------
/notebooks/Random_Number_Generators/4_PRNG_Dev_Random_Directory.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {
7 | "collapsed": true
8 | },
9 | "outputs": [],
10 | "source": [
11 | "import os"
12 | ]
13 | },
14 | {
15 | "cell_type": "markdown",
16 | "metadata": {},
17 | "source": [
18 | "## Accessing dev/random"
19 | ]
20 | },
21 | {
22 | "cell_type": "code",
23 | "execution_count": 16,
24 | "metadata": {},
25 | "outputs": [
26 | {
27 | "data": {
28 | "text/plain": [
29 | "b'\\xdb/T1i\\xdc\\x89\\xe0Q5'"
30 | ]
31 | },
32 | "execution_count": 16,
33 | "metadata": {},
34 | "output_type": "execute_result"
35 | }
36 | ],
37 | "source": [
38 | "# Return a bytes object containing random bytes suitable for cryptographic use.\n",
39 | "rand = os.urandom(10)\n",
40 | "rand"
41 | ]
42 | },
43 | {
44 | "cell_type": "markdown",
45 | "metadata": {},
46 | "source": [
47 | "## Function"
48 | ]
49 | },
50 | {
51 | "cell_type": "code",
52 | "execution_count": 17,
53 | "metadata": {
54 | "collapsed": true
55 | },
56 | "outputs": [],
57 | "source": [
58 | "def bytes2decimal(byte_obj):\n",
59 | "    '''Takes bytes object and returns its decimal (big-endian, unsigned) integer equivalent.'''\n",
60 | "    return int.from_bytes(byte_obj, byteorder='big')"
61 | ]
62 | },
63 | {
64 | "cell_type": "markdown",
65 | "metadata": {},
66 | "source": [
67 | "## Example"
68 | ]
69 | },
70 | {
71 | "cell_type": "code",
72 | "execution_count": 18,
73 | "metadata": {},
74 | "outputs": [
75 | {
76 | "data": {
77 | "text/plain": [
78 | "1035071323466485622395189"
79 | ]
80 | },
81 | "execution_count": 18,
82 | "metadata": {},
83 | "output_type": "execute_result"
84 | }
85 | ],
86 | "source": [
87 | "b2d = bytes2decimal(rand)\n",
88 | "b2d"
89 | ]
90 | },
91 | {
92 | "cell_type": "markdown",
93 | "metadata": {},
94 | "source": [
95 | "## Multiple Examples"
96 | ]
97 | },
98 | {
99 | "cell_type": "code",
100 | "execution_count": 19,
101 | "metadata": {},
102 | "outputs": [
103 | {
104 | "name": "stdout",
105 | "output_type": "stream",
106 | "text": [
107 | "b'\\x19\\x10cd\\x99' 107649131673\n",
108 | "b'\\x19\\xa2\\x7fx\\xa8' 110100445352\n",
109 | "b'\"T\\xdfM\\xb9' 147452808633\n",
110 | "b'\\xff\\x85O\\x83\\xcf' 1097453241295\n",
111 | "b'\\x1d\\xddE\\x9e\\\\' 128266378844\n",
112 | "b'\\xa1\\x80%\\xf4\\x0b' 693639705611\n",
113 | "b'#gxPy' 152059793529\n",
114 | "b\"\\xfa\\xd5'%\\x87\" 1077317936519\n",
115 | "b'\\x83\\x06 1t' 562743488884\n",
116 | "b'D4@I\\x88' 292934404488\n",
117 | "b'\\xe7\\xffR\\x9d\\x07' 996421049607\n",
118 | "b'\\xad\\xca\\x8e#\\r' 746427654925\n",
119 | "b'\\x0bM{\\x8e\\xec' 48544583404\n",
120 | "b'E\\x9b\\x99\\xc3\\xfe' 298963289086\n",
121 | "b'\\x98a17O' 654465644367\n",
122 | "b'\\x1ck\\xa7\\xa7\\xb9' 122065233849\n",
123 | "b'\\x17g\\x8cS\\x8c' 100521497484\n",
124 | "b'\\xa9\\xdf=Dh' 729594807400\n",
125 | "b'\\xfdR%c\\x1c' 1088004907804\n",
126 | "b'\\x1c\\xf2\\xcd \\x86' 124332613766\n"
127 | ]
128 | }
129 | ],
130 | "source": [
131 | "for _ in range(20):\n",
132 | " rand = os.urandom(5)\n",
133 | " print('{0:25} {1}'.format(str(rand), bytes2decimal(rand)))"
134 | ]
135 | }
136 | ],
137 | "metadata": {
138 | "kernelspec": {
139 | "display_name": "Python [conda root]",
140 | "language": "python",
141 | "name": "conda-root-py"
142 | },
143 | "language_info": {
144 | "codemirror_mode": {
145 | "name": "ipython",
146 | "version": 3
147 | },
148 | "file_extension": ".py",
149 | "mimetype": "text/x-python",
150 | "name": "python",
151 | "nbconvert_exporter": "python",
152 | "pygments_lexer": "ipython3",
153 | "version": "3.5.4"
154 | }
155 | },
156 | "nbformat": 4,
157 | "nbformat_minor": 2
158 | }
159 |
--------------------------------------------------------------------------------
/pkl_files/subset_df.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dziganto/Data_Science_Fundamentals/833e83ea27d27de456b01ba05232dc410c5bc8f5/pkl_files/subset_df.pkl
--------------------------------------------------------------------------------
/pkl_files/subset_df2.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dziganto/Data_Science_Fundamentals/833e83ea27d27de456b01ba05232dc410c5bc8f5/pkl_files/subset_df2.pkl
--------------------------------------------------------------------------------