├── .gitignore
├── LICENSE
├── README.md
├── data
    └── Create_Higgs_CSV.ipynb
├── notebooks
    ├── Big_Data
    │   └── Zeppelin
    │   │   ├── 00_Whirlwind_Tour_of_Zeppelin.json
    │   │   ├── 01_File_Based_Data_Sources.json
    │   │   ├── 03_Spark_Fundamental_Structured_Operations.json
    │   │   ├── Machine_Learning_Zeppelin.json
    │   │   ├── Spark_Machine_Learning.json
    │   │   ├── Spark_Tutorial.json
    │   │   └── Supervised_Machine_Learning.json
    ├── Checksum
    │   └── Longitudinal_Redundancy_Check.ipynb
    ├── Cryptography
    │   └── 1_Caesar_Shift.ipynb
    ├── Hashing
    │   ├── Birthday_Attack.ipynb
    │   └── Intro_to_Hashing.ipynb
    ├── Information_Theory
    │   ├── Hamming_Distance.ipynb
    │   └── Stirlings_Approximation_Factorial.ipynb
    ├── Legacy_Code
    │   └── Bernoulli_Naive_Bayes.ipynb
    ├── Machine_Learning
    │   ├── Algorithms_From_Scratch
    │   │   ├── Bagging_and_Bootstrapping.ipynb
    │   │   ├── Bernoulli_Naive_Bayes.ipynb
    │   │   ├── Cross-Validation.ipynb
    │   │   ├── Gradient_Descent.ipynb
    │   │   ├── K-means.ipynb
    │   │   ├── KNN.ipynb
    │   │   ├── PCA.ipynb
    │   │   ├── Train_Test_Split.ipynb
    │   │   └── Train_Validation_Test_Split.ipynb
    │   ├── Fourier_Transforms
    │   │   └── FFT.ipynb
    │   ├── Supervised_Learning
    │   │   ├── Advanced_Techniques
    │   │   │   └── Bagging
    │   │   │   │   └── Bagging_Simulation.ipynb
    │   │   ├── Classification
    │   │   │   ├── EDA
    │   │   │   │   └── Classification_EDA.ipynb
    │   │   │   ├── Logistic_Regression
    │   │   │   │   ├── Logistic_Regression_Sklearn_Example.ipynb
    │   │   │   │   └── Sigmoid_Function.ipynb
    │   │   │   └── Support_Vector_Machines
    │   │   │   │   └── Basics_of_SVMs.ipynb
    │   │   ├── Model_Selection
    │   │   │   ├── Feature_Selection.ipynb
    │   │   │   ├── Model_Tuning_and_Cross_Validation.ipynb
    │   │   │   └── Train_Test_Split.ipynb
    │   │   ├── Numerical_Methods
    │   │   │   └── Gradient_Descent
    │   │   │   │   └── Linear_Regression_&_Intro_to_GD.ipynb
    │   │   └── Regression
    │   │   │   └── Linear_Regression
    │   │   │       ├── 1_Linear_Regression_101.ipynb
    │   │   │       ├── 2_Linear_Regression_Metrics.ipynb
    │   │   │       ├── 3_Linear_Regression_Assumptions_and_Evaluation.ipynb
    │   │   │       └── 4_Linear_Regression_EDA_and_Residual_Plots.ipynb
    │   └── Unsupervised_Learning
    │   │   ├── Clustering
    │   │       └── Kmeans
    │   │       │   └── Kmeans_Clustering_w_Sklearn.ipynb
    │   │   └── SVD
    │   │       ├── SVD_to_image.ipynb
    │   │       └── SVD_with_Iris_and_Images.ipynb
    ├── OS_library
    │   └── OS_Sandbox.ipynb
    ├── PyTorch
    │   └── PyTorch_Intro.ipynb
    ├── Python
    │   ├── Coding_Best_Practices
    │   │   ├── Complexity_&_Big_O.ipynb
    │   │   ├── Complexity_Practice.ipynb
    │   │   └── OOP_How_to_Write_a_Class.ipynb
    │   ├── Demos
    │   │   ├── ML_101.ipynb
    │   │   ├── Python_101_Instructor.ipynb
    │   │   └── Python_101_Student.ipynb
    │   ├── NumPy
    │   │   ├── NP_argwhere_isin.ipynb
    │   │   └── Numpy_concat_append_ravel_mgrid.ipynb
    │   ├── Programming_Problems
    │   │   ├── Alphabet_Magnets.ipynb
    │   │   ├── Bubble_Sort.ipynb
    │   │   ├── Cryptogram_Puzzle.ipynb
    │   │   ├── Dot_Product.ipynb
    │   │   ├── Factorial.ipynb
    │   │   ├── Fibonacci.ipynb
    │   │   ├── GCD_Problem.ipynb
    │   │   ├── Guess_a_Number.ipynb
    │   │   ├── Moving_Average_Problem.ipynb
    │   │   ├── OLS_R2_and_adjR2.ipynb
    │   │   ├── Project_Euler_Problem_8_with_Deque.ipynb
    │   │   ├── Reverse_String.ipynb
    │   │   ├── Rotation_Problem.ipynb
    │   │   └── SSE.ipynb
    │   ├── Python_Internals
    │   │   ├── Arrays_vs_Lists.ipynb
    │   │   ├── Deep_vs_Shallow_Copying.ipynb
    │   │   ├── Pickling.ipynb
    │   │   ├── Python_Data_Structure_Comparison.ipynb
    │   │   ├── Working_with_Bits.ipynb
    │   │   └── listexp_vs_genexp.ipynb
    │   ├── Recursion
    │   │   └── Factorial_&_Fibonacci.ipynb
    │   └── Visualizations
    │   │   ├── Boxplots.ipynb
    │   │   ├── Matplotlib_Tutorial.ipynb
    │   │   └── Seaborn_Visualizations_&_Data.ipynb
    ├── Random_Number_Generators
    │   ├── 1_PRNG_Middle_Square_Method.ipynb
    │   ├── 2_PRNG_Linear_Congruential_Generator.ipynb
    │   ├── 3_PRNG_Linear_Feedback_Shift_Register.ipynb
    │   └── 4_PRNG_Dev_Random_Directory.ipynb
    └── Statistics
    │   └── Central_Limit_Theorem.ipynb
└── pkl_files
    ├── subset_df.pkl
    └── subset_df2.pkl


/.gitignore:
--------------------------------------------------------------------------------
1 | .ipynb_checkpoints
2 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2017 David Ziganto
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Data_Science_Fundamentals
2 | 
3 | **Objective:** to introduce Data Science concepts from both a theoretical and practical standpoint, all in Python.
4 | 
5 | **Audience:** ranges from beginner to intermediate.
6 | 
7 | **Note:** this is a living repository that will be updated quite frequently.
8 | 


--------------------------------------------------------------------------------
/data/Create_Higgs_CSV.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "import pandas as pd"
 10 |    ]
 11 |   },
 12 |   {
 13 |    "cell_type": "code",
 14 |    "execution_count": 2,
 15 |    "metadata": {},
 16 |    "outputs": [],
 17 |    "source": [
 18 |     "file_path = 'https://archive.ics.uci.edu/ml/machine-learning-databases/00280/HIGGS.csv.gz'\n",
 19 |     "columns = ['target',\n",
 20 |     "           'lepton_pT', \n",
 21 |     "           'lepton_eta', \n",
 22 |     "           'lepton_phi', \n",
 23 |     "           'missing_energy_magnitude', \n",
 24 |     "           'missing_energy_phi', \n",
 25 |     "           'jet_1_pt', \n",
 26 |     "           'jet_1_eta', \n",
 27 |     "           'jet_1_phi', \n",
 28 |     "           'jet_1_b-tag', \n",
 29 |     "           'jet_2_pt', \n",
 30 |     "           'jet_2_eta', \n",
 31 |     "           'jet_2_phi', \n",
 32 |     "           'jet_2_b-tag', \n",
 33 |     "           'jet_3_pt', \n",
 34 |     "           'jet_3_eta', \n",
 35 |     "           'jet_3_phi', \n",
 36 |     "           'jet_3_b-tag', \n",
 37 |     "           'jet_4_pt', \n",
 38 |     "           'jet_4_eta', \n",
 39 |     "           'jet_4_phi', \n",
 40 |     "           'jet_4_b-tag', \n",
 41 |     "           'm_jj', \n",
 42 |     "           'm_jjj', \n",
 43 |     "           'm_lv', \n",
 44 |     "           'm_jlv', \n",
 45 |     "           'm_bb', \n",
 46 |     "           'm_wbb', \n",
 47 |     "           'm_wwbb']"
 48 |    ]
 49 |   },
 50 |   {
 51 |    "cell_type": "code",
 52 |    "execution_count": 3,
 53 |    "metadata": {},
 54 |    "outputs": [],
 55 |    "source": [
 56 |     "df = pd.read_csv(file_path, names=columns)"
 57 |    ]
 58 |   },
 59 |   {
 60 |    "cell_type": "code",
 61 |    "execution_count": 4,
 62 |    "metadata": {},
 63 |    "outputs": [
 64 |     {
 65 |      "data": {
 66 |       "text/html": [
 67 |        "<div>\n",
 68 |        "<style scoped>\n",
 69 |        "    .dataframe tbody tr th:only-of-type {\n",
 70 |        "        vertical-align: middle;\n",
 71 |        "    }\n",
 72 |        "\n",
 73 |        "    .dataframe tbody tr th {\n",
 74 |        "        vertical-align: top;\n",
 75 |        "    }\n",
 76 |        "\n",
 77 |        "    .dataframe thead th {\n",
 78 |        "        text-align: right;\n",
 79 |        "    }\n",
 80 |        "</style>\n",
 81 |        "<table border=\"1\" class=\"dataframe\">\n",
 82 |        "  <thead>\n",
 83 |        "    <tr style=\"text-align: right;\">\n",
 84 |        "      <th></th>\n",
 85 |        "      <th>target</th>\n",
 86 |        "      <th>lepton_pT</th>\n",
 87 |        "      <th>lepton_eta</th>\n",
 88 |        "      <th>lepton_phi</th>\n",
 89 |        "      <th>missing_energy_magnitude</th>\n",
 90 |        "      <th>missing_energy_phi</th>\n",
 91 |        "      <th>jet_1_pt</th>\n",
 92 |        "      <th>jet_1_eta</th>\n",
 93 |        "      <th>jet_1_phi</th>\n",
 94 |        "      <th>jet_1_b-tag</th>\n",
 95 |        "      <th>...</th>\n",
 96 |        "      <th>jet_4_eta</th>\n",
 97 |        "      <th>jet_4_phi</th>\n",
 98 |        "      <th>jet_4_b-tag</th>\n",
 99 |        "      <th>m_jj</th>\n",
100 |        "      <th>m_jjj</th>\n",
101 |        "      <th>m_lv</th>\n",
102 |        "      <th>m_jlv</th>\n",
103 |        "      <th>m_bb</th>\n",
104 |        "      <th>m_wbb</th>\n",
105 |        "      <th>m_wwbb</th>\n",
106 |        "    </tr>\n",
107 |        "  </thead>\n",
108 |        "  <tbody>\n",
109 |        "    <tr>\n",
110 |        "      <th>0</th>\n",
111 |        "      <td>1.0</td>\n",
112 |        "      <td>0.869293</td>\n",
113 |        "      <td>-0.635082</td>\n",
114 |        "      <td>0.225690</td>\n",
115 |        "      <td>0.327470</td>\n",
116 |        "      <td>-0.689993</td>\n",
117 |        "      <td>0.754202</td>\n",
118 |        "      <td>-0.248573</td>\n",
119 |        "      <td>-1.092064</td>\n",
120 |        "      <td>0.000000</td>\n",
121 |        "      <td>...</td>\n",
122 |        "      <td>-0.010455</td>\n",
123 |        "      <td>-0.045767</td>\n",
124 |        "      <td>3.101961</td>\n",
125 |        "      <td>1.353760</td>\n",
126 |        "      <td>0.979563</td>\n",
127 |        "      <td>0.978076</td>\n",
128 |        "      <td>0.920005</td>\n",
129 |        "      <td>0.721657</td>\n",
130 |        "      <td>0.988751</td>\n",
131 |        "      <td>0.876678</td>\n",
132 |        "    </tr>\n",
133 |        "    <tr>\n",
134 |        "      <th>1</th>\n",
135 |        "      <td>1.0</td>\n",
136 |        "      <td>0.907542</td>\n",
137 |        "      <td>0.329147</td>\n",
138 |        "      <td>0.359412</td>\n",
139 |        "      <td>1.497970</td>\n",
140 |        "      <td>-0.313010</td>\n",
141 |        "      <td>1.095531</td>\n",
142 |        "      <td>-0.557525</td>\n",
143 |        "      <td>-1.588230</td>\n",
144 |        "      <td>2.173076</td>\n",
145 |        "      <td>...</td>\n",
146 |        "      <td>-1.138930</td>\n",
147 |        "      <td>-0.000819</td>\n",
148 |        "      <td>0.000000</td>\n",
149 |        "      <td>0.302220</td>\n",
150 |        "      <td>0.833048</td>\n",
151 |        "      <td>0.985700</td>\n",
152 |        "      <td>0.978098</td>\n",
153 |        "      <td>0.779732</td>\n",
154 |        "      <td>0.992356</td>\n",
155 |        "      <td>0.798343</td>\n",
156 |        "    </tr>\n",
157 |        "    <tr>\n",
158 |        "      <th>2</th>\n",
159 |        "      <td>1.0</td>\n",
160 |        "      <td>0.798835</td>\n",
161 |        "      <td>1.470639</td>\n",
162 |        "      <td>-1.635975</td>\n",
163 |        "      <td>0.453773</td>\n",
164 |        "      <td>0.425629</td>\n",
165 |        "      <td>1.104875</td>\n",
166 |        "      <td>1.282322</td>\n",
167 |        "      <td>1.381664</td>\n",
168 |        "      <td>0.000000</td>\n",
169 |        "      <td>...</td>\n",
170 |        "      <td>1.128848</td>\n",
171 |        "      <td>0.900461</td>\n",
172 |        "      <td>0.000000</td>\n",
173 |        "      <td>0.909753</td>\n",
174 |        "      <td>1.108330</td>\n",
175 |        "      <td>0.985692</td>\n",
176 |        "      <td>0.951331</td>\n",
177 |        "      <td>0.803252</td>\n",
178 |        "      <td>0.865924</td>\n",
179 |        "      <td>0.780118</td>\n",
180 |        "    </tr>\n",
181 |        "    <tr>\n",
182 |        "      <th>3</th>\n",
183 |        "      <td>0.0</td>\n",
184 |        "      <td>1.344385</td>\n",
185 |        "      <td>-0.876626</td>\n",
186 |        "      <td>0.935913</td>\n",
187 |        "      <td>1.992050</td>\n",
188 |        "      <td>0.882454</td>\n",
189 |        "      <td>1.786066</td>\n",
190 |        "      <td>-1.646778</td>\n",
191 |        "      <td>-0.942383</td>\n",
192 |        "      <td>0.000000</td>\n",
193 |        "      <td>...</td>\n",
194 |        "      <td>-0.678379</td>\n",
195 |        "      <td>-1.360356</td>\n",
196 |        "      <td>0.000000</td>\n",
197 |        "      <td>0.946652</td>\n",
198 |        "      <td>1.028704</td>\n",
199 |        "      <td>0.998656</td>\n",
200 |        "      <td>0.728281</td>\n",
201 |        "      <td>0.869200</td>\n",
202 |        "      <td>1.026736</td>\n",
203 |        "      <td>0.957904</td>\n",
204 |        "    </tr>\n",
205 |        "    <tr>\n",
206 |        "      <th>4</th>\n",
207 |        "      <td>1.0</td>\n",
208 |        "      <td>1.105009</td>\n",
209 |        "      <td>0.321356</td>\n",
210 |        "      <td>1.522401</td>\n",
211 |        "      <td>0.882808</td>\n",
212 |        "      <td>-1.205349</td>\n",
213 |        "      <td>0.681466</td>\n",
214 |        "      <td>-1.070464</td>\n",
215 |        "      <td>-0.921871</td>\n",
216 |        "      <td>0.000000</td>\n",
217 |        "      <td>...</td>\n",
218 |        "      <td>-0.373566</td>\n",
219 |        "      <td>0.113041</td>\n",
220 |        "      <td>0.000000</td>\n",
221 |        "      <td>0.755856</td>\n",
222 |        "      <td>1.361057</td>\n",
223 |        "      <td>0.986610</td>\n",
224 |        "      <td>0.838085</td>\n",
225 |        "      <td>1.133295</td>\n",
226 |        "      <td>0.872245</td>\n",
227 |        "      <td>0.808487</td>\n",
228 |        "    </tr>\n",
229 |        "  </tbody>\n",
230 |        "</table>\n",
231 |        "<p>5 rows × 29 columns</p>\n",
232 |        "</div>"
233 |       ],
234 |       "text/plain": [
235 |        "   target   lepton_pT  lepton_eta  lepton_phi  missing_energy_magnitude  \\\n",
236 |        "0     1.0    0.869293   -0.635082    0.225690                  0.327470   \n",
237 |        "1     1.0    0.907542    0.329147    0.359412                  1.497970   \n",
238 |        "2     1.0    0.798835    1.470639   -1.635975                  0.453773   \n",
239 |        "3     0.0    1.344385   -0.876626    0.935913                  1.992050   \n",
240 |        "4     1.0    1.105009    0.321356    1.522401                  0.882808   \n",
241 |        "\n",
242 |        "   missing_energy_phi  jet_1_pt  jet_1_eta  jet_1_phi  jet_1_b-tag    ...     \\\n",
243 |        "0           -0.689993  0.754202  -0.248573  -1.092064     0.000000    ...      \n",
244 |        "1           -0.313010  1.095531  -0.557525  -1.588230     2.173076    ...      \n",
245 |        "2            0.425629  1.104875   1.282322   1.381664     0.000000    ...      \n",
246 |        "3            0.882454  1.786066  -1.646778  -0.942383     0.000000    ...      \n",
247 |        "4           -1.205349  0.681466  -1.070464  -0.921871     0.000000    ...      \n",
248 |        "\n",
249 |        "   jet_4_eta  jet_4_phi  jet_4_b-tag      m_jj     m_jjj      m_lv     m_jlv  \\\n",
250 |        "0  -0.010455  -0.045767     3.101961  1.353760  0.979563  0.978076  0.920005   \n",
251 |        "1  -1.138930  -0.000819     0.000000  0.302220  0.833048  0.985700  0.978098   \n",
252 |        "2   1.128848   0.900461     0.000000  0.909753  1.108330  0.985692  0.951331   \n",
253 |        "3  -0.678379  -1.360356     0.000000  0.946652  1.028704  0.998656  0.728281   \n",
254 |        "4  -0.373566   0.113041     0.000000  0.755856  1.361057  0.986610  0.838085   \n",
255 |        "\n",
256 |        "       m_bb     m_wbb    m_wwbb  \n",
257 |        "0  0.721657  0.988751  0.876678  \n",
258 |        "1  0.779732  0.992356  0.798343  \n",
259 |        "2  0.803252  0.865924  0.780118  \n",
260 |        "3  0.869200  1.026736  0.957904  \n",
261 |        "4  1.133295  0.872245  0.808487  \n",
262 |        "\n",
263 |        "[5 rows x 29 columns]"
264 |       ]
265 |      },
266 |      "execution_count": 4,
267 |      "metadata": {},
268 |      "output_type": "execute_result"
269 |     }
270 |    ],
271 |    "source": [
272 |     "df.head()"
273 |    ]
274 |   },
275 |   {
276 |    "cell_type": "code",
277 |    "execution_count": 5,
278 |    "metadata": {},
279 |    "outputs": [
280 |     {
281 |      "name": "stdout",
282 |      "output_type": "stream",
283 |      "text": [
284 |       "<class 'pandas.core.frame.DataFrame'>\n",
285 |       "RangeIndex: 11000000 entries, 0 to 10999999\n",
286 |       "Data columns (total 29 columns):\n",
287 |       "target                      float64\n",
288 |       " lepton_pT                  float64\n",
289 |       "lepton_eta                  float64\n",
290 |       "lepton_phi                  float64\n",
291 |       "missing_energy_magnitude    float64\n",
292 |       "missing_energy_phi          float64\n",
293 |       "jet_1_pt                    float64\n",
294 |       "jet_1_eta                   float64\n",
295 |       "jet_1_phi                   float64\n",
296 |       "jet_1_b-tag                 float64\n",
297 |       "jet_2_pt                    float64\n",
298 |       "jet_2_eta                   float64\n",
299 |       "jet_2_phi                   float64\n",
300 |       "jet_2_b-tag                 float64\n",
301 |       "jet_3_pt                    float64\n",
302 |       "jet_3_eta                   float64\n",
303 |       "jet_3_phi                   float64\n",
304 |       "jet_3_b-tag                 float64\n",
305 |       "jet_4_pt                    float64\n",
306 |       "jet_4_eta                   float64\n",
307 |       "jet_4_phi                   float64\n",
308 |       "jet_4_b-tag                 float64\n",
309 |       "m_jj                        float64\n",
310 |       "m_jjj                       float64\n",
311 |       "m_lv                        float64\n",
312 |       "m_jlv                       float64\n",
313 |       "m_bb                        float64\n",
314 |       "m_wbb                       float64\n",
315 |       "m_wwbb                      float64\n",
316 |       "dtypes: float64(29)\n",
317 |       "memory usage: 2.4 GB\n"
318 |      ]
319 |     }
320 |    ],
321 |    "source": [
322 |     "df.info()"
323 |    ]
324 |   },
325 |   {
326 |    "cell_type": "code",
327 |    "execution_count": 11,
328 |    "metadata": {},
329 |    "outputs": [],
330 |    "source": [
331 |     "save_path = '/Users/davidziganto/Repositories/Data_Science_Fundamentals/data/'\n",
332 |     "file_name = 'higgs.csv'"
333 |    ]
334 |   },
335 |   {
336 |    "cell_type": "code",
337 |    "execution_count": 12,
338 |    "metadata": {},
339 |    "outputs": [],
340 |    "source": [
341 |     "df.to_csv(save_path + file_name)"
342 |    ]
343 |   }
344 |  ],
345 |  "metadata": {
346 |   "kernelspec": {
347 |    "display_name": "Python [conda root]",
348 |    "language": "python",
349 |    "name": "conda-root-py"
350 |   },
351 |   "language_info": {
352 |    "codemirror_mode": {
353 |     "name": "ipython",
354 |     "version": 3
355 |    },
356 |    "file_extension": ".py",
357 |    "mimetype": "text/x-python",
358 |    "name": "python",
359 |    "nbconvert_exporter": "python",
360 |    "pygments_lexer": "ipython3",
361 |    "version": "3.5.4"
362 |   }
363 |  },
364 |  "nbformat": 4,
365 |  "nbformat_minor": 2
366 | }
367 | 


--------------------------------------------------------------------------------
/notebooks/Big_Data/Zeppelin/Machine_Learning_Zeppelin.json:
--------------------------------------------------------------------------------
1 | ﻿{"paragraphs":[{"title":"Introduction","text":"%md\nThis notebook is a simple tutorial on how to use numpy, pandas, and Spark's machine learning library to do fun stuff.","user":"admin","dateUpdated":"2017-11-09T14:04:30-0600","config":{"colWidth":12,"enabled":true,"results":{},"editorSetting":{"language":"markdown","editOnDblClick":true},"editorMode":"ace/mode/markdown","title":true,"editorHide":true,"tableHide":false},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1510255925569_1694489231","id":"20171109-133205_1077344504","dateCreated":"2017-11-09T13:32:05-0600","dateStarted":"2017-11-09T14:04:30-0600","dateFinished":"2017-11-09T14:04:32-0600","status":"FINISHED","errorMessage":"","progressUpdateIntervalMs":500,"focus":true,"$$hashKey":"object:1692"},{"title":"Imports","text":"%pyspark\n\nimport pandas as pd\nimport numpy as np","user":"admin","dateUpdated":"2017-11-09T14:04:04-0600","config":{"colWidth":12,"enabled":true,"results":{},"editorSetting":{"language":"python","editOnDblClick":false},"editorMode":"ace/mode/python","title":true},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1510255243163_715367343","id":"20171109-132043_872043916","dateCreated":"2017-11-09T13:20:43-0600","dateStarted":"2017-11-09T14:04:03-0600","dateFinished":"2017-11-09T14:04:03-0600","status":"FINISHED","errorMessage":"","progressUpdateIntervalMs":500,"$$hashKey":"object:1693"},{"text":"%pyspark\n\ndf = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-white.csv', 
delimiter=';')\ndf.head()","user":"admin","dateUpdated":"2017-11-09T13:25:33-0600","config":{"colWidth":12,"enabled":true,"results":{},"editorSetting":{"language":"python"},"editorMode":"ace/mode/python"},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1510255257120_2050091925","id":"20171109-132057_845151643","dateCreated":"2017-11-09T13:20:57-0600","dateStarted":"2017-11-09T13:25:33-0600","dateFinished":"2017-11-09T13:25:34-0600","status":"FINISHED","errorMessage":"","progressUpdateIntervalMs":500,"$$hashKey":"object:1694"},{"title":"Rearrange Columns","text":"%pyspark\ndf = df[['quality', 'fixed acidity', 'volatile acidity', 'citric acid', 'residual sugar', 'chlorides', 'free sulfur dioxide', 'total sulfur dioxide', 'density', 'pH', 'sulphates', 'alcohol']]","user":"admin","dateUpdated":"2017-11-09T14:53:15-0600","config":{"colWidth":12,"enabled":true,"results":{},"editorSetting":{"language":"python"},"editorMode":"ace/mode/python","title":true},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1510260622609_-2020735401","id":"20171109-145022_1858055191","dateCreated":"2017-11-09T14:50:22-0600","dateStarted":"2017-11-09T14:53:15-0600","dateFinished":"2017-11-09T14:53:15-0600","status":"FINISHED","errorMessage":"","progressUpdateIntervalMs":500,"$$hashKey":"object:1695"},{"text":"%pyspark\ntype(df)","user":"admin","dateUpdated":"2017-11-09T14:53:18-0600","config":{"colWidth":12,"enabled":true,"results":{},"editorSetting":{"language":"python"},"editorMode":"ace/mode/python"},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1510258090492_534582379","id":"20171109-140810_1298767575","dateCreated":"2017-11-09T14:08:10-0600","dateStarted":"2017-11-09T14:53:18-0600","dateFinished":"2017-11-09T14:53:18-0600","status":"FINISHED","errorMessage":"","progressUpdateIntervalMs":500,"$$hashKey":"object:1696"},{"title":"Create Spark DF","text":"%pyspark\nddf = 
spark.createDataFrame(df)","user":"admin","dateUpdated":"2017-11-09T15:20:53-0600","config":{"colWidth":12,"enabled":true,"results":{},"editorSetting":{"language":"python"},"editorMode":"ace/mode/python","title":true},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1510255281754_1175588324","id":"20171109-132121_1872338822","dateCreated":"2017-11-09T13:21:21-0600","dateStarted":"2017-11-09T15:20:53-0600","dateFinished":"2017-11-09T15:20:54-0600","status":"FINISHED","errorMessage":"","progressUpdateIntervalMs":500,"$$hashKey":"object:1697"},{"text":"%pyspark\ntype(ddf)","user":"admin","dateUpdated":"2017-11-09T15:20:55-0600","config":{"colWidth":12,"enabled":true,"results":{},"editorSetting":{"language":"python"},"editorMode":"ace/mode/python"},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1510258074988_2093954347","id":"20171109-140754_1396222594","dateCreated":"2017-11-09T14:07:54-0600","dateStarted":"2017-11-09T15:20:55-0600","dateFinished":"2017-11-09T15:20:55-0600","status":"FINISHED","errorMessage":"","progressUpdateIntervalMs":500,"$$hashKey":"object:1698"},{"title":"Number of Rows","text":"%pyspark\nddf.count()","user":"admin","dateUpdated":"2017-11-09T15:20:56-0600","config":{"colWidth":12,"enabled":true,"results":{},"editorSetting":{"language":"python"},"editorMode":"ace/mode/python","title":true},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1510259453749_-200329059","id":"20171109-143053_1622356792","dateCreated":"2017-11-09T14:30:53-0600","dateStarted":"2017-11-09T15:20:56-0600","dateFinished":"2017-11-09T15:20:56-0600","status":"FINISHED","errorMessage":"","progressUpdateIntervalMs":500,"$$hashKey":"object:1699"},{"title":"Quick 
Peek","text":"%pyspark\nddf.first()","user":"admin","dateUpdated":"2017-11-09T15:20:58-0600","config":{"colWidth":12,"enabled":true,"results":{},"editorSetting":{"language":"python"},"editorMode":"ace/mode/python","title":true},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1510255689961_508972245","id":"20171109-132809_629688564","dateCreated":"2017-11-09T13:28:09-0600","dateStarted":"2017-11-09T15:20:58-0600","dateFinished":"2017-11-09T15:20:58-0600","status":"FINISHED","errorMessage":"","progressUpdateIntervalMs":500,"$$hashKey":"object:1700"},{"title":"Quick Peek 2","text":"%pyspark\nddf.take(5)","user":"admin","dateUpdated":"2017-11-09T15:21:00-0600","config":{"colWidth":12,"enabled":true,"results":{},"editorSetting":{"language":"python"},"editorMode":"ace/mode/python","title":true},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1510255663529_-466628372","id":"20171109-132743_239942099","dateCreated":"2017-11-09T13:27:43-0600","dateStarted":"2017-11-09T15:21:00-0600","dateFinished":"2017-11-09T15:21:00-0600","status":"FINISHED","errorMessage":"","progressUpdateIntervalMs":500,"$$hashKey":"object:1701"},{"title":"Better 
Format","text":"%pyspark\nddf.show()","user":"admin","dateUpdated":"2017-11-09T15:21:02-0600","config":{"colWidth":12,"enabled":true,"results":{},"editorSetting":{"language":"python"},"editorMode":"ace/mode/python","title":true},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1510258522386_-981731649","id":"20171109-141522_1465708413","dateCreated":"2017-11-09T14:15:22-0600","dateStarted":"2017-11-09T15:21:02-0600","dateFinished":"2017-11-09T15:21:02-0600","status":"FINISHED","errorMessage":"","progressUpdateIntervalMs":500,"$$hashKey":"object:1702"},{"text":"%pyspark\nddf.columns","user":"admin","dateUpdated":"2017-11-09T15:21:08-0600","config":{"colWidth":12,"enabled":true,"results":{},"editorSetting":{"language":"python"},"editorMode":"ace/mode/python"},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1510258655977_689079582","id":"20171109-141735_398438825","dateCreated":"2017-11-09T14:17:35-0600","dateStarted":"2017-11-09T15:21:08-0600","dateFinished":"2017-11-09T15:21:08-0600","status":"FINISHED","errorMessage":"","progressUpdateIntervalMs":500,"$$hashKey":"object:1703"},{"text":"%pyspark\ntrain_data, test_data = ddf.randomSplit([.8,.2],seed=1234)","user":"admin","dateUpdated":"2017-11-09T15:21:15-0600","config":{"colWidth":12,"enabled":true,"results":{},"editorSetting":{"language":"python"},"editorMode":"ace/mode/python"},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1510258850889_-421449118","id":"20171109-142050_1844692829","dateCreated":"2017-11-09T14:20:50-0600","dateStarted":"2017-11-09T15:21:15-0600","dateFinished":"2017-11-09T15:21:15-0600","status":"FINISHED","errorMessage":"","progressUpdateIntervalMs":500,"$$hashKey":"object:1704"},{"text":"%pyspark\nprint('train:', train_data.count())\nprint('test:', 
test_data.count())","user":"admin","dateUpdated":"2017-11-09T15:21:17-0600","config":{"colWidth":12,"enabled":true,"results":{},"editorSetting":{"language":"python"},"editorMode":"ace/mode/python"},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1510259459848_159811236","id":"20171109-143059_355393844","dateCreated":"2017-11-09T14:30:59-0600","dateStarted":"2017-11-09T15:21:17-0600","dateFinished":"2017-11-09T15:21:17-0600","status":"FINISHED","errorMessage":"","progressUpdateIntervalMs":500,"$$hashKey":"object:1705"},{"text":"%pyspark\nfrom pyspark.ml.feature import VectorAssembler\nfeature_assembler = VectorAssembler(\n    inputCols=['fixed acidity', 'volatile acidity', 'citric acid', 'residual sugar', 'chlorides', 'free sulfur dioxide', 'total sulfur dioxide', 'density', 'pH', 'sulphates', 'alcohol'],\n    outputCol='features')","user":"admin","dateUpdated":"2017-11-09T15:29:54-0600","config":{"colWidth":12,"enabled":true,"results":{},"editorSetting":{"language":"python"},"editorMode":"ace/mode/python"},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1510259638946_984372457","id":"20171109-143358_1110597847","dateCreated":"2017-11-09T14:33:58-0600","dateStarted":"2017-11-09T15:29:54-0600","dateFinished":"2017-11-09T15:29:54-0600","status":"FINISHED","errorMessage":"","progressUpdateIntervalMs":500,"$$hashKey":"object:1706"},{"text":"%pyspark\ntrain.take(1)","user":"admin","dateUpdated":"2017-11-09T15:29:58-0600","config":{"colWidth":12,"enabled":true,"results":{},"editorSetting":{"language":"python"},"editorMode":"ace/mode/python"},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1510262666092_1835562309","id":"20171109-152426_2092064614","dateCreated":"2017-11-09T15:24:26-0600","dateStarted":"2017-11-09T15:29:58-0600","dateFinished":"2017-11-09T15:29:58-0600","status":"FINISHED","errorMessage":"","progressUpdateIntervalMs":500,"$$hashKey":"object:1707"},{"text":"%pyspark\nfrom pyspark.ml.classification 
import LogisticRegression\nlr = LogisticRegression(regParam=0.1, elasticNetParam=1.0, labelCol='quality', family=\"multinomial\")\nlr_model = lr.fit(train)","user":"admin","dateUpdated":"2017-11-09T15:35:53-0600","config":{"colWidth":12,"enabled":true,"results":{},"editorSetting":{"language":"python"},"editorMode":"ace/mode/python"},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1510261136989_1594497724","id":"20171109-145856_1022628746","dateCreated":"2017-11-09T14:58:56-0600","dateStarted":"2017-11-09T15:35:53-0600","dateFinished":"2017-11-09T15:35:59-0600","status":"FINISHED","errorMessage":"","progressUpdateIntervalMs":500,"$$hashKey":"object:1708"},{"text":"%pyspark\nprint(\"Multinomial coefficients:\\n \" + str(lr_model.coefficientMatrix))\nprint(\"Multinomial intercepts:\\n \" + str(lr_model.interceptVector))\n","user":"admin","dateUpdated":"2017-11-09T15:36:40-0600","config":{"colWidth":12,"enabled":true,"results":{},"editorSetting":{"language":"python"},"editorMode":"ace/mode/python"},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1510262678601_-2031148813","id":"20171109-152438_1562383421","dateCreated":"2017-11-09T15:24:38-0600","dateStarted":"2017-11-09T15:36:40-0600","dateFinished":"2017-11-09T15:36:40-0600","status":"FINISHED","errorMessage":"","progressUpdateIntervalMs":500,"$$hashKey":"object:1709"},{"text":"%pyspark\n","user":"admin","dateUpdated":"2017-11-09T15:33:22-0600","config":{"colWidth":12,"enabled":true,"results":{},"editorSetting":{"language":"python"},"editorMode":"ace/mode/python"},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1510263202433_2105064972","id":"20171109-153322_151101125","dateCreated":"2017-11-09T15:33:22-0600","status":"READY","errorMessage":"","progressUpdateIntervalMs":500,"$$hashKey":"object:1710"}],"name":"Machine 
Learning","id":"2CYTR3UEA","angularObjects":{"2CJRPMHPD:shared_process":[],"2CJSHBYC6:shared_process":[],"2CKSW7BWY:shared_process":[],"2CGWQ9B37:shared_process":[],"2CGR5K95Z:shared_process":[],"2CK2ZPKQ4:shared_process":[],"2CJJMSU71:shared_process":[],"2CGZPE6U6:shared_process":[],"2CG9T16R1:shared_process":[],"2CG86WNKX:shared_process":[],"2CKE7UE12:shared_process":[],"2CGJYANVS:shared_process":[],"2CJSRKFPY:shared_process":[],"2CHBR99H6:shared_process":[],"2CFTTN2E2:shared_process":[],"2CGADMUUE:shared_process":[],"2CKJKMCM2:shared_process":[],"2CKS133YC:shared_process":[],"2CHMME5HT:shared_process":[]},"config":{"looknfeel":"default","personalizedMode":"false"},"info":{}}


--------------------------------------------------------------------------------
/notebooks/Checksum/Longitudinal_Redundancy_Check.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "## Convert Int to Binary"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "code",
 12 |    "execution_count": 1,
 13 |    "metadata": {
 14 |     "collapsed": true
 15 |    },
 16 |    "outputs": [],
 17 |    "source": [
 18 |     "# decimal form\n",
 19 |     "a = 1\n",
 20 |     "b = 2\n",
 21 |     "c = 10\n",
 22 |     "d = 100"
 23 |    ]
 24 |   },
 25 |   {
 26 |    "cell_type": "code",
 27 |    "execution_count": 2,
 28 |    "metadata": {
 29 |     "collapsed": true
 30 |    },
 31 |    "outputs": [],
 32 |    "source": [
 33 |     "def dec2bin(num):\n",
 34 |     "    '''converts decimal number into binary representation'''\n",
 35 |     "    return format(num, 'b')"
 36 |    ]
 37 |   },
 38 |   {
 39 |    "cell_type": "code",
 40 |    "execution_count": 3,
 41 |    "metadata": {},
 42 |    "outputs": [
 43 |     {
 44 |      "name": "stdout",
 45 |      "output_type": "stream",
 46 |      "text": [
 47 |       "0 0\n",
 48 |       "1 1\n",
 49 |       "2 10\n",
 50 |       "3 11\n",
 51 |       "4 100\n",
 52 |       "5 101\n",
 53 |       "6 110\n",
 54 |       "7 111\n",
 55 |       "8 1000\n",
 56 |       "9 1001\n",
 57 |       "10 1010\n",
 58 |       "11 1011\n",
 59 |       "12 1100\n",
 60 |       "13 1101\n",
 61 |       "14 1110\n",
 62 |       "15 1111\n",
 63 |       "16 10000\n",
 64 |       "17 10001\n",
 65 |       "18 10010\n",
 66 |       "19 10011\n"
 67 |      ]
 68 |     }
 69 |    ],
 70 |    "source": [
 71 |     "# binary form\n",
 72 |     "for i in range(20):\n",
 73 |     "    print(i, dec2bin(i))"
 74 |    ]
 75 |   },
 76 |   {
 77 |    "cell_type": "code",
 78 |    "execution_count": 4,
 79 |    "metadata": {},
 80 |    "outputs": [
 81 |     {
 82 |      "data": {
 83 |       "text/plain": [
 84 |        "'1'"
 85 |       ]
 86 |      },
 87 |      "execution_count": 4,
 88 |      "metadata": {},
 89 |      "output_type": "execute_result"
 90 |     }
 91 |    ],
 92 |    "source": [
 93 |     "dec2bin(a)"
 94 |    ]
 95 |   },
 96 |   {
 97 |    "cell_type": "code",
 98 |    "execution_count": 5,
 99 |    "metadata": {},
100 |    "outputs": [
101 |     {
102 |      "data": {
103 |       "text/plain": [
104 |        "'11'"
105 |       ]
106 |      },
107 |      "execution_count": 5,
108 |      "metadata": {},
109 |      "output_type": "execute_result"
110 |     }
111 |    ],
112 |    "source": [
113 |     "dec2bin(3) "
114 |    ]
115 |   },
116 |   {
117 |    "cell_type": "code",
118 |    "execution_count": 6,
119 |    "metadata": {},
120 |    "outputs": [
121 |     {
122 |      "data": {
123 |       "text/plain": [
124 |        "'1010'"
125 |       ]
126 |      },
127 |      "execution_count": 6,
128 |      "metadata": {},
129 |      "output_type": "execute_result"
130 |     }
131 |    ],
132 |    "source": [
133 |     "dec2bin(10)"
134 |    ]
135 |   },
136 |   {
137 |    "cell_type": "code",
138 |    "execution_count": 7,
139 |    "metadata": {},
140 |    "outputs": [
141 |     {
142 |      "data": {
143 |       "text/plain": [
144 |        "'101000'"
145 |       ]
146 |      },
147 |      "execution_count": 7,
148 |      "metadata": {},
149 |      "output_type": "execute_result"
150 |     }
151 |    ],
152 |    "source": [
153 |     "# Return x with the bits shifted to the left by y places: x << y\n",
154 |     "dec2bin(10 << 2)"
155 |    ]
156 |   },
157 |   {
158 |    "cell_type": "code",
159 |    "execution_count": 8,
160 |    "metadata": {},
161 |    "outputs": [
162 |     {
163 |      "data": {
164 |       "text/plain": [
165 |        "'101'"
166 |       ]
167 |      },
168 |      "execution_count": 8,
169 |      "metadata": {},
170 |      "output_type": "execute_result"
171 |     }
172 |    ],
173 |    "source": [
174 |     "# Return x with the bits shifted to the right by y places: x >> y\n",
175 |     "dec2bin(10 >> 1)"
176 |    ]
177 |   },
178 |   {
179 |    "cell_type": "code",
180 |    "execution_count": 9,
181 |    "metadata": {},
182 |    "outputs": [
183 |     {
184 |      "name": "stdout",
185 |      "output_type": "stream",
186 |      "text": [
187 |       "b1: 1010\n",
188 |       "b2: 1100\n",
189 |       "\n",
190 |       "b1&b2: 1000\n"
191 |      ]
192 |     }
193 |    ],
194 |    "source": [
195 |     "# Bitwise \"and\"\n",
196 |     "b1 = dec2bin(10)\n",
197 |     "b2 = dec2bin(12)\n",
198 |     "print('b1: {}\\nb2: {}\\n'.format(b1, b2))\n",
199 |     "print('b1&b2:', dec2bin(10 & 12))"
200 |    ]
201 |   },
202 |   {
203 |    "cell_type": "code",
204 |    "execution_count": 10,
205 |    "metadata": {},
206 |    "outputs": [
207 |     {
208 |      "name": "stdout",
209 |      "output_type": "stream",
210 |      "text": [
211 |       "b1: 1010\n",
212 |       "b2: 1100\n",
213 |       "\n",
214 |       "b1|b2: 1110\n"
215 |      ]
216 |     }
217 |    ],
218 |    "source": [
219 |     "# Bitwise \"or\"\n",
220 |     "print('b1: {}\\nb2: {}\\n'.format(b1, b2))\n",
221 |     "print('b1|b2:', dec2bin(10 | 12))"
222 |    ]
223 |   },
224 |   {
225 |    "cell_type": "code",
226 |    "execution_count": 11,
227 |    "metadata": {},
228 |    "outputs": [
229 |     {
230 |      "data": {
231 |       "text/plain": [
232 |        "'1010'"
233 |       ]
234 |      },
235 |      "execution_count": 11,
236 |      "metadata": {},
237 |      "output_type": "execute_result"
238 |     }
239 |    ],
240 |    "source": [
241 |     "# Complement of x -- FIX\n",
242 |     "dec2bin(10)"
243 |    ]
244 |   },
245 |   {
246 |    "cell_type": "code",
247 |    "execution_count": 12,
248 |    "metadata": {},
249 |    "outputs": [
250 |     {
251 |      "data": {
252 |       "text/plain": [
253 |        "'1010'"
254 |       ]
255 |      },
256 |      "execution_count": 12,
257 |      "metadata": {},
258 |      "output_type": "execute_result"
259 |     }
260 |    ],
261 |    "source": [
262 |     "dec2bin(10)"
263 |    ]
264 |   },
265 |   {
266 |    "cell_type": "code",
267 |    "execution_count": 13,
268 |    "metadata": {},
269 |    "outputs": [
270 |     {
271 |      "name": "stdout",
272 |      "output_type": "stream",
273 |      "text": [
274 |       "b1: 10010\n",
275 |       "b2: 10100\n",
276 |       "\n",
277 |       "b1^b2: 110\n"
278 |      ]
279 |     }
280 |    ],
281 |    "source": [
282 |     "# Bitwise xor\n",
283 |     "print('b1: {}\\nb2: {}\\n'.format(dec2bin(18), dec2bin(20)))\n",
284 |     "print('b1^b2:', dec2bin(18 ^ 20))"
285 |    ]
286 |   },
287 |   {
288 |    "cell_type": "markdown",
289 |    "metadata": {},
290 |    "source": [
291 |     "## Longitudinal Redundancy Check (LRC)"
292 |    ]
293 |   },
294 |   {
295 |    "cell_type": "code",
296 |    "execution_count": 14,
297 |    "metadata": {
298 |     "collapsed": true
299 |    },
300 |    "outputs": [],
301 |    "source": [
302 |     "def lrc(block1, block2):\n",
303 |     "    '''LRC that returns the parity bit check for 8 bit block.\n",
304 |     "    \n",
305 |     "    Input: \n",
306 |     "        decimal (integer(s) w/option for bitwise operations)\n",
307 |     "    Output:\n",
308 |     "        binary representation where all zeros represents perfect parity match\n",
309 |     "        '''\n",
310 |     "    assert block1 < 255, \"block 1 must have int value less than 255.\"\n",
311 |     "    assert block2 < 255, \"block 2 must have int value less than 255.\"\n",
312 |     "    \n",
313 |     "    xor = format(block1 ^ block2, 'b')\n",
314 |     "    delta = 8 - len(xor)\n",
315 |     "    if delta < 8:\n",
316 |     "        return '0'*delta + xor\n",
317 |     "    else:\n",
318 |     "        return xor"
319 |    ]
320 |   },
321 |   {
322 |    "cell_type": "code",
323 |    "execution_count": 15,
324 |    "metadata": {},
325 |    "outputs": [
326 |     {
327 |      "data": {
328 |       "text/plain": [
329 |        "'00000000'"
330 |       ]
331 |      },
332 |      "execution_count": 15,
333 |      "metadata": {},
334 |      "output_type": "execute_result"
335 |     }
336 |    ],
337 |    "source": [
338 |     "# perfect match: should return 0's\n",
339 |     "lrc(100, 100)"
340 |    ]
341 |   },
342 |   {
343 |    "cell_type": "code",
344 |    "execution_count": 16,
345 |    "metadata": {},
346 |    "outputs": [
347 |     {
348 |      "data": {
349 |       "text/plain": [
350 |        "'00001101'"
351 |       ]
352 |      },
353 |      "execution_count": 16,
354 |      "metadata": {},
355 |      "output_type": "execute_result"
356 |     }
357 |    ],
358 |    "source": [
359 |     "# imperfect match: should return 1's wherever parity mismatch\n",
360 |     "lrc(100, 105)"
361 |    ]
362 |   },
363 |   {
364 |    "cell_type": "code",
365 |    "execution_count": 17,
366 |    "metadata": {},
367 |    "outputs": [
368 |     {
369 |      "name": "stdout",
370 |      "output_type": "stream",
371 |      "text": [
372 |       "item1: 1100100\n",
373 |       "item2: 1101001\n",
374 |       "------ -------\n",
375 |       "  xor: 0001101\n"
376 |      ]
377 |     }
378 |    ],
379 |    "source": [
380 |     "tup = (('item1:', dec2bin(100)), \n",
381 |     "       ('item2:', dec2bin(105)),\n",
382 |     "       ('-'*6, '-'*7),\n",
383 |     "       ('  xor:', lrc(100, 105)[1:]))\n",
384 |     "for item in tup:\n",
385 |     "    print(item[0], item[1])"
386 |    ]
387 |   },
388 |   {
389 |    "cell_type": "code",
390 |    "execution_count": 18,
391 |    "metadata": {},
392 |    "outputs": [
393 |     {
394 |      "data": {
395 |       "text/plain": [
396 |        "True"
397 |       ]
398 |      },
399 |      "execution_count": 18,
400 |      "metadata": {},
401 |      "output_type": "execute_result"
402 |     }
403 |    ],
404 |    "source": [
405 |     "lrc(100, 100) == '00000000'"
406 |    ]
407 |   },
408 |   {
409 |    "cell_type": "code",
410 |    "execution_count": 19,
411 |    "metadata": {},
412 |    "outputs": [
413 |     {
414 |      "data": {
415 |       "text/plain": [
416 |        "False"
417 |       ]
418 |      },
419 |      "execution_count": 19,
420 |      "metadata": {},
421 |      "output_type": "execute_result"
422 |     }
423 |    ],
424 |    "source": [
425 |     "lrc(100, 105) == '00000000'"
426 |    ]
427 |   },
428 |   {
429 |    "cell_type": "markdown",
430 |    "metadata": {},
431 |    "source": [
432 |     "## Problem: Bit Flips\n",
433 |     "\n",
434 |     "LRC detects bit flips as long as they occur at different bit positions. However, if bits flip in the exact same position of each block, the flips cancel out under XOR and LRC will not detect the error, whereas some other checksums can."
435 |    ]
436 |   },
437 |   {
438 |    "cell_type": "code",
439 |    "execution_count": 20,
440 |    "metadata": {},
441 |    "outputs": [
442 |     {
443 |      "data": {
444 |       "text/plain": [
445 |        "'1100100'"
446 |       ]
447 |      },
448 |      "execution_count": 20,
449 |      "metadata": {},
450 |      "output_type": "execute_result"
451 |     }
452 |    ],
453 |    "source": [
454 |     "dec2bin(100)"
455 |    ]
456 |   },
457 |   {
458 |    "cell_type": "code",
459 |    "execution_count": 21,
460 |    "metadata": {},
461 |    "outputs": [
462 |     {
463 |      "data": {
464 |       "text/plain": [
465 |        "'1100101'"
466 |       ]
467 |      },
468 |      "execution_count": 21,
469 |      "metadata": {},
470 |      "output_type": "execute_result"
471 |     }
472 |    ],
473 |    "source": [
474 |     "dec2bin(101)"
475 |    ]
476 |   },
477 |   {
478 |    "cell_type": "code",
479 |    "execution_count": 22,
480 |    "metadata": {},
481 |    "outputs": [
482 |     {
483 |      "data": {
484 |       "text/plain": [
485 |        "'1100100'"
486 |       ]
487 |      },
488 |      "execution_count": 22,
489 |      "metadata": {},
490 |      "output_type": "execute_result"
491 |     }
492 |    ],
493 |    "source": [
494 |     "# simulate flipping least significant bit\n",
495 |     "bitflip101 = dec2bin(101)[:6] + '0'\n",
496 |     "bitflip101"
497 |    ]
498 |   },
499 |   {
500 |    "cell_type": "code",
501 |    "execution_count": 23,
502 |    "metadata": {},
503 |    "outputs": [
504 |     {
505 |      "data": {
506 |       "text/plain": [
507 |        "100"
508 |       ]
509 |      },
510 |      "execution_count": 23,
511 |      "metadata": {},
512 |      "output_type": "execute_result"
513 |     }
514 |    ],
515 |    "source": [
516 |     "# convert binary bit flip to decimal\n",
517 |     "dec_bitflip101 = int(bitflip101, 2)\n",
518 |     "dec_bitflip101"
519 |    ]
520 |   },
521 |   {
522 |    "cell_type": "code",
523 |    "execution_count": 24,
524 |    "metadata": {},
525 |    "outputs": [
526 |     {
527 |      "data": {
528 |       "text/plain": [
529 |        "'00000000'"
530 |       ]
531 |      },
532 |      "execution_count": 24,
533 |      "metadata": {},
534 |      "output_type": "execute_result"
535 |     }
536 |    ],
537 |    "source": [
538 |     "# check\n",
539 |     "lrc(100, dec_bitflip101)"
540 |    ]
541 |   }
542 |  ],
543 |  "metadata": {
544 |   "kernelspec": {
545 |    "display_name": "Python [conda root]",
546 |    "language": "python",
547 |    "name": "conda-root-py"
548 |   },
549 |   "language_info": {
550 |    "codemirror_mode": {
551 |     "name": "ipython",
552 |     "version": 3
553 |    },
554 |    "file_extension": ".py",
555 |    "mimetype": "text/x-python",
556 |    "name": "python",
557 |    "nbconvert_exporter": "python",
558 |    "pygments_lexer": "ipython3",
559 |    "version": "3.5.4"
560 |   }
561 |  },
562 |  "nbformat": 4,
563 |  "nbformat_minor": 2
564 | }
565 | 


--------------------------------------------------------------------------------
/notebooks/Cryptography/1_Caesar_Shift.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "## Caesar Cipher\n",
  8 |     "\n",
  9 |     "This notebook introduces the Caesar cipher. \n",
 10 |     "\n",
 11 |     "A Caesar cipher is one of the easiest ways to encrypt text. It's also extremely easy to crack. But for the purposes of introducing ciphers, it's a great place to start. \n",
 12 |     "\n",
 13 |     "The setup is pretty simple. You start with a message that you want to encrypt so someone else cannot read it. Say the message is **\"I hope you cannot read this.\"**. This is called the *plaintext*. Now we need to apply some algorithm to our text so the output is incoherent. For example, the output may be **\"O nuvk eua igttuz xkgj znoy.\"**. This is called the *ciphertext*. What we did in this case is shift every letter six places to the right: A becomes G, B becomes H, Z becomes F, and so on. \n",
 14 |     "\n",
 15 |     "> **plaintext:**  I hope you cannot read this.  \n",
 16 |     "> **ciphertext:** O nuvk eua igttuz xkgj znoy.\n",
 17 |     "\n",
 18 |     "Right away we can see this particular setup is very easy to crack. It includes capitalization and the words are clearly split. By converting all the text to lowercase and removing all spaces, we can make it a bit more difficult. However, there are only 25 different ways to shift the letters. That means a brute force attack, or trying all possibilities, is trivial.\n",
 19 |     "\n",
 20 |     "Let's see what this looks like in code. We'll create a class called *CaesarCipher* that can encrypt or decrypt text. "
 21 |    ]
 22 |   },
 23 |   {
 24 |    "cell_type": "markdown",
 25 |    "metadata": {},
 26 |    "source": [
 27 |     "---"
 28 |    ]
 29 |   },
 30 |   {
 31 |    "cell_type": "code",
 32 |    "execution_count": 1,
 33 |    "metadata": {},
 34 |    "outputs": [],
 35 |    "source": [
 36 |     "class CaesarCipher:\n",
 37 |     "    \n",
 38 |     "    \n",
 39 |     "    \n",
 40 |     "    def _clean_text(self, text):\n",
 41 |     "        '''converts text to lowercase, removes spaces, and removes punctuation.'''\n",
 42 |     "        import string\n",
 43 |     "        assert type(text) == str, 'input needs to be a string!'\n",
 44 |     "        text = text.lower()\n",
 45 |     "        text = text.replace(' ', '')\n",
 46 |     "        self.clean_text = \"\".join(character for character in text \n",
 47 |     "                                  if character not in string.punctuation)\n",
 48 |     "        return self.clean_text\n",
 49 |     "    \n",
 50 |     "    \n",
 51 |     "    def _string2characters(self, text):\n",
 52 |     "        '''converts a string to individual characters.'''\n",
 53 |     "        assert type(text) == str, 'input needs to be a string!'\n",
 54 |     "        self.str2char = list(text)\n",
 55 |     "        return self.str2char\n",
 56 |     "    \n",
 57 |     "    \n",
 58 |     "    def _chars2nums(self, characters):\n",
 59 |     "        '''converts individual characters to integers.'''\n",
 60 |     "        assert type(characters) == list, 'input needs to be a list of characters!'\n",
 61 |     "        codebook = {'a':0, 'b':1, 'c':2, 'd':3, 'e':4, 'f':5, 'g':6, 'h':7, 'i':8, 'j':9,\n",
 62 |     "               'k':10, 'l':11, 'm':12, 'n':13, 'o':14, 'p':15, 'q':16, 'r':17, 's':18,\n",
 63 |     "               't':19, 'u':20, 'v':21, 'w':22, 'x':23, 'y':24, 'z':25}\n",
 64 |     "        for i, char in enumerate(characters):\n",
 65 |     "            try:\n",
 66 |     "                characters[i] = codebook[char]\n",
 67 |     "            except:\n",
 68 |     "                pass\n",
 69 |     "        self.char2num = characters\n",
 70 |     "        return self.char2num\n",
 71 |     "    \n",
 72 |     "    \n",
 73 |     "    def _nums2chars(self, numbers):\n",
 74 |     "        '''converts individual integers to characters .'''\n",
 75 |     "        assert type(numbers) == list, 'input needs to be a list of numbers!'\n",
 76 |     "        codebook = {0:'a', 1:'b', 2:'c', 3:'d', 4:'e', 5:'f', 6:'g', 7:'h', 8:'i', 9:'j',\n",
 77 |     "               10:'k', 11:'l', 12:'m', 13:'n', 14:'o', 15:'p', 16:'q', 17:'r', 18:'s',\n",
 78 |     "               19:'t', 20:'u', 21:'v', 22:'w', 23:'x', 24:'y', 25:'z'}\n",
 79 |     "        for i, num in enumerate(numbers):\n",
 80 |     "            try:\n",
 81 |     "                numbers[i] = codebook[num]\n",
 82 |     "            except:\n",
 83 |     "                pass\n",
 84 |     "        self.num2chars = numbers\n",
 85 |     "        return self.num2chars\n",
 86 |     "    \n",
 87 |     "    \n",
 88 |     "    def _preprocessing(self, text):\n",
 89 |     "        ''''''\n",
 90 |     "        clean_text = self._clean_text(text)\n",
 91 |     "        list_of_chars = self._string2characters(clean_text)\n",
 92 |     "        list_of_nums = self._chars2nums(list_of_chars)\n",
 93 |     "        return list_of_nums\n",
 94 |     "    \n",
 95 |     "    def encrypt(self, text, shift=3):\n",
 96 |     "        '''return text that is shifted according to user's input.'''\n",
 97 |     "        import numpy as np\n",
 98 |     "        preprocess = self._preprocessing(text)\n",
 99 |     "        nums_shifted = list((np.array(preprocess) + shift) % 26)\n",
100 |     "        return ''.join(self._nums2chars(nums_shifted))\n",
101 |     "        \n",
102 |     "    def decrypt(self, text, shift=3):\n",
103 |     "        '''returns text shifted by user-defined shift length.'''\n",
104 |     "        import numpy as np\n",
105 |     "        preprocess = self._preprocessing(text)\n",
106 |     "        nums = self._chars2nums(preprocess)\n",
107 |     "        num_shift = list((np.array(nums) - shift) % 26)\n",
108 |     "        return ''.join(self._nums2chars(num_shift))"
109 |    ]
110 |   },
111 |   {
112 |    "cell_type": "markdown",
113 |    "metadata": {},
114 |    "source": [
115 |     "---"
116 |    ]
117 |   },
118 |   {
119 |    "cell_type": "code",
120 |    "execution_count": 2,
121 |    "metadata": {},
122 |    "outputs": [],
123 |    "source": [
124 |     "cc = CaesarCipher()"
125 |    ]
126 |   },
127 |   {
128 |    "cell_type": "code",
129 |    "execution_count": 3,
130 |    "metadata": {},
131 |    "outputs": [
132 |     {
133 |      "data": {
134 |       "text/plain": [
135 |        "'onuvkeuaigttuzxkgjznoy'"
136 |       ]
137 |      },
138 |      "execution_count": 3,
139 |      "metadata": {},
140 |      "output_type": "execute_result"
141 |     }
142 |    ],
143 |    "source": [
144 |     "# example\n",
145 |     "cc.encrypt('I hope you cannot read this.', shift=6)"
146 |    ]
147 |   },
148 |   {
149 |    "cell_type": "markdown",
150 |    "metadata": {},
151 |    "source": [
152 |     "---"
153 |    ]
154 |   },
155 |   {
156 |    "cell_type": "code",
157 |    "execution_count": 4,
158 |    "metadata": {},
159 |    "outputs": [
160 |     {
161 |      "data": {
162 |       "text/plain": [
163 |        "'the QuIcK brown fox jumps over the lazy dog!'"
164 |       ]
165 |      },
166 |      "execution_count": 4,
167 |      "metadata": {},
168 |      "output_type": "execute_result"
169 |     }
170 |    ],
171 |    "source": [
172 |     "text = 'the QuIcK brown fox jumps over the lazy dog!'\n",
173 |     "text"
174 |    ]
175 |   },
176 |   {
177 |    "cell_type": "code",
178 |    "execution_count": 5,
179 |    "metadata": {},
180 |    "outputs": [
181 |     {
182 |      "data": {
183 |       "text/plain": [
184 |        "'ymjvznhpgwtbsktcozruxtajwymjqfeditl'"
185 |       ]
186 |      },
187 |      "execution_count": 5,
188 |      "metadata": {},
189 |      "output_type": "execute_result"
190 |     }
191 |    ],
192 |    "source": [
193 |     "# show one encryption\n",
194 |     "encrypted = cc.encrypt(text, shift=5)\n",
195 |     "encrypted"
196 |    ]
197 |   },
198 |   {
199 |    "cell_type": "code",
200 |    "execution_count": 6,
201 |    "metadata": {},
202 |    "outputs": [
203 |     {
204 |      "name": "stdout",
205 |      "output_type": "stream",
206 |      "text": [
207 |       "uifrvjdlcspxogpykvnqtpwfsuifmbazeph\n",
208 |       "vjgswkemdtqyphqzlworuqxgtvjgncbafqi\n",
209 |       "wkhtxlfneurzqiramxpsvryhuwkhodcbgrj\n",
210 |       "xliuymgofvsarjsbnyqtwszivxlipedchsk\n",
211 |       "ymjvznhpgwtbsktcozruxtajwymjqfeditl\n",
212 |       "znkwaoiqhxuctludpasvyubkxznkrgfejum\n",
213 |       "aolxbpjriyvdumveqbtwzvclyaolshgfkvn\n",
214 |       "bpmycqksjzwevnwfrcuxawdmzbpmtihglwo\n",
215 |       "cqnzdrltkaxfwoxgsdvybxenacqnujihmxp\n",
216 |       "droaesmulbygxpyhtewzcyfobdrovkjinyq\n",
217 |       "espbftnvmczhyqziufxadzgpcespwlkjozr\n",
218 |       "ftqcguowndaizrajvgybeahqdftqxmlkpas\n",
219 |       "gurdhvpxoebjasbkwhzcfbiregurynmlqbt\n",
220 |       "hvseiwqypfckbtclxiadgcjsfhvszonmrcu\n",
221 |       "iwtfjxrzqgdlcudmyjbehdktgiwtaponsdv\n",
222 |       "jxugkysarhemdvenzkcfieluhjxubqpotew\n",
223 |       "kyvhlztbsifnewfoaldgjfmvikyvcrqpufx\n",
224 |       "lzwimauctjgofxgpbmehkgnwjlzwdsrqvgy\n",
225 |       "maxjnbvdukhpgyhqcnfilhoxkmaxetsrwhz\n",
226 |       "nbykocwevliqhzirdogjmipylnbyfutsxia\n",
227 |       "oczlpdxfwmjriajsephknjqzmoczgvutyjb\n",
228 |       "pdamqeygxnksjbktfqilokranpdahwvuzkc\n",
229 |       "qebnrfzhyoltkclugrjmplsboqebixwvald\n",
230 |       "rfcosgaizpmuldmvhsknqmtcprfcjyxwbme\n",
231 |       "sgdpthbjaqnvmenwitlornudqsgdkzyxcnf\n"
232 |      ]
233 |     }
234 |    ],
235 |    "source": [
236 |     "# show all encryption possibilities\n",
237 |     "for i in range(1,26):\n",
238 |     "    print(cc.encrypt(text, shift=i))"
239 |    ]
240 |   },
241 |   {
242 |    "cell_type": "code",
243 |    "execution_count": 7,
244 |    "metadata": {},
245 |    "outputs": [
246 |     {
247 |      "name": "stdout",
248 |      "output_type": "stream",
249 |      "text": [
250 |       "shift 1: xliuymgofvsarjsbnyqtwszivxlipedchsk\n",
251 |       "\n",
252 |       "shift 2: wkhtxlfneurzqiramxpsvryhuwkhodcbgrj\n",
253 |       "\n",
254 |       "shift 3: vjgswkemdtqyphqzlworuqxgtvjgncbafqi\n",
255 |       "\n",
256 |       "shift 4: uifrvjdlcspxogpykvnqtpwfsuifmbazeph\n",
257 |       "\n",
258 |       "shift 5: thequickbrownfoxjumpsoverthelazydog\n",
259 |       "\n",
260 |       "shift 6: sgdpthbjaqnvmenwitlornudqsgdkzyxcnf\n",
261 |       "\n",
262 |       "shift 7: rfcosgaizpmuldmvhsknqmtcprfcjyxwbme\n",
263 |       "\n",
264 |       "shift 8: qebnrfzhyoltkclugrjmplsboqebixwvald\n",
265 |       "\n",
266 |       "shift 9: pdamqeygxnksjbktfqilokranpdahwvuzkc\n",
267 |       "\n",
268 |       "shift10: oczlpdxfwmjriajsephknjqzmoczgvutyjb\n",
269 |       "\n",
270 |       "shift11: nbykocwevliqhzirdogjmipylnbyfutsxia\n",
271 |       "\n",
272 |       "shift12: maxjnbvdukhpgyhqcnfilhoxkmaxetsrwhz\n",
273 |       "\n",
274 |       "shift13: lzwimauctjgofxgpbmehkgnwjlzwdsrqvgy\n",
275 |       "\n",
276 |       "shift14: kyvhlztbsifnewfoaldgjfmvikyvcrqpufx\n",
277 |       "\n",
278 |       "shift15: jxugkysarhemdvenzkcfieluhjxubqpotew\n",
279 |       "\n",
280 |       "shift16: iwtfjxrzqgdlcudmyjbehdktgiwtaponsdv\n",
281 |       "\n",
282 |       "shift17: hvseiwqypfckbtclxiadgcjsfhvszonmrcu\n",
283 |       "\n",
284 |       "shift18: gurdhvpxoebjasbkwhzcfbiregurynmlqbt\n",
285 |       "\n",
286 |       "shift19: ftqcguowndaizrajvgybeahqdftqxmlkpas\n",
287 |       "\n",
288 |       "shift20: espbftnvmczhyqziufxadzgpcespwlkjozr\n",
289 |       "\n",
290 |       "shift21: droaesmulbygxpyhtewzcyfobdrovkjinyq\n",
291 |       "\n",
292 |       "shift22: cqnzdrltkaxfwoxgsdvybxenacqnujihmxp\n",
293 |       "\n",
294 |       "shift23: bpmycqksjzwevnwfrcuxawdmzbpmtihglwo\n",
295 |       "\n",
296 |       "shift24: aolxbpjriyvdumveqbtwzvclyaolshgfkvn\n",
297 |       "\n",
298 |       "shift25: znkwaoiqhxuctludpasvyubkxznkrgfejum\n",
299 |       "\n"
300 |      ]
301 |     }
302 |    ],
303 |    "source": [
304 |     "# show all decryption possibilities\n",
305 |     "for i in range(1,26):\n",
306 |     "    print('shift{:2}: {}'.format(i, cc.decrypt(encrypted, shift=i)))\n",
307 |     "    print('')"
308 |    ]
309 |   }
310 |  ],
311 |  "metadata": {
312 |   "kernelspec": {
313 |    "display_name": "Python [conda root]",
314 |    "language": "python",
315 |    "name": "conda-root-py"
316 |   },
317 |   "language_info": {
318 |    "codemirror_mode": {
319 |     "name": "ipython",
320 |     "version": 3
321 |    },
322 |    "file_extension": ".py",
323 |    "mimetype": "text/x-python",
324 |    "name": "python",
325 |    "nbconvert_exporter": "python",
326 |    "pygments_lexer": "ipython3",
327 |    "version": "3.5.4"
328 |   }
329 |  },
330 |  "nbformat": 4,
331 |  "nbformat_minor": 2
332 | }
333 | 


--------------------------------------------------------------------------------
/notebooks/Hashing/Birthday_Attack.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "According to the [Birthday attack](https://en.wikipedia.org/wiki/Birthday_attack) article on Wikipedia:\n",
  8 |     ">A birthday attack is a type of cryptographic attack that exploits the mathematics behind the birthday problem in probability theory. This attack can be used to abuse communication between two or more parties. The attack depends on the higher likelihood of collisions found between random attack attempts and a fixed degree of permutations (pigeonholes). With a birthday attack, it is possible to find a collision of a hash function in $\\sqrt{2^{n}}$=$2^{n/2}$ with $2^{n}$ being the classical preimage resistance security."
  9 |    ]
 10 |   },
 11 |   {
 12 |    "cell_type": "markdown",
 13 |    "metadata": {},
 14 |    "source": [
 15 |     "Given a function *f*, the goal of the attack is to find two different inputs *$x_{1}$*, *$x_{2}$* such that f($x_{1}$)=f($x_{2}$). Such a pair $x_{1}$, $x_{2}$ is called a collision. The method used to find a collision is simply to evaluate the function *f* for different input values that may be chosen randomly or pseudorandomly until the same result is found more than once. Because of the birthday problem, this method can be rather efficient. Specifically, if a function f(x) yields any of *H* different outputs with equal probability and *H* is sufficiently large, then we expect to obtain a pair of different arguments $x_{1}$ and $x_{2}$ with f($x_{1}$) = f($x_{2}$) after evaluating the function for about $1.25{\\sqrt {H}}$ different arguments on average."
 16 |    ]
 17 |   },
 18 |   {
 19 |    "cell_type": "code",
 20 |    "execution_count": 1,
 21 |    "metadata": {
 22 |     "collapsed": true
 23 |    },
 24 |    "outputs": [],
 25 |    "source": [
 26 |     "def hasher(number, modulus):\n",
 27 |     "    '''simply hash function'''\n",
 28 |     "    return number % modulus"
 29 |    ]
 30 |   },
 31 |   {
 32 |    "cell_type": "code",
 33 |    "execution_count": 2,
 34 |    "metadata": {
 35 |     "collapsed": true
 36 |    },
 37 |    "outputs": [],
 38 |    "source": [
 39 |     "from functools import partial\n",
 40 |     "\n",
 41 |     "# set modulus for all examples\n",
 42 |     "myhash = partial(hasher, modulus=47)"
 43 |    ]
 44 |   },
 45 |   {
 46 |    "cell_type": "code",
 47 |    "execution_count": 3,
 48 |    "metadata": {
 49 |     "collapsed": true
 50 |    },
 51 |    "outputs": [],
 52 |    "source": [
 53 |     "from math import log1p, sqrt\n",
 54 |     "\n",
 55 |     "def birthday(probability_exponent, bits):\n",
 56 |     "    '''probability_exponent: base-10 exponent of the desired probability of random collision (e.g. -2 means 1%); bits: log2 of the number of possible hash outputs'''\n",
 57 |     "    probability = 10. ** probability_exponent\n",
 58 |     "    outputs     =  2. ** bits\n",
 59 |     "    return sqrt(2. * outputs * -log1p(-probability))"
 60 |    ]
 61 |   },
 62 |   {
 63 |    "cell_type": "code",
 64 |    "execution_count": 4,
 65 |    "metadata": {},
 66 |    "outputs": [
 67 |     {
 68 |      "data": {
 69 |       "text/plain": [
 70 |        "4.536858806263531"
 71 |       ]
 72 |      },
 73 |      "execution_count": 4,
 74 |      "metadata": {},
 75 |      "output_type": "execute_result"
 76 |     }
 77 |    ],
 78 |    "source": [
 79 |     "birthday(-2, 10)"
 80 |    ]
 81 |   },
 82 |   {
 83 |    "cell_type": "code",
 84 |    "execution_count": 6,
 85 |    "metadata": {},
 86 |    "outputs": [
 87 |     {
 88 |      "data": {
 89 |       "text/plain": [
 90 |        "array([868, 988, 838, 285, 906, 485, 429, 386, 325, 923])"
 91 |       ]
 92 |      },
 93 |      "execution_count": 6,
 94 |      "metadata": {},
 95 |      "output_type": "execute_result"
 96 |     }
 97 |    ],
 98 |    "source": [
 99 |     "import numpy as np\n",
100 |     "\n",
101 |     "integers = np.random.randint(low=1, high=1000, size=10)\n",
102 |     "integers"
103 |    ]
104 |   },
105 |   {
106 |    "cell_type": "code",
107 |    "execution_count": 7,
108 |    "metadata": {},
109 |    "outputs": [
110 |     {
111 |      "data": {
112 |       "text/plain": [
113 |        "Counter({1: 1, 3: 1, 6: 1, 10: 1, 13: 1, 15: 1, 22: 1, 30: 1, 39: 1, 43: 1})"
114 |       ]
115 |      },
116 |      "execution_count": 7,
117 |      "metadata": {},
118 |      "output_type": "execute_result"
119 |     }
120 |    ],
121 |    "source": [
122 |     "from collections import Counter\n",
123 |     "\n",
124 |     "hashes = [myhash(integer) for integer in integers]\n",
125 |     "Counter(hashes)"
126 |    ]
127 |   }
128 |  ],
129 |  "metadata": {
130 |   "kernelspec": {
131 |    "display_name": "Python [conda root]",
132 |    "language": "python",
133 |    "name": "conda-root-py"
134 |   },
135 |   "language_info": {
136 |    "codemirror_mode": {
137 |     "name": "ipython",
138 |     "version": 3
139 |    },
140 |    "file_extension": ".py",
141 |    "mimetype": "text/x-python",
142 |    "name": "python",
143 |    "nbconvert_exporter": "python",
144 |    "pygments_lexer": "ipython3",
145 |    "version": "3.5.4"
146 |   }
147 |  },
148 |  "nbformat": 4,
149 |  "nbformat_minor": 2
150 | }
151 | 


--------------------------------------------------------------------------------
/notebooks/Information_Theory/Hamming_Distance.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {
  7 |     "collapsed": true
  8 |    },
  9 |    "outputs": [],
 10 |    "source": [
 11 |     "s1 = 'string'\n",
 12 |     "s2 = 'stirng'\n",
 13 |     "s3 = 'stirna'"
 14 |    ]
 15 |   },
 16 |   {
 17 |    "cell_type": "markdown",
 18 |    "metadata": {},
 19 |    "source": [
 20 |     "---"
 21 |    ]
 22 |   },
 23 |   {
 24 |    "cell_type": "markdown",
 25 |    "metadata": {},
 26 |    "source": [
 27 |     "## Functions"
 28 |    ]
 29 |   },
 30 |   {
 31 |    "cell_type": "code",
 32 |    "execution_count": 2,
 33 |    "metadata": {
 34 |     "collapsed": true
 35 |    },
 36 |    "outputs": [],
 37 |    "source": [
 38 |     "def hamming_distance(string1, string2):\n",
 39 |     "    '''calculate the hamming distance of two strings'''\n",
 40 |     "    assert len(string1) == len(string2), \"strings must be the same length\"\n",
 41 |     "    distance = 0\n",
 42 |     "    for i, elem in enumerate(string1):\n",
 43 |     "        if elem != string2[i]:\n",
 44 |     "            distance += 1\n",
 45 |     "    return distance"
 46 |    ]
 47 |   },
 48 |   {
 49 |    "cell_type": "code",
 50 |    "execution_count": 3,
 51 |    "metadata": {
 52 |     "collapsed": true
 53 |    },
 54 |    "outputs": [],
 55 |    "source": [
 56 |     "def hamming_distance2(string1, string2):\n",
 57 |     "    '''calculate the hamming distance of two strings (more Pythonic)'''\n",
 58 |     "    assert len(string1) == len(string2), \"strings must be the same length\"\n",
 59 |     "    return sum(elem1 != elem2 for elem1, elem2 in zip(string1, string2))"
 60 |    ]
 61 |   },
 62 |   {
 63 |    "cell_type": "markdown",
 64 |    "metadata": {},
 65 |    "source": [
 66 |     "---"
 67 |    ]
 68 |   },
 69 |   {
 70 |    "cell_type": "code",
 71 |    "execution_count": 4,
 72 |    "metadata": {},
 73 |    "outputs": [
 74 |     {
 75 |      "data": {
 76 |       "text/plain": [
 77 |        "2"
 78 |       ]
 79 |      },
 80 |      "execution_count": 4,
 81 |      "metadata": {},
 82 |      "output_type": "execute_result"
 83 |     }
 84 |    ],
 85 |    "source": [
 86 |     "hamming_distance(s1, s2)"
 87 |    ]
 88 |   },
 89 |   {
 90 |    "cell_type": "code",
 91 |    "execution_count": 5,
 92 |    "metadata": {},
 93 |    "outputs": [
 94 |     {
 95 |      "data": {
 96 |       "text/plain": [
 97 |        "2"
 98 |       ]
 99 |      },
100 |      "execution_count": 5,
101 |      "metadata": {},
102 |      "output_type": "execute_result"
103 |     }
104 |    ],
105 |    "source": [
106 |     "hamming_distance2(s1, s2)"
107 |    ]
108 |   },
109 |   {
110 |    "cell_type": "markdown",
111 |    "metadata": {},
112 |    "source": [
113 |     "---"
114 |    ]
115 |   },
116 |   {
117 |    "cell_type": "code",
118 |    "execution_count": 6,
119 |    "metadata": {},
120 |    "outputs": [
121 |     {
122 |      "data": {
123 |       "text/plain": [
124 |        "1"
125 |       ]
126 |      },
127 |      "execution_count": 6,
128 |      "metadata": {},
129 |      "output_type": "execute_result"
130 |     }
131 |    ],
132 |    "source": [
133 |     "hamming_distance(s2, s3)"
134 |    ]
135 |   },
136 |   {
137 |    "cell_type": "code",
138 |    "execution_count": 7,
139 |    "metadata": {},
140 |    "outputs": [
141 |     {
142 |      "data": {
143 |       "text/plain": [
144 |        "1"
145 |       ]
146 |      },
147 |      "execution_count": 7,
148 |      "metadata": {},
149 |      "output_type": "execute_result"
150 |     }
151 |    ],
152 |    "source": [
153 |     "hamming_distance2(s2, s3)"
154 |    ]
155 |   },
156 |   {
157 |    "cell_type": "markdown",
158 |    "metadata": {},
159 |    "source": [
160 |     "---"
161 |    ]
162 |   },
163 |   {
164 |    "cell_type": "code",
165 |    "execution_count": 8,
166 |    "metadata": {},
167 |    "outputs": [
168 |     {
169 |      "data": {
170 |       "text/plain": [
171 |        "3"
172 |       ]
173 |      },
174 |      "execution_count": 8,
175 |      "metadata": {},
176 |      "output_type": "execute_result"
177 |     }
178 |    ],
179 |    "source": [
180 |     "hamming_distance(s1, s3)"
181 |    ]
182 |   },
183 |   {
184 |    "cell_type": "code",
185 |    "execution_count": 9,
186 |    "metadata": {},
187 |    "outputs": [
188 |     {
189 |      "data": {
190 |       "text/plain": [
191 |        "3"
192 |       ]
193 |      },
194 |      "execution_count": 9,
195 |      "metadata": {},
196 |      "output_type": "execute_result"
197 |     }
198 |    ],
199 |    "source": [
200 |     "hamming_distance2(s1, s3)"
201 |    ]
202 |   }
203 |  ],
204 |  "metadata": {
205 |   "kernelspec": {
206 |    "display_name": "Python [conda root]",
207 |    "language": "python",
208 |    "name": "conda-root-py"
209 |   },
210 |   "language_info": {
211 |    "codemirror_mode": {
212 |     "name": "ipython",
213 |     "version": 3
214 |    },
215 |    "file_extension": ".py",
216 |    "mimetype": "text/x-python",
217 |    "name": "python",
218 |    "nbconvert_exporter": "python",
219 |    "pygments_lexer": "ipython3",
220 |    "version": "3.5.4"
221 |   }
222 |  },
223 |  "nbformat": 4,
224 |  "nbformat_minor": 2
225 | }
226 | 


--------------------------------------------------------------------------------
/notebooks/Legacy_Code/Bernoulli_Naive_Bayes.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "import numpy as np"
 10 |    ]
 11 |   },
 12 |   {
 13 |    "cell_type": "code",
 14 |    "execution_count": 2,
 15 |    "metadata": {},
 16 |    "outputs": [],
 17 |    "source": [
 18 |     "def create_dataset(n_rows=10, n_cols=2, prob=(0.5, 0.5), seed=None):\n",
 19 |     "    '''\n",
 20 |     "    creates a 2D numpy array with 0s and 1s for columns\n",
 21 |     "    \n",
 22 |     "    INPUT:\n",
 23 |     "        n_rows = (int) number of rows in dataset\n",
 24 |     "        n_cols = (int) number of columns starting with target followed by features\n",
 25 |     "        prob = (tuple) probability of success for target, feature 1, feature 2, ..., feature n\n",
 26 |     "    OUTPUT:\n",
 27 |     "        dataset (numpy array)\n",
 28 |     "    '''\n",
 29 |     "    # error handling\n",
 30 |     "    assert type(n_rows) == int, 'n_rows must be an integer'\n",
 31 |     "    assert type(n_cols) == int, 'n_cols must be an integer'\n",
 32 |     "    assert type(prob) == tuple, 'prob must be a tuple of probabilities'\n",
 33 |     "    assert len(prob) == n_cols, 'tuple must contain probabilities for each n_col'\n",
 34 |     "    assert type(seed) == int, 'seed must be an integer'\n",
 35 |     "    \n",
 36 |     "    # reproducibility\n",
 37 |     "    if seed:\n",
 38 |     "        np.random.seed(seed)\n",
 39 |     "        \n",
 40 |     "    # create dataset\n",
 41 |     "    for i, p in enumerate(prob):\n",
 42 |     "        if i < 1:\n",
 43 |     "            dataset = np.random.binomial(n=1, p=prob[i], size=n_rows)\n",
 44 |     "        else:\n",
 45 |     "            column = np.random.binomial(n=1, p=prob[i], size=n_rows)\n",
 46 |     "            dataset = np.c_[dataset, column]\n",
 47 |     "    \n",
 48 |     "    return dataset"
 49 |    ]
 50 |   },
 51 |   {
 52 |    "cell_type": "code",
 53 |    "execution_count": 3,
 54 |    "metadata": {},
 55 |    "outputs": [],
 56 |    "source": [
 57 |     "probabilities = (0.5, 0.5, 0.5, 0.5)\n",
 58 |     "data = create_dataset(n_rows=4, n_cols=4, prob=probabilities, seed=42)"
 59 |    ]
 60 |   },
 61 |   {
 62 |    "cell_type": "code",
 63 |    "execution_count": 4,
 64 |    "metadata": {},
 65 |    "outputs": [
 66 |     {
 67 |      "data": {
 68 |       "text/plain": [
 69 |        "array([[0, 0, 1, 1],\n",
 70 |        "       [1, 0, 1, 0],\n",
 71 |        "       [1, 0, 0, 0],\n",
 72 |        "       [1, 1, 1, 0]])"
 73 |       ]
 74 |      },
 75 |      "execution_count": 4,
 76 |      "metadata": {},
 77 |      "output_type": "execute_result"
 78 |     }
 79 |    ],
 80 |    "source": [
 81 |     "data"
 82 |    ]
 83 |   },
 84 |   {
 85 |    "cell_type": "code",
 86 |    "execution_count": 5,
 87 |    "metadata": {},
 88 |    "outputs": [],
 89 |    "source": [
 90 |     "X = data[:, 1:]\n",
 91 |     "y = data[:, 0]"
 92 |    ]
 93 |   },
 94 |   {
 95 |    "cell_type": "code",
 96 |    "execution_count": 6,
 97 |    "metadata": {},
 98 |    "outputs": [
 99 |     {
100 |      "data": {
101 |       "text/plain": [
102 |        "array([[0, 1, 1],\n",
103 |        "       [0, 1, 0],\n",
104 |        "       [0, 0, 0],\n",
105 |        "       [1, 1, 0]])"
106 |       ]
107 |      },
108 |      "execution_count": 6,
109 |      "metadata": {},
110 |      "output_type": "execute_result"
111 |     }
112 |    ],
113 |    "source": [
114 |     "X"
115 |    ]
116 |   },
117 |   {
118 |    "cell_type": "code",
119 |    "execution_count": 7,
120 |    "metadata": {},
121 |    "outputs": [],
122 |    "source": [
123 |     "class BernoulliNB:\n",
124 |     "    \n",
125 |     "    \n",
126 |     "    def __init__(self):\n",
127 |     "        self.prob_target_one_ = None\n",
128 |     "        self.prob_target_zero_ = None\n",
129 |     "     \n",
130 |     "    \n",
131 |     "    def _reshape(self, X):\n",
132 |     "        '''returns the number of columns in X; NOTE(review): the fallback branch returns X.reshape(-1,1) (an array), not a count'''\n",
133 |     "        try:\n",
134 |     "            n_cols = X.shape[1]\n",
135 |     "        except:\n",
136 |     "            n_cols = X.reshape(-1,1)\n",
137 |     "        return n_cols\n",
138 |     "    \n",
139 |     "    \n",
140 |     "    def fit(self, X, y):\n",
141 |     "        '''calculate priors and likelihoods'''\n",
142 |     "        # setup\n",
143 |     "        n_classes = len(np.unique(y))\n",
144 |     "        n_cols = self._reshape(X)\n",
145 |     "        \n",
146 |     "        # priors (target)\n",
147 |     "        self.prob_target_one_ = np.mean(y)\n",
148 |     "        self.prob_target_zero_ = 1 - self.prob_target_one_\n",
149 |     "        \n",
150 |     "        # likelihoods (features)\n",
151 |     "        n_feature_bins = 2\n",
152 |     "        self.prob_features_ = np.zeros((n_feature_bins, n_cols), dtype=float)\n",
153 |     "        \n",
154 |     "        for i in range(n_feature_bins):\n",
155 |     "            for j in range(n_cols):\n",
156 |     "                column = X[:,j]\n",
157 |     "                intersection_y_and_col = sum(np.logical_and(y==i, column==i))\n",
158 |     "                sum_y = sum(y==i)\n",
159 |     "                self.prob_features_[i,j] = intersection_y_and_col / sum_y\n",
160 |     "        \n",
161 |     "        prob_complements = 1 - self.prob_features_\n",
162 |     "        self.prob_features_ = np.concatenate((nb.prob_features_, prob_complements), axis=0)\n",
163 |     "        self.prob_features_ = self.prob_features_[[0,2,3,1],:] # rearrange array\n",
164 |     "    \n",
165 |     "    \n",
166 |     "    def predict(self, X):\n",
167 |     "        '''return most likely class'''\n",
168 |     "        # NEED MULTIPLE PREDICTIONS\n",
169 |     "        \n",
170 |     "        # setup\n",
171 |     "        X = np.array(X)\n",
172 |     "        n_rows = X.shape[0]\n",
173 |     "        try: \n",
174 |     "            n_cols = X.shape[1] \n",
175 |     "        except: \n",
176 |     "            n_cols = X.shape[0]\n",
177 |     "        class_zero_probs,  class_one_probs = np.split(nb.prob_features_, 2)\n",
178 |     "        \n",
179 |     "        # likelihoods\n",
180 |     "        class_zero_likelihoods = class_zero_probs[X, np.arange(n_cols)]\n",
181 |     "        class_one_likelihoods = class_one_probs[X, np.arange(n_cols)]\n",
182 |     "            \n",
183 |     "        # posteriors\n",
184 |     "        class_zero_posterior = self.prob_target_zero_ * np.prod(class_zero_likelihoods)\n",
185 |     "        class_one_posterior = self.prob_target_one_ * np.prod(class_one_likelihoods)\n",
186 |     "\n",
187 |     "        return (class_zero_posterior, class_one_posterior), np.argmax((class_zero_posterior, class_one_posterior))"
188 |    ]
189 |   },
190 |   {
191 |    "cell_type": "code",
192 |    "execution_count": 8,
193 |    "metadata": {},
194 |    "outputs": [],
195 |    "source": [
196 |     "nb = BernoulliNB()\n",
197 |     "nb.fit(X,y)"
198 |    ]
199 |   },
200 |   {
201 |    "cell_type": "code",
202 |    "execution_count": 9,
203 |    "metadata": {},
204 |    "outputs": [
205 |     {
206 |      "data": {
207 |       "text/plain": [
208 |        "array([[ 1.        ,  0.        ,  0.        ],\n",
209 |        "       [ 0.        ,  1.        ,  1.        ],\n",
210 |        "       [ 0.66666667,  0.33333333,  1.        ],\n",
211 |        "       [ 0.33333333,  0.66666667,  0.        ]])"
212 |       ]
213 |      },
214 |      "execution_count": 9,
215 |      "metadata": {},
216 |      "output_type": "execute_result"
217 |     }
218 |    ],
219 |    "source": [
220 |     "nb.prob_features_"
221 |    ]
222 |   },
223 |   {
224 |    "cell_type": "code",
225 |    "execution_count": 10,
226 |    "metadata": {},
227 |    "outputs": [
228 |     {
229 |      "data": {
230 |       "text/plain": [
231 |        "((0.0, 0.083333333333333343), 1)"
232 |       ]
233 |      },
234 |      "execution_count": 10,
235 |      "metadata": {},
236 |      "output_type": "execute_result"
237 |     }
238 |    ],
239 |    "source": [
240 |     "nb.predict(np.array([1,0,0]))"
241 |    ]
242 |   },
243 |   {
244 |    "cell_type": "code",
245 |    "execution_count": null,
246 |    "metadata": {},
247 |    "outputs": [],
248 |    "source": []
249 |   }
250 |  ],
251 |  "metadata": {
252 |   "kernelspec": {
253 |    "display_name": "Python [conda root]",
254 |    "language": "python",
255 |    "name": "conda-root-py"
256 |   },
257 |   "language_info": {
258 |    "codemirror_mode": {
259 |     "name": "ipython",
260 |     "version": 3
261 |    },
262 |    "file_extension": ".py",
263 |    "mimetype": "text/x-python",
264 |    "name": "python",
265 |    "nbconvert_exporter": "python",
266 |    "pygments_lexer": "ipython3",
267 |    "version": "3.5.4"
268 |   }
269 |  },
270 |  "nbformat": 4,
271 |  "nbformat_minor": 2
272 | }
273 | 


--------------------------------------------------------------------------------
/notebooks/Machine_Learning/Algorithms_From_Scratch/Bagging_and_Bootstrapping.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "import numpy as np"
 10 |    ]
 11 |   },
 12 |   {
 13 |    "cell_type": "markdown",
 14 |    "metadata": {},
 15 |    "source": [
 16 |     "## Bootstrap"
 17 |    ]
 18 |   },
 19 |   {
 20 |    "cell_type": "code",
 21 |    "execution_count": 2,
 22 |    "metadata": {},
 23 |    "outputs": [],
 24 |    "source": [
 25 |     "class Bootstrap:\n",
 26 |     "    \n",
 27 |     "    def create_dataset(self, X, y, seed=None):\n",
 28 |     "        '''creates bootstrapped dataset'''\n",
 29 |     "        if seed:\n",
 30 |     "            np.random.seed(seed)\n",
 31 |     "        n_rows = len(X)\n",
 32 |     "        indices = np.arange(n_rows)\n",
 33 |     "        indices = np.random.choice(indices, size=n_rows, replace=True)\n",
 34 |     "        return X[indices], y[indices]"
 35 |    ]
 36 |   },
 37 |   {
 38 |    "cell_type": "markdown",
 39 |    "metadata": {},
 40 |    "source": [
 41 |     "## Create Data"
 42 |    ]
 43 |   },
 44 |   {
 45 |    "cell_type": "code",
 46 |    "execution_count": 3,
 47 |    "metadata": {},
 48 |    "outputs": [],
 49 |    "source": [
 50 |     "np.random.seed(10)"
 51 |    ]
 52 |   },
 53 |   {
 54 |    "cell_type": "code",
 55 |    "execution_count": 4,
 56 |    "metadata": {},
 57 |    "outputs": [
 58 |     {
 59 |      "data": {
 60 |       "text/plain": [
 61 |        "array([[9, 4, 0, 1, 9],\n",
 62 |        "       [0, 1, 8, 9, 0],\n",
 63 |        "       [8, 6, 4, 3, 0],\n",
 64 |        "       [4, 6, 8, 1, 8],\n",
 65 |        "       [4, 1, 3, 6, 5],\n",
 66 |        "       [3, 9, 6, 9, 1],\n",
 67 |        "       [9, 4, 2, 6, 7],\n",
 68 |        "       [8, 8, 9, 2, 0],\n",
 69 |        "       [6, 7, 8, 1, 7],\n",
 70 |        "       [1, 4, 0, 8, 5]])"
 71 |       ]
 72 |      },
 73 |      "execution_count": 4,
 74 |      "metadata": {},
 75 |      "output_type": "execute_result"
 76 |     }
 77 |    ],
 78 |    "source": [
 79 |     "X = np.random.randint(0,10,50).reshape(10,5)\n",
 80 |     "X"
 81 |    ]
 82 |   },
 83 |   {
 84 |    "cell_type": "code",
 85 |    "execution_count": 5,
 86 |    "metadata": {},
 87 |    "outputs": [
 88 |     {
 89 |      "data": {
 90 |       "text/plain": [
 91 |        "array([1, 0, 1, 1, 1, 0, 1, 0, 0, 1])"
 92 |       ]
 93 |      },
 94 |      "execution_count": 5,
 95 |      "metadata": {},
 96 |      "output_type": "execute_result"
 97 |     }
 98 |    ],
 99 |    "source": [
100 |     "y = np.random.binomial(1, 0.5, 10)\n",
101 |     "y"
102 |    ]
103 |   },
104 |   {
105 |    "cell_type": "markdown",
106 |    "metadata": {},
107 |    "source": [
108 |     "### Bootstrap Example"
109 |    ]
110 |   },
111 |   {
112 |    "cell_type": "code",
113 |    "execution_count": 6,
114 |    "metadata": {},
115 |    "outputs": [],
116 |    "source": [
117 |     "bs = Bootstrap()\n",
118 |     "X, y = bs.create_dataset(X,y)"
119 |    ]
120 |   },
121 |   {
122 |    "cell_type": "code",
123 |    "execution_count": 7,
124 |    "metadata": {},
125 |    "outputs": [
126 |     {
127 |      "data": {
128 |       "text/plain": [
129 |        "array([[9, 4, 0, 1, 9],\n",
130 |        "       [9, 4, 0, 1, 9],\n",
131 |        "       [9, 4, 2, 6, 7],\n",
132 |        "       [1, 4, 0, 8, 5],\n",
133 |        "       [0, 1, 8, 9, 0],\n",
134 |        "       [6, 7, 8, 1, 7],\n",
135 |        "       [1, 4, 0, 8, 5],\n",
136 |        "       [0, 1, 8, 9, 0],\n",
137 |        "       [8, 6, 4, 3, 0],\n",
138 |        "       [6, 7, 8, 1, 7]])"
139 |       ]
140 |      },
141 |      "execution_count": 7,
142 |      "metadata": {},
143 |      "output_type": "execute_result"
144 |     }
145 |    ],
146 |    "source": [
147 |     "X"
148 |    ]
149 |   },
150 |   {
151 |    "cell_type": "code",
152 |    "execution_count": 8,
153 |    "metadata": {},
154 |    "outputs": [
155 |     {
156 |      "data": {
157 |       "text/plain": [
158 |        "array([1, 1, 1, 1, 0, 0, 1, 0, 1, 0])"
159 |       ]
160 |      },
161 |      "execution_count": 8,
162 |      "metadata": {},
163 |      "output_type": "execute_result"
164 |     }
165 |    ],
166 |    "source": [
167 |     "y"
168 |    ]
169 |   },
170 |   {
171 |    "cell_type": "markdown",
172 |    "metadata": {},
173 |    "source": [
174 |     "## Bagging"
175 |    ]
176 |   },
177 |   {
178 |    "cell_type": "code",
179 |    "execution_count": 9,
180 |    "metadata": {},
181 |    "outputs": [],
182 |    "source": [
183 |     "class Bagging(Bootstrap):\n",
184 |     "    \n",
185 |     "    def __init__(self):\n",
186 |     "        self.estimator = None\n",
187 |     "        \n",
188 |     "    def simulate(self, estimator, X, y, n_datasets, seed=None):\n",
189 |     "        if seed:\n",
190 |     "            self.seed=seed\n",
191 |     "        self.coefficients_ = []\n",
192 |     "        self.estimator = estimator\n",
193 |     "        for dataset in range(n_datasets):\n",
194 |     "            X_, y_ = self.create_dataset(X, y, seed=self.seed)\n",
195 |     "            self.estimator.fit(X_, y_)\n",
196 |     "            self.coefficients_.append(self.estimator.coef_)\n",
197 |     "            self.seed += 1"
198 |    ]
199 |   },
200 |   {
201 |    "cell_type": "markdown",
202 |    "metadata": {},
203 |    "source": [
204 |     "### Example: Sklearn vs Bagging"
205 |    ]
206 |   },
207 |   {
208 |    "cell_type": "markdown",
209 |    "metadata": {},
210 |    "source": [
211 |     "#### Sklearn"
212 |    ]
213 |   },
214 |   {
215 |    "cell_type": "code",
216 |    "execution_count": 10,
217 |    "metadata": {},
218 |    "outputs": [],
219 |    "source": [
220 |     "from sklearn.linear_model import LinearRegression\n",
221 |     "lr = LinearRegression()"
222 |    ]
223 |   },
224 |   {
225 |    "cell_type": "code",
226 |    "execution_count": 11,
227 |    "metadata": {},
228 |    "outputs": [
229 |     {
230 |      "name": "stdout",
231 |      "output_type": "stream",
232 |      "text": [
233 |       "[ 0.05415476  0.04757173 -0.13439324  0.03477829 -0.04744752]\n"
234 |      ]
235 |     }
236 |    ],
237 |    "source": [
238 |     "lr.fit(X,y)\n",
239 |     "print(lr.coef_)"
240 |    ]
241 |   },
242 |   {
243 |    "cell_type": "markdown",
244 |    "metadata": {},
245 |    "source": [
246 |     "#### Bagging"
247 |    ]
248 |   },
249 |   {
250 |    "cell_type": "code",
251 |    "execution_count": 12,
252 |    "metadata": {},
253 |    "outputs": [],
254 |    "source": [
255 |     "bg = Bagging()\n",
256 |     "bg.simulate(lr, X, y, n_datasets=100, seed=42)"
257 |    ]
258 |   },
259 |   {
260 |    "cell_type": "code",
261 |    "execution_count": 13,
262 |    "metadata": {},
263 |    "outputs": [
264 |     {
265 |      "data": {
266 |       "text/plain": [
267 |        "[array([ 0.03065716, -0.0095945 , -0.11071628,  0.04298948,  0.03107712]),\n",
268 |        " array([ 0.03202357,  0.00323003, -0.10583687,  0.05571963,  0.03346221]),\n",
269 |        " array([ 0.01504485,  0.09281765, -0.10585113,  0.0013086 , -0.02779966]),\n",
270 |        " array([ 0.03890223,  0.04224749, -0.1397625 ,  0.01358421, -0.05403209]),\n",
271 |        " array([ 0.05415476,  0.04757173, -0.13439324,  0.03477829, -0.04744752]),\n",
272 |        " array([ 0.01504485,  0.09281765, -0.10585113,  0.0013086 , -0.02779966]),\n",
273 |        " array([ 0.0225789 ,  0.04256523, -0.084117  ,  0.0519456 ,  0.04574701]),\n",
274 |        " array([ 0.05415476,  0.04757173, -0.13439324,  0.03477829, -0.04744752]),\n",
275 |        " array([ 0.03202357,  0.00323003, -0.10583687,  0.05571963,  0.03346221]),\n",
276 |        " array([ 0.02997327, -0.04066438, -0.09418353,  0.0071274 ,  0.01730941])]"
277 |       ]
278 |      },
279 |      "execution_count": 13,
280 |      "metadata": {},
281 |      "output_type": "execute_result"
282 |     }
283 |    ],
284 |    "source": [
285 |     "bg.coefficients_[:10]"
286 |    ]
287 |   },
288 |   {
289 |    "cell_type": "markdown",
290 |    "metadata": {},
291 |    "source": [
292 |     "#### Mean of Coefficients"
293 |    ]
294 |   },
295 |   {
296 |    "cell_type": "code",
297 |    "execution_count": 14,
298 |    "metadata": {},
299 |    "outputs": [
300 |     {
301 |      "data": {
302 |       "text/plain": [
303 |        "array([ 0.03915596,  0.0205273 , -0.11609587,  0.02856614, -0.01729375])"
304 |       ]
305 |      },
306 |      "execution_count": 14,
307 |      "metadata": {},
308 |      "output_type": "execute_result"
309 |     }
310 |    ],
311 |    "source": [
312 |     "np.mean(bg.coefficients_, axis=0)"
313 |    ]
314 |   },
315 |   {
316 |    "cell_type": "markdown",
317 |    "metadata": {},
318 |    "source": [
319 |     "#### Standard Error of Coefficients"
320 |    ]
321 |   },
322 |   {
323 |    "cell_type": "code",
324 |    "execution_count": 15,
325 |    "metadata": {},
326 |    "outputs": [
327 |     {
328 |      "data": {
329 |       "text/plain": [
330 |        "array([ 0.01944324,  0.03295672,  0.02520118,  0.01921173,  0.03869426])"
331 |       ]
332 |      },
333 |      "execution_count": 15,
334 |      "metadata": {},
335 |      "output_type": "execute_result"
336 |     }
337 |    ],
338 |    "source": [
339 |     "np.std(bg.coefficients_, axis=0)"
340 |    ]
341 |   }
342 |  ],
343 |  "metadata": {
344 |   "kernelspec": {
345 |    "display_name": "Python [conda root]",
346 |    "language": "python",
347 |    "name": "conda-root-py"
348 |   },
349 |   "language_info": {
350 |    "codemirror_mode": {
351 |     "name": "ipython",
352 |     "version": 3
353 |    },
354 |    "file_extension": ".py",
355 |    "mimetype": "text/x-python",
356 |    "name": "python",
357 |    "nbconvert_exporter": "python",
358 |    "pygments_lexer": "ipython3",
359 |    "version": "3.5.4"
360 |   }
361 |  },
362 |  "nbformat": 4,
363 |  "nbformat_minor": 2
364 | }
365 | 


--------------------------------------------------------------------------------
/notebooks/Machine_Learning/Algorithms_From_Scratch/Cross-Validation.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {
  7 |     "collapsed": true
  8 |    },
  9 |    "outputs": [],
 10 |    "source": [
 11 |     "import numpy as np"
 12 |    ]
 13 |   },
 14 |   {
 15 |    "cell_type": "code",
 16 |    "execution_count": 2,
 17 |    "metadata": {},
 18 |    "outputs": [
 19 |     {
 20 |      "data": {
 21 |       "text/plain": [
 22 |        "array([[ 4, 91,  9, 37, 28, 29,  3, 43, 71, 59],\n",
 23 |        "       [55, 19, 38, 83, 21, 92, 85, 82, 91, 19],\n",
 24 |        "       [ 1, 49, 31, 86, 43, 80, 44, 87, 73, 73],\n",
 25 |        "       [29, 97,  6, 73, 40, 17, 16, 97, 50, 14],\n",
 26 |        "       [13, 10, 56, 21, 24, 47, 43, 96, 57, 35],\n",
 27 |        "       [21, 41,  3, 72, 34, 65, 89, 14, 83, 63],\n",
 28 |        "       [ 0, 70, 39, 80, 83, 67, 81, 36, 26, 79],\n",
 29 |        "       [65,  9, 17, 19, 58, 13,  3, 92, 11, 72],\n",
 30 |        "       [93, 64, 84, 37, 23, 34, 15, 48,  9, 48],\n",
 31 |        "       [13, 28, 93, 54, 77, 58, 75, 72, 37, 48]])"
 32 |       ]
 33 |      },
 34 |      "execution_count": 2,
 35 |      "metadata": {},
 36 |      "output_type": "execute_result"
 37 |     }
 38 |    ],
 39 |    "source": [
 40 |     "fake_data = np.random.randint(0,100,100).reshape(10,10)\n",
 41 |     "fake_data"
 42 |    ]
 43 |   },
 44 |   {
 45 |    "cell_type": "code",
 46 |    "execution_count": 211,
 47 |    "metadata": {},
 48 |    "outputs": [],
 49 |    "source": [
 50 |     "def cross_validation(data, shuffle=True, folds=2, random_state=None, verbose=True):\n",
 51 |     "    '''print the train and test sets for each fold (nothing is returned)'''\n",
 52 |     "        \n",
 53 |     "    # error checking\n",
 54 |     "    assert type(shuffle) == bool, 'shuffle must be True or False!'\n",
 55 |     "    assert type(folds) == int, 'must be an integer'\n",
 56 |     "    assert folds > 1, 'must have 2 or more folds'\n",
 57 |     "    assert folds <= len(data), 'can have no more than {} folds!'.format(len(data))\n",
 58 |     "    if random_state != None:\n",
 59 |     "        assert type(random_state) == int, 'random_state must be an integer!'\n",
 60 |     "    \n",
 61 |     "    # reproducibility\n",
 62 |     "    if random_state:\n",
 63 |     "        np.random.seed(random_state)\n",
 64 |     "    \n",
 65 |     "    # setup indices\n",
 66 |     "    indices = np.arange(data.shape[0])\n",
 67 |     "    if shuffle:\n",
 68 |     "        np.random.shuffle(indices)\n",
 69 |     "    \n",
 70 |     "    # setup K-fold\n",
 71 |     "    num_indices = len(indices)\n",
 72 |     "    split_proportion = 1/folds\n",
 73 |     "    split = int( np.floor(split_proportion * num_indices) )\n",
 74 |     "    \n",
 75 |     "    # main logic\n",
 76 |     "    for fold in range(folds):\n",
 77 |     "        mask = np.ones(num_indices, dtype=bool)\n",
 78 |     "        offset = split*fold\n",
 79 |     "        if fold < folds-1:\n",
 80 |     "            test_indices = indices[offset:split+offset]\n",
 81 |     "        else:\n",
 82 |     "            test_indices = indices[offset:]\n",
 83 |     "        mask[offset:split+offset] = False\n",
 84 |     "        train_indices = indices[mask]        \n",
 85 |     "        train = data[train_indices]\n",
 86 |     "        test = data[test_indices]\n",
 87 |     "        if verbose:\n",
 88 |     "            print('Fold:', fold)\n",
 89 |     "            print('Train:\\n', train)\n",
 90 |     "            print('Test:\\n', test, '\\n')\n",
 91 |     "    \n",
 92 |     "    #return train, test"
 93 |    ]
 94 |   },
 95 |   {
 96 |    "cell_type": "code",
 97 |    "execution_count": 216,
 98 |    "metadata": {},
 99 |    "outputs": [
100 |     {
101 |      "name": "stdout",
102 |      "output_type": "stream",
103 |      "text": [
104 |       "Fold: 0\n",
105 |       "Train:\n",
106 |       " [[ 4 91  9 37 28 29  3 43 71 59]\n",
107 |       " [65  9 17 19 58 13  3 92 11 72]\n",
108 |       " [ 1 49 31 86 43 80 44 87 73 73]\n",
109 |       " [13 28 93 54 77 58 75 72 37 48]\n",
110 |       " [13 10 56 21 24 47 43 96 57 35]\n",
111 |       " [29 97  6 73 40 17 16 97 50 14]\n",
112 |       " [ 0 70 39 80 83 67 81 36 26 79]]\n",
113 |       "Test:\n",
114 |       " [[93 64 84 37 23 34 15 48  9 48]\n",
115 |       " [55 19 38 83 21 92 85 82 91 19]\n",
116 |       " [21 41  3 72 34 65 89 14 83 63]] \n",
117 |       "\n",
118 |       "Fold: 1\n",
119 |       "Train:\n",
120 |       " [[93 64 84 37 23 34 15 48  9 48]\n",
121 |       " [55 19 38 83 21 92 85 82 91 19]\n",
122 |       " [21 41  3 72 34 65 89 14 83 63]\n",
123 |       " [13 28 93 54 77 58 75 72 37 48]\n",
124 |       " [13 10 56 21 24 47 43 96 57 35]\n",
125 |       " [29 97  6 73 40 17 16 97 50 14]\n",
126 |       " [ 0 70 39 80 83 67 81 36 26 79]]\n",
127 |       "Test:\n",
128 |       " [[ 4 91  9 37 28 29  3 43 71 59]\n",
129 |       " [65  9 17 19 58 13  3 92 11 72]\n",
130 |       " [ 1 49 31 86 43 80 44 87 73 73]] \n",
131 |       "\n",
132 |       "Fold: 2\n",
133 |       "Train:\n",
134 |       " [[93 64 84 37 23 34 15 48  9 48]\n",
135 |       " [55 19 38 83 21 92 85 82 91 19]\n",
136 |       " [21 41  3 72 34 65 89 14 83 63]\n",
137 |       " [ 4 91  9 37 28 29  3 43 71 59]\n",
138 |       " [65  9 17 19 58 13  3 92 11 72]\n",
139 |       " [ 1 49 31 86 43 80 44 87 73 73]\n",
140 |       " [ 0 70 39 80 83 67 81 36 26 79]]\n",
141 |       "Test:\n",
142 |       " [[13 28 93 54 77 58 75 72 37 48]\n",
143 |       " [13 10 56 21 24 47 43 96 57 35]\n",
144 |       " [29 97  6 73 40 17 16 97 50 14]\n",
145 |       " [ 0 70 39 80 83 67 81 36 26 79]] \n",
146 |       "\n"
147 |      ]
148 |     }
149 |    ],
150 |    "source": [
151 |     "cross_validation(fake_data, shuffle=True, folds=3, random_state=42, verbose=True)"
152 |    ]
153 |   }
154 |  ],
155 |  "metadata": {
156 |   "kernelspec": {
157 |    "display_name": "Python [conda root]",
158 |    "language": "python",
159 |    "name": "conda-root-py"
160 |   },
161 |   "language_info": {
162 |    "codemirror_mode": {
163 |     "name": "ipython",
164 |     "version": 3
165 |    },
166 |    "file_extension": ".py",
167 |    "mimetype": "text/x-python",
168 |    "name": "python",
169 |    "nbconvert_exporter": "python",
170 |    "pygments_lexer": "ipython3",
171 |    "version": "3.5.4"
172 |   }
173 |  },
174 |  "nbformat": 4,
175 |  "nbformat_minor": 2
176 | }
177 | 


--------------------------------------------------------------------------------
/notebooks/Machine_Learning/Algorithms_From_Scratch/KNN.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 3,
  6 |    "metadata": {
  7 |     "collapsed": true
  8 |    },
  9 |    "outputs": [],
 10 |    "source": [
 11 |     "import numpy as np\n",
 12 |     "from scipy.stats import mode\n",
 13 |     "from scipy.spatial.distance import euclidean"
 14 |    ]
 15 |   },
 16 |   {
 17 |    "cell_type": "code",
 18 |    "execution_count": 14,
 19 |    "metadata": {
 20 |     "collapsed": true
 21 |    },
 22 |    "outputs": [],
 23 |    "source": [
 24 |     "class KNN:\n",
 25 |     "    '''Lazy K-nearest-neighbors learner: predict(y) scores each point given to\n",
 26 |     "    fit() from the targets of its K closest other fitted points.'''\n",
 27 |     "    \n",
 28 |     "    def __init__(self, K=2, learning='classification'):\n",
 29 |     "        '''\n",
 30 |     "        K = number of neighbors\n",
 31 |     "        learning = 'classification' or 'regression'\n",
 32 |     "        '''\n",
 33 |     "        assert learning == 'classification' or learning == 'regression', 'need to set learning.'\n",
 34 |     "        self.K_ = K\n",
 35 |     "        self.type_ = learning\n",
 36 |     "        self._nrows = None\n",
 37 |     "        self._indices = None\n",
 38 |     "        self._X = None\n",
 39 |     "    \n",
 40 |     "    def _calc_distance(self):\n",
 41 |     "        '''Calculate the distance between every pair of fitted data points.\n",
 42 |     "        Input:\n",
 43 |     "            none (reads self._X, the data stored by fit)\n",
 44 |     "        Output:\n",
 45 |     "            (nrows, nrows) matrix of Euclidean distances; the diagonal is\n",
 46 |     "            np.inf so that no point is its own nearest neighbor\n",
 47 |     "        '''\n",
 48 |     "        self._nrows = len(self._X)\n",
 49 |     "        distances = np.array([euclidean(self._X[i], self._X[j]) if i!=j else np.inf\n",
 50 |     "                              for i in range(self._nrows)\n",
 51 |     "                              for j in range(self._nrows)])\n",
 52 |     "        return distances.reshape(self._nrows, self._nrows)\n",
 53 |     "    \n",
 54 |     "    def _sort_indices(self, X):\n",
 55 |     "        '''Sort distance matrix indices by shortest distance.\n",
 56 |     "        Input:\n",
 57 |     "            X = numpy distance matrix from self._calc_distance\n",
 58 |     "        Output:\n",
 59 |     "            indices sorted by min distance\n",
 60 |     "        '''\n",
 61 |     "        return np.argsort(X, axis=1)\n",
 62 |     "    \n",
 63 |     "    def fit(self, X):\n",
 64 |     "        '''Store the data (by reference, no copy is made).\n",
 65 |     "        Input:\n",
 66 |     "            X = numpy array of data points, one row per point'''\n",
 67 |     "        assert self.K_ < X.shape[0], 'You chose too many neighbors!'\n",
 68 |     "        self._X = X\n",
 69 |     "    \n",
 70 |     "    def predict(self, y):\n",
 71 |     "        '''Predict a target for each fitted point from its K nearest neighbors.\n",
 72 |     "        Input:\n",
 73 |     "            y = targets aligned with the rows of the X given to fit\n",
 74 |     "        Output:\n",
 75 |     "            classification: most common neighbor label per point\n",
 76 |     "            regression: mean neighbor target per point'''\n",
 77 |     "        y = np.asarray(y)\n",
 78 |     "        distances = self._calc_distance()\n",
 79 |     "        self._indices = self._sort_indices(distances)\n",
 80 |     "        # the last sorted column is the np.inf self-distance; drop it\n",
 81 |     "        self._indices = self._indices[:, :-1][:, :self.K_]\n",
 82 |     "        if self.type_ == 'classification':\n",
 83 |     "            return mode(y[self._indices], axis=1)[0]\n",
 84 |     "        # one mean per row (a trailing [0] here kept only the first point's mean)\n",
 85 |     "        return np.mean(y[self._indices], axis=1)"
 86 |    ]
 87 |   },
 88 |   {
 89 |    "cell_type": "markdown",
 90 |    "metadata": {
 91 |     "collapsed": true
 92 |    },
 93 |    "source": [
 94 |     "## Generate Data"
 95 |    ]
 96 |   },
 97 |   {
 98 |    "cell_type": "code",
 99 |    "execution_count": 4,
100 |    "metadata": {},
101 |    "outputs": [
102 |     {
103 |      "data": {
104 |       "text/plain": [
105 |        "array([[ 0,  0],\n",
106 |        "       [ 1,  2],\n",
107 |        "       [ 4,  9],\n",
108 |        "       [ 5, 10]])"
109 |       ]
110 |      },
111 |      "execution_count": 4,
112 |      "metadata": {},
113 |      "output_type": "execute_result"
114 |     }
115 |    ],
116 |    "source": [
117 |     "X = np.array([0,0,1,2,4,9,5,10]).reshape(4,2)\n",
118 |     "X"
119 |    ]
120 |   },
121 |   {
122 |    "cell_type": "code",
123 |    "execution_count": 5,
124 |    "metadata": {},
125 |    "outputs": [
126 |     {
127 |      "data": {
128 |       "text/plain": [
129 |        "array([1, 1, 1, 0])"
130 |       ]
131 |      },
132 |      "execution_count": 5,
133 |      "metadata": {},
134 |      "output_type": "execute_result"
135 |     }
136 |    ],
137 |    "source": [
138 |     "y = np.array([1,1,1,0])\n",
139 |     "y"
140 |    ]
141 |   },
142 |   {
143 |    "cell_type": "markdown",
144 |    "metadata": {
145 |     "collapsed": true
146 |    },
147 |    "source": [
148 |     "## Modeling"
149 |    ]
150 |   },
151 |   {
152 |    "cell_type": "code",
153 |    "execution_count": 29,
154 |    "metadata": {
155 |     "collapsed": true
156 |    },
157 |    "outputs": [],
158 |    "source": [
159 |     "knn = KNN(K=3)"
160 |    ]
161 |   },
162 |   {
163 |    "cell_type": "code",
164 |    "execution_count": 30,
165 |    "metadata": {},
166 |    "outputs": [
167 |     {
168 |      "data": {
169 |       "text/plain": [
170 |        "array([[1],\n",
171 |        "       [1],\n",
172 |        "       [1],\n",
173 |        "       [1]])"
174 |       ]
175 |      },
176 |      "execution_count": 30,
177 |      "metadata": {},
178 |      "output_type": "execute_result"
179 |     }
180 |    ],
181 |    "source": [
182 |     "knn.fit(X)\n",
183 |     "knn.predict(y)"
184 |    ]
185 |   }
186 |  ],
187 |  "metadata": {
188 |   "kernelspec": {
189 |    "display_name": "Python [conda root]",
190 |    "language": "python",
191 |    "name": "conda-root-py"
192 |   },
193 |   "language_info": {
194 |    "codemirror_mode": {
195 |     "name": "ipython",
196 |     "version": 3
197 |    },
198 |    "file_extension": ".py",
199 |    "mimetype": "text/x-python",
200 |    "name": "python",
201 |    "nbconvert_exporter": "python",
202 |    "pygments_lexer": "ipython3",
203 |    "version": "3.5.4"
204 |   }
205 |  },
206 |  "nbformat": 4,
207 |  "nbformat_minor": 2
208 | }
209 | 


--------------------------------------------------------------------------------
/notebooks/Machine_Learning/Algorithms_From_Scratch/Train_Test_Split.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {
  7 |     "collapsed": true
  8 |    },
  9 |    "outputs": [],
 10 |    "source": [
 11 |     "import numpy as np"
 12 |    ]
 13 |   },
 14 |   {
 15 |    "cell_type": "code",
 16 |    "execution_count": 2,
 17 |    "metadata": {},
 18 |    "outputs": [
 19 |     {
 20 |      "data": {
 21 |       "text/plain": [
 22 |        "array([[44, 42, 47, 60, 78, 60, 26, 79, 43, 63],\n",
 23 |        "       [38, 60, 16, 75, 44, 63, 80,  8, 21, 80],\n",
 24 |        "       [14, 76, 22, 67,  5, 16, 11,  5, 26, 57],\n",
 25 |        "       [32,  1, 22, 96,  0, 40, 23, 54, 24, 76],\n",
 26 |        "       [83, 96, 34, 47, 15, 39, 85, 58, 29, 80],\n",
 27 |        "       [43, 55, 36, 72, 77, 61,  4, 77, 38, 94],\n",
 28 |        "       [49, 57, 58, 44, 91, 22, 95, 46, 11, 11],\n",
 29 |        "       [ 0, 77, 60, 70, 98, 10, 50, 38, 13, 90],\n",
 30 |        "       [72, 56, 71, 25, 25, 12, 79, 26, 81, 68],\n",
 31 |        "       [63,  0, 47, 80, 78, 13, 32, 90, 50,  9]])"
 32 |       ]
 33 |      },
 34 |      "execution_count": 2,
 35 |      "metadata": {},
 36 |      "output_type": "execute_result"
 37 |     }
 38 |    ],
 39 |    "source": [
 40 |     "fake_data = np.random.randint(0,100,100).reshape(10,10)\n",
 41 |     "fake_data"
 42 |    ]
 43 |   },
 44 |   {
 45 |    "cell_type": "code",
 46 |    "execution_count": 3,
 47 |    "metadata": {
 48 |     "collapsed": true
 49 |    },
 50 |    "outputs": [],
 51 |    "source": [
 52 |     "def train_test_split(data, shuffle=True, test_proportion=0.2, random_state=None):\n",
 53 |     "    '''Return (train, test): the rows of data (numpy array) split along axis 0.\n",
 54 |     "    shuffle: bool, permute the rows first (uses the global np.random state)\n",
 55 |     "    test_proportion: fraction of rows, rounded down, that go to the test set\n",
 56 |     "    random_state: None or int; an int reseeds np.random for reproducibility\n",
 57 |     "    Invalid arguments raise AssertionError.'''\n",
 58 |     "    \n",
 59 |     "    # error checking\n",
 60 |     "    assert type(shuffle) == bool, 'shuffle must be True or False!'\n",
 61 |     "    assert random_state is None or type(random_state) == int, 'random_state must be an integer!'\n",
 62 |     "    assert 0 <= test_proportion <= 1, 'test_proportion must be between 0 and 1!'\n",
 63 |     "    \n",
 64 |     "    # reproducibility (compared with None so that a seed of 0 is honored too)\n",
 65 |     "    if random_state is not None:\n",
 66 |     "        np.random.seed(random_state)\n",
 67 |     "    \n",
 68 |     "    # setup indices\n",
 69 |     "    indices = np.arange(data.shape[0])\n",
 70 |     "    if shuffle:\n",
 71 |     "        np.random.shuffle(indices)\n",
 72 |     "    \n",
 73 |     "    # main logic: the first split shuffled rows form the test set\n",
 74 |     "    split = int( np.floor(test_proportion * len(indices)) )\n",
 75 |     "    return data[indices[split:]], data[indices[:split]]"
 76 |    ]
 77 |   },
 78 |   {
 79 |    "cell_type": "code",
 80 |    "execution_count": 4,
 81 |    "metadata": {
 82 |     "collapsed": true
 83 |    },
 84 |    "outputs": [],
 85 |    "source": [
 86 |     "train, test = train_test_split(fake_data, shuffle=True, test_proportion=0.4, random_state=43)"
 87 |    ]
 88 |   },
 89 |   {
 90 |    "cell_type": "code",
 91 |    "execution_count": 5,
 92 |    "metadata": {},
 93 |    "outputs": [
 94 |     {
 95 |      "data": {
 96 |       "text/plain": [
 97 |        "array([[14, 76, 22, 67,  5, 16, 11,  5, 26, 57],\n",
 98 |        "       [43, 55, 36, 72, 77, 61,  4, 77, 38, 94],\n",
 99 |        "       [38, 60, 16, 75, 44, 63, 80,  8, 21, 80],\n",
100 |        "       [ 0, 77, 60, 70, 98, 10, 50, 38, 13, 90],\n",
101 |        "       [44, 42, 47, 60, 78, 60, 26, 79, 43, 63],\n",
102 |        "       [83, 96, 34, 47, 15, 39, 85, 58, 29, 80]])"
103 |       ]
104 |      },
105 |      "execution_count": 5,
106 |      "metadata": {},
107 |      "output_type": "execute_result"
108 |     }
109 |    ],
110 |    "source": [
111 |     "train"
112 |    ]
113 |   },
114 |   {
115 |    "cell_type": "code",
116 |    "execution_count": 6,
117 |    "metadata": {},
118 |    "outputs": [
119 |     {
120 |      "data": {
121 |       "text/plain": [
122 |        "array([[32,  1, 22, 96,  0, 40, 23, 54, 24, 76],\n",
123 |        "       [63,  0, 47, 80, 78, 13, 32, 90, 50,  9],\n",
124 |        "       [49, 57, 58, 44, 91, 22, 95, 46, 11, 11],\n",
125 |        "       [72, 56, 71, 25, 25, 12, 79, 26, 81, 68]])"
126 |       ]
127 |      },
128 |      "execution_count": 6,
129 |      "metadata": {},
130 |      "output_type": "execute_result"
131 |     }
132 |    ],
133 |    "source": [
134 |     "test"
135 |    ]
136 |   }
137 |  ],
138 |  "metadata": {
139 |   "kernelspec": {
140 |    "display_name": "Python [conda root]",
141 |    "language": "python",
142 |    "name": "conda-root-py"
143 |   },
144 |   "language_info": {
145 |    "codemirror_mode": {
146 |     "name": "ipython",
147 |     "version": 3
148 |    },
149 |    "file_extension": ".py",
150 |    "mimetype": "text/x-python",
151 |    "name": "python",
152 |    "nbconvert_exporter": "python",
153 |    "pygments_lexer": "ipython3",
154 |    "version": "3.5.4"
155 |   }
156 |  },
157 |  "nbformat": 4,
158 |  "nbformat_minor": 2
159 | }
160 | 


--------------------------------------------------------------------------------
/notebooks/Machine_Learning/Algorithms_From_Scratch/Train_Validation_Test_Split.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {
  7 |     "collapsed": true
  8 |    },
  9 |    "outputs": [],
 10 |    "source": [
 11 |     "import numpy as np"
 12 |    ]
 13 |   },
 14 |   {
 15 |    "cell_type": "code",
 16 |    "execution_count": 2,
 17 |    "metadata": {},
 18 |    "outputs": [
 19 |     {
 20 |      "data": {
 21 |       "text/plain": [
 22 |        "array([[75, 50, 96, 10, 21, 46, 93,  1, 91,  7],\n",
 23 |        "       [64, 92,  2, 21, 25, 53, 62, 82,  5, 58],\n",
 24 |        "       [58, 26, 55, 33, 91, 47,  3, 14, 40, 23],\n",
 25 |        "       [93, 67,  6,  0, 49, 13,  5, 61, 11, 16],\n",
 26 |        "       [36, 41, 40, 58, 47, 55, 80, 98, 66, 94],\n",
 27 |        "       [32, 69, 52, 87, 79, 50, 30, 68, 93, 65],\n",
 28 |        "       [11, 73, 39, 59, 81, 55, 71,  2, 68, 61],\n",
 29 |        "       [88, 25, 62, 63, 26, 57, 98, 74, 11, 25],\n",
 30 |        "       [ 2, 34, 30, 10,  5, 18, 76, 43, 11, 88],\n",
 31 |        "       [ 4, 17, 87, 87, 50, 78, 58, 31, 29, 89]])"
 32 |       ]
 33 |      },
 34 |      "execution_count": 2,
 35 |      "metadata": {},
 36 |      "output_type": "execute_result"
 37 |     }
 38 |    ],
 39 |    "source": [
 40 |     "fake_data = np.random.randint(0,100,100).reshape(10,10)\n",
 41 |     "fake_data"
 42 |    ]
 43 |   },
 44 |   {
 45 |    "cell_type": "code",
 46 |    "execution_count": 3,
 47 |    "metadata": {
 48 |     "collapsed": true
 49 |    },
 50 |    "outputs": [],
 51 |    "source": [
 52 |     "def train_validation_test_split(data, shuffle=True, validation_proportion=0.3,\n",
 53 |     "                                test_proportion=0.2, random_state=None):\n",
 54 |     "    '''Return (train, validation, test): the rows of data split along axis 0.\n",
 55 |     "    data: numpy array\n",
 56 |     "    shuffle: bool, permute the rows first (uses the global np.random state)\n",
 57 |     "    validation_proportion, test_proportion: fractions of all rows, each\n",
 58 |     "        rounded down to a whole number of rows; train receives the rest\n",
 59 |     "    random_state: None or int; an int reseeds np.random for reproducibility\n",
 60 |     "    Invalid arguments raise AssertionError.'''\n",
 61 |     "    \n",
 62 |     "    # error checking\n",
 63 |     "    assert type(shuffle) == bool, 'shuffle must be True or False!'\n",
 64 |     "    assert random_state is None or type(random_state) == int, 'random_state must be an integer!'\n",
 65 |     "    assert validation_proportion >= 0 and test_proportion >= 0, 'proportions must not be negative!'\n",
 66 |     "    assert validation_proportion + test_proportion < 1, 'need some training data!'\n",
 67 |     "    \n",
 68 |     "    # reproducibility (compared with None so that a seed of 0 is honored too)\n",
 69 |     "    if random_state is not None:\n",
 70 |     "        np.random.seed(random_state)\n",
 71 |     "    \n",
 72 |     "    # setup indices\n",
 73 |     "    indices = np.arange(data.shape[0])\n",
 74 |     "    if shuffle:\n",
 75 |     "        np.random.shuffle(indices)\n",
 76 |     "    \n",
 77 |     "    # calc num of rows in each held-out set\n",
 78 |     "    num_indices = len(indices)\n",
 79 |     "    validation_split = int( np.floor(validation_proportion * num_indices) )\n",
 80 |     "    test_split = int( np.floor(test_proportion * num_indices) )\n",
 81 |     "    \n",
 82 |     "    # parse indices: test rows come first, then validation, then train\n",
 83 |     "    test_indices = indices[:test_split]\n",
 84 |     "    validation_indices = indices[test_split:test_split + validation_split]\n",
 85 |     "    train_indices = indices[test_split + validation_split:]\n",
 86 |     "    \n",
 87 |     "    # create data sets\n",
 88 |     "    return data[train_indices], data[validation_indices], data[test_indices]"
 89 |    ]
 90 |   },
 91 |   {
 92 |    "cell_type": "code",
 93 |    "execution_count": 4,
 94 |    "metadata": {
 95 |     "collapsed": true
 96 |    },
 97 |    "outputs": [],
 98 |    "source": [
 99 |     "train, validation, test = train_validation_test_split(fake_data,  \n",
100 |     "                                                      shuffle=True, \n",
101 |     "                                                      validation_proportion=0.3,\n",
102 |     "                                                      test_proportion=0.2, \n",
103 |     "                                                      random_state=43)"
104 |    ]
105 |   },
106 |   {
107 |    "cell_type": "code",
108 |    "execution_count": 5,
109 |    "metadata": {},
110 |    "outputs": [
111 |     {
112 |      "data": {
113 |       "text/plain": [
114 |        "array([[32, 69, 52, 87, 79, 50, 30, 68, 93, 65],\n",
115 |        "       [64, 92,  2, 21, 25, 53, 62, 82,  5, 58],\n",
116 |        "       [88, 25, 62, 63, 26, 57, 98, 74, 11, 25],\n",
117 |        "       [75, 50, 96, 10, 21, 46, 93,  1, 91,  7],\n",
118 |        "       [36, 41, 40, 58, 47, 55, 80, 98, 66, 94]])"
119 |       ]
120 |      },
121 |      "execution_count": 5,
122 |      "metadata": {},
123 |      "output_type": "execute_result"
124 |     }
125 |    ],
126 |    "source": [
127 |     "train"
128 |    ]
129 |   },
130 |   {
131 |    "cell_type": "code",
132 |    "execution_count": 6,
133 |    "metadata": {},
134 |    "outputs": [
135 |     {
136 |      "data": {
137 |       "text/plain": [
138 |        "array([[11, 73, 39, 59, 81, 55, 71,  2, 68, 61],\n",
139 |        "       [ 2, 34, 30, 10,  5, 18, 76, 43, 11, 88],\n",
140 |        "       [58, 26, 55, 33, 91, 47,  3, 14, 40, 23]])"
141 |       ]
142 |      },
143 |      "execution_count": 6,
144 |      "metadata": {},
145 |      "output_type": "execute_result"
146 |     }
147 |    ],
148 |    "source": [
149 |     "validation"
150 |    ]
151 |   },
152 |   {
153 |    "cell_type": "code",
154 |    "execution_count": 7,
155 |    "metadata": {},
156 |    "outputs": [
157 |     {
158 |      "data": {
159 |       "text/plain": [
160 |        "array([[93, 67,  6,  0, 49, 13,  5, 61, 11, 16],\n",
161 |        "       [ 4, 17, 87, 87, 50, 78, 58, 31, 29, 89]])"
162 |       ]
163 |      },
164 |      "execution_count": 7,
165 |      "metadata": {},
166 |      "output_type": "execute_result"
167 |     }
168 |    ],
169 |    "source": [
170 |     "test"
171 |    ]
172 |   }
173 |  ],
174 |  "metadata": {
175 |   "kernelspec": {
176 |    "display_name": "Python [conda root]",
177 |    "language": "python",
178 |    "name": "conda-root-py"
179 |   },
180 |   "language_info": {
181 |    "codemirror_mode": {
182 |     "name": "ipython",
183 |     "version": 3
184 |    },
185 |    "file_extension": ".py",
186 |    "mimetype": "text/x-python",
187 |    "name": "python",
188 |    "nbconvert_exporter": "python",
189 |    "pygments_lexer": "ipython3",
190 |    "version": "3.5.4"
191 |   }
192 |  },
193 |  "nbformat": 4,
194 |  "nbformat_minor": 2
195 | }
196 | 


--------------------------------------------------------------------------------
/notebooks/Machine_Learning/Supervised_Learning/Classification/Logistic_Regression/Sigmoid_Function.ipynb:
--------------------------------------------------------------------------------
 1 | {
 2 |  "cells": [
 3 |   {
 4 |    "cell_type": "code",
 5 |    "execution_count": 1,
 6 |    "metadata": {
 7 |     "collapsed": true
 8 |    },
 9 |    "outputs": [],
10 |    "source": [
11 |     "%matplotlib inline\n",
12 |     "import matplotlib.pyplot as plt\n",
13 |     "import numpy as np"
14 |    ]
15 |   },
16 |   {
17 |    "cell_type": "code",
18 |    "execution_count": 2,
19 |    "metadata": {
20 |     "collapsed": true
21 |    },
22 |    "outputs": [],
23 |    "source": [
24 |     "def sigmoid(x):\n",
25 |     "    return np.exp(-np.logaddexp(0, -x))  # 1 / (1 + e**-x), but cannot overflow for very negative x"
26 |    ]
27 |   },
28 |   {
29 |    "cell_type": "code",
30 |    "execution_count": 3,
31 |    "metadata": {
32 |     "collapsed": true
33 |    },
34 |    "outputs": [],
35 |    "source": [
36 |     "x = np.linspace(-15, 15, 100)\n",
37 |     "lst = [sigmoid(i) for i in x]"
38 |    ]
39 |   },
40 |   {
41 |    "cell_type": "code",
42 |    "execution_count": 4,
43 |    "metadata": {},
44 |    "outputs": [
45 |     {
46 |      "data": {
47 |       "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAAEWCAYAAACJ0YulAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3XmcXXV9//HXe7ZM9p2QfZGwhCWUDMimhFIFrEKtQlHL\nZimlBWkf9leX/vy5VH2Uam2tiiKlGPhZRH4ubdRUcAEEIZrFrJBAiBlmJoGsM1lnvZ/fH/dmvAyZ\nzM1k7py7vJ+Px2TuOed77/mczMx933O+53yPIgIzMzOAiqQLMDOzwuFQMDOzbg4FMzPr5lAwM7Nu\nDgUzM+vmUDAzs24OBStokt4n6bFCW6+kJyTdMpg1HQtJb5K0Mek6rPg4FCxxki6W9IykFkm7Jf1S\n0rkAEfGfEfHWwa7peNYr6ZOSOiTtz/r60EDX2GOdIemkw9MR8VREnJLPdVppqkq6ACtvkkYBPwT+\nEngEqAHeBLQlWdcA+HZE/GnSRZgdK+8pWNJOBoiIb0VEV0QciojHImINgKSbJD19uLGkt0ramNmr\n+KqkJw8fxsm0/aWkf5XULGmzpAsz8xskbZd0Y9ZrjZb0oKQdkuolfUxSRS/rfYukDZn1fgVQfzZW\n0hZJf5A1/UlJ38w8npX5xH+jpJcl7ZT0v7PaVkr6e0kvSdonaYWk6ZJ+kWmyOrNX8ieSFkpqzHru\naZlDXs2S1ku6KmvZIkl3S/pR5nV/JekN/dk+K34OBUvaC0CXpAckXSlpbG8NJU0AvgN8FBgPbAQu\n7NHsjcCazPKHgIeBc4GTgD8FviJpRKbtl4HRwBzgEuAG4OZe1vs94GPABOAl4KL+bGyOLgZOAS4D\nPi7ptMz8DwLvAd4GjALeDxyMiDdnls+PiBER8e0e9VcDPwAeA04APgD8p6Tsw0vXAZ8CxgKbgM/m\nY8Os8DkULFERsZf0m2AA/w7skLRY0qQjNH8bsD4ivhcRncCXgFd6tPltRHwjIrqAbwPTgX+IiLaI\neAxoB06SVEn6jfCjEbEvIrYAXwCuP8p6vxMRHcAXj7Denq7NfCo//DWl7/+Nbp/K7DGtBlYD8zPz\nbwE+FhEbI211ROzK4fXOB0YAd0VEe0T8nPQhu/dktfl+RPw68//6n8DZx1CvlRCHgiUuIp6PiJsi\nYhpwBjCF9BtvT1OAhqznBdDYo82rWY8PZdr1nDeC9Cf+aqA+a1k9MDXH9TYcoV22RyJiTNbX1j7a\nZ8sOnIOZeiEdcC8dw+scNgVoiIhU1rye29rbOq3MOBSsoETEBmAR6XDoaRsw7fCEJGVPH6OdQAcw\nM2veDKCpl/VO77He6Udol4sDwLCs6ROP4bkNQH+O9W8Fph/uL8nobVutzDkULFGSTpX0t5KmZaan\nkz6ssfQIzX8EnCnpjyRVAbdzbG+q3TKHlx4BPitppKSZpI/Zf7OX9Z4u6Y8z672zv+sFVgHXSaqW\nVAe8+xieex/waUlzlXaWpPGZZa+S7hs5kl+R/vT/ocx6FwLvIN3fYvYaDgVL2j7SncO/knSAdBis\nA/62Z8OI2AlcA3wO2AXMA5bT/9NXP0D6k/tm4GnSHdP3H2W9d2XWOxf4ZT/X+X9If9rfQ7pj96Fj\neO6/kA6yx4C9wH8AQzPLPgk8kOm/uLZH/e2kQ+BK0ntIXwVuyOyVmb2GfJMdK1aZwyGNwPsi4vGk\n6zErBd5TsKIi6XJJYyQNAf6e9PUCRzrUZGb94FCwYnMB6TNwdpI+JPJHEXEo2ZLMSocPH5mZWTfv\nKZiZWbeiGxBvwoQJMWvWrKTLMDMrKitWrNgZERP7ald0oTBr1iyWL1+edBlmZkVFUn3frXz4yMzM\nsjgUzMysm0PBzMy6ORTMzKybQ8HMzLrlLRQk3Z+5/eG6XpZL0pckbZK0RtI5+arFzMxyk889hUXA\nFUdZfiXp0SbnArcCX8tjLWZml
oO8XacQEb+QNOsoTa4GHszcxWppZpCzyRGxLV81mdnA6uhKcbC9\ni4PtnbR2pGjt6KKtM0V7Z4qOrhTtXSk6OlN0pYKOVNCVStGVgq5UilRAVypIRZBKBQGkAiLS8yLI\nzEs/hvSy9Pf0ssOPDwt+N9HbCD6DMrBPnoYPqps1jjef3Of1Z8clyYvXpvLaWxo2Zua9LhQk3Up6\nb4IZM2YMSnFmx2rRokUA3HTTTYnWcbyaD7ZTv+sg21oOsbW5lVf3trJzfzu7D7Sx+2AH+w51sLe1\ng72tnbR3pvp+wTIlDfxr3nbJG0o6FHIWEfcC9wLU1dV5BD+zAdCVCl7cvo81jS2sbWzh+W172bzz\nALsPtL+mXU1VBROG1zBuRA1jh9UwY9wwRtZWMbK2ihE1VQwbUsWwmkpqqyuorapkSHUFNZWVVFeK\n6qoKaiorqKwQ1ZWisqKCSomKCqiqqKBCUFEhBFRWCElIUKH0vMOPyTwW6n6zTS9X9+PDst+MlY93\n5hKXZCg08dr73E7D94w1y6vte1v5+YbtPPXiTp7etJOWQx0AjBxSxWmTR3H56ZOYM2EEsyYMZ8qY\nWiaPHsrYYdV+cy0jSYbCYuAOSQ+Tvh1ji/sTzAbeofYuHnvuFb67somnX9xBKuDEUbW8dd4kLnjD\neOZPH8Ps8cOpqPAbv+UxFCR9C1gITJDUCHwCqAaIiHuAJcDbgE2kbyp+c75qMStH+1o7ePDZev7j\n6d+y+0A7U8cM5a8WnsQ75k/h5Ekj/OnfjiifZx+9p4/lAdyer/WblauOrhT3PfVb7nnyJVoOdXDJ\nyRP5i0vmcP7s8d4bsD4VRUezmeVmXVMLH/rOGp7btpfLTj2BOy+by/zpY5Iuy4qIQ8GsBKRSwRd/\n+gJ3P/ES44bX8PXrF3D56ScmXZYVIYeCWZE71N7F33z7Nzy6/lXedc40Pv72eYweVp10WVakHApm\nRWz7vlb+/IHlrGlq4eNvn8fNF81yB7IdF4eCWZHavreVd93zDDv3tXPv9XW8Zd6kpEuyEuBQMCtC\n+9s6uXnRMnbtb+ehP38jvzdjbNIlWYlwKJgVmfbOFH/5zRVseGUf/3FjnQPBBpRvsmNWRCKCj35v\nLU+9uJO7/vhMFp5yQtIlWYlxKJgVke+ubOK7Kxv568vmck3d9L6fYHaMHApmRaKp+RCfWrye82aN\n487L5iZdjpUoh4JZEUilgr/7f6tJRfDP18yn0sNVWJ44FMyKwIPPbuGZl3bxsbfPY8b4YUmXYyXM\noWBW4JqaD3HXjzew8JSJXHeu+xEsvxwKZgXuX3/yAqmAz77zTF+tbHnnUDArYBtf2cd3VzZyw/kz\nmTpmaNLlWBlwKJgVsM8/uoERNVXcfulJSZdiZcKhYFaglm3ZzU+f385tC9/A2OE1SZdjZcKhYFaA\nIoJ/+p8NTBw5hJsvmpV0OVZGHApmBejZl3axvH4Pd142l2E1HqLMBo9DwawAfeOZLYwbXsM1C6Yl\nXYqVGYeCWYFp2H2Qnz3/KtedO53a6sqky7Ey41AwKzDfXFqPJP70/JlJl2JlyKFgVkAOtXfx8LIG\nLj99ElN8XYIlwKFgVkD+a1UTLYc6uPGCWUmXYmXKoWBWICKCB57ZwqknjuS82eOSLsfKlEPBrECs\nqN/Dhlf2cdOFszzGkSXGoWBWIP5rVRO11RW8Y/6UpEuxMuZQMCsAHV0plqx9hctOm8TwIb5YzZLj\nUDArAL/ctJPdB9q5ynsJljCHglkB+MHqbYysrWLhKROTLsXKnEPBLGGtHV08tv4Vrjj9RIZU+Qpm\nS1ZeQ0HSFZI2Stok6SNHWD5a0g8krZa0XtLN+azHrBA9sXE7+9o6uepsHzqy5OUtFCRVAncDVwLz\ngPdImtej2e3AcxExH1gIfEGSB463srJ49VYmjKjhgjnjky7FLK97CucBmyJic0S0Aw8DV/doE8B
I\npU/KHgHsBjrzWJNZQdnX2sHPnt/OH545mapKH8215OXzt3Aq0JA13ZiZl+0rwGnAVmAt8NcRker5\nQpJulbRc0vIdO3bkq16zQffzDdtp60z52gQrGEl/NLkcWAVMAc4GviJpVM9GEXFvRNRFRN3EiT47\nw0rH4xu2M354DefMGJt0KWZAfkOhCZieNT0tMy/bzcD3Im0T8Fvg1DzWZFYwulLBky/s4JKTJ1JR\n4WEtrDDkMxSWAXMlzc50Hl8HLO7R5mXgMgBJk4BTgM15rMmsYKxqaGbPwQ4WnnpC0qWYdcvb9fQR\n0SnpDuBRoBK4PyLWS7ots/we4NPAIklrAQEfjoid+arJrJA8sXE7FYJL5vqQqBWOvA6yEhFLgCU9\n5t2T9Xgr8NZ81mBWqB7fuJ0FM8cyelh10qWYdUu6o9msLG3f28q6pr0sPMWHjqywOBTMEvDExvSp\n1Zc6FKzAOBTMEvD4xu2cOKqW0yaPTLoUs9dwKJgNsvbOFE+9uJNLT53oO6xZwXEomA2y5fW72d/W\n6f4EK0gOBbNB9stNO6msEBedNCHpUsxex6FgNsiWbt7NWdNGM8K33bQC5FAwG0QH2jpZ3dDM+R4m\n2wqUQ8FsEK2o30NnKhwKVrAcCmaDaOnmXVRViLqZHhXVCpNDwWwQLd28i7OmjWa4+xOsQDkUzAbJ\ngbZO1jS2+NCRFTSHgtkgWe7+BCsCDgWzQdLdnzDL/QlWuBwKZoPk2Zd2MX/6GIbVuD/BCpdDwWwQ\n7G/rZG1TC+fPGZd0KWZH5VAwGwTLt+ymKxVcMMdDW1hhcyiYDYJf/3Y3VRXinJljki7F7KgcCmaD\nYEX9HuZNGeX+BCt4DgWzPOvoSrGmsYVzZvisIyt8DgWzPNuwbR+HOrpY4KEtrAg4FMzybOXLewA4\nx6FgRcChYJZnK+r3cOKoWqaMrk26FLM+ORTM8mxF/R7OmTnG92O2ouBQMMujV/e20tR8yJ3MVjQc\nCmZ5tLI+3Z/gTmYrFg4Fszxa+fIeaqoqOH3K6KRLMcuJQ8Esj1bU7+GsqaOpqfKfmhUH/6aa5Ulb\nZxfrmvb6VFQrKjmFgqTKfBdiVmrWNe2lvSvlTmYrKrnuKbwo6fOS5h3Li0u6QtJGSZskfaSXNgsl\nrZK0XtKTx/L6ZoXsN90XrXkQPCseuYbCfOAF4D5JSyXdKmnU0Z6Q2bu4G7gSmAe8p2eoSBoDfBW4\nKiJOB6451g0wK1S/aWhm6pihnDDSF61Z8cgpFCJiX0T8e0RcCHwY+ASwTdIDkk7q5WnnAZsiYnNE\ntAMPA1f3aPNe4HsR8XJmPdv7tRVmBWh1QzNnz/BeghWXnPsUJF0l6fvAF4EvAHOAHwBLennaVKAh\na7oxMy/bycBYSU9IWiHphl7Wf6uk5ZKW79ixI5eSzRK1c38bjXsOcfY0h4IVl1wHd38ReBz4fEQ8\nkzX/O5LefJzrXwBcBgwFnpW0NCJeyG4UEfcC9wLU1dXFcazPbFCsaWwGYP50h4IVl1xD4YaIeDp7\nhqSLIuKXEXFnL89pAqZnTU/LzMvWCOyKiAPAAUm/4Hf9F2ZFa1VDCxWCM6YetevNrODk2tH8pSPM\n+3Ifz1kGzJU0W1INcB2wuEeb/wYullQlaRjwRuD5HGsyK1irG5o5edJI32nNis5Rf2MlXQBcCEyU\n9MGsRaOAo167EBGdku4AHs20vT8i1ku6LbP8noh4XtKPgTVACrgvItb1f3PMkhcRrG5s5vJ5JyZd\nitkx6+tjTA0wItNuZNb8vcC7+3rxiFhCj47oiLinx/Tngc/nUqxZMXh590GaD3a4P8GK0lFDISKe\nBJ6UtCgi6gepJrOitqrhcCezB8Gz4tPX4aMvRsTfAF+R9LqzfiLiqrxVZlakVje0UFtdwcmTRvbd\n2KzA9HX46P9mvv9zvgsxKxVrGps5Y8poqis93qQVn74OH63
IfPeYRGY56ApYt7WF971xZtKlmPVL\nX4eP1gK9XiwWEWcNeEVmRWx7WyWtHSl3MlvR6uvw0dsHpQqzEtHUWg3g4S2saPV1+MhnHJkdg6ZD\nVYwZVs30cUOTLsWsX47aEybp6cz3fZL29vw+OCWaFY+trdWcNW0MkpIuxaxf+tpTuDjz3efWmfWh\nPZXuU7h2mq9PsOKV88Asks4BLibd8fx0RPwmb1WZFaFXWqsIxFnuT7Ailuv9FD4OPACMByYAiyR9\nLJ+FmRWbrZlO5rO8p2BFLNc9hfcB8yOiFUDSXcAq4DP5Ksys2DS1VjGyqotJo3z7TSteuV5yuRXI\n/k0fwuvvjWBW1ra2VjGltjPpMsyOS18Xr32ZdB9CC7Be0k8y028Bfp3/8l5v165dLFq0KIlVm/Xq\nUEcX9SvWMXREG4sW+cQ8K159HT5anvm+Avh+1vwn8lKNWZFq3HMQgLHVXQlXYnZ8FFFctzyuq6uL\n5cuX993QbBDd8+RL3PU/G/i7k3Zy+y03Jl2O2etIWhERdX21y6mjWdJc4B+BeWT1LUTEnH5XaFZC\n1jQ2M6a6i+FVxfUhy6ynXDuavwF8DegELgUeBL6Zr6LMis2axham1nYkXYbZccs1FIZGxM9IH26q\nj4hPAn+Yv7LMiseu/W007jnkM4+sJOR6nUKbpArgRUl3kD4ddUT+yjIrHmuaWgAcClYSct1T+Gtg\nGHAnsAC4HnBvmhmwtrEFCSY7FKwE5LSnEBHLADJ7C3dGxL68VmVWRNY0NjNnwnBqK3ckXYrZcct1\n7KO6zF3Y1gBrJa2WtCC/pZkVvohgVUOL77RmJSPXw0f3A38VEbMiYhZwO+kzkszK2taWVnbub+Ns\nh4KViFxDoSsinjo8ERFPkz491aysrW5oBmC+h8u2EtHX2EfnZB4+KenrwLdIj330J3ioCzNWNzZT\nU1nBqZNH4huMWCnoq6P5Cz2mP5H12JduWtlb3dDMaZNHMqSqMulSzAZEX7fjvHSwCjErNl2pYG1j\nC+9aMC3pUswGTK5nH42W9C+Slme+viDJt5eysvbSjv0caO9yf4KVlGM5+2gfcG3may8++8jK3KrD\nncw+88hKSK7DXLwhIt6VNf0pSavyUZBZsVjd0MzIIVXMmTA86VLMBkyuewqHJF18eELSRcChvp4k\n6QpJGyVtkvSRo7Q7V1KnpHfnWI9Z4lY3NnPW9NFUVCjpUswGTK57CrcBD2b1I+yhj7GPJFUCd5O+\ndWcjsEzS4oh47gjt/gl47FgKN0tSa0cXG7bt49Y3+5YiVlr6DIXMeEenRMR8SaMAIiKXm9CeB2yK\niM2Z13kYuBp4rke7DwDfBc49lsLNkrR+6146U8FZ7mS2EtPn4aOISAEfyjzem2MgAEwFGrKmGzPz\nukmaCryT9A18eiXp1sNnPu3Y4UHHLHlrGtOdzB7ewkpNrn0KP5X0vyRNlzTu8NcArP+LwIczwdOr\niLg3Iuoiom7ixIkDsFqz47O6oZlJo4Zw4ujavhubFZFc+xT+hPQVzH/VY/7RDqg2AdOzpqdl5mWr\nAx6WBDABeJukzoj4rxzrMkvEbxqafX2ClaRcQ2Ee6UC4mHQ4PAXc08dzlgFzJc0mHQbXAe/NbhAR\nsw8/lrQI+KEDwQrdzv1t1O86yHvPm5F0KWYDLtdQeID0BWtfyky/NzPv2t6eEBGdmVt3PgpUAvdH\nxHpJt2WW9xUqZgVpZf0eABbMHJtwJWYDL9dQOCMi5mVNPy6p51lErxMRS4AlPeYdMQwi4qYcazFL\n1IqX91BdKc6Y6pFerPTk2tG8UtL5hyckvRFYnp+SzArbyvo9nD5lNLXVHhnVSk+uobAAeEbSFklb\ngGeBcyWtlbQmb9WZFZj2zhRrGlt86MhKVq6Hj67IaxVmReK5bXtp60xxzgyHgpWmnEIhIurzXYhZ\nMTjcyXzOTJ+OaqUp18N
HZka6k3nK6Fomjx6adClmeeFQMDsGK+v3cI77E6yEORTMcrS1+RDbWlrd\nyWwlzaFglqOVL2f6E9zJbCXMoWCWo5X1zdRWVzBvyqikSzHLG4eCWY5WvLyHs6aOobrSfzZWuvzb\nbZaD/W2drGtq4dzZPnRkpc2hYJaD5Vt205UKLpgzIelSzPLKoWCWg6Wbd1NdKV+0ZiXPoWCWg2c3\n72L+tDEMq8l1ZBiz4uRQMOvDvtYO1jW1cMEbxiddilneORTM+rC8fg9dqeD8OQ4FK30OBbM+LN28\nK92f4IvWrAw4FMz6sHTzbs6ePoahNb6pjpU+h4LZUXT3J/jQkZUJh4LZUSzf4v4EKy8OBbOjWLp5\nFzWVFR4u28qGQ8HsKJZu3sXZM8ZQW+3+BCsPDgWzXuw+0M6aphYu9PUJVkYcCma9+MULO4iA3z/1\nhKRLMRs0DgWzXvx8w3YmjKjhjCmjky7FbNA4FMyOoCsVPPnCDi45+QQqKpR0OWaDxqFgdgSrGvbQ\ncqjDh46s7DgUzI7g5xu2U1khLp7r+ydYeXEomB3B4xt2sGDmWEYPrU66FLNB5VAw6+GVllae27bX\nh46sLOU1FCRdIWmjpE2SPnKE5e+TtEbSWknPSJqfz3rMcvHExu0AXHqKQ8HKT95CQVIlcDdwJTAP\neI+keT2a/Ra4JCLOBD4N3Juvesxy9fjG7UwZXcvJk0YkXYrZoMvnnsJ5wKaI2BwR7cDDwNXZDSLi\nmYjYk5lcCkzLYz1mfWrt6OLpF3ey8NQTkHwqqpWffIbCVKAha7oxM683fwb8z5EWSLpV0nJJy3fs\n2DGAJZq91uMbtnOgvYu3nTE56VLMElEQHc2SLiUdCh8+0vKIuDci6iKibuLEiYNbnJWVxau3MmHE\nEN+P2cpWPkOhCZieNT0tM+81JJ0F3AdcHRG78liP2VHta+3gZxu284dnnkilr2K2MpXPUFgGzJU0\nW1INcB2wOLuBpBnA94DrI+KFPNZi1qefPPcq7Z0prjp7StKlmCWmKl8vHBGdku4AHgUqgfsjYr2k\n2zLL7wE+DowHvprp1OuMiLp81WR2NItXb2XqmKGcM8M31LHylbdQAIiIJcCSHvPuyXp8C3BLPmsw\ny8XuA+08/eJObnnTHJ91ZGWtIDqazZK2ZO02OlPBVfN96MjKm0PBjPSho5NOGMFpk0cmXYpZohwK\nVvYadh9k2ZbdXDV/ig8dWdlzKFjZe/DZLVRKXFs3vc+2ZqXOoWBl7WB7J99e1sAVZ5zIiaNrky7H\nLHEOBStr3/9NE3tbO7npwllJl2JWEBwKVrYiggee2cLpU0axYKavTTADh4KVsWc37+KFV/dz04Wz\n3MFsluFQsLK16JdbGDe8hnf42gSzbg4FK0tbdh7gp8+/ynXnTqe2ujLpcswKhkPBytI/P7aRIVWV\n3HTRrKRLMSsoDgUrO2sbW/jhmm3c8qbZnDDSp6GaZXMoWNn53KMbGDusmlvfPCfpUswKjkPBysrT\nL+7kqRd3cvulJzGytjrpcswKjkPBykYqFfzTjzcwdcxQrr9gZtLlmBUkh4KVjW8te5m1TS188C0n\nM6TKZxyZHYlDwcpC/a4DfPZHz/OmuRN45+9NTbocs4LlULCS15UK/vaR1VRWiM+9+ywqKnz1sllv\n8no7TrNCcN9Tm1lev4d/uXY+k0cPTbocs4LmPQUraWsam/nCYy9wxekn+rCRWQ4cClayXt51kPcv\nWsbEkUP47DvP8KB3Zjnw4SMrSbsPtHPjN35NZyp4+P3nMX7EkKRLMisK3lOwknOwvZM/e2AZW5sP\ncd8NdZx0woikSzIrGt5TsJKyfV8rf/7ActY2tfDV9y2gbta4pEsyKyoOBSsZG1/Zx/sXLWP3gXa+\nfn0db5k3KemSzIqOQ8FKwpK12/jwd9YwtKaSR/7iAs6cNjrpksyKkkPBitr2va18/L/X8
+P1r3Dm\n1NF8/foFTBnjaxHM+suhYEXpUHsXD/36Zf7tpy/Q1pniI1eeyi0Xz6aq0udOmB0Ph4IVlb2tHTz0\nq5e576nN7NzfzkUnjefTV5/BnIk+w8hsIDgUrOB1dqV4atNOvreyicfWv0JbZ4o3zZ3AnZfN5Vyf\nXWQ2oBwKVnBSqWDzzv0s3bybp17cwTObdrGvrZMxw6q5tm4619RN46xpY5Iu06wk5TUUJF0B/BtQ\nCdwXEXf1WK7M8rcBB4GbImJlPmuywtHRleLVva3U7zrI5h37eWnHAZ7ftpd1TS0caO8CYOqYobx9\n/mQuPeUEFp5yAjVV7jMwy6e8hYKkSuBu4C1AI7BM0uKIeC6r2ZXA3MzXG4GvZb5bAYoIOlNBVyro\n6ErR0RW0d6bo6ErR2tFFa0eK1s4uDrZ3cai9kwNtXexr7WBfayd7WzvYfaCDXQfa2H2gnVdaWtmx\nv42I373+sJpKTp40knctmMaZU0dzzsyxzJkw3GMWmQ2ifO4pnAdsiojNAJIeBq4GskPhauDBiAhg\nqaQxkiZHxLaBLubJF3bwmR8+13fDXkTfTQZUxJHXGL1MZM8//NyA7jfdIIjImo4ggFRk5mfmpSJ9\n/4FUBKlU0BVBKgVdkQ6D/qqtrmD88CGMG17DuOE1nHriSCaPHsrk0bXMGD+MORNGMGnUEAeAWcLy\nGQpTgYas6UZevxdwpDZTgdeEgqRbgVsBZsyY0a9iRgypYu6k4ztDRQzyG1Yvq8uenf0m+tr5v5t3\nuI0y/wghpacrlHms9OP0F1RUpB9Xdn+HyooKqitEZaWorqigpqqC6soKqitFbXUlQ6oqqK2uZPiQ\nSoZWVzF8SCUja6sZWVtFdRmcKnrTTTclXYLZcSuKjuaIuBe4F6Curq5fH1cXzBzLgpkLBrQuM7NS\nk8+Pb03A9KzpaZl5x9rGzMwGST5DYRkwV9JsSTXAdcDiHm0WAzco7XygJR/9CWZmlpu8HT6KiE5J\ndwCPkj4l9f6IWC/ptszye4AlpE9H3UT6lNSb81WPmZn1La99ChGxhPQbf/a8e7IeB3B7PmswM7Pc\nlf4pIWZmljOHgpmZdXMomJlZN4eCmZl1U2/DKRQqSTuA+n4+fQKwcwDLSZK3pTCVyraUynaAt+Ww\nmRExsa9GRRcKx0PS8oioS7qOgeBtKUylsi2lsh3gbTlWPnxkZmbdHApmZtat3ELh3qQLGEDelsJU\nKttSKtsB3pZjUlZ9CmZmdnTltqdgZmZH4VAwM7NuZREKkq6RtF5SSlJd1vxZkg5JWpX5uudor1MI\netuWzLK9qDflAAADr0lEQVSPStokaaOky5OqsT8kfVJSU9bP4m1J13QsJF2R+X/fJOkjSddzPCRt\nkbQ283NYnnQ9x0LS/ZK2S1qXNW+cpJ9IejHzfWySNeaql23J+99JWYQCsA74Y+AXR1j2UkScnfm6\nbZDr6o8jboukeaTvWXE6cAXwVUmVg1/ecfnXrJ/Fkr6bF4bM//PdwJXAPOA9mZ9HMbs083MotvP7\nF5H+/c/2EeBnETEX+Flmuhgs4vXbAnn+OymLUIiI5yNiY9J1DISjbMvVwMMR0RYRvyV9j4rzBre6\nsnUesCkiNkdEO/Aw6Z+HDbKI+AWwu8fsq4EHMo8fAP5oUIvqp162Je/KIhT6MDuzG/akpDclXcxx\nmAo0ZE03ZuYVkw9IWpPZbS6KXfyMUvi/zxbATyWtkHRr0sUMgElZd3R8BZiUZDEDIK9/JyUTCpJ+\nKmndEb6O9oltGzAjIs4GPgg8JGnU4FTcu35uS8HrY7u+BswBzib9c/lCosWWt4szfxNXArdLenPS\nBQ2UzI29ivk8/Lz/neT1zmuDKSL+oB/PaQPaMo9XSHoJOBlItHOtP9sCNAHTs6anZeYVjFy3S9K/\nAz/MczkDqeD/749FRDRlvm+X9H3Sh8eO1B9XLF6VN
DkitkmaDGxPuqD+iohXDz/O199Jyewp9Iek\niYc7YyXNAeYCm5Otqt8WA9dJGiJpNult+XXCNeUs88d62DtJd6gXi2XAXEmzJdWQ7vBfnHBN/SJp\nuKSRhx8Db6W4fhZHshi4MfP4RuC/E6zluAzG30nJ7CkcjaR3Al8GJgI/krQqIi4H3gz8g6QOIAXc\nFhGD3rFzLHrblohYL+kR4DmgE7g9IrqSrPUYfU7S2aR37bcAf5FsObmLiE5JdwCPApXA/RGxPuGy\n+msS8H1JkH5/eCgifpxsSbmT9C1gITBBUiPwCeAu4BFJf0Z62P1rk6swd71sy8J8/514mAszM+tW\n1oePzMzstRwKZmbWzaFgZmbdHApmZtbNoWBmZt0cCmZm1s2hYGZm3RwKZsdJ0rmZAcpqM1cEr5d0\nRtJ1mfWHL14zGwCSPgPUAkOBxoj4x4RLMusXh4LZAMiMebQMaAUuLLIhRsy6+fCR2cAYD4wARpLe\nYzArSt5TMBsAkhaTvuPabGByRNyRcElm/VIWo6Sa5ZOkG4COiHgoMxT7M5J+PyJ+nnRtZsfKewpm\nZtbNfQpmZtbNoWBmZt0cCmZm1s2hYGZm3RwKZmbWzaFgZmbdHApmZtbt/wM0YVudODC3EQAAAABJ\nRU5ErkJggg==\n",
48 |       "text/plain": [
49 |        "<matplotlib.figure.Figure at 0x10bf45588>"
50 |       ]
51 |      },
52 |      "metadata": {},
53 |      "output_type": "display_data"
54 |     }
55 |    ],
56 |    "source": [
57 |     "plt.plot(x, lst);\n",
58 |     "plt.axhline(0.5, c='k', alpha=0.5)\n",
59 |     "plt.vlines(0, 0, 1, alpha=0.5)\n",
60 |     "plt.title('Sigmoid Function')\n",
61 |     "plt.xlabel('x')\n",
62 |     "plt.ylabel('probability');"
63 |    ]
64 |   }
65 |  ],
66 |  "metadata": {
67 |   "kernelspec": {
68 |    "display_name": "Python [conda root]",
69 |    "language": "python",
70 |    "name": "conda-root-py"
71 |   },
72 |   "language_info": {
73 |    "codemirror_mode": {
74 |     "name": "ipython",
75 |     "version": 3
76 |    },
77 |    "file_extension": ".py",
78 |    "mimetype": "text/x-python",
79 |    "name": "python",
80 |    "nbconvert_exporter": "python",
81 |    "pygments_lexer": "ipython3",
82 |    "version": "3.5.4"
83 |   }
84 |  },
85 |  "nbformat": 4,
86 |  "nbformat_minor": 2
87 | }
88 | 


--------------------------------------------------------------------------------
/notebooks/Machine_Learning/Supervised_Learning/Model_Selection/Feature_Selection.ipynb:
--------------------------------------------------------------------------------
 1 | {
 2 |  "cells": [
 3 |   {
 4 |    "cell_type": "code",
 5 |    "execution_count": null,
 6 |    "metadata": {},
 7 |    "outputs": [],
 8 |    "source": [
 9 |     "alphas = [0.0001, 0.001, 0.01, 0.1, 1, 10, 100]\n",
10 |     "print('All errors are RMSE')\n",
11 |     "print('-'*74)\n",
12 |     "for alpha in alphas:\n",
13 |     "    # instantiate and fit model\n",
14 |     "    lasso = Lasso(alpha=alpha, fit_intercept=True, random_state=99)\n",
15 |     "    lasso.fit(X_train, y_train)\n",
16 |     "    # calculate errors\n",
17 |     "    train_error = np.sqrt(mean_squared_error(y_train, lasso.predict(X_train)))\n",
18 |     "    validation_error = np.sqrt(mean_squared_error(y_validation, lasso.predict(X_validation)))\n",
19 |     "    test_error = np.sqrt(mean_squared_error(y_test, lasso.predict(X_test)))\n",
20 |     "    # print errors as report\n",
21 |     "    print('alpha: {:6} | train error: {:5} | val error: {:6} | test error: {}'.\n",
22 |     "          format(alpha,\n",
23 |     "                 round(train_error,2),\n",
24 |     "                 round(validation_error,2),\n",
25 |     "                 round(test_error,2)))"
26 |    ]
27 |   }
28 |  ],
29 |  "metadata": {
30 |   "kernelspec": {
31 |    "display_name": "Python [conda root]",
32 |    "language": "python",
33 |    "name": "conda-root-py"
34 |   },
35 |   "language_info": {
36 |    "codemirror_mode": {
37 |     "name": "ipython",
38 |     "version": 3
39 |    },
40 |    "file_extension": ".py",
41 |    "mimetype": "text/x-python",
42 |    "name": "python",
43 |    "nbconvert_exporter": "python",
44 |    "pygments_lexer": "ipython3",
45 |    "version": "3.5.4"
46 |   }
47 |  },
48 |  "nbformat": 4,
49 |  "nbformat_minor": 2
50 | }
51 | 


--------------------------------------------------------------------------------
/notebooks/OS_library/OS_Sandbox.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {
  7 |     "collapsed": true
  8 |    },
  9 |    "outputs": [],
 10 |    "source": [
 11 |     "import os"
 12 |    ]
 13 |   },
 14 |   {
 15 |    "cell_type": "code",
 16 |    "execution_count": 3,
 17 |    "metadata": {},
 18 |    "outputs": [
 19 |     {
 20 |      "data": {
 21 |       "text/plain": [
 22 |        "posix.uname_result(sysname='Darwin', nodename='metis-davidziganto.local', release='16.7.0', version='Darwin Kernel Version 16.7.0: Wed Oct  4 00:17:00 PDT 2017; root:xnu-3789.71.6~1/RELEASE_X86_64', machine='x86_64')"
 23 |       ]
 24 |      },
 25 |      "execution_count": 3,
 26 |      "metadata": {},
 27 |      "output_type": "execute_result"
 28 |     }
 29 |    ],
 30 |    "source": [
 31 |     "os.uname()"
 32 |    ]
 33 |   },
 34 |   {
 35 |    "cell_type": "code",
 36 |    "execution_count": 5,
 37 |    "metadata": {},
 38 |    "outputs": [
 39 |     {
 40 |      "data": {
 41 |       "text/plain": [
 42 |        "1333"
 43 |       ]
 44 |      },
 45 |      "execution_count": 5,
 46 |      "metadata": {},
 47 |      "output_type": "execute_result"
 48 |     }
 49 |    ],
 50 |    "source": [
 51 |     "os.getpid()"
 52 |    ]
 53 |   },
 54 |   {
 55 |    "cell_type": "code",
 56 |    "execution_count": 8,
 57 |    "metadata": {},
 58 |    "outputs": [
 59 |     {
 60 |      "ename": "AttributeError",
 61 |      "evalue": "module 'os' has no attribute 'getresuid'",
 62 |      "output_type": "error",
 63 |      "traceback": [
 64 |       "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
 65 |       "\u001b[0;31mAttributeError\u001b[0m                            Traceback (most recent call last)",
 66 |       "\u001b[0;32m<ipython-input-8-8474d7bcb309>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mos\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgetresuid\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
 67 |       "\u001b[0;31mAttributeError\u001b[0m: module 'os' has no attribute 'getresuid'"
 68 |      ]
 69 |     }
 70 |    ],
 71 |    "source": [
 72 |     "os.getresuid()"
 73 |    ]
 74 |   },
 75 |   {
 76 |    "cell_type": "code",
 77 |    "execution_count": null,
 78 |    "metadata": {
 79 |     "collapsed": true
 80 |    },
 81 |    "outputs": [],
 82 |    "source": []
 83 |   }
 84 |  ],
 85 |  "metadata": {
 86 |   "kernelspec": {
 87 |    "display_name": "Python [conda root]",
 88 |    "language": "python",
 89 |    "name": "conda-root-py"
 90 |   },
 91 |   "language_info": {
 92 |    "codemirror_mode": {
 93 |     "name": "ipython",
 94 |     "version": 3
 95 |    },
 96 |    "file_extension": ".py",
 97 |    "mimetype": "text/x-python",
 98 |    "name": "python",
 99 |    "nbconvert_exporter": "python",
100 |    "pygments_lexer": "ipython3",
101 |    "version": "3.5.4"
102 |   }
103 |  },
104 |  "nbformat": 4,
105 |  "nbformat_minor": 2
106 | }
107 | 


--------------------------------------------------------------------------------
/notebooks/Python/Coding_Best_Practices/Complexity_Practice.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "import numpy as np"
 10 |    ]
 11 |   },
 12 |   {
 13 |    "cell_type": "code",
 14 |    "execution_count": 2,
 15 |    "metadata": {},
 16 |    "outputs": [],
 17 |    "source": [
 18 |     "X = np.array([12,6,3,4,5,6,7,8,9]).reshape(3,3)\n",
 19 |     "Y = np.array([5,8,1,2,6,7,3,0,4,5,9,1]).reshape(3,4)"
 20 |    ]
 21 |   },
 22 |   {
 23 |    "cell_type": "code",
 24 |    "execution_count": 3,
 25 |    "metadata": {},
 26 |    "outputs": [
 27 |     {
 28 |      "data": {
 29 |       "text/plain": [
 30 |        "array([[12,  6,  3],\n",
 31 |        "       [ 4,  5,  6],\n",
 32 |        "       [ 7,  8,  9]])"
 33 |       ]
 34 |      },
 35 |      "execution_count": 3,
 36 |      "metadata": {},
 37 |      "output_type": "execute_result"
 38 |     }
 39 |    ],
 40 |    "source": [
 41 |     "X"
 42 |    ]
 43 |   },
 44 |   {
 45 |    "cell_type": "code",
 46 |    "execution_count": 4,
 47 |    "metadata": {},
 48 |    "outputs": [
 49 |     {
 50 |      "data": {
 51 |       "text/plain": [
 52 |        "array([[5, 8, 1, 2],\n",
 53 |        "       [6, 7, 3, 0],\n",
 54 |        "       [4, 5, 9, 1]])"
 55 |       ]
 56 |      },
 57 |      "execution_count": 4,
 58 |      "metadata": {},
 59 |      "output_type": "execute_result"
 60 |     }
 61 |    ],
 62 |    "source": [
 63 |     "Y"
 64 |    ]
 65 |   },
 66 |   {
 67 |    "cell_type": "code",
 68 |    "execution_count": 5,
 69 |    "metadata": {},
 70 |    "outputs": [],
 71 |    "source": [
 72 |     "rows, cols = X.shape[0], Y.shape[1]\n",
 73 |     "result = np.zeros(rows * cols).reshape(rows, cols)\n",
 74 |     "\n",
 75 |     "# iterate through rows of X\n",
 76 |     "for i in range(rows):\n",
 77 |     "   # iterate through columns of Y\n",
 78 |     "   for j in range(cols):\n",
 79 |     "       # iterate through rows of Y\n",
 80 |     "       for k in range(len(Y)):\n",
 81 |     "            result[i][j] += X[i][k] * Y[k][j]"
 82 |    ]
 83 |   },
 84 |   {
 85 |    "cell_type": "markdown",
 86 |    "metadata": {},
 87 |    "source": [
 88 |     "# Q1: What does this block of code do?"
 89 |    ]
 90 |   },
 91 |   {
 92 |    "cell_type": "code",
 93 |    "execution_count": null,
 94 |    "metadata": {},
 95 |    "outputs": [],
 96 |    "source": []
 97 |   },
 98 |   {
 99 |    "cell_type": "markdown",
100 |    "metadata": {},
101 |    "source": [
102 |     "# Q2: What's the complexity?"
103 |    ]
104 |   },
105 |   {
106 |    "cell_type": "code",
107 |    "execution_count": null,
108 |    "metadata": {},
109 |    "outputs": [],
110 |    "source": []
111 |   },
112 |   {
113 |    "cell_type": "markdown",
114 |    "metadata": {},
115 |    "source": [
116 |     "## Show Results"
117 |    ]
118 |   },
119 |   {
120 |    "cell_type": "code",
121 |    "execution_count": 6,
122 |    "metadata": {},
123 |    "outputs": [
124 |     {
125 |      "data": {
126 |       "text/plain": [
127 |        "array([[ 108.,  153.,   57.,   27.],\n",
128 |        "       [  74.,   97.,   73.,   14.],\n",
129 |        "       [ 119.,  157.,  112.,   23.]])"
130 |       ]
131 |      },
132 |      "execution_count": 6,
133 |      "metadata": {},
134 |      "output_type": "execute_result"
135 |     }
136 |    ],
137 |    "source": [
138 |     "result"
139 |    ]
140 |   },
141 |   {
142 |    "cell_type": "code",
143 |    "execution_count": 7,
144 |    "metadata": {},
145 |    "outputs": [
146 |     {
147 |      "data": {
148 |       "text/plain": [
149 |        "array([[108, 153,  57,  27],\n",
150 |        "       [ 74,  97,  73,  14],\n",
151 |        "       [119, 157, 112,  23]])"
152 |       ]
153 |      },
154 |      "execution_count": 7,
155 |      "metadata": {},
156 |      "output_type": "execute_result"
157 |     }
158 |    ],
159 |    "source": [
160 |     "np_result = np.dot(X,Y)\n",
161 |     "np_result"
162 |    ]
163 |   },
164 |   {
165 |    "cell_type": "code",
166 |    "execution_count": 8,
167 |    "metadata": {},
168 |    "outputs": [
169 |     {
170 |      "data": {
171 |       "text/plain": [
172 |        "True"
173 |       ]
174 |      },
175 |      "execution_count": 8,
176 |      "metadata": {},
177 |      "output_type": "execute_result"
178 |     }
179 |    ],
180 |    "source": [
181 |     "np.allclose(result, np_result)"
182 |    ]
183 |   },
184 |   {
185 |    "cell_type": "code",
186 |    "execution_count": null,
187 |    "metadata": {},
188 |    "outputs": [],
189 |    "source": []
190 |   },
191 |   {
192 |    "cell_type": "code",
193 |    "execution_count": null,
194 |    "metadata": {},
195 |    "outputs": [],
196 |    "source": []
197 |   },
198 |   {
199 |    "cell_type": "markdown",
200 |    "metadata": {},
201 |    "source": [
202 |     "# Answers\n",
203 |     "\n",
204 |     "## Q1: Matrix-Matrix multiplication\n",
205 |     "## Q2: if m = rows of X, n = cols of Y and p = rows of Y (= cols of X) --> O($mnp$) "
206 |    ]
207 |   }
208 |  ],
209 |  "metadata": {
210 |   "kernelspec": {
211 |    "display_name": "Python [conda root]",
212 |    "language": "python",
213 |    "name": "conda-root-py"
214 |   },
215 |   "language_info": {
216 |    "codemirror_mode": {
217 |     "name": "ipython",
218 |     "version": 3
219 |    },
220 |    "file_extension": ".py",
221 |    "mimetype": "text/x-python",
222 |    "name": "python",
223 |    "nbconvert_exporter": "python",
224 |    "pygments_lexer": "ipython3",
225 |    "version": "3.5.4"
226 |   }
227 |  },
228 |  "nbformat": 4,
229 |  "nbformat_minor": 2
230 | }
231 | 


--------------------------------------------------------------------------------
/notebooks/Python/Coding_Best_Practices/OOP_How_to_Write_a_Class.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "## Imports"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "code",
 12 |    "execution_count": 1,
 13 |    "metadata": {},
 14 |    "outputs": [],
 15 |    "source": [
 16 |     "import numpy as np\n",
 17 |     "from sklearn.linear_model import LinearRegression\n",
 18 |     "from sklearn.datasets import load_boston\n",
 19 |     "from sklearn.metrics import mean_squared_error, r2_score"
 20 |    ]
 21 |   },
 22 |   {
 23 |    "cell_type": "markdown",
 24 |    "metadata": {},
 25 |    "source": [
 26 |     "## Classes"
 27 |    ]
 28 |   },
 29 |   {
 30 |    "cell_type": "code",
 31 |    "execution_count": 2,
 32 |    "metadata": {},
 33 |    "outputs": [],
 34 |    "source": [
 35 |     "class MyLinearRegression:\n",
 36 |     "    \n",
 37 |     "    def __init__(self, fit_intercept=True):\n",
 38 |     "        self.coef_ = None\n",
 39 |     "        self.intercept_ = None\n",
 40 |     "        self._fit_intercept = fit_intercept\n",
 41 |     "\n",
 42 |     "    \n",
 43 |     "    def fit(self, X, y):\n",
 44 |     "        \"\"\"Fit model coefficients.\n",
 45 |     "\n",
 46 |     "        Arguments:\n",
 47 |     "        X: 1D or 2D numpy array \n",
 48 |     "        y: 1D numpy array\n",
 49 |     "        \"\"\"\n",
 50 |     "        \n",
 51 |     "        # check if X is 1D or 2D array\n",
 52 |     "        if len(X.shape) == 1:\n",
 53 |     "            X = X.reshape(-1,1)\n",
 54 |     "            \n",
 55 |     "        # add bias if fit_intercept\n",
 56 |     "        if self._fit_intercept:\n",
 57 |     "            X = np.c_[np.ones(X.shape[0]), X]\n",
 58 |     "        \n",
 59 |     "        # closed form solution\n",
 60 |     "        xTx = np.dot(X.T, X)\n",
 61 |     "        inverse_xTx = np.linalg.inv(xTx)\n",
 62 |     "        xTy = np.dot(X.T, y)\n",
 63 |     "        coef = np.dot(inverse_xTx, xTy)\n",
 64 |     "        \n",
 65 |     "        # set attributes\n",
 66 |     "        if self._fit_intercept:\n",
 67 |     "            self.intercept_ = coef[0]\n",
 68 |     "            self.coef_ = coef[1:]\n",
 69 |     "        else:\n",
 70 |     "            self.intercept_ = 0\n",
 71 |     "            self.coef_ = coef\n",
 72 |     "            \n",
 73 |     "    def predict(self, X):\n",
 74 |     "        \"\"\"Output model prediction.\n",
 75 |     "\n",
 76 |     "        Arguments:\n",
 77 |     "        X: 1D or 2D numpy array \n",
 78 |     "        \"\"\"\n",
 79 |     "        # check if X is 1D or 2D array\n",
 80 |     "        if len(X.shape) == 1:\n",
 81 |     "            X = X.reshape(-1,1) \n",
 82 |     "        return np.dot(X, self.coef_) + self.intercept_"
 83 |    ]
 84 |   },
 85 |   {
 86 |    "cell_type": "code",
 87 |    "execution_count": 3,
 88 |    "metadata": {},
 89 |    "outputs": [],
 90 |    "source": [
 91 |     "class Metrics:\n",
 92 |     "    \n",
 93 |     "    def __init__(self, X, y, model):\n",
 94 |     "        self.data = X\n",
 95 |     "        self.target = y\n",
 96 |     "        self.model = model\n",
 97 |     "        # degrees of freedom for the total variance of the dependent variable (n_rows - 1)\n",
 98 |     "        self._dft = X.shape[0] - 1   \n",
 99 |     "        # degrees of freedom for the error (residual) variance (n_rows - n_columns - 1)\n",
100 |     "        self._dfe = X.shape[0] - X.shape[1] - 1  \n",
101 |     "    \n",
102 |     "    def sse(self):\n",
103 |     "        '''returns sum of squared errors (model vs actual)'''\n",
104 |     "        squared_errors = (self.target - self.model.predict(self.data)) ** 2\n",
105 |     "        self.sq_error_ = np.sum(squared_errors)\n",
106 |     "        return self.sq_error_\n",
107 |     "        \n",
108 |     "    def sst(self):\n",
109 |     "        '''returns total sum of squared errors (actual vs avg(actual))'''\n",
110 |     "        avg_y = np.mean(self.target)\n",
111 |     "        squared_errors = (self.target - avg_y) ** 2\n",
112 |     "        self.sst_ = np.sum(squared_errors)\n",
113 |     "        return self.sst_\n",
114 |     "    \n",
115 |     "    def r_squared(self):\n",
116 |     "        '''returns calculated value of r^2'''\n",
117 |     "        self.r_sq_ = 1 - self.sse()/self.sst()\n",
118 |     "        return self.r_sq_\n",
119 |     "    \n",
120 |     "    def adj_r_squared(self):\n",
121 |     "        '''returns calculated value of adjusted r^2'''\n",
122 |     "        self.adj_r_sq_ = 1 - (self.sse()/self._dfe) / (self.sst()/self._dft)\n",
123 |     "        return self.adj_r_sq_\n",
124 |     "    \n",
125 |     "    def mse(self):\n",
126 |     "        '''returns calculated value of mse'''\n",
127 |     "        self.mse_ = np.mean( (self.model.predict(self.data) - self.target) ** 2 )\n",
128 |     "        return self.mse_\n",
129 |     "    \n",
130 |     "    def pretty_print_stats(self):\n",
131 |     "        '''prints a report of statistics (sse, sst, mse, r^2, adj_r^2) for the given model object'''\n",
132 |     "        items = ( ('sse:', self.sse()), ('sst:', self.sst()), \n",
133 |     "                 ('mse:', self.mse()), ('r^2:', self.r_squared()), \n",
134 |     "                  ('adj_r^2:', self.adj_r_squared()))\n",
135 |     "        for item in items:\n",
136 |     "            print('{0:8} {1:.4f}'.format(item[0], item[1]))"
137 |    ]
138 |   },
139 |   {
140 |    "cell_type": "markdown",
141 |    "metadata": {},
142 |    "source": [
143 |     "## Data"
144 |    ]
145 |   },
146 |   {
147 |    "cell_type": "code",
148 |    "execution_count": 4,
149 |    "metadata": {},
150 |    "outputs": [],
151 |    "source": [
152 |     "boston = load_boston()\n",
153 |     "data = boston.data\n",
154 |     "target = boston.target"
155 |    ]
156 |   },
157 |   {
158 |    "cell_type": "markdown",
159 |    "metadata": {},
160 |    "source": [
161 |     "## Model"
162 |    ]
163 |   },
164 |   {
165 |    "cell_type": "code",
166 |    "execution_count": 5,
167 |    "metadata": {},
168 |    "outputs": [
169 |     {
170 |      "data": {
171 |       "text/plain": [
172 |        "LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)"
173 |       ]
174 |      },
175 |      "execution_count": 5,
176 |      "metadata": {},
177 |      "output_type": "execute_result"
178 |     }
179 |    ],
180 |    "source": [
181 |     "# sklearn\n",
182 |     "lr = LinearRegression()\n",
183 |     "lr.fit(data, target)"
184 |    ]
185 |   },
186 |   {
187 |    "cell_type": "code",
188 |    "execution_count": 6,
189 |    "metadata": {},
190 |    "outputs": [],
191 |    "source": [
192 |     "# from scratch\n",
193 |     "mlr = MyLinearRegression()\n",
194 |     "mlr.fit(data, target)"
195 |    ]
196 |   },
197 |   {
198 |    "cell_type": "markdown",
199 |    "metadata": {},
200 |    "source": [
201 |     "## Sklearn Stats"
202 |    ]
203 |   },
204 |   {
205 |    "cell_type": "code",
206 |    "execution_count": 7,
207 |    "metadata": {},
208 |    "outputs": [
209 |     {
210 |      "name": "stdout",
211 |      "output_type": "stream",
212 |      "text": [
213 |       "mse: 21.8978\n",
214 |       "r^2 0.7406\n"
215 |      ]
216 |     }
217 |    ],
218 |    "source": [
219 |     "print('mse:', round(mean_squared_error(target, lr.predict(data)), 4))\n",
220 |     "print('r^2', round(r2_score(target, lr.predict(data)), 4))"
221 |    ]
222 |   },
223 |   {
224 |    "cell_type": "markdown",
225 |    "metadata": {},
226 |    "source": [
227 |     "## Metrics Stats"
228 |    ]
229 |   },
230 |   {
231 |    "cell_type": "code",
232 |    "execution_count": 8,
233 |    "metadata": {},
234 |    "outputs": [
235 |     {
236 |      "name": "stdout",
237 |      "output_type": "stream",
238 |      "text": [
239 |       "sse:     11080.2763\n",
240 |       "sst:     42716.2954\n",
241 |       "mse:     21.8978\n",
242 |       "r^2:     0.7406\n",
243 |       "adj_r^2: 0.7338\n"
244 |      ]
245 |     }
246 |    ],
247 |    "source": [
248 |     "metrics = Metrics(data, target, mlr)\n",
249 |     "metrics.pretty_print_stats()"
250 |    ]
251 |   },
252 |   {
253 |    "cell_type": "markdown",
254 |    "metadata": {},
255 |    "source": [
256 |     "---"
257 |    ]
258 |   },
259 |   {
260 |    "cell_type": "markdown",
261 |    "metadata": {},
262 |    "source": [
263 |     "## Right now we have two separate classes. We'd like to integrate them but how do we do that? \n",
264 |     "\n",
265 |     "## -- INHERITANCE!"
266 |    ]
267 |   },
268 |   {
269 |    "cell_type": "code",
270 |    "execution_count": 9,
271 |    "metadata": {},
272 |    "outputs": [],
273 |    "source": [
274 |     "class ModifiedMetrics:\n",
275 |     "    \n",
276 |     "    def sse(self):\n",
277 |     "        '''returns sum of squared errors (model vs actual)'''\n",
278 |     "        squared_errors = (self.target - self.predict(self.data)) ** 2\n",
279 |     "        self.sq_error_ = np.sum(squared_errors)\n",
280 |     "        return self.sq_error_\n",
281 |     "        \n",
282 |     "    def sst(self):\n",
283 |     "        '''returns total sum of squared errors (actual vs avg(actual))'''\n",
284 |     "        avg_y = np.mean(self.target)\n",
285 |     "        squared_errors = (self.target - avg_y) ** 2\n",
286 |     "        self.sst_ = np.sum(squared_errors)\n",
287 |     "        return self.sst_\n",
288 |     "    \n",
289 |     "    def r_squared(self):\n",
290 |     "        '''returns calculated value of r^2'''\n",
291 |     "        self.r_sq_ = 1 - self.sse()/self.sst()\n",
292 |     "        return self.r_sq_\n",
293 |     "    \n",
294 |     "    def adj_r_squared(self):\n",
295 |     "        '''returns calculated value of adjusted r^2'''\n",
296 |     "        self.adj_r_sq_ = 1 - (self.sse()/self._dfe) / (self.sst()/self._dft)\n",
297 |     "        return self.adj_r_sq_\n",
298 |     "    \n",
299 |     "    def mse(self):\n",
300 |     "        '''returns calculated value of mse'''\n",
301 |     "        self.mse_ = np.mean( (self.predict(self.data) - self.target) ** 2 )\n",
302 |     "        return self.mse_\n",
303 |     "    \n",
304 |     "    def pretty_print_stats(self):\n",
305 |     "        '''prints a report of statistics (sse, sst, mse, r^2, adj_r^2) for this model'''\n",
306 |     "        items = ( ('sse:', self.sse()), ('sst:', self.sst()), \n",
307 |     "                 ('mse:', self.mse()), ('r^2:', self.r_squared()), \n",
308 |     "                  ('adj_r^2:', self.adj_r_squared()))\n",
309 |     "        for item in items:\n",
310 |     "            print('{0:8} {1:.4f}'.format(item[0], item[1]))"
311 |    ]
312 |   },
313 |   {
314 |    "cell_type": "code",
315 |    "execution_count": 10,
316 |    "metadata": {},
317 |    "outputs": [],
318 |    "source": [
319 |     "class MyLinearRegressionWithInheritance(ModifiedMetrics):\n",
320 |     "    \n",
321 |     "    \n",
322 |     "    def __init__(self, fit_intercept=True):\n",
323 |     "        self.coef_ = None\n",
324 |     "        self.intercept_ = None\n",
325 |     "        self._fit_intercept = fit_intercept\n",
326 |     "          \n",
327 |     "        \n",
328 |     "    def fit(self, X, y):\n",
329 |     "        \"\"\"\n",
330 |     "        Fit model coefficients.\n",
331 |     "\n",
332 |     "        Arguments:\n",
333 |     "        X: 1D or 2D numpy array \n",
334 |     "        y: 1D numpy array\n",
335 |     "        \"\"\"\n",
336 |     "        \n",
337 |     "        # training data & ground truth data\n",
338 |     "        self.data = X\n",
339 |     "        self.target = y\n",
340 |     "        \n",
341 |     "        # degrees of freedom for the total variance of the dependent variable (n_rows - 1)\n",
342 |     "        self._dft = X.shape[0] - 1  \n",
343 |     "        # degrees of freedom for the error (residual) variance (n_rows - n_columns - 1)\n",
344 |     "        self._dfe = X.shape[0] - X.shape[1] - 1\n",
345 |     "        \n",
346 |     "        # check if X is 1D or 2D array\n",
347 |     "        if len(X.shape) == 1:\n",
348 |     "            X = X.reshape(-1,1)\n",
349 |     "            \n",
350 |     "        # add bias if fit_intercept\n",
351 |     "        if self._fit_intercept:\n",
352 |     "            X = np.c_[np.ones(X.shape[0]), X]\n",
353 |     "        \n",
354 |     "        # closed form solution\n",
355 |     "        xTx = np.dot(X.T, X)\n",
356 |     "        inverse_xTx = np.linalg.inv(xTx)\n",
357 |     "        xTy = np.dot(X.T, y)\n",
358 |     "        coef = np.dot(inverse_xTx, xTy)\n",
359 |     "        \n",
360 |     "        # set attributes\n",
361 |     "        if self._fit_intercept:\n",
362 |     "            self.intercept_ = coef[0]\n",
363 |     "            self.coef_ = coef[1:]\n",
364 |     "        else:\n",
365 |     "            self.intercept_ = 0\n",
366 |     "            self.coef_ = coef\n",
367 |     "            \n",
368 |     "    def predict(self, X):\n",
369 |     "        \"\"\"Output model prediction.\n",
370 |     "\n",
371 |     "        Arguments:\n",
372 |     "        X: 1D or 2D numpy array \n",
373 |     "        \"\"\"\n",
374 |     "        # check if X is 1D or 2D array\n",
375 |     "        if len(X.shape) == 1:\n",
376 |     "            X = X.reshape(-1,1) \n",
377 |     "        return np.dot(X, self.coef_) + self.intercept_"
378 |    ]
379 |   },
380 |   {
381 |    "cell_type": "code",
382 |    "execution_count": 11,
383 |    "metadata": {},
384 |    "outputs": [],
385 |    "source": [
386 |     "# from scratch w/inheritance\n",
387 |     "mlri = MyLinearRegressionWithInheritance()\n",
388 |     "mlri.fit(data, target)"
389 |    ]
390 |   },
391 |   {
392 |    "cell_type": "code",
393 |    "execution_count": 12,
394 |    "metadata": {},
395 |    "outputs": [
396 |     {
397 |      "name": "stdout",
398 |      "output_type": "stream",
399 |      "text": [
400 |       "sse:     11080.2763\n",
401 |       "sst:     42716.2954\n",
402 |       "mse:     21.8978\n",
403 |       "r^2:     0.7406\n",
404 |       "adj_r^2: 0.7338\n"
405 |      ]
406 |     }
407 |    ],
408 |    "source": [
409 |     "mlri.pretty_print_stats()"
410 |    ]
411 |   }
412 |  ],
413 |  "metadata": {
414 |   "kernelspec": {
415 |    "display_name": "Python [conda root]",
416 |    "language": "python",
417 |    "name": "conda-root-py"
418 |   },
419 |   "language_info": {
420 |    "codemirror_mode": {
421 |     "name": "ipython",
422 |     "version": 3
423 |    },
424 |    "file_extension": ".py",
425 |    "mimetype": "text/x-python",
426 |    "name": "python",
427 |    "nbconvert_exporter": "python",
428 |    "pygments_lexer": "ipython3",
429 |    "version": "3.5.4"
430 |   }
431 |  },
432 |  "nbformat": 4,
433 |  "nbformat_minor": 2
434 | }
435 | 


--------------------------------------------------------------------------------
/notebooks/Python/NumPy/NP_argwhere_isin.ipynb:
--------------------------------------------------------------------------------
 1 | {
 2 |  "cells": [
 3 |   {
 4 |    "cell_type": "code",
 5 |    "execution_count": null,
 6 |    "metadata": {},
 7 |    "outputs": [],
 8 |    "source": [
 9 |     "np.argwhere(X)"
10 |    ]
11 |   },
12 |   {
13 |    "cell_type": "code",
14 |    "execution_count": null,
15 |    "metadata": {},
16 |    "outputs": [],
17 |    "source": [
18 |     "np.isin(X, 1)"
19 |    ]
20 |   }
21 |  ],
22 |  "metadata": {
23 |   "kernelspec": {
24 |    "display_name": "Python [conda root]",
25 |    "language": "python",
26 |    "name": "conda-root-py"
27 |   },
28 |   "language_info": {
29 |    "codemirror_mode": {
30 |     "name": "ipython",
31 |     "version": 3
32 |    },
33 |    "file_extension": ".py",
34 |    "mimetype": "text/x-python",
35 |    "name": "python",
36 |    "nbconvert_exporter": "python",
37 |    "pygments_lexer": "ipython3",
38 |    "version": "3.5.4"
39 |   }
40 |  },
41 |  "nbformat": 4,
42 |  "nbformat_minor": 2
43 | }
44 | 


--------------------------------------------------------------------------------
/notebooks/Python/Programming_Problems/Alphabet_Magnets.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "### Sal's classroom has a bag of alphabet magnets. She wants to know if she can spell her friend's name using the letters in the bag. \n",
  8 |     "\n",
  9 |     "### Write a function called *CanYouSpell* that will take a list of letters and a name and print out YES if the name can be spelled and NO otherwise.\n",
 10 |     "\n",
 11 |     "> CanYouSpell(['y','n','p','g','n','l'],\"lynn\") would print YES  \n",
 12 |     "> CanYouSpell(['y','n','p','g','l'],\"lynn\") would print NO"
 13 |    ]
 14 |   },
 15 |   {
 16 |    "cell_type": "code",
 17 |    "execution_count": 1,
 18 |    "metadata": {
 19 |     "collapsed": true
 20 |    },
 21 |    "outputs": [],
 22 |    "source": [
 23 |     "def CanYouSpell(list_of_letters, name):\n",
 24 |     "    '''takes list of letters and a name, then returns Boolean showing if name can be spelled'''\n",
 25 |     "    \n",
 26 |     "    # type checking\n",
 27 |     "    assert type(list_of_letters) == list, 'list_of_letters input must be a list'\n",
 28 |     "    assert type(name) == str, 'name input must be a string'\n",
 29 |     "    \n",
 30 |     "    # import\n",
 31 |     "    from collections import defaultdict\n",
 32 |     "    \n",
 33 |     "    # defaultdict objects\n",
 34 |     "    defdict1 = defaultdict(int)\n",
 35 |     "    defdict2 = defaultdict(int)\n",
 36 |     "    \n",
 37 |     "    # main logic\n",
 38 |     "    if len(list_of_letters) < len(name):\n",
 39 |     "        return False\n",
 40 |     "    else:\n",
 41 |     "        # count letter occurrences in list_of_letters\n",
 42 |     "        for letter in list_of_letters:\n",
 43 |     "            defdict1[letter] += 1\n",
 44 |     "        # count letter occurrences in name\n",
 45 |     "        for letter in name:\n",
 46 |     "            defdict2[letter] += 1\n",
 47 |     "        # check if possible to spell name\n",
 48 |     "        for k, _ in defdict2.items():\n",
 49 |     "            if defdict1[k] < defdict2[k]:\n",
 50 |     "                return False     \n",
 51 |     "    return True"
 52 |    ]
 53 |   },
 54 |   {
 55 |    "cell_type": "markdown",
 56 |    "metadata": {},
 57 |    "source": [
 58 |     "---"
 59 |    ]
 60 |   },
 61 |   {
 62 |    "cell_type": "code",
 63 |    "execution_count": 2,
 64 |    "metadata": {},
 65 |    "outputs": [
 66 |     {
 67 |      "data": {
 68 |       "text/plain": [
 69 |        "True"
 70 |       ]
 71 |      },
 72 |      "execution_count": 2,
 73 |      "metadata": {},
 74 |      "output_type": "execute_result"
 75 |     }
 76 |    ],
 77 |    "source": [
 78 |     "# example 1\n",
 79 |     "CanYouSpell(['y','n','p','g','n','l'],\"lynn\")"
 80 |    ]
 81 |   },
 82 |   {
 83 |    "cell_type": "code",
 84 |    "execution_count": 3,
 85 |    "metadata": {},
 86 |    "outputs": [
 87 |     {
 88 |      "data": {
 89 |       "text/plain": [
 90 |        "False"
 91 |       ]
 92 |      },
 93 |      "execution_count": 3,
 94 |      "metadata": {},
 95 |      "output_type": "execute_result"
 96 |     }
 97 |    ],
 98 |    "source": [
 99 |     "# example 2\n",
100 |     "CanYouSpell(['y','n','p','g','l'],\"lynn\")"
101 |    ]
102 |   },
103 |   {
104 |    "cell_type": "code",
105 |    "execution_count": 4,
106 |    "metadata": {},
107 |    "outputs": [
108 |     {
109 |      "data": {
110 |       "text/plain": [
111 |        "False"
112 |       ]
113 |      },
114 |      "execution_count": 4,
115 |      "metadata": {},
116 |      "output_type": "execute_result"
117 |     }
118 |    ],
119 |    "source": [
120 |     "# example: too few letters\n",
121 |     "CanYouSpell(['y','n','p','g'],\"lynn\")"
122 |    ]
123 |   },
124 |   {
125 |    "cell_type": "code",
126 |    "execution_count": 5,
127 |    "metadata": {},
128 |    "outputs": [
129 |     {
130 |      "data": {
131 |       "text/plain": [
132 |        "True"
133 |       ]
134 |      },
135 |      "execution_count": 5,
136 |      "metadata": {},
137 |      "output_type": "execute_result"
138 |     }
139 |    ],
140 |    "source": [
141 |     "# fun example 1\n",
142 |     "CanYouSpell(list('oranges and apples'), 'apple')"
143 |    ]
144 |   },
145 |   {
146 |    "cell_type": "code",
147 |    "execution_count": 6,
148 |    "metadata": {},
149 |    "outputs": [
150 |     {
151 |      "data": {
152 |       "text/plain": [
153 |        "False"
154 |       ]
155 |      },
156 |      "execution_count": 6,
157 |      "metadata": {},
158 |      "output_type": "execute_result"
159 |     }
160 |    ],
161 |    "source": [
162 |     "# fun example 2\n",
163 |     "CanYouSpell(list('oranges and apps'), 'apple')"
164 |    ]
165 |   }
166 |  ],
167 |  "metadata": {
168 |   "kernelspec": {
169 |    "display_name": "Python [conda root]",
170 |    "language": "python",
171 |    "name": "conda-root-py"
172 |   },
173 |   "language_info": {
174 |    "codemirror_mode": {
175 |     "name": "ipython",
176 |     "version": 3
177 |    },
178 |    "file_extension": ".py",
179 |    "mimetype": "text/x-python",
180 |    "name": "python",
181 |    "nbconvert_exporter": "python",
182 |    "pygments_lexer": "ipython3",
183 |    "version": "3.5.4"
184 |   }
185 |  },
186 |  "nbformat": 4,
187 |  "nbformat_minor": 2
188 | }
189 | 


--------------------------------------------------------------------------------
/notebooks/Python/Programming_Problems/Bubble_Sort.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "import numpy as np"
 10 |    ]
 11 |   },
 12 |   {
 13 |    "cell_type": "markdown",
 14 |    "metadata": {},
 15 |    "source": [
 16 |     "## Bubble Sort\n",
 17 |     "Bubble sort is described in detail on [Wikipedia](https://en.wikipedia.org/wiki/Bubble_sort).\n",
 18 |     "\n",
 19 |     "Its worst-case and average-case time complexity is $O(n^2)$, where $n$ is the number of elements."
 20 |    ]
 21 |   },
 22 |   {
 23 |    "cell_type": "code",
 24 |    "execution_count": 2,
 25 |    "metadata": {},
 26 |    "outputs": [],
 27 |    "source": [
 28 |     "def bubble_sort(array, verbose=False):\n",
 29 |     "    '''The bubble sort algorithm.\n",
 30 |     "    \n",
 31 |     "    INPUT: array or list\n",
 32 |     "    OUTPUT: sorted array or list\n",
 33 |     "    '''\n",
 34 |     "    \n",
 35 |     "    # setup\n",
 36 |     "    unsorted = True\n",
 37 |     "    list_length = len(array) - 1\n",
 38 |     "    \n",
 39 |     "    # main logic\n",
 40 |     "    while unsorted:\n",
 41 |     "        changes = 0\n",
 42 |     "        for i in range(list_length):\n",
 43 |     "            if mylist[i] > mylist[i+1]:\n",
 44 |     "                mylist[i], mylist[i+1] = mylist[i+1], mylist[i]\n",
 45 |     "                changes += 1\n",
 46 |     "        if not changes:\n",
 47 |     "            unsorted = False\n",
 48 |     "        if verbose:\n",
 49 |     "            print(mylist)"
 50 |    ]
 51 |   },
 52 |   {
 53 |    "cell_type": "code",
 54 |    "execution_count": 3,
 55 |    "metadata": {},
 56 |    "outputs": [
 57 |     {
 58 |      "data": {
 59 |       "text/plain": [
 60 |        "array([39, 40, 35, 46, 11, 33, 23, 14, 38, 32,  1, 10, 16, 34, 26, 22, 23,\n",
 61 |        "       10, 12, 11, 10,  0,  6, 10, 31])"
 62 |       ]
 63 |      },
 64 |      "execution_count": 3,
 65 |      "metadata": {},
 66 |      "output_type": "execute_result"
 67 |     }
 68 |    ],
 69 |    "source": [
 70 |     "mylist = np.random.randint(0, 50, 25)\n",
 71 |     "mylist"
 72 |    ]
 73 |   },
 74 |   {
 75 |    "cell_type": "code",
 76 |    "execution_count": 4,
 77 |    "metadata": {},
 78 |    "outputs": [
 79 |     {
 80 |      "name": "stdout",
 81 |      "output_type": "stream",
 82 |      "text": [
 83 |       "[39 35 40 11 33 23 14 38 32  1 10 16 34 26 22 23 10 12 11 10  0  6 10 31 46]\n",
 84 |       "[35 39 11 33 23 14 38 32  1 10 16 34 26 22 23 10 12 11 10  0  6 10 31 40 46]\n",
 85 |       "[35 11 33 23 14 38 32  1 10 16 34 26 22 23 10 12 11 10  0  6 10 31 39 40 46]\n",
 86 |       "[11 33 23 14 35 32  1 10 16 34 26 22 23 10 12 11 10  0  6 10 31 38 39 40 46]\n",
 87 |       "[11 23 14 33 32  1 10 16 34 26 22 23 10 12 11 10  0  6 10 31 35 38 39 40 46]\n",
 88 |       "[11 14 23 32  1 10 16 33 26 22 23 10 12 11 10  0  6 10 31 34 35 38 39 40 46]\n",
 89 |       "[11 14 23  1 10 16 32 26 22 23 10 12 11 10  0  6 10 31 33 34 35 38 39 40 46]\n",
 90 |       "[11 14  1 10 16 23 26 22 23 10 12 11 10  0  6 10 31 32 33 34 35 38 39 40 46]\n",
 91 |       "[11  1 10 14 16 23 22 23 10 12 11 10  0  6 10 26 31 32 33 34 35 38 39 40 46]\n",
 92 |       "[ 1 10 11 14 16 22 23 10 12 11 10  0  6 10 23 26 31 32 33 34 35 38 39 40 46]\n",
 93 |       "[ 1 10 11 14 16 22 10 12 11 10  0  6 10 23 23 26 31 32 33 34 35 38 39 40 46]\n",
 94 |       "[ 1 10 11 14 16 10 12 11 10  0  6 10 22 23 23 26 31 32 33 34 35 38 39 40 46]\n",
 95 |       "[ 1 10 11 14 10 12 11 10  0  6 10 16 22 23 23 26 31 32 33 34 35 38 39 40 46]\n",
 96 |       "[ 1 10 11 10 12 11 10  0  6 10 14 16 22 23 23 26 31 32 33 34 35 38 39 40 46]\n",
 97 |       "[ 1 10 10 11 11 10  0  6 10 12 14 16 22 23 23 26 31 32 33 34 35 38 39 40 46]\n",
 98 |       "[ 1 10 10 11 10  0  6 10 11 12 14 16 22 23 23 26 31 32 33 34 35 38 39 40 46]\n",
 99 |       "[ 1 10 10 10  0  6 10 11 11 12 14 16 22 23 23 26 31 32 33 34 35 38 39 40 46]\n",
100 |       "[ 1 10 10  0  6 10 10 11 11 12 14 16 22 23 23 26 31 32 33 34 35 38 39 40 46]\n",
101 |       "[ 1 10  0  6 10 10 10 11 11 12 14 16 22 23 23 26 31 32 33 34 35 38 39 40 46]\n",
102 |       "[ 1  0  6 10 10 10 10 11 11 12 14 16 22 23 23 26 31 32 33 34 35 38 39 40 46]\n",
103 |       "[ 0  1  6 10 10 10 10 11 11 12 14 16 22 23 23 26 31 32 33 34 35 38 39 40 46]\n",
104 |       "[ 0  1  6 10 10 10 10 11 11 12 14 16 22 23 23 26 31 32 33 34 35 38 39 40 46]\n"
105 |      ]
106 |     }
107 |    ],
108 |    "source": [
109 |     "bubble_sort(mylist, verbose=True)"
110 |    ]
111 |   }
112 |  ],
113 |  "metadata": {
114 |   "kernelspec": {
115 |    "display_name": "Python [conda root]",
116 |    "language": "python",
117 |    "name": "conda-root-py"
118 |   },
119 |   "language_info": {
120 |    "codemirror_mode": {
121 |     "name": "ipython",
122 |     "version": 3
123 |    },
124 |    "file_extension": ".py",
125 |    "mimetype": "text/x-python",
126 |    "name": "python",
127 |    "nbconvert_exporter": "python",
128 |    "pygments_lexer": "ipython3",
129 |    "version": "3.5.4"
130 |   }
131 |  },
132 |  "nbformat": 4,
133 |  "nbformat_minor": 2
134 | }
135 | 


--------------------------------------------------------------------------------
/notebooks/Python/Programming_Problems/Cryptogram_Puzzle.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "## Introduction\n",
  8 |     "\n",
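  9 |     "Objective #1: Find a solution for this problem, where each letter stands for a distinct digit from 1 to 9 and a denominator such as $BC$ means the product $B \\times C$:\n",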
 10 |     "\n",
 11 |     "$\\frac{A}{BC} + \\frac{D}{EF} + \\frac{G}{HI} = 1$\n",
 12 |     "\n",
 13 |     "Objective #2: Enumerate all solutions to the problem."
 14 |    ]
 15 |   },
 16 |   {
 17 |    "cell_type": "markdown",
 18 |    "metadata": {},
 19 |    "source": [
 20 |     "## Setup"
 21 |    ]
 22 |   },
 23 |   {
 24 |    "cell_type": "code",
 25 |    "execution_count": 1,
 26 |    "metadata": {
 27 |     "collapsed": true
 28 |    },
 29 |    "outputs": [],
 30 |    "source": [
 31 |     "import numpy as np"
 32 |    ]
 33 |   },
 34 |   {
 35 |    "cell_type": "code",
 36 |    "execution_count": 4,
 37 |    "metadata": {},
 38 |    "outputs": [
 39 |     {
 40 |      "data": {
 41 |       "text/plain": [
 42 |        "array([1, 2, 3, 4, 5, 6, 7, 8, 9])"
 43 |       ]
 44 |      },
 45 |      "execution_count": 4,
 46 |      "metadata": {},
 47 |      "output_type": "execute_result"
 48 |     }
 49 |    ],
 50 |    "source": [
 51 |     "digits = np.arange(1,10)\n",
 52 |     "digits"
 53 |    ]
 54 |   },
 55 |   {
 56 |    "cell_type": "code",
 57 |    "execution_count": 7,
 58 |    "metadata": {
 59 |     "collapsed": true
 60 |    },
 61 |    "outputs": [],
 62 |    "source": [
 63 |     "def checker(arr):\n",
 64 |     "    return arr[0]/(arr[1]*arr[2]) + arr[3]/(arr[4]*arr[5]) + arr[6]/(arr[7]*arr[8])"
 65 |    ]
 66 |   },
 67 |   {
 68 |    "cell_type": "code",
 69 |    "execution_count": 27,
 70 |    "metadata": {
 71 |     "collapsed": true
 72 |    },
 73 |    "outputs": [],
 74 |    "source": [
 75 |     "def pretty_print(count, permute):\n",
 76 |     "    print('A={} B={} C={} D={} E={} F={} G={} H={} I={}'.format(permute[0], permute[1],\n",
 77 |     "                                                            permute[2], permute[3],\n",
 78 |     "                                                            permute[4], permute[5], \n",
 79 |     "                                                            permute[6], permute[7],\n",
 80 |     "                                                            permute[8]))\n",
 81 |     "    print('\\nSearches:', count)"
 82 |    ]
 83 |   },
 84 |   {
 85 |    "cell_type": "markdown",
 86 |    "metadata": {},
 87 |    "source": [
 88 |     "## Objective #1: find a solution"
 89 |    ]
 90 |   },
 91 |   {
 92 |    "cell_type": "code",
 93 |    "execution_count": 12,
 94 |    "metadata": {
 95 |     "collapsed": true
 96 |    },
 97 |    "outputs": [],
 98 |    "source": [
 99 |     "def naive_search():\n",
100 |     "    '''random search using permutations - returns number of searches and solution.'''\n",
101 |     "    import numpy as np\n",
102 |     "    digits = np.arange(1,10)\n",
103 |     "    count = 0\n",
104 |     "    permute = np.random.permutation(digits)\n",
105 |     "    while checker(permute) != 1:\n",
106 |     "        count += 1\n",
107 |     "        permute = np.random.permutation(digits)\n",
108 |     "    return count, permute"
109 |    ]
110 |   },
111 |   {
112 |    "cell_type": "code",
113 |    "execution_count": 21,
114 |    "metadata": {},
115 |    "outputs": [
116 |     {
117 |      "name": "stdout",
118 |      "output_type": "stream",
119 |      "text": [
120 |       "9455\n",
121 |       "[7 4 2 1 3 6 5 8 9]\n"
122 |      ]
123 |     }
124 |    ],
125 |    "source": [
126 |     "searches, array = naive_search()\n",
127 |     "print(searches)\n",
128 |     "print(array)"
129 |    ]
130 |   },
131 |   {
132 |    "cell_type": "code",
133 |    "execution_count": 28,
134 |    "metadata": {},
135 |    "outputs": [
136 |     {
137 |      "name": "stdout",
138 |      "output_type": "stream",
139 |      "text": [
140 |       "A=7 B=4 C=2 D=1 E=3 F=6 G=5 H=8 I=9\n",
141 |       "\n",
142 |       "Searches: 9455\n"
143 |      ]
144 |     }
145 |    ],
146 |    "source": [
147 |     "pretty_print(searches, array)"
148 |    ]
149 |   },
150 |   {
151 |    "cell_type": "markdown",
152 |    "metadata": {},
153 |    "source": [
154 |     "## Monte Carlo Expectation"
155 |    ]
156 |   },
157 |   {
158 |    "cell_type": "code",
159 |    "execution_count": 61,
160 |    "metadata": {
161 |     "collapsed": true
162 |    },
163 |    "outputs": [],
164 |    "source": [
165 |     "count = []\n",
166 |     "solution = []\n",
167 |     "for _ in range(int(1e3)):\n",
168 |     "    searches, array = naive_search()\n",
169 |     "    count.append(searches) \n",
170 |     "    solution.append(array)"
171 |    ]
172 |   },
173 |   {
174 |    "cell_type": "code",
175 |    "execution_count": 62,
176 |    "metadata": {},
177 |    "outputs": [
178 |     {
179 |      "data": {
180 |       "text/plain": [
181 |        "7434.53"
182 |       ]
183 |      },
184 |      "execution_count": 62,
185 |      "metadata": {},
186 |      "output_type": "execute_result"
187 |     }
188 |    ],
189 |    "source": [
190 |     "sum(count)/len(count)"
191 |    ]
192 |   },
193 |   {
194 |    "cell_type": "markdown",
195 |    "metadata": {},
196 |    "source": [
197 |     "## Next Steps\n",
198 |     "\n",
199 |     "Can you find a brute force approach?\n",
200 |     "\n",
201 |     "Can you find a faster method?"
202 |    ]
203 |   }
204 |  ],
205 |  "metadata": {
206 |   "kernelspec": {
207 |    "display_name": "Python [conda root]",
208 |    "language": "python",
209 |    "name": "conda-root-py"
210 |   },
211 |   "language_info": {
212 |    "codemirror_mode": {
213 |     "name": "ipython",
214 |     "version": 3
215 |    },
216 |    "file_extension": ".py",
217 |    "mimetype": "text/x-python",
218 |    "name": "python",
219 |    "nbconvert_exporter": "python",
220 |    "pygments_lexer": "ipython3",
221 |    "version": "3.5.4"
222 |   }
223 |  },
224 |  "nbformat": 4,
225 |  "nbformat_minor": 2
226 | }
227 | 


--------------------------------------------------------------------------------
/notebooks/Python/Programming_Problems/Dot_Product.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {
  7 |     "collapsed": true
  8 |    },
  9 |    "outputs": [],
 10 |    "source": [
 11 |     "import numpy as np"
 12 |    ]
 13 |   },
 14 |   {
 15 |    "cell_type": "code",
 16 |    "execution_count": 2,
 17 |    "metadata": {
 18 |     "collapsed": true
 19 |    },
 20 |    "outputs": [],
 21 |    "source": [
 22 |     "def find_w(a, b):\n",
 23 |     "    '''function that returns solution to a*w=b, if it exists'''\n",
 24 |     "    # check if 1D or 2D array\n",
 25 |     "    if len(a.shape) != 2:\n",
 26 |     "        # reshape if 1D array, else throws error\n",
 27 |     "        a = a.reshape(-1,1)\n",
 28 |     "        try:\n",
 29 |     "            return np.linalg.solve(a,b)\n",
 30 |     "        except:\n",
 31 |     "            print('Warning: Singular Matrix')  ## case where a=0 and b!=0\n",
 32 |     "    else:\n",
 33 |     "        # num rows < num columns = infinite solutions\n",
 34 |     "        if a.shape[0] < a.shape[1]:\n",
 35 |     "            print('Underdetermined System: infinite or no solutions')\n",
 36 |     "        else:\n",
 37 |     "            try:\n",
 38 |     "                return np.linalg.solve(a,b)\n",
 39 |     "            except:\n",
 40 |     "                print('0 solutions')"
 41 |    ]
 42 |   },
 43 |   {
 44 |    "cell_type": "markdown",
 45 |    "metadata": {},
 46 |    "source": [
 47 |     "### Example Problem"
 48 |    ]
 49 |   },
 50 |   {
 51 |    "cell_type": "code",
 52 |    "execution_count": 3,
 53 |    "metadata": {},
 54 |    "outputs": [
 55 |     {
 56 |      "name": "stdout",
 57 |      "output_type": "stream",
 58 |      "text": [
 59 |       "[[5 3 7]\n",
 60 |       " [2 4 1]]\n",
 61 |       "\n",
 62 |       "[537 241]\n"
 63 |      ]
 64 |     }
 65 |    ],
 66 |    "source": [
 67 |     "X = np.array([5,3,7,2,4,1]).reshape(2,3)\n",
 68 |     "y = np.array([537, 241])\n",
 69 |     "print(X)\n",
 70 |     "print()\n",
 71 |     "print(y)"
 72 |    ]
 73 |   },
 74 |   {
 75 |    "cell_type": "code",
 76 |    "execution_count": 4,
 77 |    "metadata": {},
 78 |    "outputs": [
 79 |     {
 80 |      "name": "stdout",
 81 |      "output_type": "stream",
 82 |      "text": [
 83 |       "Underdetermined System: infinite or no solutions\n"
 84 |      ]
 85 |     }
 86 |    ],
 87 |    "source": [
 88 |     "find_w(X,y)"
 89 |    ]
 90 |   },
 91 |   {
 92 |    "cell_type": "markdown",
 93 |    "metadata": {},
 94 |    "source": [
 95 |     "***This is an underdetermined system, as there are 2 equations and 3 variables. Therefore, there exist an infinite number of solutions.***"
 96 |    ]
 97 |   },
 98 |   {
 99 |    "cell_type": "markdown",
100 |    "metadata": {},
101 |    "source": [
102 |     "### Example A"
103 |    ]
104 |   },
105 |   {
106 |    "cell_type": "code",
107 |    "execution_count": 5,
108 |    "metadata": {},
109 |    "outputs": [
110 |     {
111 |      "data": {
112 |       "text/plain": [
113 |        "array([ 4.])"
114 |       ]
115 |      },
116 |      "execution_count": 5,
117 |      "metadata": {},
118 |      "output_type": "execute_result"
119 |     }
120 |    ],
121 |    "source": [
122 |     "X = np.array([2])\n",
123 |     "y = np.array([8])\n",
124 |     "find_w(X,y)"
125 |    ]
126 |   },
127 |   {
128 |    "cell_type": "markdown",
129 |    "metadata": {},
130 |    "source": [
131 |     "### Example B"
132 |    ]
133 |   },
134 |   {
135 |    "cell_type": "code",
136 |    "execution_count": 6,
137 |    "metadata": {},
138 |    "outputs": [
139 |     {
140 |      "name": "stdout",
141 |      "output_type": "stream",
142 |      "text": [
143 |       "Warning: Singular Matrix\n"
144 |      ]
145 |     }
146 |    ],
147 |    "source": [
148 |     "X = np.array([0])\n",
149 |     "y = np.array([8])\n",
150 |     "find_w(X,y)"
151 |    ]
152 |   },
153 |   {
154 |    "cell_type": "markdown",
155 |    "metadata": {},
156 |    "source": [
157 |     "***No solution. Zero times anything is always zero, so it can never equal 8!***"
158 |    ]
159 |   },
160 |   {
161 |    "cell_type": "markdown",
162 |    "metadata": {},
163 |    "source": [
164 |     "### Example C"
165 |    ]
166 |   },
167 |   {
168 |    "cell_type": "code",
169 |    "execution_count": 7,
170 |    "metadata": {},
171 |    "outputs": [
172 |     {
173 |      "name": "stdout",
174 |      "output_type": "stream",
175 |      "text": [
176 |       "Warning: Singular Matrix\n"
177 |      ]
178 |     }
179 |    ],
180 |    "source": [
181 |     "X = np.array([2,4])\n",
182 |     "y = np.array([8])\n",
183 |     "find_w(X,y)"
184 |    ]
185 |   },
186 |   {
187 |    "cell_type": "markdown",
188 |    "metadata": {},
189 |    "source": [
190 |     "`np.linalg.solve` expects a square matrix, so disregard the message above. This system is underdetermined (one equation, two unknowns), so it has infinitely many solutions."
191 |    ]
192 |   },
193 |   {
194 |    "cell_type": "markdown",
195 |    "metadata": {},
196 |    "source": [
197 |     "### Example D"
198 |    ]
199 |   },
200 |   {
201 |    "cell_type": "code",
202 |    "execution_count": 8,
203 |    "metadata": {},
204 |    "outputs": [
205 |     {
206 |      "data": {
207 |       "text/plain": [
208 |        "array([-2.,  3.])"
209 |       ]
210 |      },
211 |      "execution_count": 8,
212 |      "metadata": {},
213 |      "output_type": "execute_result"
214 |     }
215 |    ],
216 |    "source": [
217 |     "X = np.array([2,4,0,1]).reshape(2,2)\n",
218 |     "y = np.array([8,3])\n",
219 |     "find_w(X,y)"
220 |    ]
221 |   },
222 |   {
223 |    "cell_type": "markdown",
224 |    "metadata": {},
225 |    "source": [
226 |     "### Example E"
227 |    ]
228 |   },
229 |   {
230 |    "cell_type": "code",
231 |    "execution_count": 9,
232 |    "metadata": {},
233 |    "outputs": [
234 |     {
235 |      "name": "stdout",
236 |      "output_type": "stream",
237 |      "text": [
238 |       "0 solutions\n"
239 |      ]
240 |     }
241 |    ],
242 |    "source": [
243 |     "X = np.array([2,4,0,1,9,5]).reshape(3,2)\n",
244 |     "y = np.array([8,3,1])\n",
245 |     "find_w(X,y)"
246 |    ]
247 |   },
248 |   {
249 |    "cell_type": "markdown",
250 |    "metadata": {},
251 |    "source": [
252 |     "### Example F"
253 |    ]
254 |   },
255 |   {
256 |    "cell_type": "code",
257 |    "execution_count": 10,
258 |    "metadata": {},
259 |    "outputs": [
260 |     {
261 |      "data": {
262 |       "text/plain": [
263 |        "array([ 2.,  0.])"
264 |       ]
265 |      },
266 |      "execution_count": 10,
267 |      "metadata": {},
268 |      "output_type": "execute_result"
269 |     }
270 |    ],
271 |    "source": [
272 |     "X = np.array([2,2,3,3]).reshape(2,2)\n",
273 |     "y = np.array([4,6])\n",
274 |     "find_w(X,y)"
275 |    ]
276 |   },
277 |   {
278 |    "cell_type": "markdown",
279 |    "metadata": {},
280 |    "source": [
281 |     "This system actually has infinitely many solutions, because the second equation is just the first one multiplied by a constant. There is really only 1 unique equation with 2 unknowns, so the system is underdetermined. The code needs to catch this edge case."
282 |    ]
283 |   },
284 |   {
285 |    "cell_type": "markdown",
286 |    "metadata": {},
287 |    "source": [
288 |     "### Example G"
289 |    ]
290 |   },
291 |   {
292 |    "cell_type": "markdown",
293 |    "metadata": {},
294 |    "source": [
295 |     "Can't be done. Can't multiply text * numbers."
296 |    ]
297 |   },
298 |   {
299 |    "cell_type": "markdown",
300 |    "metadata": {},
301 |    "source": [
302 |     "### Example H"
303 |    ]
304 |   },
305 |   {
306 |    "cell_type": "code",
307 |    "execution_count": 11,
308 |    "metadata": {},
309 |    "outputs": [
310 |     {
311 |      "data": {
312 |       "text/plain": [
313 |        "array([ 8.,  6.])"
314 |       ]
315 |      },
316 |      "execution_count": 11,
317 |      "metadata": {},
318 |      "output_type": "execute_result"
319 |     }
320 |    ],
321 |    "source": [
322 |     "X = np.array([1,0,0,1]).reshape(2,2)\n",
323 |     "y = np.array([8,6])\n",
324 |     "find_w(X,y)"
325 |    ]
326 |   },
327 |   {
328 |    "cell_type": "markdown",
329 |    "metadata": {},
330 |    "source": [
331 |     "### Example I"
332 |    ]
333 |   },
334 |   {
335 |    "cell_type": "code",
336 |    "execution_count": 12,
337 |    "metadata": {},
338 |    "outputs": [
339 |     {
340 |      "name": "stdout",
341 |      "output_type": "stream",
342 |      "text": [
343 |       "Underdetermined System: infinite or no solutions\n"
344 |      ]
345 |     }
346 |    ],
347 |    "source": [
348 |     "X = np.array([1,1,0,1,0,1]).reshape(2,3)\n",
349 |     "y = np.array([8,6])\n",
350 |     "find_w(X,y)"
351 |    ]
352 |   },
353 |   {
354 |    "cell_type": "markdown",
355 |    "metadata": {},
356 |    "source": [
357 |     "### Example J"
358 |    ]
359 |   },
360 |   {
361 |    "cell_type": "code",
362 |    "execution_count": 13,
363 |    "metadata": {},
364 |    "outputs": [
365 |     {
366 |      "data": {
367 |       "text/plain": [
368 |        "array([ 8., -2.])"
369 |       ]
370 |      },
371 |      "execution_count": 13,
372 |      "metadata": {},
373 |      "output_type": "execute_result"
374 |     }
375 |    ],
376 |    "source": [
377 |     "X = np.array([1,0,1,1]).reshape(2,2)\n",
378 |     "y = np.array([8,6])\n",
379 |     "find_w(X,y)"
380 |    ]
381 |   },
382 |   {
383 |    "cell_type": "markdown",
384 |    "metadata": {},
385 |    "source": [
386 |     "### NOTES\n",
387 |     "\n",
388 |     "An **underdetermined** system is one in which there are fewer equations than unknowns. The result is either 0 or infinite solutions. There are 0 solutions when the equations are inconsistent, for example x+y=1 and x+y=2: both cannot hold at once, because the same left-hand side would have to equal two different values. Otherwise there are infinitely many solutions.\n",
389 |     "\n",
390 |     "An **overdetermined** system is one in which there are more equations than unknowns. Be wary of inconsistent equations (x+y=1 and x+y=2) that will cause this system to have 0 solutions. Otherwise a unique solution exists.\n",
391 |     "\n",
392 |     "**Takeaway:** *so long as the number of unique equations is at least the same as the number of variables AND there are no inconsistencies, a unique solution exists.*"
393 |    ]
394 |   }
395 |  ],
396 |  "metadata": {
397 |   "kernelspec": {
398 |    "display_name": "Python [conda root]",
399 |    "language": "python",
400 |    "name": "conda-root-py"
401 |   },
402 |   "language_info": {
403 |    "codemirror_mode": {
404 |     "name": "ipython",
405 |     "version": 3
406 |    },
407 |    "file_extension": ".py",
408 |    "mimetype": "text/x-python",
409 |    "name": "python",
410 |    "nbconvert_exporter": "python",
411 |    "pygments_lexer": "ipython3",
412 |    "version": "3.5.4"
413 |   }
414 |  },
415 |  "nbformat": 4,
416 |  "nbformat_minor": 2
417 | }
418 | 


--------------------------------------------------------------------------------
/notebooks/Python/Programming_Problems/Factorial.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "## Introduction"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "markdown",
 12 |    "metadata": {},
 13 |    "source": [
 14 |     "Factorial(n), written as *n!*, is the product of all the positive integers up to and including *n*. By convention, 0! = 1.\n",
 15 |     "\n",
 16 |     "8! = 8 x 7 x 6 x 5 x 4 x 3 x 2 x 1 = 40320"
 17 |    ]
 18 |   },
 19 |   {
 20 |    "cell_type": "markdown",
 21 |    "metadata": {},
 22 |    "source": [
 23 |     "## Instructions"
 24 |    ]
 25 |   },
 26 |   {
 27 |    "cell_type": "markdown",
 28 |    "metadata": {},
 29 |    "source": [
 30 |     "Write a function called **factorial** to calculate the factorial of a given number."
 31 |    ]
 32 |   },
 33 |   {
 34 |    "cell_type": "markdown",
 35 |    "metadata": {},
 36 |    "source": [
 37 |     "## Solutions"
 38 |    ]
 39 |   },
 40 |   {
 41 |    "cell_type": "code",
 42 |    "execution_count": 1,
 43 |    "metadata": {},
 44 |    "outputs": [],
 45 |    "source": [
 46 |     "import numpy as np"
 47 |    ]
 48 |   },
 49 |   {
 50 |    "cell_type": "markdown",
 51 |    "metadata": {},
 52 |    "source": [
 53 |     "#### No Recursion"
 54 |    ]
 55 |   },
 56 |   {
 57 |    "cell_type": "code",
 58 |    "execution_count": 11,
 59 |    "metadata": {},
 60 |    "outputs": [],
 61 |    "source": [
 62 |     "def factorial(n):\n",
 63 |     "    '''returns factorial of number n w/out recursion'''\n",
 64 |     "    \n",
 65 |     "    # error checking\n",
 66 |     "    assert n >= 0, 'n must be a nonnegative integer'\n",
 67 |     "    assert type(n) == int, 'n must be a nonnegative integer'\n",
 68 |     "    \n",
 69 |     "    out = 1\n",
 70 |     "    for value in range(1, n+1):\n",
 71 |     "        out *= value\n",
 72 |     "    return out"
 73 |    ]
 74 |   },
 75 |   {
 76 |    "cell_type": "code",
 77 |    "execution_count": 58,
 78 |    "metadata": {},
 79 |    "outputs": [
 80 |     {
 81 |      "name": "stdout",
 82 |      "output_type": "stream",
 83 |      "text": [
 84 |       "n: 0 | result: 1\n",
 85 |       "n: 1 | result: 1\n",
 86 |       "n: 2 | result: 2\n",
 87 |       "n: 3 | result: 6\n",
 88 |       "n: 4 | result: 24\n",
 89 |       "n: 5 | result: 120\n",
 90 |       "n: 6 | result: 720\n",
 91 |       "n: 7 | result: 5040\n",
 92 |       "n: 8 | result: 40320\n",
 93 |       "n: 9 | result: 362880\n"
 94 |      ]
 95 |     }
 96 |    ],
 97 |    "source": [
 98 |     "for i in range(10):\n",
 99 |     "    print('n: {} | result: {}'.format(i, factorial(i)))"
100 |    ]
101 |   },
102 |   {
103 |    "cell_type": "markdown",
104 |    "metadata": {},
105 |    "source": [
106 |     "#### Recursion"
107 |    ]
108 |   },
109 |   {
110 |    "cell_type": "code",
111 |    "execution_count": 47,
112 |    "metadata": {},
113 |    "outputs": [],
114 |    "source": [
115 |     "def recursive_factorial(n):\n",
116 |     "    '''returns factorial of number n w/recursion'''\n",
117 |     "    \n",
118 |     "    # error checking\n",
119 |     "    assert n >= 0, 'n must be a nonnegative integer'\n",
120 |     "    assert type(n) == int, 'n must be a nonnegative integer'\n",
121 |     "    \n",
122 |     "    # main logic\n",
123 |     "    if n == 0:   \n",
124 |     "        return 1\n",
125 |     "    else:\n",
126 |     "        output = n * recursive_factorial(n - 1) \n",
127 |     "    return output"
128 |    ]
129 |   },
130 |   {
131 |    "cell_type": "code",
132 |    "execution_count": 57,
133 |    "metadata": {},
134 |    "outputs": [
135 |     {
136 |      "name": "stdout",
137 |      "output_type": "stream",
138 |      "text": [
139 |       "n: 0 | result: 1\n",
140 |       "n: 1 | result: 1\n",
141 |       "n: 2 | result: 2\n",
142 |       "n: 3 | result: 6\n",
143 |       "n: 4 | result: 24\n",
144 |       "n: 5 | result: 120\n",
145 |       "n: 6 | result: 720\n",
146 |       "n: 7 | result: 5040\n",
147 |       "n: 8 | result: 40320\n",
148 |       "n: 9 | result: 362880\n"
149 |      ]
150 |     }
151 |    ],
152 |    "source": [
153 |     "for i in range(10):\n",
154 |     "    print('n: {} | result: {}'.format(i, recursive_factorial(i)))"
155 |    ]
156 |   }
157 |  ],
158 |  "metadata": {
159 |   "kernelspec": {
160 |    "display_name": "Python [conda root]",
161 |    "language": "python",
162 |    "name": "conda-root-py"
163 |   },
164 |   "language_info": {
165 |    "codemirror_mode": {
166 |     "name": "ipython",
167 |     "version": 3
168 |    },
169 |    "file_extension": ".py",
170 |    "mimetype": "text/x-python",
171 |    "name": "python",
172 |    "nbconvert_exporter": "python",
173 |    "pygments_lexer": "ipython3",
174 |    "version": "3.5.4"
175 |   }
176 |  },
177 |  "nbformat": 4,
178 |  "nbformat_minor": 2
179 | }
180 | 


--------------------------------------------------------------------------------
/notebooks/Python/Programming_Problems/Fibonacci.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "## Introduction"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "markdown",
 12 |    "metadata": {},
 13 |    "source": [
 14 |     "From Wikipedia:\n",
 15 |     "\n",
 16 |     ">In mathematics, the Fibonacci numbers are the numbers in the following integer sequence, called the Fibonacci sequence, and characterized by the fact that every number after the first two is the sum of the two preceding ones:\n",
 17 |     "\n",
 18 |     ">0, 1, 1, 2, 3, 5, 8, 13, 21, 34, 55, 89, 144, ...\n",
 19 |     "\n",
 20 |     "\n",
 21 |     ">By definition, the first two numbers in the Fibonacci sequence are either 1 and 1, or 0 and 1, depending on the chosen starting point of the sequence, and each subsequent number is the sum of the previous two.\n",
 22 |     "\n",
 23 |     ">The sequence Fn of Fibonacci numbers is defined by the recurrence relation:\n",
 24 |     "\n",
 25 |     ">**$F_{n}$ = $F_{n-1}$ + $F_{n-2}$**"
 26 |    ]
 27 |   },
 28 |   {
 29 |    "cell_type": "markdown",
 30 |    "metadata": {},
 31 |    "source": [
 32 |     "## Instructions"
 33 |    ]
 34 |   },
 35 |   {
 36 |    "cell_type": "markdown",
 37 |    "metadata": {},
 38 |    "source": [
 39 |     "Write a function called **fibonacci** that takes a nonnegative integer n and returns the n-th Fibonacci number (with F(0) = 0 and F(1) = 1)."
 40 |    ]
 41 |   },
 42 |   {
 43 |    "cell_type": "markdown",
 44 |    "metadata": {},
 45 |    "source": [
 46 |     "## Solution"
 47 |    ]
 48 |   },
 49 |   {
 50 |    "cell_type": "code",
 51 |    "execution_count": 1,
 52 |    "metadata": {},
 53 |    "outputs": [],
 54 |    "source": [
 55 |     "import numpy as np"
 56 |    ]
 57 |   },
 58 |   {
 59 |    "cell_type": "markdown",
 60 |    "metadata": {},
 61 |    "source": [
 62 |     "#### Recursion"
 63 |    ]
 64 |   },
 65 |   {
 66 |    "cell_type": "code",
 67 |    "execution_count": 2,
 68 |    "metadata": {},
 69 |    "outputs": [],
 70 |    "source": [
 71 |     "def fibonacci(n):\n",
 72 |     "    '''with recursion'''\n",
 73 |     "    \n",
 74 |     "    # error checking\n",
 75 |     "    assert n >= 0, 'n must be a nonnegative integer'\n",
 76 |     "    assert type(n) == int, 'n must be a nonnegative integer'\n",
 77 |     "    \n",
 78 |     "    # main logic\n",
 79 |     "    if n == 0: \n",
 80 |     "        return 0\n",
 81 |     "    elif n == 1: \n",
 82 |     "        return 1\n",
 83 |     "    else: \n",
 84 |     "        return fibonacci(n-1) + fibonacci(n-2)"
 85 |    ]
 86 |   },
 87 |   {
 88 |    "cell_type": "code",
 89 |    "execution_count": 3,
 90 |    "metadata": {},
 91 |    "outputs": [
 92 |     {
 93 |      "name": "stdout",
 94 |      "output_type": "stream",
 95 |      "text": [
 96 |       "n: 0 | result: 0\n",
 97 |       "n: 1 | result: 1\n",
 98 |       "n: 2 | result: 1\n",
 99 |       "n: 3 | result: 2\n",
100 |       "n: 4 | result: 3\n",
101 |       "n: 5 | result: 5\n",
102 |       "n: 6 | result: 8\n",
103 |       "n: 7 | result: 13\n",
104 |       "n: 8 | result: 21\n",
105 |       "n: 9 | result: 34\n"
106 |      ]
107 |     }
108 |    ],
109 |    "source": [
110 |     "for i in range(10):\n",
111 |     "    print('n: {} | result: {}'.format(i, fibonacci(i)))"
112 |    ]
113 |   },
114 |   {
115 |    "cell_type": "markdown",
116 |    "metadata": {},
117 |    "source": [
118 |     "## What's going on here?"
119 |    ]
120 |   },
121 |   {
122 |    "cell_type": "markdown",
123 |    "metadata": {},
124 |    "source": [
125 |     "![Fibonacci](http://www.idryman.org/images/graphviz/g-ff6b576e3a2600dd84a36f0059875404.svg)"
126 |    ]
127 |   },
128 |   {
129 |    "cell_type": "markdown",
130 |    "metadata": {},
131 |    "source": [
132 |     "## A little more detail..."
133 |    ]
134 |   },
135 |   {
136 |    "cell_type": "markdown",
137 |    "metadata": {},
138 |    "source": [
139 |     "![Fibonacci 2](http://zvzzt.files.wordpress.com/2014/05/fibonacci1.png)"
140 |    ]
141 |   }
142 |  ],
143 |  "metadata": {
144 |   "kernelspec": {
145 |    "display_name": "Python [conda root]",
146 |    "language": "python",
147 |    "name": "conda-root-py"
148 |   },
149 |   "language_info": {
150 |    "codemirror_mode": {
151 |     "name": "ipython",
152 |     "version": 3
153 |    },
154 |    "file_extension": ".py",
155 |    "mimetype": "text/x-python",
156 |    "name": "python",
157 |    "nbconvert_exporter": "python",
158 |    "pygments_lexer": "ipython3",
159 |    "version": "3.5.4"
160 |   }
161 |  },
162 |  "nbformat": 4,
163 |  "nbformat_minor": 2
164 | }
165 | 


--------------------------------------------------------------------------------
/notebooks/Python/Programming_Problems/GCD_Problem.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "def naive_gcd(num1, num2):\n",
 10 |     "    '''enumerate divisors and returns largest of intersection(num1, num2).'''\n",
 11 |     "    \n",
 12 |     "    assert num1 > 0, \"num1 must be positive.\"\n",
 13 |     "    assert num2 > 0, \"num2 must be positive.\"\n",
 14 |     "    \n",
 15 |     "    divisors1 = []\n",
 16 |     "    divisors2 = []\n",
 17 |     "    \n",
 18 |     "    count = 0\n",
 19 |     "    \n",
 20 |     "    for divisor in range(1, num1):\n",
 21 |     "        count += 1  ## just to keep tally\n",
 22 |     "        if num1 % divisor == 0:\n",
 23 |     "            divisors1.append(divisor)\n",
 24 |     "            \n",
 25 |     "    for divisor in range(1, num2):\n",
 26 |     "        count += 1 ## just to keep tally\n",
 27 |     "        if num2 % divisor == 0:\n",
 28 |     "            divisors2.append(divisor)\n",
 29 |     "    \n",
 30 |     "    gcd = max(set(divisors1) & set(divisors2))\n",
 31 |     "    print(\"GCD: {}\\nCount: {}\".format(gcd, count))"
 32 |    ]
 33 |   },
 34 |   {
 35 |    "cell_type": "code",
 36 |    "execution_count": 2,
 37 |    "metadata": {},
 38 |    "outputs": [
 39 |     {
 40 |      "name": "stdout",
 41 |      "output_type": "stream",
 42 |      "text": [
 43 |       "GCD: 4\n",
 44 |       "Count: 16098\n"
 45 |      ]
 46 |     }
 47 |    ],
 48 |    "source": [
 49 |     "naive_gcd(1236, 14864)"
 50 |    ]
 51 |   },
 52 |   {
 53 |    "cell_type": "code",
 54 |    "execution_count": 3,
 55 |    "metadata": {},
 56 |    "outputs": [],
 57 |    "source": [
 58 |     "def gcd(num1, num2):\n",
 59 |     "    '''returns the greatest common divisor'''\n",
 60 |     "    \n",
 61 |     "    assert num1 >= 0, \"num1 must be nonnegative.\"\n",
 62 |     "    assert num2 >= 0, \"num2 must be nonnegative.\"\n",
 63 |     "    \n",
 64 |     "    count = 0\n",
 65 |     "    \n",
 66 |     "    while num1 != 0:\n",
 67 |     "        count += 1  ## just to keep tally\n",
 68 |     "        gcd = num1\n",
 69 |     "        num1 = num2 % num1\n",
 70 |     "        num2 = gcd\n",
 71 |     "    print(\"GCD: {}\\nCount: {}\".format(gcd, count))"
 72 |    ]
 73 |   },
 74 |   {
 75 |    "cell_type": "code",
 76 |    "execution_count": 4,
 77 |    "metadata": {},
 78 |    "outputs": [
 79 |     {
 80 |      "name": "stdout",
 81 |      "output_type": "stream",
 82 |      "text": [
 83 |       "GCD: 4\n",
 84 |       "Count: 6\n"
 85 |      ]
 86 |     }
 87 |    ],
 88 |    "source": [
 89 |     "gcd(1236,14864)"
 90 |    ]
 91 |   },
 92 |   {
 93 |    "cell_type": "markdown",
 94 |    "metadata": {},
 95 |    "source": [
 96 |     "---"
 97 |    ]
 98 |   },
 99 |   {
100 |    "cell_type": "code",
101 |    "execution_count": 5,
102 |    "metadata": {},
103 |    "outputs": [
104 |     {
105 |      "name": "stdout",
106 |      "output_type": "stream",
107 |      "text": [
108 |       "GCD: 1\n",
109 |       "Count: 333101\n"
110 |      ]
111 |     }
112 |    ],
113 |    "source": [
114 |     "naive_gcd(134526, 198577)"
115 |    ]
116 |   },
117 |   {
118 |    "cell_type": "code",
119 |    "execution_count": 6,
120 |    "metadata": {},
121 |    "outputs": [
122 |     {
123 |      "name": "stdout",
124 |      "output_type": "stream",
125 |      "text": [
126 |       "GCD: 1\n",
127 |       "Count: 8\n"
128 |      ]
129 |     }
130 |    ],
131 |    "source": [
132 |     "gcd(134526, 198577)"
133 |    ]
134 |   }
135 |  ],
136 |  "metadata": {
137 |   "kernelspec": {
138 |    "display_name": "Python [conda root]",
139 |    "language": "python",
140 |    "name": "conda-root-py"
141 |   },
142 |   "language_info": {
143 |    "codemirror_mode": {
144 |     "name": "ipython",
145 |     "version": 3
146 |    },
147 |    "file_extension": ".py",
148 |    "mimetype": "text/x-python",
149 |    "name": "python",
150 |    "nbconvert_exporter": "python",
151 |    "pygments_lexer": "ipython3",
152 |    "version": "3.5.4"
153 |   }
154 |  },
155 |  "nbformat": 4,
156 |  "nbformat_minor": 2
157 | }
158 | 


--------------------------------------------------------------------------------
/notebooks/Python/Programming_Problems/Moving_Average_Problem.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {
  7 |     "collapsed": true
  8 |    },
  9 |    "outputs": [],
 10 |    "source": [
 11 |     "import numpy as np"
 12 |    ]
 13 |   },
 14 |   {
 15 |    "cell_type": "markdown",
 16 |    "metadata": {},
 17 |    "source": [
 18 |     "#### Show Step-by-Step"
 19 |    ]
 20 |   },
 21 |   {
 22 |    "cell_type": "code",
 23 |    "execution_count": 2,
 24 |    "metadata": {},
 25 |    "outputs": [
 26 |     {
 27 |      "data": {
 28 |       "text/plain": [
 29 |        "array([ 2,  4,  8, 16,  7, 99])"
 30 |       ]
 31 |      },
 32 |      "execution_count": 2,
 33 |      "metadata": {},
 34 |      "output_type": "execute_result"
 35 |     }
 36 |    ],
 37 |    "source": [
 38 |     "test = np.array([2,4,8,16,7,99])\n",
 39 |     "test"
 40 |    ]
 41 |   },
 42 |   {
 43 |    "cell_type": "code",
 44 |    "execution_count": 3,
 45 |    "metadata": {},
 46 |    "outputs": [
 47 |     {
 48 |      "name": "stdout",
 49 |      "output_type": "stream",
 50 |      "text": [
 51 |       "2.0\n",
 52 |       "3.0\n",
 53 |       "4.66666666667\n",
 54 |       "7.5\n",
 55 |       "7.4\n",
 56 |       "22.6666666667\n"
 57 |      ]
 58 |     }
 59 |    ],
 60 |    "source": [
 61 |     "for i in range(len(test)):\n",
 62 |     "    if i == 0:\n",
 63 |     "        print(np.mean(test[0]))\n",
 64 |     "    else:\n",
 65 |     "        print(np.mean(test[:i+1]))"
 66 |    ]
 67 |   },
 68 |   {
 69 |    "cell_type": "markdown",
 70 |    "metadata": {},
 71 |    "source": [
 72 |     "#### Function for Rolling Mean"
 73 |    ]
 74 |   },
 75 |   {
 76 |    "cell_type": "code",
 77 |    "execution_count": 4,
 78 |    "metadata": {
 79 |     "collapsed": true
 80 |    },
 81 |    "outputs": [],
 82 |    "source": [
 83 |     "def rolling_mean(array, verbose=False):\n",
 84 |     "    '''computes the mean incrementally'''\n",
 85 |     "    \n",
 86 |     "    assert type(array) == np.ndarray, \"input must be of type np.ndarray\"\n",
 87 |     "    \n",
 88 |     "    mv_avg = 0\n",
 89 |     "    num_elements = len(array)\n",
 90 |     "    \n",
 91 |     "    for i, elem in enumerate(array):\n",
 92 |     "        mv_avg += (elem-mv_avg)/(i+1)\n",
 93 |     "        if verbose:\n",
 94 |     "            print(i+1, mv_avg)\n",
 95 |     "    return mv_avg"
 96 |    ]
 97 |   },
 98 |   {
 99 |    "cell_type": "markdown",
100 |    "metadata": {},
101 |    "source": [
102 |     "#### Example"
103 |    ]
104 |   },
105 |   {
106 |    "cell_type": "code",
107 |    "execution_count": 5,
108 |    "metadata": {},
109 |    "outputs": [
110 |     {
111 |      "name": "stdout",
112 |      "output_type": "stream",
113 |      "text": [
114 |       "1 2.0\n",
115 |       "2 3.0\n",
116 |       "3 4.66666666667\n",
117 |       "4 7.5\n",
118 |       "5 7.4\n",
119 |       "6 22.6666666667\n"
120 |      ]
121 |     },
122 |     {
123 |      "data": {
124 |       "text/plain": [
125 |        "22.666666666666664"
126 |       ]
127 |      },
128 |      "execution_count": 5,
129 |      "metadata": {},
130 |      "output_type": "execute_result"
131 |     }
132 |    ],
133 |    "source": [
134 |     "rolling_mean(test, verbose=True)"
135 |    ]
136 |   }
137 |  ],
138 |  "metadata": {
139 |   "kernelspec": {
140 |    "display_name": "Python [conda root]",
141 |    "language": "python",
142 |    "name": "conda-root-py"
143 |   },
144 |   "language_info": {
145 |    "codemirror_mode": {
146 |     "name": "ipython",
147 |     "version": 3
148 |    },
149 |    "file_extension": ".py",
150 |    "mimetype": "text/x-python",
151 |    "name": "python",
152 |    "nbconvert_exporter": "python",
153 |    "pygments_lexer": "ipython3",
154 |    "version": "3.5.4"
155 |   }
156 |  },
157 |  "nbformat": 4,
158 |  "nbformat_minor": 2
159 | }
160 | 


--------------------------------------------------------------------------------
/notebooks/Python/Programming_Problems/Project_Euler_Problem_8_with_Deque.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "## Objective"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "markdown",
 12 |    "metadata": {},
 13 |    "source": [
 14 |     "This notebook shows one way to solve [Project Euler Problem #8](https://projecteuler.net/problem=8) using a great data structure called a deque."
 15 |    ]
 16 |   },
 17 |   {
 18 |    "cell_type": "markdown",
 19 |    "metadata": {},
 20 |    "source": [
 21 |     "## Imports"
 22 |    ]
 23 |   },
 24 |   {
 25 |    "cell_type": "code",
 26 |    "execution_count": 1,
 27 |    "metadata": {},
 28 |    "outputs": [],
 29 |    "source": [
 30 |     "from collections import deque\n",
 31 |     "import numpy as np"
 32 |    ]
 33 |   },
 34 |   {
 35 |    "cell_type": "markdown",
 36 |    "metadata": {},
 37 |    "source": [
 38 |     "## Data"
 39 |    ]
 40 |   },
 41 |   {
 42 |    "cell_type": "code",
 43 |    "execution_count": 2,
 44 |    "metadata": {},
 45 |    "outputs": [],
 46 |    "source": [
 47 |     "num = '''73167176531330624919225119674426574742355349194934\n",
 48 |     "96983520312774506326239578318016984801869478851843\n",
 49 |     "85861560789112949495459501737958331952853208805511\n",
 50 |     "12540698747158523863050715693290963295227443043557\n",
 51 |     "66896648950445244523161731856403098711121722383113\n",
 52 |     "62229893423380308135336276614282806444486645238749\n",
 53 |     "30358907296290491560440772390713810515859307960866\n",
 54 |     "70172427121883998797908792274921901699720888093776\n",
 55 |     "65727333001053367881220235421809751254540594752243\n",
 56 |     "52584907711670556013604839586446706324415722155397\n",
 57 |     "53697817977846174064955149290862569321978468622482\n",
 58 |     "83972241375657056057490261407972968652414535100474\n",
 59 |     "82166370484403199890008895243450658541227588666881\n",
 60 |     "16427171479924442928230863465674813919123162824586\n",
 61 |     "17866458359124566529476545682848912883142607690042\n",
 62 |     "24219022671055626321111109370544217506941658960408\n",
 63 |     "07198403850962455444362981230987879927244284909188\n",
 64 |     "84580156166097919133875499200524063689912560717606\n",
 65 |     "05886116467109405077541002256983155200055935729725\n",
 66 |     "71636269561882670428252483600823257530420752963450'''"
 67 |    ]
 68 |   },
 69 |   {
 70 |    "cell_type": "markdown",
 71 |    "metadata": {},
 72 |    "source": [
 73 |     "## Function"
 74 |    ]
 75 |   },
 76 |   {
 77 |    "cell_type": "code",
 78 |    "execution_count": 3,
 79 |    "metadata": {},
 80 |    "outputs": [],
 81 |    "source": [
 82 |     "def max_prod(numString, numDigits=4):\n",
 83 |     "    '''takes string of numbers and outputs greatest product exactly numDigits long'''\n",
 84 |     "    \n",
 85 |     "    numString = numString.replace('\\n','')  ## remove newline characters\n",
 86 |     "    dq = deque(maxlen=numDigits)\n",
 87 |     "    max_prod = 0\n",
 88 |     "    \n",
 89 |     "    for num in numString:\n",
 90 |     "        dq.append(num)\n",
 91 |     "        if len(dq) == dq.maxlen:\n",
 92 |     "            current = np.prod([int(val) for val in dq])\n",
 93 |     "            if current > max_prod:\n",
 94 |     "                max_prod = current\n",
 95 |     "    return max_prod"
 96 |    ]
 97 |   },
 98 |   {
 99 |    "cell_type": "markdown",
100 |    "metadata": {},
101 |    "source": [
102 |     "## Example #1"
103 |    ]
104 |   },
105 |   {
106 |    "cell_type": "code",
107 |    "execution_count": 4,
108 |    "metadata": {},
109 |    "outputs": [
110 |     {
111 |      "data": {
112 |       "text/plain": [
113 |        "5832"
114 |       ]
115 |      },
116 |      "execution_count": 4,
117 |      "metadata": {},
118 |      "output_type": "execute_result"
119 |     }
120 |    ],
121 |    "source": [
122 |     "max_prod(num, numDigits=4)"
123 |    ]
124 |   },
125 |   {
126 |    "cell_type": "markdown",
127 |    "metadata": {},
128 |    "source": [
129 |     "## Example #2"
130 |    ]
131 |   },
132 |   {
133 |    "cell_type": "code",
134 |    "execution_count": 5,
135 |    "metadata": {},
136 |    "outputs": [
137 |     {
138 |      "data": {
139 |       "text/plain": [
140 |        "23514624000"
141 |       ]
142 |      },
143 |      "execution_count": 5,
144 |      "metadata": {},
145 |      "output_type": "execute_result"
146 |     }
147 |    ],
148 |    "source": [
149 |     "max_prod(num, numDigits=13)"
150 |    ]
151 |   },
152 |   {
153 |    "cell_type": "code",
154 |    "execution_count": 6,
155 |    "metadata": {},
156 |    "outputs": [
157 |     {
158 |      "name": "stdout",
159 |      "output_type": "stream",
160 |      "text": [
161 |       "12.1 ms ± 802 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n"
162 |      ]
163 |     }
164 |    ],
165 |    "source": [
166 |     "%timeit max_prod(num, numDigits=13)"
167 |    ]
168 |   }
169 |  ],
170 |  "metadata": {
171 |   "kernelspec": {
172 |    "display_name": "Python [conda root]",
173 |    "language": "python",
174 |    "name": "conda-root-py"
175 |   },
176 |   "language_info": {
177 |    "codemirror_mode": {
178 |     "name": "ipython",
179 |     "version": 3
180 |    },
181 |    "file_extension": ".py",
182 |    "mimetype": "text/x-python",
183 |    "name": "python",
184 |    "nbconvert_exporter": "python",
185 |    "pygments_lexer": "ipython3",
186 |    "version": "3.5.4"
187 |   }
188 |  },
189 |  "nbformat": 4,
190 |  "nbformat_minor": 2
191 | }
192 | 


--------------------------------------------------------------------------------
/notebooks/Python/Programming_Problems/Reverse_String.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "### Given a string, write a function called *reverseString* to reverse it. Do this using a loop, if possible."
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "code",
 12 |    "execution_count": 1,
 13 |    "metadata": {},
 14 |    "outputs": [],
 15 |    "source": [
 16 |     "def reverseString(string):\n",
 17 |     "    '''takes a string and returns it in reverse order (w/for loop)'''\n",
 18 |     "    \n",
 19 |     "    # type checking\n",
 20 |     "    assert type(string) == str, 'input must be a string'\n",
 21 |     "    \n",
 22 |     "    # main logic\n",
 23 |     "    string_length = len(string)-1\n",
 24 |     "    new_string = ''\n",
 25 |     "    for i in range(string_length, -1, -1):\n",
 26 |     "        new_string += string[i]\n",
 27 |     "    return new_string\n"
 28 |    ]
 29 |   },
 30 |   {
 31 |    "cell_type": "code",
 32 |    "execution_count": 2,
 33 |    "metadata": {
 34 |     "collapsed": true
 35 |    },
 36 |    "outputs": [],
 37 |    "source": [
 38 |     "def reverseString2(string):\n",
 39 |     "    '''takes a string and returns it in reverse order (w/slicing)'''\n",
 40 |     "    \n",
 41 |     "    # type checking\n",
 42 |     "    assert type(string) == str, 'input must be a string'\n",
 43 |     "    \n",
 44 |     "    # main logic\n",
 45 |     "    return string[::-1]"
 46 |    ]
 47 |   },
 48 |   {
 49 |    "cell_type": "markdown",
 50 |    "metadata": {},
 51 |    "source": [
 52 |     "---"
 53 |    ]
 54 |   },
 55 |   {
 56 |    "cell_type": "code",
 57 |    "execution_count": 3,
 58 |    "metadata": {},
 59 |    "outputs": [
 60 |     {
 61 |      "data": {
 62 |       "text/plain": [
 63 |        "'this is an example string'"
 64 |       ]
 65 |      },
 66 |      "execution_count": 3,
 67 |      "metadata": {},
 68 |      "output_type": "execute_result"
 69 |     }
 70 |    ],
 71 |    "source": [
 72 |     "# example string\n",
 73 |     "s = 'this is an example string'\n",
 74 |     "s"
 75 |    ]
 76 |   },
 77 |   {
 78 |    "cell_type": "code",
 79 |    "execution_count": 4,
 80 |    "metadata": {},
 81 |    "outputs": [
 82 |     {
 83 |      "data": {
 84 |       "text/plain": [
 85 |        "'gnirts elpmaxe na si siht'"
 86 |       ]
 87 |      },
 88 |      "execution_count": 4,
 89 |      "metadata": {},
 90 |      "output_type": "execute_result"
 91 |     }
 92 |    ],
 93 |    "source": [
 94 |     "# 1st function\n",
 95 |     "reverseString(s)"
 96 |    ]
 97 |   },
 98 |   {
 99 |    "cell_type": "code",
100 |    "execution_count": 5,
101 |    "metadata": {},
102 |    "outputs": [
103 |     {
104 |      "data": {
105 |       "text/plain": [
106 |        "'gnirts elpmaxe na si siht'"
107 |       ]
108 |      },
109 |      "execution_count": 5,
110 |      "metadata": {},
111 |      "output_type": "execute_result"
112 |     }
113 |    ],
114 |    "source": [
115 |     "# 2nd function\n",
116 |     "reverseString2(s)"
117 |    ]
118 |   },
119 |   {
120 |    "cell_type": "code",
121 |    "execution_count": 6,
122 |    "metadata": {},
123 |    "outputs": [
124 |     {
125 |      "data": {
126 |       "text/plain": [
127 |        "True"
128 |       ]
129 |      },
130 |      "execution_count": 6,
131 |      "metadata": {},
132 |      "output_type": "execute_result"
133 |     }
134 |    ],
135 |    "source": [
136 |     "# test for equality\n",
137 |     "reverseString(s) == reverseString2(s)"
138 |    ]
139 |   }
140 |  ],
141 |  "metadata": {
142 |   "kernelspec": {
143 |    "display_name": "Python [conda root]",
144 |    "language": "python",
145 |    "name": "conda-root-py"
146 |   },
147 |   "language_info": {
148 |    "codemirror_mode": {
149 |     "name": "ipython",
150 |     "version": 3
151 |    },
152 |    "file_extension": ".py",
153 |    "mimetype": "text/x-python",
154 |    "name": "python",
155 |    "nbconvert_exporter": "python",
156 |    "pygments_lexer": "ipython3",
157 |    "version": "3.5.4"
158 |   }
159 |  },
160 |  "nbformat": 4,
161 |  "nbformat_minor": 2
162 | }
163 | 


--------------------------------------------------------------------------------
/notebooks/Python/Programming_Problems/Rotation_Problem.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {
  7 |     "collapsed": true
  8 |    },
  9 |    "outputs": [],
 10 |    "source": [
 11 |     "def rotation(num1, num2, verbose=False):\n",
 12 |     "    '''tests whether num2 is a rotated version of num1'''\n",
 13 |     "    \n",
 14 |     "    num1 = list(str(num1))\n",
 15 |     "    num2 = list(str(num2))\n",
 16 |     "    \n",
 17 |     "    assert len(num1) == len(num2), \"numbers are different lengths\"\n",
 18 |     "    \n",
 19 |     "    for i, _ in enumerate(num1):\n",
 20 |     "        if num1 == num2:\n",
 21 |     "            print('{} rotations to the right'.format(i))\n",
 22 |     "            return True\n",
 23 |     "        else:\n",
 24 |     "            pop = list(num2.pop())\n",
 25 |     "            num2 = pop + num2\n",
 26 |     "        if verbose:\n",
 27 |     "            print(num1)\n",
 28 |     "            print(num2)\n",
 29 |     "            print('-'*25)"
 30 |    ]
 31 |   },
 32 |   {
 33 |    "cell_type": "code",
 34 |    "execution_count": 2,
 35 |    "metadata": {
 36 |     "collapsed": true
 37 |    },
 38 |    "outputs": [],
 39 |    "source": [
 40 |     "num1 = 12345\n",
 41 |     "num2 = 51234"
 42 |    ]
 43 |   },
 44 |   {
 45 |    "cell_type": "code",
 46 |    "execution_count": 3,
 47 |    "metadata": {},
 48 |    "outputs": [
 49 |     {
 50 |      "name": "stdout",
 51 |      "output_type": "stream",
 52 |      "text": [
 53 |       "['1', '2', '3', '4', '5']\n",
 54 |       "['4', '5', '1', '2', '3']\n",
 55 |       "-------------------------\n",
 56 |       "['1', '2', '3', '4', '5']\n",
 57 |       "['3', '4', '5', '1', '2']\n",
 58 |       "-------------------------\n",
 59 |       "['1', '2', '3', '4', '5']\n",
 60 |       "['2', '3', '4', '5', '1']\n",
 61 |       "-------------------------\n",
 62 |       "['1', '2', '3', '4', '5']\n",
 63 |       "['1', '2', '3', '4', '5']\n",
 64 |       "-------------------------\n",
 65 |       "4 rotations to the right\n"
 66 |      ]
 67 |     },
 68 |     {
 69 |      "data": {
 70 |       "text/plain": [
 71 |        "True"
 72 |       ]
 73 |      },
 74 |      "execution_count": 3,
 75 |      "metadata": {},
 76 |      "output_type": "execute_result"
 77 |     }
 78 |    ],
 79 |    "source": [
 80 |     "rotation(num1, num2, verbose=True)"
 81 |    ]
 82 |   }
 83 |  ],
 84 |  "metadata": {
 85 |   "kernelspec": {
 86 |    "display_name": "Python [conda root]",
 87 |    "language": "python",
 88 |    "name": "conda-root-py"
 89 |   },
 90 |   "language_info": {
 91 |    "codemirror_mode": {
 92 |     "name": "ipython",
 93 |     "version": 3
 94 |    },
 95 |    "file_extension": ".py",
 96 |    "mimetype": "text/x-python",
 97 |    "name": "python",
 98 |    "nbconvert_exporter": "python",
 99 |    "pygments_lexer": "ipython3",
100 |    "version": "3.5.4"
101 |   }
102 |  },
103 |  "nbformat": 4,
104 |  "nbformat_minor": 2
105 | }
106 | 


--------------------------------------------------------------------------------
/notebooks/Python/Python_Internals/Arrays_vs_Lists.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {
  7 |     "collapsed": true
  8 |    },
  9 |    "outputs": [],
 10 |    "source": [
 11 |     "import numpy as np"
 12 |    ]
 13 |   },
 14 |   {
 15 |    "cell_type": "code",
 16 |    "execution_count": 2,
 17 |    "metadata": {
 18 |     "collapsed": true
 19 |    },
 20 |    "outputs": [],
 21 |    "source": [
 22 |     "arr = np.arange(1e7)\n",
 23 |     "lst = arr.tolist()"
 24 |    ]
 25 |   },
 26 |   {
 27 |    "cell_type": "code",
 28 |    "execution_count": 3,
 29 |    "metadata": {},
 30 |    "outputs": [
 31 |     {
 32 |      "data": {
 33 |       "text/plain": [
 34 |        "numpy.ndarray"
 35 |       ]
 36 |      },
 37 |      "execution_count": 3,
 38 |      "metadata": {},
 39 |      "output_type": "execute_result"
 40 |     }
 41 |    ],
 42 |    "source": [
 43 |     "type(arr)"
 44 |    ]
 45 |   },
 46 |   {
 47 |    "cell_type": "code",
 48 |    "execution_count": 4,
 49 |    "metadata": {},
 50 |    "outputs": [
 51 |     {
 52 |      "data": {
 53 |       "text/plain": [
 54 |        "list"
 55 |       ]
 56 |      },
 57 |      "execution_count": 4,
 58 |      "metadata": {},
 59 |      "output_type": "execute_result"
 60 |     }
 61 |    ],
 62 |    "source": [
 63 |     "type(lst)"
 64 |    ]
 65 |   },
 66 |   {
 67 |    "cell_type": "markdown",
 68 |    "metadata": {},
 69 |    "source": [
 70 |     "## Empirical Testing (Simple)"
 71 |    ]
 72 |   },
 73 |   {
 74 |    "cell_type": "markdown",
 75 |    "metadata": {},
 76 |    "source": [
 77 |     "#### Show Equivalent (Simple)"
 78 |    ]
 79 |   },
 80 |   {
 81 |    "cell_type": "code",
 82 |    "execution_count": 5,
 83 |    "metadata": {},
 84 |    "outputs": [
 85 |     {
 86 |      "data": {
 87 |       "text/plain": [
 88 |        "0"
 89 |       ]
 90 |      },
 91 |      "execution_count": 5,
 92 |      "metadata": {},
 93 |      "output_type": "execute_result"
 94 |     }
 95 |    ],
 96 |    "source": [
 97 |     "sum(arr[arr > 1e5] != [el for el in lst if el > 1e5])"
 98 |    ]
 99 |   },
100 |   {
101 |    "cell_type": "markdown",
102 |    "metadata": {},
103 |    "source": [
104 |     "#### Timing (Simple)"
105 |    ]
106 |   },
107 |   {
108 |    "cell_type": "code",
109 |    "execution_count": 6,
110 |    "metadata": {},
111 |    "outputs": [
112 |     {
113 |      "name": "stdout",
114 |      "output_type": "stream",
115 |      "text": [
116 |       "42 ms ± 4.72 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)\n"
117 |      ]
118 |     }
119 |    ],
120 |    "source": [
121 |     "%timeit arr[arr > 1e5] "
122 |    ]
123 |   },
124 |   {
125 |    "cell_type": "code",
126 |    "execution_count": 7,
127 |    "metadata": {},
128 |    "outputs": [
129 |     {
130 |      "name": "stdout",
131 |      "output_type": "stream",
132 |      "text": [
133 |       "1.82 s ± 38.5 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
134 |      ]
135 |     }
136 |    ],
137 |    "source": [
138 |     "%timeit [el for el in lst if el > 1e5]"
139 |    ]
140 |   },
141 |   {
142 |    "cell_type": "markdown",
143 |    "metadata": {},
144 |    "source": [
145 |     "## Empirical Testing (Complex)"
146 |    ]
147 |   },
148 |   {
149 |    "cell_type": "markdown",
150 |    "metadata": {},
151 |    "source": [
152 |     "#### Show Equivalent (Complex)"
153 |    ]
154 |   },
155 |   {
156 |    "cell_type": "code",
157 |    "execution_count": 8,
158 |    "metadata": {},
159 |    "outputs": [
160 |     {
161 |      "data": {
162 |       "text/plain": [
163 |        "0"
164 |       ]
165 |      },
166 |      "execution_count": 8,
167 |      "metadata": {},
168 |      "output_type": "execute_result"
169 |     }
170 |    ],
171 |    "source": [
172 |     "sum(arr[arr < 1e5][arr[arr < 1e5] > 1e4] != [el for el in lst if el > 1e4 and el < 1e5])"
173 |    ]
174 |   },
175 |   {
176 |    "cell_type": "markdown",
177 |    "metadata": {},
178 |    "source": [
179 |     "#### Timing (Complex)"
180 |    ]
181 |   },
182 |   {
183 |    "cell_type": "code",
184 |    "execution_count": 9,
185 |    "metadata": {},
186 |    "outputs": [
187 |     {
188 |      "name": "stdout",
189 |      "output_type": "stream",
190 |      "text": [
191 |       "18.9 ms ± 1.31 ms per loop (mean ± std. dev. of 7 runs, 100 loops each)\n"
192 |      ]
193 |     }
194 |    ],
195 |    "source": [
196 |     "%timeit arr[arr < 1e5][arr[arr < 1e5] > 1e4]"
197 |    ]
198 |   },
199 |   {
200 |    "cell_type": "code",
201 |    "execution_count": 10,
202 |    "metadata": {},
203 |    "outputs": [
204 |     {
205 |      "name": "stdout",
206 |      "output_type": "stream",
207 |      "text": [
208 |       "2.03 s ± 56.7 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
209 |      ]
210 |     }
211 |    ],
212 |    "source": [
213 |     "%timeit [el for el in lst if el > 1e4 and el < 1e5]"
214 |    ]
215 |   }
216 |  ],
217 |  "metadata": {
218 |   "kernelspec": {
219 |    "display_name": "Python [conda root]",
220 |    "language": "python",
221 |    "name": "conda-root-py"
222 |   },
223 |   "language_info": {
224 |    "codemirror_mode": {
225 |     "name": "ipython",
226 |     "version": 3
227 |    },
228 |    "file_extension": ".py",
229 |    "mimetype": "text/x-python",
230 |    "name": "python",
231 |    "nbconvert_exporter": "python",
232 |    "pygments_lexer": "ipython3",
233 |    "version": "3.5.4"
234 |   }
235 |  },
236 |  "nbformat": 4,
237 |  "nbformat_minor": 2
238 | }
239 | 


--------------------------------------------------------------------------------
/notebooks/Python/Python_Internals/Working_with_Bits.ipynb:
--------------------------------------------------------------------------------
 1 | {
 2 |  "cells": [
 3 |   {
 4 |    "cell_type": "code",
 5 |    "execution_count": 1,
 6 |    "metadata": {
 7 |     "collapsed": true
 8 |    },
 9 |    "outputs": [],
10 |    "source": [
11 |     "import numpy as np"
12 |    ]
13 |   },
14 |   {
15 |    "cell_type": "code",
16 |    "execution_count": 2,
17 |    "metadata": {},
18 |    "outputs": [
19 |     {
20 |      "data": {
21 |       "text/plain": [
22 |        "116"
23 |       ]
24 |      },
25 |      "execution_count": 2,
26 |      "metadata": {},
27 |      "output_type": "execute_result"
28 |     }
29 |    ],
30 |    "source": [
31 |     "np.bitwise_xor(23,99)"
32 |    ]
33 |   },
34 |   {
35 |    "cell_type": "code",
36 |    "execution_count": 3,
37 |    "metadata": {},
38 |    "outputs": [
39 |     {
40 |      "data": {
41 |       "text/plain": [
42 |        "'1110100'"
43 |       ]
44 |      },
45 |      "execution_count": 3,
46 |      "metadata": {},
47 |      "output_type": "execute_result"
48 |     }
49 |    ],
50 |    "source": [
51 |     "np.binary_repr(116)"
52 |    ]
53 |   },
54 |   {
55 |    "cell_type": "code",
56 |    "execution_count": null,
57 |    "metadata": {
58 |     "collapsed": true
59 |    },
60 |    "outputs": [],
61 |    "source": []
62 |   }
63 |  ],
64 |  "metadata": {
65 |   "kernelspec": {
66 |    "display_name": "Python [conda root]",
67 |    "language": "python",
68 |    "name": "conda-root-py"
69 |   },
70 |   "language_info": {
71 |    "codemirror_mode": {
72 |     "name": "ipython",
73 |     "version": 3
74 |    },
75 |    "file_extension": ".py",
76 |    "mimetype": "text/x-python",
77 |    "name": "python",
78 |    "nbconvert_exporter": "python",
79 |    "pygments_lexer": "ipython3",
80 |    "version": "3.5.4"
81 |   }
82 |  },
83 |  "nbformat": 4,
84 |  "nbformat_minor": 2
85 | }
86 | 


--------------------------------------------------------------------------------
/notebooks/Python/Python_Internals/listexp_vs_genexp.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {
  7 |     "collapsed": true
  8 |    },
  9 |    "outputs": [],
 10 |    "source": [
 11 |     "from itertools import cycle"
 12 |    ]
 13 |   },
 14 |   {
 15 |    "cell_type": "code",
 16 |    "execution_count": 2,
 17 |    "metadata": {
 18 |     "collapsed": true
 19 |    },
 20 |    "outputs": [],
 21 |    "source": [
 22 |     "lst = [x for x in range(int(1e6))]"
 23 |    ]
 24 |   },
 25 |   {
 26 |    "cell_type": "code",
 27 |    "execution_count": 3,
 28 |    "metadata": {},
 29 |    "outputs": [
 30 |     {
 31 |      "name": "stdout",
 32 |      "output_type": "stream",
 33 |      "text": [
 34 |       "499999500000\n",
 35 |       "499999500000\n",
 36 |       "499999500000\n",
 37 |       "499999500000\n",
 38 |       "499999500000\n",
 39 |       "499999500000\n",
 40 |       "499999500000\n",
 41 |       "499999500000\n",
 42 |       "499999500000\n",
 43 |       "499999500000\n",
 44 |       "499999500000\n",
 45 |       "499999500000\n",
 46 |       "499999500000\n",
 47 |       "499999500000\n",
 48 |       "499999500000\n",
 49 |       "499999500000\n",
 50 |       "499999500000\n",
 51 |       "499999500000\n",
 52 |       "499999500000\n",
 53 |       "499999500000\n",
 54 |       "499999500000\n",
 55 |       "499999500000\n",
 56 |       "499999500000\n",
 57 |       "499999500000\n",
 58 |       "499999500000\n",
 59 |       "499999500000\n",
 60 |       "499999500000\n",
 61 |       "499999500000\n",
 62 |       "499999500000\n",
 63 |       "499999500000\n",
 64 |       "499999500000\n",
 65 |       "499999500000\n",
 66 |       "499999500000\n",
 67 |       "499999500000\n",
 68 |       "499999500000\n",
 69 |       "499999500000\n",
 70 |       "499999500000\n",
 71 |       "499999500000\n",
 72 |       "499999500000\n",
 73 |       "499999500000\n",
 74 |       "499999500000\n",
 75 |       "499999500000\n",
 76 |       "499999500000\n",
 77 |       "499999500000\n",
 78 |       "499999500000\n",
 79 |       "499999500000\n",
 80 |       "499999500000\n",
 81 |       "499999500000\n",
 82 |       "499999500000\n",
 83 |       "499999500000\n",
 84 |       "499999500000\n",
 85 |       "499999500000\n",
 86 |       "499999500000\n",
 87 |       "499999500000\n",
 88 |       "499999500000\n",
 89 |       "499999500000\n",
 90 |       "499999500000\n",
 91 |       "499999500000\n",
 92 |       "499999500000\n",
 93 |       "499999500000\n",
 94 |       "499999500000\n",
 95 |       "499999500000\n",
 96 |       "499999500000\n",
 97 |       "499999500000\n",
 98 |       "499999500000\n",
 99 |       "499999500000\n",
100 |       "499999500000\n",
101 |       "499999500000\n",
102 |       "499999500000\n",
103 |       "499999500000\n",
104 |       "499999500000\n",
105 |       "499999500000\n",
106 |       "499999500000\n",
107 |       "499999500000\n",
108 |       "499999500000\n",
109 |       "499999500000\n",
110 |       "499999500000\n",
111 |       "499999500000\n",
112 |       "499999500000\n",
113 |       "499999500000\n",
114 |       "499999500000\n",
115 |       "148 ms ± 2.65 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)\n"
116 |      ]
117 |     }
118 |    ],
119 |    "source": [
120 |     "%%timeit\n",
121 |     "mysum = 0\n",
122 |     "for el in lst:\n",
123 |     "    mysum += el\n",
124 |     "print(mysum)"
125 |    ]
126 |   },
127 |   {
128 |    "cell_type": "code",
129 |    "execution_count": 4,
130 |    "metadata": {},
131 |    "outputs": [],
132 |    "source": [
133 |     "gen = cycle((x for x in range(int(1e6))))"
134 |    ]
135 |   },
136 |   {
137 |    "cell_type": "code",
138 |    "execution_count": 5,
139 |    "metadata": {},
140 |    "outputs": [
141 |     {
142 |      "name": "stdout",
143 |      "output_type": "stream",
144 |      "text": [
145 |       "499999500000\n",
146 |       "499999500000\n",
147 |       "499999500000\n",
148 |       "499999500000\n",
149 |       "499999500000\n",
150 |       "499999500000\n",
151 |       "499999500000\n",
152 |       "499999500000\n",
153 |       "279 ms ± 17.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
154 |      ]
155 |     }
156 |    ],
157 |    "source": [
158 |     "%%timeit\n",
159 |     "mysum = 0\n",
160 |     "for i in lst:\n",
161 |     "    mysum += next(gen)\n",
162 |     "print(mysum)"
163 |    ]
164 |   },
165 |   {
166 |    "cell_type": "code",
167 |    "execution_count": 6,
168 |    "metadata": {
169 |     "collapsed": true
170 |    },
171 |    "outputs": [],
172 |    "source": [
173 |     "def gen():\n",
174 |     "    while True:\n",
175 |     "        yield 1"
176 |    ]
177 |   },
178 |   {
179 |    "cell_type": "code",
180 |    "execution_count": 7,
181 |    "metadata": {},
182 |    "outputs": [],
183 |    "source": [
184 |     "gen2 = gen()"
185 |    ]
186 |   },
187 |   {
188 |    "cell_type": "code",
189 |    "execution_count": 8,
190 |    "metadata": {},
191 |    "outputs": [
192 |     {
193 |      "name": "stdout",
194 |      "output_type": "stream",
195 |      "text": [
196 |       "1\n",
197 |       "1\n",
198 |       "1\n",
199 |       "1\n",
200 |       "1\n",
201 |       "1\n",
202 |       "1\n",
203 |       "1\n",
204 |       "1\n",
205 |       "1\n"
206 |      ]
207 |     }
208 |    ],
209 |    "source": [
210 |     "for i in range(10):\n",
211 |     "    print(next(gen2))"
212 |    ]
213 |   },
214 |   {
215 |    "cell_type": "code",
216 |    "execution_count": 9,
217 |    "metadata": {},
218 |    "outputs": [
219 |     {
220 |      "name": "stdout",
221 |      "output_type": "stream",
222 |      "text": [
223 |       "1000000\n",
224 |       "1000000\n",
225 |       "1000000\n",
226 |       "1000000\n",
227 |       "1000000\n",
228 |       "1000000\n",
229 |       "1000000\n",
230 |       "1000000\n",
231 |       "353 ms ± 14.6 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
232 |      ]
233 |     }
234 |    ],
235 |    "source": [
236 |     "%%timeit\n",
237 |     "mysum = 0\n",
238 |     "for i in lst:\n",
239 |     "    mysum += next(gen2)\n",
240 |     "print(mysum)"
241 |    ]
242 |   }
243 |  ],
244 |  "metadata": {
245 |   "kernelspec": {
246 |    "display_name": "Python [conda root]",
247 |    "language": "python",
248 |    "name": "conda-root-py"
249 |   },
250 |   "language_info": {
251 |    "codemirror_mode": {
252 |     "name": "ipython",
253 |     "version": 3
254 |    },
255 |    "file_extension": ".py",
256 |    "mimetype": "text/x-python",
257 |    "name": "python",
258 |    "nbconvert_exporter": "python",
259 |    "pygments_lexer": "ipython3",
260 |    "version": "3.5.4"
261 |   }
262 |  },
263 |  "nbformat": 4,
264 |  "nbformat_minor": 2
265 | }
266 | 


--------------------------------------------------------------------------------
/notebooks/Python/Recursion/Factorial_&_Fibonacci.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 29,
  6 |    "metadata": {
  7 |     "collapsed": true
  8 |    },
  9 |    "outputs": [],
 10 |    "source": [
 11 |     "def factorial(n):\n",
 12 |     "    '''no recursion'''\n",
 13 |     "    val = 1\n",
 14 |     "    while n > 0:\n",
 15 |     "        val *= n\n",
 16 |     "        n -= 1\n",
 17 |     "    return val  "
 18 |    ]
 19 |   },
 20 |   {
 21 |    "cell_type": "code",
 22 |    "execution_count": 33,
 23 |    "metadata": {},
 24 |    "outputs": [
 25 |     {
 26 |      "data": {
 27 |       "text/plain": [
 28 |        "40320"
 29 |       ]
 30 |      },
 31 |      "execution_count": 33,
 32 |      "metadata": {},
 33 |      "output_type": "execute_result"
 34 |     }
 35 |    ],
 36 |    "source": [
 37 |     "factorial(8)"
 38 |    ]
 39 |   },
 40 |   {
 41 |    "cell_type": "code",
 42 |    "execution_count": 13,
 43 |    "metadata": {
 44 |     "collapsed": true
 45 |    },
 46 |    "outputs": [],
 47 |    "source": [
 48 |     "def factorial2(n):\n",
 49 |     "    '''with recursion'''\n",
 50 |     "    if n < 1:   \n",
 51 |     "        return 1\n",
 52 |     "    else:\n",
 53 |     "        num = n * factorial2(n - 1) \n",
 54 |     "    return num"
 55 |    ]
 56 |   },
 57 |   {
 58 |    "cell_type": "code",
 59 |    "execution_count": 19,
 60 |    "metadata": {},
 61 |    "outputs": [
 62 |     {
 63 |      "data": {
 64 |       "text/plain": [
 65 |        "40320"
 66 |       ]
 67 |      },
 68 |      "execution_count": 19,
 69 |      "metadata": {},
 70 |      "output_type": "execute_result"
 71 |     }
 72 |    ],
 73 |    "source": [
 74 |     "factorial2(8)"
 75 |    ]
 76 |   },
 77 |   {
 78 |    "cell_type": "markdown",
 79 |    "metadata": {},
 80 |    "source": [
 81 |     "---"
 82 |    ]
 83 |   },
 84 |   {
 85 |    "cell_type": "code",
 86 |    "execution_count": 3,
 87 |    "metadata": {
 88 |     "collapsed": true
 89 |    },
 90 |    "outputs": [],
 91 |    "source": [
 92 |     "def fibonacci(n):\n",
 93 |     "    '''with recursion'''\n",
 94 |     "    if n == 0: \n",
 95 |     "        return 0\n",
 96 |     "    elif n == 1: \n",
 97 |     "        return 1\n",
 98 |     "    else: \n",
 99 |     "        return fibonacci(n-1)+fibonacci(n-2)"
100 |    ]
101 |   }
102 |  ],
103 |  "metadata": {
104 |   "kernelspec": {
105 |    "display_name": "Python [conda root]",
106 |    "language": "python",
107 |    "name": "conda-root-py"
108 |   },
109 |   "language_info": {
110 |    "codemirror_mode": {
111 |     "name": "ipython",
112 |     "version": 3
113 |    },
114 |    "file_extension": ".py",
115 |    "mimetype": "text/x-python",
116 |    "name": "python",
117 |    "nbconvert_exporter": "python",
118 |    "pygments_lexer": "ipython3",
119 |    "version": "3.5.4"
120 |   }
121 |  },
122 |  "nbformat": 4,
123 |  "nbformat_minor": 2
124 | }
125 | 


--------------------------------------------------------------------------------
/notebooks/Random_Number_Generators/2_PRNG_Linear_Congruential_Generator.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "Wikipedia link: [linear congruential generator](https://en.wikipedia.org/wiki/Linear_congruential_generator)\n",
  8 |     "\n",
  9 |     "Wikipedia description: \n",
 10 |     "> A linear congruential generator (LCG) is an algorithm that yields a sequence of pseudo-randomized numbers calculated with a discontinuous piecewise linear equation. The method represents one of the oldest and best-known pseudorandom number generator algorithms. The theory behind them is relatively easy to understand, and they are easily implemented and fast, especially on computer hardware which can provide modulo arithmetic by storage-bit truncation.\n",
 11 |     "\n",
 12 |     "The generator is defined by the recurrence relation:\n",
 13 |     "\n",
 14 |     "X$_{n+1}$ = (aX$_{n}$+c) mod m\n",
 15 |     "\n",
 16 |     "where X is the sequence of pseudorandom values and\n",
 17 |     "- m is the modulus (0 $<$ m)\n",
 18 |     "- a is the multiplier (0 $<$ a $<$ m)\n",
 19 |     "- c is the increment (0 $\\leq$ c $<$ m)\n",
 20 |     "- X$_{0}$ is the seed value (0 $\\leq$ X$_{0}$ $<$ m)\n",
 21 |     "\n",
 22 |     "all of which are integer values."
 23 |    ]
 24 |   },
 25 |   {
 26 |    "cell_type": "code",
 27 |    "execution_count": 1,
 28 |    "metadata": {
 29 |     "collapsed": true
 30 |    },
 31 |    "outputs": [],
 32 |    "source": [
 33 |     "def lcg(m, a, c, seed):\n",
 34 |     "    '''linear congruential generator that pseudorandomly generates numbers.\n",
 35 |     "    \n",
 36 |     "    In:\n",
 37 |     "        m = (+ int) modulus\n",
 38 |     "        a = (+ int) multiplier\n",
 39 |     "        c = (nonnegative int) increment\n",
 40 |     "        seed = (nonnegative int) initial value\n",
 41 |     "    Out:\n",
 42 |     "        single random number\n",
 43 |     "    '''\n",
 44 |     "    \n",
 45 |     "    # type and value checks\n",
 46 |     "    assert type(m) == int, \"'m' must be an integer\"\n",
 47 |     "    assert m > 0, \"'m' must be an int greater than 0\"\n",
 48 |     "    assert type(a) == int, \"'a' must be an integer\"\n",
 49 |     "    assert a > 0, \"'a' must be an int greater than 0\"\n",
 50 |     "    assert type(c) == int, \"'c' must be an integer\"\n",
 51 |     "    assert c >= 0, \"'c' must be an int greater than or equal to 0\"\n",
 52 |     "    assert type(seed) == int, \"'seed' must be an integer\"\n",
 53 |     "    assert seed >= 0, \"'seed' must be an int greater than or equal to 0\"\n",
 54 |     "    \n",
 55 |     "    # algorithm\n",
 56 |     "    X_n = seed\n",
 57 |     "    while True:\n",
 58 |     "        X_n = (a * X_n + c) % m\n",
 59 |     "        yield X_n"
 60 |    ]
 61 |   },
 62 |   {
 63 |    "cell_type": "code",
 64 |    "execution_count": 2,
 65 |    "metadata": {
 66 |     "collapsed": true
 67 |    },
 68 |    "outputs": [],
 69 |    "source": [
 70 |     "def auto_print(implementation, number):\n",
 71 |     "    for _ in range(number):\n",
 72 |     "        print(next(implementation)) "
 73 |    ]
 74 |   },
 75 |   {
 76 |    "cell_type": "markdown",
 77 |    "metadata": {},
 78 |    "source": [
 79 |     "#### Basic Implementation"
 80 |    ]
 81 |   },
 82 |   {
 83 |    "cell_type": "code",
 84 |    "execution_count": 3,
 85 |    "metadata": {},
 86 |    "outputs": [
 87 |     {
 88 |      "name": "stdout",
 89 |      "output_type": "stream",
 90 |      "text": [
 91 |       "12\n",
 92 |       "27\n",
 93 |       "8\n",
 94 |       "30\n",
 95 |       "29\n",
 96 |       "22\n",
 97 |       "4\n",
 98 |       "2\n",
 99 |       "19\n",
100 |       "14\n",
101 |       "10\n",
102 |       "13\n",
103 |       "3\n",
104 |       "26\n",
105 |       "1\n",
106 |       "12\n",
107 |       "27\n",
108 |       "8\n",
109 |       "30\n",
110 |       "29\n"
111 |      ]
112 |     }
113 |    ],
114 |    "source": [
115 |     "basic = lcg(m=31, a=7, c=5, seed=1)\n",
116 |     "auto_print(basic, 20)"
117 |    ]
118 |   },
119 |   {
120 |    "cell_type": "markdown",
121 |    "metadata": {},
122 |    "source": [
123 |     "#### There's a Problem: Parameter Choice Matters"
124 |    ]
125 |   },
126 |   {
127 |    "cell_type": "code",
128 |    "execution_count": 4,
129 |    "metadata": {},
130 |    "outputs": [
131 |     {
132 |      "name": "stdout",
133 |      "output_type": "stream",
134 |      "text": [
135 |       "1\n",
136 |       "3\n",
137 |       "7\n",
138 |       "15\n",
139 |       "0\n",
140 |       "1\n",
141 |       "3\n",
142 |       "7\n",
143 |       "15\n",
144 |       "0\n",
145 |       "1\n",
146 |       "3\n",
147 |       "7\n",
148 |       "15\n",
149 |       "0\n",
150 |       "1\n",
151 |       "3\n",
152 |       "7\n",
153 |       "15\n",
154 |       "0\n"
155 |      ]
156 |     }
157 |    ],
158 |    "source": [
159 |     "problem = lcg(m=31, a=2, c=1, seed=0)\n",
160 |     "auto_print(problem, 20)"
161 |    ]
162 |   },
163 |   {
164 |    "cell_type": "markdown",
165 |    "metadata": {},
166 |    "source": [
167 |     "**Explanation:** a poor choice of parameters leads to a short period length. Once the period length, or cycle, is known, it is trivial to make predictions. This means the numbers generated are no longer random."
168 |    ]
169 |   },
170 |   {
171 |    "cell_type": "markdown",
172 |    "metadata": {},
173 |    "source": [
174 |     "#### Lehmer Implementation (c=0)"
175 |    ]
176 |   },
177 |   {
178 |    "cell_type": "code",
179 |    "execution_count": 5,
180 |    "metadata": {},
181 |    "outputs": [
182 |     {
183 |      "name": "stdout",
184 |      "output_type": "stream",
185 |      "text": [
186 |       "16807\n",
187 |       "282475249\n",
188 |       "1622650073\n",
189 |       "984943658\n",
190 |       "1144108930\n",
191 |       "470211272\n",
192 |       "101027544\n",
193 |       "1457850878\n",
194 |       "1458777923\n",
195 |       "2007237709\n",
196 |       "823564440\n",
197 |       "1115438165\n",
198 |       "1784484492\n",
199 |       "74243042\n",
200 |       "114807987\n",
201 |       "1137522503\n",
202 |       "1441282327\n",
203 |       "16531729\n",
204 |       "823378840\n",
205 |       "143542612\n"
206 |      ]
207 |     }
208 |    ],
209 |    "source": [
210 |     "lehmer = lcg(m=2147483647, a=16807, c=0, seed=1)\n",
211 |     "auto_print(lehmer, 20)"
212 |    ]
213 |   },
214 |   {
215 |    "cell_type": "markdown",
216 |    "metadata": {},
217 |    "source": [
218 |     "**Note:** this Lehmer implementation uses a Mersenne Prime (a prime that is 1 less than a power of two: 2$^n$ - 1) as its modulus; here m = 2$^{31}$ - 1 = 2147483647."
219 |    ]
220 |   },
221 |   {
222 |    "cell_type": "markdown",
223 |    "metadata": {
224 |     "collapsed": true
225 |    },
226 |    "source": [
227 |     "## Advantages\n",
228 |     "\n",
229 |     "- Fast\n",
230 |     "- Minimal memory to retain state\n",
231 |     "- Great for embedded systems and video games \n",
232 |     "\n",
233 |     "## Disadvantages\n",
234 |     "\n",
235 |     "- Serial correlation of sequence (see Marsaglia's Theorem)\n",
236 |     "- Short period of the low-order bits when m is chosen to be a power of 2\n",
237 |     "- Not suitable for a Monte Carlo simulation\n",
238 |     "- Must not be used for cryptographic applications\n",
239 |     "\n",
240 |     "## Final Notes\n",
241 |     "\n",
242 |     "The low-order bits of LCGs when m is a power of 2 should never be relied on for any degree of randomness whatsoever. Indeed, simply substituting 2$^n$ for the modulus term reveals that the low order bits go through very short cycles. In particular, any full-cycle LCG when m is a power of 2 will produce alternately odd and even results."
243 |    ]
244 |   }
245 |  ],
246 |  "metadata": {
247 |   "kernelspec": {
248 |    "display_name": "Python [conda root]",
249 |    "language": "python",
250 |    "name": "conda-root-py"
251 |   },
252 |   "language_info": {
253 |    "codemirror_mode": {
254 |     "name": "ipython",
255 |     "version": 3
256 |    },
257 |    "file_extension": ".py",
258 |    "mimetype": "text/x-python",
259 |    "name": "python",
260 |    "nbconvert_exporter": "python",
261 |    "pygments_lexer": "ipython3",
262 |    "version": "3.5.4"
263 |   }
264 |  },
265 |  "nbformat": 4,
266 |  "nbformat_minor": 2
267 | }
268 | 


--------------------------------------------------------------------------------
/notebooks/Random_Number_Generators/3_PRNG_Linear_Feedback_Shift_Register.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 21,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "value = 0xabbacaddabba\n",
 10 |     "value &= (1<<10)"
 11 |    ]
 12 |   },
 13 |   {
 14 |    "cell_type": "code",
 15 |    "execution_count": 22,
 16 |    "metadata": {},
 17 |    "outputs": [
 18 |     {
 19 |      "data": {
 20 |       "text/plain": [
 21 |        "'0b101010111011101011001010110111011010101110111010'"
 22 |       ]
 23 |      },
 24 |      "execution_count": 22,
 25 |      "metadata": {},
 26 |      "output_type": "execute_result"
 27 |     }
 28 |    ],
 29 |    "source": [
 30 |     "bin(value)"
 31 |    ]
 32 |   },
 33 |   {
 34 |    "cell_type": "code",
 35 |    "execution_count": null,
 36 |    "metadata": {
 37 |     "collapsed": true
 38 |    },
 39 |    "outputs": [],
 40 |    "source": []
 41 |   },
 42 |   {
 43 |    "cell_type": "code",
 44 |    "execution_count": null,
 45 |    "metadata": {
 46 |     "collapsed": true
 47 |    },
 48 |    "outputs": [],
 49 |    "source": []
 50 |   },
 51 |   {
 52 |    "cell_type": "code",
 53 |    "execution_count": null,
 54 |    "metadata": {
 55 |     "collapsed": true
 56 |    },
 57 |    "outputs": [],
 58 |    "source": []
 59 |   },
 60 |   {
 61 |    "cell_type": "code",
 62 |    "execution_count": null,
 63 |    "metadata": {
 64 |     "collapsed": true
 65 |    },
 66 |    "outputs": [],
 67 |    "source": []
 68 |   },
 69 |   {
 70 |    "cell_type": "code",
 71 |    "execution_count": null,
 72 |    "metadata": {
 73 |     "collapsed": true
 74 |    },
 75 |    "outputs": [],
 76 |    "source": []
 77 |   }
 78 |  ],
 79 |  "metadata": {
 80 |   "kernelspec": {
 81 |    "display_name": "Python [conda root]",
 82 |    "language": "python",
 83 |    "name": "conda-root-py"
 84 |   },
 85 |   "language_info": {
 86 |    "codemirror_mode": {
 87 |     "name": "ipython",
 88 |     "version": 3
 89 |    },
 90 |    "file_extension": ".py",
 91 |    "mimetype": "text/x-python",
 92 |    "name": "python",
 93 |    "nbconvert_exporter": "python",
 94 |    "pygments_lexer": "ipython3",
 95 |    "version": "3.5.4"
 96 |   }
 97 |  },
 98 |  "nbformat": 4,
 99 |  "nbformat_minor": 2
100 | }
101 | 


--------------------------------------------------------------------------------
/notebooks/Random_Number_Generators/4_PRNG_Dev_Random_Directory.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {
  7 |     "collapsed": true
  8 |    },
  9 |    "outputs": [],
 10 |    "source": [
 11 |     "import os"
 12 |    ]
 13 |   },
 14 |   {
 15 |    "cell_type": "markdown",
 16 |    "metadata": {},
 17 |    "source": [
 18 |     "## Accessing /dev/urandom via os.urandom"
 19 |    ]
 20 |   },
 21 |   {
 22 |    "cell_type": "code",
 23 |    "execution_count": 16,
 24 |    "metadata": {},
 25 |    "outputs": [
 26 |     {
 27 |      "data": {
 28 |       "text/plain": [
 29 |        "b'\\xdb/T1i\\xdc\\x89\\xe0Q5'"
 30 |       ]
 31 |      },
 32 |      "execution_count": 16,
 33 |      "metadata": {},
 34 |      "output_type": "execute_result"
 35 |     }
 36 |    ],
 37 |    "source": [
 38 |     "# Return a bytes object containing random bytes suitable for cryptographic use.\n",
 39 |     "rand = os.urandom(10)\n",
 40 |     "rand"
 41 |    ]
 42 |   },
 43 |   {
 44 |    "cell_type": "markdown",
 45 |    "metadata": {},
 46 |    "source": [
 47 |     "## Function"
 48 |    ]
 49 |   },
 50 |   {
 51 |    "cell_type": "code",
 52 |    "execution_count": 17,
 53 |    "metadata": {
 54 |     "collapsed": true
 55 |    },
 56 |    "outputs": [],
 57 |    "source": [
 58 |     "def bytes2decimal(byte_obj):\n",
 59 |     "    '''Takes bytes object and returns decimal equivalent.'''\n",
 60 |     "    return int.from_bytes(byte_obj, byteorder='big')"
 61 |    ]
 62 |   },
 63 |   {
 64 |    "cell_type": "markdown",
 65 |    "metadata": {},
 66 |    "source": [
 67 |     "## Example"
 68 |    ]
 69 |   },
 70 |   {
 71 |    "cell_type": "code",
 72 |    "execution_count": 18,
 73 |    "metadata": {},
 74 |    "outputs": [
 75 |     {
 76 |      "data": {
 77 |       "text/plain": [
 78 |        "1035071323466485622395189"
 79 |       ]
 80 |      },
 81 |      "execution_count": 18,
 82 |      "metadata": {},
 83 |      "output_type": "execute_result"
 84 |     }
 85 |    ],
 86 |    "source": [
 87 |     "b2d = bytes2decimal(rand)\n",
 88 |     "b2d"
 89 |    ]
 90 |   },
 91 |   {
 92 |    "cell_type": "markdown",
 93 |    "metadata": {},
 94 |    "source": [
 95 |     "## Multiple Examples"
 96 |    ]
 97 |   },
 98 |   {
 99 |    "cell_type": "code",
100 |    "execution_count": 19,
101 |    "metadata": {},
102 |    "outputs": [
103 |     {
104 |      "name": "stdout",
105 |      "output_type": "stream",
106 |      "text": [
107 |       "b'\\x19\\x10cd\\x99'         107649131673\n",
108 |       "b'\\x19\\xa2\\x7fx\\xa8'      110100445352\n",
109 |       "b'\"T\\xdfM\\xb9'            147452808633\n",
110 |       "b'\\xff\\x85O\\x83\\xcf'      1097453241295\n",
111 |       "b'\\x1d\\xddE\\x9e\\\\'        128266378844\n",
112 |       "b'\\xa1\\x80%\\xf4\\x0b'      693639705611\n",
113 |       "b'#gxPy'                  152059793529\n",
114 |       "b\"\\xfa\\xd5'%\\x87\"         1077317936519\n",
115 |       "b'\\x83\\x06 1t'            562743488884\n",
116 |       "b'D4@I\\x88'               292934404488\n",
117 |       "b'\\xe7\\xffR\\x9d\\x07'      996421049607\n",
118 |       "b'\\xad\\xca\\x8e#\\r'        746427654925\n",
119 |       "b'\\x0bM{\\x8e\\xec'         48544583404\n",
120 |       "b'E\\x9b\\x99\\xc3\\xfe'      298963289086\n",
121 |       "b'\\x98a17O'               654465644367\n",
122 |       "b'\\x1ck\\xa7\\xa7\\xb9'      122065233849\n",
123 |       "b'\\x17g\\x8cS\\x8c'         100521497484\n",
124 |       "b'\\xa9\\xdf=Dh'            729594807400\n",
125 |       "b'\\xfdR%c\\x1c'            1088004907804\n",
126 |       "b'\\x1c\\xf2\\xcd \\x86'      124332613766\n"
127 |      ]
128 |     }
129 |    ],
130 |    "source": [
131 |     "for _ in range(20):\n",
132 |     "    rand = os.urandom(5)\n",
133 |     "    print('{0:25} {1}'.format(str(rand), bytes2decimal(rand)))"
134 |    ]
135 |   }
136 |  ],
137 |  "metadata": {
138 |   "kernelspec": {
139 |    "display_name": "Python [conda root]",
140 |    "language": "python",
141 |    "name": "conda-root-py"
142 |   },
143 |   "language_info": {
144 |    "codemirror_mode": {
145 |     "name": "ipython",
146 |     "version": 3
147 |    },
148 |    "file_extension": ".py",
149 |    "mimetype": "text/x-python",
150 |    "name": "python",
151 |    "nbconvert_exporter": "python",
152 |    "pygments_lexer": "ipython3",
153 |    "version": "3.5.4"
154 |   }
155 |  },
156 |  "nbformat": 4,
157 |  "nbformat_minor": 2
158 | }
159 | 


--------------------------------------------------------------------------------
/pkl_files/subset_df.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dziganto/Data_Science_Fundamentals/833e83ea27d27de456b01ba05232dc410c5bc8f5/pkl_files/subset_df.pkl


--------------------------------------------------------------------------------
/pkl_files/subset_df2.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dziganto/Data_Science_Fundamentals/833e83ea27d27de456b01ba05232dc410c5bc8f5/pkl_files/subset_df2.pkl


--------------------------------------------------------------------------------