├── .gitignore ├── LICENSE ├── README.md ├── data └── Create_Higgs_CSV.ipynb ├── notebooks ├── Big_Data │ └── Zeppelin │ │ ├── 00_Whirlwind_Tour_of_Zeppelin.json │ │ ├── 01_File_Based_Data_Sources.json │ │ ├── 03_Spark_Fundamental_Structured_Operations.json │ │ ├── Machine_Learning_Zeppelin.json │ │ ├── Spark_Machine_Learning.json │ │ ├── Spark_Tutorial.json │ │ └── Supervised_Machine_Learning.json ├── Checksum │ └── Longitudinal_Redundancy_Check.ipynb ├── Cryptography │ └── 1_Caesar_Shift.ipynb ├── Hashing │ ├── Birthday_Attack.ipynb │ └── Intro_to_Hashing.ipynb ├── Information_Theory │ ├── Hamming_Distance.ipynb │ └── Stirlings_Approximation_Factorial.ipynb ├── Legacy_Code │ └── Bernoulli_Naive_Bayes.ipynb ├── Machine_Learning │ ├── Algorithms_From_Scratch │ │ ├── Bagging_and_Bootstrapping.ipynb │ │ ├── Bernoulli_Naive_Bayes.ipynb │ │ ├── Cross-Validation.ipynb │ │ ├── Gradient_Descent.ipynb │ │ ├── K-means.ipynb │ │ ├── KNN.ipynb │ │ ├── PCA.ipynb │ │ ├── Train_Test_Split.ipynb │ │ └── Train_Validation_Test_Split.ipynb │ ├── Fourier_Transforms │ │ └── FFT.ipynb │ ├── Supervised_Learning │ │ ├── Advanced_Techniques │ │ │ └── Bagging │ │ │ │ └── Bagging_Simulation.ipynb │ │ ├── Classification │ │ │ ├── EDA │ │ │ │ └── Classification_EDA.ipynb │ │ │ ├── Logistic_Regression │ │ │ │ ├── Logistic_Regression_Sklearn_Example.ipynb │ │ │ │ └── Sigmoid_Function.ipynb │ │ │ └── Support_Vector_Machines │ │ │ │ └── Basics_of_SVMs.ipynb │ │ ├── Model_Selection │ │ │ ├── Feature_Selection.ipynb │ │ │ ├── Model_Tuning_and_Cross_Validation.ipynb │ │ │ └── Train_Test_Split.ipynb │ │ ├── Numerical_Methods │ │ │ └── Gradient_Descent │ │ │ │ └── Linear_Regression_&_Intro_to_GD.ipynb │ │ └── Regression │ │ │ └── Linear_Regression │ │ │ ├── 1_Linear_Regression_101.ipynb │ │ │ ├── 2_Linear_Regression_Metrics.ipynb │ │ │ ├── 3_Linear_Regression_Assumptions_and_Evaluation.ipynb │ │ │ └── 4_Linear_Regression_EDA_and_Residual_Plots.ipynb │ └── Unsupervised_Learning │ │ ├── Clustering │ │ 
└── Kmeans │ │ │ └── Kmeans_Clustering_w_Sklearn.ipynb │ │ └── SVD │ │ ├── SVD_to_image.ipynb │ │ └── SVD_with_Iris_and_Images.ipynb ├── OS_library │ └── OS_Sandbox.ipynb ├── PyTorch │ └── PyTorch_Intro.ipynb ├── Python │ ├── Coding_Best_Practices │ │ ├── Complexity_&_Big_O.ipynb │ │ ├── Complexity_Practice.ipynb │ │ └── OOP_How_to_Write_a_Class.ipynb │ ├── Demos │ │ ├── ML_101.ipynb │ │ ├── Python_101_Instructor.ipynb │ │ └── Python_101_Student.ipynb │ ├── NumPy │ │ ├── NP_argwhere_isin.ipynb │ │ └── Numpy_concat_append_ravel_mgrid.ipynb │ ├── Programming_Problems │ │ ├── Alphabet_Magnets.ipynb │ │ ├── Bubble_Sort.ipynb │ │ ├── Cryptogram_Puzzle.ipynb │ │ ├── Dot_Product.ipynb │ │ ├── Factorial.ipynb │ │ ├── Fibonacci.ipynb │ │ ├── GCD_Problem.ipynb │ │ ├── Guess_a_Number.ipynb │ │ ├── Moving_Average_Problem.ipynb │ │ ├── OLS_R2_and_adjR2.ipynb │ │ ├── Project_Euler_Problem_8_with_Deque.ipynb │ │ ├── Reverse_String.ipynb │ │ ├── Rotation_Problem.ipynb │ │ └── SSE.ipynb │ ├── Python_Internals │ │ ├── Arrays_vs_Lists.ipynb │ │ ├── Deep_vs_Shallow_Copying.ipynb │ │ ├── Pickling.ipynb │ │ ├── Python_Data_Structure_Comparison.ipynb │ │ ├── Working_with_Bits.ipynb │ │ └── listexp_vs_genexp.ipynb │ ├── Recursion │ │ └── Factorial_&_Fibonacci.ipynb │ └── Visualizations │ │ ├── Boxplots.ipynb │ │ ├── Matplotlib_Tutorial.ipynb │ │ └── Seaborn_Visualizations_&_Data.ipynb ├── Random_Number_Generators │ ├── 1_PRNG_Middle_Square_Method.ipynb │ ├── 2_PRNG_Linear_Congruential_Generator.ipynb │ ├── 3_PRNG_Linear_Feedback_Shift_Register.ipynb │ └── 4_PRNG_Dev_Random_Directory.ipynb └── Statistics │ └── Central_Limit_Theorem.ipynb └── pkl_files ├── subset_df.pkl └── subset_df2.pkl /.gitignore: -------------------------------------------------------------------------------- 1 | .ipynb_checkpoints 2 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT 
License 2 | 3 | Copyright (c) 2017 David Ziganto 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Data_Science_Fundamentals 2 | 3 | **Objective:** to introduce Data Science concepts from both a theoretical and practical standpoint, all in Python. 4 | 5 | **Audience:** ranges from beginner to intermediate. 6 | 7 | **Note:** this is a living repository that will be updated quite frequently. 
8 | -------------------------------------------------------------------------------- /data/Create_Higgs_CSV.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import pandas as pd" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 2, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "file_path = 'https://archive.ics.uci.edu/ml/machine-learning-databases/00280/HIGGS.csv.gz'\n", 19 | "columns = ['target',\n", 20 | " ' lepton_pT', \n", 21 | " 'lepton_eta', \n", 22 | " 'lepton_phi', \n", 23 | " 'missing_energy_magnitude', \n", 24 | " 'missing_energy_phi', \n", 25 | " 'jet_1_pt', \n", 26 | " 'jet_1_eta', \n", 27 | " 'jet_1_phi', \n", 28 | " 'jet_1_b-tag', \n", 29 | " 'jet_2_pt', \n", 30 | " 'jet_2_eta', \n", 31 | " 'jet_2_phi', \n", 32 | " 'jet_2_b-tag', \n", 33 | " 'jet_3_pt', \n", 34 | " 'jet_3_eta', \n", 35 | " 'jet_3_phi', \n", 36 | " 'jet_3_b-tag', \n", 37 | " 'jet_4_pt', \n", 38 | " 'jet_4_eta', \n", 39 | " 'jet_4_phi', \n", 40 | " 'jet_4_b-tag', \n", 41 | " 'm_jj', \n", 42 | " 'm_jjj', \n", 43 | " 'm_lv', \n", 44 | " 'm_jlv', \n", 45 | " 'm_bb', \n", 46 | " 'm_wbb', \n", 47 | " 'm_wwbb']" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": 3, 53 | "metadata": {}, 54 | "outputs": [], 55 | "source": [ 56 | "df = pd.read_csv(file_path, names=columns)" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": 4, 62 | "metadata": {}, 63 | "outputs": [ 64 | { 65 | "data": { 66 | "text/html": [ 67 | "
\n", 68 | "\n", 81 | "\n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | "
targetlepton_pTlepton_etalepton_phimissing_energy_magnitudemissing_energy_phijet_1_ptjet_1_etajet_1_phijet_1_b-tag...jet_4_etajet_4_phijet_4_b-tagm_jjm_jjjm_lvm_jlvm_bbm_wbbm_wwbb
01.00.869293-0.6350820.2256900.327470-0.6899930.754202-0.248573-1.0920640.000000...-0.010455-0.0457673.1019611.3537600.9795630.9780760.9200050.7216570.9887510.876678
11.00.9075420.3291470.3594121.497970-0.3130101.095531-0.557525-1.5882302.173076...-1.138930-0.0008190.0000000.3022200.8330480.9857000.9780980.7797320.9923560.798343
21.00.7988351.470639-1.6359750.4537730.4256291.1048751.2823221.3816640.000000...1.1288480.9004610.0000000.9097531.1083300.9856920.9513310.8032520.8659240.780118
30.01.344385-0.8766260.9359131.9920500.8824541.786066-1.646778-0.9423830.000000...-0.678379-1.3603560.0000000.9466521.0287040.9986560.7282810.8692001.0267360.957904
41.01.1050090.3213561.5224010.882808-1.2053490.681466-1.070464-0.9218710.000000...-0.3735660.1130410.0000000.7558561.3610570.9866100.8380851.1332950.8722450.808487
\n", 231 | "

5 rows × 29 columns

\n", 232 | "
" 233 | ], 234 | "text/plain": [ 235 | " target lepton_pT lepton_eta lepton_phi missing_energy_magnitude \\\n", 236 | "0 1.0 0.869293 -0.635082 0.225690 0.327470 \n", 237 | "1 1.0 0.907542 0.329147 0.359412 1.497970 \n", 238 | "2 1.0 0.798835 1.470639 -1.635975 0.453773 \n", 239 | "3 0.0 1.344385 -0.876626 0.935913 1.992050 \n", 240 | "4 1.0 1.105009 0.321356 1.522401 0.882808 \n", 241 | "\n", 242 | " missing_energy_phi jet_1_pt jet_1_eta jet_1_phi jet_1_b-tag ... \\\n", 243 | "0 -0.689993 0.754202 -0.248573 -1.092064 0.000000 ... \n", 244 | "1 -0.313010 1.095531 -0.557525 -1.588230 2.173076 ... \n", 245 | "2 0.425629 1.104875 1.282322 1.381664 0.000000 ... \n", 246 | "3 0.882454 1.786066 -1.646778 -0.942383 0.000000 ... \n", 247 | "4 -1.205349 0.681466 -1.070464 -0.921871 0.000000 ... \n", 248 | "\n", 249 | " jet_4_eta jet_4_phi jet_4_b-tag m_jj m_jjj m_lv m_jlv \\\n", 250 | "0 -0.010455 -0.045767 3.101961 1.353760 0.979563 0.978076 0.920005 \n", 251 | "1 -1.138930 -0.000819 0.000000 0.302220 0.833048 0.985700 0.978098 \n", 252 | "2 1.128848 0.900461 0.000000 0.909753 1.108330 0.985692 0.951331 \n", 253 | "3 -0.678379 -1.360356 0.000000 0.946652 1.028704 0.998656 0.728281 \n", 254 | "4 -0.373566 0.113041 0.000000 0.755856 1.361057 0.986610 0.838085 \n", 255 | "\n", 256 | " m_bb m_wbb m_wwbb \n", 257 | "0 0.721657 0.988751 0.876678 \n", 258 | "1 0.779732 0.992356 0.798343 \n", 259 | "2 0.803252 0.865924 0.780118 \n", 260 | "3 0.869200 1.026736 0.957904 \n", 261 | "4 1.133295 0.872245 0.808487 \n", 262 | "\n", 263 | "[5 rows x 29 columns]" 264 | ] 265 | }, 266 | "execution_count": 4, 267 | "metadata": {}, 268 | "output_type": "execute_result" 269 | } 270 | ], 271 | "source": [ 272 | "df.head()" 273 | ] 274 | }, 275 | { 276 | "cell_type": "code", 277 | "execution_count": 5, 278 | "metadata": {}, 279 | "outputs": [ 280 | { 281 | "name": "stdout", 282 | "output_type": "stream", 283 | "text": [ 284 | "\n", 285 | "RangeIndex: 11000000 entries, 0 to 10999999\n", 286 | 
"Data columns (total 29 columns):\n", 287 | "target float64\n", 288 | " lepton_pT float64\n", 289 | "lepton_eta float64\n", 290 | "lepton_phi float64\n", 291 | "missing_energy_magnitude float64\n", 292 | "missing_energy_phi float64\n", 293 | "jet_1_pt float64\n", 294 | "jet_1_eta float64\n", 295 | "jet_1_phi float64\n", 296 | "jet_1_b-tag float64\n", 297 | "jet_2_pt float64\n", 298 | "jet_2_eta float64\n", 299 | "jet_2_phi float64\n", 300 | "jet_2_b-tag float64\n", 301 | "jet_3_pt float64\n", 302 | "jet_3_eta float64\n", 303 | "jet_3_phi float64\n", 304 | "jet_3_b-tag float64\n", 305 | "jet_4_pt float64\n", 306 | "jet_4_eta float64\n", 307 | "jet_4_phi float64\n", 308 | "jet_4_b-tag float64\n", 309 | "m_jj float64\n", 310 | "m_jjj float64\n", 311 | "m_lv float64\n", 312 | "m_jlv float64\n", 313 | "m_bb float64\n", 314 | "m_wbb float64\n", 315 | "m_wwbb float64\n", 316 | "dtypes: float64(29)\n", 317 | "memory usage: 2.4 GB\n" 318 | ] 319 | } 320 | ], 321 | "source": [ 322 | "df.info()" 323 | ] 324 | }, 325 | { 326 | "cell_type": "code", 327 | "execution_count": 11, 328 | "metadata": {}, 329 | "outputs": [], 330 | "source": [ 331 | "save_path = '/Users/davidziganto/Repositories/Data_Science_Fundamentals/data/'\n", 332 | "file_name = 'higgs.csv'" 333 | ] 334 | }, 335 | { 336 | "cell_type": "code", 337 | "execution_count": 12, 338 | "metadata": {}, 339 | "outputs": [], 340 | "source": [ 341 | "df.to_csv(save_path + file_name)" 342 | ] 343 | } 344 | ], 345 | "metadata": { 346 | "kernelspec": { 347 | "display_name": "Python [conda root]", 348 | "language": "python", 349 | "name": "conda-root-py" 350 | }, 351 | "language_info": { 352 | "codemirror_mode": { 353 | "name": "ipython", 354 | "version": 3 355 | }, 356 | "file_extension": ".py", 357 | "mimetype": "text/x-python", 358 | "name": "python", 359 | "nbconvert_exporter": "python", 360 | "pygments_lexer": "ipython3", 361 | "version": "3.5.4" 362 | } 363 | }, 364 | "nbformat": 4, 365 | "nbformat_minor": 2 366 | } 367 | 
-------------------------------------------------------------------------------- /notebooks/Big_Data/Zeppelin/Machine_Learning_Zeppelin.json: -------------------------------------------------------------------------------- 1 | {"paragraphs":[{"title":"Introduction","text":"%md\nThis notebook is a simple tutorial on how to use numpy, pandas, and Spark's machine learning library to do fun stuff.","user":"admin","dateUpdated":"2017-11-09T14:04:30-0600","config":{"colWidth":12,"enabled":true,"results":{},"editorSetting":{"language":"markdown","editOnDblClick":true},"editorMode":"ace/mode/markdown","title":true,"editorHide":true,"tableHide":false},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1510255925569_1694489231","id":"20171109-133205_1077344504","dateCreated":"2017-11-09T13:32:05-0600","dateStarted":"2017-11-09T14:04:30-0600","dateFinished":"2017-11-09T14:04:32-0600","status":"FINISHED","errorMessage":"","progressUpdateIntervalMs":500,"focus":true,"$$hashKey":"object:1692"},{"title":"Imports","text":"%pyspark\n\nimport pandas as pd\nimport numpy as np","user":"admin","dateUpdated":"2017-11-09T14:04:04-0600","config":{"colWidth":12,"enabled":true,"results":{},"editorSetting":{"language":"python","editOnDblClick":false},"editorMode":"ace/mode/python","title":true},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1510255243163_715367343","id":"20171109-132043_872043916","dateCreated":"2017-11-09T13:20:43-0600","dateStarted":"2017-11-09T14:04:03-0600","dateFinished":"2017-11-09T14:04:03-0600","status":"FINISHED","errorMessage":"","progressUpdateIntervalMs":500,"$$hashKey":"object:1693"},{"text":"%pyspark\n\ndf = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-white.csv', 
delimiter=';')\ndf.head()","user":"admin","dateUpdated":"2017-11-09T13:25:33-0600","config":{"colWidth":12,"enabled":true,"results":{},"editorSetting":{"language":"python"},"editorMode":"ace/mode/python"},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1510255257120_2050091925","id":"20171109-132057_845151643","dateCreated":"2017-11-09T13:20:57-0600","dateStarted":"2017-11-09T13:25:33-0600","dateFinished":"2017-11-09T13:25:34-0600","status":"FINISHED","errorMessage":"","progressUpdateIntervalMs":500,"$$hashKey":"object:1694"},{"title":"Rearrange Columns","text":"%pyspark\ndf = df[['quality', 'fixed acidity', 'volatile acidity', 'citric acid', 'residual sugar', 'chlorides', 'free sulfur dioxide', 'total sulfur dioxide', 'density', 'pH', 'sulphates', 'alcohol']]","user":"admin","dateUpdated":"2017-11-09T14:53:15-0600","config":{"colWidth":12,"enabled":true,"results":{},"editorSetting":{"language":"python"},"editorMode":"ace/mode/python","title":true},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1510260622609_-2020735401","id":"20171109-145022_1858055191","dateCreated":"2017-11-09T14:50:22-0600","dateStarted":"2017-11-09T14:53:15-0600","dateFinished":"2017-11-09T14:53:15-0600","status":"FINISHED","errorMessage":"","progressUpdateIntervalMs":500,"$$hashKey":"object:1695"},{"text":"%pyspark\ntype(df)","user":"admin","dateUpdated":"2017-11-09T14:53:18-0600","config":{"colWidth":12,"enabled":true,"results":{},"editorSetting":{"language":"python"},"editorMode":"ace/mode/python"},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1510258090492_534582379","id":"20171109-140810_1298767575","dateCreated":"2017-11-09T14:08:10-0600","dateStarted":"2017-11-09T14:53:18-0600","dateFinished":"2017-11-09T14:53:18-0600","status":"FINISHED","errorMessage":"","progressUpdateIntervalMs":500,"$$hashKey":"object:1696"},{"title":"Create Spark DF","text":"%pyspark\nddf = 
spark.createDataFrame(df)","user":"admin","dateUpdated":"2017-11-09T15:20:53-0600","config":{"colWidth":12,"enabled":true,"results":{},"editorSetting":{"language":"python"},"editorMode":"ace/mode/python","title":true},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1510255281754_1175588324","id":"20171109-132121_1872338822","dateCreated":"2017-11-09T13:21:21-0600","dateStarted":"2017-11-09T15:20:53-0600","dateFinished":"2017-11-09T15:20:54-0600","status":"FINISHED","errorMessage":"","progressUpdateIntervalMs":500,"$$hashKey":"object:1697"},{"text":"%pyspark\ntype(ddf)","user":"admin","dateUpdated":"2017-11-09T15:20:55-0600","config":{"colWidth":12,"enabled":true,"results":{},"editorSetting":{"language":"python"},"editorMode":"ace/mode/python"},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1510258074988_2093954347","id":"20171109-140754_1396222594","dateCreated":"2017-11-09T14:07:54-0600","dateStarted":"2017-11-09T15:20:55-0600","dateFinished":"2017-11-09T15:20:55-0600","status":"FINISHED","errorMessage":"","progressUpdateIntervalMs":500,"$$hashKey":"object:1698"},{"title":"Number of Rows","text":"%pyspark\nddf.count()","user":"admin","dateUpdated":"2017-11-09T15:20:56-0600","config":{"colWidth":12,"enabled":true,"results":{},"editorSetting":{"language":"python"},"editorMode":"ace/mode/python","title":true},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1510259453749_-200329059","id":"20171109-143053_1622356792","dateCreated":"2017-11-09T14:30:53-0600","dateStarted":"2017-11-09T15:20:56-0600","dateFinished":"2017-11-09T15:20:56-0600","status":"FINISHED","errorMessage":"","progressUpdateIntervalMs":500,"$$hashKey":"object:1699"},{"title":"Quick 
Peek","text":"%pyspark\nddf.first()","user":"admin","dateUpdated":"2017-11-09T15:20:58-0600","config":{"colWidth":12,"enabled":true,"results":{},"editorSetting":{"language":"python"},"editorMode":"ace/mode/python","title":true},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1510255689961_508972245","id":"20171109-132809_629688564","dateCreated":"2017-11-09T13:28:09-0600","dateStarted":"2017-11-09T15:20:58-0600","dateFinished":"2017-11-09T15:20:58-0600","status":"FINISHED","errorMessage":"","progressUpdateIntervalMs":500,"$$hashKey":"object:1700"},{"title":"Quick Peek 2","text":"%pyspark\nddf.take(5)","user":"admin","dateUpdated":"2017-11-09T15:21:00-0600","config":{"colWidth":12,"enabled":true,"results":{},"editorSetting":{"language":"python"},"editorMode":"ace/mode/python","title":true},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1510255663529_-466628372","id":"20171109-132743_239942099","dateCreated":"2017-11-09T13:27:43-0600","dateStarted":"2017-11-09T15:21:00-0600","dateFinished":"2017-11-09T15:21:00-0600","status":"FINISHED","errorMessage":"","progressUpdateIntervalMs":500,"$$hashKey":"object:1701"},{"title":"Better 
Format","text":"%pyspark\nddf.show()","user":"admin","dateUpdated":"2017-11-09T15:21:02-0600","config":{"colWidth":12,"enabled":true,"results":{},"editorSetting":{"language":"python"},"editorMode":"ace/mode/python","title":true},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1510258522386_-981731649","id":"20171109-141522_1465708413","dateCreated":"2017-11-09T14:15:22-0600","dateStarted":"2017-11-09T15:21:02-0600","dateFinished":"2017-11-09T15:21:02-0600","status":"FINISHED","errorMessage":"","progressUpdateIntervalMs":500,"$$hashKey":"object:1702"},{"text":"%pyspark\nddf.columns","user":"admin","dateUpdated":"2017-11-09T15:21:08-0600","config":{"colWidth":12,"enabled":true,"results":{},"editorSetting":{"language":"python"},"editorMode":"ace/mode/python"},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1510258655977_689079582","id":"20171109-141735_398438825","dateCreated":"2017-11-09T14:17:35-0600","dateStarted":"2017-11-09T15:21:08-0600","dateFinished":"2017-11-09T15:21:08-0600","status":"FINISHED","errorMessage":"","progressUpdateIntervalMs":500,"$$hashKey":"object:1703"},{"text":"%pyspark\ntrain_data, test_data = ddf.randomSplit([.8,.2],seed=1234)","user":"admin","dateUpdated":"2017-11-09T15:21:15-0600","config":{"colWidth":12,"enabled":true,"results":{},"editorSetting":{"language":"python"},"editorMode":"ace/mode/python"},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1510258850889_-421449118","id":"20171109-142050_1844692829","dateCreated":"2017-11-09T14:20:50-0600","dateStarted":"2017-11-09T15:21:15-0600","dateFinished":"2017-11-09T15:21:15-0600","status":"FINISHED","errorMessage":"","progressUpdateIntervalMs":500,"$$hashKey":"object:1704"},{"text":"%pyspark\nprint('train:', train_data.count())\nprint('test:', 
test_data.count())","user":"admin","dateUpdated":"2017-11-09T15:21:17-0600","config":{"colWidth":12,"enabled":true,"results":{},"editorSetting":{"language":"python"},"editorMode":"ace/mode/python"},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1510259459848_159811236","id":"20171109-143059_355393844","dateCreated":"2017-11-09T14:30:59-0600","dateStarted":"2017-11-09T15:21:17-0600","dateFinished":"2017-11-09T15:21:17-0600","status":"FINISHED","errorMessage":"","progressUpdateIntervalMs":500,"$$hashKey":"object:1705"},{"text":"%pyspark\nfrom pyspark.ml.feature import VectorAssembler\nfeature_assembler = VectorAssembler(\n inputCols=['fixed acidity', 'volatile acidity', 'citric acid', 'residual sugar', 'chlorides', 'free sulfur dioxide', 'total sulfur dioxide', 'density', 'pH', 'sulphates', 'alcohol'],\n outputCol='features')","user":"admin","dateUpdated":"2017-11-09T15:29:54-0600","config":{"colWidth":12,"enabled":true,"results":{},"editorSetting":{"language":"python"},"editorMode":"ace/mode/python"},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1510259638946_984372457","id":"20171109-143358_1110597847","dateCreated":"2017-11-09T14:33:58-0600","dateStarted":"2017-11-09T15:29:54-0600","dateFinished":"2017-11-09T15:29:54-0600","status":"FINISHED","errorMessage":"","progressUpdateIntervalMs":500,"$$hashKey":"object:1706"},{"text":"%pyspark\ntrain.take(1)","user":"admin","dateUpdated":"2017-11-09T15:29:58-0600","config":{"colWidth":12,"enabled":true,"results":{},"editorSetting":{"language":"python"},"editorMode":"ace/mode/python"},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1510262666092_1835562309","id":"20171109-152426_2092064614","dateCreated":"2017-11-09T15:24:26-0600","dateStarted":"2017-11-09T15:29:58-0600","dateFinished":"2017-11-09T15:29:58-0600","status":"FINISHED","errorMessage":"","progressUpdateIntervalMs":500,"$$hashKey":"object:1707"},{"text":"%pyspark\nfrom pyspark.ml.classification import 
LogisticRegression\nlr = LogisticRegression(regParam=0.1, elasticNetParam=1.0, labelCol='quality', family=\"multinomial\")\nlr_model = lr.fit(train)","user":"admin","dateUpdated":"2017-11-09T15:35:53-0600","config":{"colWidth":12,"enabled":true,"results":{},"editorSetting":{"language":"python"},"editorMode":"ace/mode/python"},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1510261136989_1594497724","id":"20171109-145856_1022628746","dateCreated":"2017-11-09T14:58:56-0600","dateStarted":"2017-11-09T15:35:53-0600","dateFinished":"2017-11-09T15:35:59-0600","status":"FINISHED","errorMessage":"","progressUpdateIntervalMs":500,"$$hashKey":"object:1708"},{"text":"%pyspark\nprint(\"Multinomial coefficients:\\n \" + str(lr_model.coefficientMatrix))\nprint(\"Multinomial intercepts:\\n \" + str(lr_model.interceptVector))\n","user":"admin","dateUpdated":"2017-11-09T15:36:40-0600","config":{"colWidth":12,"enabled":true,"results":{},"editorSetting":{"language":"python"},"editorMode":"ace/mode/python"},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1510262678601_-2031148813","id":"20171109-152438_1562383421","dateCreated":"2017-11-09T15:24:38-0600","dateStarted":"2017-11-09T15:36:40-0600","dateFinished":"2017-11-09T15:36:40-0600","status":"FINISHED","errorMessage":"","progressUpdateIntervalMs":500,"$$hashKey":"object:1709"},{"text":"%pyspark\n","user":"admin","dateUpdated":"2017-11-09T15:33:22-0600","config":{"colWidth":12,"enabled":true,"results":{},"editorSetting":{"language":"python"},"editorMode":"ace/mode/python"},"settings":{"params":{},"forms":{}},"apps":[],"jobName":"paragraph_1510263202433_2105064972","id":"20171109-153322_151101125","dateCreated":"2017-11-09T15:33:22-0600","status":"READY","errorMessage":"","progressUpdateIntervalMs":500,"$$hashKey":"object:1710"}],"name":"Machine 
Learning","id":"2CYTR3UEA","angularObjects":{"2CJRPMHPD:shared_process":[],"2CJSHBYC6:shared_process":[],"2CKSW7BWY:shared_process":[],"2CGWQ9B37:shared_process":[],"2CGR5K95Z:shared_process":[],"2CK2ZPKQ4:shared_process":[],"2CJJMSU71:shared_process":[],"2CGZPE6U6:shared_process":[],"2CG9T16R1:shared_process":[],"2CG86WNKX:shared_process":[],"2CKE7UE12:shared_process":[],"2CGJYANVS:shared_process":[],"2CJSRKFPY:shared_process":[],"2CHBR99H6:shared_process":[],"2CFTTN2E2:shared_process":[],"2CGADMUUE:shared_process":[],"2CKJKMCM2:shared_process":[],"2CKS133YC:shared_process":[],"2CHMME5HT:shared_process":[]},"config":{"looknfeel":"default","personalizedMode":"false"},"info":{}} -------------------------------------------------------------------------------- /notebooks/Checksum/Longitudinal_Redundancy_Check.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Convert Int to Binary" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": { 14 | "collapsed": true 15 | }, 16 | "outputs": [], 17 | "source": [ 18 | "# decimal form\n", 19 | "a = 1\n", 20 | "b = 2\n", 21 | "c = 10\n", 22 | "d = 100" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": 2, 28 | "metadata": { 29 | "collapsed": true 30 | }, 31 | "outputs": [], 32 | "source": [ 33 | "def dec2bin(num):\n", 34 | " '''converts decimal number into binary representation'''\n", 35 | " return format(num, 'b')" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": 3, 41 | "metadata": {}, 42 | "outputs": [ 43 | { 44 | "name": "stdout", 45 | "output_type": "stream", 46 | "text": [ 47 | "0 0\n", 48 | "1 1\n", 49 | "2 10\n", 50 | "3 11\n", 51 | "4 100\n", 52 | "5 101\n", 53 | "6 110\n", 54 | "7 111\n", 55 | "8 1000\n", 56 | "9 1001\n", 57 | "10 1010\n", 58 | "11 1011\n", 59 | "12 1100\n", 60 | "13 1101\n", 61 | "14 
1110\n", 62 | "15 1111\n", 63 | "16 10000\n", 64 | "17 10001\n", 65 | "18 10010\n", 66 | "19 10011\n" 67 | ] 68 | } 69 | ], 70 | "source": [ 71 | "# binary form\n", 72 | "for i in range(20):\n", 73 | " print(i, dec2bin(i))" 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": 4, 79 | "metadata": {}, 80 | "outputs": [ 81 | { 82 | "data": { 83 | "text/plain": [ 84 | "'1'" 85 | ] 86 | }, 87 | "execution_count": 4, 88 | "metadata": {}, 89 | "output_type": "execute_result" 90 | } 91 | ], 92 | "source": [ 93 | "dec2bin(a)" 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "execution_count": 5, 99 | "metadata": {}, 100 | "outputs": [ 101 | { 102 | "data": { 103 | "text/plain": [ 104 | "'11'" 105 | ] 106 | }, 107 | "execution_count": 5, 108 | "metadata": {}, 109 | "output_type": "execute_result" 110 | } 111 | ], 112 | "source": [ 113 | "dec2bin(3) " 114 | ] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "execution_count": 6, 119 | "metadata": {}, 120 | "outputs": [ 121 | { 122 | "data": { 123 | "text/plain": [ 124 | "'1010'" 125 | ] 126 | }, 127 | "execution_count": 6, 128 | "metadata": {}, 129 | "output_type": "execute_result" 130 | } 131 | ], 132 | "source": [ 133 | "dec2bin(10)" 134 | ] 135 | }, 136 | { 137 | "cell_type": "code", 138 | "execution_count": 7, 139 | "metadata": {}, 140 | "outputs": [ 141 | { 142 | "data": { 143 | "text/plain": [ 144 | "'101000'" 145 | ] 146 | }, 147 | "execution_count": 7, 148 | "metadata": {}, 149 | "output_type": "execute_result" 150 | } 151 | ], 152 | "source": [ 153 | "# Return x with the bits shifted to the left by y places: x << y\n", 154 | "dec2bin(10 << 2)" 155 | ] 156 | }, 157 | { 158 | "cell_type": "code", 159 | "execution_count": 8, 160 | "metadata": {}, 161 | "outputs": [ 162 | { 163 | "data": { 164 | "text/plain": [ 165 | "'101'" 166 | ] 167 | }, 168 | "execution_count": 8, 169 | "metadata": {}, 170 | "output_type": "execute_result" 171 | } 172 | ], 173 | "source": [ 174 | "# Return x with the bits 
shifted to the right by y places: x >> y\n", 175 | "dec2bin(10 >> 1)" 176 | ] 177 | }, 178 | { 179 | "cell_type": "code", 180 | "execution_count": 9, 181 | "metadata": {}, 182 | "outputs": [ 183 | { 184 | "name": "stdout", 185 | "output_type": "stream", 186 | "text": [ 187 | "b1: 1010\n", 188 | "b2: 1100\n", 189 | "\n", 190 | "b1&b2: 1000\n" 191 | ] 192 | } 193 | ], 194 | "source": [ 195 | "# Bitwise \"and\"\n", 196 | "b1 = dec2bin(10)\n", 197 | "b2 = dec2bin(12)\n", 198 | "print('b1: {}\\nb2: {}\\n'.format(b1, b2))\n", 199 | "print('b1&b2:', dec2bin(10 & 12))" 200 | ] 201 | }, 202 | { 203 | "cell_type": "code", 204 | "execution_count": 10, 205 | "metadata": {}, 206 | "outputs": [ 207 | { 208 | "name": "stdout", 209 | "output_type": "stream", 210 | "text": [ 211 | "b1: 1010\n", 212 | "b2: 1100\n", 213 | "\n", 214 | "b1|b2: 1110\n" 215 | ] 216 | } 217 | ], 218 | "source": [ 219 | "# Bitwise \"or\"\n", 220 | "print('b1: {}\\nb2: {}\\n'.format(b1, b2))\n", 221 | "print('b1|b2:', dec2bin(10 | 12))" 222 | ] 223 | }, 224 | { 225 | "cell_type": "code", 226 | "execution_count": 11, 227 | "metadata": {}, 228 | "outputs": [ 229 | { 230 | "data": { 231 | "text/plain": [ 232 | "'1010'" 233 | ] 234 | }, 235 | "execution_count": 11, 236 | "metadata": {}, 237 | "output_type": "execute_result" 238 | } 239 | ], 240 | "source": [ 241 | "# Complement of x -- FIX\n", 242 | "dec2bin(10)" 243 | ] 244 | }, 245 | { 246 | "cell_type": "code", 247 | "execution_count": 12, 248 | "metadata": {}, 249 | "outputs": [ 250 | { 251 | "data": { 252 | "text/plain": [ 253 | "'1010'" 254 | ] 255 | }, 256 | "execution_count": 12, 257 | "metadata": {}, 258 | "output_type": "execute_result" 259 | } 260 | ], 261 | "source": [ 262 | "dec2bin(10)" 263 | ] 264 | }, 265 | { 266 | "cell_type": "code", 267 | "execution_count": 13, 268 | "metadata": {}, 269 | "outputs": [ 270 | { 271 | "name": "stdout", 272 | "output_type": "stream", 273 | "text": [ 274 | "b1: 10010\n", 275 | "b2: 10100\n", 276 | "\n", 277 | 
"b1^b2: 110\n" 278 | ] 279 | } 280 | ], 281 | "source": [ 282 | "# Bitwise xor\n", 283 | "print('b1: {}\\nb2: {}\\n'.format(dec2bin(18), dec2bin(20)))\n", 284 | "print('b1^b2:', dec2bin(18 ^ 20))" 285 | ] 286 | }, 287 | { 288 | "cell_type": "markdown", 289 | "metadata": {}, 290 | "source": [ 291 | "## Longitudinal Redundancy Check (LRC)" 292 | ] 293 | }, 294 | { 295 | "cell_type": "code", 296 | "execution_count": 14, 297 | "metadata": { 298 | "collapsed": true 299 | }, 300 | "outputs": [], 301 | "source": [ 302 | "def lrc(block1, block2):\n", 303 | " '''LRC that returns the parity bit check for 8 bit block.\n", 304 | " \n", 305 | " Input: \n", 306 | " decimal (integer(s) w/option for bitwise operations)\n", 307 | " Output:\n", 308 | " binary representation where all zeros represents perfect parity match\n", 309 | " '''\n", 310 | " assert block1 < 255, \"block 1 must have int value less than 255.\"\n", 311 | " assert block2 < 255, \"block 2 must have int value less than 255.\"\n", 312 | " \n", 313 | " xor = format(block1 ^ block2, 'b')\n", 314 | " delta = 8 - len(xor)\n", 315 | " if delta < 8:\n", 316 | " return '0'*delta + xor\n", 317 | " else:\n", 318 | " return xor" 319 | ] 320 | }, 321 | { 322 | "cell_type": "code", 323 | "execution_count": 15, 324 | "metadata": {}, 325 | "outputs": [ 326 | { 327 | "data": { 328 | "text/plain": [ 329 | "'00000000'" 330 | ] 331 | }, 332 | "execution_count": 15, 333 | "metadata": {}, 334 | "output_type": "execute_result" 335 | } 336 | ], 337 | "source": [ 338 | "# perfect match: should return 0's\n", 339 | "lrc(100, 100)" 340 | ] 341 | }, 342 | { 343 | "cell_type": "code", 344 | "execution_count": 16, 345 | "metadata": {}, 346 | "outputs": [ 347 | { 348 | "data": { 349 | "text/plain": [ 350 | "'00001101'" 351 | ] 352 | }, 353 | "execution_count": 16, 354 | "metadata": {}, 355 | "output_type": "execute_result" 356 | } 357 | ], 358 | "source": [ 359 | "# imperfect match: should return 1's wherever parity mismatch\n", 360 | "lrc(100, 
105)" 361 | ] 362 | }, 363 | { 364 | "cell_type": "code", 365 | "execution_count": 17, 366 | "metadata": {}, 367 | "outputs": [ 368 | { 369 | "name": "stdout", 370 | "output_type": "stream", 371 | "text": [ 372 | "item1: 1100100\n", 373 | "item2: 1101001\n", 374 | "------ -------\n", 375 | " xor: 0001101\n" 376 | ] 377 | } 378 | ], 379 | "source": [ 380 | "tup = (('item1:', dec2bin(100)), \n", 381 | " ('item2:', dec2bin(105)),\n", 382 | " ('-'*6, '-'*7),\n", 383 | " (' xor:', lrc(100, 105)[1:]))\n", 384 | "for item in tup:\n", 385 | " print(item[0], item[1])" 386 | ] 387 | }, 388 | { 389 | "cell_type": "code", 390 | "execution_count": 18, 391 | "metadata": {}, 392 | "outputs": [ 393 | { 394 | "data": { 395 | "text/plain": [ 396 | "True" 397 | ] 398 | }, 399 | "execution_count": 18, 400 | "metadata": {}, 401 | "output_type": "execute_result" 402 | } 403 | ], 404 | "source": [ 405 | "lrc(100, 100) == '00000000'" 406 | ] 407 | }, 408 | { 409 | "cell_type": "code", 410 | "execution_count": 19, 411 | "metadata": {}, 412 | "outputs": [ 413 | { 414 | "data": { 415 | "text/plain": [ 416 | "False" 417 | ] 418 | }, 419 | "execution_count": 19, 420 | "metadata": {}, 421 | "output_type": "execute_result" 422 | } 423 | ], 424 | "source": [ 425 | "lrc(100, 105) == '00000000'" 426 | ] 427 | }, 428 | { 429 | "cell_type": "markdown", 430 | "metadata": {}, 431 | "source": [ 432 | "## Problem: Bit Flips\n", 433 | "\n", 434 | "LRC will catch bit flips so long as they occur at different locations. However, if bits flip in the exact same position of each block, then LRC will not catch it, whereas other checksums will." 
435 | ] 436 | }, 437 | { 438 | "cell_type": "code", 439 | "execution_count": 20, 440 | "metadata": {}, 441 | "outputs": [ 442 | { 443 | "data": { 444 | "text/plain": [ 445 | "'1100100'" 446 | ] 447 | }, 448 | "execution_count": 20, 449 | "metadata": {}, 450 | "output_type": "execute_result" 451 | } 452 | ], 453 | "source": [ 454 | "dec2bin(100)" 455 | ] 456 | }, 457 | { 458 | "cell_type": "code", 459 | "execution_count": 21, 460 | "metadata": {}, 461 | "outputs": [ 462 | { 463 | "data": { 464 | "text/plain": [ 465 | "'1100101'" 466 | ] 467 | }, 468 | "execution_count": 21, 469 | "metadata": {}, 470 | "output_type": "execute_result" 471 | } 472 | ], 473 | "source": [ 474 | "dec2bin(101)" 475 | ] 476 | }, 477 | { 478 | "cell_type": "code", 479 | "execution_count": 22, 480 | "metadata": {}, 481 | "outputs": [ 482 | { 483 | "data": { 484 | "text/plain": [ 485 | "'1100100'" 486 | ] 487 | }, 488 | "execution_count": 22, 489 | "metadata": {}, 490 | "output_type": "execute_result" 491 | } 492 | ], 493 | "source": [ 494 | "# simulate flipping least significant bit\n", 495 | "bitflip101 = dec2bin(101)[:6] + '0'\n", 496 | "bitflip101" 497 | ] 498 | }, 499 | { 500 | "cell_type": "code", 501 | "execution_count": 23, 502 | "metadata": {}, 503 | "outputs": [ 504 | { 505 | "data": { 506 | "text/plain": [ 507 | "100" 508 | ] 509 | }, 510 | "execution_count": 23, 511 | "metadata": {}, 512 | "output_type": "execute_result" 513 | } 514 | ], 515 | "source": [ 516 | "# convert binary bit flip to decimal\n", 517 | "dec_bitflip101 = int(bitflip101, 2)\n", 518 | "dec_bitflip101" 519 | ] 520 | }, 521 | { 522 | "cell_type": "code", 523 | "execution_count": 24, 524 | "metadata": {}, 525 | "outputs": [ 526 | { 527 | "data": { 528 | "text/plain": [ 529 | "'00000000'" 530 | ] 531 | }, 532 | "execution_count": 24, 533 | "metadata": {}, 534 | "output_type": "execute_result" 535 | } 536 | ], 537 | "source": [ 538 | "# check\n", 539 | "lrc(100, dec_bitflip101)" 540 | ] 541 | } 542 | ], 543 | 
"metadata": { 544 | "kernelspec": { 545 | "display_name": "Python [conda root]", 546 | "language": "python", 547 | "name": "conda-root-py" 548 | }, 549 | "language_info": { 550 | "codemirror_mode": { 551 | "name": "ipython", 552 | "version": 3 553 | }, 554 | "file_extension": ".py", 555 | "mimetype": "text/x-python", 556 | "name": "python", 557 | "nbconvert_exporter": "python", 558 | "pygments_lexer": "ipython3", 559 | "version": "3.5.4" 560 | } 561 | }, 562 | "nbformat": 4, 563 | "nbformat_minor": 2 564 | } 565 | -------------------------------------------------------------------------------- /notebooks/Cryptography/1_Caesar_Shift.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Caesar Cipher\n", 8 | "\n", 9 | "This notebook introduces the Caesar cipher. \n", 10 | "\n", 11 | "A Caesar cipher is one of the easiest ways to encrypt text. It's also extremely easy to crack. But for the purposes of introducing ciphers, it's a great place to start. \n", 12 | "\n", 13 | "The setup is pretty simple. You start with a message that you want to codify so someone else cannot read it. Say the message is **\"I hope you cannot read this.\"**. This is called the *plaintext*. Now we need to apply some algorithm to our text so the output is incoherent. For example, the output may be **\"O nuvk eua igttuz xkgj znoy.\"**. What we did in this case is shift every letter six places to the right: A becomes G, B becomes H, Z becomes F, and so on. \n", 14 | "\n", 15 | "> **plaintext:** I hope you cannot read this. \n", 16 | "> **ciphertext:** O nuvk eua igttuz xkgj znoy.\n", 17 | "\n", 18 | "Right away we can see this particular setup is very easy to crack. It includes capitalization and the words are clearly split. By converting all the text to lowercase and removing all spaces, we can make it a bit more difficult.
However, there are only 25 different ways to shift the letters. That means a brute force attack, or trying all possibilities, is trivial.\n", 19 | "\n", 20 | "Let's see what this looks like in code. We'll create a class called *CaesarCipher* that can encrypt or decrypt text. " 21 | ] 22 | }, 23 | { 24 | "cell_type": "markdown", 25 | "metadata": {}, 26 | "source": [ 27 | "---" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": 1, 33 | "metadata": {}, 34 | "outputs": [], 35 | "source": [ 36 | "class CaesarCipher:\n", 37 | " \n", 38 | " \n", 39 | " \n", 40 | " def _clean_text(self, text):\n", 41 | " '''converts text to lowercase, removes spaces, and removes punctuation.'''\n", 42 | " import string\n", 43 | " assert type(text) == str, 'input needs to be a string!'\n", 44 | " text = text.lower()\n", 45 | " text = text.replace(' ', '')\n", 46 | " self.clean_text = \"\".join(character for character in text \n", 47 | " if character not in string.punctuation)\n", 48 | " return self.clean_text\n", 49 | " \n", 50 | " \n", 51 | " def _string2characters(self, text):\n", 52 | " '''converts a string to individual characters.'''\n", 53 | " assert type(text) == str, 'input needs to be a string!'\n", 54 | " self.str2char = list(text)\n", 55 | " return self.str2char\n", 56 | " \n", 57 | " \n", 58 | " def _chars2nums(self, characters):\n", 59 | " '''converts individual characters to integers.'''\n", 60 | " assert type(characters) == list, 'input needs to be a list of characters!'\n", 61 | " codebook = {'a':0, 'b':1, 'c':2, 'd':3, 'e':4, 'f':5, 'g':6, 'h':7, 'i':8, 'j':9,\n", 62 | " 'k':10, 'l':11, 'm':12, 'n':13, 'o':14, 'p':15, 'q':16, 'r':17, 's':18,\n", 63 | " 't':19, 'u':20, 'v':21, 'w':22, 'x':23, 'y':24, 'z':25}\n", 64 | " for i, char in enumerate(characters):\n", 65 | " try:\n", 66 | " characters[i] = codebook[char]\n", 67 | " except:\n", 68 | " pass\n", 69 | " self.char2num = characters\n", 70 | " return self.char2num\n", 71 | " \n", 72 | " \n", 73 | " def 
_nums2chars(self, numbers):\n", 74 | " '''converts individual integers to characters .'''\n", 75 | " assert type(numbers) == list, 'input needs to be a list of numbers!'\n", 76 | " codebook = {0:'a', 1:'b', 2:'c', 3:'d', 4:'e', 5:'f', 6:'g', 7:'h', 8:'i', 9:'j',\n", 77 | " 10:'k', 11:'l', 12:'m', 13:'n', 14:'o', 15:'p', 16:'q', 17:'r', 18:'s',\n", 78 | " 19:'t', 20:'u', 21:'v', 22:'w', 23:'x', 24:'y', 25:'z'}\n", 79 | " for i, num in enumerate(numbers):\n", 80 | " try:\n", 81 | " numbers[i] = codebook[num]\n", 82 | " except:\n", 83 | " pass\n", 84 | " self.num2chars = numbers\n", 85 | " return self.num2chars\n", 86 | " \n", 87 | " \n", 88 | " def _preprocessing(self, text):\n", 89 | " ''''''\n", 90 | " clean_text = self._clean_text(text)\n", 91 | " list_of_chars = self._string2characters(clean_text)\n", 92 | " list_of_nums = self._chars2nums(list_of_chars)\n", 93 | " return list_of_nums\n", 94 | " \n", 95 | " def encrypt(self, text, shift=3):\n", 96 | " '''return text that is shifted according to user's input.'''\n", 97 | " import numpy as np\n", 98 | " preprocess = self._preprocessing(text)\n", 99 | " nums_shifted = list((np.array(preprocess) + shift) % 26)\n", 100 | " return ''.join(self._nums2chars(nums_shifted))\n", 101 | " \n", 102 | " def decrypt(self, text, shift=3):\n", 103 | " '''returns text shifted by user-defined shift length.'''\n", 104 | " import numpy as np\n", 105 | " preprocess = self._preprocessing(text)\n", 106 | " nums = self._chars2nums(preprocess)\n", 107 | " num_shift = list((np.array(nums) - shift) % 26)\n", 108 | " return ''.join(self._nums2chars(num_shift))" 109 | ] 110 | }, 111 | { 112 | "cell_type": "markdown", 113 | "metadata": {}, 114 | "source": [ 115 | "---" 116 | ] 117 | }, 118 | { 119 | "cell_type": "code", 120 | "execution_count": 2, 121 | "metadata": {}, 122 | "outputs": [], 123 | "source": [ 124 | "cc = CaesarCipher()" 125 | ] 126 | }, 127 | { 128 | "cell_type": "code", 129 | "execution_count": 3, 130 | "metadata": {}, 131 | 
"outputs": [ 132 | { 133 | "data": { 134 | "text/plain": [ 135 | "'onuvkeuaigttuzxkgjznoy'" 136 | ] 137 | }, 138 | "execution_count": 3, 139 | "metadata": {}, 140 | "output_type": "execute_result" 141 | } 142 | ], 143 | "source": [ 144 | "# example\n", 145 | "cc.encrypt('I hope you cannot read this.', shift=6)" 146 | ] 147 | }, 148 | { 149 | "cell_type": "markdown", 150 | "metadata": {}, 151 | "source": [ 152 | "---" 153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "execution_count": 4, 158 | "metadata": {}, 159 | "outputs": [ 160 | { 161 | "data": { 162 | "text/plain": [ 163 | "'the QuIcK brown fox jumps over the lazy dog!'" 164 | ] 165 | }, 166 | "execution_count": 4, 167 | "metadata": {}, 168 | "output_type": "execute_result" 169 | } 170 | ], 171 | "source": [ 172 | "text = 'the QuIcK brown fox jumps over the lazy dog!'\n", 173 | "text" 174 | ] 175 | }, 176 | { 177 | "cell_type": "code", 178 | "execution_count": 5, 179 | "metadata": {}, 180 | "outputs": [ 181 | { 182 | "data": { 183 | "text/plain": [ 184 | "'ymjvznhpgwtbsktcozruxtajwymjqfeditl'" 185 | ] 186 | }, 187 | "execution_count": 5, 188 | "metadata": {}, 189 | "output_type": "execute_result" 190 | } 191 | ], 192 | "source": [ 193 | "# show one encryption\n", 194 | "encrypted = cc.encrypt(text, shift=5)\n", 195 | "encrypted" 196 | ] 197 | }, 198 | { 199 | "cell_type": "code", 200 | "execution_count": 6, 201 | "metadata": {}, 202 | "outputs": [ 203 | { 204 | "name": "stdout", 205 | "output_type": "stream", 206 | "text": [ 207 | "uifrvjdlcspxogpykvnqtpwfsuifmbazeph\n", 208 | "vjgswkemdtqyphqzlworuqxgtvjgncbafqi\n", 209 | "wkhtxlfneurzqiramxpsvryhuwkhodcbgrj\n", 210 | "xliuymgofvsarjsbnyqtwszivxlipedchsk\n", 211 | "ymjvznhpgwtbsktcozruxtajwymjqfeditl\n", 212 | "znkwaoiqhxuctludpasvyubkxznkrgfejum\n", 213 | "aolxbpjriyvdumveqbtwzvclyaolshgfkvn\n", 214 | "bpmycqksjzwevnwfrcuxawdmzbpmtihglwo\n", 215 | "cqnzdrltkaxfwoxgsdvybxenacqnujihmxp\n", 216 | "droaesmulbygxpyhtewzcyfobdrovkjinyq\n", 217 | 
"espbftnvmczhyqziufxadzgpcespwlkjozr\n", 218 | "ftqcguowndaizrajvgybeahqdftqxmlkpas\n", 219 | "gurdhvpxoebjasbkwhzcfbiregurynmlqbt\n", 220 | "hvseiwqypfckbtclxiadgcjsfhvszonmrcu\n", 221 | "iwtfjxrzqgdlcudmyjbehdktgiwtaponsdv\n", 222 | "jxugkysarhemdvenzkcfieluhjxubqpotew\n", 223 | "kyvhlztbsifnewfoaldgjfmvikyvcrqpufx\n", 224 | "lzwimauctjgofxgpbmehkgnwjlzwdsrqvgy\n", 225 | "maxjnbvdukhpgyhqcnfilhoxkmaxetsrwhz\n", 226 | "nbykocwevliqhzirdogjmipylnbyfutsxia\n", 227 | "oczlpdxfwmjriajsephknjqzmoczgvutyjb\n", 228 | "pdamqeygxnksjbktfqilokranpdahwvuzkc\n", 229 | "qebnrfzhyoltkclugrjmplsboqebixwvald\n", 230 | "rfcosgaizpmuldmvhsknqmtcprfcjyxwbme\n", 231 | "sgdpthbjaqnvmenwitlornudqsgdkzyxcnf\n" 232 | ] 233 | } 234 | ], 235 | "source": [ 236 | "# show all encryption possibilities\n", 237 | "for i in range(1,26):\n", 238 | " print(cc.encrypt(text, shift=i))" 239 | ] 240 | }, 241 | { 242 | "cell_type": "code", 243 | "execution_count": 7, 244 | "metadata": {}, 245 | "outputs": [ 246 | { 247 | "name": "stdout", 248 | "output_type": "stream", 249 | "text": [ 250 | "shift 1: xliuymgofvsarjsbnyqtwszivxlipedchsk\n", 251 | "\n", 252 | "shift 2: wkhtxlfneurzqiramxpsvryhuwkhodcbgrj\n", 253 | "\n", 254 | "shift 3: vjgswkemdtqyphqzlworuqxgtvjgncbafqi\n", 255 | "\n", 256 | "shift 4: uifrvjdlcspxogpykvnqtpwfsuifmbazeph\n", 257 | "\n", 258 | "shift 5: thequickbrownfoxjumpsoverthelazydog\n", 259 | "\n", 260 | "shift 6: sgdpthbjaqnvmenwitlornudqsgdkzyxcnf\n", 261 | "\n", 262 | "shift 7: rfcosgaizpmuldmvhsknqmtcprfcjyxwbme\n", 263 | "\n", 264 | "shift 8: qebnrfzhyoltkclugrjmplsboqebixwvald\n", 265 | "\n", 266 | "shift 9: pdamqeygxnksjbktfqilokranpdahwvuzkc\n", 267 | "\n", 268 | "shift10: oczlpdxfwmjriajsephknjqzmoczgvutyjb\n", 269 | "\n", 270 | "shift11: nbykocwevliqhzirdogjmipylnbyfutsxia\n", 271 | "\n", 272 | "shift12: maxjnbvdukhpgyhqcnfilhoxkmaxetsrwhz\n", 273 | "\n", 274 | "shift13: lzwimauctjgofxgpbmehkgnwjlzwdsrqvgy\n", 275 | "\n", 276 | "shift14: 
kyvhlztbsifnewfoaldgjfmvikyvcrqpufx\n", 277 | "\n", 278 | "shift15: jxugkysarhemdvenzkcfieluhjxubqpotew\n", 279 | "\n", 280 | "shift16: iwtfjxrzqgdlcudmyjbehdktgiwtaponsdv\n", 281 | "\n", 282 | "shift17: hvseiwqypfckbtclxiadgcjsfhvszonmrcu\n", 283 | "\n", 284 | "shift18: gurdhvpxoebjasbkwhzcfbiregurynmlqbt\n", 285 | "\n", 286 | "shift19: ftqcguowndaizrajvgybeahqdftqxmlkpas\n", 287 | "\n", 288 | "shift20: espbftnvmczhyqziufxadzgpcespwlkjozr\n", 289 | "\n", 290 | "shift21: droaesmulbygxpyhtewzcyfobdrovkjinyq\n", 291 | "\n", 292 | "shift22: cqnzdrltkaxfwoxgsdvybxenacqnujihmxp\n", 293 | "\n", 294 | "shift23: bpmycqksjzwevnwfrcuxawdmzbpmtihglwo\n", 295 | "\n", 296 | "shift24: aolxbpjriyvdumveqbtwzvclyaolshgfkvn\n", 297 | "\n", 298 | "shift25: znkwaoiqhxuctludpasvyubkxznkrgfejum\n", 299 | "\n" 300 | ] 301 | } 302 | ], 303 | "source": [ 304 | "# show all decryption possibilities\n", 305 | "for i in range(1,26):\n", 306 | " print('shift{:2}: {}'.format(i, cc.decrypt(encrypted, shift=i)))\n", 307 | " print('')" 308 | ] 309 | } 310 | ], 311 | "metadata": { 312 | "kernelspec": { 313 | "display_name": "Python [conda root]", 314 | "language": "python", 315 | "name": "conda-root-py" 316 | }, 317 | "language_info": { 318 | "codemirror_mode": { 319 | "name": "ipython", 320 | "version": 3 321 | }, 322 | "file_extension": ".py", 323 | "mimetype": "text/x-python", 324 | "name": "python", 325 | "nbconvert_exporter": "python", 326 | "pygments_lexer": "ipython3", 327 | "version": "3.5.4" 328 | } 329 | }, 330 | "nbformat": 4, 331 | "nbformat_minor": 2 332 | } 333 | -------------------------------------------------------------------------------- /notebooks/Hashing/Birthday_Attack.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "According to the [Birthday attack](https://en.wikipedia.org/wiki/Birthday_attack) article on Wikipedia:\n", 8 | ">A birthday 
attack is a type of cryptographic attack that exploits the mathematics behind the birthday problem in probability theory. This attack can be used to abuse communication between two or more parties. The attack depends on the higher likelihood of collisions found between random attack attempts and a fixed degree of permutations (pigeonholes). With a birthday attack, it is possible to find a collision of a hash function in $\\sqrt{2^{n}}$=$2^{n/2}$ with $2^{n}$ being the classical preimage resistance security." 9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "metadata": {}, 14 | "source": [ 15 | "Given a function *f*, the goal of the attack is to find two different inputs *$x_{1}$*, *$x_{2}$* such that f($x_{1}$)=f($x_{2}$). Such a pair $x_{1}$, $x_{2}$ is called a collision. The method used to find a collision is simply to evaluate the function *f* for different input values that may be chosen randomly or pseudorandomly until the same result is found more than once. Because of the birthday problem, this method can be rather efficient. Specifically, if a function f(x) yields any of *H* different outputs with equal probability and *H* is sufficiently large, then we expect to obtain a pair of different arguments $x_{1}$ and $x_{2}$ with f($x_{1}$) = f($x_{2}$) after evaluating the function for about $1.25{\\sqrt {H}}$ different arguments on average." 
16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": 1, 21 | "metadata": { 22 | "collapsed": true 23 | }, 24 | "outputs": [], 25 | "source": [ 26 | "def hasher(number, modulus):\n", 27 | " '''simply hash function'''\n", 28 | " return number % modulus" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": 2, 34 | "metadata": { 35 | "collapsed": true 36 | }, 37 | "outputs": [], 38 | "source": [ 39 | "from functools import partial\n", 40 | "\n", 41 | "# set modulus for all examples\n", 42 | "myhash = partial(hasher, modulus=47)" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": 3, 48 | "metadata": { 49 | "collapsed": true 50 | }, 51 | "outputs": [], 52 | "source": [ 53 | "from math import log1p, sqrt\n", 54 | "\n", 55 | "def birthday(probability_exponent, bits):\n", 56 | " '''probability_exponent: desired probability of random collision'''\n", 57 | " probability = 10. ** probability_exponent\n", 58 | " outputs = 2. ** bits\n", 59 | " return sqrt(2. 
* outputs * -log1p(-probability))" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": 4, 65 | "metadata": {}, 66 | "outputs": [ 67 | { 68 | "data": { 69 | "text/plain": [ 70 | "4.536858806263531" 71 | ] 72 | }, 73 | "execution_count": 4, 74 | "metadata": {}, 75 | "output_type": "execute_result" 76 | } 77 | ], 78 | "source": [ 79 | "birthday(-2, 10)" 80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": 6, 85 | "metadata": {}, 86 | "outputs": [ 87 | { 88 | "data": { 89 | "text/plain": [ 90 | "array([868, 988, 838, 285, 906, 485, 429, 386, 325, 923])" 91 | ] 92 | }, 93 | "execution_count": 6, 94 | "metadata": {}, 95 | "output_type": "execute_result" 96 | } 97 | ], 98 | "source": [ 99 | "import numpy as np\n", 100 | "\n", 101 | "integers = np.random.randint(low=1, high=1000, size=10)\n", 102 | "integers" 103 | ] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "execution_count": 7, 108 | "metadata": {}, 109 | "outputs": [ 110 | { 111 | "data": { 112 | "text/plain": [ 113 | "Counter({1: 1, 3: 1, 6: 1, 10: 1, 13: 1, 15: 1, 22: 1, 30: 1, 39: 1, 43: 1})" 114 | ] 115 | }, 116 | "execution_count": 7, 117 | "metadata": {}, 118 | "output_type": "execute_result" 119 | } 120 | ], 121 | "source": [ 122 | "from collections import Counter\n", 123 | "\n", 124 | "hashes = [myhash(integer) for integer in integers]\n", 125 | "Counter(hashes)" 126 | ] 127 | } 128 | ], 129 | "metadata": { 130 | "kernelspec": { 131 | "display_name": "Python [conda root]", 132 | "language": "python", 133 | "name": "conda-root-py" 134 | }, 135 | "language_info": { 136 | "codemirror_mode": { 137 | "name": "ipython", 138 | "version": 3 139 | }, 140 | "file_extension": ".py", 141 | "mimetype": "text/x-python", 142 | "name": "python", 143 | "nbconvert_exporter": "python", 144 | "pygments_lexer": "ipython3", 145 | "version": "3.5.4" 146 | } 147 | }, 148 | "nbformat": 4, 149 | "nbformat_minor": 2 150 | } 151 | 
-------------------------------------------------------------------------------- /notebooks/Information_Theory/Hamming_Distance.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "s1 = 'string'\n", 12 | "s2 = 'stirng'\n", 13 | "s3 = 'stirna'" 14 | ] 15 | }, 16 | { 17 | "cell_type": "markdown", 18 | "metadata": {}, 19 | "source": [ 20 | "---" 21 | ] 22 | }, 23 | { 24 | "cell_type": "markdown", 25 | "metadata": {}, 26 | "source": [ 27 | "## Functions" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": 2, 33 | "metadata": { 34 | "collapsed": true 35 | }, 36 | "outputs": [], 37 | "source": [ 38 | "def hamming_distance(string1, string2):\n", 39 | " '''calculate the hamming distance of two strings'''\n", 40 | " assert len(string1) == len(string2), \"strings must be the same length\"\n", 41 | " distance = 0\n", 42 | " for i, elem in enumerate(string1):\n", 43 | " if elem != string2[i]:\n", 44 | " distance += 1\n", 45 | " return distance" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": 3, 51 | "metadata": { 52 | "collapsed": true 53 | }, 54 | "outputs": [], 55 | "source": [ 56 | "def hamming_distance2(string1, string2):\n", 57 | " '''calculate the hamming distance of two strings (more Pythonic)'''\n", 58 | " assert len(string1) == len(string2), \"strings must be the same length\"\n", 59 | " return sum(elem1 != elem2 for elem1, elem2 in zip(string1, string2))" 60 | ] 61 | }, 62 | { 63 | "cell_type": "markdown", 64 | "metadata": {}, 65 | "source": [ 66 | "---" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": 4, 72 | "metadata": {}, 73 | "outputs": [ 74 | { 75 | "data": { 76 | "text/plain": [ 77 | "2" 78 | ] 79 | }, 80 | "execution_count": 4, 81 | "metadata": {}, 82 | "output_type": "execute_result" 83 | } 84 
| ], 85 | "source": [ 86 | "hamming_distance(s1, s2)" 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": 5, 92 | "metadata": {}, 93 | "outputs": [ 94 | { 95 | "data": { 96 | "text/plain": [ 97 | "2" 98 | ] 99 | }, 100 | "execution_count": 5, 101 | "metadata": {}, 102 | "output_type": "execute_result" 103 | } 104 | ], 105 | "source": [ 106 | "hamming_distance2(s1, s2)" 107 | ] 108 | }, 109 | { 110 | "cell_type": "markdown", 111 | "metadata": {}, 112 | "source": [ 113 | "---" 114 | ] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "execution_count": 6, 119 | "metadata": {}, 120 | "outputs": [ 121 | { 122 | "data": { 123 | "text/plain": [ 124 | "1" 125 | ] 126 | }, 127 | "execution_count": 6, 128 | "metadata": {}, 129 | "output_type": "execute_result" 130 | } 131 | ], 132 | "source": [ 133 | "hamming_distance(s2, s3)" 134 | ] 135 | }, 136 | { 137 | "cell_type": "code", 138 | "execution_count": 7, 139 | "metadata": {}, 140 | "outputs": [ 141 | { 142 | "data": { 143 | "text/plain": [ 144 | "1" 145 | ] 146 | }, 147 | "execution_count": 7, 148 | "metadata": {}, 149 | "output_type": "execute_result" 150 | } 151 | ], 152 | "source": [ 153 | "hamming_distance2(s2, s3)" 154 | ] 155 | }, 156 | { 157 | "cell_type": "markdown", 158 | "metadata": {}, 159 | "source": [ 160 | "---" 161 | ] 162 | }, 163 | { 164 | "cell_type": "code", 165 | "execution_count": 8, 166 | "metadata": {}, 167 | "outputs": [ 168 | { 169 | "data": { 170 | "text/plain": [ 171 | "3" 172 | ] 173 | }, 174 | "execution_count": 8, 175 | "metadata": {}, 176 | "output_type": "execute_result" 177 | } 178 | ], 179 | "source": [ 180 | "hamming_distance(s1, s3)" 181 | ] 182 | }, 183 | { 184 | "cell_type": "code", 185 | "execution_count": 9, 186 | "metadata": {}, 187 | "outputs": [ 188 | { 189 | "data": { 190 | "text/plain": [ 191 | "3" 192 | ] 193 | }, 194 | "execution_count": 9, 195 | "metadata": {}, 196 | "output_type": "execute_result" 197 | } 198 | ], 199 | "source": [ 200 | 
"hamming_distance2(s1, s3)" 201 | ] 202 | } 203 | ], 204 | "metadata": { 205 | "kernelspec": { 206 | "display_name": "Python [conda root]", 207 | "language": "python", 208 | "name": "conda-root-py" 209 | }, 210 | "language_info": { 211 | "codemirror_mode": { 212 | "name": "ipython", 213 | "version": 3 214 | }, 215 | "file_extension": ".py", 216 | "mimetype": "text/x-python", 217 | "name": "python", 218 | "nbconvert_exporter": "python", 219 | "pygments_lexer": "ipython3", 220 | "version": "3.5.4" 221 | } 222 | }, 223 | "nbformat": 4, 224 | "nbformat_minor": 2 225 | } 226 | -------------------------------------------------------------------------------- /notebooks/Legacy_Code/Bernoulli_Naive_Bayes.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import numpy as np" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 2, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "def create_dataset(n_rows=10, n_cols=2, prob=(0.5, 0.5), seed=None):\n", 19 | " '''\n", 20 | " creates a 2D numpy array with 0s and 1s for columns\n", 21 | " \n", 22 | " INPUT:\n", 23 | " n_rows = (int) number of rows in dataset\n", 24 | " n_cols = (int) number of columns starting with target followed by features\n", 25 | " prob = (tuple) probability of success for target, feature 1, feature 2, ..., feature n\n", 26 | " OUTPUT:\n", 27 | " dataset (numpy array)\n", 28 | " '''\n", 29 | " # error handling\n", 30 | " assert type(n_rows) == int, 'n_rows must be an integer'\n", 31 | " assert type(n_cols) == int, 'n_cols must be an integer'\n", 32 | " assert type(prob) == tuple, 'prob must be a tuple of probabilities'\n", 33 | " assert len(prob) == n_cols, 'tuple must contain probabilities for each n_col'\n", 34 | " assert type(seed) == int, 'seed must be an integer'\n", 35 | " \n", 36 | " # 
reproducibility\n", 37 | " if seed:\n", 38 | " np.random.seed(seed)\n", 39 | " \n", 40 | " # create dataset\n", 41 | " for i, p in enumerate(prob):\n", 42 | " if i < 1:\n", 43 | " dataset = np.random.binomial(n=1, p=prob[i], size=n_rows)\n", 44 | " else:\n", 45 | " column = np.random.binomial(n=1, p=prob[i], size=n_rows)\n", 46 | " dataset = np.c_[dataset, column]\n", 47 | " \n", 48 | " return dataset" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": 3, 54 | "metadata": {}, 55 | "outputs": [], 56 | "source": [ 57 | "probabilities = (0.5, 0.5, 0.5, 0.5)\n", 58 | "data = create_dataset(n_rows=4, n_cols=4, prob=probabilities, seed=42)" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": 4, 64 | "metadata": {}, 65 | "outputs": [ 66 | { 67 | "data": { 68 | "text/plain": [ 69 | "array([[0, 0, 1, 1],\n", 70 | " [1, 0, 1, 0],\n", 71 | " [1, 0, 0, 0],\n", 72 | " [1, 1, 1, 0]])" 73 | ] 74 | }, 75 | "execution_count": 4, 76 | "metadata": {}, 77 | "output_type": "execute_result" 78 | } 79 | ], 80 | "source": [ 81 | "data" 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": 5, 87 | "metadata": {}, 88 | "outputs": [], 89 | "source": [ 90 | "X = data[:, 1:]\n", 91 | "y = data[:, 0]" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": 6, 97 | "metadata": {}, 98 | "outputs": [ 99 | { 100 | "data": { 101 | "text/plain": [ 102 | "array([[0, 1, 1],\n", 103 | " [0, 1, 0],\n", 104 | " [0, 0, 0],\n", 105 | " [1, 1, 0]])" 106 | ] 107 | }, 108 | "execution_count": 6, 109 | "metadata": {}, 110 | "output_type": "execute_result" 111 | } 112 | ], 113 | "source": [ 114 | "X" 115 | ] 116 | }, 117 | { 118 | "cell_type": "code", 119 | "execution_count": 7, 120 | "metadata": {}, 121 | "outputs": [], 122 | "source": [ 123 | "class BernoulliNB:\n", 124 | " \n", 125 | " \n", 126 | " def __init__(self):\n", 127 | " self.prob_target_one_ = None\n", 128 | " self.prob_target_zero_ = None\n", 129 | " \n", 130 | " \n", 131 | 
" def _reshape(self, X):\n", 132 | " '''converts array to appropriate dimensions'''\n", 133 | " try:\n", 134 | " n_cols = X.shape[1]\n", 135 | " except:\n", 136 | " n_cols = X.reshape(-1,1)\n", 137 | " return n_cols\n", 138 | " \n", 139 | " \n", 140 | " def fit(self, X, y):\n", 141 | " '''calculate priors and likelihoods'''\n", 142 | " # setup\n", 143 | " n_classes = len(np.unique(y))\n", 144 | " n_cols = self._reshape(X)\n", 145 | " \n", 146 | " # priors (target)\n", 147 | " self.prob_target_one_ = np.mean(y)\n", 148 | " self.prob_target_zero_ = 1 - self.prob_target_one_\n", 149 | " \n", 150 | " # likelihoods (features)\n", 151 | " n_feature_bins = 2\n", 152 | " self.prob_features_ = np.zeros((n_feature_bins, n_cols), dtype=float)\n", 153 | " \n", 154 | " for i in range(n_feature_bins):\n", 155 | " for j in range(n_cols):\n", 156 | " column = X[:,j]\n", 157 | " intersection_y_and_col = sum(np.logical_and(y==i, column==i))\n", 158 | " sum_y = sum(y==i)\n", 159 | " self.prob_features_[i,j] = intersection_y_and_col / sum_y\n", 160 | " \n", 161 | " prob_complements = 1 - self.prob_features_\n", 162 | " self.prob_features_ = np.concatenate((nb.prob_features_, prob_complements), axis=0)\n", 163 | " self.prob_features_ = self.prob_features_[[0,2,3,1],:] # rearrange array\n", 164 | " \n", 165 | " \n", 166 | " def predict(self, X):\n", 167 | " '''return most likely class'''\n", 168 | " # NEED MULTIPLE PREDICTIONS\n", 169 | " \n", 170 | " # setup\n", 171 | " X = np.array(X)\n", 172 | " n_rows = X.shape[0]\n", 173 | " try: \n", 174 | " n_cols = X.shape[1] \n", 175 | " except: \n", 176 | " n_cols = X.shape[0]\n", 177 | " class_zero_probs, class_one_probs = np.split(nb.prob_features_, 2)\n", 178 | " \n", 179 | " # likelihoods\n", 180 | " class_zero_likelihoods = class_zero_probs[X, np.arange(n_cols)]\n", 181 | " class_one_likelihoods = class_one_probs[X, np.arange(n_cols)]\n", 182 | " \n", 183 | " # posteriors\n", 184 | " class_zero_posterior = self.prob_target_zero_ * 
np.prod(class_zero_likelihoods)\n", 185 | " class_one_posterior = self.prob_target_one_ * np.prod(class_one_likelihoods)\n", 186 | "\n", 187 | " return (class_zero_posterior, class_one_posterior), np.argmax((class_zero_posterior, class_one_posterior))" 188 | ] 189 | }, 190 | { 191 | "cell_type": "code", 192 | "execution_count": 8, 193 | "metadata": {}, 194 | "outputs": [], 195 | "source": [ 196 | "nb = BernoulliNB()\n", 197 | "nb.fit(X,y)" 198 | ] 199 | }, 200 | { 201 | "cell_type": "code", 202 | "execution_count": 9, 203 | "metadata": {}, 204 | "outputs": [ 205 | { 206 | "data": { 207 | "text/plain": [ 208 | "array([[ 1. , 0. , 0. ],\n", 209 | " [ 0. , 1. , 1. ],\n", 210 | " [ 0.66666667, 0.33333333, 1. ],\n", 211 | " [ 0.33333333, 0.66666667, 0. ]])" 212 | ] 213 | }, 214 | "execution_count": 9, 215 | "metadata": {}, 216 | "output_type": "execute_result" 217 | } 218 | ], 219 | "source": [ 220 | "nb.prob_features_" 221 | ] 222 | }, 223 | { 224 | "cell_type": "code", 225 | "execution_count": 10, 226 | "metadata": {}, 227 | "outputs": [ 228 | { 229 | "data": { 230 | "text/plain": [ 231 | "((0.0, 0.083333333333333343), 1)" 232 | ] 233 | }, 234 | "execution_count": 10, 235 | "metadata": {}, 236 | "output_type": "execute_result" 237 | } 238 | ], 239 | "source": [ 240 | "nb.predict(np.array([1,0,0]))" 241 | ] 242 | }, 243 | { 244 | "cell_type": "code", 245 | "execution_count": null, 246 | "metadata": {}, 247 | "outputs": [], 248 | "source": [] 249 | } 250 | ], 251 | "metadata": { 252 | "kernelspec": { 253 | "display_name": "Python [conda root]", 254 | "language": "python", 255 | "name": "conda-root-py" 256 | }, 257 | "language_info": { 258 | "codemirror_mode": { 259 | "name": "ipython", 260 | "version": 3 261 | }, 262 | "file_extension": ".py", 263 | "mimetype": "text/x-python", 264 | "name": "python", 265 | "nbconvert_exporter": "python", 266 | "pygments_lexer": "ipython3", 267 | "version": "3.5.4" 268 | } 269 | }, 270 | "nbformat": 4, 271 | "nbformat_minor": 2 272 | } 
class Bootstrap:
    '''Creates bootstrapped datasets by sampling rows with replacement.'''

    def create_dataset(self, X, y, seed=None):
        '''creates bootstrapped dataset

        Input:
            X = array-like of samples (first axis indexes the rows)
            y = array-like of targets, one per row of X
            seed = optional integer; when given (0 included) the global
                   NumPy random state is re-seeded so the draw is reproducible
        Output:
            (X_boot, y_boot): numpy arrays with the same number of rows as X,
            drawn with replacement; each sampled row keeps its own target.
        Raises:
            ValueError if X and y do not have the same number of rows.
        '''
        X = np.asarray(X)
        y = np.asarray(y)
        n_rows = len(X)
        if len(y) != n_rows:
            raise ValueError('X and y must have the same number of rows '
                             '({} != {})'.format(n_rows, len(y)))

        # Compare against None: a plain truth test would silently ignore seed=0.
        if seed is not None:
            np.random.seed(seed)

        indices = np.arange(n_rows)
        indices = np.random.choice(indices, size=n_rows, replace=True)
        return X[indices], y[indices]


class Bagging(Bootstrap):
    '''Refits one estimator on many bootstrapped datasets and records the
    fitted coefficients of every fit.'''

    def __init__(self):
        self.estimator = None
        # Must exist before simulate() runs: simulate() reads self.seed even
        # when the caller does not pass a seed.
        self.seed = None

    def simulate(self, estimator, X, y, n_datasets, seed=None):
        '''Fit `estimator` on `n_datasets` bootstrap resamples of (X, y).

        Input:
            estimator = object exposing fit(X, y) and a coef_ attribute after fitting
            X, y = data and targets, passed on to create_dataset
            n_datasets = number of bootstrap resamples to draw
            seed = optional integer; dataset i is drawn with seed + i. When
                   omitted, a seed stored by an earlier call is reused, and
                   if there is none the draws are left unseeded.
        Output:
            None. The per-fit coefficients are stored in self.coefficients_.
        '''
        if seed is not None:
            self.seed = seed
        self.coefficients_ = []
        self.estimator = estimator
        for _ in range(n_datasets):
            X_, y_ = self.create_dataset(X, y, seed=self.seed)
            self.estimator.fit(X_, y_)
            # Copy so an estimator that updates coef_ in place cannot make
            # every stored entry point at the same array.
            self.coefficients_.append(np.copy(self.estimator.coef_))
            if self.seed is not None:
                self.seed += 1
array([ 0.01504485, 0.09281765, -0.10585113, 0.0013086 , -0.02779966]),\n", 270 | " array([ 0.03890223, 0.04224749, -0.1397625 , 0.01358421, -0.05403209]),\n", 271 | " array([ 0.05415476, 0.04757173, -0.13439324, 0.03477829, -0.04744752]),\n", 272 | " array([ 0.01504485, 0.09281765, -0.10585113, 0.0013086 , -0.02779966]),\n", 273 | " array([ 0.0225789 , 0.04256523, -0.084117 , 0.0519456 , 0.04574701]),\n", 274 | " array([ 0.05415476, 0.04757173, -0.13439324, 0.03477829, -0.04744752]),\n", 275 | " array([ 0.03202357, 0.00323003, -0.10583687, 0.05571963, 0.03346221]),\n", 276 | " array([ 0.02997327, -0.04066438, -0.09418353, 0.0071274 , 0.01730941])]" 277 | ] 278 | }, 279 | "execution_count": 13, 280 | "metadata": {}, 281 | "output_type": "execute_result" 282 | } 283 | ], 284 | "source": [ 285 | "bg.coefficients_[:10]" 286 | ] 287 | }, 288 | { 289 | "cell_type": "markdown", 290 | "metadata": {}, 291 | "source": [ 292 | "#### Mean of Coefficients" 293 | ] 294 | }, 295 | { 296 | "cell_type": "code", 297 | "execution_count": 14, 298 | "metadata": {}, 299 | "outputs": [ 300 | { 301 | "data": { 302 | "text/plain": [ 303 | "array([ 0.03915596, 0.0205273 , -0.11609587, 0.02856614, -0.01729375])" 304 | ] 305 | }, 306 | "execution_count": 14, 307 | "metadata": {}, 308 | "output_type": "execute_result" 309 | } 310 | ], 311 | "source": [ 312 | "np.mean(bg.coefficients_, axis=0)" 313 | ] 314 | }, 315 | { 316 | "cell_type": "markdown", 317 | "metadata": {}, 318 | "source": [ 319 | "#### Standard Error of Coefficients" 320 | ] 321 | }, 322 | { 323 | "cell_type": "code", 324 | "execution_count": 15, 325 | "metadata": {}, 326 | "outputs": [ 327 | { 328 | "data": { 329 | "text/plain": [ 330 | "array([ 0.01944324, 0.03295672, 0.02520118, 0.01921173, 0.03869426])" 331 | ] 332 | }, 333 | "execution_count": 15, 334 | "metadata": {}, 335 | "output_type": "execute_result" 336 | } 337 | ], 338 | "source": [ 339 | "np.std(bg.coefficients_, axis=0)" 340 | ] 341 | } 342 | ], 343 | "metadata": 
def cross_validation(data, shuffle=True, folds=2, random_state=None, verbose=True):
    '''return train and test sets for every fold of a K-fold split

    Input:
        data = array-like; the first axis indexes the samples
        shuffle = True to shuffle the row order before splitting
        folds = number of folds (2 <= folds <= number of rows)
        random_state = optional integer seed (0 included) for the global
                       NumPy random state
        verbose = True to print each fold's train and test sets
    Output:
        list of (train, test) tuples, one per fold. Every row lands in
        exactly one test set; the last fold absorbs the remainder rows when
        the row count is not divisible by `folds`.
    '''

    # error checking
    assert type(shuffle) == bool, 'shuffle must be True or False!'
    assert type(folds) == int, 'must be an integer'
    assert folds > 1, 'must have 2 or more folds'
    assert folds <= len(data), 'can have no more than {} folds!'.format(len(data))
    if random_state is not None:
        assert type(random_state) == int, 'random_state must be an integer!'
        # reproducibility (tested against None so random_state=0 is honoured)
        np.random.seed(random_state)

    # setup indices
    data = np.asarray(data)
    indices = np.arange(data.shape[0])
    if shuffle:
        np.random.shuffle(indices)

    # setup K-fold; integer division avoids float round-off in (1/folds) * n
    num_indices = len(indices)
    split = num_indices // folds

    # main logic
    results = []
    for fold in range(folds):
        start = split * fold
        # the final fold runs to the end so the remainder rows are tested too
        stop = start + split if fold < folds - 1 else num_indices

        # mask out exactly the positions used for testing, so no row can
        # appear in both the train and the test set of a fold
        mask = np.ones(num_indices, dtype=bool)
        mask[start:stop] = False

        test_indices = indices[start:stop]
        train_indices = indices[mask]
        train = data[train_indices]
        test = data[test_indices]
        if verbose:
            print('Fold:', fold)
            print('Train:\n', train)
            print('Test:\n', test, '\n')
        results.append((train, test))

    return results
58 75 72 37 48]\n", 110 | " [13 10 56 21 24 47 43 96 57 35]\n", 111 | " [29 97 6 73 40 17 16 97 50 14]\n", 112 | " [ 0 70 39 80 83 67 81 36 26 79]]\n", 113 | "Test:\n", 114 | " [[93 64 84 37 23 34 15 48 9 48]\n", 115 | " [55 19 38 83 21 92 85 82 91 19]\n", 116 | " [21 41 3 72 34 65 89 14 83 63]] \n", 117 | "\n", 118 | "Fold: 1\n", 119 | "Train:\n", 120 | " [[93 64 84 37 23 34 15 48 9 48]\n", 121 | " [55 19 38 83 21 92 85 82 91 19]\n", 122 | " [21 41 3 72 34 65 89 14 83 63]\n", 123 | " [13 28 93 54 77 58 75 72 37 48]\n", 124 | " [13 10 56 21 24 47 43 96 57 35]\n", 125 | " [29 97 6 73 40 17 16 97 50 14]\n", 126 | " [ 0 70 39 80 83 67 81 36 26 79]]\n", 127 | "Test:\n", 128 | " [[ 4 91 9 37 28 29 3 43 71 59]\n", 129 | " [65 9 17 19 58 13 3 92 11 72]\n", 130 | " [ 1 49 31 86 43 80 44 87 73 73]] \n", 131 | "\n", 132 | "Fold: 2\n", 133 | "Train:\n", 134 | " [[93 64 84 37 23 34 15 48 9 48]\n", 135 | " [55 19 38 83 21 92 85 82 91 19]\n", 136 | " [21 41 3 72 34 65 89 14 83 63]\n", 137 | " [ 4 91 9 37 28 29 3 43 71 59]\n", 138 | " [65 9 17 19 58 13 3 92 11 72]\n", 139 | " [ 1 49 31 86 43 80 44 87 73 73]\n", 140 | " [ 0 70 39 80 83 67 81 36 26 79]]\n", 141 | "Test:\n", 142 | " [[13 28 93 54 77 58 75 72 37 48]\n", 143 | " [13 10 56 21 24 47 43 96 57 35]\n", 144 | " [29 97 6 73 40 17 16 97 50 14]\n", 145 | " [ 0 70 39 80 83 67 81 36 26 79]] \n", 146 | "\n" 147 | ] 148 | } 149 | ], 150 | "source": [ 151 | "cross_validation(fake_data, shuffle=True, folds=3, random_state=42, verbose=True)" 152 | ] 153 | } 154 | ], 155 | "metadata": { 156 | "kernelspec": { 157 | "display_name": "Python [conda root]", 158 | "language": "python", 159 | "name": "conda-root-py" 160 | }, 161 | "language_info": { 162 | "codemirror_mode": { 163 | "name": "ipython", 164 | "version": 3 165 | }, 166 | "file_extension": ".py", 167 | "mimetype": "text/x-python", 168 | "name": "python", 169 | "nbconvert_exporter": "python", 170 | "pygments_lexer": "ipython3", 171 | "version": "3.5.4" 172 | } 173 | }, 174 | 
class KNN:
    '''
    KNN is a lazy supervised machine learning algorithm.

    The user specifies the number of neighbors to find. fit() stores the
    data points; predict() takes their targets and returns, for every stored
    point, the vote (classification) or mean (regression) of the targets of
    its K nearest *other* points.'''


    def __init__(self, K=2, learning='classification'):
        '''
        K = number of neighbors (at least 1)
        learning = 'classification' or 'regression'
        '''
        assert learning == 'classification' or learning == 'regression', 'need to set learning.'
        assert K >= 1, 'K must be at least 1.'
        self.K_ = K
        self.type_ = learning
        self._nrows = None
        self._indices = None
        self._X = None


    def _calc_distance(self):
        '''Calculate the Euclidean distance between every pair of stored points.
        Output:
            (n, n) matrix of pairwise distances. The diagonal is set to inf
            so that a point always sorts last among its own neighbors.
        '''
        points = np.asarray(self._X, dtype=float)
        self._nrows = len(points)
        points = points.reshape(self._nrows, -1)
        # Vectorised pairwise differences; note this builds an (n, n, d)
        # intermediate array.
        diff = points[:, np.newaxis, :] - points[np.newaxis, :, :]
        distances = np.sqrt((diff ** 2).sum(axis=-1))
        np.fill_diagonal(distances, np.inf)
        return distances


    def _sort_indices(self, X):
        '''Sort distance matrix indices by shortest distance.
        Input:
            X = numpy distance matrix from self._calc_distance
        Output:
            indices sorted by min distance
        '''
        return np.argsort(X, axis=1)


    @staticmethod
    def _majority_vote(labels):
        '''Most frequent label of each row (smallest label wins ties).
        Input:
            labels = (n, K) array of neighbor labels
        Output:
            (n, 1) array of winning labels
        '''
        winners = []
        for row in labels:
            values, counts = np.unique(row, return_counts=True)
            # np.unique returns sorted values and argmax picks the first
            # maximum, so ties resolve to the smallest label.
            winners.append(values[np.argmax(counts)])
        return np.array(winners, dtype=labels.dtype).reshape(-1, 1)


    def fit(self, X):
        '''Copy data.
        Input:
            X = numpy array of data points'''
        X = np.asarray(X)
        assert self.K_ < X.shape[0], 'You chose too many neighbors!'
        self._X = X


    def predict(self, y):
        '''Predict a target for every stored data point from its neighbors.
        Input:
            y = targets of the points given to fit(), in the same order
        Output:
            classification: (n, 1) array of majority labels
            regression: (n,) array with the mean target of the K neighbors
        '''
        y = np.asarray(y)
        distances = self._calc_distance()
        order = self._sort_indices(distances)
        # The last column is the point itself (distance inf): drop it, then
        # keep the K closest remaining points.
        self._indices = order[:, :-1][:, :self.K_]
        neighbor_targets = y[self._indices]
        if self.type_ == 'classification':
            return self._majority_vote(neighbor_targets)
        # One mean per row; indexing the result with [0] would keep only the
        # first point's prediction.
        return np.mean(neighbor_targets, axis=1)
"markdown", 144 | "metadata": { 145 | "collapsed": true 146 | }, 147 | "source": [ 148 | "## Modeling" 149 | ] 150 | }, 151 | { 152 | "cell_type": "code", 153 | "execution_count": 29, 154 | "metadata": { 155 | "collapsed": true 156 | }, 157 | "outputs": [], 158 | "source": [ 159 | "knn = KNN(K=3)" 160 | ] 161 | }, 162 | { 163 | "cell_type": "code", 164 | "execution_count": 30, 165 | "metadata": {}, 166 | "outputs": [ 167 | { 168 | "data": { 169 | "text/plain": [ 170 | "array([[1],\n", 171 | " [1],\n", 172 | " [1],\n", 173 | " [1]])" 174 | ] 175 | }, 176 | "execution_count": 30, 177 | "metadata": {}, 178 | "output_type": "execute_result" 179 | } 180 | ], 181 | "source": [ 182 | "knn.fit(X)\n", 183 | "knn.predict(y)" 184 | ] 185 | } 186 | ], 187 | "metadata": { 188 | "kernelspec": { 189 | "display_name": "Python [conda root]", 190 | "language": "python", 191 | "name": "conda-root-py" 192 | }, 193 | "language_info": { 194 | "codemirror_mode": { 195 | "name": "ipython", 196 | "version": 3 197 | }, 198 | "file_extension": ".py", 199 | "mimetype": "text/x-python", 200 | "name": "python", 201 | "nbconvert_exporter": "python", 202 | "pygments_lexer": "ipython3", 203 | "version": "3.5.4" 204 | } 205 | }, 206 | "nbformat": 4, 207 | "nbformat_minor": 2 208 | } 209 | -------------------------------------------------------------------------------- /notebooks/Machine_Learning/Algorithms_From_Scratch/Train_Test_Split.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import numpy as np" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 2, 17 | "metadata": {}, 18 | "outputs": [ 19 | { 20 | "data": { 21 | "text/plain": [ 22 | "array([[44, 42, 47, 60, 78, 60, 26, 79, 43, 63],\n", 23 | " [38, 60, 16, 75, 44, 63, 80, 8, 21, 80],\n", 24 | " [14, 76, 22, 67, 5, 16, 
def train_test_split(data, shuffle=True, test_proportion=0.2, random_state=None):
    '''return train and test sets

    Input:
        data = array-like; the first axis indexes the samples
        shuffle = True to shuffle the row order before splitting
        test_proportion = fraction of rows (0 to 1) placed in the test set;
                          the row count is rounded down
        random_state = optional integer seed (0 included) for the global
                       NumPy random state
    Output:
        (train, test) numpy arrays
    '''

    # error checking
    assert type(shuffle) == bool, 'shuffle must be True or False!'
    assert 0 <= test_proportion <= 1, 'test_proportion must be between 0 and 1!'
    if random_state is not None:
        assert type(random_state) == int, 'random_state must be an integer!'
        # reproducibility (tested against None so random_state=0 is honoured)
        np.random.seed(random_state)

    # setup indices
    data = np.asarray(data)
    indices = np.arange(data.shape[0])
    if shuffle:
        np.random.shuffle(indices)

    # main logic: the first `split` (shuffled) rows become the test set
    num_indices = len(indices)
    split = int(np.floor(test_proportion * num_indices))
    train = data[indices[split:]]
    test = data[indices[:split]]

    return train, test
{}, 93 | "outputs": [ 94 | { 95 | "data": { 96 | "text/plain": [ 97 | "array([[14, 76, 22, 67, 5, 16, 11, 5, 26, 57],\n", 98 | " [43, 55, 36, 72, 77, 61, 4, 77, 38, 94],\n", 99 | " [38, 60, 16, 75, 44, 63, 80, 8, 21, 80],\n", 100 | " [ 0, 77, 60, 70, 98, 10, 50, 38, 13, 90],\n", 101 | " [44, 42, 47, 60, 78, 60, 26, 79, 43, 63],\n", 102 | " [83, 96, 34, 47, 15, 39, 85, 58, 29, 80]])" 103 | ] 104 | }, 105 | "execution_count": 5, 106 | "metadata": {}, 107 | "output_type": "execute_result" 108 | } 109 | ], 110 | "source": [ 111 | "train" 112 | ] 113 | }, 114 | { 115 | "cell_type": "code", 116 | "execution_count": 6, 117 | "metadata": {}, 118 | "outputs": [ 119 | { 120 | "data": { 121 | "text/plain": [ 122 | "array([[32, 1, 22, 96, 0, 40, 23, 54, 24, 76],\n", 123 | " [63, 0, 47, 80, 78, 13, 32, 90, 50, 9],\n", 124 | " [49, 57, 58, 44, 91, 22, 95, 46, 11, 11],\n", 125 | " [72, 56, 71, 25, 25, 12, 79, 26, 81, 68]])" 126 | ] 127 | }, 128 | "execution_count": 6, 129 | "metadata": {}, 130 | "output_type": "execute_result" 131 | } 132 | ], 133 | "source": [ 134 | "test" 135 | ] 136 | } 137 | ], 138 | "metadata": { 139 | "kernelspec": { 140 | "display_name": "Python [conda root]", 141 | "language": "python", 142 | "name": "conda-root-py" 143 | }, 144 | "language_info": { 145 | "codemirror_mode": { 146 | "name": "ipython", 147 | "version": 3 148 | }, 149 | "file_extension": ".py", 150 | "mimetype": "text/x-python", 151 | "name": "python", 152 | "nbconvert_exporter": "python", 153 | "pygments_lexer": "ipython3", 154 | "version": "3.5.4" 155 | } 156 | }, 157 | "nbformat": 4, 158 | "nbformat_minor": 2 159 | } 160 | -------------------------------------------------------------------------------- /notebooks/Machine_Learning/Algorithms_From_Scratch/Train_Validation_Test_Split.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": true 8 | 
def train_validation_test_split(data, shuffle=True, validation_proportion=0.3,
                                test_proportion=0.2, random_state=None):
    '''return train, validation, and test sets

    Input:
        data = array-like; the first axis indexes the samples
        shuffle = True to shuffle the row order before splitting
        validation_proportion = fraction of ALL rows placed in the validation set
        test_proportion = fraction of ALL rows placed in the test set
        random_state = optional integer seed (0 included) for the global
                       NumPy random state
    Output:
        (train, validation, test) numpy arrays. Both row counts are rounded
        down; the remaining rows form the training set.
    '''

    # error checking
    assert type(shuffle) == bool, 'shuffle must be True or False!'
    if random_state is not None:
        assert type(random_state) == int, 'random_state must be an integer!'
    assert validation_proportion >= 0 and test_proportion >= 0, 'proportions cannot be negative!'
    assert validation_proportion + test_proportion < 1, 'need some training data!'

    # reproducibility (tested against None so random_state=0 is honoured)
    if random_state is not None:
        np.random.seed(random_state)

    # setup indices
    data = np.asarray(data)
    indices = np.arange(data.shape[0])
    if shuffle:
        np.random.shuffle(indices)

    # MAIN LOGIC
    # number of rows for each held-out set
    num_indices = len(indices)
    validation_split = int(np.floor(validation_proportion * num_indices))
    test_split = int(np.floor(test_proportion * num_indices))

    # layout of the (shuffled) indices: [ test | validation | train ]
    test_indices = indices[:test_split]
    validation_indices = indices[test_split:test_split + validation_split]
    train_indices = indices[test_split + validation_split:]

    # create data sets
    train = data[train_indices]
    validation = data[validation_indices]
    test = data[test_indices]

    return train, validation, test
def sigmoid(x):
    '''Logistic sigmoid, 1 / (1 + e**-x), evaluated without overflow.

    Input:
        x = real scalar or array-like of real numbers
    Output:
        float for scalar input, otherwise a numpy array with the shape of x;
        every value lies in [0, 1].
    '''
    z = np.asarray(x, dtype=float)
    # exp(-|z|) is always in (0, 1], so it cannot overflow no matter how
    # large |z| gets (np.e ** -x blows up for very negative x).
    decay = np.exp(-np.abs(z))
    # z >= 0: 1 / (1 + e^-z);  z < 0: e^z / (1 + e^z) -- the same function.
    result = np.where(z >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    return result if result.ndim else float(result)
[ 36 | "x = np.linspace(-15, 15, 100)\n", 37 | "lst = [sigmoid(i) for i in x]" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": 4, 43 | "metadata": {}, 44 | "outputs": [ 45 | { 46 | "data": { 47 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAAEWCAYAAACJ0YulAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3XmcXXV9//HXe7ZM9p2QfZGwhCWUDMimhFIFrEKtQlHL\nZimlBWkf9leX/vy5VH2Uam2tiiKlGPhZRH4ubdRUcAEEIZrFrJBAiBlmJoGsM1lnvZ/fH/dmvAyZ\nzM1k7py7vJ+Px2TuOed77/mczMx933O+53yPIgIzMzOAiqQLMDOzwuFQMDOzbg4FMzPr5lAwM7Nu\nDgUzM+vmUDAzs24OBStokt4n6bFCW6+kJyTdMpg1HQtJb5K0Mek6rPg4FCxxki6W9IykFkm7Jf1S\n0rkAEfGfEfHWwa7peNYr6ZOSOiTtz/r60EDX2GOdIemkw9MR8VREnJLPdVppqkq6ACtvkkYBPwT+\nEngEqAHeBLQlWdcA+HZE/GnSRZgdK+8pWNJOBoiIb0VEV0QciojHImINgKSbJD19uLGkt0ramNmr\n+KqkJw8fxsm0/aWkf5XULGmzpAsz8xskbZd0Y9ZrjZb0oKQdkuolfUxSRS/rfYukDZn1fgVQfzZW\n0hZJf5A1/UlJ38w8npX5xH+jpJcl7ZT0v7PaVkr6e0kvSdonaYWk6ZJ+kWmyOrNX8ieSFkpqzHru\naZlDXs2S1ku6KmvZIkl3S/pR5nV/JekN/dk+K34OBUvaC0CXpAckXSlpbG8NJU0AvgN8FBgPbAQu\n7NHsjcCazPKHgIeBc4GTgD8FviJpRKbtl4HRwBzgEuAG4OZe1vs94GPABOAl4KL+bGyOLgZOAS4D\nPi7ptMz8DwLvAd4GjALeDxyMiDdnls+PiBER8e0e9VcDPwAeA04APgD8p6Tsw0vXAZ8CxgKbgM/m\nY8Os8DkULFERsZf0m2AA/w7skLRY0qQjNH8bsD4ivhcRncCXgFd6tPltRHwjIrqAbwPTgX+IiLaI\neAxoB06SVEn6jfCjEbEvIrYAXwCuP8p6vxMRHcAXj7Denq7NfCo//DWl7/+Nbp/K7DGtBlYD8zPz\nbwE+FhEbI211ROzK4fXOB0YAd0VEe0T8nPQhu/dktfl+RPw68//6n8DZx1CvlRCHgiUuIp6PiJsi\nYhpwBjCF9BtvT1OAhqznBdDYo82rWY8PZdr1nDeC9Cf+aqA+a1k9MDXH9TYcoV22RyJiTNbX1j7a\nZ8sOnIOZeiEdcC8dw+scNgVoiIhU1rye29rbOq3MOBSsoETEBmAR6XDoaRsw7fCEJGVPH6OdQAcw\nM2veDKCpl/VO77He6Udol4sDwLCs6ROP4bkNQH+O9W8Fph/uL8nobVutzDkULFGSTpX0t5KmZaan\nkz6ssfQIzX8EnCnpjyRVAbdzbG+q3TKHlx4BPitppKSZpI/Zf7OX9Z4u6Y8z672zv+sFVgHXSaqW\nVAe8+xieex/waUlzlXaWpPGZZa+S7hs5kl+R/vT/ocx6FwLvIN3fYvYaDgVL2j7SncO/knSAdBis\nA/62Z8OI2AlcA3wO2AXMA5bT/9NXP0D6k/tm4GnSHdP3H2W9d2XWOxf4ZT/X+X9If9rfQ7pj96Fj\neO6/kA6yx4C9wH8AQzPLPgk8kOm/uLZH/e2kQ+BK0ntIXwVuyOyVmb2GfJMdK1aZwyGNwPsi4vGk\n6zErBd5TsKIi6XJJYyQNAf6e9PUCRzrUZGb94FCwYnMB6TNwdp
I+JPJHEXEo2ZLMSocPH5mZWTfv\nKZiZWbeiGxBvwoQJMWvWrKTLMDMrKitWrNgZERP7ald0oTBr1iyWL1+edBlmZkVFUn3frXz4yMzM\nsjgUzMysm0PBzMy6ORTMzKybQ8HMzLrlLRQk3Z+5/eG6XpZL0pckbZK0RtI5+arFzMxyk889hUXA\nFUdZfiXp0SbnArcCX8tjLWZmloO8XacQEb+QNOsoTa4GHszcxWppZpCzyRGxLV81mdnA6uhKcbC9\ni4PtnbR2pGjt6KKtM0V7Z4qOrhTtXSk6OlN0pYKOVNCVStGVgq5UilRAVypIRZBKBQGkAiLS8yLI\nzEs/hvSy9Pf0ssOPDwt+N9HbCD6DMrBPnoYPqps1jjef3Of1Z8clyYvXpvLaWxo2Zua9LhQk3Up6\nb4IZM2YMSnFmx2rRokUA3HTTTYnWcbyaD7ZTv+sg21oOsbW5lVf3trJzfzu7D7Sx+2AH+w51sLe1\ng72tnbR3pvp+wTIlDfxr3nbJG0o6FHIWEfcC9wLU1dV5BD+zAdCVCl7cvo81jS2sbWzh+W172bzz\nALsPtL+mXU1VBROG1zBuRA1jh9UwY9wwRtZWMbK2ihE1VQwbUsWwmkpqqyuorapkSHUFNZWVVFeK\n6qoKaiorqKwQ1ZWisqKCSomKCqiqqKBCUFEhBFRWCElIUKH0vMOPyTwW6n6zTS9X9+PDst+MlY93\n5hKXZCg08dr73E7D94w1y6vte1v5+YbtPPXiTp7etJOWQx0AjBxSxWmTR3H56ZOYM2EEsyYMZ8qY\nWiaPHsrYYdV+cy0jSYbCYuAOSQ+Tvh1ji/sTzAbeofYuHnvuFb67somnX9xBKuDEUbW8dd4kLnjD\neOZPH8Ps8cOpqPAbv+UxFCR9C1gITJDUCHwCqAaIiHuAJcDbgE2kbyp+c75qMStH+1o7ePDZev7j\n6d+y+0A7U8cM5a8WnsQ75k/h5Ekj/OnfjiifZx+9p4/lAdyer/WblauOrhT3PfVb7nnyJVoOdXDJ\nyRP5i0vmcP7s8d4bsD4VRUezmeVmXVMLH/rOGp7btpfLTj2BOy+by/zpY5Iuy4qIQ8GsBKRSwRd/\n+gJ3P/ES44bX8PXrF3D56ScmXZYVIYeCWZE71N7F33z7Nzy6/lXedc40Pv72eYweVp10WVakHApm\nRWz7vlb+/IHlrGlq4eNvn8fNF81yB7IdF4eCWZHavreVd93zDDv3tXPv9XW8Zd6kpEuyEuBQMCtC\n+9s6uXnRMnbtb+ehP38jvzdjbNIlWYlwKJgVmfbOFH/5zRVseGUf/3FjnQPBBpRvsmNWRCKCj35v\nLU+9uJO7/vhMFp5yQtIlWYlxKJgVke+ubOK7Kxv568vmck3d9L6fYHaMHApmRaKp+RCfWrye82aN\n487L5iZdjpUoh4JZEUilgr/7f6tJRfDP18yn0sNVWJ44FMyKwIPPbuGZl3bxsbfPY8b4YUmXYyXM\noWBW4JqaD3HXjzew8JSJXHeu+xEsvxwKZgXuX3/yAqmAz77zTF+tbHnnUDArYBtf2cd3VzZyw/kz\nmTpmaNLlWBlwKJgVsM8/uoERNVXcfulJSZdiZcKhYFaglm3ZzU+f385tC9/A2OE1SZdjZcKhYFaA\nIoJ/+p8NTBw5hJsvmpV0OVZGHApmBejZl3axvH4Pd142l2E1HqLMBo9DwawAfeOZLYwbXsM1C6Yl\nXYqVGYeCWYFp2H2Qnz3/KtedO53a6sqky7Ey41AwKzDfXFqPJP70/JlJl2JlyKFgVkAOtXfx8LIG\nLj99ElN8XYIlwKFgVkD+a1UTLYc6uPGCWUmXYmXKoWBWICKCB57ZwqknjuS82eOSLsfKlEPBrECs\nqN/Dhlf2cdOFszzGkSXGoWBWIP5rVRO11RW8Y/6UpEuxMuZQMCsAHV0plqx9hctOm8TwIb5YzZLj\nUDArAL/ctJPdB9q5ynsJlj
CHglkB+MHqbYysrWLhKROTLsXKnEPBLGGtHV08tv4Vrjj9RIZU+Qpm\nS1ZeQ0HSFZI2Stok6SNHWD5a0g8krZa0XtLN+azHrBA9sXE7+9o6uepsHzqy5OUtFCRVAncDVwLz\ngPdImtej2e3AcxExH1gIfEGSB463srJ49VYmjKjhgjnjky7FLK97CucBmyJic0S0Aw8DV/doE8BI\npU/KHgHsBjrzWJNZQdnX2sHPnt/OH545mapKH8215OXzt3Aq0JA13ZiZl+0rwGnAVmAt8NcRker5\nQpJulbRc0vIdO3bkq16zQffzDdtp60z52gQrGEl/NLkcWAVMAc4GviJpVM9GEXFvRNRFRN3EiT47\nw0rH4xu2M354DefMGJt0KWZAfkOhCZieNT0tMy/bzcD3Im0T8Fvg1DzWZFYwulLBky/s4JKTJ1JR\n4WEtrDDkMxSWAXMlzc50Hl8HLO7R5mXgMgBJk4BTgM15rMmsYKxqaGbPwQ4WnnpC0qWYdcvb9fQR\n0SnpDuBRoBK4PyLWS7ots/we4NPAIklrAQEfjoid+arJrJA8sXE7FYJL5vqQqBWOvA6yEhFLgCU9\n5t2T9Xgr8NZ81mBWqB7fuJ0FM8cyelh10qWYdUu6o9msLG3f28q6pr0sPMWHjqywOBTMEvDExvSp\n1Zc6FKzAOBTMEvD4xu2cOKqW0yaPTLoUs9dwKJgNsvbOFE+9uJNLT53oO6xZwXEomA2y5fW72d/W\n6f4EK0gOBbNB9stNO6msEBedNCHpUsxex6FgNsiWbt7NWdNGM8K33bQC5FAwG0QH2jpZ3dDM+R4m\n2wqUQ8FsEK2o30NnKhwKVrAcCmaDaOnmXVRViLqZHhXVCpNDwWwQLd28i7OmjWa4+xOsQDkUzAbJ\ngbZO1jS2+NCRFTSHgtkgWe7+BCsCDgWzQdLdnzDL/QlWuBwKZoPk2Zd2MX/6GIbVuD/BCpdDwWwQ\n7G/rZG1TC+fPGZd0KWZH5VAwGwTLt+ymKxVcMMdDW1hhcyiYDYJf/3Y3VRXinJljki7F7KgcCmaD\nYEX9HuZNGeX+BCt4DgWzPOvoSrGmsYVzZvisIyt8DgWzPNuwbR+HOrpY4KEtrAg4FMzybOXLewA4\nx6FgRcChYJZnK+r3cOKoWqaMrk26FLM+ORTM8mxF/R7OmTnG92O2ouBQMMujV/e20tR8yJ3MVjQc\nCmZ5tLI+3Z/gTmYrFg4Fszxa+fIeaqoqOH3K6KRLMcuJQ8Esj1bU7+GsqaOpqfKfmhUH/6aa5Ulb\nZxfrmvb6VFQrKjmFgqTKfBdiVmrWNe2lvSvlTmYrKrnuKbwo6fOS5h3Li0u6QtJGSZskfaSXNgsl\nrZK0XtKTx/L6ZoXsN90XrXkQPCseuYbCfOAF4D5JSyXdKmnU0Z6Q2bu4G7gSmAe8p2eoSBoDfBW4\nKiJOB6451g0wK1S/aWhm6pihnDDSF61Z8cgpFCJiX0T8e0RcCHwY+ASwTdIDkk7q5WnnAZsiYnNE\ntAMPA1f3aPNe4HsR8XJmPdv7tRVmBWh1QzNnz/BeghWXnPsUJF0l6fvAF4EvAHOAHwBLennaVKAh\na7oxMy/bycBYSU9IWiHphl7Wf6uk5ZKW79ixI5eSzRK1c38bjXsOcfY0h4IVl1wHd38ReBz4fEQ8\nkzX/O5LefJzrXwBcBgwFnpW0NCJeyG4UEfcC9wLU1dXFcazPbFCsaWwGYP50h4IVl1xD4YaIeDp7\nhqSLIuKXEXFnL89pAqZnTU/LzMvWCOyKiAPAAUm/4Hf9F2ZFa1VDCxWCM6YetevNrODk2tH8pSPM\n+3Ifz1kGzJU0W1INcB2wuEeb/wYullQlaRjwRuD5HGsyK1irG5o5edJI32nNis5Rf2MlXQBcCEyU\n9MGsRaOAo167EBGdku4AHs20vT8i1ku6LbP8noh4XtKPgTVACrgvItb1f3PMkhcRrG5s5vJ5
JyZd\nitkx6+tjTA0wItNuZNb8vcC7+3rxiFhCj47oiLinx/Tngc/nUqxZMXh590GaD3a4P8GK0lFDISKe\nBJ6UtCgi6gepJrOitqrhcCezB8Gz4tPX4aMvRsTfAF+R9LqzfiLiqrxVZlakVje0UFtdwcmTRvbd\n2KzA9HX46P9mvv9zvgsxKxVrGps5Y8poqis93qQVn74OH63IfPeYRGY56ApYt7WF971xZtKlmPVL\nX4eP1gK9XiwWEWcNeEVmRWx7WyWtHSl3MlvR6uvw0dsHpQqzEtHUWg3g4S2saPV1+MhnHJkdg6ZD\nVYwZVs30cUOTLsWsX47aEybp6cz3fZL29vw+OCWaFY+trdWcNW0MkpIuxaxf+tpTuDjz3efWmfWh\nPZXuU7h2mq9PsOKV88Asks4BLibd8fx0RPwmb1WZFaFXWqsIxFnuT7Ailuv9FD4OPACMByYAiyR9\nLJ+FmRWbrZlO5rO8p2BFLNc9hfcB8yOiFUDSXcAq4DP5Ksys2DS1VjGyqotJo3z7TSteuV5yuRXI\n/k0fwuvvjWBW1ra2VjGltjPpMsyOS18Xr32ZdB9CC7Be0k8y028Bfp3/8l5v165dLFq0KIlVm/Xq\nUEcX9SvWMXREG4sW+cQ8K159HT5anvm+Avh+1vwn8lKNWZFq3HMQgLHVXQlXYnZ8FFFctzyuq6uL\n5cuX993QbBDd8+RL3PU/G/i7k3Zy+y03Jl2O2etIWhERdX21y6mjWdJc4B+BeWT1LUTEnH5XaFZC\n1jQ2M6a6i+FVxfUhy6ynXDuavwF8DegELgUeBL6Zr6LMis2axham1nYkXYbZccs1FIZGxM9IH26q\nj4hPAn+Yv7LMiseu/W007jnkM4+sJOR6nUKbpArgRUl3kD4ddUT+yjIrHmuaWgAcClYSct1T+Gtg\nGHAnsAC4HnBvmhmwtrEFCSY7FKwE5LSnEBHLADJ7C3dGxL68VmVWRNY0NjNnwnBqK3ckXYrZcct1\n7KO6zF3Y1gBrJa2WtCC/pZkVvohgVUOL77RmJSPXw0f3A38VEbMiYhZwO+kzkszK2taWVnbub+Ns\nh4KViFxDoSsinjo8ERFPkz491aysrW5oBmC+h8u2EtHX2EfnZB4+KenrwLdIj330J3ioCzNWNzZT\nU1nBqZNH4huMWCnoq6P5Cz2mP5H12JduWtlb3dDMaZNHMqSqMulSzAZEX7fjvHSwCjErNl2pYG1j\nC+9aMC3pUswGTK5nH42W9C+Slme+viDJt5eysvbSjv0caO9yf4KVlGM5+2gfcG3may8++8jK3KrD\nncw+88hKSK7DXLwhIt6VNf0pSavyUZBZsVjd0MzIIVXMmTA86VLMBkyuewqHJF18eELSRcChvp4k\n6QpJGyVtkvSRo7Q7V1KnpHfnWI9Z4lY3NnPW9NFUVCjpUswGTK57CrcBD2b1I+yhj7GPJFUCd5O+\ndWcjsEzS4oh47gjt/gl47FgKN0tSa0cXG7bt49Y3+5YiVlr6DIXMeEenRMR8SaMAIiKXm9CeB2yK\niM2Z13kYuBp4rke7DwDfBc49lsLNkrR+6146U8FZ7mS2EtPn4aOISAEfyjzem2MgAEwFGrKmGzPz\nukmaCryT9A18eiXp1sNnPu3Y4UHHLHlrGtOdzB7ewkpNrn0KP5X0vyRNlzTu8NcArP+LwIczwdOr\niLg3Iuoiom7ixIkDsFqz47O6oZlJo4Zw4ujavhubFZFc+xT+hPQVzH/VY/7RDqg2AdOzpqdl5mWr\nAx6WBDABeJukzoj4rxzrMkvEbxqafX2ClaRcQ2Ee6UC4mHQ4PAXc08dzlgFzJc0mHQbXAe/NbhAR\nsw8/lrQI+KEDwQrdzv1t1O86yHvPm5F0KWYDLtdQeID0BWtfyky/NzPv2t6eEBGdmVt3PgpUAvdH\nxHpJt2WW9xUqZgVpZf0eABbMHJtwJWYDL9dQOCMi5mVN
Py6p51lErxMRS4AlPeYdMQwi4qYcazFL\n1IqX91BdKc6Y6pFerPTk2tG8UtL5hyckvRFYnp+SzArbyvo9nD5lNLXVHhnVSk+uobAAeEbSFklb\ngGeBcyWtlbQmb9WZFZj2zhRrGlt86MhKVq6Hj67IaxVmReK5bXtp60xxzgyHgpWmnEIhIurzXYhZ\nMTjcyXzOTJ+OaqUp18NHZka6k3nK6Fomjx6adClmeeFQMDsGK+v3cI77E6yEORTMcrS1+RDbWlrd\nyWwlzaFglqOVL2f6E9zJbCXMoWCWo5X1zdRWVzBvyqikSzHLG4eCWY5WvLyHs6aOobrSfzZWuvzb\nbZaD/W2drGtq4dzZPnRkpc2hYJaD5Vt205UKLpgzIelSzPLKoWCWg6Wbd1NdKV+0ZiXPoWCWg2c3\n72L+tDEMq8l1ZBiz4uRQMOvDvtYO1jW1cMEbxiddilneORTM+rC8fg9dqeD8OQ4FK30OBbM+LN28\nK92f4IvWrAw4FMz6sHTzbs6ePoahNb6pjpU+h4LZUXT3J/jQkZUJh4LZUSzf4v4EKy8OBbOjWLp5\nFzWVFR4u28qGQ8HsKJZu3sXZM8ZQW+3+BCsPDgWzXuw+0M6aphYu9PUJVkYcCma9+MULO4iA3z/1\nhKRLMRs0DgWzXvx8w3YmjKjhjCmjky7FbNA4FMyOoCsVPPnCDi45+QQqKpR0OWaDxqFgdgSrGvbQ\ncqjDh46s7DgUzI7g5xu2U1khLp7r+ydYeXEomB3B4xt2sGDmWEYPrU66FLNB5VAw6+GVllae27bX\nh46sLOU1FCRdIWmjpE2SPnKE5e+TtEbSWknPSJqfz3rMcvHExu0AXHqKQ8HKT95CQVIlcDdwJTAP\neI+keT2a/Ra4JCLOBD4N3Juvesxy9fjG7UwZXcvJk0YkXYrZoMvnnsJ5wKaI2BwR7cDDwNXZDSLi\nmYjYk5lcCkzLYz1mfWrt6OLpF3ey8NQTkHwqqpWffIbCVKAha7oxM683fwb8z5EWSLpV0nJJy3fs\n2DGAJZq91uMbtnOgvYu3nTE56VLMElEQHc2SLiUdCh8+0vKIuDci6iKibuLEiYNbnJWVxau3MmHE\nEN+P2cpWPkOhCZieNT0tM+81JJ0F3AdcHRG78liP2VHta+3gZxu284dnnkilr2K2MpXPUFgGzJU0\nW1INcB2wOLuBpBnA94DrI+KFPNZi1qefPPcq7Z0prjp7StKlmCWmKl8vHBGdku4AHgUqgfsjYr2k\n2zLL7wE+DowHvprp1OuMiLp81WR2NItXb2XqmKGcM8M31LHylbdQAIiIJcCSHvPuyXp8C3BLPmsw\ny8XuA+08/eJObnnTHJ91ZGWtIDqazZK2ZO02OlPBVfN96MjKm0PBjPSho5NOGMFpk0cmXYpZohwK\nVvYadh9k2ZbdXDV/ig8dWdlzKFjZe/DZLVRKXFs3vc+2ZqXOoWBl7WB7J99e1sAVZ5zIiaNrky7H\nLHEOBStr3/9NE3tbO7npwllJl2JWEBwKVrYiggee2cLpU0axYKavTTADh4KVsWc37+KFV/dz04Wz\n3MFsluFQsLK16JdbGDe8hnf42gSzbg4FK0tbdh7gp8+/ynXnTqe2ujLpcswKhkPBytI/P7aRIVWV\n3HTRrKRLMSsoDgUrO2sbW/jhmm3c8qbZnDDSp6GaZXMoWNn53KMbGDusmlvfPCfpUswKjkPBysrT\nL+7kqRd3cvulJzGytjrpcswKjkPBykYqFfzTjzcwdcxQrr9gZtLlmBUkh4KVjW8te5m1TS188C0n\nM6TKZxyZHYlDwcpC/a4DfPZHz/OmuRN45+9NTbocs4LlULCS15UK/vaR1VRWiM+9+ywqKnz1sllv\n8no7TrNCcN9Tm1lev4d/uXY+k0cPTbocs4LmPQUraWsam/nCYy9wxekn+rCRWQ4cClayXt51kPcv\nWsbEkUP47DvP8KB3
Zjnw4SMrSbsPtHPjN35NZyp4+P3nMX7EkKRLMisK3lOwknOwvZM/e2AZW5sP\ncd8NdZx0woikSzIrGt5TsJKyfV8rf/7ActY2tfDV9y2gbta4pEsyKyoOBSsZG1/Zx/sXLWP3gXa+\nfn0db5k3KemSzIqOQ8FKwpK12/jwd9YwtKaSR/7iAs6cNjrpksyKkkPBitr2va18/L/X8+P1r3Dm\n1NF8/foFTBnjaxHM+suhYEXpUHsXD/36Zf7tpy/Q1pniI1eeyi0Xz6aq0udOmB0Ph4IVlb2tHTz0\nq5e576nN7NzfzkUnjefTV5/BnIk+w8hsIDgUrOB1dqV4atNOvreyicfWv0JbZ4o3zZ3AnZfN5Vyf\nXWQ2oBwKVnBSqWDzzv0s3bybp17cwTObdrGvrZMxw6q5tm4619RN46xpY5Iu06wk5TUUJF0B/BtQ\nCdwXEXf1WK7M8rcBB4GbImJlPmuywtHRleLVva3U7zrI5h37eWnHAZ7ftpd1TS0caO8CYOqYobx9\n/mQuPeUEFp5yAjVV7jMwy6e8hYKkSuBu4C1AI7BM0uKIeC6r2ZXA3MzXG4GvZb5bAYoIOlNBVyro\n6ErR0RW0d6bo6ErR2tFFa0eK1s4uDrZ3cai9kwNtXexr7WBfayd7WzvYfaCDXQfa2H2gnVdaWtmx\nv42I373+sJpKTp40knctmMaZU0dzzsyxzJkw3GMWmQ2ifO4pnAdsiojNAJIeBq4GskPhauDBiAhg\nqaQxkiZHxLaBLubJF3bwmR8+13fDXkTfTQZUxJHXGL1MZM8//NyA7jfdIIjImo4ggFRk5mfmpSJ9\n/4FUBKlU0BVBKgVdkQ6D/qqtrmD88CGMG17DuOE1nHriSCaPHsrk0bXMGD+MORNGMGnUEAeAWcLy\nGQpTgYas6UZevxdwpDZTgdeEgqRbgVsBZsyY0a9iRgypYu6k4ztDRQzyG1Yvq8uenf0m+tr5v5t3\nuI0y/wghpacrlHms9OP0F1RUpB9Xdn+HyooKqitEZaWorqigpqqC6soKqitFbXUlQ6oqqK2uZPiQ\nSoZWVzF8SCUja6sZWVtFdRmcKnrTTTclXYLZcSuKjuaIuBe4F6Curq5fH1cXzBzLgpkLBrQuM7NS\nk8+Pb03A9KzpaZl5x9rGzMwGST5DYRkwV9JsSTXAdcDiHm0WAzco7XygJR/9CWZmlpu8HT6KiE5J\ndwCPkj4l9f6IWC/ptszye4AlpE9H3UT6lNSb81WPmZn1La99ChGxhPQbf/a8e7IeB3B7PmswM7Pc\nlf4pIWZmljOHgpmZdXMomJlZN4eCmZl1U2/DKRQqSTuA+n4+fQKwcwDLSZK3pTCVyraUynaAt+Ww\nmRExsa9GRRcKx0PS8oioS7qOgeBtKUylsi2lsh3gbTlWPnxkZmbdHApmZtat3ELh3qQLGEDelsJU\nKttSKtsB3pZjUlZ9CmZmdnTltqdgZmZH4VAwM7NuZREKkq6RtF5SSlJd1vxZkg5JWpX5uudor1MI\netuWzLK9qDflAAADr0lEQVSPStokaaOky5OqsT8kfVJSU9bP4m1J13QsJF2R+X/fJOkjSddzPCRt\nkbQ283NYnnQ9x0LS/ZK2S1qXNW+cpJ9IejHzfWySNeaql23J+99JWYQCsA74Y+AXR1j2UkScnfm6\nbZDr6o8jboukeaTvWXE6cAXwVUmVg1/ecfnXrJ/Fkr6bF4bM//PdwJXAPOA9mZ9HMbs083MotvP7\nF5H+/c/2EeBnETEX+Flmuhgs4vXbAnn+OymLUIiI5yNiY9J1DISjbMvVwMMR0RYRvyV9j4rzBre6\nsnUesCkiNkdEO/Aw6Z+HDbKI+AWwu8fsq4EHMo8fAP5oUIvqp162Je/KIhT6MDuzG/akpDclXcxx\nmAo0ZE03ZuYVkw9IWpPZbS6KXfyMUvi/zxbATyWtkHRr0sUMgElZd3R8BZiUZDEDIK
9/JyUTCpJ+\nKmndEb6O9oltGzAjIs4GPgg8JGnU4FTcu35uS8HrY7u+BswBzib9c/lCosWWt4szfxNXArdLenPS\nBQ2UzI29ivk8/Lz/neT1zmuDKSL+oB/PaQPaMo9XSHoJOBlItHOtP9sCNAHTs6anZeYVjFy3S9K/\nAz/MczkDqeD/749FRDRlvm+X9H3Sh8eO1B9XLF6VNDkitkmaDGxPuqD+iohXDz/O199Jyewp9Iek\niYc7YyXNAeYCm5Otqt8WA9dJGiJpNult+XXCNeUs88d62DtJd6gXi2XAXEmzJdWQ7vBfnHBN/SJp\nuKSRhx8Db6W4fhZHshi4MfP4RuC/E6zluAzG30nJ7CkcjaR3Al8GJgI/krQqIi4H3gz8g6QOIAXc\nFhGD3rFzLHrblohYL+kR4DmgE7g9IrqSrPUYfU7S2aR37bcAf5FsObmLiE5JdwCPApXA/RGxPuGy\n+msS8H1JkH5/eCgifpxsSbmT9C1gITBBUiPwCeAu4BFJf0Z62P1rk6swd71sy8J8/514mAszM+tW\n1oePzMzstRwKZmbWzaFgZmbdHApmZtbNoWBmZt0cCmZm1s2hYGZm3RwKZsdJ0rmZAcpqM1cEr5d0\nRtJ1mfWHL14zGwCSPgPUAkOBxoj4x4RLMusXh4LZAMiMebQMaAUuLLIhRsy6+fCR2cAYD4wARpLe\nYzArSt5TMBsAkhaTvuPabGByRNyRcElm/VIWo6Sa5ZOkG4COiHgoMxT7M5J+PyJ+nnRtZsfKewpm\nZtbNfQpmZtbNoWBmZt0cCmZm1s2hYGZm3RwKZmbWzaFgZmbdHApmZtbt/wM0YVudODC3EQAAAABJ\nRU5ErkJggg==\n", 48 | "text/plain": [ 49 | "" 50 | ] 51 | }, 52 | "metadata": {}, 53 | "output_type": "display_data" 54 | } 55 | ], 56 | "source": [ 57 | "plt.plot(x, lst);\n", 58 | "plt.axhline(0.5, c='k', alpha=0.5)\n", 59 | "plt.vlines(0, 0, 1, alpha=0.5)\n", 60 | "plt.title('Sigmoid Function')\n", 61 | "plt.xlabel('x')\n", 62 | "plt.ylabel('probability');" 63 | ] 64 | } 65 | ], 66 | "metadata": { 67 | "kernelspec": { 68 | "display_name": "Python [conda root]", 69 | "language": "python", 70 | "name": "conda-root-py" 71 | }, 72 | "language_info": { 73 | "codemirror_mode": { 74 | "name": "ipython", 75 | "version": 3 76 | }, 77 | "file_extension": ".py", 78 | "mimetype": "text/x-python", 79 | "name": "python", 80 | "nbconvert_exporter": "python", 81 | "pygments_lexer": "ipython3", 82 | "version": "3.5.4" 83 | } 84 | }, 85 | "nbformat": 4, 86 | "nbformat_minor": 2 87 | } 88 | -------------------------------------------------------------------------------- /notebooks/Machine_Learning/Supervised_Learning/Model_Selection/Feature_Selection.ipynb: 
-------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "alphas = [0.0001, 0.001, 0.01, 0.1, 1, 10, 100]\n", 10 | "print('All errors are RMSE')\n", 11 | "print('-'*74)\n", 12 | "for alpha in alphas:\n", 13 | " # instantiate and fit model\n", 14 | " lasso = Lasso(alpha=alpha, fit_intercept=True, random_state=99)\n", 15 | " lasso.fit(X_train, y_train)\n", 16 | " # calculate errors\n", 17 | " train_error = np.sqrt(mean_squared_error(y_train, lasso.predict(X_train)))\n", 18 | " validation_error = np.sqrt(mean_squared_error(y_validation, lasso.predict(X_validation)))\n", 19 | " test_error = np.sqrt(mean_squared_error(y_test, lasso.predict(X_test)))\n", 20 | " # print errors as report\n", 21 | " print('alpha: {:6} | train error: {:5} | val error: {:6} | test error: {}'.\n", 22 | " format(alpha,\n", 23 | " round(train_error,2),\n", 24 | " round(validation_error,2),\n", 25 | " round(test_error,2)))" 26 | ] 27 | } 28 | ], 29 | "metadata": { 30 | "kernelspec": { 31 | "display_name": "Python [conda root]", 32 | "language": "python", 33 | "name": "conda-root-py" 34 | }, 35 | "language_info": { 36 | "codemirror_mode": { 37 | "name": "ipython", 38 | "version": 3 39 | }, 40 | "file_extension": ".py", 41 | "mimetype": "text/x-python", 42 | "name": "python", 43 | "nbconvert_exporter": "python", 44 | "pygments_lexer": "ipython3", 45 | "version": "3.5.4" 46 | } 47 | }, 48 | "nbformat": 4, 49 | "nbformat_minor": 2 50 | } 51 | -------------------------------------------------------------------------------- /notebooks/OS_library/OS_Sandbox.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import os" 12 | ] 13 | }, 14 | { 15 
| "cell_type": "code", 16 | "execution_count": 3, 17 | "metadata": {}, 18 | "outputs": [ 19 | { 20 | "data": { 21 | "text/plain": [ 22 | "posix.uname_result(sysname='Darwin', nodename='metis-davidziganto.local', release='16.7.0', version='Darwin Kernel Version 16.7.0: Wed Oct 4 00:17:00 PDT 2017; root:xnu-3789.71.6~1/RELEASE_X86_64', machine='x86_64')" 23 | ] 24 | }, 25 | "execution_count": 3, 26 | "metadata": {}, 27 | "output_type": "execute_result" 28 | } 29 | ], 30 | "source": [ 31 | "os.uname()" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": 5, 37 | "metadata": {}, 38 | "outputs": [ 39 | { 40 | "data": { 41 | "text/plain": [ 42 | "1333" 43 | ] 44 | }, 45 | "execution_count": 5, 46 | "metadata": {}, 47 | "output_type": "execute_result" 48 | } 49 | ], 50 | "source": [ 51 | "os.getpid()" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": 8, 57 | "metadata": {}, 58 | "outputs": [ 59 | { 60 | "ename": "AttributeError", 61 | "evalue": "module 'os' has no attribute 'getresuid'", 62 | "output_type": "error", 63 | "traceback": [ 64 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 65 | "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", 66 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mos\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgetresuid\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", 67 | "\u001b[0;31mAttributeError\u001b[0m: module 'os' has no attribute 'getresuid'" 68 | ] 69 | } 70 | ], 71 | "source": [ 72 | "os.getresuid()" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": null, 78 | "metadata": { 79 | "collapsed": true 80 | }, 81 | "outputs": [], 82 | "source": [] 83 | } 84 | ], 85 | "metadata": { 86 | "kernelspec": { 87 | "display_name": "Python [conda root]", 88 | "language": "python", 89 | "name": "conda-root-py" 90 | 
}, 91 | "language_info": { 92 | "codemirror_mode": { 93 | "name": "ipython", 94 | "version": 3 95 | }, 96 | "file_extension": ".py", 97 | "mimetype": "text/x-python", 98 | "name": "python", 99 | "nbconvert_exporter": "python", 100 | "pygments_lexer": "ipython3", 101 | "version": "3.5.4" 102 | } 103 | }, 104 | "nbformat": 4, 105 | "nbformat_minor": 2 106 | } 107 | -------------------------------------------------------------------------------- /notebooks/Python/Coding_Best_Practices/Complexity_Practice.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import numpy as np" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 2, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "X = np.array([12,6,3,4,5,6,7,8,9]).reshape(3,3)\n", 19 | "Y = np.array([5,8,1,2,6,7,3,0,4,5,9,1]).reshape(3,4)" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 3, 25 | "metadata": {}, 26 | "outputs": [ 27 | { 28 | "data": { 29 | "text/plain": [ 30 | "array([[12, 6, 3],\n", 31 | " [ 4, 5, 6],\n", 32 | " [ 7, 8, 9]])" 33 | ] 34 | }, 35 | "execution_count": 3, 36 | "metadata": {}, 37 | "output_type": "execute_result" 38 | } 39 | ], 40 | "source": [ 41 | "X" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": 4, 47 | "metadata": {}, 48 | "outputs": [ 49 | { 50 | "data": { 51 | "text/plain": [ 52 | "array([[5, 8, 1, 2],\n", 53 | " [6, 7, 3, 0],\n", 54 | " [4, 5, 9, 1]])" 55 | ] 56 | }, 57 | "execution_count": 4, 58 | "metadata": {}, 59 | "output_type": "execute_result" 60 | } 61 | ], 62 | "source": [ 63 | "Y" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": 5, 69 | "metadata": {}, 70 | "outputs": [], 71 | "source": [ 72 | "rows, cols = X.shape[0], Y.shape[1]\n", 73 | "result = np.zeros(rows * cols).reshape(rows, cols)\n", 74 | 
"\n", 75 | "# iterate through rows of X\n", 76 | "for i in range(rows):\n", 77 | " # iterate through columns of Y\n", 78 | " for j in range(cols):\n", 79 | " # iterate through rows of Y\n", 80 | " for k in range(len(Y)):\n", 81 | " result[i][j] += X[i][k] * Y[k][j]" 82 | ] 83 | }, 84 | { 85 | "cell_type": "markdown", 86 | "metadata": {}, 87 | "source": [ 88 | "# Q1: What does this block of code do?" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": null, 94 | "metadata": {}, 95 | "outputs": [], 96 | "source": [] 97 | }, 98 | { 99 | "cell_type": "markdown", 100 | "metadata": {}, 101 | "source": [ 102 | "# Q2: What's the complexity?" 103 | ] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "execution_count": null, 108 | "metadata": {}, 109 | "outputs": [], 110 | "source": [] 111 | }, 112 | { 113 | "cell_type": "markdown", 114 | "metadata": {}, 115 | "source": [ 116 | "## Show Results" 117 | ] 118 | }, 119 | { 120 | "cell_type": "code", 121 | "execution_count": 6, 122 | "metadata": {}, 123 | "outputs": [ 124 | { 125 | "data": { 126 | "text/plain": [ 127 | "array([[ 108., 153., 57., 27.],\n", 128 | " [ 74., 97., 73., 14.],\n", 129 | " [ 119., 157., 112., 23.]])" 130 | ] 131 | }, 132 | "execution_count": 6, 133 | "metadata": {}, 134 | "output_type": "execute_result" 135 | } 136 | ], 137 | "source": [ 138 | "result" 139 | ] 140 | }, 141 | { 142 | "cell_type": "code", 143 | "execution_count": 7, 144 | "metadata": {}, 145 | "outputs": [ 146 | { 147 | "data": { 148 | "text/plain": [ 149 | "array([[108, 153, 57, 27],\n", 150 | " [ 74, 97, 73, 14],\n", 151 | " [119, 157, 112, 23]])" 152 | ] 153 | }, 154 | "execution_count": 7, 155 | "metadata": {}, 156 | "output_type": "execute_result" 157 | } 158 | ], 159 | "source": [ 160 | "np_result = np.dot(X,Y)\n", 161 | "np_result" 162 | ] 163 | }, 164 | { 165 | "cell_type": "code", 166 | "execution_count": 8, 167 | "metadata": {}, 168 | "outputs": [ 169 | { 170 | "data": { 171 | "text/plain": [ 172 | "True" 173 
| ] 174 | }, 175 | "execution_count": 8, 176 | "metadata": {}, 177 | "output_type": "execute_result" 178 | } 179 | ], 180 | "source": [ 181 | "np.allclose(result, np_result)" 182 | ] 183 | }, 184 | { 185 | "cell_type": "code", 186 | "execution_count": null, 187 | "metadata": {}, 188 | "outputs": [], 189 | "source": [] 190 | }, 191 | { 192 | "cell_type": "code", 193 | "execution_count": null, 194 | "metadata": {}, 195 | "outputs": [], 196 | "source": [] 197 | }, 198 | { 199 | "cell_type": "markdown", 200 | "metadata": {}, 201 | "source": [ 202 | "# Answers\n", 203 | "\n", 204 | "## Q1: Matrix-Matrix multiplication\n", 205 | "## Q2: if m = rows of X, n = cols of Y and p = rows of Y --> O($mnp$) " 206 | ] 207 | } 208 | ], 209 | "metadata": { 210 | "kernelspec": { 211 | "display_name": "Python [conda root]", 212 | "language": "python", 213 | "name": "conda-root-py" 214 | }, 215 | "language_info": { 216 | "codemirror_mode": { 217 | "name": "ipython", 218 | "version": 3 219 | }, 220 | "file_extension": ".py", 221 | "mimetype": "text/x-python", 222 | "name": "python", 223 | "nbconvert_exporter": "python", 224 | "pygments_lexer": "ipython3", 225 | "version": "3.5.4" 226 | } 227 | }, 228 | "nbformat": 4, 229 | "nbformat_minor": 2 230 | } 231 | -------------------------------------------------------------------------------- /notebooks/Python/Coding_Best_Practices/OOP_How_to_Write_a_Class.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Imports" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import numpy as np\n", 17 | "from sklearn.linear_model import LinearRegression\n", 18 | "from sklearn.datasets import load_boston\n", 19 | "from sklearn.metrics import mean_squared_error, r2_score" 20 | ] 21 | }, 22 | { 23 | "cell_type": "markdown", 24 | 
"metadata": {}, 25 | "source": [ 26 | "## Classes" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": 2, 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": [ 35 | "class MyLinearRegression:\n", 36 | " \n", 37 | " def __init__(self, fit_intercept=True):\n", 38 | " self.coef_ = None\n", 39 | " self.intercept_ = None\n", 40 | " self._fit_intercept = fit_intercept\n", 41 | "\n", 42 | " \n", 43 | " def fit(self, X, y):\n", 44 | " \"\"\"Fit model coefficients.\n", 45 | "\n", 46 | " Arguments:\n", 47 | " X: 1D or 2D numpy array \n", 48 | " y: 1D numpy array\n", 49 | " \"\"\"\n", 50 | " \n", 51 | " # check if X is 1D or 2D array\n", 52 | " if len(X.shape) == 1:\n", 53 | " X = X.reshape(-1,1)\n", 54 | " \n", 55 | " # add bias if fit_intercept\n", 56 | " if self._fit_intercept:\n", 57 | " X = np.c_[np.ones(X.shape[0]), X]\n", 58 | " \n", 59 | " # closed form solution\n", 60 | " xTx = np.dot(X.T, X)\n", 61 | " inverse_xTx = np.linalg.inv(xTx)\n", 62 | " xTy = np.dot(X.T, y)\n", 63 | " coef = np.dot(inverse_xTx, xTy)\n", 64 | " \n", 65 | " # set attributes\n", 66 | " if self._fit_intercept:\n", 67 | " self.intercept_ = coef[0]\n", 68 | " self.coef_ = coef[1:]\n", 69 | " else:\n", 70 | " self.intercept_ = 0\n", 71 | " self.coef_ = coef\n", 72 | " \n", 73 | " def predict(self, X):\n", 74 | " \"\"\"Output model prediction.\n", 75 | "\n", 76 | " Arguments:\n", 77 | " X: 1D or 2D numpy array \n", 78 | " \"\"\"\n", 79 | " # check if X is 1D or 2D array\n", 80 | " if len(X.shape) == 1:\n", 81 | " X = X.reshape(-1,1) \n", 82 | " return np.dot(X, self.coef_) + self.intercept_" 83 | ] 84 | }, 85 | { 86 | "cell_type": "code", 87 | "execution_count": 3, 88 | "metadata": {}, 89 | "outputs": [], 90 | "source": [ 91 | "class Metrics:\n", 92 | " \n", 93 | " def __init__(self, X, y, model):\n", 94 | " self.data = X\n", 95 | " self.target = y\n", 96 | " self.model = model\n", 97 | " # degrees of freedom population dep. 
variable variance\n", 98 | " self._dft = X.shape[0] - 1 \n", 99 | " # degrees of freedom population error variance\n", 100 | " self._dfe = X.shape[0] - X.shape[1] - 1 \n", 101 | " \n", 102 | " def sse(self):\n", 103 | " '''returns sum of squared errors (model vs actual)'''\n", 104 | " squared_errors = (self.target - self.model.predict(self.data)) ** 2\n", 105 | " self.sq_error_ = np.sum(squared_errors)\n", 106 | " return self.sq_error_\n", 107 | " \n", 108 | " def sst(self):\n", 109 | " '''returns total sum of squared errors (actual vs avg(actual))'''\n", 110 | " avg_y = np.mean(self.target)\n", 111 | " squared_errors = (self.target - avg_y) ** 2\n", 112 | " self.sst_ = np.sum(squared_errors)\n", 113 | " return self.sst_\n", 114 | " \n", 115 | " def r_squared(self):\n", 116 | " '''returns calculated value of r^2'''\n", 117 | " self.r_sq_ = 1 - self.sse()/self.sst()\n", 118 | " return self.r_sq_\n", 119 | " \n", 120 | " def adj_r_squared(self):\n", 121 | " '''returns calculated value of adjusted r^2'''\n", 122 | " self.adj_r_sq_ = 1 - (self.sse()/self._dfe) / (self.sst()/self._dft)\n", 123 | " return self.adj_r_sq_\n", 124 | " \n", 125 | " def mse(self):\n", 126 | " '''returns calculated value of mse'''\n", 127 | " self.mse_ = np.mean( (self.model.predict(self.data) - self.target) ** 2 )\n", 128 | " return self.mse_\n", 129 | " \n", 130 | " def pretty_print_stats(self):\n", 131 | " '''returns report of statistics for a given model object'''\n", 132 | " items = ( ('sse:', self.sse()), ('sst:', self.sst()), \n", 133 | " ('mse:', self.mse()), ('r^2:', self.r_squared()), \n", 134 | " ('adj_r^2:', self.adj_r_squared()))\n", 135 | " for item in items:\n", 136 | " print('{0:8} {1:.4f}'.format(item[0], item[1]))" 137 | ] 138 | }, 139 | { 140 | "cell_type": "markdown", 141 | "metadata": {}, 142 | "source": [ 143 | "## Data" 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": 4, 149 | "metadata": {}, 150 | "outputs": [], 151 | "source": [ 152 | 
"boston = load_boston()\n", 153 | "data = boston.data\n", 154 | "target = boston.target" 155 | ] 156 | }, 157 | { 158 | "cell_type": "markdown", 159 | "metadata": {}, 160 | "source": [ 161 | "## Model" 162 | ] 163 | }, 164 | { 165 | "cell_type": "code", 166 | "execution_count": 5, 167 | "metadata": {}, 168 | "outputs": [ 169 | { 170 | "data": { 171 | "text/plain": [ 172 | "LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)" 173 | ] 174 | }, 175 | "execution_count": 5, 176 | "metadata": {}, 177 | "output_type": "execute_result" 178 | } 179 | ], 180 | "source": [ 181 | "# sklearn\n", 182 | "lr = LinearRegression()\n", 183 | "lr.fit(data, target)" 184 | ] 185 | }, 186 | { 187 | "cell_type": "code", 188 | "execution_count": 6, 189 | "metadata": {}, 190 | "outputs": [], 191 | "source": [ 192 | "# from scratch\n", 193 | "mlr = MyLinearRegression()\n", 194 | "mlr.fit(data, target)" 195 | ] 196 | }, 197 | { 198 | "cell_type": "markdown", 199 | "metadata": {}, 200 | "source": [ 201 | "## Sklearn Stats" 202 | ] 203 | }, 204 | { 205 | "cell_type": "code", 206 | "execution_count": 7, 207 | "metadata": {}, 208 | "outputs": [ 209 | { 210 | "name": "stdout", 211 | "output_type": "stream", 212 | "text": [ 213 | "mse: 21.8978\n", 214 | "r^2 0.7406\n" 215 | ] 216 | } 217 | ], 218 | "source": [ 219 | "print('mse:', round(mean_squared_error(target, lr.predict(data)), 4))\n", 220 | "print('r^2', round(r2_score(target, lr.predict(data)), 4))" 221 | ] 222 | }, 223 | { 224 | "cell_type": "markdown", 225 | "metadata": {}, 226 | "source": [ 227 | "## Metrics Stats" 228 | ] 229 | }, 230 | { 231 | "cell_type": "code", 232 | "execution_count": 8, 233 | "metadata": {}, 234 | "outputs": [ 235 | { 236 | "name": "stdout", 237 | "output_type": "stream", 238 | "text": [ 239 | "sse: 11080.2763\n", 240 | "sst: 42716.2954\n", 241 | "mse: 21.8978\n", 242 | "r^2: 0.7406\n", 243 | "adj_r^2: 0.7338\n" 244 | ] 245 | } 246 | ], 247 | "source": [ 248 | "metrics = Metrics(data, 
target, mlr)\n", 249 | "metrics.pretty_print_stats()" 250 | ] 251 | }, 252 | { 253 | "cell_type": "markdown", 254 | "metadata": {}, 255 | "source": [ 256 | "---" 257 | ] 258 | }, 259 | { 260 | "cell_type": "markdown", 261 | "metadata": {}, 262 | "source": [ 263 | "## Right now we have two separate classes. We'd like to integrate them but how do we do that? \n", 264 | "\n", 265 | "## -- INHERITANCE!" 266 | ] 267 | }, 268 | { 269 | "cell_type": "code", 270 | "execution_count": 9, 271 | "metadata": {}, 272 | "outputs": [], 273 | "source": [ 274 | "class ModifiedMetrics:\n", 275 | " \n", 276 | " def sse(self):\n", 277 | " '''returns sum of squared errors (model vs actual)'''\n", 278 | " squared_errors = (self.target - self.predict(self.data)) ** 2\n", 279 | " self.sq_error_ = np.sum(squared_errors)\n", 280 | " return self.sq_error_\n", 281 | " \n", 282 | " def sst(self):\n", 283 | " '''returns total sum of squared errors (actual vs avg(actual))'''\n", 284 | " avg_y = np.mean(self.target)\n", 285 | " squared_errors = (self.target - avg_y) ** 2\n", 286 | " self.sst_ = np.sum(squared_errors)\n", 287 | " return self.sst_\n", 288 | " \n", 289 | " def r_squared(self):\n", 290 | " '''returns calculated value of r^2'''\n", 291 | " self.r_sq_ = 1 - self.sse()/self.sst()\n", 292 | " return self.r_sq_\n", 293 | " \n", 294 | " def adj_r_squared(self):\n", 295 | " '''returns calculated value of adjusted r^2'''\n", 296 | " self.adj_r_sq_ = 1 - (self.sse()/self._dfe) / (self.sst()/self._dft)\n", 297 | " return self.adj_r_sq_\n", 298 | " \n", 299 | " def mse(self):\n", 300 | " '''returns calculated value of mse'''\n", 301 | " self.mse_ = np.mean( (self.predict(self.data) - self.target) ** 2 )\n", 302 | " return self.mse_\n", 303 | " \n", 304 | " def pretty_print_stats(self):\n", 305 | " '''returns report of statistics for a given model object'''\n", 306 | " items = ( ('sse:', self.sse()), ('sst:', self.sst()), \n", 307 | " ('mse:', self.mse()), ('r^2:', self.r_squared()), \n", 308 | " 
('adj_r^2:', self.adj_r_squared()))\n", 309 | " for item in items:\n", 310 | " print('{0:8} {1:.4f}'.format(item[0], item[1]))" 311 | ] 312 | }, 313 | { 314 | "cell_type": "code", 315 | "execution_count": 10, 316 | "metadata": {}, 317 | "outputs": [], 318 | "source": [ 319 | "class MyLinearRegressionWithInheritance(ModifiedMetrics):\n", 320 | " \n", 321 | " \n", 322 | " def __init__(self, fit_intercept=True):\n", 323 | " self.coef_ = None\n", 324 | " self.intercept_ = None\n", 325 | " self._fit_intercept = fit_intercept\n", 326 | " \n", 327 | " \n", 328 | " def fit(self, X, y):\n", 329 | " \"\"\"\n", 330 | " Fit model coefficients.\n", 331 | "\n", 332 | " Arguments:\n", 333 | " X: 1D or 2D numpy array \n", 334 | " y: 1D numpy array\n", 335 | " \"\"\"\n", 336 | " \n", 337 | " # training data & ground truth data\n", 338 | " self.data = X\n", 339 | " self.target = y\n", 340 | " \n", 341 | " # degrees of freedom population dep. variable variance \n", 342 | " self._dft = X.shape[0] - 1 \n", 343 | " # degrees of freedom population error variance\n", 344 | " self._dfe = X.shape[0] - X.shape[1] - 1\n", 345 | " \n", 346 | " # check if X is 1D or 2D array\n", 347 | " if len(X.shape) == 1:\n", 348 | " X = X.reshape(-1,1)\n", 349 | " \n", 350 | " # add bias if fit_intercept\n", 351 | " if self._fit_intercept:\n", 352 | " X = np.c_[np.ones(X.shape[0]), X]\n", 353 | " \n", 354 | " # closed form solution\n", 355 | " xTx = np.dot(X.T, X)\n", 356 | " inverse_xTx = np.linalg.inv(xTx)\n", 357 | " xTy = np.dot(X.T, y)\n", 358 | " coef = np.dot(inverse_xTx, xTy)\n", 359 | " \n", 360 | " # set attributes\n", 361 | " if self._fit_intercept:\n", 362 | " self.intercept_ = coef[0]\n", 363 | " self.coef_ = coef[1:]\n", 364 | " else:\n", 365 | " self.intercept_ = 0\n", 366 | " self.coef_ = coef\n", 367 | " \n", 368 | " def predict(self, X):\n", 369 | " \"\"\"Output model prediction.\n", 370 | "\n", 371 | " Arguments:\n", 372 | " X: 1D or 2D numpy array \n", 373 | " \"\"\"\n", 374 | " # check 
if X is 1D or 2D array\n", 375 | " if len(X.shape) == 1:\n", 376 | " X = X.reshape(-1,1) \n", 377 | " return np.dot(X, self.coef_) + self.intercept_" 378 | ] 379 | }, 380 | { 381 | "cell_type": "code", 382 | "execution_count": 11, 383 | "metadata": {}, 384 | "outputs": [], 385 | "source": [ 386 | "# from scratch w/inheritance\n", 387 | "mlri = MyLinearRegressionWithInheritance()\n", 388 | "mlri.fit(data, target)" 389 | ] 390 | }, 391 | { 392 | "cell_type": "code", 393 | "execution_count": 12, 394 | "metadata": {}, 395 | "outputs": [ 396 | { 397 | "name": "stdout", 398 | "output_type": "stream", 399 | "text": [ 400 | "sse: 11080.2763\n", 401 | "sst: 42716.2954\n", 402 | "mse: 21.8978\n", 403 | "r^2: 0.7406\n", 404 | "adj_r^2: 0.7338\n" 405 | ] 406 | } 407 | ], 408 | "source": [ 409 | "mlri.pretty_print_stats()" 410 | ] 411 | } 412 | ], 413 | "metadata": { 414 | "kernelspec": { 415 | "display_name": "Python [conda root]", 416 | "language": "python", 417 | "name": "conda-root-py" 418 | }, 419 | "language_info": { 420 | "codemirror_mode": { 421 | "name": "ipython", 422 | "version": 3 423 | }, 424 | "file_extension": ".py", 425 | "mimetype": "text/x-python", 426 | "name": "python", 427 | "nbconvert_exporter": "python", 428 | "pygments_lexer": "ipython3", 429 | "version": "3.5.4" 430 | } 431 | }, 432 | "nbformat": 4, 433 | "nbformat_minor": 2 434 | } 435 | -------------------------------------------------------------------------------- /notebooks/Python/NumPy/NP_argwhere_isin.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "np.argwhere(X)" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": null, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "np.isin(X, 1)" 19 | ] 20 | } 21 | ], 22 | "metadata": { 23 | "kernelspec": { 24 | "display_name": "Python [conda 
def CanYouSpell(list_of_letters, name):
    '''takes list of letters and a name, then returns Boolean showing if name can be spelled

    Every element of list_of_letters is one magnet and can be used at most once,
    so a character that occurs k times in name must occur at least k times in
    list_of_letters. Matching is exact, i.e. case-sensitive.

    INPUT: list_of_letters - list (or list subclass) of hashable items
           name - string to spell
    OUTPUT: True if name can be spelled, False otherwise
    RAISES: AssertionError if list_of_letters is not a list or name is not a string
    '''

    # type checking (isinstance also accepts subclasses of list / str)
    assert isinstance(list_of_letters, list), 'list_of_letters input must be a list'
    assert isinstance(name, str), 'name input must be a string'

    # import (kept local so the cell stays self-contained)
    from collections import Counter

    # fewer magnets than characters can never be enough
    if len(list_of_letters) < len(name):
        return False

    # Counter subtraction keeps only positive counts, so whatever is left
    # over is exactly the set of characters the bag cannot supply.
    missing = Counter(name) - Counter(list_of_letters)
    return not missing
"execute_result" 138 | } 139 | ], 140 | "source": [ 141 | "# fun example 1\n", 142 | "CanYouSpell(list('oranges and apples'), 'apple')" 143 | ] 144 | }, 145 | { 146 | "cell_type": "code", 147 | "execution_count": 6, 148 | "metadata": {}, 149 | "outputs": [ 150 | { 151 | "data": { 152 | "text/plain": [ 153 | "False" 154 | ] 155 | }, 156 | "execution_count": 6, 157 | "metadata": {}, 158 | "output_type": "execute_result" 159 | } 160 | ], 161 | "source": [ 162 | "# fun example 2\n", 163 | "CanYouSpell(list('oranges and apps'), 'apple')" 164 | ] 165 | } 166 | ], 167 | "metadata": { 168 | "kernelspec": { 169 | "display_name": "Python [conda root]", 170 | "language": "python", 171 | "name": "conda-root-py" 172 | }, 173 | "language_info": { 174 | "codemirror_mode": { 175 | "name": "ipython", 176 | "version": 3 177 | }, 178 | "file_extension": ".py", 179 | "mimetype": "text/x-python", 180 | "name": "python", 181 | "nbconvert_exporter": "python", 182 | "pygments_lexer": "ipython3", 183 | "version": "3.5.4" 184 | } 185 | }, 186 | "nbformat": 4, 187 | "nbformat_minor": 2 188 | } 189 | -------------------------------------------------------------------------------- /notebooks/Python/Programming_Problems/Bubble_Sort.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import numpy as np" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "## Bubble Sort\n", 17 | "Bubble sort is described in detail on [Wikipedia](https://en.wikipedia.org/wiki/Bubble_sort).\n", 18 | "\n", 19 | "Its complexity is $O(n^2)$." 
def bubble_sort(array, verbose=False):
    '''The bubble sort algorithm.

    Sorts `array` in place, in ascending order, and returns that same object.
    Each pass compares neighbouring elements and swaps those that are out of
    order; passes repeat until one completes without a swap.

    INPUT: array - mutable, indexable sequence (list or 1-D numpy array)
           verbose - if True, print the sequence after every pass
    OUTPUT: the sorted array or list (the object that was passed in)
    '''

    # setup
    unsorted = True
    last_index = len(array) - 1

    # main logic
    while unsorted:
        changes = 0
        for i in range(last_index):
            # operate on the argument, not on a global that happens to exist
            if array[i] > array[i+1]:
                array[i], array[i+1] = array[i+1], array[i]
                changes += 1
        # a pass without swaps means everything is in order
        if not changes:
            unsorted = False
        if verbose:
            print(array)
    return array
35 38 39 40 46]\n", 90 | "[11 14 1 10 16 23 26 22 23 10 12 11 10 0 6 10 31 32 33 34 35 38 39 40 46]\n", 91 | "[11 1 10 14 16 23 22 23 10 12 11 10 0 6 10 26 31 32 33 34 35 38 39 40 46]\n", 92 | "[ 1 10 11 14 16 22 23 10 12 11 10 0 6 10 23 26 31 32 33 34 35 38 39 40 46]\n", 93 | "[ 1 10 11 14 16 22 10 12 11 10 0 6 10 23 23 26 31 32 33 34 35 38 39 40 46]\n", 94 | "[ 1 10 11 14 16 10 12 11 10 0 6 10 22 23 23 26 31 32 33 34 35 38 39 40 46]\n", 95 | "[ 1 10 11 14 10 12 11 10 0 6 10 16 22 23 23 26 31 32 33 34 35 38 39 40 46]\n", 96 | "[ 1 10 11 10 12 11 10 0 6 10 14 16 22 23 23 26 31 32 33 34 35 38 39 40 46]\n", 97 | "[ 1 10 10 11 11 10 0 6 10 12 14 16 22 23 23 26 31 32 33 34 35 38 39 40 46]\n", 98 | "[ 1 10 10 11 10 0 6 10 11 12 14 16 22 23 23 26 31 32 33 34 35 38 39 40 46]\n", 99 | "[ 1 10 10 10 0 6 10 11 11 12 14 16 22 23 23 26 31 32 33 34 35 38 39 40 46]\n", 100 | "[ 1 10 10 0 6 10 10 11 11 12 14 16 22 23 23 26 31 32 33 34 35 38 39 40 46]\n", 101 | "[ 1 10 0 6 10 10 10 11 11 12 14 16 22 23 23 26 31 32 33 34 35 38 39 40 46]\n", 102 | "[ 1 0 6 10 10 10 10 11 11 12 14 16 22 23 23 26 31 32 33 34 35 38 39 40 46]\n", 103 | "[ 0 1 6 10 10 10 10 11 11 12 14 16 22 23 23 26 31 32 33 34 35 38 39 40 46]\n", 104 | "[ 0 1 6 10 10 10 10 11 11 12 14 16 22 23 23 26 31 32 33 34 35 38 39 40 46]\n" 105 | ] 106 | } 107 | ], 108 | "source": [ 109 | "bubble_sort(mylist, verbose=True)" 110 | ] 111 | } 112 | ], 113 | "metadata": { 114 | "kernelspec": { 115 | "display_name": "Python [conda root]", 116 | "language": "python", 117 | "name": "conda-root-py" 118 | }, 119 | "language_info": { 120 | "codemirror_mode": { 121 | "name": "ipython", 122 | "version": 3 123 | }, 124 | "file_extension": ".py", 125 | "mimetype": "text/x-python", 126 | "name": "python", 127 | "nbconvert_exporter": "python", 128 | "pygments_lexer": "ipython3", 129 | "version": "3.5.4" 130 | } 131 | }, 132 | "nbformat": 4, 133 | "nbformat_minor": 2 134 | } 135 | 
def checker(arr):
    '''Evaluate the left-hand side of the puzzle for one assignment.

    arr is indexed 0..8 as (A, B, C, D, E, F, G, H, I). Each denominator is
    computed as the product of its two entries, i.e. A/(B*C) + D/(E*F) + G/(H*I).

    INPUT: arr - indexable sequence with at least 9 numeric entries
    OUTPUT: the sum of the three fractions
    '''
    first_term = arr[0] / (arr[1] * arr[2])
    second_term = arr[3] / (arr[4] * arr[5])
    third_term = arr[6] / (arr[7] * arr[8])
    return first_term + second_term + third_term


def pretty_print(count, permute):
    '''Print an assignment A..I on one line, followed by the search count.

    INPUT: count - number printed after the "Searches:" label
           permute - indexable sequence; entries 0..8 are shown as A..I
    OUTPUT: None (prints to stdout)
    '''
    template = 'A={} B={} C={} D={} E={} F={} G={} H={} I={}'
    assignment = [permute[position] for position in range(9)]
    print(template.format(*assignment))
    print('\nSearches:', count)
def naive_search():
    '''random search using permutations - returns number of searches and solution.

    Draws random permutations of the digits 1..9 until `checker` evaluates one
    of them to 1.

    OUTPUT: (count, permute)
            count - number of rejected permutations drawn before the solution
            permute - numpy array holding the accepted permutation
    '''
    import math
    import numpy as np
    digits = np.arange(1,10)
    count = 0
    permute = np.random.permutation(digits)
    # checker() returns a float, so an exact `!= 1` test can reject a true
    # solution whose rounded sum is off by one ulp. Compare with a tolerance
    # instead. checker() sums three ratios whose denominators are products of
    # distinct digits 1..9, so a non-solution misses 1 by at least
    # 1/(9*8*7*6*5*4) = 1/60480, far above abs_tol.
    while not math.isclose(checker(permute), 1.0, rel_tol=0.0, abs_tol=1e-9):
        count += 1
        permute = np.random.permutation(digits)
    return count, permute
def find_w(a, b):
    '''function that returns solution to a*w=b, if it exists

    INPUT: a - coefficients; anything np.asarray accepts. An input that is not
               2-D is reshaped into a single column before solving.
           b - right-hand side
    OUTPUT: the solution from np.linalg.solve when one can be computed,
            otherwise None after printing one of:
              'Warning: Singular Matrix'  - non-2-D `a` could not be solved
              'Underdetermined System: infinite or no solutions'
                                          - fewer rows than columns, or a
                                            square matrix that is rank deficient
              '0 solutions'               - np.linalg.solve failed for a 2-D `a`
    '''
    a = np.asarray(a)
    b = np.asarray(b)

    # check if 1D or 2D array
    if a.ndim != 2:
        # reshape if 1D array, else throws error
        a = a.reshape(-1,1)
        try:
            return np.linalg.solve(a,b)
        except Exception:
            # `Exception` rather than a bare except, so that
            # KeyboardInterrupt / SystemExit still propagate
            print('Warning: Singular Matrix') ## case where a=0 and b!=0
            return None

    n_rows, n_cols = a.shape

    # num rows < num columns = infinite solutions
    if n_rows < n_cols:
        print('Underdetermined System: infinite or no solutions')
        return None

    try:
        # A square matrix with dependent rows (e.g. [[2,2],[3,3]]) has no unique
        # solution, but round-off can let np.linalg.solve return numbers for it
        # anyway, so test the rank explicitly before solving.
        if n_rows == n_cols and np.linalg.matrix_rank(a) < n_cols:
            print('Underdetermined System: infinite or no solutions')
            return None
        return np.linalg.solve(a,b)
    except Exception:
        print('0 solutions')
        return None
Therefore, there exist an infinite number of solutions.* **" 96 | ] 97 | }, 98 | { 99 | "cell_type": "markdown", 100 | "metadata": {}, 101 | "source": [ 102 | "### Example A" 103 | ] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "execution_count": 5, 108 | "metadata": {}, 109 | "outputs": [ 110 | { 111 | "data": { 112 | "text/plain": [ 113 | "array([ 4.])" 114 | ] 115 | }, 116 | "execution_count": 5, 117 | "metadata": {}, 118 | "output_type": "execute_result" 119 | } 120 | ], 121 | "source": [ 122 | "X = np.array([2])\n", 123 | "y = np.array([8])\n", 124 | "find_w(X,y)" 125 | ] 126 | }, 127 | { 128 | "cell_type": "markdown", 129 | "metadata": {}, 130 | "source": [ 131 | "### Example B" 132 | ] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "execution_count": 6, 137 | "metadata": {}, 138 | "outputs": [ 139 | { 140 | "name": "stdout", 141 | "output_type": "stream", 142 | "text": [ 143 | "Warning: Singular Matrix\n" 144 | ] 145 | } 146 | ], 147 | "source": [ 148 | "X = np.array([0])\n", 149 | "y = np.array([8])\n", 150 | "find_w(X,y)" 151 | ] 152 | }, 153 | { 154 | "cell_type": "markdown", 155 | "metadata": {}, 156 | "source": [ 157 | "** *No solution. Zero times anything is always zero!* **" 158 | ] 159 | }, 160 | { 161 | "cell_type": "markdown", 162 | "metadata": {}, 163 | "source": [ 164 | "### Example C" 165 | ] 166 | }, 167 | { 168 | "cell_type": "code", 169 | "execution_count": 7, 170 | "metadata": {}, 171 | "outputs": [ 172 | { 173 | "name": "stdout", 174 | "output_type": "stream", 175 | "text": [ 176 | "Warning: Singular Matrix\n" 177 | ] 178 | } 179 | ], 180 | "source": [ 181 | "X = np.array([2,4])\n", 182 | "y = np.array([8])\n", 183 | "find_w(X,y)" 184 | ] 185 | }, 186 | { 187 | "cell_type": "markdown", 188 | "metadata": {}, 189 | "source": [ 190 | "np.linalg.solve expects square matrix so disregard message. Infinite solutions for this one. Underdetermined." 
191 | ] 192 | }, 193 | { 194 | "cell_type": "markdown", 195 | "metadata": {}, 196 | "source": [ 197 | "### Example D" 198 | ] 199 | }, 200 | { 201 | "cell_type": "code", 202 | "execution_count": 8, 203 | "metadata": {}, 204 | "outputs": [ 205 | { 206 | "data": { 207 | "text/plain": [ 208 | "array([-2., 3.])" 209 | ] 210 | }, 211 | "execution_count": 8, 212 | "metadata": {}, 213 | "output_type": "execute_result" 214 | } 215 | ], 216 | "source": [ 217 | "X = np.array([2,4,0,1]).reshape(2,2)\n", 218 | "y = np.array([8,3])\n", 219 | "find_w(X,y)" 220 | ] 221 | }, 222 | { 223 | "cell_type": "markdown", 224 | "metadata": {}, 225 | "source": [ 226 | "### Example E" 227 | ] 228 | }, 229 | { 230 | "cell_type": "code", 231 | "execution_count": 9, 232 | "metadata": {}, 233 | "outputs": [ 234 | { 235 | "name": "stdout", 236 | "output_type": "stream", 237 | "text": [ 238 | "0 solutions\n" 239 | ] 240 | } 241 | ], 242 | "source": [ 243 | "X = np.array([2,4,0,1,9,5]).reshape(3,2)\n", 244 | "y = np.array([8,3,1])\n", 245 | "find_w(X,y)" 246 | ] 247 | }, 248 | { 249 | "cell_type": "markdown", 250 | "metadata": {}, 251 | "source": [ 252 | "### Example F" 253 | ] 254 | }, 255 | { 256 | "cell_type": "code", 257 | "execution_count": 10, 258 | "metadata": {}, 259 | "outputs": [ 260 | { 261 | "data": { 262 | "text/plain": [ 263 | "array([ 2., 0.])" 264 | ] 265 | }, 266 | "execution_count": 10, 267 | "metadata": {}, 268 | "output_type": "execute_result" 269 | } 270 | ], 271 | "source": [ 272 | "X = np.array([2,2,3,3]).reshape(2,2)\n", 273 | "y = np.array([4,6])\n", 274 | "find_w(X,y)" 275 | ] 276 | }, 277 | { 278 | "cell_type": "markdown", 279 | "metadata": {}, 280 | "source": [ 281 | "Actually infinite solutions because same equation twice, just with different coefficients. Really have 1 unique equation with 2 unknowns. Underdetermined. Need to catch this edge case in code." 
282 | ] 283 | }, 284 | { 285 | "cell_type": "markdown", 286 | "metadata": {}, 287 | "source": [ 288 | "### Example G" 289 | ] 290 | }, 291 | { 292 | "cell_type": "markdown", 293 | "metadata": {}, 294 | "source": [ 295 | "Can't be done. Can't multiply text * numbers." 296 | ] 297 | }, 298 | { 299 | "cell_type": "markdown", 300 | "metadata": {}, 301 | "source": [ 302 | "### Example H" 303 | ] 304 | }, 305 | { 306 | "cell_type": "code", 307 | "execution_count": 11, 308 | "metadata": {}, 309 | "outputs": [ 310 | { 311 | "data": { 312 | "text/plain": [ 313 | "array([ 8., 6.])" 314 | ] 315 | }, 316 | "execution_count": 11, 317 | "metadata": {}, 318 | "output_type": "execute_result" 319 | } 320 | ], 321 | "source": [ 322 | "X = np.array([1,0,0,1]).reshape(2,2)\n", 323 | "y = np.array([8,6])\n", 324 | "find_w(X,y)" 325 | ] 326 | }, 327 | { 328 | "cell_type": "markdown", 329 | "metadata": {}, 330 | "source": [ 331 | "### Example I" 332 | ] 333 | }, 334 | { 335 | "cell_type": "code", 336 | "execution_count": 12, 337 | "metadata": {}, 338 | "outputs": [ 339 | { 340 | "name": "stdout", 341 | "output_type": "stream", 342 | "text": [ 343 | "Underdetermined System: infinite or no solutions\n" 344 | ] 345 | } 346 | ], 347 | "source": [ 348 | "X = np.array([1,1,0,1,0,1]).reshape(2,3)\n", 349 | "y = np.array([8,6])\n", 350 | "find_w(X,y)" 351 | ] 352 | }, 353 | { 354 | "cell_type": "markdown", 355 | "metadata": {}, 356 | "source": [ 357 | "### Example J" 358 | ] 359 | }, 360 | { 361 | "cell_type": "code", 362 | "execution_count": 13, 363 | "metadata": {}, 364 | "outputs": [ 365 | { 366 | "data": { 367 | "text/plain": [ 368 | "array([ 8., -2.])" 369 | ] 370 | }, 371 | "execution_count": 13, 372 | "metadata": {}, 373 | "output_type": "execute_result" 374 | } 375 | ], 376 | "source": [ 377 | "X = np.array([1,0,1,1]).reshape(2,2)\n", 378 | "y = np.array([8,6])\n", 379 | "find_w(X,y)" 380 | ] 381 | }, 382 | { 383 | "cell_type": "markdown", 384 | "metadata": {}, 385 | "source": [ 386 | 
"### NOTES\n", 387 | "\n", 388 | "An **underdetermined** system is one in which there are fewer equations than unknowns. The result is either 0 or infinite solutions. There are 0 solutions when equations are inconsistent. For example, x+y=1 and x+y=2. That can't possibly be because the same equation gives different outputs. There are infinite solutions otherwise.\n", 389 | "\n", 390 | "An **overdetermined** system is one in which there are more equations than unknowns. Be wary of inconsistent equations (x+y=1 and x+y=2) that will cause this system to have 0 solutions. Otherwise a unique solution exists.\n", 391 | "\n", 392 | "**Takeaway:** *so long as the number of unique equations is at least the same as the number of variables AND there are no inconsistencies, a unique solution exists.*" 393 | ] 394 | } 395 | ], 396 | "metadata": { 397 | "kernelspec": { 398 | "display_name": "Python [conda root]", 399 | "language": "python", 400 | "name": "conda-root-py" 401 | }, 402 | "language_info": { 403 | "codemirror_mode": { 404 | "name": "ipython", 405 | "version": 3 406 | }, 407 | "file_extension": ".py", 408 | "mimetype": "text/x-python", 409 | "name": "python", 410 | "nbconvert_exporter": "python", 411 | "pygments_lexer": "ipython3", 412 | "version": "3.5.4" 413 | } 414 | }, 415 | "nbformat": 4, 416 | "nbformat_minor": 2 417 | } 418 | -------------------------------------------------------------------------------- /notebooks/Python/Programming_Problems/Factorial.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Introduction" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "Factorial(n) written as *n!* is the product of all the positive integers up to *n*. \n", 15 | "\n", 16 | "8! 
def factorial(n):
    '''returns factorial of number n w/out recursion

    INPUT: n - nonnegative int (exactly type int; bool and float are rejected)
    OUTPUT: n! as an int, with 0! == 1
    RAISES: AssertionError if n is not an int or is negative
    '''

    # error checking: type first, so that non-numeric input fails with the
    # intended message instead of a TypeError from the comparison below
    assert type(n) is int, 'n must be a nonnegative integer'
    assert n >= 0, 'n must be a nonnegative integer'

    out = 1
    for value in range(1, n+1):
        out *= value
    return out
def recursive_factorial(n):
    '''returns factorial of number n w/recursion

    INPUT: n - nonnegative int (exactly type int; bool and float are rejected)
    OUTPUT: n! as an int, with 0! == 1
    RAISES: AssertionError if n is not an int or is negative
    '''

    # error checking: type first, so that non-numeric input fails with the
    # intended message instead of a TypeError from the comparison below
    assert type(n) is int, 'n must be a nonnegative integer'
    assert n >= 0, 'n must be a nonnegative integer'

    # main logic
    if n == 0:
        # base case stops the recursion
        return 1
    # n! = n * (n-1)!
    return n * recursive_factorial(n - 1)
"cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Introduction" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "From Wikipedia:\n", 15 | "\n", 16 | ">In mathematics, the Fibonacci numbers are the numbers in the following integer sequence, called the Fibonacci sequence, and characterized by the fact that every number after the first two is the sum of the two preceding ones:\n", 17 | "\n", 18 | ">0, 1, 1, 2, 3, 5, 8, 13, 21, 34, 55, 89, 144, ...\n", 19 | "\n", 20 | "\n", 21 | ">By definition, the first two numbers in the Fibonacci sequence are either 1 and 1, or 0 and 1, depending on the chosen starting point of the sequence, and each subsequent number is the sum of the previous two.\n", 22 | "\n", 23 | ">The sequence Fn of Fibonacci numbers is defined by the recurrence relation:\n", 24 | "\n", 25 | ">**$F_{n}$ = $F_{n-1}$ + $F_{n-2}$**" 26 | ] 27 | }, 28 | { 29 | "cell_type": "markdown", 30 | "metadata": {}, 31 | "source": [ 32 | "## Instructions" 33 | ] 34 | }, 35 | { 36 | "cell_type": "markdown", 37 | "metadata": {}, 38 | "source": [ 39 | "Write a function called **fibonacci** that returns the *n*-th Fibonacci number for a given nonnegative integer *n*." 
def fibonacci(n):
    '''with recursion

    Returns the n-th Fibonacci number, using fibonacci(0) == 0 and
    fibonacci(1) == 1 as base cases.

    INPUT: n - nonnegative int (exactly type int; bool and float are rejected)
    OUTPUT: the n-th Fibonacci number as an int
    RAISES: AssertionError if n is not an int or is negative
    '''

    # error checking: type first, so that non-numeric input fails with the
    # intended message instead of a TypeError from the comparison below
    assert type(n) is int, 'n must be a nonnegative integer'
    assert n >= 0, 'n must be a nonnegative integer'

    # main logic
    if n == 0:
        return 0
    elif n == 1:
        return 1
    else:
        # two recursive calls per level: each call re-computes the smaller
        # values from scratch, so the call tree grows quickly with n
        return fibonacci(n-1) + fibonacci(n-2)
def naive_gcd(num1, num2):
    '''enumerate divisors and returns largest of intersection(num1, num2).

    Prints the greatest common divisor together with the number of candidate
    divisors that were tried, and returns the greatest common divisor.

    INPUT: num1, num2 - positive integers
    OUTPUT: greatest common divisor of num1 and num2
    RAISES: AssertionError if num1 or num2 is not positive
    '''

    assert num1 > 0, "num1 must be positive."
    assert num2 > 0, "num2 must be positive."

    divisors1 = []
    divisors2 = []

    count = 0

    # the upper bound is num + 1 because a number divides itself; leaving it
    # out gives a wrong answer whenever one input divides the other
    # (e.g. 6 and 12) and an empty divisor list for an input of 1
    for divisor in range(1, num1 + 1):
        count += 1 ## just to keep tally
        if num1 % divisor == 0:
            divisors1.append(divisor)

    for divisor in range(1, num2 + 1):
        count += 1 ## just to keep tally
        if num2 % divisor == 0:
            divisors2.append(divisor)

    # 1 divides everything, so the intersection is never empty
    result = max(set(divisors1) & set(divisors2))
    print("GCD: {}\nCount: {}".format(result, count))
    return result
def gcd(num1, num2):
    '''returns the greatest common divisor

    Euclid's algorithm by repeated remainder. Prints the result together with
    the number of loop iterations, and returns the result.

    INPUT: num1, num2 - nonnegative integers
    OUTPUT: greatest common divisor of num1 and num2
            (gcd(0, k) == k and gcd(0, 0) == 0)
    RAISES: AssertionError if num1 or num2 is negative
    '''

    assert num1 >= 0, "num1 must be nonnegative."
    assert num2 >= 0, "num2 must be nonnegative."

    count = 0

    while num1 != 0:
        count += 1 ## just to keep tally
        num1, num2 = num2 % num1, num1

    # When the loop ends num2 holds the last nonzero remainder. Reading the
    # answer from num2 also covers num1 == 0 on entry, where the loop body
    # never runs.
    result = num2
    print("GCD: {}\nCount: {}".format(result, count))
    return result
"codemirror_mode": { 144 | "name": "ipython", 145 | "version": 3 146 | }, 147 | "file_extension": ".py", 148 | "mimetype": "text/x-python", 149 | "name": "python", 150 | "nbconvert_exporter": "python", 151 | "pygments_lexer": "ipython3", 152 | "version": "3.5.4" 153 | } 154 | }, 155 | "nbformat": 4, 156 | "nbformat_minor": 2 157 | } 158 | -------------------------------------------------------------------------------- /notebooks/Python/Programming_Problems/Moving_Average_Problem.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import numpy as np" 12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "metadata": {}, 17 | "source": [ 18 | "#### Show Step-by-Step" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 2, 24 | "metadata": {}, 25 | "outputs": [ 26 | { 27 | "data": { 28 | "text/plain": [ 29 | "array([ 2, 4, 8, 16, 7, 99])" 30 | ] 31 | }, 32 | "execution_count": 2, 33 | "metadata": {}, 34 | "output_type": "execute_result" 35 | } 36 | ], 37 | "source": [ 38 | "test = np.array([2,4,8,16,7,99])\n", 39 | "test" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": 3, 45 | "metadata": {}, 46 | "outputs": [ 47 | { 48 | "name": "stdout", 49 | "output_type": "stream", 50 | "text": [ 51 | "2.0\n", 52 | "3.0\n", 53 | "4.66666666667\n", 54 | "7.5\n", 55 | "7.4\n", 56 | "22.6666666667\n" 57 | ] 58 | } 59 | ], 60 | "source": [ 61 | "for i in range(len(test)):\n", 62 | " if i == 0:\n", 63 | " print(np.mean(test[0]))\n", 64 | " else:\n", 65 | " print(np.mean(test[:i+1]))" 66 | ] 67 | }, 68 | { 69 | "cell_type": "markdown", 70 | "metadata": {}, 71 | "source": [ 72 | "#### Function for Rolling Mean" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": 4, 78 | "metadata": { 79 | "collapsed": true 80 | }, 81 | 
"outputs": [], 82 | "source": [ 83 | "def rolling_mean(array, verbose=False):\n", 84 | " '''computes the mean incrementally'''\n", 85 | " \n", 86 | " assert type(array) == np.ndarray, \"input must be of type np.ndarray\"\n", 87 | " \n", 88 | " mv_avg = 0\n", 89 | " num_elements = len(array)\n", 90 | " \n", 91 | " for i, elem in enumerate(array):\n", 92 | " mv_avg += (elem-mv_avg)/(i+1)\n", 93 | " if verbose:\n", 94 | " print(i+1, mv_avg)\n", 95 | " return mv_avg" 96 | ] 97 | }, 98 | { 99 | "cell_type": "markdown", 100 | "metadata": {}, 101 | "source": [ 102 | "#### Example" 103 | ] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "execution_count": 5, 108 | "metadata": {}, 109 | "outputs": [ 110 | { 111 | "name": "stdout", 112 | "output_type": "stream", 113 | "text": [ 114 | "1 2.0\n", 115 | "2 3.0\n", 116 | "3 4.66666666667\n", 117 | "4 7.5\n", 118 | "5 7.4\n", 119 | "6 22.6666666667\n" 120 | ] 121 | }, 122 | { 123 | "data": { 124 | "text/plain": [ 125 | "22.666666666666664" 126 | ] 127 | }, 128 | "execution_count": 5, 129 | "metadata": {}, 130 | "output_type": "execute_result" 131 | } 132 | ], 133 | "source": [ 134 | "rolling_mean(test, verbose=True)" 135 | ] 136 | } 137 | ], 138 | "metadata": { 139 | "kernelspec": { 140 | "display_name": "Python [conda root]", 141 | "language": "python", 142 | "name": "conda-root-py" 143 | }, 144 | "language_info": { 145 | "codemirror_mode": { 146 | "name": "ipython", 147 | "version": 3 148 | }, 149 | "file_extension": ".py", 150 | "mimetype": "text/x-python", 151 | "name": "python", 152 | "nbconvert_exporter": "python", 153 | "pygments_lexer": "ipython3", 154 | "version": "3.5.4" 155 | } 156 | }, 157 | "nbformat": 4, 158 | "nbformat_minor": 2 159 | } 160 | -------------------------------------------------------------------------------- /notebooks/Python/Programming_Problems/Project_Euler_Problem_8_with_Deque.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | 
"cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Objective" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "This notebook shows one way to solve [Project Euler Problem #8](https://projecteuler.net/problem=8) using a great data structure called a deque." 15 | ] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": {}, 20 | "source": [ 21 | "## Imports" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": 1, 27 | "metadata": {}, 28 | "outputs": [], 29 | "source": [ 30 | "from collections import deque\n", 31 | "import numpy as np" 32 | ] 33 | }, 34 | { 35 | "cell_type": "markdown", 36 | "metadata": {}, 37 | "source": [ 38 | "## Data" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": 2, 44 | "metadata": {}, 45 | "outputs": [], 46 | "source": [ 47 | "num = '''73167176531330624919225119674426574742355349194934\n", 48 | "96983520312774506326239578318016984801869478851843\n", 49 | "85861560789112949495459501737958331952853208805511\n", 50 | "12540698747158523863050715693290963295227443043557\n", 51 | "66896648950445244523161731856403098711121722383113\n", 52 | "62229893423380308135336276614282806444486645238749\n", 53 | "30358907296290491560440772390713810515859307960866\n", 54 | "70172427121883998797908792274921901699720888093776\n", 55 | "65727333001053367881220235421809751254540594752243\n", 56 | "52584907711670556013604839586446706324415722155397\n", 57 | "53697817977846174064955149290862569321978468622482\n", 58 | "83972241375657056057490261407972968652414535100474\n", 59 | "82166370484403199890008895243450658541227588666881\n", 60 | "16427171479924442928230863465674813919123162824586\n", 61 | "17866458359124566529476545682848912883142607690042\n", 62 | "24219022671055626321111109370544217506941658960408\n", 63 | "07198403850962455444362981230987879927244284909188\n", 64 | "84580156166097919133875499200524063689912560717606\n", 65 | 
"05886116467109405077541002256983155200055935729725\n", 66 | "71636269561882670428252483600823257530420752963450'''" 67 | ] 68 | }, 69 | { 70 | "cell_type": "markdown", 71 | "metadata": {}, 72 | "source": [ 73 | "## Function" 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": 3, 79 | "metadata": {}, 80 | "outputs": [], 81 | "source": [ 82 | "def max_prod(numString, numDigits=4):\n", 83 | " '''takes string of numbers and outputs greatest product exactly numDigits long'''\n", 84 | " \n", 85 | " numString = numString.replace('\\n','') ## remove newline characters\n", 86 | " dq = deque(maxlen=numDigits)\n", 87 | " max_prod = 0\n", 88 | " \n", 89 | " for num in numString:\n", 90 | " dq.append(num)\n", 91 | " if len(dq) == dq.maxlen:\n", 92 | " current = np.prod([int(val) for val in dq])\n", 93 | " if current > max_prod:\n", 94 | " max_prod = current\n", 95 | " return max_prod" 96 | ] 97 | }, 98 | { 99 | "cell_type": "markdown", 100 | "metadata": {}, 101 | "source": [ 102 | "## Example #1" 103 | ] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "execution_count": 4, 108 | "metadata": {}, 109 | "outputs": [ 110 | { 111 | "data": { 112 | "text/plain": [ 113 | "5832" 114 | ] 115 | }, 116 | "execution_count": 4, 117 | "metadata": {}, 118 | "output_type": "execute_result" 119 | } 120 | ], 121 | "source": [ 122 | "max_prod(num, numDigits=4)" 123 | ] 124 | }, 125 | { 126 | "cell_type": "markdown", 127 | "metadata": {}, 128 | "source": [ 129 | "## Example #2" 130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": 5, 135 | "metadata": {}, 136 | "outputs": [ 137 | { 138 | "data": { 139 | "text/plain": [ 140 | "23514624000" 141 | ] 142 | }, 143 | "execution_count": 5, 144 | "metadata": {}, 145 | "output_type": "execute_result" 146 | } 147 | ], 148 | "source": [ 149 | "max_prod(num, numDigits=13)" 150 | ] 151 | }, 152 | { 153 | "cell_type": "code", 154 | "execution_count": 6, 155 | "metadata": {}, 156 | "outputs": [ 157 | { 158 | "name": 
"stdout", 159 | "output_type": "stream", 160 | "text": [ 161 | "12.1 ms ± 802 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n" 162 | ] 163 | } 164 | ], 165 | "source": [ 166 | "%timeit max_prod(num, numDigits=13)" 167 | ] 168 | } 169 | ], 170 | "metadata": { 171 | "kernelspec": { 172 | "display_name": "Python [conda root]", 173 | "language": "python", 174 | "name": "conda-root-py" 175 | }, 176 | "language_info": { 177 | "codemirror_mode": { 178 | "name": "ipython", 179 | "version": 3 180 | }, 181 | "file_extension": ".py", 182 | "mimetype": "text/x-python", 183 | "name": "python", 184 | "nbconvert_exporter": "python", 185 | "pygments_lexer": "ipython3", 186 | "version": "3.5.4" 187 | } 188 | }, 189 | "nbformat": 4, 190 | "nbformat_minor": 2 191 | } 192 | -------------------------------------------------------------------------------- /notebooks/Python/Programming_Problems/Reverse_String.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "### Given a string, write a function called *reverseString* to reverse it. Do this using a loop, if possible." 
8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "def reverseString(string):\n", 17 | " '''takes a string and returns it in reverse order (w/for loop)'''\n", 18 | " \n", 19 | " # type checking\n", 20 | " assert type(string) == str, 'input must be a string'\n", 21 | " \n", 22 | " # main logic\n", 23 | " string_length = len(string)-1\n", 24 | " new_string = ''\n", 25 | " for i in range(string_length, -1, -1):\n", 26 | " new_string += string[i]\n", 27 | " return new_string\n" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": 2, 33 | "metadata": { 34 | "collapsed": true 35 | }, 36 | "outputs": [], 37 | "source": [ 38 | "def reverseString2(string):\n", 39 | " '''takes a string and returns it in reverse order (w/slicing)'''\n", 40 | " \n", 41 | " # type checking\n", 42 | " assert type(string) == str, 'input must be a string'\n", 43 | " \n", 44 | " # main logic\n", 45 | " return string[::-1]" 46 | ] 47 | }, 48 | { 49 | "cell_type": "markdown", 50 | "metadata": {}, 51 | "source": [ 52 | "---" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": 3, 58 | "metadata": {}, 59 | "outputs": [ 60 | { 61 | "data": { 62 | "text/plain": [ 63 | "'this is an example string'" 64 | ] 65 | }, 66 | "execution_count": 3, 67 | "metadata": {}, 68 | "output_type": "execute_result" 69 | } 70 | ], 71 | "source": [ 72 | "# example string\n", 73 | "s = 'this is an example string'\n", 74 | "s" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": 4, 80 | "metadata": {}, 81 | "outputs": [ 82 | { 83 | "data": { 84 | "text/plain": [ 85 | "'gnirts elpmaxe na si siht'" 86 | ] 87 | }, 88 | "execution_count": 4, 89 | "metadata": {}, 90 | "output_type": "execute_result" 91 | } 92 | ], 93 | "source": [ 94 | "# 1st function\n", 95 | "reverseString(s)" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": 5, 101 | "metadata": {}, 102 | "outputs": 
[ 103 | { 104 | "data": { 105 | "text/plain": [ 106 | "'gnirts elpmaxe na si siht'" 107 | ] 108 | }, 109 | "execution_count": 5, 110 | "metadata": {}, 111 | "output_type": "execute_result" 112 | } 113 | ], 114 | "source": [ 115 | "# 2nd function\n", 116 | "reverseString2(s)" 117 | ] 118 | }, 119 | { 120 | "cell_type": "code", 121 | "execution_count": 6, 122 | "metadata": {}, 123 | "outputs": [ 124 | { 125 | "data": { 126 | "text/plain": [ 127 | "True" 128 | ] 129 | }, 130 | "execution_count": 6, 131 | "metadata": {}, 132 | "output_type": "execute_result" 133 | } 134 | ], 135 | "source": [ 136 | "# test for equality\n", 137 | "reverseString(s) == reverseString2(s)" 138 | ] 139 | } 140 | ], 141 | "metadata": { 142 | "kernelspec": { 143 | "display_name": "Python [conda root]", 144 | "language": "python", 145 | "name": "conda-root-py" 146 | }, 147 | "language_info": { 148 | "codemirror_mode": { 149 | "name": "ipython", 150 | "version": 3 151 | }, 152 | "file_extension": ".py", 153 | "mimetype": "text/x-python", 154 | "name": "python", 155 | "nbconvert_exporter": "python", 156 | "pygments_lexer": "ipython3", 157 | "version": "3.5.4" 158 | } 159 | }, 160 | "nbformat": 4, 161 | "nbformat_minor": 2 162 | } 163 | -------------------------------------------------------------------------------- /notebooks/Python/Programming_Problems/Rotation_Problem.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "def rotation(num1, num2, verbose=False):\n", 12 | " '''tests whether num2 is a rotated version of num1'''\n", 13 | " \n", 14 | " num1 = list(str(num1))\n", 15 | " num2 = list(str(num2))\n", 16 | " \n", 17 | " assert len(num1) == len(num2), \"numbers are different lengths\"\n", 18 | " \n", 19 | " for i, _ in enumerate(num1):\n", 20 | " if num1 == num2:\n", 21 | " print('{} 
rotations to the right'.format(i))\n", 22 | " return True\n", 23 | " else:\n", 24 | " pop = list(num2.pop())\n", 25 | " num2 = pop + num2\n", 26 | " if verbose:\n", 27 | " print(num1)\n", 28 | " print(num2)\n", 29 | " print('-'*25)" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": 2, 35 | "metadata": { 36 | "collapsed": true 37 | }, 38 | "outputs": [], 39 | "source": [ 40 | "num1 = 12345\n", 41 | "num2 = 51234" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": 3, 47 | "metadata": {}, 48 | "outputs": [ 49 | { 50 | "name": "stdout", 51 | "output_type": "stream", 52 | "text": [ 53 | "['1', '2', '3', '4', '5']\n", 54 | "['4', '5', '1', '2', '3']\n", 55 | "-------------------------\n", 56 | "['1', '2', '3', '4', '5']\n", 57 | "['3', '4', '5', '1', '2']\n", 58 | "-------------------------\n", 59 | "['1', '2', '3', '4', '5']\n", 60 | "['2', '3', '4', '5', '1']\n", 61 | "-------------------------\n", 62 | "['1', '2', '3', '4', '5']\n", 63 | "['1', '2', '3', '4', '5']\n", 64 | "-------------------------\n", 65 | "4 rotations to the right\n" 66 | ] 67 | }, 68 | { 69 | "data": { 70 | "text/plain": [ 71 | "True" 72 | ] 73 | }, 74 | "execution_count": 3, 75 | "metadata": {}, 76 | "output_type": "execute_result" 77 | } 78 | ], 79 | "source": [ 80 | "rotation(num1, num2, verbose=True)" 81 | ] 82 | } 83 | ], 84 | "metadata": { 85 | "kernelspec": { 86 | "display_name": "Python [conda root]", 87 | "language": "python", 88 | "name": "conda-root-py" 89 | }, 90 | "language_info": { 91 | "codemirror_mode": { 92 | "name": "ipython", 93 | "version": 3 94 | }, 95 | "file_extension": ".py", 96 | "mimetype": "text/x-python", 97 | "name": "python", 98 | "nbconvert_exporter": "python", 99 | "pygments_lexer": "ipython3", 100 | "version": "3.5.4" 101 | } 102 | }, 103 | "nbformat": 4, 104 | "nbformat_minor": 2 105 | } 106 | -------------------------------------------------------------------------------- 
/notebooks/Python/Python_Internals/Arrays_vs_Lists.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import numpy as np" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 2, 17 | "metadata": { 18 | "collapsed": true 19 | }, 20 | "outputs": [], 21 | "source": [ 22 | "arr = np.arange(1e7)\n", 23 | "lst = arr.tolist()" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": 3, 29 | "metadata": {}, 30 | "outputs": [ 31 | { 32 | "data": { 33 | "text/plain": [ 34 | "numpy.ndarray" 35 | ] 36 | }, 37 | "execution_count": 3, 38 | "metadata": {}, 39 | "output_type": "execute_result" 40 | } 41 | ], 42 | "source": [ 43 | "type(arr)" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": 4, 49 | "metadata": {}, 50 | "outputs": [ 51 | { 52 | "data": { 53 | "text/plain": [ 54 | "list" 55 | ] 56 | }, 57 | "execution_count": 4, 58 | "metadata": {}, 59 | "output_type": "execute_result" 60 | } 61 | ], 62 | "source": [ 63 | "type(lst)" 64 | ] 65 | }, 66 | { 67 | "cell_type": "markdown", 68 | "metadata": {}, 69 | "source": [ 70 | "## Empirical Testing (Simple)" 71 | ] 72 | }, 73 | { 74 | "cell_type": "markdown", 75 | "metadata": {}, 76 | "source": [ 77 | "#### Show Equivalent (Simple)" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": 5, 83 | "metadata": {}, 84 | "outputs": [ 85 | { 86 | "data": { 87 | "text/plain": [ 88 | "0" 89 | ] 90 | }, 91 | "execution_count": 5, 92 | "metadata": {}, 93 | "output_type": "execute_result" 94 | } 95 | ], 96 | "source": [ 97 | "sum(arr[arr > 1e5] != [el for el in lst if el > 1e5])" 98 | ] 99 | }, 100 | { 101 | "cell_type": "markdown", 102 | "metadata": {}, 103 | "source": [ 104 | "#### Timing (Simple)" 105 | ] 106 | }, 107 | { 108 | "cell_type": "code", 109 | 
"execution_count": 6, 110 | "metadata": {}, 111 | "outputs": [ 112 | { 113 | "name": "stdout", 114 | "output_type": "stream", 115 | "text": [ 116 | "42 ms ± 4.72 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)\n" 117 | ] 118 | } 119 | ], 120 | "source": [ 121 | "%timeit arr[arr > 1e5] " 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": 7, 127 | "metadata": {}, 128 | "outputs": [ 129 | { 130 | "name": "stdout", 131 | "output_type": "stream", 132 | "text": [ 133 | "1.82 s ± 38.5 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" 134 | ] 135 | } 136 | ], 137 | "source": [ 138 | "%timeit [el for el in lst if el > 1e5]" 139 | ] 140 | }, 141 | { 142 | "cell_type": "markdown", 143 | "metadata": {}, 144 | "source": [ 145 | "## Empirical Testing (Complex)" 146 | ] 147 | }, 148 | { 149 | "cell_type": "markdown", 150 | "metadata": {}, 151 | "source": [ 152 | "#### Show Equivalent" 153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "execution_count": 8, 158 | "metadata": {}, 159 | "outputs": [ 160 | { 161 | "data": { 162 | "text/plain": [ 163 | "0" 164 | ] 165 | }, 166 | "execution_count": 8, 167 | "metadata": {}, 168 | "output_type": "execute_result" 169 | } 170 | ], 171 | "source": [ 172 | "sum(arr[arr < 1e5][arr[arr < 1e5] > 1e4] != [el for el in lst if el > 1e4 and el < 1e5])" 173 | ] 174 | }, 175 | { 176 | "cell_type": "markdown", 177 | "metadata": {}, 178 | "source": [ 179 | "#### Timing (Complex)" 180 | ] 181 | }, 182 | { 183 | "cell_type": "code", 184 | "execution_count": 9, 185 | "metadata": {}, 186 | "outputs": [ 187 | { 188 | "name": "stdout", 189 | "output_type": "stream", 190 | "text": [ 191 | "18.9 ms ± 1.31 ms per loop (mean ± std. dev. 
of 7 runs, 100 loops each)\n" 192 | ] 193 | } 194 | ], 195 | "source": [ 196 | "%timeit arr[arr < 1e5][arr[arr < 1e5] > 1e4]" 197 | ] 198 | }, 199 | { 200 | "cell_type": "code", 201 | "execution_count": 10, 202 | "metadata": {}, 203 | "outputs": [ 204 | { 205 | "name": "stdout", 206 | "output_type": "stream", 207 | "text": [ 208 | "2.03 s ± 56.7 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" 209 | ] 210 | } 211 | ], 212 | "source": [ 213 | "%timeit [el for el in lst if el > 1e4 and el < 1e5]" 214 | ] 215 | } 216 | ], 217 | "metadata": { 218 | "kernelspec": { 219 | "display_name": "Python [conda root]", 220 | "language": "python", 221 | "name": "conda-root-py" 222 | }, 223 | "language_info": { 224 | "codemirror_mode": { 225 | "name": "ipython", 226 | "version": 3 227 | }, 228 | "file_extension": ".py", 229 | "mimetype": "text/x-python", 230 | "name": "python", 231 | "nbconvert_exporter": "python", 232 | "pygments_lexer": "ipython3", 233 | "version": "3.5.4" 234 | } 235 | }, 236 | "nbformat": 4, 237 | "nbformat_minor": 2 238 | } 239 | -------------------------------------------------------------------------------- /notebooks/Python/Python_Internals/Working_with_Bits.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import numpy as np" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 2, 17 | "metadata": {}, 18 | "outputs": [ 19 | { 20 | "data": { 21 | "text/plain": [ 22 | "116" 23 | ] 24 | }, 25 | "execution_count": 2, 26 | "metadata": {}, 27 | "output_type": "execute_result" 28 | } 29 | ], 30 | "source": [ 31 | "np.bitwise_xor(23,99)" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": 3, 37 | "metadata": {}, 38 | "outputs": [ 39 | { 40 | "data": { 41 | "text/plain": [ 42 | "'1110100'" 43 | ] 44 | }, 45 | 
"execution_count": 3, 46 | "metadata": {}, 47 | "output_type": "execute_result" 48 | } 49 | ], 50 | "source": [ 51 | "np.binary_repr(116)" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": null, 57 | "metadata": { 58 | "collapsed": true 59 | }, 60 | "outputs": [], 61 | "source": [] 62 | } 63 | ], 64 | "metadata": { 65 | "kernelspec": { 66 | "display_name": "Python [conda root]", 67 | "language": "python", 68 | "name": "conda-root-py" 69 | }, 70 | "language_info": { 71 | "codemirror_mode": { 72 | "name": "ipython", 73 | "version": 3 74 | }, 75 | "file_extension": ".py", 76 | "mimetype": "text/x-python", 77 | "name": "python", 78 | "nbconvert_exporter": "python", 79 | "pygments_lexer": "ipython3", 80 | "version": "3.5.4" 81 | } 82 | }, 83 | "nbformat": 4, 84 | "nbformat_minor": 2 85 | } 86 | -------------------------------------------------------------------------------- /notebooks/Python/Python_Internals/listexp_vs_genexp.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "from itertools import cycle" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 2, 17 | "metadata": { 18 | "collapsed": true 19 | }, 20 | "outputs": [], 21 | "source": [ 22 | "lst = [x for x in range(int(1e6))]" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": 3, 28 | "metadata": {}, 29 | "outputs": [ 30 | { 31 | "name": "stdout", 32 | "output_type": "stream", 33 | "text": [ 34 | "499999500000\n", 35 | "499999500000\n", 36 | "499999500000\n", 37 | "499999500000\n", 38 | "499999500000\n", 39 | "499999500000\n", 40 | "499999500000\n", 41 | "499999500000\n", 42 | "499999500000\n", 43 | "499999500000\n", 44 | "499999500000\n", 45 | "499999500000\n", 46 | "499999500000\n", 47 | "499999500000\n", 48 | "499999500000\n", 49 | 
"499999500000\n", 50 | "499999500000\n", 51 | "499999500000\n", 52 | "499999500000\n", 53 | "499999500000\n", 54 | "499999500000\n", 55 | "499999500000\n", 56 | "499999500000\n", 57 | "499999500000\n", 58 | "499999500000\n", 59 | "499999500000\n", 60 | "499999500000\n", 61 | "499999500000\n", 62 | "499999500000\n", 63 | "499999500000\n", 64 | "499999500000\n", 65 | "499999500000\n", 66 | "499999500000\n", 67 | "499999500000\n", 68 | "499999500000\n", 69 | "499999500000\n", 70 | "499999500000\n", 71 | "499999500000\n", 72 | "499999500000\n", 73 | "499999500000\n", 74 | "499999500000\n", 75 | "499999500000\n", 76 | "499999500000\n", 77 | "499999500000\n", 78 | "499999500000\n", 79 | "499999500000\n", 80 | "499999500000\n", 81 | "499999500000\n", 82 | "499999500000\n", 83 | "499999500000\n", 84 | "499999500000\n", 85 | "499999500000\n", 86 | "499999500000\n", 87 | "499999500000\n", 88 | "499999500000\n", 89 | "499999500000\n", 90 | "499999500000\n", 91 | "499999500000\n", 92 | "499999500000\n", 93 | "499999500000\n", 94 | "499999500000\n", 95 | "499999500000\n", 96 | "499999500000\n", 97 | "499999500000\n", 98 | "499999500000\n", 99 | "499999500000\n", 100 | "499999500000\n", 101 | "499999500000\n", 102 | "499999500000\n", 103 | "499999500000\n", 104 | "499999500000\n", 105 | "499999500000\n", 106 | "499999500000\n", 107 | "499999500000\n", 108 | "499999500000\n", 109 | "499999500000\n", 110 | "499999500000\n", 111 | "499999500000\n", 112 | "499999500000\n", 113 | "499999500000\n", 114 | "499999500000\n", 115 | "148 ms ± 2.65 ms per loop (mean ± std. dev. 
of 7 runs, 10 loops each)\n" 116 | ] 117 | } 118 | ], 119 | "source": [ 120 | "%%timeit\n", 121 | "mysum = 0\n", 122 | "for el in lst:\n", 123 | " mysum += el\n", 124 | "print(mysum)" 125 | ] 126 | }, 127 | { 128 | "cell_type": "code", 129 | "execution_count": 4, 130 | "metadata": {}, 131 | "outputs": [], 132 | "source": [ 133 | "gen = cycle((x for x in range(int(1e6))))" 134 | ] 135 | }, 136 | { 137 | "cell_type": "code", 138 | "execution_count": 5, 139 | "metadata": {}, 140 | "outputs": [ 141 | { 142 | "name": "stdout", 143 | "output_type": "stream", 144 | "text": [ 145 | "499999500000\n", 146 | "499999500000\n", 147 | "499999500000\n", 148 | "499999500000\n", 149 | "499999500000\n", 150 | "499999500000\n", 151 | "499999500000\n", 152 | "499999500000\n", 153 | "279 ms ± 17.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" 154 | ] 155 | } 156 | ], 157 | "source": [ 158 | "%%timeit\n", 159 | "mysum = 0\n", 160 | "for i in lst:\n", 161 | " mysum += next(gen)\n", 162 | "print(mysum)" 163 | ] 164 | }, 165 | { 166 | "cell_type": "code", 167 | "execution_count": 6, 168 | "metadata": { 169 | "collapsed": true 170 | }, 171 | "outputs": [], 172 | "source": [ 173 | "def gen():\n", 174 | " while True:\n", 175 | " yield 1" 176 | ] 177 | }, 178 | { 179 | "cell_type": "code", 180 | "execution_count": 7, 181 | "metadata": {}, 182 | "outputs": [], 183 | "source": [ 184 | "gen2 = gen()" 185 | ] 186 | }, 187 | { 188 | "cell_type": "code", 189 | "execution_count": 8, 190 | "metadata": {}, 191 | "outputs": [ 192 | { 193 | "name": "stdout", 194 | "output_type": "stream", 195 | "text": [ 196 | "1\n", 197 | "1\n", 198 | "1\n", 199 | "1\n", 200 | "1\n", 201 | "1\n", 202 | "1\n", 203 | "1\n", 204 | "1\n", 205 | "1\n" 206 | ] 207 | } 208 | ], 209 | "source": [ 210 | "for i in range(10):\n", 211 | " print(next(gen2))" 212 | ] 213 | }, 214 | { 215 | "cell_type": "code", 216 | "execution_count": 9, 217 | "metadata": {}, 218 | "outputs": [ 219 | { 220 | "name": "stdout", 221 | 
"output_type": "stream", 222 | "text": [ 223 | "1000000\n", 224 | "1000000\n", 225 | "1000000\n", 226 | "1000000\n", 227 | "1000000\n", 228 | "1000000\n", 229 | "1000000\n", 230 | "1000000\n", 231 | "353 ms ± 14.6 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" 232 | ] 233 | } 234 | ], 235 | "source": [ 236 | "%%timeit\n", 237 | "mysum = 0\n", 238 | "for i in lst:\n", 239 | " mysum += next(gen2)\n", 240 | "print(mysum)" 241 | ] 242 | } 243 | ], 244 | "metadata": { 245 | "kernelspec": { 246 | "display_name": "Python [conda root]", 247 | "language": "python", 248 | "name": "conda-root-py" 249 | }, 250 | "language_info": { 251 | "codemirror_mode": { 252 | "name": "ipython", 253 | "version": 3 254 | }, 255 | "file_extension": ".py", 256 | "mimetype": "text/x-python", 257 | "name": "python", 258 | "nbconvert_exporter": "python", 259 | "pygments_lexer": "ipython3", 260 | "version": "3.5.4" 261 | } 262 | }, 263 | "nbformat": 4, 264 | "nbformat_minor": 2 265 | } 266 | -------------------------------------------------------------------------------- /notebooks/Python/Recursion/Factorial_&_Fibonacci.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 29, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "def factorial(n):\n", 12 | " '''no recursion'''\n", 13 | " val = 1\n", 14 | " while n > 0:\n", 15 | " val *= n\n", 16 | " n -= 1\n", 17 | " return val " 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": 33, 23 | "metadata": {}, 24 | "outputs": [ 25 | { 26 | "data": { 27 | "text/plain": [ 28 | "40320" 29 | ] 30 | }, 31 | "execution_count": 33, 32 | "metadata": {}, 33 | "output_type": "execute_result" 34 | } 35 | ], 36 | "source": [ 37 | "factorial(8)" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": 13, 43 | "metadata": { 44 | "collapsed": true 45 | }, 46 | "outputs": [], 47 | 
"source": [ 48 | "def factorial2(n):\n", 49 | " '''with recursion'''\n", 50 | " if n < 1: \n", 51 | " return 1\n", 52 | " else:\n", 53 | " num = n * factorial2(n - 1) \n", 54 | " return num" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": 19, 60 | "metadata": {}, 61 | "outputs": [ 62 | { 63 | "data": { 64 | "text/plain": [ 65 | "40320" 66 | ] 67 | }, 68 | "execution_count": 19, 69 | "metadata": {}, 70 | "output_type": "execute_result" 71 | } 72 | ], 73 | "source": [ 74 | "factorial2(8)" 75 | ] 76 | }, 77 | { 78 | "cell_type": "markdown", 79 | "metadata": {}, 80 | "source": [ 81 | "---" 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": 3, 87 | "metadata": { 88 | "collapsed": true 89 | }, 90 | "outputs": [], 91 | "source": [ 92 | "def fibonacci(n):\n", 93 | " '''with recursion'''\n", 94 | " if n == 0: \n", 95 | " return 0\n", 96 | " elif n == 1: \n", 97 | " return 1\n", 98 | " else: \n", 99 | " return fibonacci(n-1)+fibonacci(n-2)" 100 | ] 101 | } 102 | ], 103 | "metadata": { 104 | "kernelspec": { 105 | "display_name": "Python [conda root]", 106 | "language": "python", 107 | "name": "conda-root-py" 108 | }, 109 | "language_info": { 110 | "codemirror_mode": { 111 | "name": "ipython", 112 | "version": 3 113 | }, 114 | "file_extension": ".py", 115 | "mimetype": "text/x-python", 116 | "name": "python", 117 | "nbconvert_exporter": "python", 118 | "pygments_lexer": "ipython3", 119 | "version": "3.5.4" 120 | } 121 | }, 122 | "nbformat": 4, 123 | "nbformat_minor": 2 124 | } 125 | -------------------------------------------------------------------------------- /notebooks/Random_Number_Generators/2_PRNG_Linear_Congruential_Generator.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "Wikipedia link: [linear congruential generator](https://en.wikipedia.org/wiki/Linear_congruential_generator_)\n", 8 | 
"\n", 9 | "Wikipedia description: \n", 10 | "> A linear congruential generator (LCG) is an algorithm that yields a sequence of pseudo-randomized numbers calculated with a discontinuous piecewise linear equation. The method represents one of the oldest and best-known pseudorandom number generator algorithms. The theory behind them is relatively easy to understand, and they are easily implemented and fast, especially on computer hardware which can provide modulo arithmetic by storage-bit truncation.\n", 11 | "\n", 12 | "The generator is defined by the recurrence relation:\n", 13 | "\n", 14 | "X$_{n+1}$ = (aX$_{n}$+c) mod m\n", 15 | "\n", 16 | "where X is the sequence of pseudorandom values and\n", 17 | "- m is the modulus (0 $<$ m)\n", 18 | "- a is the multiplier (0 $<$ a $<$ m)\n", 19 | "- c is the increment (0 $\\leq$ c $<$ m)\n", 20 | "- X$_{o}$ is the seed value (0 $\\leq$ X$_{o}$ < m)\n", 21 | "\n", 22 | "all of which are integer values." 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": 1, 28 | "metadata": { 29 | "collapsed": true 30 | }, 31 | "outputs": [], 32 | "source": [ 33 | "def lcg(m, a, c, seed):\n", 34 | " '''linear congruential generator that pseudorandomly generates numbers.\n", 35 | " \n", 36 | " In:\n", 37 | " m = (+ int) modulus\n", 38 | " a = (+ int) multiplier\n", 39 | " c = (nonnegative int) increment\n", 40 | " seed = (nonnegative int) initial value\n", 41 | " Out:\n", 42 | " single random number\n", 43 | " '''\n", 44 | " \n", 45 | " # type and value checks\n", 46 | " assert type(m) == int, \"'m' must be an integer\"\n", 47 | " assert m > 0, \"'m' must be an int greater than 0\"\n", 48 | " assert type(a) == int, \"'a' must be an integer\"\n", 49 | " assert a > 0, \"'a' must be an int greater than 0\"\n", 50 | " assert type(c) == int, \"'c' must be an integer\"\n", 51 | " assert c >= 0, \"'c' must be an int greater than 0\"\n", 52 | " assert type(seed) == int, \"'seed' must be an integer\"\n", 53 | " assert seed >= 0, 
\"'seed' must be an int greater than 0\"\n", 54 | " \n", 55 | " # algorithm\n", 56 | " X_n = seed\n", 57 | " while True:\n", 58 | " X_n = (a * X_n + c) % m\n", 59 | " yield X_n" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": 2, 65 | "metadata": { 66 | "collapsed": true 67 | }, 68 | "outputs": [], 69 | "source": [ 70 | "def auto_print(implementation, number):\n", 71 | " for _ in range(number):\n", 72 | " print(next(implementation)) " 73 | ] 74 | }, 75 | { 76 | "cell_type": "markdown", 77 | "metadata": {}, 78 | "source": [ 79 | "#### Basic Implementation" 80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": 3, 85 | "metadata": {}, 86 | "outputs": [ 87 | { 88 | "name": "stdout", 89 | "output_type": "stream", 90 | "text": [ 91 | "12\n", 92 | "27\n", 93 | "8\n", 94 | "30\n", 95 | "29\n", 96 | "22\n", 97 | "4\n", 98 | "2\n", 99 | "19\n", 100 | "14\n", 101 | "10\n", 102 | "13\n", 103 | "3\n", 104 | "26\n", 105 | "1\n", 106 | "12\n", 107 | "27\n", 108 | "8\n", 109 | "30\n", 110 | "29\n" 111 | ] 112 | } 113 | ], 114 | "source": [ 115 | "basic = lcg(m=31, a=7, c=5, seed=1)\n", 116 | "auto_print(basic, 20)" 117 | ] 118 | }, 119 | { 120 | "cell_type": "markdown", 121 | "metadata": {}, 122 | "source": [ 123 | "#### There's a Problem: Parameter Choice Matters" 124 | ] 125 | }, 126 | { 127 | "cell_type": "code", 128 | "execution_count": 4, 129 | "metadata": {}, 130 | "outputs": [ 131 | { 132 | "name": "stdout", 133 | "output_type": "stream", 134 | "text": [ 135 | "1\n", 136 | "3\n", 137 | "7\n", 138 | "15\n", 139 | "0\n", 140 | "1\n", 141 | "3\n", 142 | "7\n", 143 | "15\n", 144 | "0\n", 145 | "1\n", 146 | "3\n", 147 | "7\n", 148 | "15\n", 149 | "0\n", 150 | "1\n", 151 | "3\n", 152 | "7\n", 153 | "15\n", 154 | "0\n" 155 | ] 156 | } 157 | ], 158 | "source": [ 159 | "problem = lcg(m=31, a=2, c=1, seed=0)\n", 160 | "auto_print(problem, 20)" 161 | ] 162 | }, 163 | { 164 | "cell_type": "markdown", 165 | "metadata": {}, 166 | "source": [ 167 | 
"**Explanation:** a poor choice of parameters leads to a short period length. Once the period length, or cycle, is known, it is trivial to make predictions. This means the numbers generated are no longer random." 168 | ] 169 | }, 170 | { 171 | "cell_type": "markdown", 172 | "metadata": {}, 173 | "source": [ 174 | "#### Lehmer Implementation (c=0)" 175 | ] 176 | }, 177 | { 178 | "cell_type": "code", 179 | "execution_count": 5, 180 | "metadata": {}, 181 | "outputs": [ 182 | { 183 | "name": "stdout", 184 | "output_type": "stream", 185 | "text": [ 186 | "16807\n", 187 | "282475249\n", 188 | "1622650073\n", 189 | "984943658\n", 190 | "1144108930\n", 191 | "470211272\n", 192 | "101027544\n", 193 | "1457850878\n", 194 | "1458777923\n", 195 | "2007237709\n", 196 | "823564440\n", 197 | "1115438165\n", 198 | "1784484492\n", 199 | "74243042\n", 200 | "114807987\n", 201 | "1137522503\n", 202 | "1441282327\n", 203 | "16531729\n", 204 | "823378840\n", 205 | "143542612\n" 206 | ] 207 | } 208 | ], 209 | "source": [ 210 | "lehmer = lcg(m=2147483647, a=16807, c=0, seed=1)\n", 211 | "auto_print(lehmer, 20)" 212 | ] 213 | }, 214 | { 215 | "cell_type": "markdown", 216 | "metadata": {}, 217 | "source": [ 218 | "**Note:** this Lehmer implementation uses a Mersenne Prime (a prime that is 1 less than a power of two: 2$^n$ - 1)." 
219 | ] 220 | }, 221 | { 222 | "cell_type": "markdown", 223 | "metadata": { 224 | "collapsed": true 225 | }, 226 | "source": [ 227 | "## Advantages\n", 228 | "\n", 229 | "- Fast\n", 230 | "- Minimal memory to retain state\n", 231 | "- Great for embedded systems and video games \n", 232 | "\n", 233 | "## Disadvantages\n", 234 | "\n", 235 | "- Serial correlation of sequence (see Marsaglia's Theorem)\n", 236 | "- Short period of the low-order bits when m is chosen to be a power of 2\n", 237 | "- Not suitable for a Monte Carlo simulation\n", 238 | "- Must not be used for cryptographic applications\n", 239 | "\n", 240 | "## Final Notes\n", 241 | "\n", 242 | "The low-order bits of LCGs when m is a power of 2 should never be relied on for any degree of randomness whatsoever. Indeed, simply substituting 2$^n$ for the modulus term reveals that the low order bits go through very short cycles. In particular, any full-cycle LCG when m is a power of 2 will produce alternately odd and even results." 243 | ] 244 | } 245 | ], 246 | "metadata": { 247 | "kernelspec": { 248 | "display_name": "Python [conda root]", 249 | "language": "python", 250 | "name": "conda-root-py" 251 | }, 252 | "language_info": { 253 | "codemirror_mode": { 254 | "name": "ipython", 255 | "version": 3 256 | }, 257 | "file_extension": ".py", 258 | "mimetype": "text/x-python", 259 | "name": "python", 260 | "nbconvert_exporter": "python", 261 | "pygments_lexer": "ipython3", 262 | "version": "3.5.4" 263 | } 264 | }, 265 | "nbformat": 4, 266 | "nbformat_minor": 2 267 | } 268 | -------------------------------------------------------------------------------- /notebooks/Random_Number_Generators/3_PRNG_Linear_Feedback_Shift_Register.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 21, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "value = 0xabbacaddabba\n", 10 | "value &= (1<<10)" 11 | ] 12 | }, 
13 | { 14 | "cell_type": "code", 15 | "execution_count": 22, 16 | "metadata": {}, 17 | "outputs": [ 18 | { 19 | "data": { 20 | "text/plain": [ 21 | "'0b101010111011101011001010110111011010101110111010'" 22 | ] 23 | }, 24 | "execution_count": 22, 25 | "metadata": {}, 26 | "output_type": "execute_result" 27 | } 28 | ], 29 | "source": [ 30 | "bin(value)" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": null, 36 | "metadata": { 37 | "collapsed": true 38 | }, 39 | "outputs": [], 40 | "source": [] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": null, 45 | "metadata": { 46 | "collapsed": true 47 | }, 48 | "outputs": [], 49 | "source": [] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": null, 54 | "metadata": { 55 | "collapsed": true 56 | }, 57 | "outputs": [], 58 | "source": [] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": null, 63 | "metadata": { 64 | "collapsed": true 65 | }, 66 | "outputs": [], 67 | "source": [] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": null, 72 | "metadata": { 73 | "collapsed": true 74 | }, 75 | "outputs": [], 76 | "source": [] 77 | } 78 | ], 79 | "metadata": { 80 | "kernelspec": { 81 | "display_name": "Python [conda root]", 82 | "language": "python", 83 | "name": "conda-root-py" 84 | }, 85 | "language_info": { 86 | "codemirror_mode": { 87 | "name": "ipython", 88 | "version": 3 89 | }, 90 | "file_extension": ".py", 91 | "mimetype": "text/x-python", 92 | "name": "python", 93 | "nbconvert_exporter": "python", 94 | "pygments_lexer": "ipython3", 95 | "version": "3.5.4" 96 | } 97 | }, 98 | "nbformat": 4, 99 | "nbformat_minor": 2 100 | } 101 | -------------------------------------------------------------------------------- /notebooks/Random_Number_Generators/4_PRNG_Dev_Random_Directory.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | 
"metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import os" 12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "metadata": {}, 17 | "source": [ 18 | "## Accessing dev/random" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 16, 24 | "metadata": {}, 25 | "outputs": [ 26 | { 27 | "data": { 28 | "text/plain": [ 29 | "b'\\xdb/T1i\\xdc\\x89\\xe0Q5'" 30 | ] 31 | }, 32 | "execution_count": 16, 33 | "metadata": {}, 34 | "output_type": "execute_result" 35 | } 36 | ], 37 | "source": [ 38 | "# Return a bytes object containing random bytes suitable for cryptographic use.\n", 39 | "rand = os.urandom(10)\n", 40 | "rand" 41 | ] 42 | }, 43 | { 44 | "cell_type": "markdown", 45 | "metadata": {}, 46 | "source": [ 47 | "## Function" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": 17, 53 | "metadata": { 54 | "collapsed": true 55 | }, 56 | "outputs": [], 57 | "source": [ 58 | "def bytes2decimal(byte_obj):\n", 59 | " '''Takes bytes object and returns decimal equivalent.'''\n", 60 | " return int.from_bytes(rand, byteorder='big')" 61 | ] 62 | }, 63 | { 64 | "cell_type": "markdown", 65 | "metadata": {}, 66 | "source": [ 67 | "## Example" 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": 18, 73 | "metadata": {}, 74 | "outputs": [ 75 | { 76 | "data": { 77 | "text/plain": [ 78 | "1035071323466485622395189" 79 | ] 80 | }, 81 | "execution_count": 18, 82 | "metadata": {}, 83 | "output_type": "execute_result" 84 | } 85 | ], 86 | "source": [ 87 | "b2d = bytes2decimal(rand)\n", 88 | "b2d" 89 | ] 90 | }, 91 | { 92 | "cell_type": "markdown", 93 | "metadata": {}, 94 | "source": [ 95 | "## Multiple Examples" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": 19, 101 | "metadata": {}, 102 | "outputs": [ 103 | { 104 | "name": "stdout", 105 | "output_type": "stream", 106 | "text": [ 107 | "b'\\x19\\x10cd\\x99' 107649131673\n", 108 | "b'\\x19\\xa2\\x7fx\\xa8' 110100445352\n", 
109 | "b'\"T\\xdfM\\xb9' 147452808633\n", 110 | "b'\\xff\\x85O\\x83\\xcf' 1097453241295\n", 111 | "b'\\x1d\\xddE\\x9e\\\\' 128266378844\n", 112 | "b'\\xa1\\x80%\\xf4\\x0b' 693639705611\n", 113 | "b'#gxPy' 152059793529\n", 114 | "b\"\\xfa\\xd5'%\\x87\" 1077317936519\n", 115 | "b'\\x83\\x06 1t' 562743488884\n", 116 | "b'D4@I\\x88' 292934404488\n", 117 | "b'\\xe7\\xffR\\x9d\\x07' 996421049607\n", 118 | "b'\\xad\\xca\\x8e#\\r' 746427654925\n", 119 | "b'\\x0bM{\\x8e\\xec' 48544583404\n", 120 | "b'E\\x9b\\x99\\xc3\\xfe' 298963289086\n", 121 | "b'\\x98a17O' 654465644367\n", 122 | "b'\\x1ck\\xa7\\xa7\\xb9' 122065233849\n", 123 | "b'\\x17g\\x8cS\\x8c' 100521497484\n", 124 | "b'\\xa9\\xdf=Dh' 729594807400\n", 125 | "b'\\xfdR%c\\x1c' 1088004907804\n", 126 | "b'\\x1c\\xf2\\xcd \\x86' 124332613766\n" 127 | ] 128 | } 129 | ], 130 | "source": [ 131 | "for _ in range(20):\n", 132 | " rand = os.urandom(5)\n", 133 | " print('{0:25} {1}'.format(str(rand), bytes2decimal(rand)))" 134 | ] 135 | } 136 | ], 137 | "metadata": { 138 | "kernelspec": { 139 | "display_name": "Python [conda root]", 140 | "language": "python", 141 | "name": "conda-root-py" 142 | }, 143 | "language_info": { 144 | "codemirror_mode": { 145 | "name": "ipython", 146 | "version": 3 147 | }, 148 | "file_extension": ".py", 149 | "mimetype": "text/x-python", 150 | "name": "python", 151 | "nbconvert_exporter": "python", 152 | "pygments_lexer": "ipython3", 153 | "version": "3.5.4" 154 | } 155 | }, 156 | "nbformat": 4, 157 | "nbformat_minor": 2 158 | } 159 | -------------------------------------------------------------------------------- /pkl_files/subset_df.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dziganto/Data_Science_Fundamentals/833e83ea27d27de456b01ba05232dc410c5bc8f5/pkl_files/subset_df.pkl -------------------------------------------------------------------------------- /pkl_files/subset_df2.pkl: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/dziganto/Data_Science_Fundamentals/833e83ea27d27de456b01ba05232dc410c5bc8f5/pkl_files/subset_df2.pkl --------------------------------------------------------------------------------