├── .github ├── ISSUE_TEMPLATE │ └── bug_report.md └── workflows │ └── pylint.yml ├── .gitignore ├── .vscode ├── settings.json └── settings_DiskStation_Oct-20-1450-2020_Conflict.json ├── CODE_OF_CONDUCT.md ├── LICENSE ├── README.md ├── ml_algorithms ├── doc │ ├── greedy_algorithm │ │ └── readme.md │ ├── monte_carlo_simulation │ │ └── notes.md │ └── svm │ │ └── notes │ │ ├── .gitignore │ │ ├── README.md │ │ ├── budget_prediction_svm.pdf │ │ ├── budget_prediction_svm.tex │ │ ├── fig_4.tex │ │ ├── fig_5.tex │ │ ├── fig_6.tex │ │ ├── fig_7.tex │ │ ├── fig_8.tex │ │ └── img │ │ ├── fig_1.JPG │ │ ├── fig_2.JPG │ │ ├── fig_3.jpg │ │ ├── fig_4.jpg │ │ ├── fig_5.jpg │ │ ├── fig_6.jpg │ │ ├── fig_7.jpg │ │ └── fig_8.jpg ├── samples │ ├── __init__.py │ └── pandas_examples.py └── src │ ├── algorithms │ ├── __init__.py │ ├── __pycache__ │ │ └── __init__.cpython-39.pyc │ ├── cnn │ │ ├── bud.jpg │ │ ├── cnn.py │ │ ├── conv2d.py │ │ └── image_convolution.py │ ├── fuzzy_inference │ │ ├── fuzzy_example_2in_1out.py │ │ ├── fuzzy_example_2in_2out.py │ │ └── fuzzy_system │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-38.pyc │ │ │ ├── fuzzy_clause.cpython-38.pyc │ │ │ ├── fuzzy_rule.cpython-38.pyc │ │ │ ├── fuzzy_set.cpython-38.pyc │ │ │ ├── fuzzy_system.cpython-38.pyc │ │ │ ├── fuzzy_variable.cpython-38.pyc │ │ │ ├── fuzzy_variable_input.cpython-38.pyc │ │ │ └── fuzzy_variable_output.cpython-38.pyc │ │ │ ├── fuzzy_clause.py │ │ │ ├── fuzzy_rule.py │ │ │ ├── fuzzy_set.py │ │ │ ├── fuzzy_system.py │ │ │ ├── fuzzy_variable.py │ │ │ ├── fuzzy_variable_input.py │ │ │ └── fuzzy_variable_output.py │ ├── fuzzy_learning │ │ ├── __init__.py │ │ ├── data │ │ │ ├── Wine Quality Datasets.url │ │ │ ├── linear_model.csv │ │ │ ├── sample_set.csv │ │ │ ├── sbp_age.csv │ │ │ ├── sensor_data.csv │ │ │ ├── weatherHistory.csv │ │ │ ├── weatherHistory_adj.csv │ │ │ ├── weatherHistory_adj_test.csv │ │ │ ├── winequality-names.txt │ │ │ ├── winequality-names.txt.bak │ │ │ ├── winequality-red.csv │ │ │ ├── winequality-red_test.csv │ │ │ ├── winequality-red_train.csv │ │ │ └── winequality-white.csv │ │ ├── fuzzy_learning_sample_set.py │ │ ├── fuzzy_learning_sbp.py │ │ ├── fuzzy_system │ │ │ ├── __init__.py │ │ │ ├── fuzzy_associative_memory.py │ │ │ ├── fuzzy_clause.py │ │ │ ├── fuzzy_learning_helper.py │ │ │ ├── fuzzy_learning_system.py │ │ │ ├── fuzzy_rule.py │ │ │ ├── fuzzy_system.py │ │ │ ├── system_settings.py │ │ │ ├── type1_fuzzy_set.py │ │ │ └── type1_fuzzy_variable.py │ │ ├── fuzzy_system_example.py │ │ ├── fuzzy_system_exec │ │ │ ├── __init__.py │ │ │ ├── fuzzy_learning_system_DiskStation_Apr-16-1542-2020_Conflict.py │ │ │ ├── system_test.py │ │ │ ├── system_test2.py │ │ │ └── system_test2_DiskStation_Mar-27-1623-2020_Conflict.py │ │ ├── fuzzy_system_test.py │ │ ├── poc │ │ │ ├── data_analysis_poc.py │ │ │ ├── fuzzy_learning_system_poc copy.py │ │ │ ├── fuzzy_learning_system_poc.py │ │ │ ├── set_generation.py │ │ │ ├── set_generation_2.py │ │ │ ├── set_generation_notes.py │ │ │ └── set_naming.py │ │ ├── sensor_comparison.py │ │ ├── sensor_data_generate.py │ │ ├── sensor_fuzzy_learn.py │ │ ├── weather_analyse.py │ │ ├── weather_fuzzy_learning.py │ │ ├── weather_preprocessing.py │ │ ├── weather_preprocessing_humidity.py │ │ ├── wine_dataset_analysis.py │ │ ├── wine_fuzzy_learning.py │ │ └── wine_scaling.py │ ├── gan │ │ ├── __pycache__ │ │ │ ├── discriminator.cpython-39.pyc │ │ │ └── generator.cpython-39.pyc │ │ ├── discriminator.py │ │ ├── gan.py │ │ └── generator.py │ ├── id3 │ │ ├── __pycache__ │ │ │ 
└── id3_classifier.cpython-39.pyc │ │ ├── id3_classifier.py │ │ ├── id3_exec.py │ │ ├── readme.md │ │ ├── tree_exec.py │ │ └── weather.csv │ ├── id3_version2 │ │ ├── data.csv │ │ ├── id3.py │ │ ├── id3_v2.py │ │ ├── id3_v3.py │ │ ├── results_analysis.txt │ │ ├── sample_4_workout.xlsx │ │ ├── sample_results_1.txt │ │ ├── sample_results_2.txt │ │ ├── sample_results_3.txt │ │ ├── sample_results_4.txt │ │ ├── sample_results_5.txt │ │ ├── sample_rules.csv │ │ ├── sample_rules_1.csv │ │ ├── sample_rules_2.csv │ │ ├── sample_rules_3.csv │ │ ├── sample_rules_4.csv │ │ ├── sensor_rules.csv │ │ ├── weather_rules.csv │ │ └── weather_rules_1.csv │ ├── k-means │ │ └── k-means.xlsx │ ├── linear_regression │ │ ├── __init__.py │ │ ├── batch_gradient_descent │ │ │ ├── multifeature_batch_gd.py │ │ │ ├── twofeature_batch_gd.py │ │ │ ├── uni_batch_gd_nv.py │ │ │ ├── uni_batch_gd_v.py │ │ │ └── uni_batch_gd_v_norm.py │ │ ├── data_generation │ │ │ ├── data_1f.csv │ │ │ ├── data_1f_norm.csv │ │ │ ├── data_2f.csv │ │ │ ├── data_3f.csv │ │ │ ├── dataset_generation_1f.py │ │ │ └── dataset_generation_2f.py │ │ ├── minibatch_gradient_descent │ │ │ ├── data.csv │ │ │ ├── minibatch_gd_1.py │ │ │ ├── minibatch_gd_2.py │ │ │ ├── minibatch_gd_2_v.py │ │ │ └── minibatch_gd_3.py │ │ ├── multivariate_linear_regression │ │ │ └── multivariate_lr.py │ │ ├── normalization │ │ │ └── normalization_analysis.py │ │ ├── stochastic_gradient_descent │ │ │ ├── stochastic_gd_1f_1.py │ │ │ ├── stochastic_gd_1f_2.py │ │ │ ├── stochastic_gd_nf_1.py │ │ │ └── stochastic_gd_nf_2.py │ │ ├── univariate_gd_analysis.py │ │ └── univariate_linear_regression │ │ │ └── univariate_lr.py │ ├── logistic_regression │ │ ├── __pycache__ │ │ │ ├── binaryclassification.cpython-311.pyc │ │ │ └── lr_utils.cpython-311.pyc │ │ ├── binaryclassification.py │ │ ├── exec.py │ │ ├── lr_utils.py │ │ ├── test_catvnoncat.h5 │ │ └── train_catvnoncat.h5 │ ├── svm │ │ ├── matplotlib_test.py │ │ ├── supportvectormachine.py │ │ ├── svm_orig.py │ │ └── test_code.py │ └── utils │ │ ├── __pycache__ │ │ └── simple_tree.cpython-39.pyc │ │ ├── nary_tree.py │ │ ├── rooted_dac.py │ │ ├── simple_tree.py │ │ ├── tree.py │ │ └── tree_exec.py │ └── introduction_to_computation │ ├── brute_force │ ├── fibonacci.py │ └── knapsack.py │ ├── confidence_interval │ └── normal_distribution_gen.py │ ├── data_structures │ └── graph.py │ ├── dice │ ├── __pycache__ │ │ ├── biased_die.cpython-39.pyc │ │ ├── dishonest_casino.cpython-39.pyc │ │ ├── fair_casino.cpython-39.pyc │ │ ├── fair_die.cpython-39.pyc │ │ └── loaded_die.cpython-39.pyc │ ├── dishonest_casino.py │ ├── fair_casino.py │ ├── fair_die.py │ ├── loaded_die.py │ ├── simulation_results.txt │ └── simulations.py │ ├── graph_search │ ├── __init__.py │ ├── breath_first_search.py │ ├── depth_first_search.py │ └── graph.py │ ├── greedy_algorithm │ └── knapsack.py │ ├── monte_carlo_simulation │ └── fair_roulette.py │ ├── random_walk │ ├── __pycache__ │ │ ├── drunk.cpython-36.pyc │ │ ├── drunk.cpython-37.pyc │ │ ├── drunk.cpython-38.pyc │ │ ├── field.cpython-36.pyc │ │ ├── field.cpython-37.pyc │ │ ├── field.cpython-38.pyc │ │ ├── location.cpython-36.pyc │ │ ├── location.cpython-37.pyc │ │ └── location.cpython-38.pyc │ ├── drunk.py │ ├── field.py │ ├── location.py │ ├── simulation.py │ └── simulation_analysis.py │ └── stochastic │ ├── approximation.py │ └── random_processes.py └── requirements.txt /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | 
about: Create a report to help us improve 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 12 | 13 | **To Reproduce** 14 | Steps to reproduce the behavior: 15 | 1. Go to '...' 16 | 2. Click on '....' 17 | 3. Scroll down to '....' 18 | 4. See error 19 | 20 | **Expected behavior** 21 | A clear and concise description of what you expected to happen. 22 | 23 | **Screenshots** 24 | If applicable, add screenshots to help explain your problem. 25 | 26 | **Desktop (please complete the following information):** 27 | - OS: [e.g. iOS] 28 | - Browser [e.g. chrome, safari] 29 | - Version [e.g. 22] 30 | 31 | **Smartphone (please complete the following information):** 32 | - Device: [e.g. iPhone6] 33 | - OS: [e.g. iOS8.1] 34 | - Browser [e.g. stock browser, safari] 35 | - Version [e.g. 22] 36 | 37 | **Additional context** 38 | Add any other context about the problem here. 39 | -------------------------------------------------------------------------------- /.github/workflows/pylint.yml: -------------------------------------------------------------------------------- 1 | name: Pylint 2 | 3 | on: [push] 4 | 5 | jobs: 6 | build: 7 | runs-on: ubuntu-latest 8 | strategy: 9 | matrix: 10 | python-version: ["3.8", "3.9", "3.10"] 11 | steps: 12 | - uses: actions/checkout@v3 13 | - name: Set up Python ${{ matrix.python-version }} 14 | uses: actions/setup-python@v3 15 | with: 16 | python-version: ${{ matrix.python-version }} 17 | - name: Install dependencies 18 | run: | 19 | python -m pip install --upgrade pip 20 | pip install pylint 21 | - name: Analysing the code with pylint 22 | run: | 23 | pylint $(git ls-files '*.py') 24 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | venv 2 | data -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "cSpell.words": [ 3 | "discretize", 4 | "dtype", 5 | "imread", 6 | "imshow", 7 | "ndarray", 8 | "pytoch" 9 | ], 10 | "python.pythonPath": "C:\\Program Files (x86)\\Python36-32\\python.exe", 11 | "compile-hero.disable-compile-files-on-did-save-code": false 12 | } -------------------------------------------------------------------------------- /.vscode/settings_DiskStation_Oct-20-1450-2020_Conflict.json: -------------------------------------------------------------------------------- 1 | { 2 | "cSpell.words": [ 3 | "discretize" 4 | ], 5 | "python.pythonPath": "D:\\Users\\carme\\AppData\\Local\\Programs\\Python\\Python37\\python.exe" 6 | } -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ml_algorithms 2 | 3 | 4 | -------------------------------------------------------------------------------- /ml_algorithms/doc/greedy_algorithm/readme.md: -------------------------------------------------------------------------------- 1 | # Introduction 2 | 3 | Resolution of 0/1 knapsack problem by using a greedy algorithm. 
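
As a quick illustration of the greedy approach, here is a minimal sketch (the `Item` type, `greedy_knapsack` name and density key are illustrative assumptions, not the repo's `knapsack.py`):

```python
from collections import namedtuple

Item = namedtuple('Item', ['value', 'weight'])

def greedy_knapsack(items, max_weight, key=lambda item: item.value / item.weight):
    """Greedily take items in descending order of a key function (here: value density)."""
    taken, total_value, remaining = [], 0, max_weight
    for item in sorted(items, key=key, reverse=True):
        if item.weight <= remaining:          # item still fits in the knapsack
            taken.append(item)
            total_value += item.value
            remaining -= item.weight
    return taken, total_value

# different key functions (value, 1/weight, density) generally give different answers
print(greedy_knapsack([Item(10, 5), Item(6, 3), Item(3, 3)], max_weight=6))
```

Greedy solutions are fast (the sort dominates, O(n log n)) but are not guaranteed optimal for the 0/1 problem, which is formalised as follows: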
4 | 
5 | - each item is represented by a <value, weight> pair
6 | - the total weight allowed is w
7 | - a vector I of length n represents the set of available items
8 | - a boolean vector V of length n indicates whether each item is taken
9 | 
10 | That is, maximize $$\sum_{i=0}^{n-1} V[i]I[i].value$$
11 | 
12 | such that $$\sum_{i=0}^{n-1} V[i]I[i].weight \leq w$$
13 | 
14 | ## Brute force
15 | 
16 | From the power set of items, eliminate all subsets whose total weight exceeds w and select the best remaining subset.
17 | 
18 | This approach is exponential in n, so it is not practical.
19 | 
20 | ## Example
21 | 
22 | The example builds a menu under a maximum-calories constraint using a few greedy algorithms, each ordering the items by
23 | 
24 | - user preference (held in the value attribute)
25 | - calories (the inverse thereof)
26 | - a function combining preference and calories (held in the density attribute)
27 | 
28 | ## Reference
29 | 
30 | https://www.youtube.com/watch?v=C1lhuz6pZC0
31 | 
--------------------------------------------------------------------------------
/ml_algorithms/doc/monte_carlo_simulation/notes.md:
--------------------------------------------------------------------------------
1 | # Monte Carlo Simulation
2 | 
3 | The technique was first developed by Stanislaw Ulam, a mathematician who worked on the Manhattan Project.
4 | 
5 | A method of estimating the value of an unknown quantity using the principles of inferential statistics.
6 | 
7 | ## Inferential Statistics
8 | 
9 | - Population: the set of examples
10 | - Sample: a proper subset of the population
11 | - A **random** sample tends to exhibit the same qualities as the population.
12 | 
13 | Confidence depends on:
14 | 
15 | - sample size
16 | - variance. As variance grows, larger samples are required for the same degree of confidence.
17 | 
18 | ## Roulette Considerations
19 | 
20 | - Law of large numbers (Bernoulli's law)
21 | If the probability of an outcome is p, the difference between p and the fraction observed in the samples goes to 0 as the number of samples goes to infinity.
22 | 
23 | - Gambler's Fallacy and Regression to the mean
24 | 
25 | Gambler's Fallacy: the belief that if a particular event has occurred more frequently than normal in the past, it is less likely to happen in the future (or vice versa), even though the probability of such events does not depend on what has happened in the past.
26 | 
27 | Regression to the mean: following an extreme random event, the next random event is likely to be **less extreme**.
28 | 
29 | ## Quantifying Variation
30 | 
31 | $$ variance(X) = \frac{\sum_{x\in X}(x - \mu)^2}{|X|} $$
32 | 
33 | where $$\mu$$ is the mean
34 | 
35 | $$\sigma(X) = \sqrt{variance(X)}$$
36 | 
37 | - outliers have a big effect
38 | - the standard deviation is always considered relative to the mean
39 | 
40 | ## Empirical Rule
41 | 
42 | - approx. 68% of data lie within one standard deviation of the mean
43 | - approx. 95% of data lie within 1.96 standard deviations of the mean - the most commonly used bound
44 | - approx. 99.7% of data lie within 3 standard deviations of the mean
45 | 
46 | ### Assumptions
47 | 
48 | - the mean estimation error is zero, therefore there is no bias
49 | - the distribution of errors in the estimates is normal (mean = 0, sd = 1)
50 | 
51 | ## Probability Density Function
52 | 
53 | - Distributions are defined by a Probability Density Function (PDF)
54 | - Gives the probability of a random variable lying between two values
55 | - Defines a curve where the range on the x-axis lies between the minimum and maximum values of the variable.
56 | - Area under curve between two points defined the probability of an example falling in that range 57 | -------------------------------------------------------------------------------- /ml_algorithms/doc/svm/notes/.gitignore: -------------------------------------------------------------------------------- 1 | ## Core latex/pdflatex auxiliary files: 2 | *.aux 3 | *.lof 4 | *.log 5 | *.lot 6 | *.fls 7 | *.out 8 | *.toc 9 | *.fmt 10 | *.fot 11 | *.cb 12 | *.cb2 13 | .*.lb 14 | 15 | ## Intermediate documents: 16 | *.dvi 17 | *.xdv 18 | *-converted-to.* 19 | # these rules might exclude image files for figures etc. 20 | # *.ps 21 | # *.eps 22 | # *.pdf 23 | 24 | ## Generated if empty string is given at "Please type another file name for output:" 25 | .pdf 26 | 27 | ## Bibliography auxiliary files (bibtex/biblatex/biber): 28 | *.bbl 29 | *.bcf 30 | *.blg 31 | *-blx.aux 32 | *-blx.bib 33 | *.run.xml 34 | 35 | ## Build tool auxiliary files: 36 | *.fdb_latexmk 37 | *.synctex 38 | *.synctex(busy) 39 | *.synctex.gz 40 | *.synctex.gz(busy) 41 | *.pdfsync 42 | 43 | ## Auxiliary and intermediate files from other packages: 44 | # algorithms 45 | *.alg 46 | *.loa 47 | 48 | # achemso 49 | acs-*.bib 50 | 51 | # amsthm 52 | *.thm 53 | 54 | # beamer 55 | *.nav 56 | *.pre 57 | *.snm 58 | *.vrb 59 | 60 | # changes 61 | *.soc 62 | 63 | # cprotect 64 | *.cpt 65 | 66 | # elsarticle (documentclass of Elsevier journals) 67 | *.spl 68 | 69 | # endnotes 70 | *.ent 71 | 72 | # fixme 73 | *.lox 74 | 75 | # feynmf/feynmp 76 | *.mf 77 | *.mp 78 | *.t[1-9] 79 | *.t[1-9][0-9] 80 | *.tfm 81 | 82 | #(r)(e)ledmac/(r)(e)ledpar 83 | *.end 84 | *.?end 85 | *.[1-9] 86 | *.[1-9][0-9] 87 | *.[1-9][0-9][0-9] 88 | *.[1-9]R 89 | *.[1-9][0-9]R 90 | *.[1-9][0-9][0-9]R 91 | *.eledsec[1-9] 92 | *.eledsec[1-9]R 93 | *.eledsec[1-9][0-9] 94 | *.eledsec[1-9][0-9]R 95 | *.eledsec[1-9][0-9][0-9] 96 | *.eledsec[1-9][0-9][0-9]R 97 | 98 | # glossaries 99 | *.acn 100 | *.acr 101 | *.glg 102 | *.glo 103 | *.gls 104 | *.glsdefs 105 | 106 | # gnuplottex 107 | *-gnuplottex-* 108 | 109 | # gregoriotex 110 | *.gaux 111 | *.gtex 112 | 113 | # htlatex 114 | *.4ct 115 | *.4tc 116 | *.idv 117 | *.lg 118 | *.trc 119 | *.xref 120 | 121 | # hyperref 122 | *.brf 123 | 124 | # knitr 125 | *-concordance.tex 126 | # TODO Comment the next line if you want to keep your tikz graphics files 127 | *.tikz 128 | *-tikzDictionary 129 | 130 | # listings 131 | *.lol 132 | 133 | # makeidx 134 | *.idx 135 | *.ilg 136 | *.ind 137 | *.ist 138 | 139 | # minitoc 140 | *.maf 141 | *.mlf 142 | *.mlt 143 | *.mtc[0-9]* 144 | *.slf[0-9]* 145 | *.slt[0-9]* 146 | *.stc[0-9]* 147 | 148 | # minted 149 | _minted* 150 | *.pyg 151 | 152 | # morewrites 153 | *.mw 154 | 155 | # nomencl 156 | *.nlg 157 | *.nlo 158 | *.nls 159 | 160 | # pax 161 | *.pax 162 | 163 | # pdfpcnotes 164 | *.pdfpc 165 | 166 | # sagetex 167 | *.sagetex.sage 168 | *.sagetex.py 169 | *.sagetex.scmd 170 | 171 | # scrwfile 172 | *.wrt 173 | 174 | # sympy 175 | *.sout 176 | *.sympy 177 | sympy-plots-for-*.tex/ 178 | 179 | # pdfcomment 180 | *.upa 181 | *.upb 182 | 183 | # pythontex 184 | *.pytxcode 185 | pythontex-files-*/ 186 | 187 | # thmtools 188 | *.loe 189 | 190 | # TikZ & PGF 191 | *.dpth 192 | *.md5 193 | *.auxlock 194 | 195 | # todonotes 196 | *.tdo 197 | 198 | # easy-todo 199 | *.lod 200 | 201 | # xmpincl 202 | *.xmpi 203 | 204 | # xindy 205 | *.xdy 206 | 207 | # xypic precompiled matrices 208 | *.xyc 209 | 210 | # endfloat 211 | *.ttt 212 | *.fff 213 | 214 | # Latexian 215 | TSWLatexianTemp* 216 | 217 | ## 
Editors:
218 | # WinEdt
219 | *.bak
220 | *.sav
221 | 
222 | # Texpad
223 | .texpadtmp
224 | 
225 | # Kile
226 | *.backup
227 | 
228 | # KBibTeX
229 | *~[0-9]*
230 | 
231 | # auto folder when using emacs and auctex
232 | ./auto/*
233 | *.el
234 | 
235 | # expex forward references with \gathertags
236 | *-tags.tex
237 | 
238 | # standalone packages
239 | *.sta
240 | 
241 | # generated if using elsarticle.cls
242 | *.spl
243 | 
--------------------------------------------------------------------------------
/ml_algorithms/doc/svm/notes/README.md:
--------------------------------------------------------------------------------
1 | # BET_svm
2 | SVM for budget prediction
--------------------------------------------------------------------------------
/ml_algorithms/doc/svm/notes/budget_prediction_svm.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/carmelgafa/ml-from-scratch/c51c4f4c88829da26a448dfc6bc48d9ec706df96/ml_algorithms/doc/svm/notes/budget_prediction_svm.pdf
--------------------------------------------------------------------------------
/ml_algorithms/doc/svm/notes/fig_4.tex:
--------------------------------------------------------------------------------
1 | \usetikzlibrary{arrows}
2 | \begin{tikzpicture}
3 | 
4 | 
5 | \draw[->,ultra thick] (0,0)--(5,0) node[right]{$x$};
6 | \draw[->,ultra thick] (0,0)--(0,5) node[above]{$y$};
7 | 
8 | \node [text=blue] at (1.5,1) {\Huge -};
9 | \node [text=blue] at (0.5,2) {\Huge -};
10 | \node [text=red] at (3,3) {\huge +};
11 | \node [text=red] at (4,4) {\huge +};
12 | 
13 | \draw[red,ultra thick] (2,5)--(2,2);
14 | \draw[red,ultra thick] (5,2)--(2,2);
15 | 
16 | 
17 | \draw[blue,ultra thick] (0.5,4.5)--(1.5,3);
18 | \draw[blue,ultra thick] (1.5,3)--(1.5,1.5);
19 | \draw[blue,ultra thick] (1.5,1.5)--(3,1.5);
20 | \draw[blue,ultra thick] (3,1.5)--(4.5,0.5);
21 | 
22 | \node[red] at (4.5,2.5) {\tiny neural network};
23 | \node[blue] at (2.75,0.65) {\tiny nearest neighbour};
24 | 
25 | \end{tikzpicture}
--------------------------------------------------------------------------------
/ml_algorithms/doc/svm/notes/fig_5.tex:
--------------------------------------------------------------------------------
1 | \usetikzlibrary{arrows}
2 | \begin{tikzpicture}
3 | 
4 | 
5 | \draw[->,ultra thick] (0,0)--(5,0) node[right]{$x$};
6 | \draw[->,ultra thick] (0,0)--(0,5) node[above]{$y$};
7 | 
8 | \node [text=blue] at (1.5,1) {\Huge -};
9 | \node [text=blue] at (0.5,2) {\Huge -};
10 | \node [text=red] at (3,3) {\huge +};
11 | \node [text=red] at (4,4) {\huge +};
12 | 
13 | 
14 | 
15 | \draw[dashed] (1,5)--(5,1);
16 | 
17 | \draw[dashed] (0,2.5)--(2.5,0);
18 | 
19 | \draw(0,4.25)--(4.25,0);
20 | 
21 | \end{tikzpicture}
--------------------------------------------------------------------------------
/ml_algorithms/doc/svm/notes/fig_6.tex:
--------------------------------------------------------------------------------
1 | \usetikzlibrary{arrows}
2 | \begin{tikzpicture}
3 | 
4 | 
5 | \draw[->,ultra thick] (0,0)--(5,0) node[right]{$x$};
6 | \draw[->,ultra thick] (0,0)--(0,5) node[above]{$y$};
7 | 
8 | \node [text=blue] at (1.5,1) {\Huge -};
9 | \node [text=blue] at (0.5,2) {\Huge -};
10 | \node [text=red] at (3,3) {\huge +};
11 | \node [text=red] at (4,4) {\huge +};
12 | 
13 | 
14 | 
15 | \draw[dashed] (1,5)--(5,1);
16 | \draw[dashed] (0,2.5)--(2.5,0);
17 | \draw(0,4.25)--(4.25,0);
18 | 
19 | 
20 | 
21 | \draw[->, ultra thick, green] (0,0)--(0.9,1.1) node[text=black, right]{$\vec{w}$};
22 | \draw[->, ultra thick, black]
(0,0)--(0.7,2.8) node[text=black, right]{$\vec{u}$};
23 | 
24 | 
25 | \end{tikzpicture}
--------------------------------------------------------------------------------
/ml_algorithms/doc/svm/notes/fig_7.tex:
--------------------------------------------------------------------------------
1 | \usetikzlibrary{arrows}
2 | \begin{tikzpicture}
3 | 
4 | 
5 | \draw[->,ultra thick] (0,0)--(5,0) node[right]{$x$};
6 | \draw[->,ultra thick] (0,0)--(0,5) node[above]{$y$};
7 | 
8 | \node [text=blue] at (1.5,1) {\Huge -};
9 | \node [text=blue] at (0.5,2) {\Huge -};
10 | \node [text=red] at (3,3) {\huge +};
11 | \node [text=red] at (4,4) {\huge +};
12 | 
13 | 
14 | 
15 | \draw[dashed] (1,5)--(5,1);
16 | \draw[dashed] (0,2.5)--(2.5,0);
17 | \draw(0,4.25)--(4.25,0);
18 | 
19 | 
20 | \node at (4.5,3.5) {$c \geq 1$};
21 | \node at (0.6,0.4) {$c \leq -1$};
22 | \node at (2.9,1.4) {$-1 \leq c \leq 1$};
23 | 
24 | \end{tikzpicture}
25 | 
--------------------------------------------------------------------------------
/ml_algorithms/doc/svm/notes/fig_8.tex:
--------------------------------------------------------------------------------
1 | \usetikzlibrary{arrows}
2 | \begin{tikzpicture}
3 | 
4 | \draw[->] (0,0)--(5,0) node[right]{$x$};
5 | \draw[->] (0,0)--(0,5) node[above]{$y$};
6 | 
7 | \node [text=blue] at (2,2) {\Huge -};
8 | \node [text=blue] at (0,0) {\Huge -};
9 | \node [text=red] at (2,0) {\huge +};
10 | \node [text=red] at (0,2) {\huge +};
11 | 
12 | \draw[->] (0,-6)--(5,-6) node[right]{$x$};
13 | \draw[->] (0,-6)--(0,-1) node[above]{$y$};
14 | \draw[->] (0,-6)--(3,-3) node[above]{$z$};
15 | 
16 | \draw[dashed, gray] (1.5,-4.5)--(4,-4.5);
17 | \draw[dashed, gray] (4,-4.5)--(2.5,-6);
18 | \draw[dashed, gray] (4,-4.5)--(4,-3);
19 | \draw[dashed, gray] (4,-3)--(1.5,-3);
20 | \draw[dashed, gray] (1.5,-4.5)--(1.5,-3);
21 | \draw[dashed, gray] (2.5,-4.5)--(2.5,-6);
22 | \draw[dashed, gray] (2.5,-4.5)--(4,-3);
23 | 
24 | \node [text=red] at (1.5,-3) {\huge +};
25 | \node [text=red] at (2.5,-4.5) {\huge +};
26 | \node [text=blue] at (4,-4.5) {\Huge -};
27 | \node [text=blue] at (0,-6) {\Huge -};
28 | 
29 | \node at (2.5,3) {linearly inseparable samples};
30 | \node at (2.5,-2) {separable after transformation};
31 | 
32 | \end{tikzpicture}
--------------------------------------------------------------------------------
/ml_algorithms/doc/svm/notes/img/fig_1.JPG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/carmelgafa/ml-from-scratch/c51c4f4c88829da26a448dfc6bc48d9ec706df96/ml_algorithms/doc/svm/notes/img/fig_1.JPG
--------------------------------------------------------------------------------
/ml_algorithms/doc/svm/notes/img/fig_2.JPG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/carmelgafa/ml-from-scratch/c51c4f4c88829da26a448dfc6bc48d9ec706df96/ml_algorithms/doc/svm/notes/img/fig_2.JPG
--------------------------------------------------------------------------------
/ml_algorithms/doc/svm/notes/img/fig_3.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/carmelgafa/ml-from-scratch/c51c4f4c88829da26a448dfc6bc48d9ec706df96/ml_algorithms/doc/svm/notes/img/fig_3.jpg
--------------------------------------------------------------------------------
/ml_algorithms/doc/svm/notes/img/fig_4.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/carmelgafa/ml-from-scratch/c51c4f4c88829da26a448dfc6bc48d9ec706df96/ml_algorithms/doc/svm/notes/img/fig_4.jpg -------------------------------------------------------------------------------- /ml_algorithms/doc/svm/notes/img/fig_5.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carmelgafa/ml-from-scratch/c51c4f4c88829da26a448dfc6bc48d9ec706df96/ml_algorithms/doc/svm/notes/img/fig_5.jpg -------------------------------------------------------------------------------- /ml_algorithms/doc/svm/notes/img/fig_6.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carmelgafa/ml-from-scratch/c51c4f4c88829da26a448dfc6bc48d9ec706df96/ml_algorithms/doc/svm/notes/img/fig_6.jpg -------------------------------------------------------------------------------- /ml_algorithms/doc/svm/notes/img/fig_7.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carmelgafa/ml-from-scratch/c51c4f4c88829da26a448dfc6bc48d9ec706df96/ml_algorithms/doc/svm/notes/img/fig_7.jpg -------------------------------------------------------------------------------- /ml_algorithms/doc/svm/notes/img/fig_8.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carmelgafa/ml-from-scratch/c51c4f4c88829da26a448dfc6bc48d9ec706df96/ml_algorithms/doc/svm/notes/img/fig_8.jpg -------------------------------------------------------------------------------- /ml_algorithms/samples/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carmelgafa/ml-from-scratch/c51c4f4c88829da26a448dfc6bc48d9ec706df96/ml_algorithms/samples/__init__.py -------------------------------------------------------------------------------- /ml_algorithms/samples/pandas_examples.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | 4 | def multiply_matrix_by_vector(matrix, vector): 5 | # original data 6 | df_a = pd.DataFrame([[1,2,3],[4,5,6]]) 7 | print(df_a, '\n') 8 | 9 | # multiplier vector 10 | df_b = pd.DataFrame([2,2,1]) 11 | print(df_b, '\n') 12 | 13 | # multiply by a list - it works 14 | df_c = df_a*[2,2,1] 15 | print(df_c, '\n') 16 | 17 | # multiply by the dataframe - it works 18 | df_c = df_a*df_b.to_numpy().T 19 | print(df_c, '\n') 20 | 21 | #using a series - it works -- preferred 22 | df_c = df_a*df_b[0] 23 | print(df_c, '\n') 24 | 25 | 26 | 27 | def matrix_difference(): 28 | 29 | df_a = pd.DataFrame([[1,2,3],[4,5,6]]) 30 | print(df_a, '\n') 31 | 32 | df_b = pd.DataFrame([[1,1,1],[1,1,1]]) 33 | print(df_b, '\n') 34 | 35 | df_c = df_a - df_b 36 | print(df_c, '\n') 37 | 38 | 39 | if __name__ == '__main__': 40 | #multiply_matrix_by_vector() 41 | matrix_difference() 42 | -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carmelgafa/ml-from-scratch/c51c4f4c88829da26a448dfc6bc48d9ec706df96/ml_algorithms/src/algorithms/__init__.py -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/__pycache__/__init__.cpython-39.pyc: 
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/carmelgafa/ml-from-scratch/c51c4f4c88829da26a448dfc6bc48d9ec706df96/ml_algorithms/src/algorithms/__pycache__/__init__.cpython-39.pyc
--------------------------------------------------------------------------------
/ml_algorithms/src/algorithms/cnn/bud.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/carmelgafa/ml-from-scratch/c51c4f4c88829da26a448dfc6bc48d9ec706df96/ml_algorithms/src/algorithms/cnn/bud.jpg
--------------------------------------------------------------------------------
/ml_algorithms/src/algorithms/cnn/cnn.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | 
3 | def convolution2d(x, kernel, bias, stride=1, padding=0):
4 |     """
5 |     Convolution 2D - minimal single-channel sketch (assumes 2D numpy arrays).
6 |     """
7 |     x = np.pad(x, padding)                          # zero-pad both spatial axes
8 |     k_h, k_w = kernel.shape
9 |     out_h, out_w = (x.shape[0] - k_h)//stride + 1, (x.shape[1] - k_w)//stride + 1
10 |     out = np.zeros((out_h, out_w))
11 |     for i in range(out_h):                          # slide the kernel over the image
12 |         for j in range(out_w):
13 |             out[i, j] = np.sum(x[i*stride:i*stride+k_h, j*stride:j*stride+k_w]*kernel) + bias
14 |     return out
--------------------------------------------------------------------------------
/ml_algorithms/src/algorithms/cnn/image_convolution.py:
--------------------------------------------------------------------------------
1 | ''' image manipulation '''
2 | #pylint: disable = E0401
3 | import os
4 | import matplotlib.pyplot as plt
5 | 
6 | 
7 | def load_image_to_rgb(image_path):
8 |     '''load an image and split it into its R, G, B channels'''
9 | 
10 |     image = plt.imread(image_path)
11 | 
12 |     r, g, b = image[:, :, 0], image[:, :, 1], image[:, :, 2] # matplotlib reads images in RGB order
13 | 
14 |     return r, g, b
15 | 
16 | def load_image_to_grayscale(image_path):
17 |     '''load and convert image to grayscale'''
18 | 
19 |     image = plt.imread(image_path)
20 | 
21 |     # convert image to grayscale by averaging the channels
22 |     image = image.mean(axis=2)
23 | 
24 |     return image
25 | 
26 | if __name__ == '__main__':
27 |     BUD_FILENAME = 'bud.jpg'
28 |     bud_path = os.path.join(os.path.dirname(__file__), BUD_FILENAME)
29 | 
30 | 
31 |     bud_image_grayscale = load_image_to_grayscale(bud_path)
32 |     plt.imshow(bud_image_grayscale, cmap='gray')
33 |     plt.show()
34 | 
35 | 
36 |     bud_image_r, bud_image_g, bud_image_b = load_image_to_rgb(bud_path)
37 |     plt.imshow(bud_image_r, cmap='Reds')
38 |     plt.show()
39 |     plt.imshow(bud_image_g, cmap='Greens')
40 |     plt.show()
41 |     plt.imshow(bud_image_b, cmap='Blues')
42 |     plt.show()
43 | 
--------------------------------------------------------------------------------
/ml_algorithms/src/algorithms/fuzzy_inference/fuzzy_example_2in_1out.py:
--------------------------------------------------------------------------------
1 | from fuzzy_system.fuzzy_variable_output import FuzzyOutputVariable
2 | from fuzzy_system.fuzzy_variable_input import FuzzyInputVariable
3 | # from fuzzy_system.fuzzy_variable import FuzzyVariable
4 | from fuzzy_system.fuzzy_system import FuzzySystem
5 | 
6 | temp = FuzzyInputVariable('Temperature', 10, 40, 100)
7 | temp.add_triangular('Cold', 10, 10, 25)
8 | temp.add_triangular('Medium', 15, 25, 35)
9 | temp.add_triangular('Hot', 25, 40, 40)
10 | 
11 | humidity = FuzzyInputVariable('Humidity', 20, 100, 100)
12 | humidity.add_triangular('Wet', 20, 20, 60)
13 | humidity.add_trapezoidal('Normal', 30, 50, 70, 90)
14 | humidity.add_triangular('Dry', 60, 100, 100)
15 | 
16 | motor_speed = FuzzyOutputVariable('Speed', 0, 100, 100)
17 | motor_speed.add_triangular('Slow', 0, 0, 50)
18 | motor_speed.add_triangular('Moderate', 10, 50, 90)
19 | motor_speed.add_triangular('Fast', 50, 100, 100)
20 | 
21 | system = FuzzySystem()
22 | system.add_input_variable(temp)
23 | system.add_input_variable(humidity)
24 | system.add_output_variable(motor_speed)
25 | 
26 | 
system.add_rule( 27 | { 'Temperature':'Cold', 28 | 'Humidity':'Wet' }, 29 | { 'Speed':'Slow'}) 30 | 31 | system.add_rule( 32 | { 'Temperature':'Cold', 33 | 'Humidity':'Normal' }, 34 | { 'Speed':'Slow'}) 35 | 36 | system.add_rule( 37 | { 'Temperature':'Medium', 38 | 'Humidity':'Wet' }, 39 | { 'Speed':'Slow'}) 40 | 41 | system.add_rule( 42 | { 'Temperature':'Medium', 43 | 'Humidity':'Normal' }, 44 | { 'Speed':'Moderate'}) 45 | 46 | system.add_rule( 47 | { 'Temperature':'Cold', 48 | 'Humidity':'Dry' }, 49 | { 'Speed':'Moderate'}) 50 | 51 | system.add_rule( 52 | { 'Temperature':'Hot', 53 | 'Humidity':'Wet' }, 54 | { 'Speed':'Moderate'}) 55 | 56 | system.add_rule( 57 | { 'Temperature':'Hot', 58 | 'Humidity':'Normal' }, 59 | { 'Speed':'Fast'}) 60 | 61 | system.add_rule( 62 | { 'Temperature':'Hot', 63 | 'Humidity':'Dry' }, 64 | { 'Speed':'Fast'}) 65 | 66 | system.add_rule( 67 | { 'Temperature':'Medium', 68 | 'Humidity':'Dry' }, 69 | { 'Speed':'Fast'}) 70 | 71 | output = system.evaluate_output({ 72 | 'Temperature':18, 73 | 'Humidity':60 74 | }) 75 | 76 | print(output) 77 | # print('fuzzification\n-------------\n', info['fuzzification']) 78 | # print('rules\n-----\n', info['rules']) 79 | 80 | system.plot_system() -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/fuzzy_inference/fuzzy_example_2in_2out.py: -------------------------------------------------------------------------------- 1 | from fuzzy_system.fuzzy_variable_output import FuzzyOutputVariable 2 | from fuzzy_system.fuzzy_variable_input import FuzzyInputVariable 3 | 4 | 5 | from fuzzy_system.fuzzy_system import FuzzySystem 6 | 7 | x1 = FuzzyInputVariable('x1', 0, 100, 100) 8 | x1.add_triangular('S', 0, 25, 50) 9 | x1.add_triangular('M', 25, 50, 75) 10 | x1.add_triangular('L', 50, 75, 100) 11 | 12 | x2 = FuzzyInputVariable('x2', 0, 100, 100) 13 | x2.add_triangular('S', 0, 25, 50) 14 | x2.add_triangular('M', 25, 50, 75) 15 | x2.add_triangular('L', 50, 75, 100) 16 | 17 | y = FuzzyOutputVariable('y', 0, 100, 100) 18 | y.add_triangular('S', 0, 25, 50) 19 | y.add_triangular('M', 25, 50, 75) 20 | y.add_triangular('L', 50, 75, 100) 21 | 22 | z = FuzzyOutputVariable('z', 0, 100, 100) 23 | z.add_triangular('S', 0, 25, 50) 24 | z.add_triangular('M', 25, 50, 75) 25 | z.add_triangular('L', 50, 75, 100) 26 | 27 | system = FuzzySystem() 28 | system.add_input_variable(x1) 29 | system.add_input_variable(x2) 30 | system.add_output_variable(y) 31 | system.add_output_variable(z) 32 | 33 | system.add_rule( 34 | { 'x1':'S', 35 | 'x2':'S' }, 36 | { 'y':'S', 37 | 'z':'L' }) 38 | 39 | system.add_rule( 40 | { 'x1':'M', 41 | 'x2':'M' }, 42 | { 'y':'M', 43 | 'z':'M' }) 44 | 45 | system.add_rule( 46 | { 'x1':'L', 47 | 'x2':'L' }, 48 | { 'y':'L', 49 | 'z':'S' }) 50 | 51 | system.add_rule( 52 | { 'x1':'S', 53 | 'x2':'M' }, 54 | { 'y':'S', 55 | 'z':'L' }) 56 | 57 | system.add_rule( 58 | { 'x1':'M', 59 | 'x2':'S' }, 60 | { 'y':'S', 61 | 'z':'L' }) 62 | 63 | system.add_rule( 64 | { 'x1':'L', 65 | 'x2':'M' }, 66 | { 'y':'L', 67 | 'z':'S' }) 68 | 69 | system.add_rule( 70 | { 'x1':'M', 71 | 'x2':'L' }, 72 | { 'y':'L', 73 | 'z':'S' }) 74 | 75 | system.add_rule( 76 | { 'x1':'L', 77 | 'x2':'S' }, 78 | { 'y':'M', 79 | 'z':'M' }) 80 | 81 | system.add_rule( 82 | { 'x1':'S', 83 | 'x2':'L' }, 84 | { 'y':'M', 85 | 'z':'M' }) 86 | 87 | output = system.evaluate_output({ 88 | 'x1':35, 89 | 'x2':75 90 | }) 91 | 92 | 93 | print(output) 94 | -------------------------------------------------------------------------------- 
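
The two example scripts above drive a Mamdani-style inference cycle; the `FuzzySystem` class that implements it (fuzzy_system.py) is not reproduced in this section. As a rough illustration of the steps the examples rely on - fuzzify, take the min of the antecedent memberships as the rule strength, clip the consequent sets, max-aggregate, and defuzzify by centroid - here is a self-contained numeric sketch. It is an independent toy, not the repo's implementation, and the set shapes and input values are assumptions:

```python
# Illustrative Mamdani inference cycle: fuzzify -> rule strength (min) ->
# clip consequents -> aggregate (max) -> centroid defuzzification.
import numpy as np

def tri(x, a, b, c):
    """Triangular membership with shoulder handling for a == b or b == c."""
    x = np.asarray(x, dtype=float)
    left = np.where(b > a, (x - a) / max(b - a, 1e-12), (x >= a).astype(float))
    right = np.where(c > b, (c - x) / max(c - b, 1e-12), (x <= c).astype(float))
    return np.clip(np.minimum(left, right), 0.0, 1.0)

speed = np.linspace(0, 100, 1001)            # output universe for 'Speed'
slow = tri(speed, 0, 0, 50)
moderate = tri(speed, 10, 50, 90)

# fuzzify crisp inputs Temperature=18, Humidity=55
t_cold = float(tri(18, 10, 10, 25))
t_medium = float(tri(18, 15, 25, 35))
h_wet = float(tri(55, 20, 20, 60))
h_normal = float(tri(55, 30, 50, 70))        # trapezoid simplified to a triangle here

r1 = min(t_cold, h_wet)                      # IF Cold AND Wet THEN Slow
r2 = min(t_medium, h_normal)                 # IF Medium AND Normal THEN Moderate

# clip each consequent by its rule strength, aggregate with max, take the centroid
agg = np.maximum(np.minimum(slow, r1), np.minimum(moderate, r2))
print('crisp speed:', np.sum(agg * speed) / np.sum(agg))
```

The repo's classes appear to wrap the same cycle behind `add_rule()` and `evaluate_output()`, with `FuzzyOutputVariable.add_rule_contribution()` performing the union (max) aggregation, as shown in fuzzy_variable_output.py later in this section.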
/ml_algorithms/src/algorithms/fuzzy_inference/fuzzy_system/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carmelgafa/ml-from-scratch/c51c4f4c88829da26a448dfc6bc48d9ec706df96/ml_algorithms/src/algorithms/fuzzy_inference/fuzzy_system/__init__.py -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/fuzzy_inference/fuzzy_system/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carmelgafa/ml-from-scratch/c51c4f4c88829da26a448dfc6bc48d9ec706df96/ml_algorithms/src/algorithms/fuzzy_inference/fuzzy_system/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/fuzzy_inference/fuzzy_system/__pycache__/fuzzy_clause.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carmelgafa/ml-from-scratch/c51c4f4c88829da26a448dfc6bc48d9ec706df96/ml_algorithms/src/algorithms/fuzzy_inference/fuzzy_system/__pycache__/fuzzy_clause.cpython-38.pyc -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/fuzzy_inference/fuzzy_system/__pycache__/fuzzy_rule.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carmelgafa/ml-from-scratch/c51c4f4c88829da26a448dfc6bc48d9ec706df96/ml_algorithms/src/algorithms/fuzzy_inference/fuzzy_system/__pycache__/fuzzy_rule.cpython-38.pyc -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/fuzzy_inference/fuzzy_system/__pycache__/fuzzy_set.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carmelgafa/ml-from-scratch/c51c4f4c88829da26a448dfc6bc48d9ec706df96/ml_algorithms/src/algorithms/fuzzy_inference/fuzzy_system/__pycache__/fuzzy_set.cpython-38.pyc -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/fuzzy_inference/fuzzy_system/__pycache__/fuzzy_system.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carmelgafa/ml-from-scratch/c51c4f4c88829da26a448dfc6bc48d9ec706df96/ml_algorithms/src/algorithms/fuzzy_inference/fuzzy_system/__pycache__/fuzzy_system.cpython-38.pyc -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/fuzzy_inference/fuzzy_system/__pycache__/fuzzy_variable.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carmelgafa/ml-from-scratch/c51c4f4c88829da26a448dfc6bc48d9ec706df96/ml_algorithms/src/algorithms/fuzzy_inference/fuzzy_system/__pycache__/fuzzy_variable.cpython-38.pyc -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/fuzzy_inference/fuzzy_system/__pycache__/fuzzy_variable_input.cpython-38.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/carmelgafa/ml-from-scratch/c51c4f4c88829da26a448dfc6bc48d9ec706df96/ml_algorithms/src/algorithms/fuzzy_inference/fuzzy_system/__pycache__/fuzzy_variable_input.cpython-38.pyc -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/fuzzy_inference/fuzzy_system/__pycache__/fuzzy_variable_output.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carmelgafa/ml-from-scratch/c51c4f4c88829da26a448dfc6bc48d9ec706df96/ml_algorithms/src/algorithms/fuzzy_inference/fuzzy_system/__pycache__/fuzzy_variable_output.cpython-38.pyc -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/fuzzy_inference/fuzzy_system/fuzzy_clause.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Fuzzy Clause class. Used in Fuzzy rule 3 | ''' 4 | class FuzzyClause(): 5 | ''' 6 | A fuzzy clause of the type 'variable is set' 7 | used in fuzzy IF ... THEN ... rules 8 | clauses can be antecedent (if part) or consequent 9 | (then part) 10 | ''' 11 | 12 | def __init__(self, variable, f_set, degree=1): 13 | ''' 14 | initialization of the fuzzy clause 15 | 16 | Arguments: 17 | ---------- 18 | variable -- the clause variable in 'variable is set' 19 | set -- the clause set in 'variable is set' 20 | ''' 21 | 22 | if f_set is None: 23 | raise Exception('set none') 24 | 25 | if f_set.name == '': 26 | raise Exception(str(f_set), 'no set name') 27 | 28 | 29 | self._variable = variable 30 | self._set = f_set 31 | 32 | def __str__(self): 33 | ''' 34 | string representation of the clause. 35 | 36 | Returns: 37 | -------- 38 | str: str, string representation of the clause in the form 39 | A is x 40 | ''' 41 | return f'{self._variable.name} is {self._set.name}' 42 | 43 | @property 44 | def variable_name(self): 45 | ''' 46 | returns the name of the clause variable 47 | 48 | Returns: 49 | -------- 50 | variable_name: str, name of variable 51 | ''' 52 | return self._variable.name 53 | 54 | @property 55 | def set_name(self): 56 | ''' 57 | returns the name of the clause variable 58 | 59 | Returns: 60 | -------- 61 | variable_name: str, name of variable 62 | ''' 63 | return self._set.name 64 | 65 | def evaluate_antecedent(self): 66 | ''' 67 | Used when set is antecedent. 68 | returns the set degree of membership. 69 | 70 | Returns: 71 | -------- 72 | dom -- number, the set degree of membership given a value for 73 | that variable. This value is determined at an earlier stage 74 | and stored in the set 75 | ''' 76 | return self._set.last_dom_value 77 | 78 | def evaluate_consequent(self, dom): 79 | ''' 80 | Used when clause is consequent. 
81 | 
82 |         Arguments:
83 |         -----------
84 |         dom -- number, scalar value from the antecedent clauses
85 | 
86 |         Returns:
87 |         --------
88 |         None -- the min of the consequent set with the scalar value is
89 |         added to the output variable's output distribution
90 |         '''
91 |         self._variable.add_rule_contribution(self._set.min_scalar(dom))
--------------------------------------------------------------------------------
/ml_algorithms/src/algorithms/fuzzy_inference/fuzzy_system/fuzzy_rule.py:
--------------------------------------------------------------------------------
1 | from .fuzzy_clause import FuzzyClause
2 | 
3 | class FuzzyRule():
4 |     '''
5 |     A fuzzy rule of the type
6 |     IF [antecedent clauses] THEN [consequent clauses]
7 |     '''
8 | 
9 |     def __init__(self):
10 |         '''
11 |         initializes the rule. Two data structures are necessary:
12 |         the antecedent clauses list
13 |         the consequent clauses list
14 |         '''
15 |         self._antecedent = []
16 |         self._consequent = []
17 | 
18 |     def __str__(self):
19 |         '''
20 |         string representation of the rule.
21 | 
22 |         Returns:
23 |         --------
24 |         str: str, string representation of the rule in the form
25 |         IF [antecedent clauses] THEN [consequent clauses]
26 |         '''
27 |         ante = ' and '.join(map(str, self._antecedent))
28 |         cons = ' and '.join(map(str, self._consequent))
29 |         return f'If {ante} then {cons}'
30 | 
31 |     def add_antecedent_clause(self, var, f_set):
32 |         '''
33 |         adds an antecedent clause to the rule
34 | 
35 |         Arguments:
36 |         -----------
37 |         var, f_set -- the variable and fuzzy set forming the antecedent clause
38 |         '''
39 |         self._antecedent.append(FuzzyClause(var, f_set))
40 | 
41 |     def add_consequent_clause(self, var, f_set):
42 |         '''
43 |         adds a consequent clause to the rule
44 | 
45 |         Arguments:
46 |         -----------
47 |         var, f_set -- the variable and fuzzy set forming the consequent clause
48 |         '''
49 |         self._consequent.append(FuzzyClause(var, f_set))
50 | 
51 |     def evaluate(self):
52 |         '''
53 |         evaluation of the rule.
54 |         the antecedent clauses are evaluated and the minimum degree of
55 |         membership is retained as the rule strength.
56 |         The rule strength is then used in the consequent clauses to min (clip)
57 |         the consequent sets.
58 |         Each output variable updates its output distribution with the clipped set.
59 | 
60 |         Returns:
61 |         --------
62 |         None -- the rule contributions are accumulated in the output
63 |         variables
64 |         '''
65 |         # rule dom initialize to 1 as min operator will be performed
66 |         rule_strength = 1
67 | 
68 |         # execute all antecedent clauses, keeping the minimum of the
69 |         # returned doms to determine the rule strength
70 |         for ante_clause in self._antecedent:
71 |             rule_strength = min(ante_clause.evaluate_antecedent(), rule_strength)
72 | 
73 |         # execute consequent clauses, each output variable will update its output_distribution set
74 |         for consequent_clause in self._consequent:
75 |             consequent_clause.evaluate_consequent(rule_strength)
76 | 
77 |     def evaluate_info(self):
78 |         '''
79 |         evaluation of the rule, also returning diagnostic information.
80 |         the antecedent clauses are evaluated and the minimum degree of
81 |         membership is retained as the rule strength.
82 |         The rule strength is then used in the consequent clauses to min (clip)
83 |         the consequent sets, updating each output variable's output
84 |         distribution exactly as evaluate() does.
85 | 
86 |         Returns:
87 |         --------
88 |         info -- str, the rule strength followed by the rule, in the form
89 |         'strength : rule'
90 |         '''
91 |         # rule dom initialize to 1 as min operator will be performed
92 |         rule_strength = 1
93 | 
94 | 
95 |         # execute all antecedent clauses, keeping the minimum of the
96 |         # returned doms to determine the rule strength
97 |         for ante_clause in self._antecedent:
98 |             rule_strength = min(ante_clause.evaluate_antecedent(), rule_strength)
99 | 
100 |         # execute consequent clauses, each output variable will update its output_distribution set
101 |         for consequent_clause in self._consequent:
102 |             consequent_clause.evaluate_consequent(rule_strength)
103 | 
104 |         return f'{rule_strength} : {self}'
--------------------------------------------------------------------------------
/ml_algorithms/src/algorithms/fuzzy_inference/fuzzy_system/fuzzy_variable.py:
--------------------------------------------------------------------------------
1 | from .fuzzy_set import FuzzySet
2 | import matplotlib.pyplot as plt
3 | import numpy as np
4 | 
5 | class FuzzyVariable():
6 |     '''
7 |     A type-1 fuzzy variable that is made up of a number of type-1 fuzzy sets
8 |     '''
9 |     def __init__(self, name, min_val, max_val, res):
10 |         '''
11 |         creates a new type-1 fuzzy variable (universe)
12 | 
13 |         Arguments:
14 |         ----------
15 |         min_val -- number, minimum value of variable
16 |         max_val -- number, maximum value of variable
17 |         res -- int, resolution of variable
18 |         '''
19 |         self._sets={}
20 |         self._max_val = max_val
21 |         self._min_val = min_val
22 |         self._res = res
23 |         self._name = name
24 | 
25 |     def __str__(self):
26 |         return ', '.join(self._sets.keys())
27 | 
28 |     @property
29 |     def name(self):
30 |         return self._name
31 | 
32 |     def _add_set(self, name, f_set):
33 |         '''
34 |         adds a fuzzy set to the variable
35 | 
36 |         Arguments:
37 |         ----------
38 |         name -- string, name of the set
39 |         f_set -- FuzzySet, the set
40 |         '''
41 |         self._sets[name] = f_set
42 | 
43 |     def get_set(self, name):
44 |         '''
45 |         returns a set given the name
46 |         Arguments:
47 |         ----------
48 |         name -- str, set name
49 | 
50 |         Returns:
51 |         --------
52 |         set -- FuzzySet, the set
53 |         '''
54 |         return self._sets[name]
55 | 
56 |     def add_triangular(self, name, low, mid, high):
57 |         new_set = FuzzySet.create_triangular(name, self._min_val, self._max_val, self._res, low, mid, high)
58 |         self._add_set(name, new_set)
59 |         return new_set
60 | 
61 |     def add_trapezoidal(self, name, a, b, c, d):
62 |         new_set = FuzzySet.
create_trapezoidal(name, self._min_val, self._max_val, self._res, a, b, c, d) 63 | self._add_set(name, new_set) 64 | return new_set 65 | 66 | def plot_variable(self, ax=None, show=True): 67 | ''' 68 | plots a graphical representation of the fuzzy variable 69 | 70 | Reference: 71 | ---------- 72 | https://stackoverflow.com/questions/4700614/how-to-put-the-legend-out-of-the-plot 73 | ''' 74 | if ax == None: 75 | ax = plt.subplot(111) 76 | 77 | for n ,s in self._sets.items(): 78 | ax.plot(s.domain_elements(), s.dom_elements(), label=n) 79 | 80 | # Shrink current axis by 20% 81 | pos = ax.get_position() 82 | ax.set_position([pos.x0, pos.y0, pos.width * 0.8, pos.height]) 83 | ax.grid(True, which='both', alpha=0.4) 84 | ax.set_title(self._name) 85 | ax.set(xlabel='x', ylabel='$\mu (x)$') 86 | 87 | # Put a legend to the right of the current axis 88 | ax.legend(loc='center left', bbox_to_anchor=(1, 0.5)) 89 | 90 | if show: 91 | plt.show() 92 | -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/fuzzy_inference/fuzzy_system/fuzzy_variable_input.py: -------------------------------------------------------------------------------- 1 | from .fuzzy_variable import FuzzyVariable 2 | 3 | class FuzzyInputVariable(FuzzyVariable): 4 | 5 | def __init__(self, name, min_val, max_val, res): 6 | super().__init__(name, min_val, max_val, res) 7 | 8 | def fuzzify(self, value): 9 | ''' 10 | performs fuzzification of the variable. used when the 11 | variable is an input one 12 | 13 | Arguments: 14 | ----------- 15 | value -- number, input value for the variable 16 | 17 | ''' 18 | # get dom for each set and store it - it will be required for each rule 19 | for set_name, f_set in self._sets.items(): 20 | f_set.last_dom_value = f_set[value] 21 | 22 | def fuzzify_info(self, value): 23 | ''' 24 | performs fuzzification of the variable. 
used when the 25 | variable is an input one 26 | 27 | Arguments: 28 | ----------- 29 | value -- number, input value for the variable 30 | 31 | ''' 32 | # get dom for each set and store it - it will be required for each rule 33 | for set_name, f_set in self._sets.items(): 34 | f_set.last_dom_value = f_set[value] 35 | 36 | res = [] 37 | 38 | res.append(self._name) 39 | res.append('\n') 40 | 41 | for _, f_set in self._sets.items(): 42 | res.append(f_set.name) 43 | res.append(str(f_set.last_dom_value)) 44 | res.append('\n') 45 | 46 | return ' '.join(res) 47 | 48 | 49 | if __name__ == "__main__": 50 | pass -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/fuzzy_inference/fuzzy_system/fuzzy_variable_output.py: -------------------------------------------------------------------------------- 1 | from .fuzzy_variable import FuzzyVariable 2 | from .fuzzy_set import FuzzySet 3 | 4 | class FuzzyOutputVariable(FuzzyVariable): 5 | 6 | def __init__(self, name, min_val, max_val, res): 7 | super().__init__(name, min_val, max_val, res) 8 | self._output_distribution = FuzzySet(name, min_val, max_val, res) 9 | 10 | def clear_output_distribution(self): 11 | self._output_distribution.clear_set() 12 | 13 | def add_rule_contribution(self, rule_consequence): 14 | self._output_distribution = self._output_distribution.union(rule_consequence) 15 | 16 | def get_crisp_output(self): 17 | return self._output_distribution.cog_defuzzify() 18 | 19 | def get_crisp_output_info(self): 20 | return self._output_distribution.cog_defuzzify(), self._output_distribution 21 | 22 | 23 | if __name__ == "__main__": 24 | pass -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/fuzzy_learning/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carmelgafa/ml-from-scratch/c51c4f4c88829da26a448dfc6bc48d9ec706df96/ml_algorithms/src/algorithms/fuzzy_learning/__init__.py -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/fuzzy_learning/data/Wine Quality Datasets.url: -------------------------------------------------------------------------------- 1 | [InternetShortcut] 2 | URL=http://www3.dsi.uminho.pt/pcortez/wine/ 3 | -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/fuzzy_learning/data/sample_set.csv: -------------------------------------------------------------------------------- 1 | X,y 2 | 0, 0.1 3 | 1,0.4 4 | 2,3 5 | 3,2.8 6 | 4,3.5 7 | 5,5.9 8 | 6,5.8 9 | 7,7.2 10 | 8,7.5 11 | 9,11 12 | 10,9.7 13 | -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/fuzzy_learning/data/sbp_age.csv: -------------------------------------------------------------------------------- 1 | Age,SBP 2 | 60,117 3 | 61,120 4 | 74,145 5 | 57,129 6 | 63,132 7 | 68,135 8 | 67,129 9 | 66,110 10 | 77,163 11 | 63,136 12 | 54,115 13 | 63,118 14 | 76,132 15 | 60,111 16 | 61,112 17 | 65,147 18 | 79,138 19 | -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/fuzzy_learning/data/sensor_data.csv: -------------------------------------------------------------------------------- 1 | x,Y 2 | 0,1.000 3 | 1,0.815 4 | 2,0.940 5 | 3,0.833 6 | 4,2.032 7 | 5,2.141 8 | 6,3.183 9 | 7,0.214 10 | 8,1.769 11 | 9,-0.181 12 | 10,0.906 13 | 11,2.888 14 | 12,-0.271 15 
| 13,1.103 16 | 14,5.057 17 | 15,4.582 18 | 16,1.911 19 | 17,5.965 20 | 18,8.902 21 | 19,0.490 22 | 20,10.397 23 | 21,9.897 24 | 22,6.032 25 | 23,4.200 26 | 24,16.434 27 | 25,8.384 28 | 26,5.565 29 | 27,6.577 30 | 28,20.259 31 | 29,17.668 32 | 30,22.978 33 | 31,23.467 34 | 32,21.809 35 | 33,32.593 36 | 34,23.070 37 | 35,29.195 38 | 36,38.028 39 | 37,36.635 40 | 38,45.651 41 | 39,42.955 42 | 40,50.169 43 | 41,38.612 44 | 42,47.907 45 | 43,54.806 46 | 44,55.171 47 | 45,65.676 48 | 46,69.167 49 | 47,81.936 50 | 48,100.925 51 | 49,102.745 52 | -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/fuzzy_learning/data/winequality-names.txt: -------------------------------------------------------------------------------- 1 | Citation Request: 2 | This dataset is public available for research. The details are described in [Cortez et al., 2009]. 3 | Please include this citation if you plan to use this database: 4 | 5 | P. Cortez, A. Cerdeira, F. Almeida, T. Matos and J. Reis. 6 | Modeling wine preferences by data mining from physicochemical properties. 7 | In Decision Support Systems>, Elsevier, 47(4):547-553. ISSN: 0167-9236. 8 | 9 | Available at: [@Elsevier] http://dx.doi.org/10.1016/j.dss.2009.05.016 10 | [Pre-press (pdf)] http://www3.dsi.uminho.pt/pcortez/winequality09.pdf 11 | [bib] http://www3.dsi.uminho.pt/pcortez/dss09.bib 12 | 13 | 1. Title: Wine Quality 14 | 15 | 2. Sources 16 | Created by: Paulo Cortez (Univ. Minho), António Cerdeira, Fernando Almeida, Telmo Matos and José Reis (CVRVV) @ 2009 17 | 18 | 3. Past Usage: 19 | 20 | P. Cortez, A. Cerdeira, F. Almeida, T. Matos and J. Reis. 21 | Modeling wine preferences by data mining from physicochemical properties. 22 | In Decision Support Systems>, Elsevier, 47(4):547-553. ISSN: 0167-9236. 23 | 24 | In the above reference, two datasets were created, using red and white wine samples. 25 | The inputs include objective tests (e.g. PH values) and the output is based on sensory data 26 | (median of at least 3 evaluations made by wine experts). Each expert graded the wine quality 27 | between 0 (very bad) and 10 (very excellent). Several data mining methods were applied to model 28 | these datasets under a regression approach. The support vector machine model achieved the 29 | best results. Several metrics were computed: MAD, confusion matrix for a fixed error tolerance (T), 30 | etc. Also, we plot the relative importances of the input variables (as measured by a sensitivity 31 | analysis procedure). 32 | 33 | 4. Relevant Information: 34 | 35 | These datasets can be viewed as classification or regression tasks. 36 | The classes are ordered and not balanced (e.g. there are munch more normal wines than 37 | excellent or poor ones). Outlier detection algorithms could be used to detect the few excellent 38 | or poor wines. Also, we are not sure if all input variables are relevant. So 39 | it could be interesting to test feature selection methods. 40 | 41 | 5. Number of Instances: red wine - 1599; white wine - 4898. 42 | 43 | 6. Number of Attributes: 11 + output attribute 44 | 45 | Note: several of the attributes may be correlated, thus it makes sense to apply some sort of 46 | feature selection. 47 | 48 | 7. Attribute information: 49 | 50 | For more information, read [Cortez et al., 2009]. 
51 | 52 | Input variables (based on physicochemical tests): 53 | 1 - fixed acidity 54 | 2 - volatile acidity 55 | 3 - citric acid 56 | 4 - residual sugar 57 | 5 - chlorides 58 | 6 - free sulfur dioxide 59 | 7 - total sulfur dioxide 60 | 8 - density 61 | 9 - pH 62 | 10 - sulphates 63 | 11 - alcohol 64 | Output variable (based on sensory data): 65 | 12 - quality (score between 0 and 10) 66 | 67 | 8. Missing Attribute Values: None 68 | -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/fuzzy_learning/data/winequality-names.txt.bak: -------------------------------------------------------------------------------- 1 | Citation Request: 2 | This dataset is public available for research. The details are described in [Cortez et al., 2009]. 3 | Please include this citation if you plan to use this database: 4 | 5 | P. Cortez, A. Cerdeira, F. Almeida, T. Matos and J. Reis. 6 | Modeling wine preferences by data mining from physicochemical properties. 7 | In Decision Support Systems>, Elsevier, 47(4):547-553. ISSN: 0167-9236. 8 | 9 | Available at: [@Elsevier] http://dx.doi.org/10.1016/j.dss.2009.05.016 10 | [Pre-press (pdf)] http://www3.dsi.uminho.pt/pcortez/winequality09.pdf 11 | [bib] http://www3.dsi.uminho.pt/pcortez/dss09.bib 12 | 13 | 1. Title: Wine Quality 14 | 15 | 2. Sources 16 | Created by: Paulo Cortez (Univ. Minho), António Cerdeira, Fernando Almeida, Telmo Matos and José Reis (CVRVV) @ 2009 17 | 18 | 3. Past Usage: 19 | 20 | P. Cortez, A. Cerdeira, F. Almeida, T. Matos and J. Reis. 21 | Modeling wine preferences by data mining from physicochemical properties. 22 | In Decision Support Systems>, Elsevier, 47(4):547-553. ISSN: 0167-9236. 23 | 24 | In the above reference, two datasets were created, using red and white wine samples. 25 | The inputs include objective tests (e.g. PH values) and the output is based on sensory data 26 | (median of at least 3 evaluations made by wine experts). Each expert graded the wine quality 27 | between 0 (very bad) and 10 (very excellent). Several data mining methods were applied to model 28 | these datasets under a regression approach. The support vector machine model achieved the 29 | best results. Several metrics were computed: MAD, confusion matrix for a fixed error tolerante (T), 30 | etc. Also, we plot the relative importances of the input variables (as measured by a sensitivity 31 | analysis procedure). 32 | 33 | 4. Relevant Information: 34 | 35 | These datasets can be viewed as classification or regression tasks. 36 | The classes are ordered and not balanced (e.g. there are munch more normal wines than 37 | excellent or poor ones). Outlier detection algorithms could be used to detect the few excellent 38 | or poor wines. Also, we are not sure if all input variables are relevant. So 39 | it could be interesting to test feature selection methods. 40 | 41 | 5. Number of Instances: red wine - 1599; white wine - 4898. 42 | 43 | 6. Number of Attributes: 11 + output attribute 44 | 45 | Note: several of the attributes may be correlated, thus it makes sense to apply some sort of 46 | feature selection. 47 | 48 | 7. Attribute information: 49 | 50 | For more information, read [Cortez et al., 2009]. 
/ml_algorithms/src/algorithms/fuzzy_learning/fuzzy_learning_sample_set.py:
--------------------------------------------------------------------------------
1 | from fuzzy_system.fuzzy_learning_helper import load_sample_set
2 | from fuzzy_system.fuzzy_learning_system import FuzzyLearningSystem
3 | import numpy as np
4 | import matplotlib.pyplot as plt
5 | import pandas as pd
6 | import random
7 | 
8 | def generate_sample_data():  # NOTE: unfinished stub -- only prints a single random draw
9 |     random.seed(42)
10 |     df = pd.DataFrame()
11 |     r = random.uniform(-1,1)
12 |     print(r)
13 | 
14 | 
15 | def test_model():
16 |     X, y = load_sample_set()
17 | 
18 |     # X_train, X_test, y_train, y_test = split_train_test(X, y, test_size = 0.10)
19 |     X_train = X
20 |     X_test = X
21 |     y_train = y
22 |     y_test = y
23 | 
24 |     learning_system = FuzzyLearningSystem(res=1000)
25 | 
26 |     learning_system.fit(X_train, y_train, X_n=4, y_n=2)
27 |     print(learning_system)
28 | 
29 |     score = learning_system.score(X_test, y_test)
30 |     print(score)
31 | 
32 |     df = pd.DataFrame()
33 | 
34 |     for i in np.arange(0,11,0.5):
35 | 
36 |         y_hat = learning_system.get_result({'X':i})['y']
37 | 
38 |         a_row = pd.Series([i, y_hat])
39 |         row_df = pd.DataFrame([a_row])
40 |         df = pd.concat([row_df, df])
41 | 
42 | 
43 |     plt.scatter(X, y)
44 |     plt.scatter(df[0], df[1])
45 |     plt.show()
46 | 
47 | if __name__ == "__main__":
48 |     test_model()
--------------------------------------------------------------------------------
/ml_algorithms/src/algorithms/fuzzy_learning/fuzzy_learning_sbp.py:
--------------------------------------------------------------------------------
1 | from fuzzy_system.fuzzy_learning_helper import load_sbp
2 | from fuzzy_system.fuzzy_learning_system import FuzzyLearningSystem
3 | import numpy as np
4 | import matplotlib.pyplot as plt
5 | import pandas as pd
6 | 
7 | X, y = load_sbp()
8 | # # X, y = load_linear_model()
9 | 
10 | # min_max_scaler = preprocessing.StandardScaler()
11 | 
12 | # x = X.values #returns a numpy array
13 | # x_scaled = min_max_scaler.fit_transform(x)
14 | # X = pd.DataFrame(x_scaled)
15 | 
16 | # print(X.min(), X.max())
17 | 
18 | 
19 | # X_train, X_test, y_train, y_test = split_train_test(X, y, test_size = 0.10)
20 | X_train = X
21 | X_test = X
22 | y_train = y
23 | y_test = y
24 | 
25 | learning_system = FuzzyLearningSystem(res=1000)
26 | 
27 | learning_system.fit(X_train, y_train, X_n=3, y_n=4)
28 | 
29 | # learning_system.plot_variables()
30 | 
31 | 
32 | print(learning_system)
33 | 
34 | score = learning_system.score(X_test, y_test)
35 | print(score)
36 | 
37 | df = pd.DataFrame()
38 | 
39 | for i in np.arange(54,79,0.5):
40 | 
41 |     y_hat = learning_system.get_result({'Age':i})['SBP']
42 | 
43 |     a_row = pd.Series([i, y_hat])
44 |     row_df = pd.DataFrame([a_row])
45 |     df = pd.concat([row_df, df])
46 | 
47 | 
48 | plt.scatter(X, y)
49 | plt.scatter(df[0], df[1])
50 | plt.show()
51 | 
52 | 
--------------------------------------------------------------------------------
/ml_algorithms/src/algorithms/fuzzy_learning/fuzzy_system/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/carmelgafa/ml-from-scratch/c51c4f4c88829da26a448dfc6bc48d9ec706df96/ml_algorithms/src/algorithms/fuzzy_learning/fuzzy_system/__init__.py
--------------------------------------------------------------------------------
/ml_algorithms/src/algorithms/fuzzy_learning/fuzzy_system/fuzzy_associative_memory.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | 
3 | 
4 | class FuzzyAssociativeMemory():
5 | 
6 |     def __init__(self, variables_info, fam_shape):
7 |         '''
8 |         variables_info -- dict mapping each variable name to its ordered list of set names
9 |         fam_shape -- shape of the memory array
10 |         '''
11 |         self._variables_info = variables_info
12 |         self._fam = np.empty(fam_shape, dtype='object')
13 | 
14 |     def set_entity(self, location, value):
15 |         '''
16 |         stores value in the cell addressed by location, a dict of the form {variable_name: set_name}
17 |         '''
18 |         entity_location = []
19 | 
20 |         for variable, f_sets in self._variables_info.items():
21 |             f_set = location[variable]
22 |             entity_location.append(f_sets.index(f_set))
23 | 
24 |         self._fam[tuple(entity_location)] = value
25 | 
26 | 
27 |     def __str__(self):
28 |         return str(self._fam)
29 | 
--------------------------------------------------------------------------------
/ml_algorithms/src/algorithms/fuzzy_learning/fuzzy_system/fuzzy_clause.py:
--------------------------------------------------------------------------------
1 | '''
2 | Fuzzy Clause class. Used in Fuzzy rule
3 | '''
4 | class FuzzyClause():
5 |     '''
6 |     A fuzzy clause of the type 'variable is set'
7 |     used in fuzzy IF ... THEN ... rules
8 |     clauses can be antecedent (if part) or consequent
9 |     (then part)
10 |     '''
11 | 
12 |     def __init__(self, variable, f_set, degree=1):
13 |         '''
14 |         initialization of the fuzzy clause
15 | 
16 |         Arguments:
17 |         ----------
18 |         variable -- the clause variable in 'variable is set'
19 |         f_set -- the clause set in 'variable is set'
20 |         '''
21 | 
22 |         if f_set is None:
23 |             raise ValueError('f_set must not be None')
24 | 
25 |         if f_set.name == '':
26 |             raise ValueError(f'{f_set} has no set name')
27 | 
28 | 
29 |         self._variable = variable
30 |         self._set = f_set
31 |         self._degree = degree
32 | 
33 |     def __str__(self):
34 |         '''
35 |         string representation of the clause.
36 | 
37 |         Returns:
38 |         --------
39 |         str: str, string representation of the clause in the form
40 |         A is x
41 |         '''
42 |         return f'{self._variable.name} is {self._set.name}'
43 | 
44 |     @property
45 |     def degree(self):
46 |         return self._degree
47 | 
48 | 
49 |     @property
50 |     def variable_name(self):
51 |         '''
52 |         returns the name of the clause variable
53 | 
54 |         Returns:
55 |         --------
56 |         variable_name: str, name of variable
57 |         '''
58 |         return self._variable.name
59 | 
60 |     @property
61 |     def set_name(self):
62 |         '''
63 |         returns the name of the clause set
64 | 
65 |         Returns:
66 |         --------
67 |         set_name: str, name of set
68 |         '''
69 |         return self._set.name
70 | 
71 | 
72 |     def evaluate_antecedent(self):
73 |         '''
74 |         Used when set is antecedent.
75 |         returns the set degree of membership.
76 | 
77 |         Returns:
78 |         --------
79 |         dom -- number, the set degree of membership given a value for
80 |         that variable. This value is determined at an earlier stage
81 |         and stored in the set
82 |         '''
83 |         return self._set.last_dom_value
84 | 
85 |     def evaluate_consequent(self, dom):
86 |         '''
87 |         Used when clause is consequent.
88 |         returns an alpha-cut fuzzy set given some scalar value
89 | 
90 |         Arguments:
91 |         -----------
92 |         dom -- number, scalar value from the antecedent clauses
93 | 
94 |         Returns:
95 |         --------
96 |         set -- Type1FuzzySet, a set resulting from alpha-cut from
97 |         the scalar value
98 |         '''
99 |         return self._set.fuzzy_alpha_cut(dom)
100 | 
101 |     def get_consequent_center_val(self):
102 | 
103 |         return self._set.center_value
--------------------------------------------------------------------------------
/ml_algorithms/src/algorithms/fuzzy_learning/fuzzy_system/fuzzy_learning_helper.py:
--------------------------------------------------------------------------------
1 | import os
2 | import pandas as pd
3 | import numpy as np
4 | 
5 | DATA_PATH = os.path.join(os.path.dirname(__file__), '..', 'data')
6 | 
7 | def load_data(filename, data_path=DATA_PATH, separator=';'):
8 |     csv_path = os.path.join(data_path, filename)
9 |     return pd.read_csv(csv_path, sep=separator)
10 | 
11 | def save_data(data_frame, filename, data_path=DATA_PATH):
12 |     csv_path = os.path.join(data_path, filename)
13 |     return data_frame.to_csv(csv_path, float_format='%.3f', index=False)
14 | 
15 | def split_train_test(X, y, test_size=0.1, random_seed=21):
16 |     np.random.seed(random_seed)
17 |     shuffled_indices = np.random.permutation(len(X))
18 |     set_size = int(len(X) * test_size)
19 | 
20 |     test_indices = shuffled_indices[:set_size]
21 |     train_indices = shuffled_indices[set_size:]
22 | 
23 |     return X.iloc[train_indices], X.iloc[test_indices], y.iloc[train_indices], y.iloc[test_indices]
24 | 
25 | def format_dataset(data, output_attributes_names):
26 |     '''
27 |     Arguments:
28 |     ----------
29 |     data -- original dataset
30 |     output_attributes_names -- str, the name of the output attribute column
31 |     '''
32 |     X = data.loc[:, data.columns != output_attributes_names]
33 |     y = data.loc[:, data.columns == output_attributes_names]
34 | 
35 |     return X, y
36 | 
37 | def load_winequality_red():
38 |     dataset = load_data('winequality-red.csv')
39 |     # print(dataset.shape)
40 | 
41 |     return format_dataset(dataset, 'quality')
42 | 
43 | 
44 | def load_weather():
45 |     dataset = load_data('weatherHistory_adj.csv', separator=',')
46 |     # print(dataset.shape)
47 | 
48 |     return format_dataset(dataset, 'Temperature')
49 | 
50 | 
51 | 
52 | def load_linear_model():
53 |     dataset = load_data('linear_model.csv', separator=',')
54 |     # print(dataset.shape)
55 |     return format_dataset(dataset, 'y')
56 | 
57 | def load_sample_set():
58 |     dataset = load_data('sample_set.csv', separator=',')
59 |     # print(dataset.shape)
60 |     return format_dataset(dataset, 'y')
61 | 
62 | 
63 | def load_sbp():
64 |     dataset = load_data('sbp_age.csv', separator=',')
65 |     # print(dataset.shape)
66 |     return format_dataset(dataset, 'SBP')
67 | 
68 | 
69 | def load_sensor_data():
70 |     dataset = load_data('sensor_data.csv', separator=',')
71 |     # print(dataset.shape)
72 | 
73 |     return format_dataset(dataset, 'Y')
74 | 
75 | 
76 | 
77 | if __name__ == "__main__":
78 |     pass
--------------------------------------------------------------------------------
/ml_algorithms/src/algorithms/fuzzy_learning/fuzzy_system/fuzzy_rule.py:
--------------------------------------------------------------------------------
1 | class FuzzyRule():
2 |     '''
3 |     A fuzzy rule of the type
4 |     IF [antecedent clauses] THEN [consequent clauses]
5 |     '''
6 | 
7 |     # def __init__(self, antecedent_clauses, consequent_clauses):
8 |     def __init__(self):
9 |         '''
10 |         initializes the rule.
        Two data structures are necessary:
11 |         Antecedent clauses list
12 |         consequent clauses list
13 |         '''
14 |         self._antecedent = []
15 |         self._consequent = []
16 |         self._degree = 1
17 | 
18 |     @property
19 |     def degree(self):
20 |         return self._degree
21 | 
22 |     def evaluate_score(self):
23 | 
24 |         output_control = 1
25 | 
26 |         center_values = {}
27 | 
28 |         for ante in self._antecedent:
29 |             output_control = output_control * ante.evaluate_antecedent()
30 | 
31 |         for cons in self._consequent:
32 |             center_values[cons.variable_name] = cons.get_consequent_center_val()
33 | 
34 |         return output_control, center_values
35 | 
36 | 
37 |     def __str__(self):
38 |         '''
39 |         string representation of the rule.
40 | 
41 |         Returns:
42 |         --------
43 |         str: str, string representation of the rule in the form
44 |         IF [antecedent clauses] THEN [consequent clauses]
45 |         '''
46 |         ante = ' and '.join(map(str, self._antecedent))
47 |         cons = ' and '.join(map(str, self._consequent))
48 |         return f'If {ante} then {cons}'
49 | 
50 |     def get_antecedent_str(self):
51 |         ante = ' and '.join(map(str, self._antecedent))
52 |         return ante
53 | 
54 |     def add_antecedent_clause(self, clause):
55 |         '''
56 |         adds an antecedent clause to the rule
57 | 
58 |         Arguments:
59 |         -----------
60 |         clause -- FuzzyClause, the antecedent clause
61 |         '''
62 |         self._antecedent.append(clause)
63 |         self._degree = self._degree * clause.degree
64 | 
65 |     def add_consequent_clause(self, clause):
66 |         '''
67 |         adds a consequent clause to the rule
68 | 
69 |         Arguments:
70 |         -----------
71 |         clause -- FuzzyClause, the consequent clause
72 |         '''
73 |         self._consequent.append(clause)
74 |         self._degree = self._degree * clause.degree
75 | 
76 |     def evaluate(self):
77 |         '''
78 |         evaluation of the rule.
79 |         the antecedent clauses are executed and the minimum degree of
80 |         membership is retained.
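        For example, if a rule has antecedent clauses that evaluate to 0.7
        and 0.4, the rule strength is min(0.7, 0.4) = 0.4 and every
        consequent set is alpha-cut at 0.4.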
81 |         This is used in the consequent clauses to alpha-cut the consequent
82 |         set
83 |         The values are returned in a dict of the form {variable_name: alpha-cut set, ...}
84 | 
85 |         Returns:
86 |         --------
87 |         rule_consequence -- dict, the resulting sets in the form
88 |         {variable_name: alpha-cut set, ...}
89 |         '''
90 |         # rule dom initialize to 1 as min operator will be performed
91 |         rule_strength = 1
92 | 
93 |         # execute all antecedent clauses, keeping the minimum of the
94 |         # returned doms to determine the rule strength
95 |         for ante_clause in self._antecedent:
96 |             rule_strength = min(ante_clause.evaluate_antecedent(), rule_strength)
97 | 
98 |         # initialize the results dict
99 |         rule_consequence = {}
100 | 
101 |         # execute consequent clauses, adding each result to the results dict using the
102 |         # variable name as key
103 |         for consequent_clause in self._consequent:
104 |             rule_consequence[consequent_clause.variable_name] = consequent_clause.evaluate_consequent(rule_strength)
105 | 
106 |         # return results
107 |         return rule_consequence
108 | 
109 |     def get_antecedent_list(self):
110 | 
111 |         ret = []
112 | 
113 |         for ante_clause in self._antecedent:
114 |             ret.append(ante_clause.set_name)
115 | 
116 |         return ret
117 | 
118 |     def get_consequent_list(self):
119 | 
120 |         ret = []
121 | 
122 |         for cons_clause in self._consequent:
123 |             ret.append(cons_clause.set_name)
124 | 
125 |         return ret
126 | 
127 | 
128 |     def get_csv_line(self, header):
129 | 
130 |         row = [None] * len(header)
131 | 
132 |         for ante in self._antecedent:
133 |             idx = header.index(ante.variable_name)
134 |             row[idx] = ante.set_name
135 | 
136 |         for cons in self._consequent:
137 |             idx = header.index(cons.variable_name)
138 |             row[idx] = cons.set_name
139 | 
140 |         return row
--------------------------------------------------------------------------------
/ml_algorithms/src/algorithms/fuzzy_learning/fuzzy_system/system_settings.py:
--------------------------------------------------------------------------------
1 | '''
2 | System settings
3 | '''
4 | 
5 | 
6 | PRECISION = 6 # precision used for rounding operations
7 | 
--------------------------------------------------------------------------------
/ml_algorithms/src/algorithms/fuzzy_learning/fuzzy_system_example.py:
--------------------------------------------------------------------------------
1 | from fuzzy_system.type1_fuzzy_variable import Type1FuzzyVariable
2 | from fuzzy_system.fuzzy_system import FuzzySystem
3 | 
4 | temp = Type1FuzzyVariable(10, 40, 100, 'Temperature')
5 | temp.add_triangular('Cold', 10, 10, 25)
6 | temp.add_triangular('Medium', 15, 25, 35)
7 | temp.add_triangular('Hot', 25, 40, 40)
8 | 
9 | humidity = Type1FuzzyVariable(20, 100, 100, 'Humidity')
10 | humidity.add_triangular('Wet', 20, 20, 60)
11 | humidity.add_trapezoidal('Normal', 30, 60, 90)
12 | humidity.add_triangular('Dry', 60, 100, 100)
13 | 
14 | motor_speed = Type1FuzzyVariable(0, 100, 100, 'Speed')
15 | motor_speed.add_triangular('Slow', 0, 0, 50)
16 | motor_speed.add_triangular('Moderate', 10, 50, 90)
17 | motor_speed.add_triangular('Fast', 50, 100, 100)
18 | 
19 | system = FuzzySystem()
20 | system.add_input_variable(temp)
21 | system.add_input_variable(humidity)
22 | system.add_output_variable(motor_speed)
23 | 
24 | system.add_rule(
25 |     { 'Temperature':'Cold',
26 |       'Humidity':'Wet' },
27 |     { 'Speed':'Slow'})
28 | 
29 | system.add_rule(
30 |     { 'Temperature':'Cold',
31 |       'Humidity':'Normal' },
32 |     { 'Speed':'Slow'})
33 | 
34 | system.add_rule(
35 |     { 'Temperature':'Medium',
36 |       'Humidity':'Wet' },
37 |     { 'Speed':'Slow'})
38 | 
39 | system.add_rule(
40 |     { 'Temperature':'Medium',
41 |       'Humidity':'Normal' },
42 |     { 'Speed':'Moderate'})
43 | 
44 | system.add_rule(
45 |     { 'Temperature':'Cold',
46 |       'Humidity':'Dry' },
47 |     { 'Speed':'Moderate'})
48 | 
49 | system.add_rule(
50 |     { 'Temperature':'Hot',
51 |       'Humidity':'Wet' },
52 |     { 'Speed':'Moderate'})
53 | 
54 | system.add_rule(
55 |     { 'Temperature':'Hot',
56 |       'Humidity':'Normal' },
57 |     { 'Speed':'Fast'})
58 | 
59 | system.add_rule(
60 |     { 'Temperature':'Hot',
61 |       'Humidity':'Dry' },
62 |     { 'Speed':'Fast'})
63 | 
64 | system.add_rule(
65 |     { 'Temperature':'Medium',
66 |       'Humidity':'Dry' },
67 |     { 'Speed':'Fast'})
68 | 
69 | output = system.evaluate_output({
70 |     'Temperature':18,
71 |     'Humidity':60
72 | })
73 | 
74 | print(output)
75 | 
76 | system.plot_system()
--------------------------------------------------------------------------------
/ml_algorithms/src/algorithms/fuzzy_learning/fuzzy_system_exec/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/carmelgafa/ml-from-scratch/c51c4f4c88829da26a448dfc6bc48d9ec706df96/ml_algorithms/src/algorithms/fuzzy_learning/fuzzy_system_exec/__init__.py
--------------------------------------------------------------------------------
/ml_algorithms/src/algorithms/fuzzy_learning/fuzzy_system_exec/system_test.py:
--------------------------------------------------------------------------------
1 | from fuzzy_system.type1_fuzzy_variable import Type1FuzzyVariable
2 | from fuzzy_system.fuzzy_system import FuzzySystem
3 | from fuzzy_system.fuzzy_clause import FuzzyClause
4 | 
5 | 
6 | # add a temperature linguistic variable
7 | input_var = Type1FuzzyVariable(0, 100, 100, 'Temperature')
8 | input_var.add_triangular('S2', 0, 0, 25)
9 | input_var.add_triangular('S1', 0, 25, 50)
10 | input_var.add_triangular('CE', 25, 50, 75)
11 | input_var.add_triangular('B1', 50, 75, 100)
12 | input_var.add_triangular('B2', 75, 100, 100)
13 | 
14 | 
15 | # add a humidity linguistic variable
16 | input2_var = Type1FuzzyVariable(0, 100, 100, 'Humidity')
17 | input2_var.add_triangular('S2', 0, 0, 25)
18 | input2_var.add_triangular('S1', 0, 25, 50)
19 | input2_var.add_triangular('CE', 25, 50, 75)
20 | input2_var.add_triangular('B1', 50, 75, 100)
21 | input2_var.add_triangular('B2', 75, 100, 100)
22 | 
23 | output_var = Type1FuzzyVariable(0, 100, 100, 'Speed')
24 | output_var.add_triangular('L2', 0, 0, 25)
25 | output_var.add_triangular('L1', 0, 25, 50)
26 | output_var.add_triangular('M', 25, 50, 75)
27 | output_var.add_triangular('H1', 50, 75, 100)
28 | output_var.add_triangular('H2', 75, 100, 100)
29 | 
30 | system = FuzzySystem()
31 | system.add_input_variable(input_var)
32 | system.add_input_variable(input2_var)
33 | system.add_output_variable(output_var)
34 | 
35 | ante = {
36 |     'Temperature' : 'S2',
37 |     'Humidity' : 'S2'
38 | }
39 | cons = {
40 |     'Speed' : 'H2'
41 | }
42 | 
43 | system.add_rule(ante, cons)
44 | 
45 | print(system)
46 | 
--------------------------------------------------------------------------------
/ml_algorithms/src/algorithms/fuzzy_learning/fuzzy_system_exec/system_test2.py:
--------------------------------------------------------------------------------
1 | import sys
2 | sys.path.append("..")
3 | 
4 | from fuzzy_system.type1_fuzzy_variable import Type1FuzzyVariable
5 | 
6 | from fuzzy_system.fuzzy_system import FuzzySystem
7 | from fuzzy_system.fuzzy_clause import FuzzyClause
8 | 
9 | x1 = Type1FuzzyVariable(0, 100, 100, 'x1')
10 | x1.add_triangular('S', 0, 25, 50)
11 | x1.add_triangular('M', 25, 50, 75)
12 | x1.add_triangular('L', 50, 75, 100)
13 | 
14 | x2 = Type1FuzzyVariable(0, 100, 100, 'x2')
15 | x2.add_triangular('S', 0, 25, 50)
16 | x2.add_triangular('M', 25, 50, 75)
17 | x2.add_triangular('L', 50, 75, 100)
18 | 
19 | y = Type1FuzzyVariable(0, 100, 100, 'y')
20 | y.add_triangular('S', 0, 25, 50)
21 | y.add_triangular('M', 25, 50, 75)
22 | y.add_triangular('L', 50, 75, 100)
23 | 
24 | system = FuzzySystem()
25 | system.add_input_variable(x1)
26 | system.add_input_variable(x2)
27 | system.add_output_variable(y)
28 | 
29 | system.add_rule(
30 |     {
31 |         'x1':'S',
32 |         'x2':'M'
33 |     },
34 |     {
35 |         'y':'S'
36 |     }
37 | )
38 | 
39 | system.add_rule(
40 |     {
41 |         'x1':'M',
42 |         'x2':'L'
43 |     },
44 |     {
45 |         'y':'M'
46 |     }
47 | )
48 | output = system.evaluate_output({
49 |     'x1':44,
50 |     'x2':61
51 | })
52 | 
53 | # print(output)
--------------------------------------------------------------------------------
/ml_algorithms/src/algorithms/fuzzy_learning/fuzzy_system_test.py:
--------------------------------------------------------------------------------
1 | from fuzzy_system.type1_fuzzy_variable import Type1FuzzyVariable
2 | from fuzzy_system.fuzzy_system import FuzzySystem
3 | 
4 | x1 = Type1FuzzyVariable(0, 100, 100, 'x1')
5 | x1.add_triangular('S', 0, 25, 50)
6 | x1.add_triangular('M', 25, 50, 75)
7 | x1.add_triangular('L', 50, 75, 100)
8 | 
9 | x2 = Type1FuzzyVariable(0, 100, 100, 'x2')
10 | x2.add_triangular('S', 0, 25, 50)
11 | x2.add_triangular('M', 25, 50, 75)
12 | x2.add_triangular('L', 50, 75, 100)
13 | 
14 | y = Type1FuzzyVariable(0, 100, 100, 'y')
15 | y.add_triangular('S', 0, 25, 50)
16 | y.add_triangular('M', 25, 50, 75)
17 | y.add_triangular('L', 50, 75, 100)
18 | 
19 | z = Type1FuzzyVariable(0, 100, 100, 'z')
20 | z.add_triangular('S', 0, 25, 50)
21 | z.add_triangular('M', 25, 50, 75)
22 | z.add_triangular('L', 50, 75, 100)
23 | 
24 | system = FuzzySystem()
25 | system.add_input_variable(x1)
26 | system.add_input_variable(x2)
27 | system.add_output_variable(y)
28 | system.add_output_variable(z)
29 | 
30 | system.add_rule(
31 |     { 'x1':'S',
32 |       'x2':'S' },
33 |     { 'y':'S',
34 |       'z':'L' })
35 | 
36 | system.add_rule(
37 |     { 'x1':'M',
38 |       'x2':'M' },
39 |     { 'y':'M',
40 |       'z':'M' })
41 | 
42 | system.add_rule(
43 |     { 'x1':'L',
44 |       'x2':'L' },
45 |     { 'y':'L',
46 |       'z':'S' })
47 | 
48 | system.add_rule(
49 |     { 'x1':'S',
50 |       'x2':'M' },
51 |     { 'y':'S',
52 |       'z':'L' })
53 | 
54 | system.add_rule(
55 |     { 'x1':'M',
56 |       'x2':'S' },
57 |     { 'y':'S',
58 |       'z':'L' })
59 | 
60 | system.add_rule(
61 |     { 'x1':'L',
62 |       'x2':'M' },
63 |     { 'y':'L',
64 |       'z':'S' })
65 | 
66 | system.add_rule(
67 |     { 'x1':'M',
68 |       'x2':'L' },
69 |     { 'y':'L',
70 |       'z':'S' })
71 | 
72 | system.add_rule(
73 |     { 'x1':'L',
74 |       'x2':'S' },
75 |     { 'y':'M',
76 |       'z':'M' })
77 | 
78 | system.add_rule(
79 |     { 'x1':'S',
80 |       'x2':'L' },
81 |     { 'y':'M',
82 |       'z':'M' })
83 | 
84 | output = system.evaluate_output({
85 |     'x1':35,
86 |     'x2':75
87 | })
88 | 
89 | fam = system.create_fam('y')
90 | 
91 | print(fam)
92 | 
93 | print(output)
94 | 
--------------------------------------------------------------------------------
/ml_algorithms/src/algorithms/fuzzy_learning/poc/data_analysis_poc.py:
--------------------------------------------------------------------------------
1 | import os
2 | import pandas as pd
3 | from pandas import DataFrame
4 | import matplotlib.pyplot as plt
5 | 
6 | dirname = os.path.dirname(__file__)
7 | filename = os.path.join(dirname, '..', 'data', 'winequality-red.csv')
8 | 
9 | df = pd.read_csv(filename, sep=';')
10 | # print(df.head())
11 | 
12 | df2 = df['chlorides']
13 | # print(df2.head())
14 | 
15 | df3 = df[['free sulfur dioxide', 'total sulfur dioxide']]
16 | # print(df3.head())
17 | 
18 | 
19 | to_rename = {'fixed acidity':'fixed_acidity',
20 |     'volatile acidity':'volatile_acidity',
21 |     'citric acid':'citric_acid',
22 |     'residual sugar':'residual_sugar',
23 |     'free sulfur dioxide':'free_sulfur_dioxide',
24 |     'total sulfur dioxide':'total_sulfur_dioxide'
25 |     }
26 | 
27 | df.rename(columns=to_rename, inplace=True)
28 | # print(df.head())
29 | 
30 | df4 = df[(df['residual_sugar'] > 10)]
31 | # print(df4)
32 | 
33 | df['sulphur_dioxide_difference'] = df['total_sulfur_dioxide'] - df['free_sulfur_dioxide']
34 | # print(df.head())
35 | 
36 | df[['total_sulfur_dioxide','free_sulfur_dioxide','sulphur_dioxide_difference']][200:300].plot()
37 | # plt.show()
38 | 
--------------------------------------------------------------------------------
/ml_algorithms/src/algorithms/fuzzy_learning/poc/set_generation.py:
--------------------------------------------------------------------------------
1 | from type2fuzzy import Type1FuzzyVariable
2 | 
3 | # adding an age linguistic variable
4 | var = Type1FuzzyVariable(0, 100, 100)
5 | 
6 | var.add_triangular('very young', 0, 0, 20)
7 | var.add_triangular('young', 10, 20, 30)
8 | var.add_triangular('adult', 20, 40, 60)
9 | var.add_triangular('old', 50, 70, 90)
10 | var.add_triangular('very old', 70, 100, 100)
11 | 
12 | var.plot_variable()
--------------------------------------------------------------------------------
/ml_algorithms/src/algorithms/fuzzy_learning/poc/set_generation_2.py:
--------------------------------------------------------------------------------
1 | from fuzzy_system.type1_fuzzy_variable import Type1FuzzyVariable
2 | 
3 | # adding an age linguistic variable
4 | var = Type1FuzzyVariable(0, 100, 100)
5 | 
6 | # generate (2*3)+1 = 7 sets
7 | var.generate_sets_mean(3, 30)
8 | 
9 | var.plot_variable()
--------------------------------------------------------------------------------
/ml_algorithms/src/algorithms/fuzzy_learning/poc/set_generation_notes.py:
-------------------------------------------------------------------------------- 1 | from type2fuzzy import Type1FuzzyVariable 2 | 3 | # adding an age linguistic variable 4 | var = Type1FuzzyVariable(0, 100, 100) 5 | 6 | var.add_triangular('S2', 0, 0, 25) 7 | var.add_triangular('S1', 0, 25, 50) 8 | var.add_triangular('CE', 25, 50, 75) 9 | var.add_triangular('B1', 50, 75, 100) 10 | var.add_triangular('B2', 75, 100, 100) 11 | 12 | var.plot_variable() -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/fuzzy_learning/poc/set_naming.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | n=5 4 | 5 | x = ['s' + str(i) for i in range(n+1,1,-1)] + ['ce'] + ['b' + str(i) for i in range(1,n+1,1)] 6 | 7 | # print(x) -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/fuzzy_learning/sensor_comparison.py: -------------------------------------------------------------------------------- 1 | from fuzzy_system.fuzzy_learning_helper import load_sensor_data 2 | from fuzzy_system.fuzzy_learning_system import FuzzyLearningSystem 3 | import numpy as np 4 | import matplotlib.pyplot as plt 5 | import pandas as pd 6 | 7 | X, y = load_sensor_data() 8 | 9 | clean_y = y.copy() 10 | clean_y['Y'] = 1.1**X['x'] 11 | 12 | X_train = X 13 | X_test = X 14 | y_train = y 15 | y_test = y 16 | 17 | fig, axes = plt.subplots(nrows=3, ncols=3) 18 | df = pd.DataFrame() 19 | 20 | for x_range in range(3, 6): 21 | for y_range in range(2, 5): 22 | 23 | learning_system = FuzzyLearningSystem(res=1000) 24 | learning_system.fit(X_train, y_train, X_n=x_range, y_n=y_range) 25 | 26 | # learning_system.plot_variables() 27 | # print(learning_system) 28 | 29 | score = learning_system.score(X_test, clean_y) 30 | df = df[0:0] 31 | 32 | for i in np.arange(0,50,1): 33 | y_hat = learning_system.get_result({'x':i})['Y'] 34 | a_row = pd.Series([i, y_hat]) 35 | row_df = pd.DataFrame([a_row]) 36 | df = pd.concat([row_df, df]) 37 | 38 | axes[x_range-3, y_range-2].plot(X, clean_y) 39 | # axes[x_range-1, y_range-1].plot(X, y) 40 | axes[x_range-3, y_range-2].plot(df[0], df[1]) 41 | axes[x_range-3, y_range-2].set_title(f'sets x: {1+(2*x_range)}, sets y:{1+(2*y_range)}, R-Squared:{score:1.3f}') 42 | axes[x_range-3, y_range-2].set_xlabel('') 43 | 44 | plt.show() 45 | -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/fuzzy_learning/sensor_data_generate.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Generates the dataset for a noisy sensor having an exponential response 3 | ''' 4 | from random import uniform, seed 5 | from matplotlib import pyplot as plt 6 | from pandas import DataFrame 7 | from fuzzy_system.fuzzy_learning_helper import save_data 8 | 9 | # generate functions 10 | seed(42) 11 | factor = 0.3 12 | x = range(0,50,1) 13 | 14 | y_clean = [(1.1**i) for i in x] 15 | y = [(1.1**i) + ( i * factor * uniform(-1,1)) for i in x] 16 | 17 | # plot 18 | fig, axes = plt.subplots(nrows=1, ncols=2) 19 | axes[0].plot(y_clean) 20 | axes[1].plot(y) 21 | axes[0].set_title('ideal sensor response') 22 | axes[1].set_title('noisy sensor response') 23 | plt.show() 24 | 25 | # write in file 26 | data = { 27 | 'x': x, 28 | 'Y': y 29 | } 30 | df = DataFrame(data) 31 | save_data(df, "sensor_data.csv") 32 | 33 | -------------------------------------------------------------------------------- 
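The scripts below report the quality of a learned system as an R-squared figure (see the plot titles in sensor_comparison.py). A minimal sketch of how that figure can be reproduced by hand, assuming FuzzyLearningSystem.score() implements the usual coefficient of determination (its source is not shown in this listing):

import numpy as np

def r_squared(y_true, y_pred):
    # coefficient of determination: 1 - SS_res / SS_tot
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    ss_res = np.sum((y_true - y_pred) ** 2)
    ss_tot = np.sum((y_true - np.mean(y_true)) ** 2)
    return 1.0 - ss_res / ss_tot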
/ml_algorithms/src/algorithms/fuzzy_learning/sensor_fuzzy_learn.py: -------------------------------------------------------------------------------- 1 | from fuzzy_system.fuzzy_learning_helper import load_sensor_data 2 | from fuzzy_system.fuzzy_learning_system import FuzzyLearningSystem 3 | import numpy as np 4 | import matplotlib.pyplot as plt 5 | import pandas as pd 6 | 7 | X, y = load_sensor_data() 8 | 9 | clean_y = y.copy() 10 | clean_y['Y'] = 1.1**X['x'] 11 | 12 | X_train = X 13 | X_test = X 14 | y_train = y 15 | y_test = y 16 | 17 | learning_system = FuzzyLearningSystem(res=1000) 18 | 19 | learning_system.fit(X_train, y_train, X_n=5, y_n=2) 20 | 21 | score = learning_system.score(X_test, clean_y) 22 | print(score) 23 | 24 | df = pd.DataFrame() 25 | 26 | for i in np.arange(0,50,1): 27 | 28 | y_hat = learning_system.get_result({'x':i})['Y'] 29 | 30 | a_row = pd.Series([i, y_hat]) 31 | row_df = pd.DataFrame([a_row]) 32 | df = pd.concat([row_df, df]) 33 | 34 | plt.plot(X, y) 35 | plt.plot(df[0], df[1]) 36 | 37 | print(learning_system) 38 | 39 | -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/fuzzy_learning/weather_analyse.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | import pandas as pd 5 | import os 6 | 7 | DATA_PATH = os.path.join(os.path.dirname( __file__ ), 'data') 8 | 9 | def load_data(filename, data_path=DATA_PATH, separator=','): 10 | csv_path = os.path.join(data_path, filename) 11 | return pd.read_csv(csv_path, sep=separator) 12 | 13 | 14 | if __name__ == "__main__": 15 | 16 | df = load_data ('weatherHistory_adj.csv') 17 | 18 | # param = 'Temperature' 19 | param = 'Humidity' 20 | 21 | res = df.groupby(pd.Grouper(key='Month'))[param].agg([np.min, np.mean, np.max]) 22 | res = res.sort_values(by=['Month']) 23 | print(res) 24 | 25 | error = [(res['amax']-res['mean']), (res['mean']-res['amin'])] 26 | 27 | res.plot(kind = "barh", y = "mean", legend = False, title = param, xerr=error) 28 | plt.show() 29 | 30 | 31 | 32 | 33 | 34 | -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/fuzzy_learning/weather_fuzzy_learning.py: -------------------------------------------------------------------------------- 1 | from fuzzy_system.fuzzy_learning_helper import load_weather 2 | from fuzzy_system.fuzzy_learning_helper import split_train_test 3 | from fuzzy_system.fuzzy_learning_system import FuzzyLearningSystem 4 | 5 | def execute_test(resolution, x_n, y_n): 6 | 7 | X, y = load_weather() 8 | 9 | X_train, X_test, y_train, y_test = split_train_test(X, y, test_size = 0.2) 10 | 11 | learning_system = FuzzyLearningSystem(res=resolution) 12 | 13 | learning_system.fit(X_train, y_train, X_n=x_n, y_n=y_n) 14 | 15 | score = learning_system.score(X_test, y_test) 16 | 17 | print(learning_system) 18 | 19 | learning_system.generate_rules_csv('weather_rules.csv') 20 | 21 | return score 22 | 23 | if __name__ == "__main__": 24 | result = execute_test(1000,4,16) 25 | print(result) 26 | -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/fuzzy_learning/weather_preprocessing.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import os 3 | 4 | DATA_PATH = os.path.join(os.path.dirname( __file__ ), 'data') 5 | 6 | def load_data(filename, data_path=DATA_PATH, separator=','): 7 | 
csv_path = os.path.join(data_path, filename) 8 | return pd.read_csv(csv_path, sep=separator) 9 | 10 | def save_data(data_frame, filename, data_path=DATA_PATH): 11 | csv_path = os.path.join(data_path, filename) 12 | return data_frame.to_csv(csv_path, float_format='%.3f', index=False) 13 | 14 | def weather_dataset_preprocess(): 15 | 16 | data_n = load_data ('weatherHistory.csv') 17 | 18 | # get month from date 19 | data_n.loc[:, 'DateTime'] = pd.to_datetime(data_n['Formatted Date'], utc=True) 20 | data_n['Month'] = data_n['DateTime'].dt.month 21 | 22 | data_n = data_n.drop(columns=[ 23 | 'Formatted Date', 24 | 'DateTime', 25 | 'Summary', 26 | 'Precip Type', 27 | 'Apparent Temperature (C)', 28 | 'Loud Cover', 29 | 'Daily Summary', 30 | "Wind Bearing (degrees)", 31 | "Visibility (km)", 32 | "Pressure (millibars)", 33 | "Wind Speed (km/h)", 34 | ]) 35 | 36 | data_n = data_n.rename(columns={ 37 | "Temperature (C)":"Temperature", 38 | # "Wind Speed (km/h)": "Wind Speed", 39 | # "Wind Bearing (degrees)":"Wind Bearing", 40 | # "Visibility (km)":"Visibility", 41 | # "Pressure (millibars)":"Pressure" 42 | }) 43 | 44 | save_data(data_n,'weatherHistory_adj.csv') 45 | 46 | def create_testing_sample(): 47 | 48 | df = load_data('weatherHistory_adj.csv') 49 | df_ret = df.head(1000) 50 | save_data(df_ret,'weatherHistory_adj_test.csv') 51 | 52 | if __name__ == "__main__": 53 | weather_dataset_preprocess() 54 | create_testing_sample() -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/fuzzy_learning/weather_preprocessing_humidity.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import os 3 | 4 | 5 | DATA_PATH = os.path.join(os.path.dirname( __file__ ), 'data') 6 | 7 | def load_data(filename, data_path=DATA_PATH, separator=','): 8 | csv_path = os.path.join(data_path, filename) 9 | return pd.read_csv(csv_path, sep=separator) 10 | 11 | def save_data(data_frame, filename, data_path=DATA_PATH): 12 | csv_path = os.path.join(data_path, filename) 13 | return data_frame.to_csv(csv_path, float_format='%.3f', index=False) 14 | 15 | def weather_dataset_preprocess(): 16 | 17 | data_n = load_data ('weatherHistory.csv') 18 | 19 | # get month from date 20 | data_n.loc[:, 'DateTime'] = pd.to_datetime(data_n['Formatted Date'], utc=True) 21 | data_n['Month'] = data_n['DateTime'].dt.month 22 | 23 | 24 | data_n = data_n.drop(columns=[ 25 | 'Formatted Date', 26 | 'DateTime', 27 | 'Summary', 28 | 'Precip Type', 29 | 'Apparent Temperature (C)', 30 | 'Loud Cover', 31 | 'Daily Summary', 32 | 'Wind Bearing (degrees)', 33 | 'Visibility (km)', 34 | 'Pressure (millibars)', 35 | 'Daily Summary' 36 | ]) 37 | 38 | data_n = data_n.rename(columns={ 39 | "Temperature (C)":"Temperature", 40 | 'Wind Speed (km/h)':'Wind Speed', 41 | }) 42 | 43 | save_data(data_n,'weatherHistory_adj_humidity.csv') 44 | 45 | def create_testing_sample(): 46 | df = load_data('weatherHistory_adj.csv') 47 | df_ret = df.head(1000) 48 | save_data(df_ret,'weatherHistory_adj_test.csv') 49 | 50 | if __name__ == "__main__": 51 | # create_testing_sample() 52 | weather_dataset_preprocess() -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/fuzzy_learning/wine_dataset_analysis.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import os 3 | import matplotlib.pyplot as plt 4 | import numpy as np 5 | 6 | DATA_PATH = 
os.path.join(os.path.dirname( __file__ ), 'data') 7 | 8 | 9 | def load_data(filename, data_path=DATA_PATH, separator=';'): 10 | csv_path = os.path.join(data_path, filename) 11 | return pd.read_csv(csv_path, sep=separator) 12 | 13 | def save_data(data_frame, filename, data_path=DATA_PATH): 14 | csv_path = os.path.join(data_path, filename) 15 | return data_frame.to_csv(csv_path, float_format='%.3f', index=False) 16 | 17 | def inspect_data(data): 18 | # print first 10 rows 19 | print(data.head()) 20 | # print datatypes 21 | print(data.info()) 22 | # print min, max, mean, std dev and percentiles 23 | print(data.describe()) 24 | # plot histogram 25 | data.hist(bins=50) 26 | plt.show() 27 | 28 | def _split_train_test(data, test_ratio, random_seed=42): 29 | np.random.seed(random_seed) 30 | 31 | shuffled_indices = np.random.permutation(len(data)) 32 | test_set_size = int(len(data) * test_ratio) 33 | test_indices = shuffled_indices[:test_set_size] 34 | train_indices = shuffled_indices[test_set_size:] 35 | return data.iloc[train_indices], data.iloc[test_indices] 36 | 37 | def split_data(data): 38 | train_set, test_set = _split_train_test(data, 0.2) 39 | save_data(train_set, 'winequality-red_train.csv') 40 | save_data(test_set, 'winequality-red_test.csv') 41 | print(f'data count: {len(data)}') 42 | print(f'train set count: {len(train_set)}') 43 | print(f'test set count: {len(test_set)}') 44 | 45 | def visualize(data): 46 | data.plot(kind='scatter', x='alcohol', y='citric acid', label='pH', figsize=(10,7), alpha=0.1, s=data['total sulfur dioxide'], c='quality', 47 | cmap=plt.get_cmap('jet'), colorbar=True) 48 | plt.show() 49 | 50 | if __name__ == "__main__": 51 | from sklearn import preprocessing 52 | 53 | data = load_data('winequality-red.csv') 54 | 55 | # Get column names first 56 | names = data.columns 57 | # Create the Scaler object 58 | scaler = preprocessing.MinMaxScaler() 59 | # Fit your data on the scaler object 60 | scaled_df = scaler.fit_transform(data) 61 | scaled_df = pd.DataFrame(scaled_df, columns=names) 62 | 63 | inspect_data(scaled_df) 64 | # split_data(data) 65 | # visualize(dataNorm) -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/fuzzy_learning/wine_fuzzy_learning.py: -------------------------------------------------------------------------------- 1 | from fuzzy_system.fuzzy_learning_helper import load_winequality_red 2 | from fuzzy_system.fuzzy_learning_helper import split_train_test 3 | from fuzzy_system.fuzzy_learning_system import FuzzyLearningSystem 4 | import numpy as np 5 | 6 | 7 | def execute_test(resolution, x_n, y_n): 8 | 9 | X, y = load_winequality_red() 10 | 11 | s = 'chlorides' 12 | c = X[s] 13 | c_log = np.log(c) 14 | c_log[c_log < -3.5] = -3.5 15 | c_log[c_log > -1.5] = -1.5 16 | X[s] = c_log 17 | 18 | s='residual sugar' 19 | c = X[s] 20 | c_log = np.log(c) 21 | c_log[c_log > 2] = 2 22 | X[s] = c_log 23 | 24 | s = 'sulphates' 25 | c = X[s] 26 | c[c > 1.25] = 1.25 27 | X[s] = c 28 | 29 | s = 'total sulfur dioxide' 30 | c = X[s] 31 | c_log = np.log(c) 32 | X[s] = c_log 33 | 34 | 35 | 36 | X_train, X_test, y_train, y_test = split_train_test(X, y, test_size = 0.05) 37 | # X_train = X 38 | # X_test = X 39 | # y_train = y 40 | # y_test = y 41 | 42 | learning_system = FuzzyLearningSystem(res=resolution) 43 | 44 | learning_system.fit(X_train, y_train, X_n=x_n, y_n=y_n) 45 | 46 | score = learning_system.score(X_test, y_test) 47 | 48 | return score 49 | 50 | 51 | size = 10 52 | results = np.zeros((size,size)) 53 | 
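# sweep x_n and y_n over a 10x10 grid (11..20) and record the score for each pair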
init_val = 11 54 | for x in range (init_val,init_val+size): 55 | for y in range (init_val,init_val+size): 56 | 57 | results [x-init_val,y-init_val] = execute_test(1000, x, y) 58 | 59 | #results.tofile('results.standardscaler.csv', sep=',') 60 | print(results) -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/fuzzy_learning/wine_scaling.py: -------------------------------------------------------------------------------- 1 | from fuzzy_system.fuzzy_learning_helper import load_winequality_red 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | 5 | X, y = load_winequality_red() 6 | 7 | s = 'chlorides' 8 | c = X[s] 9 | c_log = np.log(c) 10 | c_log[c_log < -3.5] = -3.5 11 | c_log[c_log > -1.5] = -1.5 12 | X[s] = c_log 13 | 14 | s='residual sugar' 15 | c = X[s] 16 | c_log = np.log(c) 17 | c_log[c_log > 2] = 2 18 | X[s] = c_log 19 | 20 | s = 'sulphates' 21 | c = X[s] 22 | c[c > 1.25] = 1.25 23 | X[s] = c 24 | 25 | s = 'total sulfur dioxide' 26 | c = X[s] 27 | c_log = np.log(c) 28 | X[s] = c_log 29 | 30 | 31 | X.hist(bins=50) 32 | plt.show() -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/gan/__pycache__/discriminator.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carmelgafa/ml-from-scratch/c51c4f4c88829da26a448dfc6bc48d9ec706df96/ml_algorithms/src/algorithms/gan/__pycache__/discriminator.cpython-39.pyc -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/gan/__pycache__/generator.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carmelgafa/ml-from-scratch/c51c4f4c88829da26a448dfc6bc48d9ec706df96/ml_algorithms/src/algorithms/gan/__pycache__/generator.cpython-39.pyc -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/gan/discriminator.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | import torch 4 | import torch.nn as nn 5 | 6 | 7 | class Discriminator(nn.Module): 8 | 9 | def __init__(self): 10 | """ 11 | Initialize the discriminator network. 12 | 13 | The network consists of three layers of fully connected (dense) layers. 14 | The output of the network is a probability that the input is real. 15 | """ 16 | super(Discriminator, self).__init__() 17 | self.model = nn.Sequential( 18 | nn.Linear(784, 512), 19 | nn.LeakyReLU(0.2), 20 | nn.Linear(512, 256), 21 | nn.LeakyReLU(0.2), 22 | nn.Linear(256, 1), 23 | nn.Sigmoid() # Output a probability 24 | ) 25 | 26 | def forward(self, img:torch.tensor) -> torch.tensor: 27 | """ 28 | Forward pass of the discriminator network. 29 | 30 | Parameters 31 | ---------- 32 | img : torch.tensor 33 | The input image to the discriminator network. 34 | 35 | Returns 36 | ------- 37 | validity : torch.tensor 38 | The probability that the input image is real. 
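            Values close to 1 mean the discriminator judges the input to be a
            real MNIST image; values close to 0 mean it judges it to be generated.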
39 |         """
40 |         img_flat = img.view(img.size(0), -1)  # Flatten the image
41 |         validity = self.model(img_flat)
42 |         return validity
43 | 
--------------------------------------------------------------------------------
/ml_algorithms/src/algorithms/gan/gan.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.optim as optim
4 | import torchvision.datasets as datasets
5 | from torch.utils.data import DataLoader
6 | from torchvision import transforms
7 | import torchvision
8 | import matplotlib.pyplot as plt
9 | 
10 | from generator import Generator
11 | from discriminator import Discriminator
12 | 
13 | # Hyperparameters
14 | latent_dim = 100
15 | lr = 0.0002
16 | batch_size = 64
17 | epochs = 200
18 | 
19 | # Device configuration (GPU if available)
20 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
21 | 
22 | transform = transforms.Compose([
23 |     transforms.ToTensor(),
24 |     transforms.Normalize([0.5], [0.5]) # Normalize images to [-1, 1]
25 | ])
26 | 
27 | train_data = datasets.MNIST(root="./data", train=True, transform=transform, download=True)
28 | train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
29 | 
30 | 
31 | generator = Generator(latent_dim).to(device)
32 | discriminator = Discriminator().to(device)
33 | 
34 | optimizer_G = optim.Adam(generator.parameters(), lr=lr)
35 | optimizer_D = optim.Adam(discriminator.parameters(), lr=lr)
36 | 
37 | criterion = nn.BCELoss() # Binary Cross Entropy Loss
38 | 
39 | for epoch in range(epochs):
40 |     for i, (imgs, _) in enumerate(train_loader):
41 | 
42 |         # Ground truths
43 |         real = torch.ones(imgs.size(0), 1).to(device)
44 |         fake = torch.zeros(imgs.size(0), 1).to(device)
45 | 
46 |         # ---------------------
47 |         #  Train Discriminator
48 |         # ---------------------
49 | 
50 |         optimizer_D.zero_grad()
51 | 
52 |         # Real images
53 |         real_imgs = imgs.to(device)
54 |         real_loss = criterion(discriminator(real_imgs), real)
55 | 
56 |         # Fake images
57 |         z = torch.randn(imgs.size(0), latent_dim).to(device)
58 |         fake_imgs = generator(z)
59 |         fake_loss = criterion(discriminator(fake_imgs), fake)
60 | 
61 |         # Total loss for discriminator
62 |         d_loss = real_loss + fake_loss
63 |         d_loss.backward()
64 |         optimizer_D.step()
65 | 
66 |         # -----------------
67 |         #  Train Generator
68 |         # -----------------
69 | 
70 |         optimizer_G.zero_grad()
71 | 
72 |         # Generate fake images
73 |         z = torch.randn(imgs.size(0), latent_dim).to(device)
74 |         fake_imgs = generator(z)
75 | 
76 |         # The generator wants the discriminator to think these images are real
77 |         g_loss = criterion(discriminator(fake_imgs), real)
78 | 
79 |         g_loss.backward()
80 |         optimizer_G.step()
81 | 
82 |         # Print progress
83 |         if i % 200 == 0:
84 |             print(f"Epoch [{epoch}/{epochs}] Batch {i}/{len(train_loader)} \
85 |                   Loss D: {d_loss.item():.4f}, loss G: {g_loss.item():.4f}")
86 | 
87 |     # Save generated samples for visualization every few epochs
88 |     if epoch % 10 == 0:
89 |         with torch.no_grad():
90 |             z = torch.randn(16, latent_dim).to(device)
91 |             generated_imgs = generator(z).cpu().view(-1, 1, 28, 28)
92 |             grid_img = torchvision.utils.make_grid(generated_imgs, nrow=4, normalize=True)
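93 |             # make_grid returns a (C, H, W) tensor; imshow expects (H, W, C), hence the permute below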
94 |             plt.imshow(grid_img.permute(1, 2, 0))
95 |             plt.show()
96 | 
--------------------------------------------------------------------------------
/ml_algorithms/src/algorithms/gan/generator.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | 
4 | 
5 | class Generator(nn.Module):
6 |     def __init__(self, latent_dim):
7 |         super(Generator, self).__init__()
8 |         self.model = nn.Sequential(
9 |             nn.Linear(latent_dim, 128),
10 |             nn.LeakyReLU(0.2),
11 |             nn.Linear(128, 256),
12 |             nn.BatchNorm1d(256),
13 |             nn.LeakyReLU(0.2),
14 |             nn.Linear(256, 512),
15 |             nn.BatchNorm1d(512),
16 |             nn.LeakyReLU(0.2),
17 |             nn.Linear(512, 784),  # 28x28=784
18 |             nn.Tanh()  # Normalize the output to [-1, 1]
19 |         )
20 | 
21 |     def forward(self, z:torch.tensor) -> torch.tensor:
22 |         """
23 |         Forward pass of the generator network.
24 | 
25 |         Parameters
26 |         ----------
27 |         z : torch.tensor
28 |             The input latent vector to the generator network.
29 | 
30 |         Returns
31 |         -------
32 |         img : torch.tensor
33 |             The generated image, reshaped to 28x28 for MNIST.
34 |         """
35 |         img = self.model(z)
36 |         img = img.view(img.size(0), 1, 28, 28)  # Reshape to 28x28 for MNIST
37 |         return img
38 | 
--------------------------------------------------------------------------------
/ml_algorithms/src/algorithms/id3/__pycache__/id3_classifier.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/carmelgafa/ml-from-scratch/c51c4f4c88829da26a448dfc6bc48d9ec706df96/ml_algorithms/src/algorithms/id3/__pycache__/id3_classifier.cpython-39.pyc
--------------------------------------------------------------------------------
/ml_algorithms/src/algorithms/id3/id3_exec.py:
--------------------------------------------------------------------------------
1 | '''
2 | id3_exec.py
3 | execution of ID3 example
4 | '''
5 | import os
6 | import pandas as pd
7 | import numpy as np
8 | from id3_classifier import ID3Classifier
9 | 
10 | DIR_PATH = os.path.dirname(os.path.realpath(__file__))
11 | DF = pd.read_csv(os.path.join(DIR_PATH, 'weather.csv'))
12 | df_copy = DF.copy()
13 | 
14 | #
15 | # preprocessing
16 | #
17 | 
18 | # discretize the TEMP attribute (rows with TEMP >= 79 match 'Medium' first, then are overwritten with 'Hot')
19 | df_copy.loc[(DF['TEMP'] <= 69), 'TEMP'] = 'Cold'
20 | df_copy.loc[(DF['TEMP'] > 69), 'TEMP'] = 'Medium'
21 | df_copy.loc[(DF['TEMP'] >= 79), 'TEMP'] = 'Hot'
22 | 
23 | # discretize the HUMIDITY attribute
24 | df_copy.loc[(DF['HUMIDITY'] <= 80), 'HUMIDITY'] = 'Normal'
25 | df_copy.loc[(DF['HUMIDITY'] > 80), 'HUMIDITY'] = 'High'
26 | 
27 | # remove the DAY column
28 | df_copy.drop(columns=['DAY'], inplace=True)
29 | 
30 | RESULTS = np.array(df_copy['PLAY'])
31 | df_copy.drop('PLAY', axis=1, inplace=True)
32 | 
33 | NODE_NAMES = list(df_copy.columns.values)
34 | NODE = np.array(df_copy[NODE_NAMES])
35 | 
36 | # classify
37 | classifier = ID3Classifier()
38 | classifier.id3_compute(NODE_NAMES, NODE, RESULTS)
39 | classifier.display_tree()
40 | 
41 | # test
42 | case = {'WEATHER': 'Sunny','TEMP':'Cold','HUMIDITY':'High','WIND':'Weak'}
43 | result = classifier.infer(case)
44 | print('Result with', case, 'is:', result)
--------------------------------------------------------------------------------
/ml_algorithms/src/algorithms/id3/readme.md:
--------------------------------------------------------------------------------
1 | # Implementation of ID3 algorithm
2 | 
3 | ## Files
4 | 
5 | - simple_tree.py: implementation of a tree structure
6 | - id3_classifier.py: implementation of ID3
7 | -
 tree_exec.py: execution test for tree
8 | - id3_exec.py: execution test for ID3
9 | - weather.csv: data to be classified
--------------------------------------------------------------------------------
/ml_algorithms/src/algorithms/id3/tree_exec.py:
--------------------------------------------------------------------------------
1 | '''
2 | simple example demonstrating the operation of the SimpleTree
3 | 
4 | 1. creates a tree with nodes and edges
5 | 2. creates another tree with nodes and edges
6 | 3. appends the second tree to the first tree
7 | '''
8 | from simple_tree import SimpleTree
9 | 
10 | a_tree = SimpleTree()
11 | a_tree.add_node('One')
12 | a_tree.add_node('Two')
13 | a_tree.add_node('Three')
14 | a_tree.add_node('Four')
15 | a_tree.add_edge('One', 'Three', 'No')
16 | a_tree.add_node('Five')
17 | a_tree.add_edge('One', 'Two', 'Yes')
18 | a_tree.add_edge('Two', 'Four', 'Yes')
19 | a_tree.add_edge('Four', 'Five', 'Yes')
20 | a_tree.add_node('Six')
21 | a_tree.add_edge('Four', 'Six', 'No')
22 | 
23 | a_tree.set_root_node('One')
24 | a_tree.display()
25 | 
26 | b_tree = SimpleTree()
27 | b_tree.add_node('Uno')
28 | b_tree.add_node('Due')
29 | b_tree.add_node('Tre')
30 | b_tree.add_node('Quattro')
31 | b_tree.add_edge('Uno', 'Due', 'Si')
32 | b_tree.add_edge('Uno', 'Tre', 'No')
33 | b_tree.add_edge('Tre', 'Quattro', 'Si')
34 | b_tree.set_root_node('Uno')
35 | b_tree.display()
36 | 
37 | a_tree.append_tree('Five', b_tree, 'No')
38 | a_tree.display()
39 | 
40 | 
41 | 
42 | 
43 | # a_tree = SimpleTree()
44 | # a_tree.add_node('One')
45 | # a_tree.add_node('Two')
46 | # a_tree.add_node('Three')
47 | # a_tree.add_node('Four')
48 | # a_tree.add_edge('One', 'Three', 'No')
49 | 
50 | 
51 | # b_tree = SimpleTree()
52 | # b_tree.add_node('Uno')
53 | 
--------------------------------------------------------------------------------
/ml_algorithms/src/algorithms/id3/weather.csv:
--------------------------------------------------------------------------------
1 | DAY,WEATHER,TEMP,HUMIDITY,WIND,PLAY
2 | 1,Sunny,85,85,Weak,No
3 | 2,Sunny,80,90,Strong,No
4 | 3,Cloudy,83,90,Weak,Yes
5 | 4,Rainy,70,96,Weak,Yes
6 | 5,Rainy,68,80,Weak,Yes
7 | 6,Rainy,65,70,Strong,No
8 | 7,Cloudy,64,65,Strong,Yes
9 | 8,Sunny,72,95,Weak,No
10 | 9,Sunny,69,70,Weak,Yes
11 | 10,Rainy,75,80,Weak,Yes
12 | 11,Sunny,75,70,Strong,Yes
13 | 12,Cloudy,72,90,Strong,Yes
14 | 13,Cloudy,81,75,Weak,Yes
15 | 14,Rainy,71,85,Strong,No
16 | 
--------------------------------------------------------------------------------
/ml_algorithms/src/algorithms/id3_version2/data.csv:
--------------------------------------------------------------------------------
1 | outlook,temperature,humidity,windy,play
2 | sunny,hot,high,weak,no
3 | sunny,hot,high,strong,no
4 | overcast,hot,high,weak,yes
5 | rainy,mild,high,weak,yes
6 | rainy,cool,normal,weak,yes
7 | rainy,cool,normal,strong,no
8 | overcast,cool,normal,strong,yes
9 | sunny,mild,high,weak,no
10 | sunny,cool,normal,weak,yes
11 | rainy,mild,normal,weak,yes
12 | sunny,mild,normal,strong,yes
13 | overcast,mild,high,strong,yes
14 | overcast,hot,normal,weak,yes
15 | rainy,mild,high,strong,no
--------------------------------------------------------------------------------
/ml_algorithms/src/algorithms/id3_version2/results_analysis.txt:
--------------------------------------------------------------------------------
1 | 'temp', 'outlook', 'no', 'yes', 'humid', 'windy', 'maybe'
2 | 
3 | 'temp' 0 'cool' 0 0 'mild' 0 0
4 | 'outlook' 'cool' 0 'sunny' 'sunny' 'high' 'rainy' 'sunny'
5 | 'no' 0 'sunny' 'no' 0 0 True 0
6 | 
'yes' 0 'sunny' 0 'yes' 'normal' 0 0 7 | 'humid' 'mild' 'high' 0 'normal' 0 0 0 8 | 'windy' 0 'rainy' True 0 0 0 True 9 | 'maybe' 0 'sunny' 0 0 0 True 'maybe' 10 | 11 | 12 | 13 | temperature 14 | 15 | 16 | 17 | 0 'sunny' 0 'overcast' 'rainy' 18 | 'sunny' 0 'high' 'normal' 0 19 | 0 'high' 0 0 True 20 | 'overcast' 'normal' 0 0 0 21 | 'rainy' 0 True 0 0 -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/id3_version2/sample_4_workout.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carmelgafa/ml-from-scratch/c51c4f4c88829da26a448dfc6bc48d9ec706df96/ml_algorithms/src/algorithms/id3_version2/sample_4_workout.xlsx -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/id3_version2/sample_results_1.txt: -------------------------------------------------------------------------------- 1 | * B 2 | * - ['b1', 'b3', 's1', 's2', 'b2', 's4'] - A 3 | * - [['s2']] - b14 4 | * - [['s1'], ['s2']] - b12 5 | * - [['s1']] - ce 6 | * - [['s2'], ['s2'], ['s2']] - b2 7 | * - [['s1'], ['s1']] - b5 8 | * - [['s2']] - b6 9 | * - [['s1']] - b7 10 | * - [['s1']] - s2 11 | * - ['ce'] - b12 12 | * - ['s3'] - s2 13 | * - ['b4'] - s1 14 | [ 15 | ["B is ['b1', 'b3', 's1', 's2', 'b2', 's4']", "A is [['s2']]", 'output is b14'], <--- sample_results_2 16 | ["B is ['b1', 'b3', 's1', 's2', 'b2', 's4']", "A is [['s1'], ['s2']]", 'output is b12'], 17 | ["B is ['b1', 'b3', 's1', 's2', 'b2', 's4']", "A is [['s1']]", 'output is ce'], 18 | ["B is ['b1', 'b3', 's1', 's2', 'b2', 's4']", "A is [['s2'], ['s2'], ['s2']]", 'output is b2'], <--- sample_results_1 [_2] 19 | ["B is ['b1', 'b3', 's1', 's2', 'b2', 's4']", "A is [['s1'], ['s1']]", 'output is b5'], 20 | ["B is ['b1', 'b3', 's1', 's2', 'b2', 's4']", "A is [['s2']]", 'output is b6'], <--- sample_results_1 [_2] 21 | ["B is ['b1', 'b3', 's1', 's2', 'b2', 's4']", "A is [['s1']]", 'output is b7'], 22 | ["B is ['b1', 'b3', 's1', 's2', 'b2', 's4']", "A is [['s1']]", 'output is s2'], 23 | ["B is ['ce']", 'output is b12'], 24 | ["B is ['s3']", 'output is s2'], 25 | ["B is ['b4']", 'output is s1']] 26 | -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/id3_version2/sample_results_2.txt: -------------------------------------------------------------------------------- 1 | 2 | * B 3 | * - ['s2'] - b6 4 | * - ['s1', 'b3', 's4'] - b2 5 | [ 6 | ["B is ['s2']", 'output is b6'], 7 | ["B is ['s1', 'b3', 's4']", 'output is b2']] -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/id3_version2/sample_results_3.txt: -------------------------------------------------------------------------------- 1 | * B 2 | * - ['b1'] - b14 3 | * - ['s2'] - b6 4 | * - ['s1', 'b3', 's4'] - b2 5 | [ 6 | ["B is ['b1']", 'output is b14'], 7 | ["B is ['s2']", 'output is b6'], 8 | ["B is ['s1', 'b3', 's4']", 'output is b2']] -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/id3_version2/sample_results_4.txt: -------------------------------------------------------------------------------- 1 | * B 2 | * - ['b1'] - A 3 | * - [['s2']] - b14 4 | * - [['s1']] - b12 5 | * - ['ce', 'b2'] - b12 6 | * - ['s2'] - b6 7 | * - ['s1', 'b3', 's4'] - b2 8 | 9 | [ 10 | ["B is ['b1']", "A is [['s2']]", 'output is b14'], 11 | ["B is ['b1']", "A is [['s1']]", 'output is b12'], 12 | ["B 
is ['ce', 'b2']", 'output is b12'], 13 | ["B is ['s2']", 'output is b6'], 14 | ["B is ['s1', 'b3', 's4']", 'output is b2']] -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/id3_version2/sample_results_5.txt: -------------------------------------------------------------------------------- 1 | * B 2 | * - ['ce', 'b1'] - b12 3 | * - ['s2'] - b6 4 | * - ['b2', 's4'] - A 5 | * - [['s2']] - b12 6 | * - [['s1']] - b7 7 | * - [['s2']] - b2 8 | * - [['s1']] - s2 9 | * - ['s1', 'b3'] - b2 10 | * - ['s3'] - s2 11 | [ 12 | ["B is ['ce', 'b1']", 'output is b12'], 13 | ["B is ['s2']", 'output is b6'], 14 | ["B is ['b2', 's4']", "A is [['s2']]", 'output is b12'], 15 | ["B is ['b2', 's4']", "A is [['s1']]", 'output is b7'], 16 | ["B is ['b2', 's4']", "A is [['s2']]", 'output is b2'], 17 | ["B is ['b2', 's4']", "A is [['s1']]", 'output is s2'], 18 | ["B is ['s1', 'b3']", 'output is b2'], ["B is ['s3']", 'output is s2']] -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/id3_version2/sample_rules.csv: -------------------------------------------------------------------------------- 1 | A,B,X 2 | s2,b1,b14 3 | s1,ce,b12 4 | s1,b3,ce 5 | s1,s1,b5 6 | s1,b1,b12 7 | s1,s2,b5 8 | s2,s2,b6 9 | s1,b2,b7 10 | s2,s1,b2 11 | s2,ce,b12 12 | s1,s3,s2 13 | s2,s3,s2 14 | s2,b2,b12 15 | s2,b3,b2 16 | s1,s4,s2 17 | s1,b4,s1 18 | s2,s4,b2 19 | -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/id3_version2/sample_rules_1.csv: -------------------------------------------------------------------------------- 1 | A,B,X 2 | s2,s2,b6 3 | s2,s1,b2 4 | s2,b3,b2 5 | s2,s4,b2 6 | -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/id3_version2/sample_rules_2.csv: -------------------------------------------------------------------------------- 1 | A,B,X 2 | s2,b1,b14 3 | s2,s2,b6 4 | s2,s1,b2 5 | s2,b3,b2 6 | s2,s4,b2 7 | -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/id3_version2/sample_rules_3.csv: -------------------------------------------------------------------------------- 1 | A,B,X 2 | s2,b1,b14 3 | s1,ce,b12 4 | s1,b1,b12 5 | s2,s2,b6 6 | s2,s1,b2 7 | s2,ce,b12 8 | s2,b2,b12 9 | s2,b3,b2 10 | s2,s4,b2 11 | -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/id3_version2/sample_rules_4.csv: -------------------------------------------------------------------------------- 1 | A,B,X 2 | s1,ce,b12 3 | s1,b1,b12 4 | s2,s2,b6 5 | s1,b2,b7 6 | s2,s1,b2 7 | s2,ce,b12 8 | s1,s3,s2 9 | s2,s3,s2 10 | s2,b2,b12 11 | s2,b3,b2 12 | s1,s4,s2 13 | s2,s4,b2 14 | -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/id3_version2/sensor_rules.csv: -------------------------------------------------------------------------------- 1 | x,Y 2 | s5,s2 3 | s4,s2 4 | s3,s2 5 | s2,s2 6 | s1,s2 7 | ce,s2 8 | b1,s1 9 | b2,s1 10 | b3,ce 11 | b4,ce 12 | b5,b2 13 | -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/id3_version2/weather_rules.csv: -------------------------------------------------------------------------------- 1 | Humidity,Month,Temperature 2 | b2,b2,b4 3 | ce,ce,b6 4 | s2,b1,b14 5 | b3,s3,s7 6 | b4,s3,s3 7 | b4,b3,s3 8 | b2,s3,s2 9 | b4,s1,b1 10 | s1,ce,b12 11 | ce,s1,b5 
12 | b4,ce,b3 13 | b3,ce,b4 14 | b3,b1,b3 15 | ce,b3,s1 16 | b1,s3,ce 17 | b3,b4,s7 18 | b2,s4,s5 19 | b3,b2,b3 20 | b3,s4,s5 21 | b1,b1,b8 22 | b3,b3,b1 23 | b2,b1,b4 24 | b2,ce,b3 25 | b4,s2,s1 26 | b2,s1,b5 27 | b2,b3,s1 28 | s1,b3,ce 29 | b4,s4,s3 30 | b3,s1,b3 31 | b1,b2,b3 32 | ce,s2,b2 33 | b1,s2,s1 34 | b1,s1,b7 35 | b2,b4,s1 36 | s1,s1,b5 37 | s1,b1,b12 38 | s1,s2,b5 39 | b1,ce,b7 40 | s2,s2,b6 41 | s1,b2,b7 42 | b1,b3,b1 43 | ce,b1,b8 44 | b4,b4,s4 45 | s2,s1,b2 46 | b4,b2,ce 47 | b3,s2,ce 48 | b1,s4,b1 49 | ce,b2,b5 50 | b1,b4,b1 51 | ce,s3,s1 52 | s2,ce,b12 53 | s1,s3,s2 54 | b2,s2,s2 55 | s2,s3,s2 56 | s2,b2,b12 57 | s3,ce,b16 58 | s2,b3,b2 59 | b4,b1,b1 60 | ce,s4,s1 61 | ce,b4,s1 62 | s1,s4,s2 63 | s4,s4,s4 64 | s3,b1,b15 65 | s3,s3,b3 66 | s1,b4,s1 67 | s3,b3,s3 68 | s2,s4,b2 69 | s4,s3,s11 70 | s3,s2,b6 71 | s4,b4,s12 72 | -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/id3_version2/weather_rules_1.csv: -------------------------------------------------------------------------------- 1 | Humidity,Month,Temperature 2 | ce,ce,b6 3 | ce,s2,b2 4 | s2,s2,b6 5 | s2,s1,b2 6 | s2,b3,b2 7 | s2,s4,b2 8 | s3,s2,b6 9 | -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/k-means/k-means.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carmelgafa/ml-from-scratch/c51c4f4c88829da26a448dfc6bc48d9ec706df96/ml_algorithms/src/algorithms/k-means/k-means.xlsx -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/linear_regression/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carmelgafa/ml-from-scratch/c51c4f4c88829da26a448dfc6bc48d9ec706df96/ml_algorithms/src/algorithms/linear_regression/__init__.py -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/linear_regression/batch_gradient_descent/multifeature_batch_gd.py: -------------------------------------------------------------------------------- 1 | import os 2 | import matplotlib 3 | matplotlib.rcParams['text.usetex'] = True 4 | import matplotlib.pyplot as plt 5 | import pandas as pd 6 | import sys 7 | import numpy as np 8 | 9 | 10 | def multifeature_gradient_descent( 11 | filename, 12 | alpha=0.0023, 13 | epochs_threshold=100000, 14 | costdifference_threshold=0.00001, 15 | plot=False): 16 | 17 | X = None 18 | Y = None 19 | beta = None 20 | 21 | training_data = pd.read_csv(filename, delimiter=',', header=0, index_col=False) 22 | 23 | Y = training_data['y'].to_numpy() 24 | 25 | m = len(Y) 26 | 27 | X = training_data.drop(['y'], axis=1).to_numpy() 28 | 29 | # add a column of ones to the X matrix to account for the intercept, a0 30 | X = np.insert(X, 0, 1, axis=1) 31 | 32 | y_hat = np.zeros(len(Y)) 33 | 34 | # beta will hold the values of the coefficients 35 | beta = np.array([5.0, 3.0, 1.0]) 36 | 37 | epochs = 0 38 | 39 | # initialize the previous cost function value to a large number 40 | previous_cost = sys.float_info.max 41 | 42 | # store the cost function and a2 values for plotting 43 | costs = [] 44 | a_2s = [] 45 | 46 | while True: 47 | # calculate the hypothesis function for all training data 48 | y_hat = np.dot(beta, X.T) 49 | 50 | # calculate the residuals 51 | residuals = y_hat - Y 52 | 53 | # calculate the new value of beta 54 | beta -= (alpha/m) * 
np.dot(residuals, X) 55 | 56 | # calculate the cost function 57 | cost = np.dot(residuals, residuals)/(2 * m) 58 | 59 | # increase the number of epochs 60 | epochs += 1 61 | 62 | # record the cost and a2 values for plotting 63 | costs.append(cost) 64 | a_2s.append(beta[2]) 65 | 66 | cost_difference = previous_cost - cost 67 | 68 | # uncomment this line to see details 69 | # print(f'Epoch: {epochs}, cost: {cost:.3f}, beta: {beta}') 70 | previous_cost = cost 71 | 72 | # check if the cost function is diverging, if so, break 73 | if cost_difference < 0: 74 | print('Cost function is diverging. Stopping training.') 75 | break 76 | 77 | # check if the cost function is close enough to 0, if so, break or if the number of 78 | # iterations is greater than the threshold, break 79 | if abs(cost_difference) < costdifference_threshold or epochs > epochs_threshold: 80 | break 81 | 82 | if plot: 83 | # plot the cost function and a2 values 84 | plt.plot(a_2s[3:], costs[3:], '--bx', color='lightblue', mec='red') 85 | plt.xlabel('a2') 86 | plt.ylabel('cost') 87 | plt.title(r'Cost Function vs. a2, with $\alpha$ =' + str(alpha)) 88 | plt.show() 89 | 90 | return beta, epochs, cost 91 | 92 | if __name__ == '__main__': 93 | 94 | from timeit import default_timer as timer 95 | 96 | filename = os.path.join(os.path.dirname(__file__), '..', 'data_generation', 'data_2f.csv') 97 | alpha = 0.0023 98 | epochs_threshold = 100000 99 | costdifference_threshold = 0.00001 100 | plot = False 101 | 102 | start = timer() 103 | beta, epochs, cost = multifeature_gradient_descent(filename, alpha, epochs_threshold, costdifference_threshold, plot) 104 | end = timer() 105 | print(f'Time: {end - start}, beta: {beta}, epochs: {epochs}, cost: {cost}') 106 | -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/linear_regression/batch_gradient_descent/twofeature_batch_gd.py: -------------------------------------------------------------------------------- 1 | import os 2 | import matplotlib 3 | matplotlib.rcParams['text.usetex'] = True 4 | import matplotlib.pyplot as plt 5 | import pandas as pd 6 | import sys 7 | 8 | def two_feature_gradient_descent(filename, alpha=0.0023, epochs_threshold=100000, costdifference_threshold=0.00001, plot=False):
 9 | ''' 10 | Batch gradient descent for a two feature linear regression problem.
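The hypothesis is y_hat = a0 + (a1 * x1) + (a2 * x2), and each epoch applies the batch updates a_j -= (alpha / m) * sum((y_hat - y) * x_j) over all m training rows, with x_0 = 1 for the intercept term a0.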
11 | This algorithm does not use any vectorization 12 | ''' 13 | 14 | # initialize the coefficients 15 | a0 = 5 16 | a1 = 3 17 | a2 = 1 18 | 19 | data_set = None 20 | data_set = pd.read_csv(filename, delimiter=',', header=0, index_col=False) 21 | m = len(data_set) 22 | epoch = 0 23 | 24 | previous_cost = sys.float_info.max 25 | 26 | while True: 27 | # calculate the hypothesis function for all training data 28 | data_set['y_hat'] = a0 + (a1 * data_set['x1']) + (a2 * data_set['x2']) 29 | 30 | # calculate the difference between the hypothesis function and the 31 | # actual y value for all training data 32 | data_set['y_hat-y'] = data_set['y_hat'] - data_set['y'] 33 | 34 | # multiply the difference by the x value for all training data 35 | data_set['y-hat-y.x1'] = data_set['y_hat-y'] * data_set['x1'] 36 | data_set['y-hat-y.x2'] = data_set['y_hat-y'] * data_set['x2'] 37 | 38 | # square the difference for all training data 39 | data_set['y-hat-y_sq'] = data_set['y_hat-y'] ** 2 40 | 41 | # update the a0 and a1 values 42 | a0 -= (alpha * (1/m) * sum(data_set['y_hat-y'])) 43 | a1 -= (alpha * (1/m) * sum(data_set['y-hat-y.x1'])) 44 | a2 -= (alpha * (1/m) * sum(data_set['y-hat-y.x2'])) 45 | 46 | # calculate the cost function 47 | cost = sum(data_set['y-hat-y_sq']) / (2 * m) 48 | epoch += 1 49 | 50 | # check if the cost function has converged 51 | cost_difference = previous_cost - cost 52 | # print(f'Epoch: {epoch}, cost: {cost:.3f}, difference: {cost_difference:.6f}') 53 | previous_cost = cost 54 | 55 | # check if the cost function is diverging, if so, break 56 | if cost_difference < 0: 57 | print(f'Cost function is diverging. Stopping training.') 58 | break 59 | 60 | # check if the cost function is close enough to 0, if so, break or if the number of 61 | # iterations is greater than the threshold, break 62 | if abs(cost_difference) < costdifference_threshold or epoch > epochs_threshold: 63 | break 64 | 65 | return a0, a1, a2, epoch, cost 66 | 67 | if __name__ == '__main__': 68 | 69 | from timeit import default_timer as timer 70 | 71 | filename = os.path.join(os.path.dirname(__file__), '..', 'data_generation', 'data_2f.csv') 72 | alpha = 0.0023 73 | epochs_threshold = 100000 74 | costdifference_threshold = 0.00001 75 | plot = False 76 | 77 | start = timer() 78 | a0, a1, a2, epochs, cost = two_feature_gradient_descent(filename, alpha, epochs_threshold, costdifference_threshold, plot) 79 | end = timer() 80 | print(f'Time: {end - start}, a0: {a0}, a1: {a1}, a2: {a2} epochs: {epochs}, cost: {cost}') 81 | -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/linear_regression/batch_gradient_descent/uni_batch_gd_nv.py: -------------------------------------------------------------------------------- 1 | 2 | import os 3 | import sys 4 | import numpy 5 | import matplotlib 6 | matplotlib.rcParams['text.usetex'] = True 7 | import matplotlib.pyplot as plt 8 | 9 | def gradient_descent(filename, alpha=0.0023, epochs_threshold=100000, costdifference_threshold=0.00001, plot=False): 10 | 11 | # initialize coefficient values 12 | a0 = -5 13 | a1 = -3 14 | previous_cost = sys.float_info.max 15 | 16 | data_set = numpy.loadtxt(filename, delimiter=',', skiprows=1) 17 | 18 | epoch = 1 19 | cost = 0 20 | 21 | costs = [] 22 | a_1s = [] 23 | 24 | data_count = len(data_set) 25 | 26 | while True: 27 | 28 | sum_a0 = 0.0 29 | sum_a1 = 0.0 30 | sum_cost = 0.0 31 | cost = 0.0 32 | 33 | for idx in range(0, data_count): 34 | y_value = data_set[idx][1] 35 | x_value = 
data_set[idx][0] 36 | 37 | y_hat = a0 + (a1 * x_value) 38 | 39 | sum_a0 += (y_hat - y_value) 40 | sum_a1 += ((y_hat - y_value) * x_value) 41 | sum_cost += pow((y_hat - y_value), 2) 42 | 43 | a0 -= ((alpha * sum_a0) / data_count) 44 | a1 -= ((alpha * sum_a1) / data_count) 45 | 46 | cost = ((1 / (2 * data_count)) * sum_cost) 47 | 48 | epoch += 1 49 | 50 | costs.append(cost) 51 | a_1s.append(a1) 52 | 53 | if (previous_cost - cost) < costdifference_threshold or epoch > epochs_threshold: 54 | print(f'Cost Function: {cost}') 55 | print(f'Epoch: {epoch}') 56 | break 57 | else: 58 | previous_cost = cost 59 | 60 | if plot: 61 | plt.plot(a_1s[:], costs[:], '--bx', color='lightblue', mec='red') 62 | plt.xlabel('a1') 63 | plt.ylabel('cost') 64 | plt.title(r'Cost Function vs. a1, with $\alpha$ =' + str(alpha)) 65 | plt.show() 66 | 67 | return a0, a1 68 | 69 | 70 | if __name__ == '__main__': 71 | 72 | current_directory = os.path.dirname(__file__) 73 | filename = os.path.join(current_directory, '..', 'data_generation', 'data_1f.csv') 74 | alpha = 0.00023 75 | epochs_threshold = 100000 76 | costdifference_threshold = 0.00001 77 | plot = True 78 | 79 | a0, a1 = gradient_descent(filename, alpha, epochs_threshold, costdifference_threshold, plot) 80 | print(f'a0: {a0}, a1: {a1}') -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/linear_regression/batch_gradient_descent/uni_batch_gd_v.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | #pylint: disable = E0401 4 | import os 5 | import pandas as pd 6 | import sys 7 | 8 | from algorithms.linear_regression.univariate_gd_analysis import plot_univariate_gd_analysis 9 | 10 | 11 | 12 | 13 | def gradient_descent( 14 | filename, 15 | alpha=0.0023, 16 | epochs_threshold=100000, 17 | costdifference_threshold=0.0001, 18 | plot=False): 19 | 20 | a0 = 130 21 | a1 = 19 22 | 23 | a0_prev = a0 24 | a1_prev = a1 25 | 26 | data_set = pd.read_csv(filename, delimiter=',', index_col=False) 27 | 28 | m = len(data_set) 29 | epoch = 0 30 | 31 | previous_cost = sys.float_info.max 32 | gd_data = [] 33 | 34 | 35 | while True: 36 | # calculate the hypothesis function for all training data 37 | data_set['y_hat'] = a0 + (a1 * data_set['x']) 38 | 39 | # calculate the difference between the hypothesis function and the 40 | # actual y value for all training data 41 | data_set['y_hat-y'] = data_set['y_hat'] - data_set['y'] 42 | 43 | # multiply the difference by the x value for all training data 44 | data_set['y-hat-y.x'] = data_set['y_hat-y'] * data_set['x'] 45 | 46 | # square the difference for all training data 47 | data_set['y-hat-y_sq'] = data_set['y_hat-y'] ** 2 48 | 49 | # update the a0 and a1 values 50 | a0 -= (alpha * (1/m) * sum(data_set['y_hat-y'])) 51 | a1 -= (alpha * (1/m) * sum(data_set['y-hat-y.x'])) 52 | 53 | # calculate the cost function 54 | cost = sum(data_set['y-hat-y_sq']) / (2 * m) 55 | epoch += 1 56 | 57 | plot_threshold = 0.001 58 | if abs(a0_prev - a0) > plot_threshold and abs(a1_prev - a1) > plot_threshold: 59 | gd_data.append((a0_prev, a1_prev, cost)) 60 | a0_prev = a0 61 | a1_prev = a1 62 | 63 | cost_difference = previous_cost - cost 64 | print(f'Epoch: {epoch}, cost: {cost:.3f}, difference: {cost_difference:.6f}') 65 | previous_cost = cost 66 | 67 | # check if the cost function is diverging, if so, break 68 | if cost_difference < 0: 69 | print(f'Cost function is diverging. 
Stopping training.') 70 | break 71 | 72 | # check if the cost function is close enough to 0, if so, break or if the number of 73 | # iterations is greater than the threshold, break 74 | if abs(cost_difference) < costdifference_threshold or epoch > epochs_threshold: 75 | gd_data.append((a0_prev, a1_prev, cost)) 76 | break 77 | 78 | if plot: 79 | plot_univariate_gd_analysis( 80 | file=filename, 81 | a0_range=(125,175,0.5), 82 | a1_range=(18,22,0.5), 83 | gd_points = gd_data 84 | ) 85 | return a0, a1 86 | 87 | if __name__ == '__main__': 88 | 89 | current_directory = os.path.dirname(__file__) 90 | filename = os.path.join(current_directory, '..', 'data_generation', 'data_1f.csv') 91 | alpha = 0.0004 92 | # alpha = 0.00056 93 | epochs_threshold = 100000 94 | costdifference_threshold = 0.00001 95 | plot = True 96 | 97 | a0, a1 = gradient_descent(filename, alpha, epochs_threshold, costdifference_threshold, plot) 98 | print(f'a0: {a0:.3f}, a1: {a1:.3f}') 99 | -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/linear_regression/batch_gradient_descent/uni_batch_gd_v_norm.py: -------------------------------------------------------------------------------- 1 | 2 | import os 3 | import pandas as pd 4 | import sys 5 | from algorithms.linear_regression.univariate_gd_analysis import plot_univariate_gd_analysis 6 | 7 | def gradient_descent( 8 | filename, 9 | alpha=0.0023, 10 | epochs_threshold=100000, 11 | costdifference_threshold=0.0001, 12 | plot=False): 13 | 14 | a0 = -3 15 | a1 = -1 16 | 17 | a0_prev = a0 18 | a1_prev = a1 19 | 20 | data_set = pd.read_csv(filename, delimiter=',', index_col=False) 21 | 22 | m = len(data_set) 23 | epoch = 0 24 | 25 | previous_cost = sys.float_info.max 26 | gd_data = [] 27 | 28 | 29 | while True: 30 | # calculate the hypothesis function for all training data 31 | data_set['y_hat'] = a0 + (a1 * data_set['x']) 32 | 33 | # calculate the difference between the hypothesis function and the 34 | # actual y value for all training data 35 | data_set['y_hat-y'] = data_set['y_hat'] - data_set['y'] 36 | 37 | # multiply the difference by the x value for all training data 38 | data_set['y-hat-y.x'] = data_set['y_hat-y'] * data_set['x'] 39 | 40 | # square the difference for all training data 41 | data_set['y-hat-y_sq'] = data_set['y_hat-y'] ** 2 42 | 43 | # update the a0 and a1 values 44 | a0 -= (alpha * (1/m) * sum(data_set['y_hat-y'])) 45 | a1 -= (alpha * (1/m) * sum(data_set['y-hat-y.x'])) 46 | 47 | # calculate the cost function 48 | cost = sum(data_set['y-hat-y_sq']) / (2 * m) 49 | epoch += 1 50 | 51 | plot_threshold = 0.001 52 | if abs(a0_prev - a0) > plot_threshold and abs(a1_prev - a1) > plot_threshold: 53 | gd_data.append((a0_prev, a1_prev, cost)) 54 | a0_prev = a0 55 | a1_prev = a1 56 | 57 | cost_difference = previous_cost - cost 58 | print(f'Epoch: {epoch}, cost: {cost:.3f}, difference: {cost_difference:.6f}') 59 | previous_cost = cost 60 | 61 | # check if the cost function is diverging, if so, break 62 | if cost_difference < 0: 63 | print(f'Cost function is diverging. 
Stopping training.') 64 | break 65 | 66 | # check if the cost function is close enough to 0, if so, break or if the number of 67 | # iterations is greater than the threshold, break 68 | if abs(cost_difference) < costdifference_threshold or epoch > epochs_threshold: 69 | gd_data.append((a0_prev, a1_prev, cost)) 70 | break 71 | 72 | if plot: 73 | plot_univariate_gd_analysis( 74 | file=filename, 75 | a0_range=(-3,3,0.5), 76 | a1_range=(-2,2,0.5), 77 | gd_points = gd_data 78 | ) 79 | return a0, a1 80 | 81 | if __name__ == '__main__': 82 | 83 | current_directory = os.path.dirname(__file__) 84 | filename = os.path.join(current_directory, '..', 'data_generation', 'data_1f_norm.csv') 85 | alpha = 0.0001 86 | # alpha = 0.00056 87 | epochs_threshold = 100000 88 | costdifference_threshold = 0.0000001 89 | plot = True 90 | 91 | a0, a1 = gradient_descent(filename, alpha, epochs_threshold, costdifference_threshold, plot) 92 | print(f'a0: {a0:.3f}, a1: {a1:.3f}') -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/linear_regression/data_generation/data_1f.csv: -------------------------------------------------------------------------------- 1 | x,y 2 | 0.0,159.93428306022466 3 | 1.0,167.2347139765763 4 | 2.0,202.95377076201385 5 | 3.0,240.46059712816052 6 | 4.0,225.3169325055333 7 | 5.0,245.3172608610164 8 | 6.0,301.58425631014785 9 | 7.0,305.3486945830582 10 | 8.0,300.61051228130094 11 | 9.0,340.8512008717193 12 | 10.0,340.73164614375077 13 | 11.0,360.68540492859483 14 | 12.0,394.8392454313207 15 | 13.0,371.73439510684403 16 | 14.0,395.50164334973937 17 | 15.0,438.75424941518054 18 | 16.0,449.74337759331155 19 | 17.0,496.2849466519055 20 | 18.0,491.83951848957577 21 | 19.0,501.75392597329414 22 | 20.0,579.3129753784311 23 | 21.0,565.4844739902693 24 | 22.0,591.3505640937585 25 | 23.0,581.5050362757308 26 | 24.0,619.1123455094963 27 | 25.0,652.2184517941973 28 | 26.0,646.9801284515539 29 | 27.0,697.5139603669135 30 | 28.0,697.9872262016239 31 | 29.0,724.1661250041344 32 | 30.0,737.9658677554121 33 | 31.0,807.0455636901787 34 | 32.0,789.7300555052414 35 | 33.0,788.845781420882 36 | 34.0,846.4508982420638 37 | 35.0,825.5831270005796 38 | 36.0,874.1772719000951 39 | 37.0,850.8065975224044 40 | 38.0,883.4362790220314 41 | 39.0,933.9372247173825 42 | 40.0,964.7693315999082 43 | 41.0,973.4273656237995 44 | 42.0,987.6870343522352 45 | 43.0,1003.9779260882142 46 | 44.0,1000.4295601926515 47 | 45.0,1035.603115832106 48 | 46.0,1060.7872245808041 49 | 47.0,1111.1424445243783 50 | 48.0,1116.8723657913692 51 | 49.0,1094.7391968927452 52 | 50.0,1156.4816793878958 53 | 51.0,1162.2983543916737 54 | 52.0,1176.4615599938809 55 | 53.0,1222.2335257768173 56 | 54.0,1250.619990449919 57 | 55.0,1268.625602382324 58 | 56.0,1253.2156495355473 59 | 57.0,1283.8157524829758 60 | 58.0,1316.6252686280714 61 | 59.0,1349.5109025424472 62 | 60.0,1340.4165152430942 63 | 61.0,1366.2868204667236 64 | 62.0,1367.8733005198794 65 | 63.0,1386.0758675183865 66 | 64.0,1446.250516447884 67 | 65.0,1477.1248005714165 68 | 66.0,1468.5597975683934 69 | 67.0,1510.0706579578405 70 | 68.0,1517.2327205009526 71 | 69.0,1517.0976049078974 72 | 70.0,1557.2279121101683 73 | 71.0,1600.7607313293195 74 | 72.0,1589.283479217801 75 | 73.0,1641.2928731162801 76 | 74.0,1577.605097918205 77 | 75.0,1666.4380500875045 78 | 76.0,1671.7409413647633 79 | 77.0,1684.0198529906827 80 | 78.0,1711.83521553071 81 | 79.0,1690.2486217079822 82 | 80.0,1745.6065622432498 83 | 81.0,1777.1422514302349 84 | 
82.0,1819.5578808948303 85 | 83.0,1799.6345956345272 86 | 84.0,1813.8301279421362 87 | 85.0,1839.9648591283092 88 | 86.0,1888.3080423540414 89 | 87.0,1896.5750221931937 90 | 88.0,1899.4047959246593 91 | 89.0,1940.2653486622671 92 | 90.0,1951.941550986961 93 | 91.0,1989.3728998106578 94 | 92.0,1975.958938122453 95 | 93.0,2003.4467570680447 96 | 94.0,2022.157836937357 97 | 95.0,2020.7297010373577 98 | 96.0,2075.9224055412915 99 | 97.0,2095.221105443598 100 | 98.0,2110.1022691328494 101 | 99.0,2125.308257332497 102 | 100.0,2121.6925851589917 103 | -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/linear_regression/data_generation/data_1f_norm.csv: -------------------------------------------------------------------------------- 1 | x,y 2 | 0.0,-1.6837274923728376 3 | 1.0,-1.6712828488936187 4 | 2.0,-1.6103945352855658 5 | 3.0,-1.5464587090977981 6 | 4.0,-1.57227328214928 7 | 5.0,-1.5381798212696194 8 | 6.0,-1.4422645655765751 9 | 7.0,-1.435847534480374 10 | 8.0,-1.4439244535226243 11 | 9.0,-1.375328362606351 12 | 10.0,-1.3755321609824842 13 | 11.0,-1.3415180846912436 14 | 12.0,-1.283297909283442 15 | 13.0,-1.3226834781934649 16 | 14.0,-1.2821687559457076 17 | 15.0,-1.2084384147920226 18 | 16.0,-1.1897058517568775 19 | 17.0,-1.1103689960925962 20 | 18.0,-1.1179468732380071 21 | 19.0,-1.1010463275227915 22 | 20.0,-0.9688356772895027 23 | 21.0,-0.9924083638335403 24 | 22.0,-0.9483158611816364 25 | 23.0,-0.9650989915151698 26 | 24.0,-0.9009918777037519 27 | 25.0,-0.8445577172522858 28 | 26.0,-0.8534871992479384 29 | 27.0,-0.7673449524366089 30 | 28.0,-0.7665382021705835 31 | 29.0,-0.7219124717159484 32 | 30.0,-0.6983888084401361 33 | 31.0,-0.5806324461909559 34 | 32.0,-0.6101492416366048 35 | 33.0,-0.6116566150839882 36 | 34.0,-0.5134603374077358 37 | 35.0,-0.5490324805156864 38 | 36.0,-0.46619671158902304 39 | 37.0,-0.5060354161575229 40 | 38.0,-0.45041339086120646 41 | 39.0,-0.3643272033822746 42 | 40.0,-0.3117694047724967 43 | 41.0,-0.2970105298659562 44 | 42.0,-0.2727028560463633 45 | 43.0,-0.24493266796627006 46 | 44.0,-0.2509813723523618 47 | 45.0,-0.19102294457667257 48 | 46.0,-0.14809297807052443 49 | 47.0,-0.062255201274109793 50 | 48.0,-0.052487719306438294 51 | 49.0,-0.09021691627641139 52 | 50.0,0.01503210134706469 53 | 51.0,0.024947467643370164 54 | 52.0,0.04909070607160635 55 | 53.0,0.12711566131579802 56 | 54.0,0.1755045080208543 57 | 55.0,0.20619768540878972 58 | 56.0,0.17992918545213973 59 | 57.0,0.23209149970063203 60 | 58.0,0.2880200792365149 61 | 59.0,0.3440784125515897 62 | 60.0,0.32857571019070714 63 | 61.0,0.37267539812632483 64 | 62.0,0.37537978350776785 65 | 63.0,0.4064086993756847 66 | 64.0,0.5089851172604791 67 | 65.0,0.5616148130956783 68 | 66.0,0.5470145230590555 69 | 67.0,0.6177758060547258 70 | 68.0,0.629984580570387 71 | 69.0,0.6297542564424935 72 | 70.0,0.6981621862710204 73 | 71.0,0.7723701913922257 74 | 72.0,0.7528055503065811 75 | 73.0,0.8414631065594781 76 | 74.0,0.7328980553445608 77 | 75.0,0.8843267082035763 78 | 76.0,0.893366255609844 79 | 77.0,0.9142974416245141 80 | 78.0,0.9617127619010943 81 | 79.0,0.9249152814328034 82 | 80.0,1.0192809211599285 83 | 81.0,1.0730380778675512 84 | 82.0,1.1453416709953568 85 | 83.0,1.1113795412471097 86 | 84.0,1.1355778852339342 87 | 85.0,1.1801283255796775 88 | 86.0,1.2625362939295033 89 | 87.0,1.2766285602528116 90 | 88.0,1.281452320058014 91 | 89.0,1.351105059329118 92 | 90.0,1.371008839912341 93 | 91.0,1.4348160036804185 94 | 92.0,1.4119499601870529 95 | 
93.0,1.458806934894261 96 | 94.0,1.4907026847140739 97 | 95.0,1.488268219910694 98 | 96.0,1.5823521908536844 99 | 97.0,1.6152496242584322 100 | 98.0,1.6406167263912446 101 | 99.0,1.66653753902026 102 | 100.0,1.6603741013201352 103 | -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/linear_regression/data_generation/data_3f.csv: -------------------------------------------------------------------------------- 1 | x,y 2 | 0.0,159.93428306022466 3 | 1.0,297.2347139765763 4 | 2.0,462.95377076201385 5 | 3.0,630.4605971281605 6 | 4.0,745.3169325055333 7 | 5.0,895.3172608610164 8 | 6.0,1081.5842563101478 9 | 7.0,1215.3486945830582 10 | 8.0,1340.610512281301 11 | 9.0,1510.8512008717194 12 | 10.0,1640.7316461437508 13 | 11.0,1790.6854049285948 14 | 12.0,1954.8392454313207 15 | 13.0,2061.734395106844 16 | 14.0,2215.5016433497394 17 | 15.0,2388.7542494151808 18 | 16.0,2529.7433775933114 19 | 17.0,2706.2849466519056 20 | 18.0,2831.8395184895758 21 | 19.0,2971.7539259732944 22 | 20.0,3179.312975378431 23 | 21.0,3295.484473990269 24 | 22.0,3451.3505640937583 25 | 23.0,3571.505036275731 26 | 24.0,3739.112345509496 27 | 25.0,3902.2184517941973 28 | 26.0,4026.980128451554 29 | 27.0,4207.513960366913 30 | 28.0,4337.987226201624 31 | 29.0,4494.166125004134 32 | 30.0,4637.965867755412 33 | 31.0,4837.045563690179 34 | 32.0,4949.730055505242 35 | 33.0,5078.845781420882 36 | 34.0,5266.450898242064 37 | 35.0,5375.583127000579 38 | 36.0,5554.177271900096 39 | 37.0,5660.806597522404 40 | 38.0,5823.436279022031 41 | 39.0,6003.937224717382 42 | 40.0,6164.769331599909 43 | 41.0,6303.427365623799 44 | 42.0,6447.687034352235 45 | 43.0,6593.977926088214 46 | 44.0,6720.4295601926515 47 | 45.0,6885.603115832106 48 | 46.0,7040.787224580804 49 | 47.0,7221.142444524378 50 | 48.0,7356.872365791369 51 | 49.0,7464.739196892745 52 | 50.0,7656.481679387896 53 | 51.0,7792.2983543916735 54 | 52.0,7936.461559993881 55 | 53.0,8112.233525776817 56 | 54.0,8270.619990449919 57 | 55.0,8418.625602382324 58 | 56.0,8533.215649535547 59 | 57.0,8693.815752482975 60 | 58.0,8856.625268628071 61 | 59.0,9019.510902542446 62 | 60.0,9140.416515243094 63 | 61.0,9296.286820466723 64 | 62.0,9427.87330051988 65 | 63.0,9576.075867518386 66 | 64.0,9766.250516447884 67 | 65.0,9927.124800571417 68 | 66.0,10048.559797568394 69 | 67.0,10220.07065795784 70 | 68.0,10357.232720500953 71 | 69.0,10487.097604907898 72 | 70.0,10657.227912110167 73 | 71.0,10830.76073132932 74 | 72.0,10949.283479217802 75 | 73.0,11131.29287311628 76 | 74.0,11197.605097918206 77 | 75.0,11416.438050087505 78 | 76.0,11551.740941364764 79 | 77.0,11694.019852990683 80 | 78.0,11851.83521553071 81 | 79.0,11960.248621707982 82 | 80.0,12145.60656224325 83 | 81.0,12307.142251430236 84 | 82.0,12479.55788089483 85 | 83.0,12589.634595634527 86 | 84.0,12733.830127942136 87 | 85.0,12889.96485912831 88 | 86.0,13068.308042354041 89 | 87.0,13206.575022193194 90 | 88.0,13339.40479592466 91 | 89.0,13510.265348662268 92 | 90.0,13651.941550986961 93 | 91.0,13819.372899810658 94 | 92.0,13935.958938122452 95 | 93.0,14093.446757068044 96 | 94.0,14242.157836937356 97 | 95.0,14370.729701037357 98 | 96.0,14555.922405541292 99 | 97.0,14705.221105443597 100 | 98.0,14850.102269132849 101 | 99.0,14995.308257332497 102 | 100.0,15121.692585158991 103 | -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/linear_regression/data_generation/dataset_generation_1f.py: 
-------------------------------------------------------------------------------- 1 | ''' 2 | single feature data generation 3 | ''' 4 | import os 5 | import numpy as np 6 | import matplotlib.pyplot as plt 7 | import pandas as pd 8 | 9 | def generate_data(a0, a1, noise_sigma, file_name, plot=False): 10 | ''' 11 | Generates 100 points with m slope and c intercept 12 | and adds noise with sigma 13 | ''' 14 | 15 | # x between 0 and 100 in steps of 1 16 | x = np.arange(0, 101, 1) 17 | 18 | # generate a noisy line 19 | np.random.seed(42) 20 | l = (a1*x) + a0 21 | e = np.random.randn(len(x))*noise_sigma 22 | y = l + e 23 | 24 | file_path = os.path.join(os.path.dirname(__file__), file_name) 25 | # save the data to a csv file 26 | df = pd.DataFrame(data=[x, y]).T 27 | df.columns = ['x', 'y'] 28 | df.to_csv(file_path, header=True, index=False) 29 | 30 | # plot the data 31 | if plot: 32 | plt.plot(x, y) 33 | plt.plot(x, l, '--') 34 | plt.xlim([min(x), max(x)]) 35 | plt.ylim([min(y), max(y)]) 36 | plt.show() 37 | 38 | if __name__=='__main__': 39 | generate_data(a0=150, a1=20, noise_sigma=20, file_name="data_1f.csv", plot=True) 40 | -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/linear_regression/data_generation/dataset_generation_2f.py: -------------------------------------------------------------------------------- 1 | # We generate a random dataset of points in a plane, and then add some noise to the y-values. We then 2 | # save the data to a csv file. 3 | import matplotlib.pyplot as plt 4 | import numpy as np 5 | import pandas as pd 6 | import os 7 | 8 | def generate_data(a0, a1, a2, noise_sigma, plot=False): 9 | 10 | x1_lower = -10 11 | x1_higher = 10 12 | x1_step = (x1_higher - x1_lower) / 1000 13 | x1 = np.arange(x1_lower, x1_higher, x1_step) 14 | 15 | x2_lower = 0 16 | x2_higher = 50 17 | x2_step = (x2_higher - x2_lower) / 1000 18 | x2= np.arange(x2_lower, x2_higher, x2_step) 19 | 20 | # generate the plane 21 | xx1, xx2 = np.meshgrid(x1, x2) 22 | y = a0 + (a1 * xx1) + (a2 * xx2) 23 | 24 | # add random_multiplier to y 25 | np.random 26 | random_multiplier = noise_sigma 27 | e = np.random.randn(len(xx1), len(xx2) )*random_multiplier 28 | yy = y + e 29 | 30 | df = pd.DataFrame(data=[xx1.ravel(), xx2.ravel(), yy.ravel()]).T 31 | df = df.sample(frac=0.01) 32 | df.columns = ['x1', 'x2', 'y'] 33 | 34 | full_filename = os.path.join(os.path.dirname(__file__), "data_2f.csv") 35 | df.to_csv(full_filename, header=True, index=False) 36 | 37 | if plot: 38 | # plot the data 39 | fig = plt.figure(figsize=(12, 12)) 40 | ax = fig.add_subplot(projection='3d') 41 | y = df.iloc[:,1] 42 | x = df.iloc[:,0] 43 | z = df.iloc[:,2] 44 | ax.scatter(x,y,z, cmap='coolwarm') 45 | plt.show() 46 | 47 | if __name__=='__main__': 48 | generate_data(a0=12, a1=5, a2=-3, noise_sigma=5, plot=True) -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/linear_regression/minibatch_gradient_descent/minibatch_gd_1.py: -------------------------------------------------------------------------------- 1 | 2 | import os 3 | import matplotlib 4 | matplotlib.rcParams['text.usetex'] = True 5 | import matplotlib.pyplot as plt 6 | import pandas as pd 7 | import sys 8 | import numpy as np 9 | 10 | def minibatch_gradient_descent( 11 | file:str, 12 | alpha:float=0.0023, 13 | batch_size:int=100, 14 | epochs_threshold:int=100000, 15 | costdifference_threshold:float=0.00001, 16 | plot:bool=False): 17 | ''' 18 | The function calculates 
the beta values for the linear regression model 19 | using the mini batch gradient descent algorithm 20 | ''' 21 | 22 | # load the training data 23 | training_data = pd.read_csv(file, delimiter=',', header=0, index_col=False) 24 | 25 | # divide the data into features and labels 26 | X = training_data.drop(['y'], axis=1).to_numpy() 27 | # add a column of ones to the features matrix to account for the intercept, a0 28 | X = np.insert(X, 0, 1, axis=1) 29 | Y = training_data['y'].to_numpy() 30 | 31 | # length of the training data 32 | m = len(Y) 33 | print(f'Length of the training data: {m}') 34 | 35 | # initialize the y_hat vector to 0 36 | y_hat = np.zeros(len(Y)) 37 | 38 | # beta will hold the values of the coefficients, hence it will be the size 39 | # of a row of the X matrix 40 | # initialize beta to random values 41 | beta = np.random.random(len(X[0])) 42 | 43 | # minibatch settings 44 | # batch_size = 1 => stochastic gradient descent 45 | # batch_size = m => batch gradient descent 46 | minibatches = int(m/batch_size) 47 | 48 | # initialize the number of minibatch passes 49 | minibatch_count = 0 50 | 51 | previous_cumulative_cost = sys.float_info.max 52 | 53 | # loop until exit condition is met 54 | while True: 55 | 56 | cumulative_cost = 0 57 | 58 | for i in range(minibatches): 59 | 60 | # print(f'Minibatch: {i}') 61 | minibatch_X = X[i*batch_size:(i+1)*batch_size] 62 | minibatch_Y = Y[i*batch_size:(i+1)*batch_size] 63 | 64 | # calculate the hypothesis function for the minibatch 65 | y_hat = np.dot(beta, minibatch_X.T) 66 | # calculate the residuals 67 | residuals = y_hat - minibatch_Y 68 | 69 | # calculate the new value of beta 70 | beta -= ( alpha / batch_size) * np.dot(residuals, minibatch_X) 71 | 72 | # calculate the cost function 73 | cost = np.dot(residuals, residuals) / ( 2 * batch_size) 74 | cumulative_cost += cost 75 | 76 | # increase the number of iterations 77 | minibatch_count += 1 78 | 79 | cost_difference = previous_cumulative_cost - cumulative_cost 80 | # print(f'Pass: {minibatch_count}, average cost: {(cumulative_cost/minibatches):.3f}, beta: {beta}') 81 | previous_cumulative_cost = cumulative_cost 82 | 83 | # check if the cost function is converged or 84 | # iterations is greater than the threshold, break 85 | if abs(cost_difference) < costdifference_threshold or minibatch_count > epochs_threshold: 86 | break 87 | 88 | # calculate the cost for the training data and return the beta values and 89 | # the number of iterations and the cost 90 | y_hat = np.dot(beta, X.T) 91 | residuals = y_hat - Y 92 | cost = np.dot(residuals, residuals) / ( 2 * m) 93 | 94 | return beta, minibatch_count, cost 95 | 96 | 97 | if __name__ == '__main__': 98 | 99 | from timeit import default_timer as timer 100 | 101 | filename = os.path.join(os.path.dirname(__file__), '..', 'data_generation', 'data_2f.csv') 102 | alpha = 0.00023 103 | epochs_threshold = 1000 104 | costdifference_threshold = 0.00001 105 | plot = False 106 | batch_size = 64 107 | 108 | 109 | start = timer() 110 | beta, minibatch_count, cost = minibatch_gradient_descent(filename, alpha, batch_size, epochs_threshold, costdifference_threshold, plot) 111 | end = timer() 112 | print(f'Time: {end - start} beta: {beta}, minibatch_count: {minibatch_count}, cost: {cost}') 113 | -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/linear_regression/multivariate_linear_regression/multivariate_lr.py:
-------------------------------------------------------------------------------- 1 | # This script fits a simple linear regression model. 2 | # 3 | # The model is: 4 | # 5 | # y = a0 + a1*x1 + a2*x2 6 | # 7 | # The model is fit using the least squares method. 8 | # 9 | # The model is tested by computing the cost of the model. 10 | # 11 | # The cost is the sum of the squared residuals. 12 | # 13 | # The cost is a measure of how good the model is. 14 | # 15 | # The lower the cost, the better the model. 16 | 17 | import pandas as pd 18 | import os 19 | 20 | # import data from csv 21 | filename = os.path.join(os.path.dirname(__file__), '..', 'data_generation', 'data_2f.csv') 22 | data_set = pd.read_csv(filename) 23 | 24 | data_set['x1_sq'] = data_set['x1']**2 25 | data_set['x2_sq'] = data_set['x2']**2 26 | data_set['x1y'] = data_set['x1']*data_set['y'] 27 | data_set['x2y'] = data_set['x2']*data_set['y'] 28 | data_set['x1x2'] = data_set['x1']*data_set['x2'] 29 | 30 | n = len(data_set) 31 | 32 | sum_X1_sq = data_set['x1_sq'].sum() - (data_set['x1'].sum()**2)/n 33 | print(f'sum_X1_sq: {sum_X1_sq}') 34 | 35 | sum_X2_sq = data_set['x2_sq'].sum() - (data_set['x2'].sum()**2)/n 36 | print(f'sum_X2_sq: {sum_X2_sq}') 37 | 38 | sum_X1y = data_set['x1y'].sum() - (data_set['x1'].sum()*data_set['y'].sum())/n 39 | print(f'sum_X1y: {sum_X1y}') 40 | 41 | sum_X2y = data_set['x2y'].sum() - (data_set['x2'].sum()*data_set['y'].sum())/n 42 | print(f'sum_X2y: {sum_X2y}') 43 | 44 | sum_X1X2 = data_set['x1x2'].sum() - (data_set['x1'].sum()*data_set['x2'].sum())/n 45 | print(f'sum_X1X2: {sum_X1X2}') 46 | 47 | mean_y = data_set['y'].mean() 48 | mean_x1 = data_set['x1'].mean() 49 | mean_x2 = data_set['x2'].mean() 50 | 51 | 52 | 53 | a1 = (sum_X2_sq*sum_X1y - sum_X1X2*sum_X2y)/(sum_X1_sq*sum_X2_sq - sum_X1X2**2) 54 | 55 | a2 = (sum_X1_sq*sum_X2y - sum_X1X2*sum_X1y)/(sum_X1_sq*sum_X2_sq - sum_X1X2**2) 56 | 57 | a0 = mean_y - a1*mean_x1 - a2*mean_x2 58 | 59 | print(f'a0: {a0}, a1: {a1}, a2: {a2}') 60 | 61 | 62 | import numpy as np 63 | 64 | y_hat = a0 + a1*data_set['x1'] + a2*data_set['x2'] 65 | 66 | residuals = y_hat - data_set['y'] 67 | 68 | cost = np.dot(residuals, residuals)/(2*n) 69 | 70 | print(f'cost: {cost}') -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/linear_regression/normalization/normalization_analysis.py: -------------------------------------------------------------------------------- 1 | 2 | #pylint: disable = E0401 3 | import os 4 | import pandas as pd 5 | 6 | 7 | 8 | data_path=os.path.join(os.path.dirname(__file__),'..', 'data_generation', 'data_1f.csv') 9 | 10 | df_data = pd.read_csv(data_path, delimiter=',', index_col=False) 11 | 12 | y_mean = df_data['y'].mean() 13 | y_stddev = df_data['y'].std() 14 | 15 | 16 | df_data['y'] = (df_data['y'] - y_mean) / y_stddev 17 | 18 | print(df_data['y'].mean()) 19 | print(df_data['y'].std()) 20 | 21 | 22 | normalized_data_path = os.path.join( 23 | os.path.dirname(__file__),'..', 'data_generation', 'data_1f_norm.csv') 24 | 25 | df_data.to_csv(normalized_data_path, header=True, index=False) 26 | -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/linear_regression/stochastic_gradient_descent/stochastic_gd_1f_1.py: -------------------------------------------------------------------------------- 1 | 2 | import os 3 | import matplotlib 4 | matplotlib.rcParams['text.usetex'] = True 5 | import matplotlib.pyplot as plt 6 | import
pandas as pd 7 | import sys 8 | import numpy as np 9 | 10 | 11 | def stochastic_gradient_descent(filename:str, alpha:float, max_epochs:int = 5): 12 | ''' 13 | The stochastic gradient descent function takes a dataset, a learning rate, and a maximum number of 14 | epochs. 15 | It returns the beta values and the cost. 16 | ''' 17 | 18 | np.random.seed(42) 19 | 20 | # load the training data 21 | data_set = pd.read_csv(filename, delimiter=',', header=0, index_col=False) 22 | 23 | # training_data = training_data.sample(frac=1).reset_index(drop=True) 24 | 25 | # divide the data into features and labels 26 | X = data_set.drop(['y'], axis=1).to_numpy() 27 | 28 | # add a column of ones to the features matrix to account for the intercept, a0 29 | X = np.insert(X, 0, 1, axis=1) 30 | 31 | Y = data_set['y'].to_numpy() 32 | 33 | # length of the training data 34 | m = len(Y) 35 | 36 | # initialize the y_hat vector to 0 37 | y_hat = np.zeros(len(Y)) 38 | 39 | # beta will hold the values of the coefficients, hence it will be the size 40 | # of a row of the X matrix 41 | # initialize beta to random values 42 | beta = np.random.random(len(X[0])) 43 | 44 | # initialize the number of epochs 45 | epochs = 0 46 | 47 | # loop until exit condition is met 48 | while True: 49 | 50 | i = np.random.randint(0, m) 51 | 52 | # print(f'Minibatch: {i}') 53 | x = X[i] 54 | y = Y[i] 55 | 56 | # calculate the hypothesis function for all training data 57 | y_hat = np.dot(beta, x.T) 58 | 59 | # calculate the residuals 60 | residuals = y_hat - y 61 | 62 | # calculate the new value of beta 63 | beta -= (alpha * residuals * x) 64 | 65 | epochs += 1 66 | 67 | # check if the cost function is close enough to 0, if so, break or if the number of 68 | # iterations is greater than the threshold, break 69 | if epochs > (m*max_epochs): 70 | break 71 | 72 | # calculate the cost for the training data and return the beta values and 73 | # the number of iterations and the cost 74 | y_hat = np.dot(beta, X.T) 75 | residuals = y_hat - Y 76 | cost = np.dot(residuals, residuals) / ( 2 * m) 77 | 78 | return beta, cost 79 | 80 | 81 | if __name__ == '__main__': 82 | 83 | from timeit import default_timer as timer 84 | 85 | filename = os.path.join(os.path.dirname(__file__), '..', 'data_generation', 'data_1f.csv') 86 | alpha = 0.0004 87 | max_epochs = 4000 88 | start = timer() 89 | beta, cost = stochastic_gradient_descent(filename, alpha, max_epochs) 90 | end = timer() 91 | print(f'Time: {end - start}, beta: {beta}, cost: {cost}') 92 | -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/linear_regression/stochastic_gradient_descent/stochastic_gd_nf_1.py: -------------------------------------------------------------------------------- 1 | 2 | import os 3 | import matplotlib 4 | matplotlib.rcParams['text.usetex'] = True 5 | import matplotlib.pyplot as plt 6 | import pandas as pd 7 | import sys 8 | import numpy as np 9 | 10 | 11 | def stochastic_gradient_descent(filename:str, alpha:float, max_epochs:int = 5): 12 | ''' 13 | The stochastic gradient descent function takes a dataset, 14 | a learning rate, and a maximum number of epochs. 15 | It returns the beta values and the cost. 
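Each iteration draws a random training row i and applies the per-sample update beta -= alpha * (y_hat_i - y_i) * x_i, so no cost needs to be accumulated during the loop itself.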
16 | ''' 17 | 18 | np.random.seed(42) 19 | 20 | # load the training data 21 | data_set = pd.read_csv(filename, delimiter=',', header=0, index_col=False) 22 | 23 | # training_data = training_data.sample(frac=1).reset_index(drop=True) 24 | 25 | # divide the data into features and labels 26 | X = data_set.drop(['y'], axis=1).to_numpy() 27 | 28 | # add a column of ones to the features matrix to account for the intercept, a0 29 | X = np.insert(X, 0, 1, axis=1) 30 | 31 | Y = data_set['y'].to_numpy() 32 | 33 | # length of the training data 34 | m = len(Y) 35 | 36 | # initialize the y_hat vector to 0 37 | y_hat = np.zeros(len(Y)) 38 | 39 | # beta will hold the values of the coefficients, hence it will be the size 40 | # of a row of the X matrix 41 | # initialize beta to random values 42 | # beta = np.random.random(len(X[0])) 43 | 44 | # beta will hold the values of the coefficients 45 | beta = np.array([5.0, 3.0, 1.0]) 46 | 47 | # initialize the number of epochs 48 | iterations = 0 49 | 50 | # loop until exit condition is met 51 | while True: 52 | 53 | i = np.random.randint(0, m) 54 | 55 | x = X[i] 56 | y = Y[i] 57 | 58 | # calculate the hypothesis function for all training data 59 | y_hat = np.dot(beta, x.T) 60 | 61 | # calculate the residuals 62 | residuals = y_hat - y 63 | 64 | # calculate the new value of beta 65 | beta -= (alpha * residuals * x) 66 | 67 | iterations += 1 68 | 69 | # check if the cost function is close enough to 0, if so, break or if the number of 70 | # iterations is greater than the threshold, break 71 | if iterations > (m*max_epochs): 72 | break 73 | 74 | # calculate the cost for the training data and return the beta values and 75 | # the number of iterations and the cost 76 | y_hat = np.dot(beta, X.T) 77 | residuals = y_hat - Y 78 | cost = np.dot(residuals, residuals) / ( 2 * m) 79 | 80 | return beta, cost 81 | 82 | 83 | if __name__ == '__main__': 84 | 85 | from timeit import default_timer as timer 86 | 87 | filename = os.path.join(os.path.dirname(__file__), '..', 'data_generation', 'data_2f.csv') 88 | alpha = 0.0001 89 | max_epochs = 10 90 | 91 | start = timer() 92 | beta, cost = stochastic_gradient_descent(filename, alpha, max_epochs) 93 | end = timer() 94 | print(f'Time: {end - start}, beta: {beta}, cost: {cost}') 95 | -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/linear_regression/stochastic_gradient_descent/stochastic_gd_nf_2.py: -------------------------------------------------------------------------------- 1 | 2 | import os 3 | import matplotlib 4 | matplotlib.rcParams['text.usetex'] = True 5 | import pandas as pd 6 | import sys 7 | import numpy as np 8 | 9 | 10 | def stochastic_gradient_descent( 11 | filename:str, 12 | alpha:float=0.0023, 13 | epochs_threshold:int=100, 14 | costdifference_threshold:float=0.00001, 15 | plot:bool=False): 16 | ''' 17 | The function takes a training data set, a learning rate, a number of epochs and a cost difference 18 | threshold. 
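The data is first split roughly 80/20 into training and validation rows; every 1000 updates the validation cost is recomputed, and training stops once that cost changes by less than the threshold (a simple early stopping scheme).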
19 | It then calculates the beta values for the training data set and returns the beta values, the number 20 | of iterations and the cost 21 | ''' 22 | 23 | np.random.seed(42) 24 | 25 | # load the training data 26 | data_set = pd.read_csv(filename, delimiter=',', header=0, index_col=False) 27 | 28 | # create train and test sets 29 | mask = np.random.rand(len(data_set)) < 0.8 30 | training_data = data_set[mask] 31 | validation_data = data_set[~mask] 32 | 33 | # divide the data into features and labels 34 | X_train = training_data.drop(['y'], axis=1).to_numpy() 35 | # add a column of ones to the features matrix to account for the intercept, a0 36 | X_train = np.insert(X_train, 0, 1, axis=1) 37 | Y_train = training_data['y'].to_numpy() 38 | 39 | X_validation = validation_data.drop(['y'], axis=1).to_numpy() 40 | X_validation = np.insert(X_validation, 0, 1, axis=1) 41 | Y_validation = validation_data['y'].to_numpy() 42 | 43 | 44 | # length of the training data 45 | m = len(Y_train) 46 | 47 | # initialize the y_hat vector to 0 48 | y_hat = np.zeros(len(Y_train)) 49 | 50 | # beta will hold the values of the coefficients, hence it will be the size 51 | # of a row of the X matrix 52 | # initialize beta to random values 53 | beta = np.random.random(len(X_train[0])) 54 | 55 | # initialize the number of epochs 56 | iterations = 0 57 | previous_validation_cost = sys.float_info.max 58 | 59 | # loop until exit condition is met 60 | while True: 61 | 62 | i = np.random.randint(0, m) 63 | 64 | x = X_train[i] 65 | y = Y_train[i] 66 | 67 | # calculate the hypothesis function for all training data 68 | y_hat = np.dot(beta, x.T) 69 | 70 | # calculate the residuals 71 | residuals = y_hat - y 72 | 73 | # calculate the new value of beta 74 | beta -= (alpha * residuals * x) 75 | 76 | iterations += 1 77 | 78 | if iterations % 1000 == 0: 79 | y_hat_validation = np.dot(beta, X_validation.T) 80 | residuals_validation = y_hat_validation - Y_validation 81 | cost_validation = np.dot( 82 | residuals_validation, residuals_validation) / ( 83 | 2 * len(Y_validation)) 84 | 85 | if abs(previous_validation_cost - cost_validation) < costdifference_threshold: 86 | break 87 | else: 88 | previous_validation_cost = cost_validation 89 | 90 | # uncomment this line to see details 91 | # print(f'Epoch: {count/m} Cost: {cost_validation} beta: {beta}') 92 | 93 | # check if the cost function is close enough to 0, if so, break or if the number of 94 | # iterations is greater than the threshold, break 95 | if (iterations/m) > (epochs_threshold): 96 | break 97 | 98 | # calculate the cost for the training data and return the beta values and 99 | # the number of iterations and the cost 100 | y_hat = np.dot(beta, X_train.T) 101 | residuals = y_hat - Y_train 102 | cost = np.dot(residuals, residuals) / ( 2 * m) 103 | 104 | return beta, iterations/m, cost 105 | 106 | 107 | if __name__ == '__main__': 108 | 109 | from timeit import default_timer as timer 110 | 111 | filename = os.path.join(os.path.dirname(__file__), '..', 'data_generation', 'data_2f.csv') 112 | alpha = 0.00033 113 | epochs_threshold = 100 114 | costdifference_threshold = 0.0004 115 | plot = False 116 | 117 | start = timer() 118 | beta, count, cost = stochastic_gradient_descent(filename, alpha, epochs_threshold, costdifference_threshold, plot) 119 | end = timer() 120 | print(f'Time: {end - start}, beta: {beta}, count: {count}, cost: {cost}') 121 | -------------------------------------------------------------------------------- 
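A quick cross-check for all of the gradient descent variants above is the closed-form least squares solution, which these scripts should approach as they converge. A minimal sketch, not a file in this repository, assuming the data_2f.csv layout used throughout (columns x1, x2, y, generated with a0=12, a1=5, a2=-3 plus Gaussian noise):

import numpy as np
import pandas as pd

# illustrative path; point this at the generated data_2f.csv
data = pd.read_csv('data_2f.csv', delimiter=',', header=0, index_col=False)

# same design matrix as the scripts above: a leading column of ones for the intercept
X = np.insert(data.drop(['y'], axis=1).to_numpy(), 0, 1, axis=1)
Y = data['y'].to_numpy()

# closed-form least squares: beta = argmin ||X.beta - Y||^2
beta, *_ = np.linalg.lstsq(X, Y, rcond=None)
print(beta)  # should land near [12, 5, -3], the coefficients used to generate the data

A run that stops far from this vector usually points to a learning rate or stopping threshold problem rather than to the data.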
/ml_algorithms/src/algorithms/linear_regression/univariate_gd_analysis.py: -------------------------------------------------------------------------------- 1 | '''script to plot the cost surface and the gradient descent points''' 2 | 3 | #pylint: disable = E0401 4 | import numpy as np 5 | import matplotlib.pyplot as plt 6 | import pandas as pd 7 | from matplotlib import cm 8 | 9 | def plot_univariate_gd_analysis( 10 | file:str, 11 | a0_range:tuple, 12 | a1_range:tuple, 13 | gd_points:list, 14 | plot_slices=False): 15 | ''' 16 | plot the costs surface and the gradient descent points 17 | ''' 18 | 19 | # read the data set 20 | data_set = pd.read_csv(file, delimiter=',', index_col=False) 21 | m = len(data_set) 22 | 23 | # plot the costs surface 24 | a0, a1 = np.meshgrid( 25 | np.arange(a0_range[0], a0_range[1], a0_range[2]), 26 | np.arange(a1_range[0], a1_range[1], a1_range[2])) 27 | ii, jj = np.shape(a0) 28 | 29 | 30 | costs = [] 31 | for i in range(ii): 32 | cost_row = [] 33 | for j in range(jj): 34 | y_hat = a0[i,j] + (a1[i,j] * data_set['x']) 35 | y_diff = y_hat - data_set['y'] 36 | y_diff_sq = y_diff ** 2 37 | cost = sum(y_diff_sq) / (2 * m) 38 | cost_row.append(cost) 39 | costs.append(cost_row) 40 | 41 | if plot_slices: 42 | 43 | a0_mincost_idx = np.where(np.round(a0[0,:], 1)==150) 44 | a1_mincost = a1[:, a0_mincost_idx].squeeze() 45 | ncosts = np.array(costs) 46 | costs_mincosts = ncosts[:,a0_mincost_idx[0].squeeze()] 47 | 48 | plt.rcParams['text.usetex'] = True 49 | plt.plot(a1_mincost, costs_mincosts) 50 | plt.xlabel(r'$a_1$') 51 | plt.ylabel(r'$J(150,a_1$)') 52 | 53 | plt.show() 54 | 55 | 56 | a1_mincost_idx = np.where(np.round(a1[:,0], 1)==20) 57 | a0_mincost = a0[a1_mincost_idx, :].squeeze() 58 | ncosts = np.array(costs) 59 | costs_mincosts = ncosts[a1_mincost_idx[0].squeeze(), :] 60 | 61 | plt.rcParams['text.usetex'] = True 62 | plt.plot(a0_mincost, costs_mincosts) 63 | plt.xlabel(r'$a_1$') 64 | plt.ylabel(r'$J(a_0, 20$)') 65 | 66 | plt.show() 67 | 68 | # plot the gradient descent points 69 | xx = [] 70 | yy = [] 71 | zz = [] 72 | for item in gd_points: 73 | xx.append(item[0]) 74 | yy.append(item[1]) 75 | zz.append(item[2]) 76 | 77 | plt.rcParams['text.usetex'] = True 78 | fig = plt.figure() 79 | ax = plt.axes(projection='3d') 80 | # ax.plot_surface( 81 | # a0, 82 | # a1, 83 | # np.array(costs), 84 | # rstride=1, 85 | # cstride=1, 86 | # cmap='cividis', 87 | # edgecolor='none', 88 | # alpha=0.5) 89 | ax.plot_surface( 90 | a0, 91 | a1, 92 | np.array(costs), 93 | rstride=1, 94 | cstride=1, 95 | cmap='viridis', # or 'plasma' 96 | edgecolor='none', 97 | alpha=0.6) 98 | 99 | 100 | 101 | ax.contour(a0, a1, np.array(costs), zdir='z', offset=-0.5, cmap=cm.coolwarm) 102 | ax.plot(xx, yy, zz, 'r.--', alpha=1) 103 | ax.set_xlabel(r'$a_0$') 104 | ax.set_ylabel(r'$a_1$') 105 | ax.set_zlabel(r'$J(a_0, a_1)$') 106 | plt.show() 107 | 108 | if __name__=='__main__': 109 | import os 110 | 111 | # plot_univariate_gd_analysis( 112 | # file=os.path.join(os.path.dirname(__file__), 'data_generation', 'data_1f.csv'), 113 | # a0_range=(125,175,0.2), 114 | # a1_range=(18,22,0.2), 115 | # gd_points= [], 116 | # plot_slices=True) 117 | 118 | 119 | plot_univariate_gd_analysis( 120 | file=os.path.join(os.path.dirname(__file__), 'data_generation', 'data_1f.csv'), 121 | a0_range=(125, 175, 0.1), # finer grid 122 | a1_range=(18, 22, 0.1), # finer grid 123 | gd_points=[], 124 | plot_slices=True 125 | ) 126 | -------------------------------------------------------------------------------- 
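The double loop that univariate_gd_analysis.py uses to fill the cost grid can also be written with NumPy broadcasting, which is considerably faster on fine grids. A sketch under the same assumptions (data_1f.csv with columns x and y; the path is illustrative):

import numpy as np
import pandas as pd

data_set = pd.read_csv('data_1f.csv', delimiter=',', index_col=False)
x = data_set['x'].to_numpy()
y = data_set['y'].to_numpy()
m = len(x)

a0, a1 = np.meshgrid(np.arange(125, 175, 0.5), np.arange(18, 22, 0.5))

# residuals for every (a0, a1) grid point at once: shape (grid_rows, grid_cols, m)
residuals = a0[..., None] + a1[..., None] * x - y
costs = (residuals ** 2).sum(axis=-1) / (2 * m)

The costs array matches the list of lists built row by row in the script and can be passed straight to the same plot_surface call.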
/ml_algorithms/src/algorithms/linear_regression/univariate_linear_regression/univariate_lr.py: -------------------------------------------------------------------------------- 1 | 2 | #pylint: disable = E0401 3 | import pandas as pd 4 | import os 5 | 6 | # import data from csv 7 | full_filename = os.path.join(os.path.dirname(__file__), '..', 'data_generation', 'data_1f.csv') 8 | data_set = pd.read_csv(full_filename) 9 | 10 | data_set.columns=['x', 'y'] 11 | 12 | # add new columns required to solve the problem 13 | data_set['x_sq'] = data_set['x']**2 14 | data_set['xy'] = data_set['x']*data_set['y'] 15 | 16 | 17 | # calculate the sums of the data 18 | sum_x = data_set['x'].sum() 19 | sum_y = data_set['y'].sum() 20 | sum_x_sq = data_set['x_sq'].sum() 21 | sum_xy = data_set['xy'].sum() 22 | 23 | n = len(data_set) 24 | print(f'sum_x: {sum_x}, sum_y: {sum_y}, sum_x_sq: {sum_x_sq}, sum_xy: {sum_xy}, n: {n}') 25 | 26 | # calculate the slope and intercept 27 | a_0 = (sum_x_sq*sum_y - sum_x*sum_xy)/(n*sum_x_sq - sum_x**2) 28 | 29 | a_1 = (n*sum_xy - sum_x*sum_y)/(n*sum_x_sq - sum_x**2) 30 | 31 | 32 | print(f'a_0: {a_0}, a_1: {a_1}') -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/logistic_regression/__pycache__/binaryclassification.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carmelgafa/ml-from-scratch/c51c4f4c88829da26a448dfc6bc48d9ec706df96/ml_algorithms/src/algorithms/logistic_regression/__pycache__/binaryclassification.cpython-311.pyc -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/logistic_regression/__pycache__/lr_utils.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carmelgafa/ml-from-scratch/c51c4f4c88829da26a448dfc6bc48d9ec706df96/ml_algorithms/src/algorithms/logistic_regression/__pycache__/lr_utils.cpython-311.pyc -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/logistic_regression/binaryclassification.py: -------------------------------------------------------------------------------- 1 | 2 | #pylint: disable = E0401 3 | import numpy as np 4 | import matplotlib.pyplot as plt 5 | 6 | 7 | def sigmoid(z): 8 | """ 9 | Compute the sigmoid of z 10 | 11 | Arguments: 12 | z - A scalar or numpy vector 13 | 14 | Returns: 15 | s - the sigmoid of z 16 | """ 17 | s = 1 / (1 + np.exp(-z)) 18 | return s 19 | 20 | def initialize_with_zeros(dim): 21 | """ 22 | Creates a vector for W initialized to 0 and creates b, initialized to 0 23 | 24 | Arguments: 25 | dim - the size of the weight vector, or the number of features to the system 26 | 27 | Returns: 28 | w - Initialized weight vecor of shape(dim,1) 29 | b - initialized bias, scalar 30 | """ 31 | w = np.zeros((dim, 1)) 32 | b = 0 33 | 34 | assert w.shape == (dim, 1) 35 | assert isinstance(b, float) or isinstance(b, int) 36 | 37 | return w, b 38 | 39 | def propagate(w, b, X, Y): 40 | """ 41 | Implement feed forward step, calculate cost function and its gradient 42 | 43 | Arguments: 44 | w - weights, a numpy array. 
In the image case it will be of shape (num_px * num_px * 3, 1)
45 |         b - the scalar bias of the neuron
46 |         X - input data with shape (num_px * num_px * 3, number of examples)
47 |         Y - 'true' label vector of size (1, number of examples)
48 | 
49 |     Returns:
50 |         cost - negative log likelihood cost of logistic regression
51 |         dw - derivative of cost w.r.t. w; same shape as w
52 |         db - derivative of cost w.r.t. b; same shape as b
53 |     """
54 | 
55 |     number_of_examples = X.shape[1]
56 | 
57 |     #forward propagation
58 |     A = sigmoid(np.dot(w.T, X) + b)
59 |     cost = np.sum(-(Y * np.log(A) + ((1-Y) * np.log(1-A))), axis=1)/ number_of_examples
60 | 
61 |     #backward propagation
62 |     dw = np.dot(X, (A-Y).T) / number_of_examples
63 |     db = np.sum(A-Y) / number_of_examples
64 | 
65 |     assert dw.shape == w.shape
66 |     assert db.dtype == float
67 | 
68 |     cost = np.squeeze(cost)
69 | 
70 |     assert cost.shape == ()
71 | 
72 |     grads = {'dw' : dw,
73 |              'db' : db}
74 |     return grads, cost
75 | 
76 | def optimize(w, b, X, Y, num_iterations, learning_rate, print_cost=False):
77 |     """
78 |     optimizes w and b using gradient descent
79 | 
80 |     Arguments:
81 |     w - weights, numpy array of shape (number_of_input_features, 1)
82 |     b - bias, scalar
83 |     X - data numpy array of shape (num_px*num_px*3, number_of_examples)
84 |     Y - 'true' label vector, numpy array of shape (1, number_of_examples)
85 |     num_iterations - number of gradient descent iterations
86 |     learning_rate - gradient descent learning rate
87 |     print_cost - print the loss every 100 steps
88 | 
89 |     Returns:
90 |     params - dictionary containing w and b
91 |     grads - dictionary containing dw and db
92 |     costs - list of all the costs computed
93 |     """
94 |     costs = []
95 | 
96 |     for i in range(num_iterations):
97 | 
98 |         grads, cost = propagate(w, b, X, Y)
99 | 
100 |         dw = grads['dw']
101 |         db = grads['db']
102 | 
103 |         w = w - (learning_rate * dw)
104 |         b = b - (learning_rate * db)
105 | 
106 |         if i%100 == 0:
107 |             costs.append(cost)
108 |             if print_cost:
109 |                 print('print cost after iteration {}: {}'.format(i, cost))
110 | 
111 |     params = {'w': w, 'b': b}
112 |     grads = {'dw': dw, 'db': db}
113 | 
114 |     return params, grads, costs
115 | 
116 | def predict(w, b, X):
117 |     """
118 |     predict label 0 or 1 using learned logistic regression parameters
119 | 
120 |     Arguments:
121 |     w - weights, numpy array of shape (number_of_input_features, 1)
122 |     b - bias, scalar
123 |     X - data numpy array of shape (num_px*num_px*3, number_of_examples)
124 | 
125 |     Returns:
126 |     Y_prediction - numpy array containing predictions for X
127 |     """
128 |     number_of_examples = X.shape[1]
129 | 
130 |     Y_prediction = np.zeros((1, number_of_examples))
131 | 
132 |     w = w.reshape(X.shape[0], 1)
133 | 
134 |     A = sigmoid(np.dot(w.T, X) + b)
135 | 
136 |     for i in range(A.shape[1]):
137 |         if A[0,i] >= 0.5:
138 |             Y_prediction[0,i] = 1
139 |         else:
140 |             Y_prediction[0,i] = 0
141 | 
142 |     return Y_prediction
--------------------------------------------------------------------------------
/ml_algorithms/src/algorithms/logistic_regression/exec.py:
--------------------------------------------------------------------------------
1 | from binaryclassification import *
2 | from lr_utils import *
3 | from PIL import Image # needed for Image.fromarray below
4 | def load_data_test():
5 |     train_set_x_orig, train_set_y, test_set_x_orig, test_set_y, classes = load_dataset()
6 | 
7 |     index = 26
8 |     plt.imshow(train_set_x_orig[index])
9 | 
10 |     # use instead of imshow
11 |     image_data = train_set_x_orig[index, :, :, :]
12 |     img = Image.fromarray(image_data, 'RGB')
13 |     #img.show()
14 | 
15 |     print(classes[train_set_y[:, index]])
16 |     print(classes[np.squeeze(train_set_y[:, index])].decode('utf-8'))
17 | 
18 | def sigmoid_function_test():
19 | 
20 |     # test sigmoid function
21 |     print('sigmoid of [0, 2] is {}'.format(sigmoid(np.array([0, 2]))))
22 | 
23 |     dim = 5
24 |     w, b = initialize_with_zeros(dim)
25 |     print('w = {}'.format(w))
26 |     print('b = {}'.format(b))
27 | 
28 | def feedforward_test():
29 |     w = np.array([[1.], [2.]])
30 |     b = 2.
31 |     X = np.array([[1., 2., -1.],[3., 4., -3.2]])
32 |     Y = np.array([[1, 0, 1]])
33 | 
34 |     grads, cost = propagate(w, b, X, Y)
35 | 
36 |     print('dw= {}'.format(grads['dw']))
37 |     print('db= {}'.format(grads['db']))
38 |     print('cost= {}'.format(cost))
39 | 
40 | def optimization_test():
41 |     w = np.array([[1.], [2.]])
42 |     b = 2.
43 |     X = np.array([[1., 2., -1.],[3., 4., -3.2]])
44 |     Y = np.array([[1, 0, 1]])
45 | 
46 |     params, grads, costs = optimize(w, b, X, Y, num_iterations=100, learning_rate=0.009, print_cost=True)
47 | 
48 |     print('w= {}'.format(params['w']))
49 |     print('b= {}'.format(params['b']))
50 |     print('dw= {}'.format(grads['dw']))
51 |     print('db= {}'.format(grads['db']))
52 | 
53 | def prediction_test():
54 |     w = np.array([[0.1124579],[0.23106775]])
55 |     b = -.3
56 |     X = np.array([[1., -1.1, -3.2],[1.2, 2., 0.1]])
57 |     print('predictions={}'.format(predict(w, b, X)))
58 | 
59 | prediction_test()
60 | #optimization_test()
61 | #feedforward_test()
62 | 
--------------------------------------------------------------------------------
/ml_algorithms/src/algorithms/logistic_regression/lr_utils.py:
--------------------------------------------------------------------------------
1 | 
2 | #pylint: disable = E0401
3 | import numpy as np
4 | import h5py
5 | import os
6 | 
7 | def load_dataset():
8 | 
9 |     script_dir = os.path.dirname(__file__) #<-- absolute dir the script is in
10 |     rel_path = 'train_catvnoncat.h5' # the .h5 files live alongside this script, not in ../datasets
11 |     abs_file_path = os.path.join(script_dir, rel_path)
12 |     train_dataset = h5py.File(abs_file_path, "r")
13 |     train_set_x_orig = np.array(train_dataset["train_set_x"][:]) # your train set features
14 |     train_set_y_orig = np.array(train_dataset["train_set_y"][:]) # your train set labels
15 | 
16 |     rel_path = 'test_catvnoncat.h5'
17 |     abs_file_path = os.path.join(script_dir, rel_path)
18 |     test_dataset = h5py.File(abs_file_path, "r")
19 |     test_set_x_orig = np.array(test_dataset["test_set_x"][:]) # your test set features
20 |     test_set_y_orig = np.array(test_dataset["test_set_y"][:]) # your test set labels
21 | 
22 |     classes = np.array(test_dataset["list_classes"][:]) # the list of classes
23 | 
24 |     train_set_y_orig = train_set_y_orig.reshape((1, train_set_y_orig.shape[0]))
25 |     test_set_y_orig = test_set_y_orig.reshape((1, test_set_y_orig.shape[0]))
26 | 
27 |     return train_set_x_orig, train_set_y_orig, test_set_x_orig, test_set_y_orig, classes
28 | 
29 | 
--------------------------------------------------------------------------------
/ml_algorithms/src/algorithms/logistic_regression/test_catvnoncat.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/carmelgafa/ml-from-scratch/c51c4f4c88829da26a448dfc6bc48d9ec706df96/ml_algorithms/src/algorithms/logistic_regression/test_catvnoncat.h5
--------------------------------------------------------------------------------
/ml_algorithms/src/algorithms/logistic_regression/train_catvnoncat.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/carmelgafa/ml-from-scratch/c51c4f4c88829da26a448dfc6bc48d9ec706df96/ml_algorithms/src/algorithms/logistic_regression/train_catvnoncat.h5 -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/svm/matplotlib_test.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | 4 | fig = plt.figure() 5 | ax = fig.add_subplot(1,1,1) 6 | 7 | colors = {1:'r', -1:'b'} 8 | 9 | 10 | data_dict = {-1:np.array([[1,7],[2,8], [3,8], ]), 11 | 1:np.array([[5,1], [6,-1], [7,3], ])} 12 | 13 | 14 | for i in data_dict: 15 | for x in data_dict[i]: 16 | print(x[0], x[1]) 17 | ax.scatter(x[0], x[1], s=100, color=colors[i]) 18 | 19 | 20 | plt.show() -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/svm/supportvectormachine.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | from matplotlib import style 3 | import numpy as np 4 | style.use('ggplot') 5 | 6 | class SupportVectorMachine: 7 | def __init__(self, visualization=True): 8 | self.visualization = visualization 9 | self.colors = {1:'r', -1:'b'} 10 | if self.visualization: 11 | self.fig = plt.figure() 12 | self.ax = self.fig.add_subplot(1,1,1) 13 | 14 | def fit(self, data): 15 | self.data = data 16 | # { ||w||: [w,b]} 17 | opt_dict = {} 18 | 19 | transforms = [[1,1], 20 | [1,-1], 21 | [-1,1], 22 | [-1,-1]] 23 | 24 | all_data = [] 25 | for yi in self.data: 26 | for featureset in self.data[yi]: 27 | for feature in featureset: 28 | all_data.append(feature) 29 | 30 | self.max_feature_value = max(all_data) 31 | self.min_feature_value = min(all_data) 32 | all_data = None 33 | 34 | step_sizes = [self.max_feature_value * 0.1, 35 | self.max_feature_value * 0.01, 36 | self.max_feature_value * 0.001,] 37 | 38 | # very expensive 39 | b_range_multiple = 2 40 | 41 | # with b we can take bigger steps 42 | b_multiple = 5 43 | latest_optimum = self.max_feature_value * 10 44 | 45 | for step in step_sizes: 46 | w = np.array([latest_optimum, latest_optimum]) 47 | print(w) 48 | # possible since this is a convex problem 49 | optimized = False 50 | while not optimized: 51 | for b in np.arange(-1*(self.max_feature_value*b_range_multiple), 52 | self.max_feature_value*b_range_multiple, 53 | step*b_multiple): 54 | 55 | for transformation in transforms: 56 | w_t = w * transformation 57 | found_option = True 58 | 59 | # will have issues with huge volumes. 
60 | # yi(xi.w+b) >= 1 61 | for i in self.data: 62 | for xi in self.data[i]: 63 | yi = i 64 | if not yi * (np.dot(w_t, xi)+ b) >= 1: 65 | found_option = False 66 | 67 | 68 | if found_option: 69 | opt_dict[np.linalg.norm(w_t)] = [w_t, b] 70 | 71 | if w[0] < 0: 72 | optimized = True 73 | print('Optimized a step') 74 | else: 75 | # not mathematically correct 76 | # w - [step, step] 77 | w = w - step 78 | 79 | norms = sorted([n for n in opt_dict]) 80 | opt_choice = opt_dict[norms[0]] 81 | 82 | self.w = opt_choice[0] 83 | self.b = opt_choice[1] 84 | latest_optimum = opt_choice[0][0]+step*2 85 | 86 | for i in self.data: 87 | for xi in self.data[i]: 88 | yi=i 89 | 90 | def predict(self, features): 91 | # sign (x_i.w + b) 92 | classification = np.sign(np.dot(np.array(features), self.w) + self.b) 93 | 94 | if classification != 0 and self.visualization: 95 | self.ax.scatter(features[0], features[1], s=200, marker='*', c=self.colors[classification]) 96 | 97 | 98 | return classification 99 | 100 | 101 | def visualize(self): 102 | [[self.ax.scatter(x[0],x[1],s=100,color=self.colors[i]) for x in data_dict[i]] for i in data_dict] 103 | 104 | # hyperplane = x.w+b 105 | # v = x.w+b 106 | # psv = 1 107 | # nsv = -1 108 | # dec = 0 109 | def hyperplane(x,w,b,v): 110 | return (-w[0]*x-b+v) / w[1] 111 | 112 | datarange = (self.min_feature_value*0.9,self.max_feature_value*1.1) 113 | hyp_x_min = datarange[0] 114 | hyp_x_max = datarange[1] 115 | 116 | # (w.x+b) = 1 117 | # positive support vector hyperplane 118 | psv1 = hyperplane(hyp_x_min, self.w, self.b, 1) 119 | psv2 = hyperplane(hyp_x_max, self.w, self.b, 1) 120 | self.ax.plot([hyp_x_min,hyp_x_max],[psv1,psv2], 'k') 121 | 122 | # (w.x+b) = -1 123 | # negative support vector hyperplane 124 | nsv1 = hyperplane(hyp_x_min, self.w, self.b, -1) 125 | nsv2 = hyperplane(hyp_x_max, self.w, self.b, -1) 126 | self.ax.plot([hyp_x_min,hyp_x_max],[nsv1,nsv2], 'k') 127 | 128 | # (w.x+b) = 0 129 | # positive support vector hyperplane 130 | db1 = hyperplane(hyp_x_min, self.w, self.b, 0) 131 | db2 = hyperplane(hyp_x_max, self.w, self.b, 0) 132 | self.ax.plot([hyp_x_min,hyp_x_max],[db1,db2], 'y--') 133 | 134 | plt.show() 135 | 136 | 137 | data_dict = {-1:np.array([[1,4], 138 | [2,8],]), 139 | 140 | +1:np.array([ [6,-1], 141 | [7,3.5],])} 142 | 143 | svm = SupportVectorMachine() 144 | svm.fit(data=data_dict) 145 | 146 | svm.predict([1,0.745]) 147 | svm.predict([2,1]) 148 | svm.predict([3,-5]) 149 | 150 | svm.visualize() -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/svm/test_code.py: -------------------------------------------------------------------------------- 1 | a = [1,2,3] 2 | b = [4,5,6] 3 | 4 | 5 | for x,y in zip(a,b): 6 | print(x,y) -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/utils/__pycache__/simple_tree.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carmelgafa/ml-from-scratch/c51c4f4c88829da26a448dfc6bc48d9ec706df96/ml_algorithms/src/algorithms/utils/__pycache__/simple_tree.cpython-39.pyc -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/utils/tree_exec.py: -------------------------------------------------------------------------------- 1 | ''' 2 | simple example demonstrating the operation of the SimpleTree 3 | 4 | 1. creates a tree with nodes and edges 5 | 2. 
creates another tree with nodes and edges
6 | 3. appends the second tree to the first tree (note: the live code below instead demonstrates rule generation on a RootedDAC)
7 | '''
8 | from nary_tree import NAryTree
9 | from rooted_dac import RootedDAC
10 | 
11 | 
12 | t = RootedDAC()
13 | 
14 | # t.add_node('a1')
15 | # t.add_node('a2')
16 | # t.add_node('a3')
17 | # t.add_node('a4')
18 | # t.add_node('a5')
19 | 
20 | # t.add_edge('a1', 'a2', 'aa')
21 | # t.add_edge('a1', 'a3', 'ab')
22 | # t.add_edge('a2', 'a4', 'ac')
23 | # t.add_edge('a2', 'a5', 'ad')
24 | 
25 | # print(t.generate_rules())
26 | 
27 | 
28 | # example for disjunction in antecedent clause
29 | t.add_node('A')
30 | t.add_node('B')
31 | t.add_node('X_x')
32 | t.add_node('X_y')
33 | 
34 | t.add_edge('A', 'X_x', 'a')
35 | t.add_edge('A', 'X_y', 'b')
36 | t.add_edge('A', 'X_x', 'c')
37 | 
38 | print(t.generate_rules())
--------------------------------------------------------------------------------
/ml_algorithms/src/introduction_to_computation/brute_force/fibonacci.py:
--------------------------------------------------------------------------------
1 | def fib(n):
2 |     if n==0 or n==1:
3 |         return 1
4 |     else:
5 |         return fib(n-1) + fib(n-2)
6 | 
7 | def fast_fib(n, memo={}):
8 | 
9 |     if n==0 or n==1:
10 |         return 1
11 | 
12 |     if n in memo:
13 |         return memo[n]
14 |     else:
15 |         result = fast_fib(n-1, memo) + fast_fib(n-2, memo) # pass the memo down explicitly
16 |         memo[n] = result
17 |         return result
18 | 
19 | print(fast_fib(120))
--------------------------------------------------------------------------------
/ml_algorithms/src/introduction_to_computation/brute_force/knapsack.py:
--------------------------------------------------------------------------------
1 | class Food ():
2 | 
3 |     def __init__(self, n:str, v:int, w:int) -> None:
4 |         self.name = n
5 |         self.value = v
6 |         self.calories = w
7 | 
8 |     def get_value(self) -> int:
9 |         return self.value
10 | 
11 |     def get_cost(self) -> int:
12 |         return self.calories
13 | 
14 |     def density(self) -> float:
15 |         return self.get_value() / self.get_cost()
16 | 
17 |     def __str__(self) -> str:
18 |         return f'{self.name} : <{self.value}, {self.calories}>'
19 | 
20 | def build_menu(names, values, calories):
21 | 
22 |     menu = []
23 |     for i in range(len(values)): # iterates over values, so names beyond len(values) are ignored
24 |         menu.append(Food(names[i], values[i], calories[i]))
25 | 
26 |     return menu
27 | 
28 | def max_val(to_consider, available):
29 | 
30 |     # available is the remaining calorie budget
31 | 
32 |     # base case: nothing left to consider or no budget available
33 |     if to_consider == [] or available == 0:
34 |         result = (0, ())
35 | 
36 |     # does the first element's cost fit within the available budget?
37 |     # if it does not fit, the take (left) branch is not considered
38 |     elif to_consider[0].get_cost() > available:
39 |         result = max_val(to_consider[1:], available)
40 | 
41 |     # consider both branches
42 |     else:
43 |         next_item = to_consider[0]
44 | 
45 |         # left branch - take
46 |         # we took the item, so available is now minus the cost of the taken item
47 |         with_val, with_to_take = max_val(to_consider[1:],
48 |                                          available - next_item.get_cost())
49 | 
50 |         # value of subbranch plus value of item as it was taken
51 |         with_val += next_item.get_value()
52 | 
53 |         # right branch - leave
54 |         without_val, without_to_take = max_val(to_consider[1:],
55 |                                                available)
56 | 
57 |         # choose better branch
58 |         if with_val > without_val:
59 |             result = (with_val, with_to_take + (next_item,))
60 |         else:
61 |             result = (without_val, without_to_take)
62 | 
63 |     return result
64 | 
65 | def test_max_val(foods, max_units):
66 | 
67 |     print(f'use search tree to allocate {max_units} calories')
68 | 
69 |     val, taken = max_val(foods, max_units)
70 | 
71 |     print(f'total value of items taken {val}')
72 | 
73 |     for item in taken:
74 |         print(f'\t{item}')
75 | 
76 | if __name__ == "__main__":
77 |     names = ['wine', 'beer', 'pizza', 'burger', 'fries', 'cola', 'apple', 'donut', 'cake'] # values/calories below have 8 entries, so build_menu drops 'cake'
78 |     values = [89, 90, 95, 100, 90, 79, 50, 10]
79 |     calories = [123, 154, 258, 354, 365, 150, 95, 195]
80 |     foods = build_menu(names, values, calories)
81 | 
82 |     test_max_val(foods, 750)
83 | 
--------------------------------------------------------------------------------
/ml_algorithms/src/introduction_to_computation/confidence_interval/normal_distribution_gen.py:
--------------------------------------------------------------------------------
1 | import random
2 | import matplotlib.pylab as plb
3 | 
4 | 
5 | dist = []
6 | num_samples = 1000000
7 | 
8 | for i in range(num_samples):
9 |     # first parameter mean, second std dev
10 |     dist.append(random.gauss(0, 100))
11 | 
12 | weights = [1/num_samples]*len(dist)
13 | v = plb.hist(dist, bins=100, weights=weights)
14 | 
15 | plb.xlabel('x')
16 | plb.ylabel('Relative Frequency')
17 | 
18 | print(f'fraction within approx 200 of mean = {sum(v[0][30:70])}')
19 | 
20 | plb.show()
--------------------------------------------------------------------------------
/ml_algorithms/src/introduction_to_computation/data_structures/graph.py:
--------------------------------------------------------------------------------
1 | class Node():
2 |     def __init__(self, name) -> None:
3 |         self.name = name
4 |     def get_name(self):
5 |         return self.name
6 |     def __str__(self) -> str:
7 |         return self.name
8 | 
9 | class Edge():
10 |     def __init__(self, src, dest) -> None:
11 |         self.src = src
12 |         self.dest = dest
13 |     def get_source(self):
14 |         return self.src
15 |     def get_destination(self):
16 |         return self.dest
17 |     def __str__(self) -> str:
18 |         return f'{self.src.get_name()} -> {self.dest.get_name()}'
19 | 
20 | class Digraph():
21 |     '''
22 |     edges point in only one direction
23 |     options are:
24 |     1. create adjacency matrix joining src to dest
25 |        fine for a digraph as the matrix can encode direction
26 |        (it need not be symmetric); however,
27 |        with few edges it yields a huge matrix of mostly zeros
28 |     2.
adjacency list for every node have a list of destinations 29 | nodes keys in dict 30 | ''' 31 | def __init__(self) -> None: 32 | self.edges ={} 33 | 34 | def add_node(self, node): 35 | if node in self.edges: 36 | raise ValueError('Duplicate Node') 37 | else: 38 | self.edges[node] = [] 39 | 40 | def add_edge(self, edge:Edge): 41 | src = edge.get_source() 42 | dest = edge.get_destination() 43 | if not(src in self.edges and dest in self.edges): 44 | raise ValueError('Node not in graph') 45 | self.edges[src].append(dest) 46 | 47 | def get_children(self, node): 48 | return self.edges[node] 49 | 50 | def has_node(self, node): 51 | return node in self.edges 52 | 53 | def get_node(self, name): 54 | for n in self.edges: 55 | if n.get_name() == name: 56 | return n 57 | raise NameError(name) 58 | 59 | def __str__(self) -> str: 60 | result = '' 61 | for src in self.edges: 62 | for dest in self.edges[src]: 63 | result = result + f'{src.get_name()} -> {dest.get_name()}\n' 64 | return result[:-1] 65 | 66 | class Graph(Digraph): 67 | def add_edge(self, edge: Edge): 68 | Digraph.add_edge(self,edge) 69 | rev_edge = Edge(edge.get_destination(), edge.get_source()) 70 | Digraph.add_edge(self, rev_edge) 71 | 72 | def build_city_graph(graph_type): 73 | providence = Node('providence') 74 | boston = Node('boston') 75 | new_york = Node('new york') 76 | denver = Node('denver') 77 | phoenix = Node('phoenix') 78 | chicago = Node('chicago') 79 | los_angeles = Node('los angeles') 80 | 81 | edges = [] 82 | edges.append(Edge(providence, boston)) 83 | edges.append(Edge(providence, new_york)) 84 | edges.append(Edge(denver, phoenix)) 85 | edges.append(Edge(denver, new_york)) 86 | edges.append(Edge(new_york, chicago)) 87 | edges.append(Edge(chicago, denver)) 88 | edges.append(Edge(chicago, phoenix)) 89 | edges.append(Edge(boston, providence)) 90 | edges.append(Edge(boston, new_york)) 91 | edges.append(Edge(los_angeles, boston)) 92 | 93 | graph = graph_type() 94 | graph.add_node(providence) 95 | graph.add_node(boston) 96 | graph.add_node(new_york) 97 | graph.add_node(denver) 98 | graph.add_node(phoenix) 99 | graph.add_node(chicago) 100 | graph.add_node(los_angeles) 101 | 102 | for edge in edges: 103 | graph.add_edge(edge) 104 | 105 | return graph 106 | 107 | 108 | if __name__ == "__main__": 109 | graph = build_city_graph(Digraph) 110 | print(graph) -------------------------------------------------------------------------------- /ml_algorithms/src/introduction_to_computation/dice/__pycache__/biased_die.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carmelgafa/ml-from-scratch/c51c4f4c88829da26a448dfc6bc48d9ec706df96/ml_algorithms/src/introduction_to_computation/dice/__pycache__/biased_die.cpython-39.pyc -------------------------------------------------------------------------------- /ml_algorithms/src/introduction_to_computation/dice/__pycache__/dishonest_casino.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carmelgafa/ml-from-scratch/c51c4f4c88829da26a448dfc6bc48d9ec706df96/ml_algorithms/src/introduction_to_computation/dice/__pycache__/dishonest_casino.cpython-39.pyc -------------------------------------------------------------------------------- /ml_algorithms/src/introduction_to_computation/dice/__pycache__/fair_casino.cpython-39.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/carmelgafa/ml-from-scratch/c51c4f4c88829da26a448dfc6bc48d9ec706df96/ml_algorithms/src/introduction_to_computation/dice/__pycache__/fair_casino.cpython-39.pyc -------------------------------------------------------------------------------- /ml_algorithms/src/introduction_to_computation/dice/__pycache__/fair_die.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carmelgafa/ml-from-scratch/c51c4f4c88829da26a448dfc6bc48d9ec706df96/ml_algorithms/src/introduction_to_computation/dice/__pycache__/fair_die.cpython-39.pyc -------------------------------------------------------------------------------- /ml_algorithms/src/introduction_to_computation/dice/__pycache__/loaded_die.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carmelgafa/ml-from-scratch/c51c4f4c88829da26a448dfc6bc48d9ec706df96/ml_algorithms/src/introduction_to_computation/dice/__pycache__/loaded_die.cpython-39.pyc -------------------------------------------------------------------------------- /ml_algorithms/src/introduction_to_computation/dice/dishonest_casino.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | from fair_die import FairDie 3 | from loaded_die import LoadedDie 4 | from numpy import mean 5 | from numpy import arange 6 | import random 7 | import enum 8 | 9 | 10 | 11 | class DiceThrow(enum.Enum): 12 | FAIR = enum.auto() 13 | LOADED = enum.auto() 14 | class DishonestCasino(): 15 | 16 | def __init__(self, p1:float, p2:float) -> None: 17 | self.fair_die = FairDie() 18 | self.biased_die = LoadedDie() 19 | self.p_1 = p1 20 | self.p_2 = p2 21 | 22 | 23 | 24 | def play(self, number_of_tosses:int)->float: 25 | 26 | results = [] 27 | next_toss = DiceThrow.FAIR 28 | prob_first_toss = random.uniform(0,1) 29 | if prob_first_toss > 0.5: 30 | next_toss = DiceThrow.LOADED 31 | 32 | fair_toss_counter = 0 33 | loaded_toss_counter = 0 34 | 35 | 36 | for i in range(number_of_tosses): 37 | 38 | if next_toss == DiceThrow.FAIR: 39 | fair_toss_counter += 1 40 | prob_next_toss = random.uniform(0,1) 41 | if prob_next_toss > self.p_1: 42 | next_toss = DiceThrow.LOADED 43 | else: 44 | loaded_toss_counter += 1 45 | prob_next_toss = random.uniform(0,1) 46 | if prob_next_toss > self.p_2: 47 | next_toss = DiceThrow.FAIR 48 | 49 | # print(fair_toss_counter) 50 | # print(loaded_toss_counter) 51 | 52 | fair_tosses = self.fair_die.roll_multiple(fair_toss_counter) 53 | loaded_tosses = self.biased_die.roll_multiple(loaded_toss_counter) 54 | 55 | mean_all_tosses = (sum(fair_tosses)+sum(loaded_tosses))/(fair_toss_counter+loaded_toss_counter) 56 | 57 | return mean_all_tosses 58 | 59 | 60 | 61 | def play_old(self, number_of_tosses:int)->list: 62 | 63 | results = [] 64 | 65 | next_toss = DiceThrow.FAIR 66 | prob_first_toss = random.uniform(0,1) 67 | if prob_first_toss > 0.5: 68 | next_toss = DiceThrow.LOADED 69 | 70 | for i in range(number_of_tosses): 71 | 72 | if next_toss == DiceThrow.FAIR: 73 | results.append(self.fair_die.roll()) 74 | prob_next_toss = random.uniform(0,1) 75 | if prob_next_toss > self.p_1: 76 | next_toss = DiceThrow.LOADED 77 | else: 78 | results.append(self.biased_die.roll()) 79 | prob_next_toss = random.uniform(0,1) 80 | if prob_next_toss > self.p_2: 81 | next_toss = DiceThrow.FAIR 82 | return results 83 | 84 | def simulate(self, t:int): 85 | '''Simulate Method''' 86 | 
simulation_mean = self.play(t)
87 |         return simulation_mean # play() already returns the mean of the tosses
88 | 
89 | 
90 |     def test(self)->None:
91 | 
92 |         avg_results = []
93 |         number_tosses_per_play = 100
94 |         number_of_plays = 10
95 |         for i in range(0, number_of_plays):
96 |             # append the average of the tosses
97 |             play = self.play(number_tosses_per_play)
98 |             mean_play = mean(play)
99 |             print(mean_play)
100 |             avg_results.append(mean_play)
101 | 
102 |         # possible averages from 1 to 6 in steps of 0.5
103 |         # ie 11 possible outcomes
104 |         avg_frequencies = []
105 |         for i in arange(1,6.5, 0.5):
106 |             avg_frequencies.append(avg_results.count(i) / number_of_plays)
107 | 
108 |         print(avg_frequencies)
109 |         plt.bar(arange(1,6.5, 0.5).tolist(), avg_frequencies, color='g', edgecolor='blue', width=0.5)
110 |         plt.show()
111 | 
112 | if __name__=='__main__':
113 |     casino = DishonestCasino(0.99, 0.1)
114 |     print(casino.play(100000))
--------------------------------------------------------------------------------
/ml_algorithms/src/introduction_to_computation/dice/fair_casino.py:
--------------------------------------------------------------------------------
1 | import matplotlib.pyplot as plt
2 | from fair_die import FairDie
3 | from numpy import mean
4 | from numpy import arange
5 | import numpy as np
6 | 
7 | class FairCasino():
8 | 
9 |     def __init__(self) -> None:
10 |         self.die = FairDie()
11 | 
12 |     def play(self, number_of_tosses:int=2):
13 |         return self.die.roll_multiple(number_of_tosses)
14 | 
15 | 
16 |     def simulate(self, t):
17 |         mean_res = mean(self.play(t))
18 |         print(mean_res)
19 |         return mean_res
20 | 
21 |     def test(self)->None:
22 | 
23 |         avg_results = []
24 | 
25 |         number_of_plays = 5000
26 |         for i in range(0, number_of_plays):
27 |             # append the average of the tosses
28 |             play = self.play()
29 |             mean_play = mean(play)
30 |             avg_results.append(mean_play)
31 | 
32 |         # possible averages from 1 to 6 in steps of 0.5
33 |         # ie 11 possible outcomes
34 |         avg_frequencies = []
35 |         for i in arange(1,6.5, 0.5):
36 |             avg_frequencies.append(avg_results.count(i) / number_of_plays)
37 | 
38 |         print(avg_frequencies)
39 | 
40 |         plt.bar(arange(1,6.5, 0.5), avg_frequencies, color='g', edgecolor='blue', width=0.5)
41 |         plt.show()
42 | 
43 | 
44 | if __name__=='__main__':
45 |     casino = FairCasino()
46 |     casino.test()
47 | 
--------------------------------------------------------------------------------
/ml_algorithms/src/introduction_to_computation/dice/fair_die.py:
--------------------------------------------------------------------------------
1 | import random
2 | import numpy as np
3 | import matplotlib.pyplot as plt
4 | 
5 | class FairDie:
6 | 
7 |     def __init__(self) -> None:
8 |         ...
9 | 
10 |     def roll(self)->int:
11 |         return random.randint(1,6)
12 | 
13 |     def roll_multiple(self, number_of_tosses):
14 |         x = np.random.random((number_of_tosses, 1)).squeeze()
15 |         return np.ceil(x*6) # maps U(0,1) draws to the faces 1..6 (as floats)
16 | 
17 |     def test_die(self)->None:
18 |         outcomes = []
19 | 
20 |         number_of_trials = 5000
21 |         for i in range(number_of_trials):
22 |             outcomes.append(self.roll())
23 | 
24 |         results = []
25 |         for i in np.arange(1,7):
26 |             results.append(outcomes.count(i) / number_of_trials)
27 | 
28 |         plt.bar(np.arange(1,7), results, color='g', edgecolor='blue', width=1)
29 |         plt.show()
30 | 
31 | 
32 | if __name__ == '__main__':
33 |     die = FairDie()
34 |     die.roll_multiple(10)
35 | 
--------------------------------------------------------------------------------
/ml_algorithms/src/introduction_to_computation/dice/loaded_die.py:
--------------------------------------------------------------------------------
1 | '''Loaded die implementation'''
2 | 
3 | import random
4 | import numpy as np
5 | import matplotlib.pyplot as plt
6 | 
7 | class LoadedDie:
8 |     '''A loaded die. probabilities for each number defined in __init__'''
9 | 
10 |     def __init__(self) -> None:
11 |         '''Initializes new die'''
12 |         self.probabilities = np.array([0.5, 0.1, 0.1, 0.1, 0.1, 0.1])
13 |         self.cumul_array = np.cumsum(self.probabilities)
14 | 
15 |     def roll(self)->int:
16 |         '''rolls the die'''
17 |         precision = 3
18 |         random_number = random.randint(0, 10 ** precision) / float(10 ** precision)
19 |         mapped_cumul = self.cumul_array - random_number
20 |         rolled_number = np.where(mapped_cumul > 0, mapped_cumul, np.inf).argmin()
21 |         return rolled_number + 1
22 | 
23 |     def roll_multiple(self, number_of_tosses):
24 |         '''rolls the die multiple times'''
25 |         x = np.random.random((number_of_tosses, 1)).squeeze()
26 |         x = np.ceil(x*10)-4 # ceil(10u)-4 lands in -3..6; the clamp below folds -3..0 into 1, giving P(1)=0.5 and P(2..6)=0.1
27 |         x[x<=0] = 1
28 |         return x
29 | 
30 | 
31 |     def test_die(self)->None:
32 |         '''executes a test for the die'''
33 |         outcomes = []
34 | 
35 |         number_of_trials = 5000
36 |         for i in range(number_of_trials):
37 |             outcomes.append(self.roll())
38 | 
39 |         results = []
40 |         for i in np.arange(1,7):
41 |             results.append(outcomes.count(i) / number_of_trials)
42 | 
43 |         plt.bar(np.arange(1,7), results, color='g', edgecolor='blue', width=1)
44 |         plt.show()
45 | 
46 | if __name__ == '__main__':
47 |     die = LoadedDie()
48 |     print(die.roll_multiple(100000))
49 |     die.test_die()
50 | 
--------------------------------------------------------------------------------
/ml_algorithms/src/introduction_to_computation/dice/simulation_results.txt:
--------------------------------------------------------------------------------
1 | Mean for dishonest casino with p1:0.99 and p2:0.05 is 3.489619392000011
2 | Mean for dishonest casino with p1:0.95 and p2:0.1 is 3.4474341769999897
3 | Mean for dishonest casino with p1:0.9 and p2:0.2 is 3.388901595000009
4 | 
5 | -----------------------------------------------------------------------------
6 | 
7 | Task 3
8 | ------
9 | 
10 | Dishonest simulation with p1 = 0.9 and p2 = 0.2
11 | mean: 3.388901595, variance:3.096003910988601e-05, standard deviation:0.005564174611735869
12 | 
13 | Fair Simulation
14 | mean: 3.4999125920000007, variance:2.8043537135249523e-05, standard deviation:0.005295614896803725
15 | 
16 | Note for next Calculation
17 | -------------------------
18 | 
19 | To use the estimated variance to find the sample size required to obtain an RMSE error of 0.001,
20 | we use the following formula:
21 | 
22 | n = (z * sigma / e)^2
23 | 
24 | Where:
25 | 
26 | n is the sample size
27 | z is the standard normal deviate
(e.g. for a 95% confidence level, z = 1.96) 28 | sigma is the estimated population standard deviation 29 | e is the desired margin of error (e.g. e = 0.001) 30 | 31 | This formula states that the sample size required to achieve a certain level of 32 | precision (e) is proportional to the square of the ratio of the standard deviation 33 | to the margin of error. 34 | 35 | It's important to note that this formula assumes that your estimator has a normal 36 | distribution and that the true variance is known or has been estimated from a sample. 37 | 38 | Also, it's important to know that this is only a rough estimation, 39 | and the sample size required for a specific problem may be influenced 40 | by other factors like the distribution of the data, the model assumptions, 41 | the desired confidence level, etc. 42 | 43 | 44 | 45 | 46 | 47 | ----------------------------------------------------------------------------- 48 | 49 | -------------------------------------------------------------------------------- /ml_algorithms/src/introduction_to_computation/dice/simulations.py: -------------------------------------------------------------------------------- 1 | from dishonest_casino import DishonestCasino 2 | from fair_casino import FairCasino 3 | import csv 4 | import numpy as np 5 | 6 | def fair_casino_simulation(): 7 | casino = FairCasino() 8 | 9 | mean_results = [] 10 | 11 | for _ in range(10000): 12 | mean_results.append(casino.simulate(100000)) 13 | 14 | with open('fair_results.csv', 'w') as f: 15 | write = csv.writer(f) 16 | write.writerow(mean_results) 17 | 18 | mean = sum(mean_results)/len(mean_results) 19 | print('Mean for fair casino: ', mean) 20 | 21 | 22 | def dishonest_casino_simulation_run(p1:float, p2:float): 23 | 24 | mean_results = [] 25 | 26 | casino = DishonestCasino(p1, p2) 27 | 28 | for _ in range(10000): 29 | mean_results.append(casino.simulate(100000)) 30 | 31 | with open(f'dishonest_results-{p1}-{p2}.csv', 'w', encoding='UTF-8') as f: 32 | write = csv.writer(f) 33 | write.writerow(mean_results) 34 | 35 | simulations_mean = sum(mean_results)/len(mean_results) 36 | print(f'Mean for dishonest casino with p1:{p1} and p2:{p2} is {simulations_mean}') 37 | 38 | 39 | def dishonest_casino_simulation(): 40 | 41 | dishonest_casino_simulation_run(0.99, 0.05) 42 | dishonest_casino_simulation_run(0.95, 0.1) 43 | dishonest_casino_simulation_run(0.9, 0.2) 44 | 45 | 46 | def calculate_variance(simulation_data:np.array):#type:ignore 47 | simulation_mean = np.mean(simulation_data) 48 | 49 | variance = np.sum(np.square(simulation_data - simulation_mean))/(np.size(simulation_data)-1) 50 | standard_deviation = np.sqrt(variance) 51 | 52 | print(f'mean: {simulation_mean}, variance:{variance}, standard deviation:{standard_deviation}') 53 | 54 | 55 | def dishonest_trial_variance_calculation(): 56 | print('Dishonest simulation with p1 = 0.9 and p2 = 0.2') 57 | simulation_data = np.genfromtxt('dishonest_results-0.9-0.2.csv', delimiter=',') 58 | calculate_variance(simulation_data) 59 | 60 | 61 | def fair_trial_variance_calculation(): 62 | print('Fair Simulation') 63 | simulation_data = np.genfromtxt('fair_results.csv', delimiter=',') 64 | calculate_variance(simulation_data) 65 | 66 | if __name__ == '__main__': 67 | # fair_casino_simulation() 68 | # dishonest_casino_simulation() 69 | dishonest_trial_variance_calculation() 70 | fair_trial_variance_calculation() 71 | -------------------------------------------------------------------------------- 
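A small companion sketch (not part of the repository) that turns the n = (z * sigma / e)^2 note from simulation_results.txt into code; the sigma values are the standard deviations printed by calculate_variance above, and z = 1.96 assumes a 95% confidence level:

import math

def required_sample_size(sigma: float, e: float, z: float = 1.96) -> int:
    # n = (z * sigma / e)^2, rounded up to a whole number of trials
    return math.ceil((z * sigma / e) ** 2)

# standard deviations estimated in simulation_results.txt
print(required_sample_size(0.005564, 0.001))  # dishonest casino (p1=0.9, p2=0.2): ~119 trials
print(required_sample_size(0.005296, 0.001))  # fair casino: ~108 trials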
/ml_algorithms/src/introduction_to_computation/graph_search/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carmelgafa/ml-from-scratch/c51c4f4c88829da26a448dfc6bc48d9ec706df96/ml_algorithms/src/introduction_to_computation/graph_search/__init__.py -------------------------------------------------------------------------------- /ml_algorithms/src/introduction_to_computation/graph_search/breath_first_search.py: -------------------------------------------------------------------------------- 1 | from graph import Digraph, Node, Edge 2 | 3 | def build_city_graph(graph_type): 4 | providence = Node('providence') 5 | boston = Node('boston') 6 | new_york = Node('new york') 7 | denver = Node('denver') 8 | phoenix = Node('phoenix') 9 | chicago = Node('chicago') 10 | los_angeles = Node('los angeles') 11 | 12 | edges = [] 13 | edges.append(Edge(providence, boston)) 14 | edges.append(Edge(providence, new_york)) 15 | edges.append(Edge(denver, phoenix)) 16 | edges.append(Edge(denver, new_york)) 17 | edges.append(Edge(new_york, chicago)) 18 | edges.append(Edge(chicago, denver)) 19 | edges.append(Edge(chicago, phoenix)) 20 | edges.append(Edge(boston, providence)) 21 | edges.append(Edge(boston, new_york)) 22 | edges.append(Edge(los_angeles, boston)) 23 | 24 | graph = graph_type() 25 | graph.add_node(providence) 26 | graph.add_node(boston) 27 | graph.add_node(new_york) 28 | graph.add_node(denver) 29 | graph.add_node(phoenix) 30 | graph.add_node(chicago) 31 | graph.add_node(los_angeles) 32 | 33 | for edge in edges: 34 | graph.add_edge(edge) 35 | 36 | return graph 37 | 38 | def print_path(path): 39 | names=[] 40 | for loc in path: 41 | names.append(loc.get_name()) 42 | return '->'.join(names) 43 | 44 | 45 | def bfs(graph:Digraph, start, end, to_print=False): 46 | 47 | init_path = [start] 48 | 49 | # queue is a list of paths 50 | path_queue = [init_path] 51 | 52 | while len(path_queue) != 0: 53 | 54 | tmp_path = path_queue.pop(0) 55 | if to_print: 56 | print(f'current bfs path: {print_path(tmp_path)}') 57 | 58 | last_node = tmp_path[-1] 59 | if last_node == end: 60 | return tmp_path 61 | for next_node in graph.get_children(last_node): 62 | if next_node not in tmp_path: 63 | new_path = tmp_path + [next_node] 64 | path_queue.append(new_path) 65 | 66 | return None 67 | 68 | def shortest_path(graph, start, end, to_print=False): 69 | return bfs(graph, start, end, to_print) 70 | 71 | def test_bfs(source, dest): 72 | graph = build_city_graph(Digraph) 73 | sp = shortest_path(graph, graph.get_node(source), graph.get_node(dest), to_print=True) 74 | 75 | if sp != None: 76 | print(f'shortest path from {source} to {dest} is {print_path(sp)}') 77 | else: 78 | print(f'there is no path from {source} to {dest}') 79 | 80 | 81 | if __name__ == "__main__": 82 | graph = build_city_graph(Digraph) 83 | # print(graph) 84 | test_bfs('boston', 'phoenix') -------------------------------------------------------------------------------- /ml_algorithms/src/introduction_to_computation/graph_search/depth_first_search.py: -------------------------------------------------------------------------------- 1 | from graph import Digraph, Node, Edge 2 | 3 | def build_city_graph(graph_type): 4 | providence = Node('providence') 5 | boston = Node('boston') 6 | new_york = Node('new york') 7 | denver = Node('denver') 8 | phoenix = Node('phoenix') 9 | chicago = Node('chicago') 10 | los_angeles = Node('los angeles') 11 | 12 | edges = [] 13 | edges.append(Edge(providence, boston)) 
14 | edges.append(Edge(providence, new_york)) 15 | edges.append(Edge(denver, phoenix)) 16 | edges.append(Edge(denver, new_york)) 17 | edges.append(Edge(new_york, chicago)) 18 | edges.append(Edge(chicago, denver)) 19 | edges.append(Edge(chicago, phoenix)) 20 | edges.append(Edge(boston, providence)) 21 | edges.append(Edge(boston, new_york)) 22 | edges.append(Edge(los_angeles, boston)) 23 | 24 | graph = graph_type() 25 | graph.add_node(providence) 26 | graph.add_node(boston) 27 | graph.add_node(new_york) 28 | graph.add_node(denver) 29 | graph.add_node(phoenix) 30 | graph.add_node(chicago) 31 | graph.add_node(los_angeles) 32 | 33 | for edge in edges: 34 | graph.add_edge(edge) 35 | 36 | return graph 37 | 38 | def print_path(path): 39 | names=[] 40 | for loc in path: 41 | names.append(loc.get_name()) 42 | return '->'.join(names) 43 | 44 | 45 | def dfs(graph:Digraph, start, end, path, shortest, to_print=False): 46 | 47 | path = path + [start 48 | ] 49 | 50 | if to_print: 51 | print(f'Current dfs path: {print_path(path)}') 52 | 53 | if start == end: 54 | return path 55 | 56 | for node in graph.get_children(start): 57 | # no cycles 58 | if node not in path: 59 | if shortest == None or len(path) < len(shortest): 60 | new_path = dfs(graph, node, end, path, shortest, to_print) 61 | if new_path != None: 62 | shortest = new_path 63 | elif to_print: 64 | print(f'{node} already visited') 65 | 66 | return shortest 67 | 68 | def shortest_path(graph, start, end, to_print=False): 69 | return dfs(graph, start, end, [], None, to_print) 70 | 71 | def test_dfs(source, dest): 72 | graph = build_city_graph(Digraph) 73 | sp = shortest_path(graph, graph.get_node(source), graph.get_node(dest), to_print=True) 74 | 75 | if sp != None: 76 | print(f'shortest path from {source} to {dest} is {print_path(sp)}') 77 | else: 78 | print(f'there is no path from {source} to {dest}') 79 | 80 | 81 | if __name__ == "__main__": 82 | graph = build_city_graph(Digraph) 83 | # print(graph) 84 | test_dfs('boston', 'phoenix') -------------------------------------------------------------------------------- /ml_algorithms/src/introduction_to_computation/graph_search/graph.py: -------------------------------------------------------------------------------- 1 | class Node(): 2 | def __init__(self, name) -> None: 3 | self.name = name 4 | def get_name(self): 5 | return self.name 6 | def __str__(self) -> str: 7 | return self.name 8 | 9 | class Edge(): 10 | def __init__(self, src, dest) -> None: 11 | self.src = src 12 | self.dest = dest 13 | def get_source(self): 14 | return self.src 15 | def get_destination(self): 16 | return self.dest 17 | def __str__(self) -> str: 18 | return f'{self.src.get_name()} -> {self.dest.get_name()}' 19 | 20 | class Digraph(): 21 | ''' 22 | edges direction inb only one direction 23 | options are: 24 | 1. create adjacency matrix joining src to dest 25 | ok for digraph as it can handle both directions 26 | not symmetric therefore 27 | if few edges present a huge matrix with mostly 0 28 | 2. 
adjacency list for every node have a list of destinations 29 | nodes keys in dict 30 | ''' 31 | def __init__(self) -> None: 32 | self.edges ={} 33 | 34 | def add_node(self, node): 35 | if node in self.edges: 36 | raise ValueError('Duplicate Node') 37 | else: 38 | self.edges[node] = [] 39 | 40 | def add_edge(self, edge:Edge): 41 | src = edge.get_source() 42 | dest = edge.get_destination() 43 | if not(src in self.edges and dest in self.edges): 44 | raise ValueError('Node not in graph') 45 | self.edges[src].append(dest) 46 | 47 | def get_children(self, node): 48 | return self.edges[node] 49 | 50 | def has_node(self, node): 51 | return node in self.edges 52 | 53 | def get_node(self, name): 54 | for n in self.edges: 55 | if n.get_name() == name: 56 | return n 57 | raise NameError(name) 58 | 59 | def __str__(self) -> str: 60 | result = '' 61 | for src in self.edges: 62 | for dest in self.edges[src]: 63 | result = result + f'{src.get_name()} -> {dest.get_name()}\n' 64 | return result[:-1] 65 | 66 | class Graph(Digraph): 67 | def add_edge(self, edge: Edge): 68 | Digraph.add_edge(self,edge) 69 | rev_edge = Edge(edge.get_destination(), edge.get_source()) 70 | Digraph.add_edge(self, rev_edge) 71 | 72 | -------------------------------------------------------------------------------- /ml_algorithms/src/introduction_to_computation/greedy_algorithm/knapsack.py: -------------------------------------------------------------------------------- 1 | class Food (): 2 | 3 | def __init__(self, n:str, v:int, w:int) -> None: 4 | self.name = n 5 | self.value = v 6 | self.calories = w 7 | 8 | def get_value(self) -> int: 9 | return self.value 10 | 11 | def get_cost(self) -> int: 12 | return self.calories 13 | 14 | def density(self) -> float: 15 | return self.get_value() / self.get_cost() 16 | 17 | def __str__(self) -> str: 18 | return f'{self.name} : <{self.value}, {self.calories}>' 19 | 20 | 21 | def build_menu(names, values, calories): 22 | 23 | menu = [] 24 | for i in range(len(values)): 25 | menu.append(Food(names[i], values[i], calories[i])) 26 | 27 | return menu 28 | 29 | def greedy(items, max_cost:int, key_function): 30 | 31 | # sort according to key_function, ascending order 32 | items_copy = sorted(items, key=key_function, reverse=True) 33 | 34 | result = [] 35 | 36 | total_value, total_cost = 0, 0 37 | 38 | for i in range(len(items_copy)): 39 | if(total_cost + items_copy[i].get_cost()) <= max_cost: 40 | result.append(items_copy[i]) 41 | total_cost += items_copy[i].get_cost() 42 | total_value += items_copy[i].get_value() 43 | 44 | return (result, total_value) 45 | 46 | def test_greedy(items, constraint:int, key_function) -> None: 47 | 48 | taken, val = greedy(items, constraint, key_function) 49 | 50 | print('Total values of items taken = ', val) 51 | for item in taken: 52 | print('\t',item) 53 | 54 | def test_greedy_functions(foods, max_units:int) -> None: 55 | 56 | # greedy using food value 57 | print(f'Use greedy by value to allocate {max_units} calories') 58 | test_greedy(foods, max_units, Food.get_value) 59 | 60 | # greedy using food cost, calories 61 | # we need inverse of calories to start with foods having the 62 | # smallest numbers, hence lambda fn 63 | print(f'Use greedy by cost to allocate {max_units} calories') 64 | test_greedy(foods, max_units, lambda x: 1/Food.get_cost(x)) 65 | 66 | print(f'Use greedy by density to allocate {max_units} calories') 67 | test_greedy(foods, max_units, Food.density) 68 | 69 | 70 | 71 | if __name__ == "__main__": 72 | names = ['wine', 'beer', 'pizza', 'burger', 
'fries', 'cola', 'apple', 'donut', 'cake'] # values/calories below have 8 entries, so build_menu drops 'cake'
73 |     values = [89, 90, 95, 100, 90, 79, 50, 10]
74 |     calories = [123, 154, 258, 354, 365, 150, 95, 195]
75 |     foods = build_menu(names, values, calories)
76 |     test_greedy_functions(foods, 750)
--------------------------------------------------------------------------------
/ml_algorithms/src/introduction_to_computation/monte_carlo_simulation/fair_roulette.py:
--------------------------------------------------------------------------------
1 | import random
2 | 
3 | class FairRoulette():
4 |     def __init__(self):
5 |         self.pockets = []
6 |         for i in range(1, 37):
7 |             self.pockets.append(i)
8 | 
9 |         self.ball = None
10 | 
11 |         self.pocket_odds = len(self.pockets) - 1
12 | 
13 |         random.seed(0)
14 | 
15 |     def spin(self):
16 |         self.ball = random.choice(self.pockets)
17 | 
18 |     def bet_pocket(self, pocket, amount):
19 |         '''
20 |         pocket: pocket placing bet
21 |         amount: sum being bet
22 |         '''
23 |         if str(pocket) == str(self.ball):
24 |             return amount * self.pocket_odds
25 |         else:
26 |             return -amount
27 |     def __str__(self) -> str:
28 |         return 'fair roulette'
29 | 
30 | class EURoulette(FairRoulette):
31 |     def __init__(self):
32 |         super().__init__()
33 |         self.pockets.append('0') # the single zero pocket
34 |     def __str__(self) -> str:
35 |         return 'EU Roulette'
36 | 
37 | class USRoulette(EURoulette):
38 |     def __init__(self):
39 |         super().__init__()
40 |         self.pockets.append('00') # the double zero pocket
41 |     def __str__(self) -> str:
42 |         return 'US Roulette'
43 | 
44 | 
45 | def play_roulette(game, num_spins, pocket, bet):
46 |     '''
47 |     Arguments:
48 |     game: Roulette game being played
49 |     num_spins: number of spins for the simulation
50 |     pocket: pocket placing bet
51 |     bet: amount of bet
52 |     '''
53 |     total_pocket = 0
54 |     for i in range(num_spins):
55 |         game.spin()
56 |         total_pocket += game.bet_pocket(pocket, bet)
57 | 
58 |     print(f'{num_spins} spins of {game}')
59 |     print(f'expected return betting {pocket} = {str(100*total_pocket/num_spins)}%')
60 | 
61 |     return total_pocket/num_spins
62 | 
63 | if __name__ == "__main__":
64 |     game = FairRoulette()
65 |     for num_spins in (100, 1000000):
66 |         for i in range(3):
67 |             # betting 1 dollar on number 2 for num_spins trials
68 |             play_roulette(game, num_spins, 2, 1)
69 | 
70 | 
71 | 
72 | 
--------------------------------------------------------------------------------
/ml_algorithms/src/introduction_to_computation/random_walk/__pycache__/drunk.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/carmelgafa/ml-from-scratch/c51c4f4c88829da26a448dfc6bc48d9ec706df96/ml_algorithms/src/introduction_to_computation/random_walk/__pycache__/drunk.cpython-36.pyc
--------------------------------------------------------------------------------
/ml_algorithms/src/introduction_to_computation/random_walk/__pycache__/drunk.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/carmelgafa/ml-from-scratch/c51c4f4c88829da26a448dfc6bc48d9ec706df96/ml_algorithms/src/introduction_to_computation/random_walk/__pycache__/drunk.cpython-37.pyc
--------------------------------------------------------------------------------
/ml_algorithms/src/introduction_to_computation/random_walk/__pycache__/drunk.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/carmelgafa/ml-from-scratch/c51c4f4c88829da26a448dfc6bc48d9ec706df96/ml_algorithms/src/introduction_to_computation/random_walk/__pycache__/drunk.cpython-38.pyc -------------------------------------------------------------------------------- /ml_algorithms/src/introduction_to_computation/random_walk/__pycache__/field.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carmelgafa/ml-from-scratch/c51c4f4c88829da26a448dfc6bc48d9ec706df96/ml_algorithms/src/introduction_to_computation/random_walk/__pycache__/field.cpython-36.pyc -------------------------------------------------------------------------------- /ml_algorithms/src/introduction_to_computation/random_walk/__pycache__/field.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carmelgafa/ml-from-scratch/c51c4f4c88829da26a448dfc6bc48d9ec706df96/ml_algorithms/src/introduction_to_computation/random_walk/__pycache__/field.cpython-37.pyc -------------------------------------------------------------------------------- /ml_algorithms/src/introduction_to_computation/random_walk/__pycache__/field.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carmelgafa/ml-from-scratch/c51c4f4c88829da26a448dfc6bc48d9ec706df96/ml_algorithms/src/introduction_to_computation/random_walk/__pycache__/field.cpython-38.pyc -------------------------------------------------------------------------------- /ml_algorithms/src/introduction_to_computation/random_walk/__pycache__/location.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carmelgafa/ml-from-scratch/c51c4f4c88829da26a448dfc6bc48d9ec706df96/ml_algorithms/src/introduction_to_computation/random_walk/__pycache__/location.cpython-36.pyc -------------------------------------------------------------------------------- /ml_algorithms/src/introduction_to_computation/random_walk/__pycache__/location.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carmelgafa/ml-from-scratch/c51c4f4c88829da26a448dfc6bc48d9ec706df96/ml_algorithms/src/introduction_to_computation/random_walk/__pycache__/location.cpython-37.pyc -------------------------------------------------------------------------------- /ml_algorithms/src/introduction_to_computation/random_walk/__pycache__/location.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carmelgafa/ml-from-scratch/c51c4f4c88829da26a448dfc6bc48d9ec706df96/ml_algorithms/src/introduction_to_computation/random_walk/__pycache__/location.cpython-38.pyc -------------------------------------------------------------------------------- /ml_algorithms/src/introduction_to_computation/random_walk/drunk.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | class Drunk(): 4 | ''' 5 | base class for drunkard walk algorithms 6 | ''' 7 | def __init__(self, name='Anonymous'): 8 | self._name = name 9 | 10 | def __str__(self): 11 | return self._name 12 | 13 | class UsualDrunk(Drunk): 14 | def take_step(self): 15 | step_choices = [(0,1), (1,0), (0,-1), (-1,0)] 16 | return random.choice(step_choices) 17 | 18 | class BiasedDrunk(Drunk): 19 | ''' 20 | 
implements biased random walk 21 | ''' 22 | def take_step(self): 23 | step_choices = [(0,0.9), (1.1,0), (0,-1), (-1,0)] 24 | return random.choice(step_choices) 25 | 26 | if __name__ == "__main__": 27 | d1=Drunk('Joe') 28 | print(d1) 29 | 30 | d2 = Drunk() 31 | print(d2) 32 | 33 | -------------------------------------------------------------------------------- /ml_algorithms/src/introduction_to_computation/random_walk/field.py: -------------------------------------------------------------------------------- 1 | from drunk import UsualDrunk 2 | from drunk import BiasedDrunk 3 | from location import Location 4 | 5 | class Field(): 6 | def __init__(self): 7 | self._drunks = {} 8 | 9 | def add_drunk(self, drunk, loc): 10 | if drunk in self._drunks: 11 | raise ValueError('duplicate drunk') 12 | else: 13 | self._drunks[drunk] = loc 14 | 15 | def get_location(self, drunk): 16 | if drunk not in self._drunks: 17 | raise ValueError('drunk not in field') 18 | 19 | return self._drunks[drunk] 20 | 21 | def move_drunk(self, drunk): 22 | if drunk not in self._drunks: 23 | raise ValueError('drunk not in field') 24 | 25 | x_dist, y_dist = drunk.take_step() 26 | 27 | self._drunks[drunk] = self._drunks[drunk].move(x_dist, y_dist) 28 | 29 | -------------------------------------------------------------------------------- /ml_algorithms/src/introduction_to_computation/random_walk/location.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | class Location(): 4 | def __init__(self, x:float, y:float): 5 | self._x = x 6 | self._y = y 7 | 8 | def move (self, delta_x:float, delta_y:float): 9 | return Location(self._x + delta_x, self._y + delta_y) 10 | 11 | @property 12 | def x(self): 13 | return self._x 14 | 15 | @property 16 | def y(self): 17 | return self._y 18 | 19 | def dist(self, other): 20 | x_dist = self._x - other.x 21 | y_dist = self._y - other.y 22 | 23 | return (x_dist**2 + y_dist**2)**0.5 24 | 25 | def __str__(self): 26 | return f'<{self._x}, {self._y}>' 27 | 28 | if __name__ == "__main__": 29 | loc = Location(1,1) 30 | print(loc) 31 | dist = loc.dist(Location(0,0)) 32 | print(dist) -------------------------------------------------------------------------------- /ml_algorithms/src/introduction_to_computation/random_walk/simulation.py: -------------------------------------------------------------------------------- 1 | from field import Field 2 | from drunk import UsualDrunk 3 | from drunk import BiasedDrunk 4 | from location import Location 5 | import numpy as np 6 | import matplotlib.pylab as plt 7 | 8 | 9 | def walk(f, d, num_steps): 10 | start = f.get_location(d) 11 | for s in range(num_steps): 12 | f.move_drunk(d) 13 | # print(f.get_location(d)) 14 | return start.dist(f.get_location(d)) 15 | 16 | def sim_walks(num_steps, num_trials, dClass): 17 | drunkard = dClass() 18 | origin = Location(0, 0) 19 | distances = [] 20 | for t in range(num_trials): 21 | f = Field() 22 | f.add_drunk(drunkard, origin) 23 | distances.append(round(walk(f, drunkard, num_steps) ,1)) 24 | 25 | return distances 26 | 27 | def drunk_test(walk_lengths, num_trials,dClass): 28 | for num_steps in walk_lengths: 29 | distances = sim_walks(num_steps, num_trials, dClass) 30 | print(f'{dClass.__name__} random walk of {num_steps} steps') 31 | print(f'Mean = {round(sum(distances)/len(distances), 4)}') 32 | print(f'Max={max(distances)}') 33 | print(f'Min={min(distances)}') 34 | 35 | if __name__ == "__main__": 36 | drunk_test((0,1,2), 100, UsualDrunk) 
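A companion check one could run alongside simulation.py (a sketch, not in the repository; it imports the sim_walks helper and UsualDrunk class from the modules above): for the unbiased UsualDrunk, the mean final distance from the origin should grow roughly like the square root of the number of steps, which is the pattern drunk_test's printed means hint at.

import math
from simulation import sim_walks  # the module above; its __main__ block is guarded
from drunk import UsualDrunk

for n in (10, 100, 1000):
    distances = sim_walks(n, 100, UsualDrunk)
    mean_distance = sum(distances) / len(distances)
    # for an unbiased 2-D lattice walk, the expected distance scales like c * sqrt(n)
    print(f'{n} steps: mean distance {mean_distance:.2f}, sqrt(n) = {math.sqrt(n):.2f}')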
-------------------------------------------------------------------------------- /ml_algorithms/src/introduction_to_computation/random_walk/simulation_analysis.py: -------------------------------------------------------------------------------- 1 | from field import Field 2 | from drunk import UsualDrunk 3 | from drunk import BiasedDrunk 4 | from location import Location 5 | import numpy as np 6 | import matplotlib.pylab as plt 7 | 8 | 9 | def walk(f, d, num_steps): 10 | start = f.get_location(d) 11 | for s in range(num_steps): 12 | f.move_drunk(d) 13 | return f.get_location(d), start.dist(f.get_location(d)) 14 | 15 | def sim_walks(num_steps, num_trials, dClass): 16 | drunkard = dClass() 17 | origin = Location(0, 0) 18 | distances = [] 19 | end_locations = [] 20 | 21 | for t in range(num_trials): 22 | f = Field() 23 | f.add_drunk(drunkard, origin) 24 | end_location, distance = walk(f, drunkard, num_steps) 25 | 26 | distances.append(round(distance,1)) 27 | end_locations.append([end_location.x, end_location.y]) 28 | 29 | return end_locations, distances 30 | 31 | 32 | def drunk_test_dist_analysis(walk_lengths, num_trials): 33 | 34 | mean_dist_x = [] 35 | mean_dist_y = [] 36 | for num_steps in walk_lengths: 37 | _, distances = sim_walks(num_steps, num_trials, UsualDrunk) 38 | mean_dist_y.append(round(sum(distances)/len(distances), 4)) 39 | mean_dist_x.append(num_steps) 40 | 41 | plt.plot(mean_dist_x, mean_dist_y) 42 | 43 | mean_dist_x.clear() 44 | mean_dist_y.clear() 45 | for num_steps in walk_lengths: 46 | _, distances = sim_walks(num_steps, num_trials, BiasedDrunk) 47 | mean_dist_y.append(round(sum(distances)/len(distances), 4)) 48 | mean_dist_x.append(num_steps) 49 | 50 | plt.plot(mean_dist_x, mean_dist_y) 51 | plt.show() 52 | 53 | def drunk_test_end_analysis(walk_lengths, num_trials): 54 | for num_steps in walk_lengths: 55 | end_locations, _ = sim_walks(num_steps, num_trials, UsualDrunk) 56 | end_locations = np.array(end_locations) 57 | plt.scatter(end_locations[:,0], end_locations[:,1]) 58 | 59 | for num_steps in walk_lengths: 60 | end_locations, _ = sim_walks(num_steps, num_trials, BiasedDrunk) 61 | end_locations = np.array(end_locations) 62 | plt.scatter(end_locations[:,0], end_locations[:,1]) 63 | 64 | plt.show() 65 | 66 | 67 | 68 | def drunk_test(walk_lengths, num_trials,dClass): 69 | for num_steps in walk_lengths: 70 | end_locations, distances = sim_walks(num_steps, num_trials, dClass) 71 | end_locations = np.array(end_locations) 72 | print(f'{dClass.__name__} random walk of {num_steps} steps') 73 | print(f'Mean = {round(sum(distances)/len(distances), 4)}') 74 | print(f'Max={max(distances)}') 75 | print(f'Min={min(distances)}') 76 | plt.scatter(end_locations[:,0], end_locations[:,1]) 77 | plt.show() 78 | 79 | if __name__ == "__main__": 80 | # drunk_test((10,100,1000), 1000, BiasedDrunk) 81 | # drunk_test_dist_analysis((10,100,1000, 10000), 100) 82 | # xyz=np.array(np.random.random((100,3))) 83 | # print(xyz) 84 | drunk_test_end_analysis((0,1000), 100) 85 | -------------------------------------------------------------------------------- /ml_algorithms/src/introduction_to_computation/stochastic/approximation.py: -------------------------------------------------------------------------------- 1 | import random 2 | import math 3 | 4 | def same_date_birthday(num_poeple, num_same): 5 | possibility_dates = range(366) 6 | birthdays = [0] * 366 7 | for p in range(num_poeple): 8 | birth_date = random.choice(possibility_dates) 9 | birthdays[birth_date] += 1 10 | return max(birthdays) >= 
num_same
11 | 
12 | def birthday_problem(num_people, num_same, num_trials):
13 |     num_hits = 0
14 |     for t in range(num_trials):
15 |         if same_date_birthday(num_people, num_same):
16 |             num_hits += 1
17 | 
18 |     return num_hits/num_trials
19 | 
20 | 
21 | 
22 | if __name__ == "__main__":
23 |     for num_people in [10,20,40,100]:
24 |         print(f'for {num_people} est prob of shared birthday is {birthday_problem(num_people, 2, 90000)}')
25 | 
26 |     num = math.factorial(366)
27 |     den = (366**num_people)*math.factorial(366-num_people)
28 | 
29 |     print(f'actual prob for {num_people} is {1-(num/den)}')
--------------------------------------------------------------------------------
/ml_algorithms/src/introduction_to_computation/stochastic/random_processes.py:
--------------------------------------------------------------------------------
1 | import random
2 | 
3 | def roll_die():
4 |     # choose from a uniform distribution
5 |     return random.choice([1,2,3,4,5,6])
6 | 
7 | 
8 | def test_roll(n=10):
9 |     result = ''
10 |     for i in range(n):
11 |         result = result + str(roll_die())
12 |     print(result)
13 | 
14 | 
15 | def run_sim(goal, num_trials):
16 |     total = 0
17 | 
18 |     for i in range(num_trials):
19 |         result = ''
20 |         for j in range(len(goal)):
21 |             result += str(roll_die())
22 |         if result == goal:
23 |             total += 1
24 | 
25 |     print(f'actual prob of {goal} = ', round(1/(6**len(goal)), 8))
26 |     est_prob = round(total/num_trials, 8)
27 |     print(f'estimated prob of {goal} = ', est_prob)
28 | 
29 | if __name__ == "__main__":
30 |     # test_roll()
31 |     run_sim('11111', 1000000)
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy
2 | pandas
3 | matplotlib
--------------------------------------------------------------------------------
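A worked form of the exact expression printed at the end of approximation.py above (an aside, not part of the repository): with 366 equally likely birth dates, the probability that at least two of n people share a birthday is

    P(n) = 1 - \frac{366!}{366^{\,n}\,(366 - n)!}

which for n = 10, 20, 40, 100 gives roughly 0.12, 0.41, 0.89, and effectively 1, the values the simulated estimates should approach.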