├── .github ├── ISSUE_TEMPLATE │ └── bug_report.md └── workflows │ └── pylint.yml ├── .gitignore ├── .vscode ├── settings.json └── settings_DiskStation_Oct-20-1450-2020_Conflict.json ├── CODE_OF_CONDUCT.md ├── LICENSE ├── README.md ├── ml_algorithms ├── doc │ ├── greedy_algorithm │ │ └── readme.md │ ├── monte_carlo_simulation │ │ └── notes.md │ └── svm │ │ └── notes │ │ ├── .gitignore │ │ ├── README.md │ │ ├── budget_prediction_svm.pdf │ │ ├── budget_prediction_svm.tex │ │ ├── fig_4.tex │ │ ├── fig_5.tex │ │ ├── fig_6.tex │ │ ├── fig_7.tex │ │ ├── fig_8.tex │ │ └── img │ │ ├── fig_1.JPG │ │ ├── fig_2.JPG │ │ ├── fig_3.jpg │ │ ├── fig_4.jpg │ │ ├── fig_5.jpg │ │ ├── fig_6.jpg │ │ ├── fig_7.jpg │ │ └── fig_8.jpg ├── samples │ ├── __init__.py │ └── pandas_examples.py └── src │ ├── algorithms │ ├── __init__.py │ ├── __pycache__ │ │ └── __init__.cpython-39.pyc │ ├── cnn │ │ ├── bud.jpg │ │ ├── cnn.py │ │ ├── conv2d.py │ │ └── image_convolution.py │ ├── fuzzy_inference │ │ ├── fuzzy_example_2in_1out.py │ │ ├── fuzzy_example_2in_2out.py │ │ └── fuzzy_system │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-38.pyc │ │ │ ├── fuzzy_clause.cpython-38.pyc │ │ │ ├── fuzzy_rule.cpython-38.pyc │ │ │ ├── fuzzy_set.cpython-38.pyc │ │ │ ├── fuzzy_system.cpython-38.pyc │ │ │ ├── fuzzy_variable.cpython-38.pyc │ │ │ ├── fuzzy_variable_input.cpython-38.pyc │ │ │ └── fuzzy_variable_output.cpython-38.pyc │ │ │ ├── fuzzy_clause.py │ │ │ ├── fuzzy_rule.py │ │ │ ├── fuzzy_set.py │ │ │ ├── fuzzy_system.py │ │ │ ├── fuzzy_variable.py │ │ │ ├── fuzzy_variable_input.py │ │ │ └── fuzzy_variable_output.py │ ├── fuzzy_learning │ │ ├── __init__.py │ │ ├── data │ │ │ ├── Wine Quality Datasets.url │ │ │ ├── linear_model.csv │ │ │ ├── sample_set.csv │ │ │ ├── sbp_age.csv │ │ │ ├── sensor_data.csv │ │ │ ├── weatherHistory.csv │ │ │ ├── weatherHistory_adj.csv │ │ │ ├── weatherHistory_adj_test.csv │ │ │ ├── winequality-names.txt │ │ │ ├── winequality-names.txt.bak │ │ │ ├── winequality-red.csv │ │ │ ├── winequality-red_test.csv │ │ │ ├── winequality-red_train.csv │ │ │ └── winequality-white.csv │ │ ├── fuzzy_learning_sample_set.py │ │ ├── fuzzy_learning_sbp.py │ │ ├── fuzzy_system │ │ │ ├── __init__.py │ │ │ ├── fuzzy_associative_memory.py │ │ │ ├── fuzzy_clause.py │ │ │ ├── fuzzy_learning_helper.py │ │ │ ├── fuzzy_learning_system.py │ │ │ ├── fuzzy_rule.py │ │ │ ├── fuzzy_system.py │ │ │ ├── system_settings.py │ │ │ ├── type1_fuzzy_set.py │ │ │ └── type1_fuzzy_variable.py │ │ ├── fuzzy_system_example.py │ │ ├── fuzzy_system_exec │ │ │ ├── __init__.py │ │ │ ├── fuzzy_learning_system_DiskStation_Apr-16-1542-2020_Conflict.py │ │ │ ├── system_test.py │ │ │ ├── system_test2.py │ │ │ └── system_test2_DiskStation_Mar-27-1623-2020_Conflict.py │ │ ├── fuzzy_system_test.py │ │ ├── poc │ │ │ ├── data_analysis_poc.py │ │ │ ├── fuzzy_learning_system_poc copy.py │ │ │ ├── fuzzy_learning_system_poc.py │ │ │ ├── set_generation.py │ │ │ ├── set_generation_2.py │ │ │ ├── set_generation_notes.py │ │ │ └── set_naming.py │ │ ├── sensor_comparison.py │ │ ├── sensor_data_generate.py │ │ ├── sensor_fuzzy_learn.py │ │ ├── weather_analyse.py │ │ ├── weather_fuzzy_learning.py │ │ ├── weather_preprocessing.py │ │ ├── weather_preprocessing_humidity.py │ │ ├── wine_dataset_analysis.py │ │ ├── wine_fuzzy_learning.py │ │ └── wine_scaling.py │ ├── gan │ │ ├── __pycache__ │ │ │ ├── discriminator.cpython-39.pyc │ │ │ └── generator.cpython-39.pyc │ │ ├── discriminator.py │ │ ├── gan.py │ │ └── generator.py │ ├── id3 │ │ ├── __pycache__ │ │ │ 
└── id3_classifier.cpython-39.pyc │ │ ├── id3_classifier.py │ │ ├── id3_exec.py │ │ ├── readme.md │ │ ├── tree_exec.py │ │ └── weather.csv │ ├── id3_version2 │ │ ├── data.csv │ │ ├── id3.py │ │ ├── id3_v2.py │ │ ├── id3_v3.py │ │ ├── results_analysis.txt │ │ ├── sample_4_workout.xlsx │ │ ├── sample_results_1.txt │ │ ├── sample_results_2.txt │ │ ├── sample_results_3.txt │ │ ├── sample_results_4.txt │ │ ├── sample_results_5.txt │ │ ├── sample_rules.csv │ │ ├── sample_rules_1.csv │ │ ├── sample_rules_2.csv │ │ ├── sample_rules_3.csv │ │ ├── sample_rules_4.csv │ │ ├── sensor_rules.csv │ │ ├── weather_rules.csv │ │ └── weather_rules_1.csv │ ├── k-means │ │ └── k-means.xlsx │ ├── linear_regression │ │ ├── __init__.py │ │ ├── batch_gradient_descent │ │ │ ├── multifeature_batch_gd.py │ │ │ ├── twofeature_batch_gd.py │ │ │ ├── uni_batch_gd_nv.py │ │ │ ├── uni_batch_gd_v.py │ │ │ └── uni_batch_gd_v_norm.py │ │ ├── data_generation │ │ │ ├── data_1f.csv │ │ │ ├── data_1f_norm.csv │ │ │ ├── data_2f.csv │ │ │ ├── data_3f.csv │ │ │ ├── dataset_generation_1f.py │ │ │ └── dataset_generation_2f.py │ │ ├── minibatch_gradient_descent │ │ │ ├── data.csv │ │ │ ├── minibatch_gd_1.py │ │ │ ├── minibatch_gd_2.py │ │ │ ├── minibatch_gd_2_v.py │ │ │ └── minibatch_gd_3.py │ │ ├── multivariate_linear_regression │ │ │ └── multivariate_lr.py │ │ ├── normalization │ │ │ └── normalization_analysis.py │ │ ├── stochastic_gradient_descent │ │ │ ├── stochastic_gd_1f_1.py │ │ │ ├── stochastic_gd_1f_2.py │ │ │ ├── stochastic_gd_nf_1.py │ │ │ └── stochastic_gd_nf_2.py │ │ ├── univariate_gd_analysis.py │ │ └── univariate_linear_regression │ │ │ └── univariate_lr.py │ ├── logistic_regression │ │ ├── __pycache__ │ │ │ ├── binaryclassification.cpython-311.pyc │ │ │ └── lr_utils.cpython-311.pyc │ │ ├── binaryclassification.py │ │ ├── exec.py │ │ ├── lr_utils.py │ │ ├── test_catvnoncat.h5 │ │ └── train_catvnoncat.h5 │ ├── svm │ │ ├── matplotlib_test.py │ │ ├── supportvectormachine.py │ │ ├── svm_orig.py │ │ └── test_code.py │ └── utils │ │ ├── __pycache__ │ │ └── simple_tree.cpython-39.pyc │ │ ├── nary_tree.py │ │ ├── rooted_dac.py │ │ ├── simple_tree.py │ │ ├── tree.py │ │ └── tree_exec.py │ └── introduction_to_computation │ ├── brute_force │ ├── fibonacci.py │ └── knapsack.py │ ├── confidence_interval │ └── normal_distribution_gen.py │ ├── data_structures │ └── graph.py │ ├── dice │ ├── __pycache__ │ │ ├── biased_die.cpython-39.pyc │ │ ├── dishonest_casino.cpython-39.pyc │ │ ├── fair_casino.cpython-39.pyc │ │ ├── fair_die.cpython-39.pyc │ │ └── loaded_die.cpython-39.pyc │ ├── dishonest_casino.py │ ├── fair_casino.py │ ├── fair_die.py │ ├── loaded_die.py │ ├── simulation_results.txt │ └── simulations.py │ ├── graph_search │ ├── __init__.py │ ├── breath_first_search.py │ ├── depth_first_search.py │ └── graph.py │ ├── greedy_algorithm │ └── knapsack.py │ ├── monte_carlo_simulation │ └── fair_roulette.py │ ├── random_walk │ ├── __pycache__ │ │ ├── drunk.cpython-36.pyc │ │ ├── drunk.cpython-37.pyc │ │ ├── drunk.cpython-38.pyc │ │ ├── field.cpython-36.pyc │ │ ├── field.cpython-37.pyc │ │ ├── field.cpython-38.pyc │ │ ├── location.cpython-36.pyc │ │ ├── location.cpython-37.pyc │ │ └── location.cpython-38.pyc │ ├── drunk.py │ ├── field.py │ ├── location.py │ ├── simulation.py │ └── simulation_analysis.py │ └── stochastic │ ├── approximation.py │ └── random_processes.py └── requirements.txt /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | 
about: Create a report to help us improve 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 12 | 13 | **To Reproduce** 14 | Steps to reproduce the behavior: 15 | 1. Go to '...' 16 | 2. Click on '....' 17 | 3. Scroll down to '....' 18 | 4. See error 19 | 20 | **Expected behavior** 21 | A clear and concise description of what you expected to happen. 22 | 23 | **Screenshots** 24 | If applicable, add screenshots to help explain your problem. 25 | 26 | **Desktop (please complete the following information):** 27 | - OS: [e.g. iOS] 28 | - Browser [e.g. chrome, safari] 29 | - Version [e.g. 22] 30 | 31 | **Smartphone (please complete the following information):** 32 | - Device: [e.g. iPhone6] 33 | - OS: [e.g. iOS8.1] 34 | - Browser [e.g. stock browser, safari] 35 | - Version [e.g. 22] 36 | 37 | **Additional context** 38 | Add any other context about the problem here. 39 | -------------------------------------------------------------------------------- /.github/workflows/pylint.yml: -------------------------------------------------------------------------------- 1 | name: Pylint 2 | 3 | on: [push] 4 | 5 | jobs: 6 | build: 7 | runs-on: ubuntu-latest 8 | strategy: 9 | matrix: 10 | python-version: ["3.8", "3.9", "3.10"] 11 | steps: 12 | - uses: actions/checkout@v3 13 | - name: Set up Python ${{ matrix.python-version }} 14 | uses: actions/setup-python@v3 15 | with: 16 | python-version: ${{ matrix.python-version }} 17 | - name: Install dependencies 18 | run: | 19 | python -m pip install --upgrade pip 20 | pip install pylint 21 | - name: Analysing the code with pylint 22 | run: | 23 | pylint $(git ls-files '*.py') 24 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | venv 2 | data -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "cSpell.words": [ 3 | "discretize", 4 | "dtype", 5 | "imread", 6 | "imshow", 7 | "ndarray", 8 | "pytoch" 9 | ], 10 | "python.pythonPath": "C:\\Program Files (x86)\\Python36-32\\python.exe", 11 | "compile-hero.disable-compile-files-on-did-save-code": false 12 | } -------------------------------------------------------------------------------- /.vscode/settings_DiskStation_Oct-20-1450-2020_Conflict.json: -------------------------------------------------------------------------------- 1 | { 2 | "cSpell.words": [ 3 | "discretize" 4 | ], 5 | "python.pythonPath": "D:\\Users\\carme\\AppData\\Local\\Programs\\Python\\Python37\\python.exe" 6 | } -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ml_algorithms 2 | 3 | 4 | -------------------------------------------------------------------------------- /ml_algorithms/doc/greedy_algorithm/readme.md: -------------------------------------------------------------------------------- 1 | # Introduction 2 | 3 | Resolution of 0/1 knapsack problem by using a greedy algorithm. 
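
As a quick illustration of the greedy approach, here is a minimal sketch (the `Item` type, `greedy_knapsack` name and density key are illustrative assumptions, not the repo's `knapsack.py`):

```python
from collections import namedtuple

Item = namedtuple('Item', ['value', 'weight'])

def greedy_knapsack(items, max_weight, key=lambda item: item.value / item.weight):
    """Greedily take items in descending order of a key function (here: value density)."""
    taken, total_value, remaining = [], 0, max_weight
    for item in sorted(items, key=key, reverse=True):
        if item.weight <= remaining:          # item still fits in the knapsack
            taken.append(item)
            total_value += item.value
            remaining -= item.weight
    return taken, total_value

# different key functions (value, 1/weight, density) generally give different answers
print(greedy_knapsack([Item(10, 5), Item(6, 3), Item(3, 3)], max_weight=6))
```

Greedy solutions are fast (the sort dominates, O(n log n)) but are not guaranteed optimal for the 0/1 problem, which is formalised as follows: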
4 | 
5 | - each item is represented by a <value, weight> pair
6 | - the total weight allowed is w
7 | - a vector I of length n represents the set of available items
8 | - a boolean vector V of length n indicates whether each item is taken
9 | 
10 | That is, maximize $$\sum_{i=0}^{n-1} V[i]I[i].value$$
11 | 
12 | such that $$\sum_{i=0}^{n-1} V[i]I[i].weight \leq w$$
13 | 
14 | ## Brute force
15 | 
16 | From the power set of items, eliminate all subsets whose total weight exceeds w and select the best remaining subset.
17 | 
18 | This approach is exponential in n, so it is not practical.
19 | 
20 | ## Example
21 | 
22 | The example builds a menu under a maximum-calories constraint using a few greedy algorithms, each ordering the items by
23 | 
24 | - user preference (held in the value attribute)
25 | - calories (the inverse thereof)
26 | - a function combining preference and calories (held in the density attribute)
27 | 
28 | ## Reference
29 | 
30 | https://www.youtube.com/watch?v=C1lhuz6pZC0
31 | 
--------------------------------------------------------------------------------
/ml_algorithms/doc/monte_carlo_simulation/notes.md:
--------------------------------------------------------------------------------
1 | # Monte Carlo Simulation
2 | 
3 | The technique was first developed by Stanislaw Ulam, a mathematician who worked on the Manhattan Project.
4 | 
5 | A method of estimating the value of an unknown quantity using the principles of inferential statistics.
6 | 
7 | ## Inferential Statistics
8 | 
9 | - Population: the set of examples
10 | - Sample: a proper subset of the population
11 | - A **random** sample tends to exhibit the same qualities as the population.
12 | 
13 | Confidence depends on:
14 | 
15 | - sample size
16 | - variance. As variance grows, larger samples are required for the same degree of confidence.
17 | 
18 | ## Roulette Considerations
19 | 
20 | - Law of large numbers (Bernoulli's law)
21 | If the probability of an outcome is p, the difference between p and the fraction observed in the samples goes to 0 as the number of samples goes to infinity.
22 | 
23 | - Gambler's Fallacy and Regression to the mean
24 | 
25 | Gambler's Fallacy: the belief that if a particular event has occurred more frequently than normal in the past, it is less likely to happen in the future (or vice versa), even though the probability of such events does not depend on what has happened in the past.
26 | 
27 | Regression to the mean: following an extreme random event, the next random event is likely to be **less extreme**.
28 | 
29 | ## Quantifying Variation
30 | 
31 | $$ variance(X) = \frac{\sum_{x\in X}(x - \mu)^2}{|X|} $$
32 | 
33 | where $$\mu$$ is the mean
34 | 
35 | $$\sigma(X) = \sqrt{variance(X)}$$
36 | 
37 | - outliers have a big effect
38 | - the standard deviation is always considered relative to the mean
39 | 
40 | ## Empirical Rule
41 | 
42 | - approx. 68% of data lie within one standard deviation of the mean
43 | - approx. 95% of data lie within 1.96 standard deviations of the mean - the most commonly used bound
44 | - approx. 99.7% of data lie within 3 standard deviations of the mean
45 | 
46 | ### Assumptions
47 | 
48 | - the mean estimation error is zero, therefore there is no bias
49 | - the distribution of errors in the estimates is normal (mean = 0, sd = 1)
50 | 
51 | ## Probability Density Function
52 | 
53 | - Distributions are defined by a Probability Density Function (PDF)
54 | - Gives the probability of a random variable lying between two values
55 | - Defines a curve where the range on the x-axis lies between the minimum and maximum values of the variable.
56 | - Area under curve between two points defined the probability of an example falling in that range 57 | -------------------------------------------------------------------------------- /ml_algorithms/doc/svm/notes/.gitignore: -------------------------------------------------------------------------------- 1 | ## Core latex/pdflatex auxiliary files: 2 | *.aux 3 | *.lof 4 | *.log 5 | *.lot 6 | *.fls 7 | *.out 8 | *.toc 9 | *.fmt 10 | *.fot 11 | *.cb 12 | *.cb2 13 | .*.lb 14 | 15 | ## Intermediate documents: 16 | *.dvi 17 | *.xdv 18 | *-converted-to.* 19 | # these rules might exclude image files for figures etc. 20 | # *.ps 21 | # *.eps 22 | # *.pdf 23 | 24 | ## Generated if empty string is given at "Please type another file name for output:" 25 | .pdf 26 | 27 | ## Bibliography auxiliary files (bibtex/biblatex/biber): 28 | *.bbl 29 | *.bcf 30 | *.blg 31 | *-blx.aux 32 | *-blx.bib 33 | *.run.xml 34 | 35 | ## Build tool auxiliary files: 36 | *.fdb_latexmk 37 | *.synctex 38 | *.synctex(busy) 39 | *.synctex.gz 40 | *.synctex.gz(busy) 41 | *.pdfsync 42 | 43 | ## Auxiliary and intermediate files from other packages: 44 | # algorithms 45 | *.alg 46 | *.loa 47 | 48 | # achemso 49 | acs-*.bib 50 | 51 | # amsthm 52 | *.thm 53 | 54 | # beamer 55 | *.nav 56 | *.pre 57 | *.snm 58 | *.vrb 59 | 60 | # changes 61 | *.soc 62 | 63 | # cprotect 64 | *.cpt 65 | 66 | # elsarticle (documentclass of Elsevier journals) 67 | *.spl 68 | 69 | # endnotes 70 | *.ent 71 | 72 | # fixme 73 | *.lox 74 | 75 | # feynmf/feynmp 76 | *.mf 77 | *.mp 78 | *.t[1-9] 79 | *.t[1-9][0-9] 80 | *.tfm 81 | 82 | #(r)(e)ledmac/(r)(e)ledpar 83 | *.end 84 | *.?end 85 | *.[1-9] 86 | *.[1-9][0-9] 87 | *.[1-9][0-9][0-9] 88 | *.[1-9]R 89 | *.[1-9][0-9]R 90 | *.[1-9][0-9][0-9]R 91 | *.eledsec[1-9] 92 | *.eledsec[1-9]R 93 | *.eledsec[1-9][0-9] 94 | *.eledsec[1-9][0-9]R 95 | *.eledsec[1-9][0-9][0-9] 96 | *.eledsec[1-9][0-9][0-9]R 97 | 98 | # glossaries 99 | *.acn 100 | *.acr 101 | *.glg 102 | *.glo 103 | *.gls 104 | *.glsdefs 105 | 106 | # gnuplottex 107 | *-gnuplottex-* 108 | 109 | # gregoriotex 110 | *.gaux 111 | *.gtex 112 | 113 | # htlatex 114 | *.4ct 115 | *.4tc 116 | *.idv 117 | *.lg 118 | *.trc 119 | *.xref 120 | 121 | # hyperref 122 | *.brf 123 | 124 | # knitr 125 | *-concordance.tex 126 | # TODO Comment the next line if you want to keep your tikz graphics files 127 | *.tikz 128 | *-tikzDictionary 129 | 130 | # listings 131 | *.lol 132 | 133 | # makeidx 134 | *.idx 135 | *.ilg 136 | *.ind 137 | *.ist 138 | 139 | # minitoc 140 | *.maf 141 | *.mlf 142 | *.mlt 143 | *.mtc[0-9]* 144 | *.slf[0-9]* 145 | *.slt[0-9]* 146 | *.stc[0-9]* 147 | 148 | # minted 149 | _minted* 150 | *.pyg 151 | 152 | # morewrites 153 | *.mw 154 | 155 | # nomencl 156 | *.nlg 157 | *.nlo 158 | *.nls 159 | 160 | # pax 161 | *.pax 162 | 163 | # pdfpcnotes 164 | *.pdfpc 165 | 166 | # sagetex 167 | *.sagetex.sage 168 | *.sagetex.py 169 | *.sagetex.scmd 170 | 171 | # scrwfile 172 | *.wrt 173 | 174 | # sympy 175 | *.sout 176 | *.sympy 177 | sympy-plots-for-*.tex/ 178 | 179 | # pdfcomment 180 | *.upa 181 | *.upb 182 | 183 | # pythontex 184 | *.pytxcode 185 | pythontex-files-*/ 186 | 187 | # thmtools 188 | *.loe 189 | 190 | # TikZ & PGF 191 | *.dpth 192 | *.md5 193 | *.auxlock 194 | 195 | # todonotes 196 | *.tdo 197 | 198 | # easy-todo 199 | *.lod 200 | 201 | # xmpincl 202 | *.xmpi 203 | 204 | # xindy 205 | *.xdy 206 | 207 | # xypic precompiled matrices 208 | *.xyc 209 | 210 | # endfloat 211 | *.ttt 212 | *.fff 213 | 214 | # Latexian 215 | TSWLatexianTemp* 216 | 217 | ## 
Editors:
218 | # WinEdt
219 | *.bak
220 | *.sav
221 | 
222 | # Texpad
223 | .texpadtmp
224 | 
225 | # Kile
226 | *.backup
227 | 
228 | # KBibTeX
229 | *~[0-9]*
230 | 
231 | # auto folder when using emacs and auctex
232 | ./auto/*
233 | *.el
234 | 
235 | # expex forward references with \gathertags
236 | *-tags.tex
237 | 
238 | # standalone packages
239 | *.sta
240 | 
241 | # generated if using elsarticle.cls
242 | *.spl
243 | 
--------------------------------------------------------------------------------
/ml_algorithms/doc/svm/notes/README.md:
--------------------------------------------------------------------------------
1 | # BET_svm
2 | SVM for budget prediction
--------------------------------------------------------------------------------
/ml_algorithms/doc/svm/notes/budget_prediction_svm.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/carmelgafa/ml-from-scratch/c51c4f4c88829da26a448dfc6bc48d9ec706df96/ml_algorithms/doc/svm/notes/budget_prediction_svm.pdf
--------------------------------------------------------------------------------
/ml_algorithms/doc/svm/notes/fig_4.tex:
--------------------------------------------------------------------------------
1 | \usetikzlibrary{arrows}
2 | \begin{tikzpicture}
3 | 
4 | 
5 | \draw[->,ultra thick] (0,0)--(5,0) node[right]{$x$};
6 | \draw[->,ultra thick] (0,0)--(0,5) node[above]{$y$};
7 | 
8 | \node [text=blue] at (1.5,1) {\Huge -};
9 | \node [text=blue] at (0.5,2) {\Huge -};
10 | \node [text=red] at (3,3) {\huge +};
11 | \node [text=red] at (4,4) {\huge +};
12 | 
13 | \draw[red,ultra thick] (2,5)--(2,2);
14 | \draw[red,ultra thick] (5,2)--(2,2);
15 | 
16 | 
17 | \draw[blue,ultra thick] (0.5,4.5)--(1.5,3);
18 | \draw[blue,ultra thick] (1.5,3)--(1.5,1.5);
19 | \draw[blue,ultra thick] (1.5,1.5)--(3,1.5);
20 | \draw[blue,ultra thick] (3,1.5)--(4.5,0.5);
21 | 
22 | \node[red] at (4.5,2.5) {\tiny neural network};
23 | \node[blue] at (2.75,0.65) {\tiny nearest neighbour};
24 | 
25 | \end{tikzpicture}
--------------------------------------------------------------------------------
/ml_algorithms/doc/svm/notes/fig_5.tex:
--------------------------------------------------------------------------------
1 | \usetikzlibrary{arrows}
2 | \begin{tikzpicture}
3 | 
4 | 
5 | \draw[->,ultra thick] (0,0)--(5,0) node[right]{$x$};
6 | \draw[->,ultra thick] (0,0)--(0,5) node[above]{$y$};
7 | 
8 | \node [text=blue] at (1.5,1) {\Huge -};
9 | \node [text=blue] at (0.5,2) {\Huge -};
10 | \node [text=red] at (3,3) {\huge +};
11 | \node [text=red] at (4,4) {\huge +};
12 | 
13 | 
14 | 
15 | \draw[dashed] (1,5)--(5,1);
16 | 
17 | \draw[dashed] (0,2.5)--(2.5,0);
18 | 
19 | \draw(0,4.25)--(4.25,0);
20 | 
21 | \end{tikzpicture}
--------------------------------------------------------------------------------
/ml_algorithms/doc/svm/notes/fig_6.tex:
--------------------------------------------------------------------------------
1 | \usetikzlibrary{arrows}
2 | \begin{tikzpicture}
3 | 
4 | 
5 | \draw[->,ultra thick] (0,0)--(5,0) node[right]{$x$};
6 | \draw[->,ultra thick] (0,0)--(0,5) node[above]{$y$};
7 | 
8 | \node [text=blue] at (1.5,1) {\Huge -};
9 | \node [text=blue] at (0.5,2) {\Huge -};
10 | \node [text=red] at (3,3) {\huge +};
11 | \node [text=red] at (4,4) {\huge +};
12 | 
13 | 
14 | 
15 | \draw[dashed] (1,5)--(5,1);
16 | \draw[dashed] (0,2.5)--(2.5,0);
17 | \draw(0,4.25)--(4.25,0);
18 | 
19 | 
20 | 
21 | \draw[->, ultra thick, green] (0,0)--(0.9,1.1) node[text=black, right]{$\vec{w}$};
22 | \draw[->, ultra thick, black]
(0,0)--(0.7,2.8) node[text=black, right]{$\vec{u}$};
23 | 
24 | 
25 | \end{tikzpicture}
--------------------------------------------------------------------------------
/ml_algorithms/doc/svm/notes/fig_7.tex:
--------------------------------------------------------------------------------
1 | \usetikzlibrary{arrows}
2 | \begin{tikzpicture}
3 | 
4 | 
5 | \draw[->,ultra thick] (0,0)--(5,0) node[right]{$x$};
6 | \draw[->,ultra thick] (0,0)--(0,5) node[above]{$y$};
7 | 
8 | \node [text=blue] at (1.5,1) {\Huge -};
9 | \node [text=blue] at (0.5,2) {\Huge -};
10 | \node [text=red] at (3,3) {\huge +};
11 | \node [text=red] at (4,4) {\huge +};
12 | 
13 | 
14 | 
15 | \draw[dashed] (1,5)--(5,1);
16 | \draw[dashed] (0,2.5)--(2.5,0);
17 | \draw(0,4.25)--(4.25,0);
18 | 
19 | 
20 | \node at (4.5,3.5) {$c \geq 1$};
21 | \node at (0.6,0.4) {$c \leq -1$};
22 | \node at (2.9,1.4) {$-1 \leq c \leq 1$};
23 | 
24 | \end{tikzpicture}
25 | 
--------------------------------------------------------------------------------
/ml_algorithms/doc/svm/notes/fig_8.tex:
--------------------------------------------------------------------------------
1 | \usetikzlibrary{arrows}
2 | \begin{tikzpicture}
3 | 
4 | \draw[->] (0,0)--(5,0) node[right]{$x$};
5 | \draw[->] (0,0)--(0,5) node[above]{$y$};
6 | 
7 | \node [text=blue] at (2,2) {\Huge -};
8 | \node [text=blue] at (0,0) {\Huge -};
9 | \node [text=red] at (2,0) {\huge +};
10 | \node [text=red] at (0,2) {\huge +};
11 | 
12 | \draw[->] (0,-6)--(5,-6) node[right]{$x$};
13 | \draw[->] (0,-6)--(0,-1) node[above]{$y$};
14 | \draw[->] (0,-6)--(3,-3) node[above]{$z$};
15 | 
16 | \draw[dashed, gray] (1.5,-4.5)--(4,-4.5);
17 | \draw[dashed, gray] (4,-4.5)--(2.5,-6);
18 | \draw[dashed, gray] (4,-4.5)--(4,-3);
19 | \draw[dashed, gray] (4,-3)--(1.5,-3);
20 | \draw[dashed, gray] (1.5,-4.5)--(1.5,-3);
21 | \draw[dashed, gray] (2.5,-4.5)--(2.5,-6);
22 | \draw[dashed, gray] (2.5,-4.5)--(4,-3);
23 | 
24 | \node [text=red] at (1.5,-3) {\huge +};
25 | \node [text=red] at (2.5,-4.5) {\huge +};
26 | \node [text=blue] at (4,-4.5) {\Huge -};
27 | \node [text=blue] at (0,-6) {\Huge -};
28 | 
29 | \node at (2.5,3) {linearly inseparable samples};
30 | \node at (2.5,-2) {separable after transformation};
31 | 
32 | \end{tikzpicture}
--------------------------------------------------------------------------------
/ml_algorithms/doc/svm/notes/img/fig_1.JPG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/carmelgafa/ml-from-scratch/c51c4f4c88829da26a448dfc6bc48d9ec706df96/ml_algorithms/doc/svm/notes/img/fig_1.JPG
--------------------------------------------------------------------------------
/ml_algorithms/doc/svm/notes/img/fig_2.JPG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/carmelgafa/ml-from-scratch/c51c4f4c88829da26a448dfc6bc48d9ec706df96/ml_algorithms/doc/svm/notes/img/fig_2.JPG
--------------------------------------------------------------------------------
/ml_algorithms/doc/svm/notes/img/fig_3.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/carmelgafa/ml-from-scratch/c51c4f4c88829da26a448dfc6bc48d9ec706df96/ml_algorithms/doc/svm/notes/img/fig_3.jpg
--------------------------------------------------------------------------------
/ml_algorithms/doc/svm/notes/img/fig_4.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/carmelgafa/ml-from-scratch/c51c4f4c88829da26a448dfc6bc48d9ec706df96/ml_algorithms/doc/svm/notes/img/fig_4.jpg -------------------------------------------------------------------------------- /ml_algorithms/doc/svm/notes/img/fig_5.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carmelgafa/ml-from-scratch/c51c4f4c88829da26a448dfc6bc48d9ec706df96/ml_algorithms/doc/svm/notes/img/fig_5.jpg -------------------------------------------------------------------------------- /ml_algorithms/doc/svm/notes/img/fig_6.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carmelgafa/ml-from-scratch/c51c4f4c88829da26a448dfc6bc48d9ec706df96/ml_algorithms/doc/svm/notes/img/fig_6.jpg -------------------------------------------------------------------------------- /ml_algorithms/doc/svm/notes/img/fig_7.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carmelgafa/ml-from-scratch/c51c4f4c88829da26a448dfc6bc48d9ec706df96/ml_algorithms/doc/svm/notes/img/fig_7.jpg -------------------------------------------------------------------------------- /ml_algorithms/doc/svm/notes/img/fig_8.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carmelgafa/ml-from-scratch/c51c4f4c88829da26a448dfc6bc48d9ec706df96/ml_algorithms/doc/svm/notes/img/fig_8.jpg -------------------------------------------------------------------------------- /ml_algorithms/samples/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carmelgafa/ml-from-scratch/c51c4f4c88829da26a448dfc6bc48d9ec706df96/ml_algorithms/samples/__init__.py -------------------------------------------------------------------------------- /ml_algorithms/samples/pandas_examples.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | 4 | def multiply_matrix_by_vector(matrix, vector): 5 | # original data 6 | df_a = pd.DataFrame([[1,2,3],[4,5,6]]) 7 | print(df_a, '\n') 8 | 9 | # multiplier vector 10 | df_b = pd.DataFrame([2,2,1]) 11 | print(df_b, '\n') 12 | 13 | # multiply by a list - it works 14 | df_c = df_a*[2,2,1] 15 | print(df_c, '\n') 16 | 17 | # multiply by the dataframe - it works 18 | df_c = df_a*df_b.to_numpy().T 19 | print(df_c, '\n') 20 | 21 | #using a series - it works -- preferred 22 | df_c = df_a*df_b[0] 23 | print(df_c, '\n') 24 | 25 | 26 | 27 | def matrix_difference(): 28 | 29 | df_a = pd.DataFrame([[1,2,3],[4,5,6]]) 30 | print(df_a, '\n') 31 | 32 | df_b = pd.DataFrame([[1,1,1],[1,1,1]]) 33 | print(df_b, '\n') 34 | 35 | df_c = df_a - df_b 36 | print(df_c, '\n') 37 | 38 | 39 | if __name__ == '__main__': 40 | #multiply_matrix_by_vector() 41 | matrix_difference() 42 | -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carmelgafa/ml-from-scratch/c51c4f4c88829da26a448dfc6bc48d9ec706df96/ml_algorithms/src/algorithms/__init__.py -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/__pycache__/__init__.cpython-39.pyc: 
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/carmelgafa/ml-from-scratch/c51c4f4c88829da26a448dfc6bc48d9ec706df96/ml_algorithms/src/algorithms/__pycache__/__init__.cpython-39.pyc
--------------------------------------------------------------------------------
/ml_algorithms/src/algorithms/cnn/bud.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/carmelgafa/ml-from-scratch/c51c4f4c88829da26a448dfc6bc48d9ec706df96/ml_algorithms/src/algorithms/cnn/bud.jpg
--------------------------------------------------------------------------------
/ml_algorithms/src/algorithms/cnn/cnn.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | 
3 | def convolution2d(x, kernel, bias, stride=1, padding=0):
4 |     """
5 |     Convolution 2D - minimal single-channel sketch (assumes 2D numpy arrays).
6 |     """
7 |     x = np.pad(x, padding)                          # zero-pad both spatial axes
8 |     k_h, k_w = kernel.shape
9 |     out_h, out_w = (x.shape[0] - k_h)//stride + 1, (x.shape[1] - k_w)//stride + 1
10 |     out = np.zeros((out_h, out_w))
11 |     for i in range(out_h):                          # slide the kernel over the image
12 |         for j in range(out_w):
13 |             out[i, j] = np.sum(x[i*stride:i*stride+k_h, j*stride:j*stride+k_w]*kernel) + bias
14 |     return out
--------------------------------------------------------------------------------
/ml_algorithms/src/algorithms/cnn/image_convolution.py:
--------------------------------------------------------------------------------
1 | ''' image manipulation '''
2 | #pylint: disable = E0401
3 | import os
4 | import matplotlib.pyplot as plt
5 | 
6 | 
7 | def load_image_to_rgb(image_path):
8 |     '''load an image and split it into its R, G, B channels'''
9 | 
10 |     image = plt.imread(image_path)
11 | 
12 |     r, g, b = image[:, :, 0], image[:, :, 1], image[:, :, 2] # matplotlib reads images in RGB order
13 | 
14 |     return r, g, b
15 | 
16 | def load_image_to_grayscale(image_path):
17 |     '''load and convert image to grayscale'''
18 | 
19 |     image = plt.imread(image_path)
20 | 
21 |     # convert image to grayscale by averaging the channels
22 |     image = image.mean(axis=2)
23 | 
24 |     return image
25 | 
26 | if __name__ == '__main__':
27 |     BUD_FILENAME = 'bud.jpg'
28 |     bud_path = os.path.join(os.path.dirname(__file__), BUD_FILENAME)
29 | 
30 | 
31 |     bud_image_grayscale = load_image_to_grayscale(bud_path)
32 |     plt.imshow(bud_image_grayscale, cmap='gray')
33 |     plt.show()
34 | 
35 | 
36 |     bud_image_r, bud_image_g, bud_image_b = load_image_to_rgb(bud_path)
37 |     plt.imshow(bud_image_r, cmap='Reds')
38 |     plt.show()
39 |     plt.imshow(bud_image_g, cmap='Greens')
40 |     plt.show()
41 |     plt.imshow(bud_image_b, cmap='Blues')
42 |     plt.show()
43 | 
--------------------------------------------------------------------------------
/ml_algorithms/src/algorithms/fuzzy_inference/fuzzy_example_2in_1out.py:
--------------------------------------------------------------------------------
1 | from fuzzy_system.fuzzy_variable_output import FuzzyOutputVariable
2 | from fuzzy_system.fuzzy_variable_input import FuzzyInputVariable
3 | # from fuzzy_system.fuzzy_variable import FuzzyVariable
4 | from fuzzy_system.fuzzy_system import FuzzySystem
5 | 
6 | temp = FuzzyInputVariable('Temperature', 10, 40, 100)
7 | temp.add_triangular('Cold', 10, 10, 25)
8 | temp.add_triangular('Medium', 15, 25, 35)
9 | temp.add_triangular('Hot', 25, 40, 40)
10 | 
11 | humidity = FuzzyInputVariable('Humidity', 20, 100, 100)
12 | humidity.add_triangular('Wet', 20, 20, 60)
13 | humidity.add_trapezoidal('Normal', 30, 50, 70, 90)
14 | humidity.add_triangular('Dry', 60, 100, 100)
15 | 
16 | motor_speed = FuzzyOutputVariable('Speed', 0, 100, 100)
17 | motor_speed.add_triangular('Slow', 0, 0, 50)
18 | motor_speed.add_triangular('Moderate', 10, 50, 90)
19 | motor_speed.add_triangular('Fast', 50, 100, 100)
20 | 
21 | system = FuzzySystem()
22 | system.add_input_variable(temp)
23 | system.add_input_variable(humidity)
24 | system.add_output_variable(motor_speed)
25 | 
26 | 
system.add_rule( 27 | { 'Temperature':'Cold', 28 | 'Humidity':'Wet' }, 29 | { 'Speed':'Slow'}) 30 | 31 | system.add_rule( 32 | { 'Temperature':'Cold', 33 | 'Humidity':'Normal' }, 34 | { 'Speed':'Slow'}) 35 | 36 | system.add_rule( 37 | { 'Temperature':'Medium', 38 | 'Humidity':'Wet' }, 39 | { 'Speed':'Slow'}) 40 | 41 | system.add_rule( 42 | { 'Temperature':'Medium', 43 | 'Humidity':'Normal' }, 44 | { 'Speed':'Moderate'}) 45 | 46 | system.add_rule( 47 | { 'Temperature':'Cold', 48 | 'Humidity':'Dry' }, 49 | { 'Speed':'Moderate'}) 50 | 51 | system.add_rule( 52 | { 'Temperature':'Hot', 53 | 'Humidity':'Wet' }, 54 | { 'Speed':'Moderate'}) 55 | 56 | system.add_rule( 57 | { 'Temperature':'Hot', 58 | 'Humidity':'Normal' }, 59 | { 'Speed':'Fast'}) 60 | 61 | system.add_rule( 62 | { 'Temperature':'Hot', 63 | 'Humidity':'Dry' }, 64 | { 'Speed':'Fast'}) 65 | 66 | system.add_rule( 67 | { 'Temperature':'Medium', 68 | 'Humidity':'Dry' }, 69 | { 'Speed':'Fast'}) 70 | 71 | output = system.evaluate_output({ 72 | 'Temperature':18, 73 | 'Humidity':60 74 | }) 75 | 76 | print(output) 77 | # print('fuzzification\n-------------\n', info['fuzzification']) 78 | # print('rules\n-----\n', info['rules']) 79 | 80 | system.plot_system() -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/fuzzy_inference/fuzzy_example_2in_2out.py: -------------------------------------------------------------------------------- 1 | from fuzzy_system.fuzzy_variable_output import FuzzyOutputVariable 2 | from fuzzy_system.fuzzy_variable_input import FuzzyInputVariable 3 | 4 | 5 | from fuzzy_system.fuzzy_system import FuzzySystem 6 | 7 | x1 = FuzzyInputVariable('x1', 0, 100, 100) 8 | x1.add_triangular('S', 0, 25, 50) 9 | x1.add_triangular('M', 25, 50, 75) 10 | x1.add_triangular('L', 50, 75, 100) 11 | 12 | x2 = FuzzyInputVariable('x2', 0, 100, 100) 13 | x2.add_triangular('S', 0, 25, 50) 14 | x2.add_triangular('M', 25, 50, 75) 15 | x2.add_triangular('L', 50, 75, 100) 16 | 17 | y = FuzzyOutputVariable('y', 0, 100, 100) 18 | y.add_triangular('S', 0, 25, 50) 19 | y.add_triangular('M', 25, 50, 75) 20 | y.add_triangular('L', 50, 75, 100) 21 | 22 | z = FuzzyOutputVariable('z', 0, 100, 100) 23 | z.add_triangular('S', 0, 25, 50) 24 | z.add_triangular('M', 25, 50, 75) 25 | z.add_triangular('L', 50, 75, 100) 26 | 27 | system = FuzzySystem() 28 | system.add_input_variable(x1) 29 | system.add_input_variable(x2) 30 | system.add_output_variable(y) 31 | system.add_output_variable(z) 32 | 33 | system.add_rule( 34 | { 'x1':'S', 35 | 'x2':'S' }, 36 | { 'y':'S', 37 | 'z':'L' }) 38 | 39 | system.add_rule( 40 | { 'x1':'M', 41 | 'x2':'M' }, 42 | { 'y':'M', 43 | 'z':'M' }) 44 | 45 | system.add_rule( 46 | { 'x1':'L', 47 | 'x2':'L' }, 48 | { 'y':'L', 49 | 'z':'S' }) 50 | 51 | system.add_rule( 52 | { 'x1':'S', 53 | 'x2':'M' }, 54 | { 'y':'S', 55 | 'z':'L' }) 56 | 57 | system.add_rule( 58 | { 'x1':'M', 59 | 'x2':'S' }, 60 | { 'y':'S', 61 | 'z':'L' }) 62 | 63 | system.add_rule( 64 | { 'x1':'L', 65 | 'x2':'M' }, 66 | { 'y':'L', 67 | 'z':'S' }) 68 | 69 | system.add_rule( 70 | { 'x1':'M', 71 | 'x2':'L' }, 72 | { 'y':'L', 73 | 'z':'S' }) 74 | 75 | system.add_rule( 76 | { 'x1':'L', 77 | 'x2':'S' }, 78 | { 'y':'M', 79 | 'z':'M' }) 80 | 81 | system.add_rule( 82 | { 'x1':'S', 83 | 'x2':'L' }, 84 | { 'y':'M', 85 | 'z':'M' }) 86 | 87 | output = system.evaluate_output({ 88 | 'x1':35, 89 | 'x2':75 90 | }) 91 | 92 | 93 | print(output) 94 | -------------------------------------------------------------------------------- 
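
The two example scripts above drive a Mamdani-style inference cycle; the `FuzzySystem` class that implements it (fuzzy_system.py) is not reproduced in this section. As a rough illustration of the steps the examples rely on - fuzzify, take the min of the antecedent memberships as the rule strength, clip the consequent sets, max-aggregate, and defuzzify by centroid - here is a self-contained numeric sketch. It is an independent toy, not the repo's implementation, and the set shapes and input values are assumptions:

```python
# Illustrative Mamdani inference cycle: fuzzify -> rule strength (min) ->
# clip consequents -> aggregate (max) -> centroid defuzzification.
import numpy as np

def tri(x, a, b, c):
    """Triangular membership with shoulder handling for a == b or b == c."""
    x = np.asarray(x, dtype=float)
    left = np.where(b > a, (x - a) / max(b - a, 1e-12), (x >= a).astype(float))
    right = np.where(c > b, (c - x) / max(c - b, 1e-12), (x <= c).astype(float))
    return np.clip(np.minimum(left, right), 0.0, 1.0)

speed = np.linspace(0, 100, 1001)            # output universe for 'Speed'
slow = tri(speed, 0, 0, 50)
moderate = tri(speed, 10, 50, 90)

# fuzzify crisp inputs Temperature=18, Humidity=55
t_cold = float(tri(18, 10, 10, 25))
t_medium = float(tri(18, 15, 25, 35))
h_wet = float(tri(55, 20, 20, 60))
h_normal = float(tri(55, 30, 50, 70))        # trapezoid simplified to a triangle here

r1 = min(t_cold, h_wet)                      # IF Cold AND Wet THEN Slow
r2 = min(t_medium, h_normal)                 # IF Medium AND Normal THEN Moderate

# clip each consequent by its rule strength, aggregate with max, take the centroid
agg = np.maximum(np.minimum(slow, r1), np.minimum(moderate, r2))
print('crisp speed:', np.sum(agg * speed) / np.sum(agg))
```

The repo's classes appear to wrap the same cycle behind `add_rule()` and `evaluate_output()`, with `FuzzyOutputVariable.add_rule_contribution()` performing the union (max) aggregation, as shown in fuzzy_variable_output.py later in this section.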
/ml_algorithms/src/algorithms/fuzzy_inference/fuzzy_system/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carmelgafa/ml-from-scratch/c51c4f4c88829da26a448dfc6bc48d9ec706df96/ml_algorithms/src/algorithms/fuzzy_inference/fuzzy_system/__init__.py -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/fuzzy_inference/fuzzy_system/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carmelgafa/ml-from-scratch/c51c4f4c88829da26a448dfc6bc48d9ec706df96/ml_algorithms/src/algorithms/fuzzy_inference/fuzzy_system/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/fuzzy_inference/fuzzy_system/__pycache__/fuzzy_clause.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carmelgafa/ml-from-scratch/c51c4f4c88829da26a448dfc6bc48d9ec706df96/ml_algorithms/src/algorithms/fuzzy_inference/fuzzy_system/__pycache__/fuzzy_clause.cpython-38.pyc -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/fuzzy_inference/fuzzy_system/__pycache__/fuzzy_rule.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carmelgafa/ml-from-scratch/c51c4f4c88829da26a448dfc6bc48d9ec706df96/ml_algorithms/src/algorithms/fuzzy_inference/fuzzy_system/__pycache__/fuzzy_rule.cpython-38.pyc -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/fuzzy_inference/fuzzy_system/__pycache__/fuzzy_set.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carmelgafa/ml-from-scratch/c51c4f4c88829da26a448dfc6bc48d9ec706df96/ml_algorithms/src/algorithms/fuzzy_inference/fuzzy_system/__pycache__/fuzzy_set.cpython-38.pyc -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/fuzzy_inference/fuzzy_system/__pycache__/fuzzy_system.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carmelgafa/ml-from-scratch/c51c4f4c88829da26a448dfc6bc48d9ec706df96/ml_algorithms/src/algorithms/fuzzy_inference/fuzzy_system/__pycache__/fuzzy_system.cpython-38.pyc -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/fuzzy_inference/fuzzy_system/__pycache__/fuzzy_variable.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carmelgafa/ml-from-scratch/c51c4f4c88829da26a448dfc6bc48d9ec706df96/ml_algorithms/src/algorithms/fuzzy_inference/fuzzy_system/__pycache__/fuzzy_variable.cpython-38.pyc -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/fuzzy_inference/fuzzy_system/__pycache__/fuzzy_variable_input.cpython-38.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/carmelgafa/ml-from-scratch/c51c4f4c88829da26a448dfc6bc48d9ec706df96/ml_algorithms/src/algorithms/fuzzy_inference/fuzzy_system/__pycache__/fuzzy_variable_input.cpython-38.pyc -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/fuzzy_inference/fuzzy_system/__pycache__/fuzzy_variable_output.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carmelgafa/ml-from-scratch/c51c4f4c88829da26a448dfc6bc48d9ec706df96/ml_algorithms/src/algorithms/fuzzy_inference/fuzzy_system/__pycache__/fuzzy_variable_output.cpython-38.pyc -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/fuzzy_inference/fuzzy_system/fuzzy_clause.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Fuzzy Clause class. Used in Fuzzy rule 3 | ''' 4 | class FuzzyClause(): 5 | ''' 6 | A fuzzy clause of the type 'variable is set' 7 | used in fuzzy IF ... THEN ... rules 8 | clauses can be antecedent (if part) or consequent 9 | (then part) 10 | ''' 11 | 12 | def __init__(self, variable, f_set, degree=1): 13 | ''' 14 | initialization of the fuzzy clause 15 | 16 | Arguments: 17 | ---------- 18 | variable -- the clause variable in 'variable is set' 19 | set -- the clause set in 'variable is set' 20 | ''' 21 | 22 | if f_set is None: 23 | raise Exception('set none') 24 | 25 | if f_set.name == '': 26 | raise Exception(str(f_set), 'no set name') 27 | 28 | 29 | self._variable = variable 30 | self._set = f_set 31 | 32 | def __str__(self): 33 | ''' 34 | string representation of the clause. 35 | 36 | Returns: 37 | -------- 38 | str: str, string representation of the clause in the form 39 | A is x 40 | ''' 41 | return f'{self._variable.name} is {self._set.name}' 42 | 43 | @property 44 | def variable_name(self): 45 | ''' 46 | returns the name of the clause variable 47 | 48 | Returns: 49 | -------- 50 | variable_name: str, name of variable 51 | ''' 52 | return self._variable.name 53 | 54 | @property 55 | def set_name(self): 56 | ''' 57 | returns the name of the clause variable 58 | 59 | Returns: 60 | -------- 61 | variable_name: str, name of variable 62 | ''' 63 | return self._set.name 64 | 65 | def evaluate_antecedent(self): 66 | ''' 67 | Used when set is antecedent. 68 | returns the set degree of membership. 69 | 70 | Returns: 71 | -------- 72 | dom -- number, the set degree of membership given a value for 73 | that variable. This value is determined at an earlier stage 74 | and stored in the set 75 | ''' 76 | return self._set.last_dom_value 77 | 78 | def evaluate_consequent(self, dom): 79 | ''' 80 | Used when clause is consequent. 
81 | 
82 |         Arguments:
83 |         -----------
84 |         dom -- number, scalar value from the antecedent clauses
85 | 
86 |         Returns:
87 |         --------
88 |         None -- the min of the consequent set with the scalar value is
89 |         added to the output variable's output distribution
90 |         '''
91 |         self._variable.add_rule_contribution(self._set.min_scalar(dom))
--------------------------------------------------------------------------------
/ml_algorithms/src/algorithms/fuzzy_inference/fuzzy_system/fuzzy_rule.py:
--------------------------------------------------------------------------------
1 | from .fuzzy_clause import FuzzyClause
2 | 
3 | class FuzzyRule():
4 |     '''
5 |     A fuzzy rule of the type
6 |     IF [antecedent clauses] THEN [consequent clauses]
7 |     '''
8 | 
9 |     def __init__(self):
10 |         '''
11 |         initializes the rule. Two data structures are necessary:
12 |         the antecedent clauses list
13 |         the consequent clauses list
14 |         '''
15 |         self._antecedent = []
16 |         self._consequent = []
17 | 
18 |     def __str__(self):
19 |         '''
20 |         string representation of the rule.
21 | 
22 |         Returns:
23 |         --------
24 |         str: str, string representation of the rule in the form
25 |         IF [antecedent clauses] THEN [consequent clauses]
26 |         '''
27 |         ante = ' and '.join(map(str, self._antecedent))
28 |         cons = ' and '.join(map(str, self._consequent))
29 |         return f'If {ante} then {cons}'
30 | 
31 |     def add_antecedent_clause(self, var, f_set):
32 |         '''
33 |         adds an antecedent clause to the rule
34 | 
35 |         Arguments:
36 |         -----------
37 |         var, f_set -- the variable and fuzzy set forming the antecedent clause
38 |         '''
39 |         self._antecedent.append(FuzzyClause(var, f_set))
40 | 
41 |     def add_consequent_clause(self, var, f_set):
42 |         '''
43 |         adds a consequent clause to the rule
44 | 
45 |         Arguments:
46 |         -----------
47 |         var, f_set -- the variable and fuzzy set forming the consequent clause
48 |         '''
49 |         self._consequent.append(FuzzyClause(var, f_set))
50 | 
51 |     def evaluate(self):
52 |         '''
53 |         evaluation of the rule.
54 |         the antecedent clauses are evaluated and the minimum degree of
55 |         membership is retained as the rule strength.
56 |         The rule strength is then used in the consequent clauses to min (clip)
57 |         the consequent sets.
58 |         Each output variable updates its output distribution with the clipped set.
59 | 
60 |         Returns:
61 |         --------
62 |         None -- the rule contributions are accumulated in the output
63 |         variables
64 |         '''
65 |         # rule dom initialize to 1 as min operator will be performed
66 |         rule_strength = 1
67 | 
68 |         # execute all antecedent clauses, keeping the minimum of the
69 |         # returned doms to determine the rule strength
70 |         for ante_clause in self._antecedent:
71 |             rule_strength = min(ante_clause.evaluate_antecedent(), rule_strength)
72 | 
73 |         # execute consequent clauses, each output variable will update its output_distribution set
74 |         for consequent_clause in self._consequent:
75 |             consequent_clause.evaluate_consequent(rule_strength)
76 | 
77 |     def evaluate_info(self):
78 |         '''
79 |         evaluation of the rule, also returning diagnostic information.
80 |         the antecedent clauses are evaluated and the minimum degree of
81 |         membership is retained as the rule strength.
82 |         The rule strength is then used in the consequent clauses to min (clip)
83 |         the consequent sets, updating each output variable's output
84 |         distribution exactly as evaluate() does.
85 | 
86 |         Returns:
87 |         --------
88 |         info -- str, the rule strength followed by the rule, in the form
89 |         'strength : rule'
90 |         '''
91 |         # rule dom initialize to 1 as min operator will be performed
92 |         rule_strength = 1
93 | 
94 | 
95 |         # execute all antecedent clauses, keeping the minimum of the
96 |         # returned doms to determine the rule strength
97 |         for ante_clause in self._antecedent:
98 |             rule_strength = min(ante_clause.evaluate_antecedent(), rule_strength)
99 | 
100 |         # execute consequent clauses, each output variable will update its output_distribution set
101 |         for consequent_clause in self._consequent:
102 |             consequent_clause.evaluate_consequent(rule_strength)
103 | 
104 |         return f'{rule_strength} : {self}'
--------------------------------------------------------------------------------
/ml_algorithms/src/algorithms/fuzzy_inference/fuzzy_system/fuzzy_variable.py:
--------------------------------------------------------------------------------
1 | from .fuzzy_set import FuzzySet
2 | import matplotlib.pyplot as plt
3 | import numpy as np
4 | 
5 | class FuzzyVariable():
6 |     '''
7 |     A type-1 fuzzy variable that is made up of a number of type-1 fuzzy sets
8 |     '''
9 |     def __init__(self, name, min_val, max_val, res):
10 |         '''
11 |         creates a new type-1 fuzzy variable (universe)
12 | 
13 |         Arguments:
14 |         ----------
15 |         min_val -- number, minimum value of variable
16 |         max_val -- number, maximum value of variable
17 |         res -- int, resolution of variable
18 |         '''
19 |         self._sets={}
20 |         self._max_val = max_val
21 |         self._min_val = min_val
22 |         self._res = res
23 |         self._name = name
24 | 
25 |     def __str__(self):
26 |         return ', '.join(self._sets.keys())
27 | 
28 |     @property
29 |     def name(self):
30 |         return self._name
31 | 
32 |     def _add_set(self, name, f_set):
33 |         '''
34 |         adds a fuzzy set to the variable
35 | 
36 |         Arguments:
37 |         ----------
38 |         name -- string, name of the set
39 |         f_set -- FuzzySet, the set
40 |         '''
41 |         self._sets[name] = f_set
42 | 
43 |     def get_set(self, name):
44 |         '''
45 |         returns a set given the name
46 |         Arguments:
47 |         ----------
48 |         name -- str, set name
49 | 
50 |         Returns:
51 |         --------
52 |         set -- FuzzySet, the set
53 |         '''
54 |         return self._sets[name]
55 | 
56 |     def add_triangular(self, name, low, mid, high):
57 |         new_set = FuzzySet.create_triangular(name, self._min_val, self._max_val, self._res, low, mid, high)
58 |         self._add_set(name, new_set)
59 |         return new_set
60 | 
61 |     def add_trapezoidal(self, name, a, b, c, d):
62 |         new_set = FuzzySet.
create_trapezoidal(name, self._min_val, self._max_val, self._res, a, b, c, d) 63 | self._add_set(name, new_set) 64 | return new_set 65 | 66 | def plot_variable(self, ax=None, show=True): 67 | ''' 68 | plots a graphical representation of the fuzzy variable 69 | 70 | Reference: 71 | ---------- 72 | https://stackoverflow.com/questions/4700614/how-to-put-the-legend-out-of-the-plot 73 | ''' 74 | if ax == None: 75 | ax = plt.subplot(111) 76 | 77 | for n ,s in self._sets.items(): 78 | ax.plot(s.domain_elements(), s.dom_elements(), label=n) 79 | 80 | # Shrink current axis by 20% 81 | pos = ax.get_position() 82 | ax.set_position([pos.x0, pos.y0, pos.width * 0.8, pos.height]) 83 | ax.grid(True, which='both', alpha=0.4) 84 | ax.set_title(self._name) 85 | ax.set(xlabel='x', ylabel='$\mu (x)$') 86 | 87 | # Put a legend to the right of the current axis 88 | ax.legend(loc='center left', bbox_to_anchor=(1, 0.5)) 89 | 90 | if show: 91 | plt.show() 92 | -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/fuzzy_inference/fuzzy_system/fuzzy_variable_input.py: -------------------------------------------------------------------------------- 1 | from .fuzzy_variable import FuzzyVariable 2 | 3 | class FuzzyInputVariable(FuzzyVariable): 4 | 5 | def __init__(self, name, min_val, max_val, res): 6 | super().__init__(name, min_val, max_val, res) 7 | 8 | def fuzzify(self, value): 9 | ''' 10 | performs fuzzification of the variable. used when the 11 | variable is an input one 12 | 13 | Arguments: 14 | ----------- 15 | value -- number, input value for the variable 16 | 17 | ''' 18 | # get dom for each set and store it - it will be required for each rule 19 | for set_name, f_set in self._sets.items(): 20 | f_set.last_dom_value = f_set[value] 21 | 22 | def fuzzify_info(self, value): 23 | ''' 24 | performs fuzzification of the variable. 
used when the 25 | variable is an input one 26 | 27 | Arguments: 28 | ----------- 29 | value -- number, input value for the variable 30 | 31 | ''' 32 | # get dom for each set and store it - it will be required for each rule 33 | for set_name, f_set in self._sets.items(): 34 | f_set.last_dom_value = f_set[value] 35 | 36 | res = [] 37 | 38 | res.append(self._name) 39 | res.append('\n') 40 | 41 | for _, f_set in self._sets.items(): 42 | res.append(f_set.name) 43 | res.append(str(f_set.last_dom_value)) 44 | res.append('\n') 45 | 46 | return ' '.join(res) 47 | 48 | 49 | if __name__ == "__main__": 50 | pass -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/fuzzy_inference/fuzzy_system/fuzzy_variable_output.py: -------------------------------------------------------------------------------- 1 | from .fuzzy_variable import FuzzyVariable 2 | from .fuzzy_set import FuzzySet 3 | 4 | class FuzzyOutputVariable(FuzzyVariable): 5 | 6 | def __init__(self, name, min_val, max_val, res): 7 | super().__init__(name, min_val, max_val, res) 8 | self._output_distribution = FuzzySet(name, min_val, max_val, res) 9 | 10 | def clear_output_distribution(self): 11 | self._output_distribution.clear_set() 12 | 13 | def add_rule_contribution(self, rule_consequence): 14 | self._output_distribution = self._output_distribution.union(rule_consequence) 15 | 16 | def get_crisp_output(self): 17 | return self._output_distribution.cog_defuzzify() 18 | 19 | def get_crisp_output_info(self): 20 | return self._output_distribution.cog_defuzzify(), self._output_distribution 21 | 22 | 23 | if __name__ == "__main__": 24 | pass -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/fuzzy_learning/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carmelgafa/ml-from-scratch/c51c4f4c88829da26a448dfc6bc48d9ec706df96/ml_algorithms/src/algorithms/fuzzy_learning/__init__.py -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/fuzzy_learning/data/Wine Quality Datasets.url: -------------------------------------------------------------------------------- 1 | [InternetShortcut] 2 | URL=http://www3.dsi.uminho.pt/pcortez/wine/ 3 | -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/fuzzy_learning/data/sample_set.csv: -------------------------------------------------------------------------------- 1 | X,y 2 | 0, 0.1 3 | 1,0.4 4 | 2,3 5 | 3,2.8 6 | 4,3.5 7 | 5,5.9 8 | 6,5.8 9 | 7,7.2 10 | 8,7.5 11 | 9,11 12 | 10,9.7 13 | -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/fuzzy_learning/data/sbp_age.csv: -------------------------------------------------------------------------------- 1 | Age,SBP 2 | 60,117 3 | 61,120 4 | 74,145 5 | 57,129 6 | 63,132 7 | 68,135 8 | 67,129 9 | 66,110 10 | 77,163 11 | 63,136 12 | 54,115 13 | 63,118 14 | 76,132 15 | 60,111 16 | 61,112 17 | 65,147 18 | 79,138 19 | -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/fuzzy_learning/data/sensor_data.csv: -------------------------------------------------------------------------------- 1 | x,Y 2 | 0,1.000 3 | 1,0.815 4 | 2,0.940 5 | 3,0.833 6 | 4,2.032 7 | 5,2.141 8 | 6,3.183 9 | 7,0.214 10 | 8,1.769 11 | 9,-0.181 12 | 10,0.906 13 | 11,2.888 14 | 12,-0.271 15 
| 13,1.103 16 | 14,5.057 17 | 15,4.582 18 | 16,1.911 19 | 17,5.965 20 | 18,8.902 21 | 19,0.490 22 | 20,10.397 23 | 21,9.897 24 | 22,6.032 25 | 23,4.200 26 | 24,16.434 27 | 25,8.384 28 | 26,5.565 29 | 27,6.577 30 | 28,20.259 31 | 29,17.668 32 | 30,22.978 33 | 31,23.467 34 | 32,21.809 35 | 33,32.593 36 | 34,23.070 37 | 35,29.195 38 | 36,38.028 39 | 37,36.635 40 | 38,45.651 41 | 39,42.955 42 | 40,50.169 43 | 41,38.612 44 | 42,47.907 45 | 43,54.806 46 | 44,55.171 47 | 45,65.676 48 | 46,69.167 49 | 47,81.936 50 | 48,100.925 51 | 49,102.745 52 | -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/fuzzy_learning/data/winequality-names.txt: -------------------------------------------------------------------------------- 1 | Citation Request: 2 | This dataset is public available for research. The details are described in [Cortez et al., 2009]. 3 | Please include this citation if you plan to use this database: 4 | 5 | P. Cortez, A. Cerdeira, F. Almeida, T. Matos and J. Reis. 6 | Modeling wine preferences by data mining from physicochemical properties. 7 | In Decision Support Systems>, Elsevier, 47(4):547-553. ISSN: 0167-9236. 8 | 9 | Available at: [@Elsevier] http://dx.doi.org/10.1016/j.dss.2009.05.016 10 | [Pre-press (pdf)] http://www3.dsi.uminho.pt/pcortez/winequality09.pdf 11 | [bib] http://www3.dsi.uminho.pt/pcortez/dss09.bib 12 | 13 | 1. Title: Wine Quality 14 | 15 | 2. Sources 16 | Created by: Paulo Cortez (Univ. Minho), António Cerdeira, Fernando Almeida, Telmo Matos and José Reis (CVRVV) @ 2009 17 | 18 | 3. Past Usage: 19 | 20 | P. Cortez, A. Cerdeira, F. Almeida, T. Matos and J. Reis. 21 | Modeling wine preferences by data mining from physicochemical properties. 22 | In Decision Support Systems>, Elsevier, 47(4):547-553. ISSN: 0167-9236. 23 | 24 | In the above reference, two datasets were created, using red and white wine samples. 25 | The inputs include objective tests (e.g. PH values) and the output is based on sensory data 26 | (median of at least 3 evaluations made by wine experts). Each expert graded the wine quality 27 | between 0 (very bad) and 10 (very excellent). Several data mining methods were applied to model 28 | these datasets under a regression approach. The support vector machine model achieved the 29 | best results. Several metrics were computed: MAD, confusion matrix for a fixed error tolerance (T), 30 | etc. Also, we plot the relative importances of the input variables (as measured by a sensitivity 31 | analysis procedure). 32 | 33 | 4. Relevant Information: 34 | 35 | These datasets can be viewed as classification or regression tasks. 36 | The classes are ordered and not balanced (e.g. there are munch more normal wines than 37 | excellent or poor ones). Outlier detection algorithms could be used to detect the few excellent 38 | or poor wines. Also, we are not sure if all input variables are relevant. So 39 | it could be interesting to test feature selection methods. 40 | 41 | 5. Number of Instances: red wine - 1599; white wine - 4898. 42 | 43 | 6. Number of Attributes: 11 + output attribute 44 | 45 | Note: several of the attributes may be correlated, thus it makes sense to apply some sort of 46 | feature selection. 47 | 48 | 7. Attribute information: 49 | 50 | For more information, read [Cortez et al., 2009]. 
51 | 52 | Input variables (based on physicochemical tests): 53 | 1 - fixed acidity 54 | 2 - volatile acidity 55 | 3 - citric acid 56 | 4 - residual sugar 57 | 5 - chlorides 58 | 6 - free sulfur dioxide 59 | 7 - total sulfur dioxide 60 | 8 - density 61 | 9 - pH 62 | 10 - sulphates 63 | 11 - alcohol 64 | Output variable (based on sensory data): 65 | 12 - quality (score between 0 and 10) 66 | 67 | 8. Missing Attribute Values: None 68 | -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/fuzzy_learning/data/winequality-names.txt.bak: -------------------------------------------------------------------------------- 1 | Citation Request: 2 | This dataset is public available for research. The details are described in [Cortez et al., 2009]. 3 | Please include this citation if you plan to use this database: 4 | 5 | P. Cortez, A. Cerdeira, F. Almeida, T. Matos and J. Reis. 6 | Modeling wine preferences by data mining from physicochemical properties. 7 | In Decision Support Systems>, Elsevier, 47(4):547-553. ISSN: 0167-9236. 8 | 9 | Available at: [@Elsevier] http://dx.doi.org/10.1016/j.dss.2009.05.016 10 | [Pre-press (pdf)] http://www3.dsi.uminho.pt/pcortez/winequality09.pdf 11 | [bib] http://www3.dsi.uminho.pt/pcortez/dss09.bib 12 | 13 | 1. Title: Wine Quality 14 | 15 | 2. Sources 16 | Created by: Paulo Cortez (Univ. Minho), António Cerdeira, Fernando Almeida, Telmo Matos and José Reis (CVRVV) @ 2009 17 | 18 | 3. Past Usage: 19 | 20 | P. Cortez, A. Cerdeira, F. Almeida, T. Matos and J. Reis. 21 | Modeling wine preferences by data mining from physicochemical properties. 22 | In Decision Support Systems>, Elsevier, 47(4):547-553. ISSN: 0167-9236. 23 | 24 | In the above reference, two datasets were created, using red and white wine samples. 25 | The inputs include objective tests (e.g. PH values) and the output is based on sensory data 26 | (median of at least 3 evaluations made by wine experts). Each expert graded the wine quality 27 | between 0 (very bad) and 10 (very excellent). Several data mining methods were applied to model 28 | these datasets under a regression approach. The support vector machine model achieved the 29 | best results. Several metrics were computed: MAD, confusion matrix for a fixed error tolerante (T), 30 | etc. Also, we plot the relative importances of the input variables (as measured by a sensitivity 31 | analysis procedure). 32 | 33 | 4. Relevant Information: 34 | 35 | These datasets can be viewed as classification or regression tasks. 36 | The classes are ordered and not balanced (e.g. there are munch more normal wines than 37 | excellent or poor ones). Outlier detection algorithms could be used to detect the few excellent 38 | or poor wines. Also, we are not sure if all input variables are relevant. So 39 | it could be interesting to test feature selection methods. 40 | 41 | 5. Number of Instances: red wine - 1599; white wine - 4898. 42 | 43 | 6. Number of Attributes: 11 + output attribute 44 | 45 | Note: several of the attributes may be correlated, thus it makes sense to apply some sort of 46 | feature selection. 47 | 48 | 7. Attribute information: 49 | 50 | For more information, read [Cortez et al., 2009]. 
/ml_algorithms/src/algorithms/fuzzy_learning/fuzzy_learning_sample_set.py:
--------------------------------------------------------------------------------
1 | from fuzzy_system.fuzzy_learning_helper import load_sample_set
2 | from fuzzy_system.fuzzy_learning_system import FuzzyLearningSystem
3 | import numpy as np
4 | import matplotlib.pyplot as plt
5 | import pandas as pd
6 | import random
7 | 
8 | def generate_sample_data():  # NOTE: unfinished stub -- only prints a single random draw
9 |     random.seed(42)
10 |     df = pd.DataFrame()
11 |     r = random.uniform(-1,1)
12 |     print(r)
13 | 
14 | 
15 | def test_model():
16 |     X, y = load_sample_set()
17 | 
18 |     # X_train, X_test, y_train, y_test = split_train_test(X, y, test_size = 0.10)
19 |     X_train = X
20 |     X_test = X
21 |     y_train = y
22 |     y_test = y
23 | 
24 |     learning_system = FuzzyLearningSystem(res=1000)
25 | 
26 |     learning_system.fit(X_train, y_train, X_n=4, y_n=2)
27 |     print(learning_system)
28 | 
29 |     score = learning_system.score(X_test, y_test)
30 |     print(score)
31 | 
32 |     df = pd.DataFrame()
33 | 
34 |     for i in np.arange(0,11,0.5):
35 | 
36 |         y_hat = learning_system.get_result({'X':i})['y']
37 | 
38 |         a_row = pd.Series([i, y_hat])
39 |         row_df = pd.DataFrame([a_row])
40 |         df = pd.concat([row_df, df])
41 | 
42 | 
43 |     plt.scatter(X, y)
44 |     plt.scatter(df[0], df[1])
45 |     plt.show()
46 | 
47 | if __name__ == "__main__":
48 |     test_model()
--------------------------------------------------------------------------------
/ml_algorithms/src/algorithms/fuzzy_learning/fuzzy_learning_sbp.py:
--------------------------------------------------------------------------------
1 | from fuzzy_system.fuzzy_learning_helper import load_sbp
2 | from fuzzy_system.fuzzy_learning_system import FuzzyLearningSystem
3 | import numpy as np
4 | import matplotlib.pyplot as plt
5 | import pandas as pd
6 | 
7 | X, y = load_sbp()
8 | # # X, y = load_linear_model()
9 | 
10 | # min_max_scaler = preprocessing.StandardScaler()
11 | 
12 | # x = X.values #returns a numpy array
13 | # x_scaled = min_max_scaler.fit_transform(x)
14 | # X = pd.DataFrame(x_scaled)
15 | 
16 | # print(X.min(), X.max())
17 | 
18 | 
19 | # X_train, X_test, y_train, y_test = split_train_test(X, y, test_size = 0.10)
20 | X_train = X
21 | X_test = X
22 | y_train = y
23 | y_test = y
24 | 
25 | learning_system = FuzzyLearningSystem(res=1000)
26 | 
27 | learning_system.fit(X_train, y_train, X_n=3, y_n=4)
28 | 
29 | # learning_system.plot_variables()
30 | 
31 | 
32 | print(learning_system)
33 | 
34 | score = learning_system.score(X_test, y_test)
35 | print(score)
36 | 
37 | df = pd.DataFrame()
38 | 
39 | for i in np.arange(54,79,0.5):
40 | 
41 |     y_hat = learning_system.get_result({'Age':i})['SBP']
42 | 
43 |     a_row = pd.Series([i, y_hat])
44 |     row_df = pd.DataFrame([a_row])
45 |     df = pd.concat([row_df, df])
46 | 
47 | 
48 | plt.scatter(X, y)
49 | plt.scatter(df[0], df[1])
50 | plt.show()
51 | 
52 | 
--------------------------------------------------------------------------------
/ml_algorithms/src/algorithms/fuzzy_learning/fuzzy_system/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/carmelgafa/ml-from-scratch/c51c4f4c88829da26a448dfc6bc48d9ec706df96/ml_algorithms/src/algorithms/fuzzy_learning/fuzzy_system/__init__.py
--------------------------------------------------------------------------------
/ml_algorithms/src/algorithms/fuzzy_learning/fuzzy_system/fuzzy_associative_memory.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | 
3 | 
4 | class FuzzyAssociativeMemory():
5 | 
6 |     def __init__(self, variables_info, fam_shape):
7 |         '''
8 |         variables_info -- dict mapping each variable name to its ordered list of set names
9 |         fam_shape -- shape of the memory array
10 |         '''
11 |         self._variables_info = variables_info
12 |         self._fam = np.empty(fam_shape, dtype='object')
13 | 
14 |     def set_entity(self, location, value):
15 |         '''
16 |         stores value in the cell addressed by location, a dict of the form {variable_name: set_name}
17 |         '''
18 |         entity_location = []
19 | 
20 |         for variable, f_sets in self._variables_info.items():
21 |             f_set = location[variable]
22 |             entity_location.append(f_sets.index(f_set))
23 | 
24 |         self._fam[tuple(entity_location)] = value
25 | 
26 | 
27 |     def __str__(self):
28 |         return str(self._fam)
29 | 
--------------------------------------------------------------------------------
/ml_algorithms/src/algorithms/fuzzy_learning/fuzzy_system/fuzzy_clause.py:
--------------------------------------------------------------------------------
1 | '''
2 | Fuzzy Clause class. Used in Fuzzy rule
3 | '''
4 | class FuzzyClause():
5 |     '''
6 |     A fuzzy clause of the type 'variable is set'
7 |     used in fuzzy IF ... THEN ... rules
8 |     clauses can be antecedent (if part) or consequent
9 |     (then part)
10 |     '''
11 | 
12 |     def __init__(self, variable, f_set, degree=1):
13 |         '''
14 |         initialization of the fuzzy clause
15 | 
16 |         Arguments:
17 |         ----------
18 |         variable -- the clause variable in 'variable is set'
19 |         f_set -- the clause set in 'variable is set'
20 |         '''
21 | 
22 |         if f_set is None:
23 |             raise ValueError('f_set must not be None')
24 | 
25 |         if f_set.name == '':
26 |             raise ValueError(f'{f_set} has no set name')
27 | 
28 | 
29 |         self._variable = variable
30 |         self._set = f_set
31 |         self._degree = degree
32 | 
33 |     def __str__(self):
34 |         '''
35 |         string representation of the clause.
36 | 
37 |         Returns:
38 |         --------
39 |         str: str, string representation of the clause in the form
40 |         A is x
41 |         '''
42 |         return f'{self._variable.name} is {self._set.name}'
43 | 
44 |     @property
45 |     def degree(self):
46 |         return self._degree
47 | 
48 | 
49 |     @property
50 |     def variable_name(self):
51 |         '''
52 |         returns the name of the clause variable
53 | 
54 |         Returns:
55 |         --------
56 |         variable_name: str, name of variable
57 |         '''
58 |         return self._variable.name
59 | 
60 |     @property
61 |     def set_name(self):
62 |         '''
63 |         returns the name of the clause set
64 | 
65 |         Returns:
66 |         --------
67 |         set_name: str, name of set
68 |         '''
69 |         return self._set.name
70 | 
71 | 
72 |     def evaluate_antecedent(self):
73 |         '''
74 |         Used when set is antecedent.
75 |         returns the set degree of membership.
76 | 
77 |         Returns:
78 |         --------
79 |         dom -- number, the set degree of membership given a value for
80 |         that variable. This value is determined at an earlier stage
81 |         and stored in the set
82 |         '''
83 |         return self._set.last_dom_value
84 | 
85 |     def evaluate_consequent(self, dom):
86 |         '''
87 |         Used when clause is consequent.
88 |         returns an alpha-cut fuzzy set given some scalar value
89 | 
90 |         Arguments:
91 |         -----------
92 |         dom -- number, scalar value from the antecedent clauses
93 | 
94 |         Returns:
95 |         --------
96 |         set -- Type1FuzzySet, a set resulting from alpha-cut from
97 |         the scalar value
98 |         '''
99 |         return self._set.fuzzy_alpha_cut(dom)
100 | 
101 |     def get_consequent_center_val(self):
102 | 
103 |         return self._set.center_value
--------------------------------------------------------------------------------
/ml_algorithms/src/algorithms/fuzzy_learning/fuzzy_system/fuzzy_learning_helper.py:
--------------------------------------------------------------------------------
1 | import os
2 | import pandas as pd
3 | import numpy as np
4 | 
5 | DATA_PATH = os.path.join(os.path.dirname(__file__), '..', 'data')
6 | 
7 | def load_data(filename, data_path=DATA_PATH, separator=';'):
8 |     csv_path = os.path.join(data_path, filename)
9 |     return pd.read_csv(csv_path, sep=separator)
10 | 
11 | def save_data(data_frame, filename, data_path=DATA_PATH):
12 |     csv_path = os.path.join(data_path, filename)
13 |     return data_frame.to_csv(csv_path, float_format='%.3f', index=False)
14 | 
15 | def split_train_test(X, y, test_size=0.1, random_seed=21):
16 |     np.random.seed(random_seed)
17 |     shuffled_indices = np.random.permutation(len(X))
18 |     set_size = int(len(X) * test_size)
19 | 
20 |     test_indices = shuffled_indices[:set_size]
21 |     train_indices = shuffled_indices[set_size:]
22 | 
23 |     return X.iloc[train_indices], X.iloc[test_indices], y.iloc[train_indices], y.iloc[test_indices]
24 | 
25 | def format_dataset(data, output_attributes_names):
26 |     '''
27 |     Arguments:
28 |     ----------
29 |     data -- original dataset
30 |     output_attributes_names -- str, the name of the output attribute column
31 |     '''
32 |     X = data.loc[:, data.columns != output_attributes_names]
33 |     y = data.loc[:, data.columns == output_attributes_names]
34 | 
35 |     return X, y
36 | 
37 | def load_winequality_red():
38 |     dataset = load_data('winequality-red.csv')
39 |     # print(dataset.shape)
40 | 
41 |     return format_dataset(dataset, 'quality')
42 | 
43 | 
44 | def load_weather():
45 |     dataset = load_data('weatherHistory_adj.csv', separator=',')
46 |     # print(dataset.shape)
47 | 
48 |     return format_dataset(dataset, 'Temperature')
49 | 
50 | 
51 | 
52 | def load_linear_model():
53 |     dataset = load_data('linear_model.csv', separator=',')
54 |     # print(dataset.shape)
55 |     return format_dataset(dataset, 'y')
56 | 
57 | def load_sample_set():
58 |     dataset = load_data('sample_set.csv', separator=',')
59 |     # print(dataset.shape)
60 |     return format_dataset(dataset, 'y')
61 | 
62 | 
63 | def load_sbp():
64 |     dataset = load_data('sbp_age.csv', separator=',')
65 |     # print(dataset.shape)
66 |     return format_dataset(dataset, 'SBP')
67 | 
68 | 
69 | def load_sensor_data():
70 |     dataset = load_data('sensor_data.csv', separator=',')
71 |     # print(dataset.shape)
72 | 
73 |     return format_dataset(dataset, 'Y')
74 | 
75 | 
76 | 
77 | if __name__ == "__main__":
78 |     pass
--------------------------------------------------------------------------------
/ml_algorithms/src/algorithms/fuzzy_learning/fuzzy_system/fuzzy_rule.py:
--------------------------------------------------------------------------------
1 | class FuzzyRule():
2 |     '''
3 |     A fuzzy rule of the type
4 |     IF [antecedent clauses] THEN [consequent clauses]
5 |     '''
6 | 
7 |     # def __init__(self, antecedent_clauses, consequent_clauses):
8 |     def __init__(self):
9 |         '''
10 |         initializes the rule.
        Two data structures are necessary:
11 |         Antecedent clauses list
12 |         consequent clauses list
13 |         '''
14 |         self._antecedent = []
15 |         self._consequent = []
16 |         self._degree = 1
17 | 
18 |     @property
19 |     def degree(self):
20 |         return self._degree
21 | 
22 |     def evaluate_score(self):
23 | 
24 |         output_control = 1
25 | 
26 |         center_values = {}
27 | 
28 |         for ante in self._antecedent:
29 |             output_control = output_control * ante.evaluate_antecedent()
30 | 
31 |         for cons in self._consequent:
32 |             center_values[cons.variable_name] = cons.get_consequent_center_val()
33 | 
34 |         return output_control, center_values
35 | 
36 | 
37 |     def __str__(self):
38 |         '''
39 |         string representation of the rule.
40 | 
41 |         Returns:
42 |         --------
43 |         str: str, string representation of the rule in the form
44 |         IF [antecedent clauses] THEN [consequent clauses]
45 |         '''
46 |         ante = ' and '.join(map(str, self._antecedent))
47 |         cons = ' and '.join(map(str, self._consequent))
48 |         return f'If {ante} then {cons}'
49 | 
50 |     def get_antecedent_str(self):
51 |         ante = ' and '.join(map(str, self._antecedent))
52 |         return ante
53 | 
54 |     def add_antecedent_clause(self, clause):
55 |         '''
56 |         adds an antecedent clause to the rule
57 | 
58 |         Arguments:
59 |         -----------
60 |         clause -- FuzzyClause, the antecedent clause
61 |         '''
62 |         self._antecedent.append(clause)
63 |         self._degree = self._degree * clause.degree
64 | 
65 |     def add_consequent_clause(self, clause):
66 |         '''
67 |         adds a consequent clause to the rule
68 | 
69 |         Arguments:
70 |         -----------
71 |         clause -- FuzzyClause, the consequent clause
72 |         '''
73 |         self._consequent.append(clause)
74 |         self._degree = self._degree * clause.degree
75 | 
76 |     def evaluate(self):
77 |         '''
78 |         evaluation of the rule.
79 |         the antecedent clauses are executed and the minimum degree of
80 |         membership is retained.
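        For example, if a rule has antecedent clauses that evaluate to 0.7
        and 0.4, the rule strength is min(0.7, 0.4) = 0.4 and every
        consequent set is alpha-cut at 0.4.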
81 |         This is used in the consequent clauses to alpha-cut the consequent
82 |         set
83 |         The values are returned in a dict of the form {variable_name: alpha-cut set, ...}
84 | 
85 |         Returns:
86 |         --------
87 |         rule_consequence -- dict, the resulting sets in the form
88 |         {variable_name: alpha-cut set, ...}
89 |         '''
90 |         # rule dom initialize to 1 as min operator will be performed
91 |         rule_strength = 1
92 | 
93 |         # execute all antecedent clauses, keeping the minimum of the
94 |         # returned doms to determine the rule strength
95 |         for ante_clause in self._antecedent:
96 |             rule_strength = min(ante_clause.evaluate_antecedent(), rule_strength)
97 | 
98 |         # initialize the results dict
99 |         rule_consequence = {}
100 | 
101 |         # execute consequent clauses, adding each result to the results dict using the
102 |         # variable name as key
103 |         for consequent_clause in self._consequent:
104 |             rule_consequence[consequent_clause.variable_name] = consequent_clause.evaluate_consequent(rule_strength)
105 | 
106 |         # return results
107 |         return rule_consequence
108 | 
109 |     def get_antecedent_list(self):
110 | 
111 |         ret = []
112 | 
113 |         for ante_clause in self._antecedent:
114 |             ret.append(ante_clause.set_name)
115 | 
116 |         return ret
117 | 
118 |     def get_consequent_list(self):
119 | 
120 |         ret = []
121 | 
122 |         for cons_clause in self._consequent:
123 |             ret.append(cons_clause.set_name)
124 | 
125 |         return ret
126 | 
127 | 
128 |     def get_csv_line(self, header):
129 | 
130 |         row = [None] * len(header)
131 | 
132 |         for ante in self._antecedent:
133 |             idx = header.index(ante.variable_name)
134 |             row[idx] = ante.set_name
135 | 
136 |         for cons in self._consequent:
137 |             idx = header.index(cons.variable_name)
138 |             row[idx] = cons.set_name
139 | 
140 |         return row
--------------------------------------------------------------------------------
/ml_algorithms/src/algorithms/fuzzy_learning/fuzzy_system/system_settings.py:
--------------------------------------------------------------------------------
1 | '''
2 | System settings
3 | '''
4 | 
5 | 
6 | PRECISION = 6 # precision used for rounding operations
7 | 
--------------------------------------------------------------------------------
/ml_algorithms/src/algorithms/fuzzy_learning/fuzzy_system_example.py:
--------------------------------------------------------------------------------
1 | from fuzzy_system.type1_fuzzy_variable import Type1FuzzyVariable
2 | from fuzzy_system.fuzzy_system import FuzzySystem
3 | 
4 | temp = Type1FuzzyVariable(10, 40, 100, 'Temperature')
5 | temp.add_triangular('Cold', 10, 10, 25)
6 | temp.add_triangular('Medium', 15, 25, 35)
7 | temp.add_triangular('Hot', 25, 40, 40)
8 | 
9 | humidity = Type1FuzzyVariable(20, 100, 100, 'Humidity')
10 | humidity.add_triangular('Wet', 20, 20, 60)
11 | humidity.add_trapezoidal('Normal', 30, 60, 90)
12 | humidity.add_triangular('Dry', 60, 100, 100)
13 | 
14 | motor_speed = Type1FuzzyVariable(0, 100, 100, 'Speed')
15 | motor_speed.add_triangular('Slow', 0, 0, 50)
16 | motor_speed.add_triangular('Moderate', 10, 50, 90)
17 | motor_speed.add_triangular('Fast', 50, 100, 100)
18 | 
19 | system = FuzzySystem()
20 | system.add_input_variable(temp)
21 | system.add_input_variable(humidity)
22 | system.add_output_variable(motor_speed)
23 | 
24 | system.add_rule(
25 |     { 'Temperature':'Cold',
26 |       'Humidity':'Wet' },
27 |     { 'Speed':'Slow'})
28 | 
29 | system.add_rule(
30 |     { 'Temperature':'Cold',
31 |       'Humidity':'Normal' },
32 |     { 'Speed':'Slow'})
33 | 
34 | system.add_rule(
35 |     { 'Temperature':'Medium',
36 |       'Humidity':'Wet' },
37 |     { 'Speed':'Slow'})
38 | 
39 | system.add_rule(
40 |     { 'Temperature':'Medium',
41 |       'Humidity':'Normal' },
42 |     { 'Speed':'Moderate'})
43 | 
44 | system.add_rule(
45 |     { 'Temperature':'Cold',
46 |       'Humidity':'Dry' },
47 |     { 'Speed':'Moderate'})
48 | 
49 | system.add_rule(
50 |     { 'Temperature':'Hot',
51 |       'Humidity':'Wet' },
52 |     { 'Speed':'Moderate'})
53 | 
54 | system.add_rule(
55 |     { 'Temperature':'Hot',
56 |       'Humidity':'Normal' },
57 |     { 'Speed':'Fast'})
58 | 
59 | system.add_rule(
60 |     { 'Temperature':'Hot',
61 |       'Humidity':'Dry' },
62 |     { 'Speed':'Fast'})
63 | 
64 | system.add_rule(
65 |     { 'Temperature':'Medium',
66 |       'Humidity':'Dry' },
67 |     { 'Speed':'Fast'})
68 | 
69 | output = system.evaluate_output({
70 |     'Temperature':18,
71 |     'Humidity':60
72 | })
73 | 
74 | print(output)
75 | 
76 | system.plot_system()
--------------------------------------------------------------------------------
/ml_algorithms/src/algorithms/fuzzy_learning/fuzzy_system_exec/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/carmelgafa/ml-from-scratch/c51c4f4c88829da26a448dfc6bc48d9ec706df96/ml_algorithms/src/algorithms/fuzzy_learning/fuzzy_system_exec/__init__.py
--------------------------------------------------------------------------------
/ml_algorithms/src/algorithms/fuzzy_learning/fuzzy_system_exec/system_test.py:
--------------------------------------------------------------------------------
1 | from fuzzy_system.type1_fuzzy_variable import Type1FuzzyVariable
2 | from fuzzy_system.fuzzy_system import FuzzySystem
3 | from fuzzy_system.fuzzy_clause import FuzzyClause
4 | 
5 | 
6 | # add a temperature linguistic variable
7 | input_var = Type1FuzzyVariable(0, 100, 100, 'Temperature')
8 | input_var.add_triangular('S2', 0, 0, 25)
9 | input_var.add_triangular('S1', 0, 25, 50)
10 | input_var.add_triangular('CE', 25, 50, 75)
11 | input_var.add_triangular('B1', 50, 75, 100)
12 | input_var.add_triangular('B2', 75, 100, 100)
13 | 
14 | 
15 | # add a humidity linguistic variable
16 | input2_var = Type1FuzzyVariable(0, 100, 100, 'Humidity')
17 | input2_var.add_triangular('S2', 0, 0, 25)
18 | input2_var.add_triangular('S1', 0, 25, 50)
19 | input2_var.add_triangular('CE', 25, 50, 75)
20 | input2_var.add_triangular('B1', 50, 75, 100)
21 | input2_var.add_triangular('B2', 75, 100, 100)
22 | 
23 | output_var = Type1FuzzyVariable(0, 100, 100, 'Speed')
24 | output_var.add_triangular('L2', 0, 0, 25)
25 | output_var.add_triangular('L1', 0, 25, 50)
26 | output_var.add_triangular('M', 25, 50, 75)
27 | output_var.add_triangular('H1', 50, 75, 100)
28 | output_var.add_triangular('H2', 75, 100, 100)
29 | 
30 | system = FuzzySystem()
31 | system.add_input_variable(input_var)
32 | system.add_input_variable(input2_var)
33 | system.add_output_variable(output_var)
34 | 
35 | ante = {
36 |     'Temperature' : 'S2',
37 |     'Humidity' : 'S2'
38 | }
39 | cons = {
40 |     'Speed' : 'H2'
41 | }
42 | 
43 | system.add_rule(ante, cons)
44 | 
45 | print(system)
46 | 
--------------------------------------------------------------------------------
/ml_algorithms/src/algorithms/fuzzy_learning/fuzzy_system_exec/system_test2.py:
--------------------------------------------------------------------------------
1 | import sys
2 | sys.path.append("..")
3 | 
4 | from fuzzy_system.type1_fuzzy_variable import Type1FuzzyVariable
5 | 
6 | from fuzzy_system.fuzzy_system import FuzzySystem
7 | from fuzzy_system.fuzzy_clause import FuzzyClause
8 | 
9 | x1 = Type1FuzzyVariable(0, 100, 100, 'x1')
10 | x1.add_triangular('S', 0, 25, 50)
11 | x1.add_triangular('M', 25, 50, 75)
12 | x1.add_triangular('L', 50, 75, 100)
13 | 
14 | x2 = Type1FuzzyVariable(0, 100, 100, 'x2')
15 | x2.add_triangular('S', 0, 25, 50)
16 | x2.add_triangular('M', 25, 50, 75)
17 | x2.add_triangular('L', 50, 75, 100)
18 | 
19 | y = Type1FuzzyVariable(0, 100, 100, 'y')
20 | y.add_triangular('S', 0, 25, 50)
21 | y.add_triangular('M', 25, 50, 75)
22 | y.add_triangular('L', 50, 75, 100)
23 | 
24 | system = FuzzySystem()
25 | system.add_input_variable(x1)
26 | system.add_input_variable(x2)
27 | system.add_output_variable(y)
28 | 
29 | system.add_rule(
30 |     {
31 |         'x1':'S',
32 |         'x2':'M'
33 |     },
34 |     {
35 |         'y':'S'
36 |     }
37 | )
38 | 
39 | system.add_rule(
40 |     {
41 |         'x1':'M',
42 |         'x2':'L'
43 |     },
44 |     {
45 |         'y':'M'
46 |     }
47 | )
48 | output = system.evaluate_output({
49 |     'x1':44,
50 |     'x2':61
51 | })
52 | 
53 | # print(output)
--------------------------------------------------------------------------------
/ml_algorithms/src/algorithms/fuzzy_learning/fuzzy_system_test.py:
--------------------------------------------------------------------------------
1 | from fuzzy_system.type1_fuzzy_variable import Type1FuzzyVariable
2 | from fuzzy_system.fuzzy_system import FuzzySystem
3 | 
4 | x1 = Type1FuzzyVariable(0, 100, 100, 'x1')
5 | x1.add_triangular('S', 0, 25, 50)
6 | x1.add_triangular('M', 25, 50, 75)
7 | x1.add_triangular('L', 50, 75, 100)
8 | 
9 | x2 = Type1FuzzyVariable(0, 100, 100, 'x2')
10 | x2.add_triangular('S', 0, 25, 50)
11 | x2.add_triangular('M', 25, 50, 75)
12 | x2.add_triangular('L', 50, 75, 100)
13 | 
14 | y = Type1FuzzyVariable(0, 100, 100, 'y')
15 | y.add_triangular('S', 0, 25, 50)
16 | y.add_triangular('M', 25, 50, 75)
17 | y.add_triangular('L', 50, 75, 100)
18 | 
19 | z = Type1FuzzyVariable(0, 100, 100, 'z')
20 | z.add_triangular('S', 0, 25, 50)
21 | z.add_triangular('M', 25, 50, 75)
22 | z.add_triangular('L', 50, 75, 100)
23 | 
24 | system = FuzzySystem()
25 | system.add_input_variable(x1)
26 | system.add_input_variable(x2)
27 | system.add_output_variable(y)
28 | system.add_output_variable(z)
29 | 
30 | system.add_rule(
31 |     { 'x1':'S',
32 |       'x2':'S' },
33 |     { 'y':'S',
34 |       'z':'L' })
35 | 
36 | system.add_rule(
37 |     { 'x1':'M',
38 |       'x2':'M' },
39 |     { 'y':'M',
40 |       'z':'M' })
41 | 
42 | system.add_rule(
43 |     { 'x1':'L',
44 |       'x2':'L' },
45 |     { 'y':'L',
46 |       'z':'S' })
47 | 
48 | system.add_rule(
49 |     { 'x1':'S',
50 |       'x2':'M' },
51 |     { 'y':'S',
52 |       'z':'L' })
53 | 
54 | system.add_rule(
55 |     { 'x1':'M',
56 |       'x2':'S' },
57 |     { 'y':'S',
58 |       'z':'L' })
59 | 
60 | system.add_rule(
61 |     { 'x1':'L',
62 |       'x2':'M' },
63 |     { 'y':'L',
64 |       'z':'S' })
65 | 
66 | system.add_rule(
67 |     { 'x1':'M',
68 |       'x2':'L' },
69 |     { 'y':'L',
70 |       'z':'S' })
71 | 
72 | system.add_rule(
73 |     { 'x1':'L',
74 |       'x2':'S' },
75 |     { 'y':'M',
76 |       'z':'M' })
77 | 
78 | system.add_rule(
79 |     { 'x1':'S',
80 |       'x2':'L' },
81 |     { 'y':'M',
82 |       'z':'M' })
83 | 
84 | output = system.evaluate_output({
85 |     'x1':35,
86 |     'x2':75
87 | })
88 | 
89 | fam = system.create_fam('y')
90 | 
91 | print(fam)
92 | 
93 | print(output)
94 | 
--------------------------------------------------------------------------------
/ml_algorithms/src/algorithms/fuzzy_learning/poc/data_analysis_poc.py:
--------------------------------------------------------------------------------
1 | import os
2 | import pandas as pd
3 | from pandas import DataFrame
4 | import matplotlib.pyplot as plt
5 | 
6 | dirname = os.path.dirname(__file__)
7 | filename = os.path.join(dirname, '..', 'data', 'winequality-red.csv')
8 | 
9 | df = pd.read_csv(filename, sep=';')
10 | # print(df.head())
11 | 
12 | df2 = df['chlorides']
13 | # print(df2.head())
14 | 
15 | df3 = df[['free sulfur dioxide', 'total sulfur dioxide']]
16 | # print(df3.head())
17 | 
18 | 
19 | to_rename = {'fixed acidity':'fixed_acidity',
20 |     'volatile acidity':'volatile_acidity',
21 |     'citric acid':'citric_acid',
22 |     'residual sugar':'residual_sugar',
23 |     'free sulfur dioxide':'free_sulfur_dioxide',
24 |     'total sulfur dioxide':'total_sulfur_dioxide'
25 |     }
26 | 
27 | df.rename(columns=to_rename, inplace=True)
28 | # print(df.head())
29 | 
30 | df4 = df[(df['residual_sugar'] > 10)]
31 | # print(df4)
32 | 
33 | df['sulphur_dioxide_difference'] = df['total_sulfur_dioxide'] - df['free_sulfur_dioxide']
34 | # print(df.head())
35 | 
36 | df[['total_sulfur_dioxide','free_sulfur_dioxide','sulphur_dioxide_difference']][200:300].plot()
37 | # plt.show()
38 | 
--------------------------------------------------------------------------------
/ml_algorithms/src/algorithms/fuzzy_learning/poc/set_generation.py:
--------------------------------------------------------------------------------
1 | from type2fuzzy import Type1FuzzyVariable
2 | 
3 | # adding an age linguistic variable
4 | var = Type1FuzzyVariable(0, 100, 100)
5 | 
6 | var.add_triangular('very young', 0, 0, 20)
7 | var.add_triangular('young', 10, 20, 30)
8 | var.add_triangular('adult', 20, 40, 60)
9 | var.add_triangular('old', 50, 70, 90)
10 | var.add_triangular('very old', 70, 100, 100)
11 | 
12 | var.plot_variable()
--------------------------------------------------------------------------------
/ml_algorithms/src/algorithms/fuzzy_learning/poc/set_generation_2.py:
--------------------------------------------------------------------------------
1 | from fuzzy_system.type1_fuzzy_variable import Type1FuzzyVariable
2 | 
3 | # adding an age linguistic variable
4 | var = Type1FuzzyVariable(0, 100, 100)
5 | 
6 | # generate (2*3)+1 = 7 sets
7 | var.generate_sets_mean(3, 30)
8 | 
9 | var.plot_variable()
--------------------------------------------------------------------------------
/ml_algorithms/src/algorithms/fuzzy_learning/poc/set_generation_notes.py:
-------------------------------------------------------------------------------- 1 | from type2fuzzy import Type1FuzzyVariable 2 | 3 | # adding an age linguistic variable 4 | var = Type1FuzzyVariable(0, 100, 100) 5 | 6 | var.add_triangular('S2', 0, 0, 25) 7 | var.add_triangular('S1', 0, 25, 50) 8 | var.add_triangular('CE', 25, 50, 75) 9 | var.add_triangular('B1', 50, 75, 100) 10 | var.add_triangular('B2', 75, 100, 100) 11 | 12 | var.plot_variable() -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/fuzzy_learning/poc/set_naming.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | n=5 4 | 5 | x = ['s' + str(i) for i in range(n+1,1,-1)] + ['ce'] + ['b' + str(i) for i in range(1,n+1,1)] 6 | 7 | # print(x) -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/fuzzy_learning/sensor_comparison.py: -------------------------------------------------------------------------------- 1 | from fuzzy_system.fuzzy_learning_helper import load_sensor_data 2 | from fuzzy_system.fuzzy_learning_system import FuzzyLearningSystem 3 | import numpy as np 4 | import matplotlib.pyplot as plt 5 | import pandas as pd 6 | 7 | X, y = load_sensor_data() 8 | 9 | clean_y = y.copy() 10 | clean_y['Y'] = 1.1**X['x'] 11 | 12 | X_train = X 13 | X_test = X 14 | y_train = y 15 | y_test = y 16 | 17 | fig, axes = plt.subplots(nrows=3, ncols=3) 18 | df = pd.DataFrame() 19 | 20 | for x_range in range(3, 6): 21 | for y_range in range(2, 5): 22 | 23 | learning_system = FuzzyLearningSystem(res=1000) 24 | learning_system.fit(X_train, y_train, X_n=x_range, y_n=y_range) 25 | 26 | # learning_system.plot_variables() 27 | # print(learning_system) 28 | 29 | score = learning_system.score(X_test, clean_y) 30 | df = df[0:0] 31 | 32 | for i in np.arange(0,50,1): 33 | y_hat = learning_system.get_result({'x':i})['Y'] 34 | a_row = pd.Series([i, y_hat]) 35 | row_df = pd.DataFrame([a_row]) 36 | df = pd.concat([row_df, df]) 37 | 38 | axes[x_range-3, y_range-2].plot(X, clean_y) 39 | # axes[x_range-1, y_range-1].plot(X, y) 40 | axes[x_range-3, y_range-2].plot(df[0], df[1]) 41 | axes[x_range-3, y_range-2].set_title(f'sets x: {1+(2*x_range)}, sets y:{1+(2*y_range)}, R-Squared:{score:1.3f}') 42 | axes[x_range-3, y_range-2].set_xlabel('') 43 | 44 | plt.show() 45 | -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/fuzzy_learning/sensor_data_generate.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Generates the dataset for a noisy sensor having an exponential response 3 | ''' 4 | from random import uniform, seed 5 | from matplotlib import pyplot as plt 6 | from pandas import DataFrame 7 | from fuzzy_system.fuzzy_learning_helper import save_data 8 | 9 | # generate functions 10 | seed(42) 11 | factor = 0.3 12 | x = range(0,50,1) 13 | 14 | y_clean = [(1.1**i) for i in x] 15 | y = [(1.1**i) + ( i * factor * uniform(-1,1)) for i in x] 16 | 17 | # plot 18 | fig, axes = plt.subplots(nrows=1, ncols=2) 19 | axes[0].plot(y_clean) 20 | axes[1].plot(y) 21 | axes[0].set_title('ideal sensor response') 22 | axes[1].set_title('noisy sensor response') 23 | plt.show() 24 | 25 | # write in file 26 | data = { 27 | 'x': x, 28 | 'Y': y 29 | } 30 | df = DataFrame(data) 31 | save_data(df, "sensor_data.csv") 32 | 33 | -------------------------------------------------------------------------------- 
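The scripts below report the quality of a learned system as an R-squared figure (see the plot titles in sensor_comparison.py). A minimal sketch of how that figure can be reproduced by hand, assuming FuzzyLearningSystem.score() implements the usual coefficient of determination (its source is not shown in this listing):

import numpy as np

def r_squared(y_true, y_pred):
    # coefficient of determination: 1 - SS_res / SS_tot
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    ss_res = np.sum((y_true - y_pred) ** 2)
    ss_tot = np.sum((y_true - np.mean(y_true)) ** 2)
    return 1.0 - ss_res / ss_tot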
/ml_algorithms/src/algorithms/fuzzy_learning/sensor_fuzzy_learn.py: -------------------------------------------------------------------------------- 1 | from fuzzy_system.fuzzy_learning_helper import load_sensor_data 2 | from fuzzy_system.fuzzy_learning_system import FuzzyLearningSystem 3 | import numpy as np 4 | import matplotlib.pyplot as plt 5 | import pandas as pd 6 | 7 | X, y = load_sensor_data() 8 | 9 | clean_y = y.copy() 10 | clean_y['Y'] = 1.1**X['x'] 11 | 12 | X_train = X 13 | X_test = X 14 | y_train = y 15 | y_test = y 16 | 17 | learning_system = FuzzyLearningSystem(res=1000) 18 | 19 | learning_system.fit(X_train, y_train, X_n=5, y_n=2) 20 | 21 | score = learning_system.score(X_test, clean_y) 22 | print(score) 23 | 24 | df = pd.DataFrame() 25 | 26 | for i in np.arange(0,50,1): 27 | 28 | y_hat = learning_system.get_result({'x':i})['Y'] 29 | 30 | a_row = pd.Series([i, y_hat]) 31 | row_df = pd.DataFrame([a_row]) 32 | df = pd.concat([row_df, df]) 33 | 34 | plt.plot(X, y) 35 | plt.plot(df[0], df[1]) 36 | 37 | print(learning_system) 38 | 39 | -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/fuzzy_learning/weather_analyse.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | import pandas as pd 5 | import os 6 | 7 | DATA_PATH = os.path.join(os.path.dirname( __file__ ), 'data') 8 | 9 | def load_data(filename, data_path=DATA_PATH, separator=','): 10 | csv_path = os.path.join(data_path, filename) 11 | return pd.read_csv(csv_path, sep=separator) 12 | 13 | 14 | if __name__ == "__main__": 15 | 16 | df = load_data ('weatherHistory_adj.csv') 17 | 18 | # param = 'Temperature' 19 | param = 'Humidity' 20 | 21 | res = df.groupby(pd.Grouper(key='Month'))[param].agg([np.min, np.mean, np.max]) 22 | res = res.sort_values(by=['Month']) 23 | print(res) 24 | 25 | error = [(res['amax']-res['mean']), (res['mean']-res['amin'])] 26 | 27 | res.plot(kind = "barh", y = "mean", legend = False, title = param, xerr=error) 28 | plt.show() 29 | 30 | 31 | 32 | 33 | 34 | -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/fuzzy_learning/weather_fuzzy_learning.py: -------------------------------------------------------------------------------- 1 | from fuzzy_system.fuzzy_learning_helper import load_weather 2 | from fuzzy_system.fuzzy_learning_helper import split_train_test 3 | from fuzzy_system.fuzzy_learning_system import FuzzyLearningSystem 4 | 5 | def execute_test(resolution, x_n, y_n): 6 | 7 | X, y = load_weather() 8 | 9 | X_train, X_test, y_train, y_test = split_train_test(X, y, test_size = 0.2) 10 | 11 | learning_system = FuzzyLearningSystem(res=resolution) 12 | 13 | learning_system.fit(X_train, y_train, X_n=x_n, y_n=y_n) 14 | 15 | score = learning_system.score(X_test, y_test) 16 | 17 | print(learning_system) 18 | 19 | learning_system.generate_rules_csv('weather_rules.csv') 20 | 21 | return score 22 | 23 | if __name__ == "__main__": 24 | result = execute_test(1000,4,16) 25 | print(result) 26 | -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/fuzzy_learning/weather_preprocessing.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import os 3 | 4 | DATA_PATH = os.path.join(os.path.dirname( __file__ ), 'data') 5 | 6 | def load_data(filename, data_path=DATA_PATH, separator=','): 7 | 
csv_path = os.path.join(data_path, filename) 8 | return pd.read_csv(csv_path, sep=separator) 9 | 10 | def save_data(data_frame, filename, data_path=DATA_PATH): 11 | csv_path = os.path.join(data_path, filename) 12 | return data_frame.to_csv(csv_path, float_format='%.3f', index=False) 13 | 14 | def weather_dataset_preprocess(): 15 | 16 | data_n = load_data ('weatherHistory.csv') 17 | 18 | # get month from date 19 | data_n.loc[:, 'DateTime'] = pd.to_datetime(data_n['Formatted Date'], utc=True) 20 | data_n['Month'] = data_n['DateTime'].dt.month 21 | 22 | data_n = data_n.drop(columns=[ 23 | 'Formatted Date', 24 | 'DateTime', 25 | 'Summary', 26 | 'Precip Type', 27 | 'Apparent Temperature (C)', 28 | 'Loud Cover', 29 | 'Daily Summary', 30 | "Wind Bearing (degrees)", 31 | "Visibility (km)", 32 | "Pressure (millibars)", 33 | "Wind Speed (km/h)", 34 | ]) 35 | 36 | data_n = data_n.rename(columns={ 37 | "Temperature (C)":"Temperature", 38 | # "Wind Speed (km/h)": "Wind Speed", 39 | # "Wind Bearing (degrees)":"Wind Bearing", 40 | # "Visibility (km)":"Visibility", 41 | # "Pressure (millibars)":"Pressure" 42 | }) 43 | 44 | save_data(data_n,'weatherHistory_adj.csv') 45 | 46 | def create_testing_sample(): 47 | 48 | df = load_data('weatherHistory_adj.csv') 49 | df_ret = df.head(1000) 50 | save_data(df_ret,'weatherHistory_adj_test.csv') 51 | 52 | if __name__ == "__main__": 53 | weather_dataset_preprocess() 54 | create_testing_sample() -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/fuzzy_learning/weather_preprocessing_humidity.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import os 3 | 4 | 5 | DATA_PATH = os.path.join(os.path.dirname( __file__ ), 'data') 6 | 7 | def load_data(filename, data_path=DATA_PATH, separator=','): 8 | csv_path = os.path.join(data_path, filename) 9 | return pd.read_csv(csv_path, sep=separator) 10 | 11 | def save_data(data_frame, filename, data_path=DATA_PATH): 12 | csv_path = os.path.join(data_path, filename) 13 | return data_frame.to_csv(csv_path, float_format='%.3f', index=False) 14 | 15 | def weather_dataset_preprocess(): 16 | 17 | data_n = load_data ('weatherHistory.csv') 18 | 19 | # get month from date 20 | data_n.loc[:, 'DateTime'] = pd.to_datetime(data_n['Formatted Date'], utc=True) 21 | data_n['Month'] = data_n['DateTime'].dt.month 22 | 23 | 24 | data_n = data_n.drop(columns=[ 25 | 'Formatted Date', 26 | 'DateTime', 27 | 'Summary', 28 | 'Precip Type', 29 | 'Apparent Temperature (C)', 30 | 'Loud Cover', 31 | 'Daily Summary', 32 | 'Wind Bearing (degrees)', 33 | 'Visibility (km)', 34 | 'Pressure (millibars)', 35 | 'Daily Summary' 36 | ]) 37 | 38 | data_n = data_n.rename(columns={ 39 | "Temperature (C)":"Temperature", 40 | 'Wind Speed (km/h)':'Wind Speed', 41 | }) 42 | 43 | save_data(data_n,'weatherHistory_adj_humidity.csv') 44 | 45 | def create_testing_sample(): 46 | df = load_data('weatherHistory_adj.csv') 47 | df_ret = df.head(1000) 48 | save_data(df_ret,'weatherHistory_adj_test.csv') 49 | 50 | if __name__ == "__main__": 51 | # create_testing_sample() 52 | weather_dataset_preprocess() -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/fuzzy_learning/wine_dataset_analysis.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import os 3 | import matplotlib.pyplot as plt 4 | import numpy as np 5 | 6 | DATA_PATH = 
os.path.join(os.path.dirname( __file__ ), 'data') 7 | 8 | 9 | def load_data(filename, data_path=DATA_PATH, separator=';'): 10 | csv_path = os.path.join(data_path, filename) 11 | return pd.read_csv(csv_path, sep=separator) 12 | 13 | def save_data(data_frame, filename, data_path=DATA_PATH): 14 | csv_path = os.path.join(data_path, filename) 15 | return data_frame.to_csv(csv_path, float_format='%.3f', index=False) 16 | 17 | def inspect_data(data): 18 | # print first 10 rows 19 | print(data.head()) 20 | # print datatypes 21 | print(data.info()) 22 | # print min, max, mean, std dev and percentiles 23 | print(data.describe()) 24 | # plot histogram 25 | data.hist(bins=50) 26 | plt.show() 27 | 28 | def _split_train_test(data, test_ratio, random_seed=42): 29 | np.random.seed(random_seed) 30 | 31 | shuffled_indices = np.random.permutation(len(data)) 32 | test_set_size = int(len(data) * test_ratio) 33 | test_indices = shuffled_indices[:test_set_size] 34 | train_indices = shuffled_indices[test_set_size:] 35 | return data.iloc[train_indices], data.iloc[test_indices] 36 | 37 | def split_data(data): 38 | train_set, test_set = _split_train_test(data, 0.2) 39 | save_data(train_set, 'winequality-red_train.csv') 40 | save_data(test_set, 'winequality-red_test.csv') 41 | print(f'data count: {len(data)}') 42 | print(f'train set count: {len(train_set)}') 43 | print(f'test set count: {len(test_set)}') 44 | 45 | def visualize(data): 46 | data.plot(kind='scatter', x='alcohol', y='citric acid', label='pH', figsize=(10,7), alpha=0.1, s=data['total sulfur dioxide'], c='quality', 47 | cmap=plt.get_cmap('jet'), colorbar=True) 48 | plt.show() 49 | 50 | if __name__ == "__main__": 51 | from sklearn import preprocessing 52 | 53 | data = load_data('winequality-red.csv') 54 | 55 | # Get column names first 56 | names = data.columns 57 | # Create the Scaler object 58 | scaler = preprocessing.MinMaxScaler() 59 | # Fit your data on the scaler object 60 | scaled_df = scaler.fit_transform(data) 61 | scaled_df = pd.DataFrame(scaled_df, columns=names) 62 | 63 | inspect_data(scaled_df) 64 | # split_data(data) 65 | # visualize(dataNorm) -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/fuzzy_learning/wine_fuzzy_learning.py: -------------------------------------------------------------------------------- 1 | from fuzzy_system.fuzzy_learning_helper import load_winequality_red 2 | from fuzzy_system.fuzzy_learning_helper import split_train_test 3 | from fuzzy_system.fuzzy_learning_system import FuzzyLearningSystem 4 | import numpy as np 5 | 6 | 7 | def execute_test(resolution, x_n, y_n): 8 | 9 | X, y = load_winequality_red() 10 | 11 | s = 'chlorides' 12 | c = X[s] 13 | c_log = np.log(c) 14 | c_log[c_log < -3.5] = -3.5 15 | c_log[c_log > -1.5] = -1.5 16 | X[s] = c_log 17 | 18 | s='residual sugar' 19 | c = X[s] 20 | c_log = np.log(c) 21 | c_log[c_log > 2] = 2 22 | X[s] = c_log 23 | 24 | s = 'sulphates' 25 | c = X[s] 26 | c[c > 1.25] = 1.25 27 | X[s] = c 28 | 29 | s = 'total sulfur dioxide' 30 | c = X[s] 31 | c_log = np.log(c) 32 | X[s] = c_log 33 | 34 | 35 | 36 | X_train, X_test, y_train, y_test = split_train_test(X, y, test_size = 0.05) 37 | # X_train = X 38 | # X_test = X 39 | # y_train = y 40 | # y_test = y 41 | 42 | learning_system = FuzzyLearningSystem(res=resolution) 43 | 44 | learning_system.fit(X_train, y_train, X_n=x_n, y_n=y_n) 45 | 46 | score = learning_system.score(X_test, y_test) 47 | 48 | return score 49 | 50 | 51 | size = 10 52 | results = np.zeros((size,size)) 53 | 
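# sweep x_n and y_n over a 10x10 grid (11..20) and record the score for each pair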
init_val = 11 54 | for x in range (init_val,init_val+size): 55 | for y in range (init_val,init_val+size): 56 | 57 | results [x-init_val,y-init_val] = execute_test(1000, x, y) 58 | 59 | #results.tofile('results.standardscaler.csv', sep=',') 60 | print(results) -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/fuzzy_learning/wine_scaling.py: -------------------------------------------------------------------------------- 1 | from fuzzy_system.fuzzy_learning_helper import load_winequality_red 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | 5 | X, y = load_winequality_red() 6 | 7 | s = 'chlorides' 8 | c = X[s] 9 | c_log = np.log(c) 10 | c_log[c_log < -3.5] = -3.5 11 | c_log[c_log > -1.5] = -1.5 12 | X[s] = c_log 13 | 14 | s='residual sugar' 15 | c = X[s] 16 | c_log = np.log(c) 17 | c_log[c_log > 2] = 2 18 | X[s] = c_log 19 | 20 | s = 'sulphates' 21 | c = X[s] 22 | c[c > 1.25] = 1.25 23 | X[s] = c 24 | 25 | s = 'total sulfur dioxide' 26 | c = X[s] 27 | c_log = np.log(c) 28 | X[s] = c_log 29 | 30 | 31 | X.hist(bins=50) 32 | plt.show() -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/gan/__pycache__/discriminator.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carmelgafa/ml-from-scratch/c51c4f4c88829da26a448dfc6bc48d9ec706df96/ml_algorithms/src/algorithms/gan/__pycache__/discriminator.cpython-39.pyc -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/gan/__pycache__/generator.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carmelgafa/ml-from-scratch/c51c4f4c88829da26a448dfc6bc48d9ec706df96/ml_algorithms/src/algorithms/gan/__pycache__/generator.cpython-39.pyc -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/gan/discriminator.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | import torch 4 | import torch.nn as nn 5 | 6 | 7 | class Discriminator(nn.Module): 8 | 9 | def __init__(self): 10 | """ 11 | Initialize the discriminator network. 12 | 13 | The network consists of three layers of fully connected (dense) layers. 14 | The output of the network is a probability that the input is real. 15 | """ 16 | super(Discriminator, self).__init__() 17 | self.model = nn.Sequential( 18 | nn.Linear(784, 512), 19 | nn.LeakyReLU(0.2), 20 | nn.Linear(512, 256), 21 | nn.LeakyReLU(0.2), 22 | nn.Linear(256, 1), 23 | nn.Sigmoid() # Output a probability 24 | ) 25 | 26 | def forward(self, img:torch.tensor) -> torch.tensor: 27 | """ 28 | Forward pass of the discriminator network. 29 | 30 | Parameters 31 | ---------- 32 | img : torch.tensor 33 | The input image to the discriminator network. 34 | 35 | Returns 36 | ------- 37 | validity : torch.tensor 38 | The probability that the input image is real. 
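            Values close to 1 mean the discriminator judges the input to be a
            real MNIST image; values close to 0 mean it judges it to be generated.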
39 |         """
40 |         img_flat = img.view(img.size(0), -1)  # Flatten the image
41 |         validity = self.model(img_flat)
42 |         return validity
43 | 
--------------------------------------------------------------------------------
/ml_algorithms/src/algorithms/gan/gan.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.optim as optim
4 | import torchvision.datasets as datasets
5 | from torch.utils.data import DataLoader
6 | from torchvision import transforms
7 | import torchvision
8 | import matplotlib.pyplot as plt
9 | 
10 | from generator import Generator
11 | from discriminator import Discriminator
12 | 
13 | # Hyperparameters
14 | latent_dim = 100
15 | lr = 0.0002
16 | batch_size = 64
17 | epochs = 200
18 | 
19 | # Device configuration (GPU if available)
20 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
21 | 
22 | transform = transforms.Compose([
23 |     transforms.ToTensor(),
24 |     transforms.Normalize([0.5], [0.5]) # Normalize images to [-1, 1]
25 | ])
26 | 
27 | train_data = datasets.MNIST(root="./data", train=True, transform=transform, download=True)
28 | train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
29 | 
30 | 
31 | generator = Generator(latent_dim).to(device)
32 | discriminator = Discriminator().to(device)
33 | 
34 | optimizer_G = optim.Adam(generator.parameters(), lr=lr)
35 | optimizer_D = optim.Adam(discriminator.parameters(), lr=lr)
36 | 
37 | criterion = nn.BCELoss() # Binary Cross Entropy Loss
38 | 
39 | for epoch in range(epochs):
40 |     for i, (imgs, _) in enumerate(train_loader):
41 | 
42 |         # Ground truths
43 |         real = torch.ones(imgs.size(0), 1).to(device)
44 |         fake = torch.zeros(imgs.size(0), 1).to(device)
45 | 
46 |         # ---------------------
47 |         #  Train Discriminator
48 |         # ---------------------
49 | 
50 |         optimizer_D.zero_grad()
51 | 
52 |         # Real images
53 |         real_imgs = imgs.to(device)
54 |         real_loss = criterion(discriminator(real_imgs), real)
55 | 
56 |         # Fake images
57 |         z = torch.randn(imgs.size(0), latent_dim).to(device)
58 |         fake_imgs = generator(z)
59 |         fake_loss = criterion(discriminator(fake_imgs), fake)
60 | 
61 |         # Total loss for discriminator
62 |         d_loss = real_loss + fake_loss
63 |         d_loss.backward()
64 |         optimizer_D.step()
65 | 
66 |         # -----------------
67 |         #  Train Generator
68 |         # -----------------
69 | 
70 |         optimizer_G.zero_grad()
71 | 
72 |         # Generate fake images
73 |         z = torch.randn(imgs.size(0), latent_dim).to(device)
74 |         fake_imgs = generator(z)
75 | 
76 |         # The generator wants the discriminator to think these images are real
77 |         g_loss = criterion(discriminator(fake_imgs), real)
78 | 
79 |         g_loss.backward()
80 |         optimizer_G.step()
81 | 
82 |         # Print progress
83 |         if i % 200 == 0:
84 |             print(f"Epoch [{epoch}/{epochs}] Batch {i}/{len(train_loader)} \
85 |                   Loss D: {d_loss.item():.4f}, loss G: {g_loss.item():.4f}")
86 | 
87 |     # Save generated samples for visualization every few epochs
88 |     if epoch % 10 == 0:
89 |         with torch.no_grad():
90 |             z = torch.randn(16, latent_dim).to(device)
91 |             generated_imgs = generator(z).cpu().view(-1, 1, 28, 28)
92 |             grid_img = torchvision.utils.make_grid(generated_imgs, nrow=4, normalize=True)
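93 |             # make_grid returns a (C, H, W) tensor; imshow expects (H, W, C), hence the permute below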
94 |             plt.imshow(grid_img.permute(1, 2, 0))
95 |             plt.show()
96 | 
--------------------------------------------------------------------------------
/ml_algorithms/src/algorithms/gan/generator.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | 
4 | 
5 | class Generator(nn.Module):
6 |     def __init__(self, latent_dim):
7 |         super(Generator, self).__init__()
8 |         self.model = nn.Sequential(
9 |             nn.Linear(latent_dim, 128),
10 |             nn.LeakyReLU(0.2),
11 |             nn.Linear(128, 256),
12 |             nn.BatchNorm1d(256),
13 |             nn.LeakyReLU(0.2),
14 |             nn.Linear(256, 512),
15 |             nn.BatchNorm1d(512),
16 |             nn.LeakyReLU(0.2),
17 |             nn.Linear(512, 784),  # 28x28=784
18 |             nn.Tanh()  # Normalize the output to [-1, 1]
19 |         )
20 | 
21 |     def forward(self, z:torch.tensor) -> torch.tensor:
22 |         """
23 |         Forward pass of the generator network.
24 | 
25 |         Parameters
26 |         ----------
27 |         z : torch.tensor
28 |             The input latent vector to the generator network.
29 | 
30 |         Returns
31 |         -------
32 |         img : torch.tensor
33 |             The generated image, reshaped to 28x28 for MNIST.
34 |         """
35 |         img = self.model(z)
36 |         img = img.view(img.size(0), 1, 28, 28)  # Reshape to 28x28 for MNIST
37 |         return img
38 | 
--------------------------------------------------------------------------------
/ml_algorithms/src/algorithms/id3/__pycache__/id3_classifier.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/carmelgafa/ml-from-scratch/c51c4f4c88829da26a448dfc6bc48d9ec706df96/ml_algorithms/src/algorithms/id3/__pycache__/id3_classifier.cpython-39.pyc
--------------------------------------------------------------------------------
/ml_algorithms/src/algorithms/id3/id3_exec.py:
--------------------------------------------------------------------------------
1 | '''
2 | id3_exec.py
3 | execution of ID3 example
4 | '''
5 | import os
6 | import pandas as pd
7 | import numpy as np
8 | from id3_classifier import ID3Classifier
9 | 
10 | DIR_PATH = os.path.dirname(os.path.realpath(__file__))
11 | DF = pd.read_csv(os.path.join(DIR_PATH, 'weather.csv'))
12 | df_copy = DF.copy()
13 | 
14 | #
15 | # preprocessing
16 | #
17 | 
18 | # discretize the TEMP attribute (rows with TEMP >= 79 match 'Medium' first, then are overwritten with 'Hot')
19 | df_copy.loc[(DF['TEMP'] <= 69), 'TEMP'] = 'Cold'
20 | df_copy.loc[(DF['TEMP'] > 69), 'TEMP'] = 'Medium'
21 | df_copy.loc[(DF['TEMP'] >= 79), 'TEMP'] = 'Hot'
22 | 
23 | # discretize the HUMIDITY attribute
24 | df_copy.loc[(DF['HUMIDITY'] <= 80), 'HUMIDITY'] = 'Normal'
25 | df_copy.loc[(DF['HUMIDITY'] > 80), 'HUMIDITY'] = 'High'
26 | 
27 | # remove the DAY column
28 | df_copy.drop(columns=['DAY'], inplace=True)
29 | 
30 | RESULTS = np.array(df_copy['PLAY'])
31 | df_copy.drop('PLAY', axis=1, inplace=True)
32 | 
33 | NODE_NAMES = list(df_copy.columns.values)
34 | NODE = np.array(df_copy[NODE_NAMES])
35 | 
36 | # classify
37 | classifier = ID3Classifier()
38 | classifier.id3_compute(NODE_NAMES, NODE, RESULTS)
39 | classifier.display_tree()
40 | 
41 | # test
42 | case = {'WEATHER': 'Sunny','TEMP':'Cold','HUMIDITY':'High','WIND':'Weak'}
43 | result = classifier.infer(case)
44 | print('Result with', case, 'is:', result)
--------------------------------------------------------------------------------
/ml_algorithms/src/algorithms/id3/readme.md:
--------------------------------------------------------------------------------
1 | # Implementation of ID3 algorithm
2 | 
3 | ## Files
4 | 
5 | - simple_tree.py: implementation of a tree structure
6 | - id3_classifier.py: implementation of ID3
7 | -
 tree_exec.py: execution test for tree
8 | - id3_exec.py: execution test for ID3
9 | - weather.csv: data to be classified
--------------------------------------------------------------------------------
/ml_algorithms/src/algorithms/id3/tree_exec.py:
--------------------------------------------------------------------------------
1 | '''
2 | simple example demonstrating the operation of the SimpleTree
3 | 
4 | 1. creates a tree with nodes and edges
5 | 2. creates another tree with nodes and edges
6 | 3. appends the second tree to the first tree
7 | '''
8 | from simple_tree import SimpleTree
9 | 
10 | a_tree = SimpleTree()
11 | a_tree.add_node('One')
12 | a_tree.add_node('Two')
13 | a_tree.add_node('Three')
14 | a_tree.add_node('Four')
15 | a_tree.add_edge('One', 'Three', 'No')
16 | a_tree.add_node('Five')
17 | a_tree.add_edge('One', 'Two', 'Yes')
18 | a_tree.add_edge('Two', 'Four', 'Yes')
19 | a_tree.add_edge('Four', 'Five', 'Yes')
20 | a_tree.add_node('Six')
21 | a_tree.add_edge('Four', 'Six', 'No')
22 | 
23 | a_tree.set_root_node('One')
24 | a_tree.display()
25 | 
26 | b_tree = SimpleTree()
27 | b_tree.add_node('Uno')
28 | b_tree.add_node('Due')
29 | b_tree.add_node('Tre')
30 | b_tree.add_node('Quattro')
31 | b_tree.add_edge('Uno', 'Due', 'Si')
32 | b_tree.add_edge('Uno', 'Tre', 'No')
33 | b_tree.add_edge('Tre', 'Quattro', 'Si')
34 | b_tree.set_root_node('Uno')
35 | b_tree.display()
36 | 
37 | a_tree.append_tree('Five', b_tree, 'No')
38 | a_tree.display()
39 | 
40 | 
41 | 
42 | 
43 | # a_tree = SimpleTree()
44 | # a_tree.add_node('One')
45 | # a_tree.add_node('Two')
46 | # a_tree.add_node('Three')
47 | # a_tree.add_node('Four')
48 | # a_tree.add_edge('One', 'Three', 'No')
49 | 
50 | 
51 | # b_tree = SimpleTree()
52 | # b_tree.add_node('Uno')
53 | 
--------------------------------------------------------------------------------
/ml_algorithms/src/algorithms/id3/weather.csv:
--------------------------------------------------------------------------------
1 | DAY,WEATHER,TEMP,HUMIDITY,WIND,PLAY
2 | 1,Sunny,85,85,Weak,No
3 | 2,Sunny,80,90,Strong,No
4 | 3,Cloudy,83,90,Weak,Yes
5 | 4,Rainy,70,96,Weak,Yes
6 | 5,Rainy,68,80,Weak,Yes
7 | 6,Rainy,65,70,Strong,No
8 | 7,Cloudy,64,65,Strong,Yes
9 | 8,Sunny,72,95,Weak,No
10 | 9,Sunny,69,70,Weak,Yes
11 | 10,Rainy,75,80,Weak,Yes
12 | 11,Sunny,75,70,Strong,Yes
13 | 12,Cloudy,72,90,Strong,Yes
14 | 13,Cloudy,81,75,Weak,Yes
15 | 14,Rainy,71,85,Strong,No
16 | 
--------------------------------------------------------------------------------
/ml_algorithms/src/algorithms/id3_version2/data.csv:
--------------------------------------------------------------------------------
1 | outlook,temperature,humidity,windy,play
2 | sunny,hot,high,weak,no
3 | sunny,hot,high,strong,no
4 | overcast,hot,high,weak,yes
5 | rainy,mild,high,weak,yes
6 | rainy,cool,normal,weak,yes
7 | rainy,cool,normal,strong,no
8 | overcast,cool,normal,strong,yes
9 | sunny,mild,high,weak,no
10 | sunny,cool,normal,weak,yes
11 | rainy,mild,normal,weak,yes
12 | sunny,mild,normal,strong,yes
13 | overcast,mild,high,strong,yes
14 | overcast,hot,normal,weak,yes
15 | rainy,mild,high,strong,no
--------------------------------------------------------------------------------
/ml_algorithms/src/algorithms/id3_version2/results_analysis.txt:
--------------------------------------------------------------------------------
1 | 'temp', 'outlook', 'no', 'yes', 'humid', 'windy', 'maybe'
2 | 
3 | 'temp' 0 'cool' 0 0 'mild' 0 0
4 | 'outlook' 'cool' 0 'sunny' 'sunny' 'high' 'rainy' 'sunny'
5 | 'no' 0 'sunny' 'no' 0 0 True 0
6 | 
'yes' 0 'sunny' 0 'yes' 'normal' 0 0 7 | 'humid' 'mild' 'high' 0 'normal' 0 0 0 8 | 'windy' 0 'rainy' True 0 0 0 True 9 | 'maybe' 0 'sunny' 0 0 0 True 'maybe' 10 | 11 | 12 | 13 | temperature 14 | 15 | 16 | 17 | 0 'sunny' 0 'overcast' 'rainy' 18 | 'sunny' 0 'high' 'normal' 0 19 | 0 'high' 0 0 True 20 | 'overcast' 'normal' 0 0 0 21 | 'rainy' 0 True 0 0 -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/id3_version2/sample_4_workout.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carmelgafa/ml-from-scratch/c51c4f4c88829da26a448dfc6bc48d9ec706df96/ml_algorithms/src/algorithms/id3_version2/sample_4_workout.xlsx -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/id3_version2/sample_results_1.txt: -------------------------------------------------------------------------------- 1 | * B 2 | * - ['b1', 'b3', 's1', 's2', 'b2', 's4'] - A 3 | * - [['s2']] - b14 4 | * - [['s1'], ['s2']] - b12 5 | * - [['s1']] - ce 6 | * - [['s2'], ['s2'], ['s2']] - b2 7 | * - [['s1'], ['s1']] - b5 8 | * - [['s2']] - b6 9 | * - [['s1']] - b7 10 | * - [['s1']] - s2 11 | * - ['ce'] - b12 12 | * - ['s3'] - s2 13 | * - ['b4'] - s1 14 | [ 15 | ["B is ['b1', 'b3', 's1', 's2', 'b2', 's4']", "A is [['s2']]", 'output is b14'], <--- sample_results_2 16 | ["B is ['b1', 'b3', 's1', 's2', 'b2', 's4']", "A is [['s1'], ['s2']]", 'output is b12'], 17 | ["B is ['b1', 'b3', 's1', 's2', 'b2', 's4']", "A is [['s1']]", 'output is ce'], 18 | ["B is ['b1', 'b3', 's1', 's2', 'b2', 's4']", "A is [['s2'], ['s2'], ['s2']]", 'output is b2'], <--- sample_results_1 [_2] 19 | ["B is ['b1', 'b3', 's1', 's2', 'b2', 's4']", "A is [['s1'], ['s1']]", 'output is b5'], 20 | ["B is ['b1', 'b3', 's1', 's2', 'b2', 's4']", "A is [['s2']]", 'output is b6'], <--- sample_results_1 [_2] 21 | ["B is ['b1', 'b3', 's1', 's2', 'b2', 's4']", "A is [['s1']]", 'output is b7'], 22 | ["B is ['b1', 'b3', 's1', 's2', 'b2', 's4']", "A is [['s1']]", 'output is s2'], 23 | ["B is ['ce']", 'output is b12'], 24 | ["B is ['s3']", 'output is s2'], 25 | ["B is ['b4']", 'output is s1']] 26 | -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/id3_version2/sample_results_2.txt: -------------------------------------------------------------------------------- 1 | 2 | * B 3 | * - ['s2'] - b6 4 | * - ['s1', 'b3', 's4'] - b2 5 | [ 6 | ["B is ['s2']", 'output is b6'], 7 | ["B is ['s1', 'b3', 's4']", 'output is b2']] -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/id3_version2/sample_results_3.txt: -------------------------------------------------------------------------------- 1 | * B 2 | * - ['b1'] - b14 3 | * - ['s2'] - b6 4 | * - ['s1', 'b3', 's4'] - b2 5 | [ 6 | ["B is ['b1']", 'output is b14'], 7 | ["B is ['s2']", 'output is b6'], 8 | ["B is ['s1', 'b3', 's4']", 'output is b2']] -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/id3_version2/sample_results_4.txt: -------------------------------------------------------------------------------- 1 | * B 2 | * - ['b1'] - A 3 | * - [['s2']] - b14 4 | * - [['s1']] - b12 5 | * - ['ce', 'b2'] - b12 6 | * - ['s2'] - b6 7 | * - ['s1', 'b3', 's4'] - b2 8 | 9 | [ 10 | ["B is ['b1']", "A is [['s2']]", 'output is b14'], 11 | ["B is ['b1']", "A is [['s1']]", 'output is b12'], 12 | ["B 
is ['ce', 'b2']", 'output is b12'], 13 | ["B is ['s2']", 'output is b6'], 14 | ["B is ['s1', 'b3', 's4']", 'output is b2']] -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/id3_version2/sample_results_5.txt: -------------------------------------------------------------------------------- 1 | * B 2 | * - ['ce', 'b1'] - b12 3 | * - ['s2'] - b6 4 | * - ['b2', 's4'] - A 5 | * - [['s2']] - b12 6 | * - [['s1']] - b7 7 | * - [['s2']] - b2 8 | * - [['s1']] - s2 9 | * - ['s1', 'b3'] - b2 10 | * - ['s3'] - s2 11 | [ 12 | ["B is ['ce', 'b1']", 'output is b12'], 13 | ["B is ['s2']", 'output is b6'], 14 | ["B is ['b2', 's4']", "A is [['s2']]", 'output is b12'], 15 | ["B is ['b2', 's4']", "A is [['s1']]", 'output is b7'], 16 | ["B is ['b2', 's4']", "A is [['s2']]", 'output is b2'], 17 | ["B is ['b2', 's4']", "A is [['s1']]", 'output is s2'], 18 | ["B is ['s1', 'b3']", 'output is b2'], ["B is ['s3']", 'output is s2']] -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/id3_version2/sample_rules.csv: -------------------------------------------------------------------------------- 1 | A,B,X 2 | s2,b1,b14 3 | s1,ce,b12 4 | s1,b3,ce 5 | s1,s1,b5 6 | s1,b1,b12 7 | s1,s2,b5 8 | s2,s2,b6 9 | s1,b2,b7 10 | s2,s1,b2 11 | s2,ce,b12 12 | s1,s3,s2 13 | s2,s3,s2 14 | s2,b2,b12 15 | s2,b3,b2 16 | s1,s4,s2 17 | s1,b4,s1 18 | s2,s4,b2 19 | -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/id3_version2/sample_rules_1.csv: -------------------------------------------------------------------------------- 1 | A,B,X 2 | s2,s2,b6 3 | s2,s1,b2 4 | s2,b3,b2 5 | s2,s4,b2 6 | -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/id3_version2/sample_rules_2.csv: -------------------------------------------------------------------------------- 1 | A,B,X 2 | s2,b1,b14 3 | s2,s2,b6 4 | s2,s1,b2 5 | s2,b3,b2 6 | s2,s4,b2 7 | -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/id3_version2/sample_rules_3.csv: -------------------------------------------------------------------------------- 1 | A,B,X 2 | s2,b1,b14 3 | s1,ce,b12 4 | s1,b1,b12 5 | s2,s2,b6 6 | s2,s1,b2 7 | s2,ce,b12 8 | s2,b2,b12 9 | s2,b3,b2 10 | s2,s4,b2 11 | -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/id3_version2/sample_rules_4.csv: -------------------------------------------------------------------------------- 1 | A,B,X 2 | s1,ce,b12 3 | s1,b1,b12 4 | s2,s2,b6 5 | s1,b2,b7 6 | s2,s1,b2 7 | s2,ce,b12 8 | s1,s3,s2 9 | s2,s3,s2 10 | s2,b2,b12 11 | s2,b3,b2 12 | s1,s4,s2 13 | s2,s4,b2 14 | -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/id3_version2/sensor_rules.csv: -------------------------------------------------------------------------------- 1 | x,Y 2 | s5,s2 3 | s4,s2 4 | s3,s2 5 | s2,s2 6 | s1,s2 7 | ce,s2 8 | b1,s1 9 | b2,s1 10 | b3,ce 11 | b4,ce 12 | b5,b2 13 | -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/id3_version2/weather_rules.csv: -------------------------------------------------------------------------------- 1 | Humidity,Month,Temperature 2 | b2,b2,b4 3 | ce,ce,b6 4 | s2,b1,b14 5 | b3,s3,s7 6 | b4,s3,s3 7 | b4,b3,s3 8 | b2,s3,s2 9 | b4,s1,b1 10 | s1,ce,b12 11 | ce,s1,b5 
12 | b4,ce,b3 13 | b3,ce,b4 14 | b3,b1,b3 15 | ce,b3,s1 16 | b1,s3,ce 17 | b3,b4,s7 18 | b2,s4,s5 19 | b3,b2,b3 20 | b3,s4,s5 21 | b1,b1,b8 22 | b3,b3,b1 23 | b2,b1,b4 24 | b2,ce,b3 25 | b4,s2,s1 26 | b2,s1,b5 27 | b2,b3,s1 28 | s1,b3,ce 29 | b4,s4,s3 30 | b3,s1,b3 31 | b1,b2,b3 32 | ce,s2,b2 33 | b1,s2,s1 34 | b1,s1,b7 35 | b2,b4,s1 36 | s1,s1,b5 37 | s1,b1,b12 38 | s1,s2,b5 39 | b1,ce,b7 40 | s2,s2,b6 41 | s1,b2,b7 42 | b1,b3,b1 43 | ce,b1,b8 44 | b4,b4,s4 45 | s2,s1,b2 46 | b4,b2,ce 47 | b3,s2,ce 48 | b1,s4,b1 49 | ce,b2,b5 50 | b1,b4,b1 51 | ce,s3,s1 52 | s2,ce,b12 53 | s1,s3,s2 54 | b2,s2,s2 55 | s2,s3,s2 56 | s2,b2,b12 57 | s3,ce,b16 58 | s2,b3,b2 59 | b4,b1,b1 60 | ce,s4,s1 61 | ce,b4,s1 62 | s1,s4,s2 63 | s4,s4,s4 64 | s3,b1,b15 65 | s3,s3,b3 66 | s1,b4,s1 67 | s3,b3,s3 68 | s2,s4,b2 69 | s4,s3,s11 70 | s3,s2,b6 71 | s4,b4,s12 72 | -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/id3_version2/weather_rules_1.csv: -------------------------------------------------------------------------------- 1 | Humidity,Month,Temperature 2 | ce,ce,b6 3 | ce,s2,b2 4 | s2,s2,b6 5 | s2,s1,b2 6 | s2,b3,b2 7 | s2,s4,b2 8 | s3,s2,b6 9 | -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/k-means/k-means.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carmelgafa/ml-from-scratch/c51c4f4c88829da26a448dfc6bc48d9ec706df96/ml_algorithms/src/algorithms/k-means/k-means.xlsx -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/linear_regression/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carmelgafa/ml-from-scratch/c51c4f4c88829da26a448dfc6bc48d9ec706df96/ml_algorithms/src/algorithms/linear_regression/__init__.py -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/linear_regression/batch_gradient_descent/multifeature_batch_gd.py: -------------------------------------------------------------------------------- 1 | import os 2 | import matplotlib 3 | matplotlib.rcParams['text.usetex'] = True 4 | import matplotlib.pyplot as plt 5 | import pandas as pd 6 | import sys 7 | import numpy as np 8 | 9 | 10 | def multifeature_gradient_descent( 11 | filename, 12 | alpha=0.0023, 13 | epochs_threshold=100000, 14 | costdifference_threshold=0.00001, 15 | plot=False): 16 | 17 | X = None 18 | Y = None 19 | beta = None 20 | 21 | training_data = pd.read_csv(filename, delimiter=',', header=0, index_col=False) 22 | 23 | Y = training_data['y'].to_numpy() 24 | 25 | m = len(Y) 26 | 27 | X = training_data.drop(['y'], axis=1).to_numpy() 28 | 29 | # add a column of ones to the X matrix to account for the intercept, a0 30 | X = np.insert(X, 0, 1, axis=1) 31 | 32 | y_hat = np.zeros(len(Y)) 33 | 34 | # beta will hold the values of the coefficients 35 | beta = np.array([5.0, 3.0, 1.0]) 36 | 37 | epochs = 0 38 | 39 | # initialize the previous cost function value to a large number 40 | previous_cost = sys.float_info.max 41 | 42 | # store the cost function and a2 values for plotting 43 | costs = [] 44 | a_2s = [] 45 | 46 | while True: 47 | # calculate the hypothesis function for all training data 48 | y_hat = np.dot(beta, X.T) 49 | 50 | # calculate the residuals 51 | residuals = y_hat - Y 52 | 53 | # calculate the new value of beta 54 | beta -= (alpha/m) * 
np.dot(residuals, X) 55 | 56 | # calculate the cost function 57 | cost = np.dot(residuals, residuals)/(2 * m) 58 | 59 | # increase the number of epochs 60 | epochs += 1 61 | 62 | # record the cost and a2 values for plotting 63 | costs.append(cost) 64 | a_2s.append(beta[2]) 65 | 66 | cost_difference = previous_cost - cost 67 | 68 | # uncomment this line to see details 69 | # print(f'Epoch: {epochs}, cost: {cost:.3f}, beta: {beta}') 70 | previous_cost = cost 71 | 72 | # check if the cost function is diverging, if so, break 73 | if cost_difference < 0: 74 | print('Cost function is diverging. Stopping training.') 75 | break 76 | 77 | # check if the cost function is close enough to 0, if so, break or if the number of 78 | # iterations is greater than the threshold, break 79 | if abs(cost_difference) < costdifference_threshold or epochs > epochs_threshold: 80 | break 81 | 82 | if plot: 83 | # plot the cost function and a2 values 84 | plt.plot(a_2s[3:], costs[3:], '--bx', color='lightblue', mec='red') 85 | plt.xlabel('a2') 86 | plt.ylabel('cost') 87 | plt.title(r'Cost Function vs. a2, with $\alpha$ =' + str(alpha)) 88 | plt.show() 89 | 90 | return beta, epochs, cost 91 | 92 | if __name__ == '__main__': 93 | 94 | from timeit import default_timer as timer 95 | 96 | filename = os.path.join(os.path.dirname(__file__), '..', 'data_generation', 'data_2f.csv') 97 | alpha = 0.0023 98 | epochs_threshold = 100000 99 | costdifference_threshold = 0.00001 100 | plot = False 101 | 102 | start = timer() 103 | beta, epochs, cost = multifeature_gradient_descent(filename, alpha, epochs_threshold, costdifference_threshold, plot) 104 | end = timer() 105 | print(f'Time: {end - start}, beta: {beta}, epochs: {epochs}, cost: {cost}') 106 | -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/linear_regression/batch_gradient_descent/twofeature_batch_gd.py: -------------------------------------------------------------------------------- 1 | import os 2 | import matplotlib 3 | matplotlib.rcParams['text.usetex'] = True 4 | import matplotlib.pyplot as plt 5 | import pandas as pd 6 | import sys 7 | 8 | def two_feature_gradient_descent(filename, alpha=0.0023, epochs_threshold=100000, costdifference_threshold=0.00001, plot=False):
 9 | ''' 10 | Batch gradient descent for a two feature linear regression problem.
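The hypothesis is y_hat = a0 + (a1 * x1) + (a2 * x2), and each epoch applies the batch updates a_j -= (alpha / m) * sum((y_hat - y) * x_j) over all m training rows, with x_0 = 1 for the intercept term a0.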
11 | This algorithm does not use any vectorization 12 | ''' 13 | 14 | # initialize the coefficients 15 | a0 = 5 16 | a1 = 3 17 | a2 = 1 18 | 19 | data_set = None 20 | data_set = pd.read_csv(filename, delimiter=',', header=0, index_col=False) 21 | m = len(data_set) 22 | epoch = 0 23 | 24 | previous_cost = sys.float_info.max 25 | 26 | while True: 27 | # calculate the hypothesis function for all training data 28 | data_set['y_hat'] = a0 + (a1 * data_set['x1']) + (a2 * data_set['x2']) 29 | 30 | # calculate the difference between the hypothesis function and the 31 | # actual y value for all training data 32 | data_set['y_hat-y'] = data_set['y_hat'] - data_set['y'] 33 | 34 | # multiply the difference by the x value for all training data 35 | data_set['y-hat-y.x1'] = data_set['y_hat-y'] * data_set['x1'] 36 | data_set['y-hat-y.x2'] = data_set['y_hat-y'] * data_set['x2'] 37 | 38 | # square the difference for all training data 39 | data_set['y-hat-y_sq'] = data_set['y_hat-y'] ** 2 40 | 41 | # update the a0 and a1 values 42 | a0 -= (alpha * (1/m) * sum(data_set['y_hat-y'])) 43 | a1 -= (alpha * (1/m) * sum(data_set['y-hat-y.x1'])) 44 | a2 -= (alpha * (1/m) * sum(data_set['y-hat-y.x2'])) 45 | 46 | # calculate the cost function 47 | cost = sum(data_set['y-hat-y_sq']) / (2 * m) 48 | epoch += 1 49 | 50 | # check if the cost function has converged 51 | cost_difference = previous_cost - cost 52 | # print(f'Epoch: {epoch}, cost: {cost:.3f}, difference: {cost_difference:.6f}') 53 | previous_cost = cost 54 | 55 | # check if the cost function is diverging, if so, break 56 | if cost_difference < 0: 57 | print(f'Cost function is diverging. Stopping training.') 58 | break 59 | 60 | # check if the cost function is close enough to 0, if so, break or if the number of 61 | # iterations is greater than the threshold, break 62 | if abs(cost_difference) < costdifference_threshold or epoch > epochs_threshold: 63 | break 64 | 65 | return a0, a1, a2, epoch, cost 66 | 67 | if __name__ == '__main__': 68 | 69 | from timeit import default_timer as timer 70 | 71 | filename = os.path.join(os.path.dirname(__file__), '..', 'data_generation', 'data_2f.csv') 72 | alpha = 0.0023 73 | epochs_threshold = 100000 74 | costdifference_threshold = 0.00001 75 | plot = False 76 | 77 | start = timer() 78 | a0, a1, a2, epochs, cost = two_feature_gradient_descent(filename, alpha, epochs_threshold, costdifference_threshold, plot) 79 | end = timer() 80 | print(f'Time: {end - start}, a0: {a0}, a1: {a1}, a2: {a2} epochs: {epochs}, cost: {cost}') 81 | -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/linear_regression/batch_gradient_descent/uni_batch_gd_nv.py: -------------------------------------------------------------------------------- 1 | 2 | import os 3 | import sys 4 | import numpy 5 | import matplotlib 6 | matplotlib.rcParams['text.usetex'] = True 7 | import matplotlib.pyplot as plt 8 | 9 | def gradient_descent(filename, alpha=0.0023, epochs_threshold=100000, costdifference_threshold=0.00001, plot=False): 10 | 11 | # initialize coefficient values 12 | a0 = -5 13 | a1 = -3 14 | previous_cost = sys.float_info.max 15 | 16 | data_set = numpy.loadtxt(filename, delimiter=',', skiprows=1) 17 | 18 | epoch = 1 19 | cost = 0 20 | 21 | costs = [] 22 | a_1s = [] 23 | 24 | data_count = len(data_set) 25 | 26 | while True: 27 | 28 | sum_a0 = 0.0 29 | sum_a1 = 0.0 30 | sum_cost = 0.0 31 | cost = 0.0 32 | 33 | for idx in range(0, data_count): 34 | y_value = data_set[idx][1] 35 | x_value = 
data_set[idx][0] 36 | 37 | y_hat = a0 + (a1 * x_value) 38 | 39 | sum_a0 += (y_hat - y_value) 40 | sum_a1 += ((y_hat - y_value) * x_value) 41 | sum_cost += pow((y_hat - y_value), 2) 42 | 43 | a0 -= ((alpha * sum_a0) / data_count) 44 | a1 -= ((alpha * sum_a1) / data_count) 45 | 46 | cost = ((1 / (2 * data_count)) * sum_cost) 47 | 48 | epoch += 1 49 | 50 | costs.append(cost) 51 | a_1s.append(a1) 52 | 53 | if (previous_cost - cost) < costdifference_threshold or epoch > epochs_threshold: 54 | print(f'Cost Function: {cost}') 55 | print(f'Epoch: {epoch}') 56 | break 57 | else: 58 | previous_cost = cost 59 | 60 | if plot: 61 | plt.plot(a_1s[:], costs[:], '--bx', color='lightblue', mec='red') 62 | plt.xlabel('a1') 63 | plt.ylabel('cost') 64 | plt.title(r'Cost Function vs. a1, with $\alpha$ =' + str(alpha)) 65 | plt.show() 66 | 67 | return a0, a1 68 | 69 | 70 | if __name__ == '__main__': 71 | 72 | current_directory = os.path.dirname(__file__) 73 | filename = os.path.join(current_directory, '..', 'data_generation', 'data_1f.csv') 74 | alpha = 0.00023 75 | epochs_threshold = 100000 76 | costdifference_threshold = 0.00001 77 | plot = True 78 | 79 | a0, a1 = gradient_descent(filename, alpha, epochs_threshold, costdifference_threshold, plot) 80 | print(f'a0: {a0}, a1: {a1}') -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/linear_regression/batch_gradient_descent/uni_batch_gd_v.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | #pylint: disable = E0401 4 | import os 5 | import pandas as pd 6 | import sys 7 | 8 | from algorithms.linear_regression.univariate_gd_analysis import plot_univariate_gd_analysis 9 | 10 | 11 | 12 | 13 | def gradient_descent( 14 | filename, 15 | alpha=0.0023, 16 | epochs_threshold=100000, 17 | costdifference_threshold=0.0001, 18 | plot=False): 19 | 20 | a0 = 130 21 | a1 = 19 22 | 23 | a0_prev = a0 24 | a1_prev = a1 25 | 26 | data_set = pd.read_csv(filename, delimiter=',', index_col=False) 27 | 28 | m = len(data_set) 29 | epoch = 0 30 | 31 | previous_cost = sys.float_info.max 32 | gd_data = [] 33 | 34 | 35 | while True: 36 | # calculate the hypothesis function for all training data 37 | data_set['y_hat'] = a0 + (a1 * data_set['x']) 38 | 39 | # calculate the difference between the hypothesis function and the 40 | # actual y value for all training data 41 | data_set['y_hat-y'] = data_set['y_hat'] - data_set['y'] 42 | 43 | # multiply the difference by the x value for all training data 44 | data_set['y-hat-y.x'] = data_set['y_hat-y'] * data_set['x'] 45 | 46 | # square the difference for all training data 47 | data_set['y-hat-y_sq'] = data_set['y_hat-y'] ** 2 48 | 49 | # update the a0 and a1 values 50 | a0 -= (alpha * (1/m) * sum(data_set['y_hat-y'])) 51 | a1 -= (alpha * (1/m) * sum(data_set['y-hat-y.x'])) 52 | 53 | # calculate the cost function 54 | cost = sum(data_set['y-hat-y_sq']) / (2 * m) 55 | epoch += 1 56 | 57 | plot_threshold = 0.001 58 | if abs(a0_prev - a0) > plot_threshold and abs(a1_prev - a1) > plot_threshold: 59 | gd_data.append((a0_prev, a1_prev, cost)) 60 | a0_prev = a0 61 | a1_prev = a1 62 | 63 | cost_difference = previous_cost - cost 64 | print(f'Epoch: {epoch}, cost: {cost:.3f}, difference: {cost_difference:.6f}') 65 | previous_cost = cost 66 | 67 | # check if the cost function is diverging, if so, break 68 | if cost_difference < 0: 69 | print(f'Cost function is diverging. 
Stopping training.') 70 | break 71 | 72 | # check if the cost function is close enough to 0, if so, break or if the number of 73 | # iterations is greater than the threshold, break 74 | if abs(cost_difference) < costdifference_threshold or epoch > epochs_threshold: 75 | gd_data.append((a0_prev, a1_prev, cost)) 76 | break 77 | 78 | if plot: 79 | plot_univariate_gd_analysis( 80 | file=filename, 81 | a0_range=(125,175,0.5), 82 | a1_range=(18,22,0.5), 83 | gd_points = gd_data 84 | ) 85 | return a0, a1 86 | 87 | if __name__ == '__main__': 88 | 89 | current_directory = os.path.dirname(__file__) 90 | filename = os.path.join(current_directory, '..', 'data_generation', 'data_1f.csv') 91 | alpha = 0.0004 92 | # alpha = 0.00056 93 | epochs_threshold = 100000 94 | costdifference_threshold = 0.00001 95 | plot = True 96 | 97 | a0, a1 = gradient_descent(filename, alpha, epochs_threshold, costdifference_threshold, plot) 98 | print(f'a0: {a0:.3f}, a1: {a1:.3f}') 99 | -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/linear_regression/batch_gradient_descent/uni_batch_gd_v_norm.py: -------------------------------------------------------------------------------- 1 | 2 | import os 3 | import pandas as pd 4 | import sys 5 | from algorithms.linear_regression.univariate_gd_analysis import plot_univariate_gd_analysis 6 | 7 | def gradient_descent( 8 | filename, 9 | alpha=0.0023, 10 | epochs_threshold=100000, 11 | costdifference_threshold=0.0001, 12 | plot=False): 13 | 14 | a0 = -3 15 | a1 = -1 16 | 17 | a0_prev = a0 18 | a1_prev = a1 19 | 20 | data_set = pd.read_csv(filename, delimiter=',', index_col=False) 21 | 22 | m = len(data_set) 23 | epoch = 0 24 | 25 | previous_cost = sys.float_info.max 26 | gd_data = [] 27 | 28 | 29 | while True: 30 | # calculate the hypothesis function for all training data 31 | data_set['y_hat'] = a0 + (a1 * data_set['x']) 32 | 33 | # calculate the difference between the hypothesis function and the 34 | # actual y value for all training data 35 | data_set['y_hat-y'] = data_set['y_hat'] - data_set['y'] 36 | 37 | # multiply the difference by the x value for all training data 38 | data_set['y-hat-y.x'] = data_set['y_hat-y'] * data_set['x'] 39 | 40 | # square the difference for all training data 41 | data_set['y-hat-y_sq'] = data_set['y_hat-y'] ** 2 42 | 43 | # update the a0 and a1 values 44 | a0 -= (alpha * (1/m) * sum(data_set['y_hat-y'])) 45 | a1 -= (alpha * (1/m) * sum(data_set['y-hat-y.x'])) 46 | 47 | # calculate the cost function 48 | cost = sum(data_set['y-hat-y_sq']) / (2 * m) 49 | epoch += 1 50 | 51 | plot_threshold = 0.001 52 | if abs(a0_prev - a0) > plot_threshold and abs(a1_prev - a1) > plot_threshold: 53 | gd_data.append((a0_prev, a1_prev, cost)) 54 | a0_prev = a0 55 | a1_prev = a1 56 | 57 | cost_difference = previous_cost - cost 58 | print(f'Epoch: {epoch}, cost: {cost:.3f}, difference: {cost_difference:.6f}') 59 | previous_cost = cost 60 | 61 | # check if the cost function is diverging, if so, break 62 | if cost_difference < 0: 63 | print(f'Cost function is diverging. 
Stopping training.') 64 | break 65 | 66 | # check if the cost function is close enough to 0, if so, break or if the number of 67 | # iterations is greater than the threshold, break 68 | if abs(cost_difference) < costdifference_threshold or epoch > epochs_threshold: 69 | gd_data.append((a0_prev, a1_prev, cost)) 70 | break 71 | 72 | if plot: 73 | plot_univariate_gd_analysis( 74 | file=filename, 75 | a0_range=(-3,3,0.5), 76 | a1_range=(-2,2,0.5), 77 | gd_points = gd_data 78 | ) 79 | return a0, a1 80 | 81 | if __name__ == '__main__': 82 | 83 | current_directory = os.path.dirname(__file__) 84 | filename = os.path.join(current_directory, '..', 'data_generation', 'data_1f_norm.csv') 85 | alpha = 0.0001 86 | # alpha = 0.00056 87 | epochs_threshold = 100000 88 | costdifference_threshold = 0.0000001 89 | plot = True 90 | 91 | a0, a1 = gradient_descent(filename, alpha, epochs_threshold, costdifference_threshold, plot) 92 | print(f'a0: {a0:.3f}, a1: {a1:.3f}') -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/linear_regression/data_generation/data_1f.csv: -------------------------------------------------------------------------------- 1 | x,y 2 | 0.0,159.93428306022466 3 | 1.0,167.2347139765763 4 | 2.0,202.95377076201385 5 | 3.0,240.46059712816052 6 | 4.0,225.3169325055333 7 | 5.0,245.3172608610164 8 | 6.0,301.58425631014785 9 | 7.0,305.3486945830582 10 | 8.0,300.61051228130094 11 | 9.0,340.8512008717193 12 | 10.0,340.73164614375077 13 | 11.0,360.68540492859483 14 | 12.0,394.8392454313207 15 | 13.0,371.73439510684403 16 | 14.0,395.50164334973937 17 | 15.0,438.75424941518054 18 | 16.0,449.74337759331155 19 | 17.0,496.2849466519055 20 | 18.0,491.83951848957577 21 | 19.0,501.75392597329414 22 | 20.0,579.3129753784311 23 | 21.0,565.4844739902693 24 | 22.0,591.3505640937585 25 | 23.0,581.5050362757308 26 | 24.0,619.1123455094963 27 | 25.0,652.2184517941973 28 | 26.0,646.9801284515539 29 | 27.0,697.5139603669135 30 | 28.0,697.9872262016239 31 | 29.0,724.1661250041344 32 | 30.0,737.9658677554121 33 | 31.0,807.0455636901787 34 | 32.0,789.7300555052414 35 | 33.0,788.845781420882 36 | 34.0,846.4508982420638 37 | 35.0,825.5831270005796 38 | 36.0,874.1772719000951 39 | 37.0,850.8065975224044 40 | 38.0,883.4362790220314 41 | 39.0,933.9372247173825 42 | 40.0,964.7693315999082 43 | 41.0,973.4273656237995 44 | 42.0,987.6870343522352 45 | 43.0,1003.9779260882142 46 | 44.0,1000.4295601926515 47 | 45.0,1035.603115832106 48 | 46.0,1060.7872245808041 49 | 47.0,1111.1424445243783 50 | 48.0,1116.8723657913692 51 | 49.0,1094.7391968927452 52 | 50.0,1156.4816793878958 53 | 51.0,1162.2983543916737 54 | 52.0,1176.4615599938809 55 | 53.0,1222.2335257768173 56 | 54.0,1250.619990449919 57 | 55.0,1268.625602382324 58 | 56.0,1253.2156495355473 59 | 57.0,1283.8157524829758 60 | 58.0,1316.6252686280714 61 | 59.0,1349.5109025424472 62 | 60.0,1340.4165152430942 63 | 61.0,1366.2868204667236 64 | 62.0,1367.8733005198794 65 | 63.0,1386.0758675183865 66 | 64.0,1446.250516447884 67 | 65.0,1477.1248005714165 68 | 66.0,1468.5597975683934 69 | 67.0,1510.0706579578405 70 | 68.0,1517.2327205009526 71 | 69.0,1517.0976049078974 72 | 70.0,1557.2279121101683 73 | 71.0,1600.7607313293195 74 | 72.0,1589.283479217801 75 | 73.0,1641.2928731162801 76 | 74.0,1577.605097918205 77 | 75.0,1666.4380500875045 78 | 76.0,1671.7409413647633 79 | 77.0,1684.0198529906827 80 | 78.0,1711.83521553071 81 | 79.0,1690.2486217079822 82 | 80.0,1745.6065622432498 83 | 81.0,1777.1422514302349 84 | 
82.0,1819.5578808948303 85 | 83.0,1799.6345956345272 86 | 84.0,1813.8301279421362 87 | 85.0,1839.9648591283092 88 | 86.0,1888.3080423540414 89 | 87.0,1896.5750221931937 90 | 88.0,1899.4047959246593 91 | 89.0,1940.2653486622671 92 | 90.0,1951.941550986961 93 | 91.0,1989.3728998106578 94 | 92.0,1975.958938122453 95 | 93.0,2003.4467570680447 96 | 94.0,2022.157836937357 97 | 95.0,2020.7297010373577 98 | 96.0,2075.9224055412915 99 | 97.0,2095.221105443598 100 | 98.0,2110.1022691328494 101 | 99.0,2125.308257332497 102 | 100.0,2121.6925851589917 103 | -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/linear_regression/data_generation/data_1f_norm.csv: -------------------------------------------------------------------------------- 1 | x,y 2 | 0.0,-1.6837274923728376 3 | 1.0,-1.6712828488936187 4 | 2.0,-1.6103945352855658 5 | 3.0,-1.5464587090977981 6 | 4.0,-1.57227328214928 7 | 5.0,-1.5381798212696194 8 | 6.0,-1.4422645655765751 9 | 7.0,-1.435847534480374 10 | 8.0,-1.4439244535226243 11 | 9.0,-1.375328362606351 12 | 10.0,-1.3755321609824842 13 | 11.0,-1.3415180846912436 14 | 12.0,-1.283297909283442 15 | 13.0,-1.3226834781934649 16 | 14.0,-1.2821687559457076 17 | 15.0,-1.2084384147920226 18 | 16.0,-1.1897058517568775 19 | 17.0,-1.1103689960925962 20 | 18.0,-1.1179468732380071 21 | 19.0,-1.1010463275227915 22 | 20.0,-0.9688356772895027 23 | 21.0,-0.9924083638335403 24 | 22.0,-0.9483158611816364 25 | 23.0,-0.9650989915151698 26 | 24.0,-0.9009918777037519 27 | 25.0,-0.8445577172522858 28 | 26.0,-0.8534871992479384 29 | 27.0,-0.7673449524366089 30 | 28.0,-0.7665382021705835 31 | 29.0,-0.7219124717159484 32 | 30.0,-0.6983888084401361 33 | 31.0,-0.5806324461909559 34 | 32.0,-0.6101492416366048 35 | 33.0,-0.6116566150839882 36 | 34.0,-0.5134603374077358 37 | 35.0,-0.5490324805156864 38 | 36.0,-0.46619671158902304 39 | 37.0,-0.5060354161575229 40 | 38.0,-0.45041339086120646 41 | 39.0,-0.3643272033822746 42 | 40.0,-0.3117694047724967 43 | 41.0,-0.2970105298659562 44 | 42.0,-0.2727028560463633 45 | 43.0,-0.24493266796627006 46 | 44.0,-0.2509813723523618 47 | 45.0,-0.19102294457667257 48 | 46.0,-0.14809297807052443 49 | 47.0,-0.062255201274109793 50 | 48.0,-0.052487719306438294 51 | 49.0,-0.09021691627641139 52 | 50.0,0.01503210134706469 53 | 51.0,0.024947467643370164 54 | 52.0,0.04909070607160635 55 | 53.0,0.12711566131579802 56 | 54.0,0.1755045080208543 57 | 55.0,0.20619768540878972 58 | 56.0,0.17992918545213973 59 | 57.0,0.23209149970063203 60 | 58.0,0.2880200792365149 61 | 59.0,0.3440784125515897 62 | 60.0,0.32857571019070714 63 | 61.0,0.37267539812632483 64 | 62.0,0.37537978350776785 65 | 63.0,0.4064086993756847 66 | 64.0,0.5089851172604791 67 | 65.0,0.5616148130956783 68 | 66.0,0.5470145230590555 69 | 67.0,0.6177758060547258 70 | 68.0,0.629984580570387 71 | 69.0,0.6297542564424935 72 | 70.0,0.6981621862710204 73 | 71.0,0.7723701913922257 74 | 72.0,0.7528055503065811 75 | 73.0,0.8414631065594781 76 | 74.0,0.7328980553445608 77 | 75.0,0.8843267082035763 78 | 76.0,0.893366255609844 79 | 77.0,0.9142974416245141 80 | 78.0,0.9617127619010943 81 | 79.0,0.9249152814328034 82 | 80.0,1.0192809211599285 83 | 81.0,1.0730380778675512 84 | 82.0,1.1453416709953568 85 | 83.0,1.1113795412471097 86 | 84.0,1.1355778852339342 87 | 85.0,1.1801283255796775 88 | 86.0,1.2625362939295033 89 | 87.0,1.2766285602528116 90 | 88.0,1.281452320058014 91 | 89.0,1.351105059329118 92 | 90.0,1.371008839912341 93 | 91.0,1.4348160036804185 94 | 92.0,1.4119499601870529 95 | 
93.0,1.458806934894261 96 | 94.0,1.4907026847140739 97 | 95.0,1.488268219910694 98 | 96.0,1.5823521908536844 99 | 97.0,1.6152496242584322 100 | 98.0,1.6406167263912446 101 | 99.0,1.66653753902026 102 | 100.0,1.6603741013201352 103 | -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/linear_regression/data_generation/data_3f.csv: -------------------------------------------------------------------------------- 1 | x,y 2 | 0.0,159.93428306022466 3 | 1.0,297.2347139765763 4 | 2.0,462.95377076201385 5 | 3.0,630.4605971281605 6 | 4.0,745.3169325055333 7 | 5.0,895.3172608610164 8 | 6.0,1081.5842563101478 9 | 7.0,1215.3486945830582 10 | 8.0,1340.610512281301 11 | 9.0,1510.8512008717194 12 | 10.0,1640.7316461437508 13 | 11.0,1790.6854049285948 14 | 12.0,1954.8392454313207 15 | 13.0,2061.734395106844 16 | 14.0,2215.5016433497394 17 | 15.0,2388.7542494151808 18 | 16.0,2529.7433775933114 19 | 17.0,2706.2849466519056 20 | 18.0,2831.8395184895758 21 | 19.0,2971.7539259732944 22 | 20.0,3179.312975378431 23 | 21.0,3295.484473990269 24 | 22.0,3451.3505640937583 25 | 23.0,3571.505036275731 26 | 24.0,3739.112345509496 27 | 25.0,3902.2184517941973 28 | 26.0,4026.980128451554 29 | 27.0,4207.513960366913 30 | 28.0,4337.987226201624 31 | 29.0,4494.166125004134 32 | 30.0,4637.965867755412 33 | 31.0,4837.045563690179 34 | 32.0,4949.730055505242 35 | 33.0,5078.845781420882 36 | 34.0,5266.450898242064 37 | 35.0,5375.583127000579 38 | 36.0,5554.177271900096 39 | 37.0,5660.806597522404 40 | 38.0,5823.436279022031 41 | 39.0,6003.937224717382 42 | 40.0,6164.769331599909 43 | 41.0,6303.427365623799 44 | 42.0,6447.687034352235 45 | 43.0,6593.977926088214 46 | 44.0,6720.4295601926515 47 | 45.0,6885.603115832106 48 | 46.0,7040.787224580804 49 | 47.0,7221.142444524378 50 | 48.0,7356.872365791369 51 | 49.0,7464.739196892745 52 | 50.0,7656.481679387896 53 | 51.0,7792.2983543916735 54 | 52.0,7936.461559993881 55 | 53.0,8112.233525776817 56 | 54.0,8270.619990449919 57 | 55.0,8418.625602382324 58 | 56.0,8533.215649535547 59 | 57.0,8693.815752482975 60 | 58.0,8856.625268628071 61 | 59.0,9019.510902542446 62 | 60.0,9140.416515243094 63 | 61.0,9296.286820466723 64 | 62.0,9427.87330051988 65 | 63.0,9576.075867518386 66 | 64.0,9766.250516447884 67 | 65.0,9927.124800571417 68 | 66.0,10048.559797568394 69 | 67.0,10220.07065795784 70 | 68.0,10357.232720500953 71 | 69.0,10487.097604907898 72 | 70.0,10657.227912110167 73 | 71.0,10830.76073132932 74 | 72.0,10949.283479217802 75 | 73.0,11131.29287311628 76 | 74.0,11197.605097918206 77 | 75.0,11416.438050087505 78 | 76.0,11551.740941364764 79 | 77.0,11694.019852990683 80 | 78.0,11851.83521553071 81 | 79.0,11960.248621707982 82 | 80.0,12145.60656224325 83 | 81.0,12307.142251430236 84 | 82.0,12479.55788089483 85 | 83.0,12589.634595634527 86 | 84.0,12733.830127942136 87 | 85.0,12889.96485912831 88 | 86.0,13068.308042354041 89 | 87.0,13206.575022193194 90 | 88.0,13339.40479592466 91 | 89.0,13510.265348662268 92 | 90.0,13651.941550986961 93 | 91.0,13819.372899810658 94 | 92.0,13935.958938122452 95 | 93.0,14093.446757068044 96 | 94.0,14242.157836937356 97 | 95.0,14370.729701037357 98 | 96.0,14555.922405541292 99 | 97.0,14705.221105443597 100 | 98.0,14850.102269132849 101 | 99.0,14995.308257332497 102 | 100.0,15121.692585158991 103 | -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/linear_regression/data_generation/dataset_generation_1f.py: 
-------------------------------------------------------------------------------- 1 | ''' 2 | single feature data generation 3 | ''' 4 | import os 5 | import numpy as np 6 | import matplotlib.pyplot as plt 7 | import pandas as pd 8 | 9 | def generate_data(a0, a1, noise_sigma, file_name, plot=False): 10 | ''' 11 | Generates 100 points with m slope and c intercept 12 | and adds noise with sigma 13 | ''' 14 | 15 | # x between 0 and 100 in steps of 1 16 | x = np.arange(0, 101, 1) 17 | 18 | # generate a noisy line 19 | np.random.seed(42) 20 | l = (a1*x) + a0 21 | e = np.random.randn(len(x))*noise_sigma 22 | y = l + e 23 | 24 | file_path = os.path.join(os.path.dirname(__file__), file_name) 25 | # save the data to a csv file 26 | df = pd.DataFrame(data=[x, y]).T 27 | df.columns = ['x', 'y'] 28 | df.to_csv(file_path, header=True, index=False) 29 | 30 | # plot the data 31 | if plot: 32 | plt.plot(x, y) 33 | plt.plot(x, l, '--') 34 | plt.xlim([min(x), max(x)]) 35 | plt.ylim([min(y), max(y)]) 36 | plt.show() 37 | 38 | if __name__=='__main__': 39 | generate_data(a0=150, a1=20, noise_sigma=20, file_name="data_1f.csv", plot=True) 40 | -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/linear_regression/data_generation/dataset_generation_2f.py: -------------------------------------------------------------------------------- 1 | # We generate a random dataset of points in a plane, and then add some noise to the y-values. We then 2 | # save the data to a csv file. 3 | import matplotlib.pyplot as plt 4 | import numpy as np 5 | import pandas as pd 6 | import os 7 | 8 | def generate_data(a0, a1, a2, noise_sigma, plot=False): 9 | 10 | x1_lower = -10 11 | x1_higher = 10 12 | x1_step = (x1_higher - x1_lower) / 1000 13 | x1 = np.arange(x1_lower, x1_higher, x1_step) 14 | 15 | x2_lower = 0 16 | x2_higher = 50 17 | x2_step = (x2_higher - x2_lower) / 1000 18 | x2= np.arange(x2_lower, x2_higher, x2_step) 19 | 20 | # generate the plane 21 | xx1, xx2 = np.meshgrid(x1, x2) 22 | y = a0 + (a1 * xx1) + (a2 * xx2) 23 | 24 | # add random_multiplier to y 25 | np.random 26 | random_multiplier = noise_sigma 27 | e = np.random.randn(len(xx1), len(xx2) )*random_multiplier 28 | yy = y + e 29 | 30 | df = pd.DataFrame(data=[xx1.ravel(), xx2.ravel(), yy.ravel()]).T 31 | df = df.sample(frac=0.01) 32 | df.columns = ['x1', 'x2', 'y'] 33 | 34 | full_filename = os.path.join(os.path.dirname(__file__), "data_2f.csv") 35 | df.to_csv(full_filename, header=True, index=False) 36 | 37 | if plot: 38 | # plot the data 39 | fig = plt.figure(figsize=(12, 12)) 40 | ax = fig.add_subplot(projection='3d') 41 | y = df.iloc[:,1] 42 | x = df.iloc[:,0] 43 | z = df.iloc[:,2] 44 | ax.scatter(x,y,z, cmap='coolwarm') 45 | plt.show() 46 | 47 | if __name__=='__main__': 48 | generate_data(a0=12, a1=5, a2=-3, noise_sigma=5, plot=True) -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/linear_regression/minibatch_gradient_descent/minibatch_gd_1.py: -------------------------------------------------------------------------------- 1 | 2 | import os 3 | import matplotlib 4 | matplotlib.rcParams['text.usetex'] = True 5 | import matplotlib.pyplot as plt 6 | import pandas as pd 7 | import sys 8 | import numpy as np 9 | 10 | def minibatch_gradient_descent( 11 | file:str, 12 | alpha:float=0.0023, 13 | batch_size:int=100, 14 | epochs_threshold:int=100000, 15 | costdifference_threshold:float=0.00001, 16 | plot:bool=False): 17 | ''' 18 | The function calculates 
the beta values for the linear regression model 19 | using the mini batch gradient descent algorithm 20 | ''' 21 | 22 | # load the training data 23 | training_data = pd.read_csv(file, delimiter=',', header=0, index_col=False) 24 | 25 | # divide the data into features and labels 26 | X = training_data.drop(['y'], axis=1).to_numpy() 27 | # add a column of ones to the features matrix to account for the intercept, a0 28 | X = np.insert(X, 0, 1, axis=1) 29 | Y = training_data['y'].to_numpy() 30 | 31 | # length of the training data 32 | m = len(Y) 33 | print(f'Length of the training data: {m}') 34 | 35 | # initialize the y_hat vector to 0 36 | y_hat = np.zeros(len(Y)) 37 | 38 | # beta will hold the values of the coefficients, hence it will be the size 39 | # of a row of the X matrix 40 | # initialize beta to random values 41 | beta = np.random.random(len(X[0])) 42 | 43 | # minibatch settings 44 | # batch_size = 1 => stochastic gradient descent 45 | # batch_size = m => batch gradient descent 46 | minibatches = int(m/batch_size) 47 | 48 | # initialize the number of minibatch passes 49 | minibatch_count = 0 50 | 51 | previous_cumulative_cost = sys.float_info.max 52 | 53 | # loop until exit condition is met 54 | while True: 55 | 56 | cumulative_cost = 0 57 | 58 | for i in range(minibatches): 59 | 60 | # print(f'Minibatch: {i}') 61 | minibatch_X = X[i*batch_size:(i+1)*batch_size] 62 | minibatch_Y = Y[i*batch_size:(i+1)*batch_size] 63 | 64 | # calculate the hypothesis function for the minibatch 65 | y_hat = np.dot(beta, minibatch_X.T) 66 | # calculate the residuals 67 | residuals = y_hat - minibatch_Y 68 | 69 | # calculate the new value of beta 70 | beta -= ( alpha / batch_size) * np.dot(residuals, minibatch_X) 71 | 72 | # calculate the cost function 73 | cost = np.dot(residuals, residuals) / ( 2 * batch_size) 74 | cumulative_cost += cost 75 | 76 | # increase the number of iterations 77 | minibatch_count += 1 78 | 79 | cost_difference = previous_cumulative_cost - cumulative_cost 80 | # print(f'Pass: {minibatch_count}, average cost: {(cumulative_cost/minibatches):.3f}, beta: {beta}') 81 | previous_cumulative_cost = cumulative_cost 82 | 83 | # check if the cost function is converged or 84 | # iterations is greater than the threshold, break 85 | if abs(cost_difference) < costdifference_threshold or minibatch_count > epochs_threshold: 86 | break 87 | 88 | # calculate the cost for the training data and return the beta values and 89 | # the number of iterations and the cost 90 | y_hat = np.dot(beta, X.T) 91 | residuals = y_hat - Y 92 | cost = np.dot(residuals, residuals) / ( 2 * m) 93 | 94 | return beta, minibatch_count, cost 95 | 96 | 97 | if __name__ == '__main__': 98 | 99 | from timeit import default_timer as timer 100 | 101 | filename = os.path.join(os.path.dirname(__file__), '..', 'data_generation', 'data_2f.csv') 102 | alpha = 0.00023 103 | epochs_threshold = 1000 104 | costdifference_threshold = 0.00001 105 | plot = False 106 | batch_size = 64 107 | 108 | 109 | start = timer() 110 | beta, minibatch_count, cost = minibatch_gradient_descent(filename, alpha, batch_size, epochs_threshold, costdifference_threshold, plot) 111 | end = timer() 112 | print(f'Time: {end - start} beta: {beta}, minibatch_count: {minibatch_count}, cost: {cost}') 113 | -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/linear_regression/multivariate_linear_regression/multivariate_lr.py:
-------------------------------------------------------------------------------- 1 | # This script fits a simple linear regression model. 2 | # 3 | # The model is: 4 | # 5 | # y = a0 + a1*x1 + a2*x2 6 | # 7 | # The model is fit using the least squares method. 8 | # 9 | # The model is tested by computing the cost of the model. 10 | # 11 | # The cost is the sum of the squared residuals. 12 | # 13 | # The cost is a measure of how good the model is. 14 | # 15 | # The lower the cost, the better the model. 16 | 17 | import pandas as pd 18 | import os 19 | 20 | # import data from csv 21 | filename = os.path.join(os.path.dirname(__file__), '..', 'data_generation', 'data_2f.csv') 22 | data_set = pd.read_csv(filename) 23 | 24 | data_set['x1_sq'] = data_set['x1']**2 25 | data_set['x2_sq'] = data_set['x2']**2 26 | data_set['x1y'] = data_set['x1']*data_set['y'] 27 | data_set['x2y'] = data_set['x2']*data_set['y'] 28 | data_set['x1x2'] = data_set['x1']*data_set['x2'] 29 | 30 | n = len(data_set) 31 | 32 | sum_X1_sq = data_set['x1_sq'].sum() - (data_set['x1'].sum()**2)/n 33 | print(f'sum_X1_sq: {sum_X1_sq}') 34 | 35 | sum_X2_sq = data_set['x2_sq'].sum() - (data_set['x2'].sum()**2)/n 36 | print(f'sum_X2_sq: {sum_X2_sq}') 37 | 38 | sum_X1y = data_set['x1y'].sum() - (data_set['x1'].sum()*data_set['y'].sum())/n 39 | print(f'sum_X1y: {sum_X1y}') 40 | 41 | sum_X2y = data_set['x2y'].sum() - (data_set['x2'].sum()*data_set['y'].sum())/n 42 | print(f'sum_X2y: {sum_X2y}') 43 | 44 | sum_X1X2 = data_set['x1x2'].sum() - (data_set['x1'].sum()*data_set['x2'].sum())/n 45 | print(f'sum_X1X2: {sum_X1X2}') 46 | 47 | mean_y = data_set['y'].mean() 48 | mean_x1 = data_set['x1'].mean() 49 | mean_x2 = data_set['x2'].mean() 50 | 51 | 52 | 53 | a1 = (sum_X2_sq*sum_X1y - sum_X1X2*sum_X2y)/(sum_X1_sq*sum_X2_sq - sum_X1X2**2) 54 | 55 | a2 = (sum_X1_sq*sum_X2y - sum_X1X2*sum_X1y)/(sum_X1_sq*sum_X2_sq - sum_X1X2**2) 56 | 57 | a0 = mean_y - a1*mean_x1 - a2*mean_x2 58 | 59 | print(f'a0: {a0}, a1: {a1}, a2: {a2}') 60 | 61 | 62 | import numpy as np 63 | 64 | y_hat = a0 + a1*data_set['x1'] + a2*data_set['x2'] 65 | 66 | residuals = y_hat - data_set['y'] 67 | 68 | cost = np.dot(residuals, residuals)/(2*n) 69 | 70 | print(f'cost: {cost}') -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/linear_regression/normalization/normalization_analysis.py: -------------------------------------------------------------------------------- 1 | 2 | #pylint: disable = E0401 3 | import os 4 | import pandas as pd 5 | 6 | 7 | 8 | data_path=os.path.join(os.path.dirname(__file__),'..', 'data_generation', 'data_1f.csv') 9 | 10 | df_data = pd.read_csv(data_path, delimiter=',', index_col=False) 11 | 12 | y_mean = df_data['y'].mean() 13 | y_stddev = df_data['y'].std() 14 | 15 | 16 | df_data['y'] = (df_data['y'] - y_mean) / y_stddev 17 | 18 | print(df_data['y'].mean()) 19 | print(df_data['y'].std()) 20 | 21 | 22 | normalized_data_path = os.path.join( 23 | os.path.dirname(__file__),'..', 'data_generation', 'data_1f_norm.csv') 24 | 25 | df_data.to_csv(normalized_data_path, header=True, index=False) 26 | -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/linear_regression/stochastic_gradient_descent/stochastic_gd_1f_1.py: -------------------------------------------------------------------------------- 1 | 2 | import os 3 | import matplotlib 4 | matplotlib.rcParams['text.usetex'] = True 5 | import matplotlib.pyplot as plt 6 | import
pandas as pd 7 | import sys 8 | import numpy as np 9 | 10 | 11 | def stochastic_gradient_descent(filename:str, alpha:float, max_epochs:int = 5): 12 | ''' 13 | The stochastic gradient descent function takes a dataset, a learning rate, and a maximum number of 14 | epochs. 15 | It returns the beta values and the cost. 16 | ''' 17 | 18 | np.random.seed(42) 19 | 20 | # load the training data 21 | data_set = pd.read_csv(filename, delimiter=',', header=0, index_col=False) 22 | 23 | # training_data = training_data.sample(frac=1).reset_index(drop=True) 24 | 25 | # divide the data into features and labels 26 | X = data_set.drop(['y'], axis=1).to_numpy() 27 | 28 | # add a column of ones to the features matrix to account for the intercept, a0 29 | X = np.insert(X, 0, 1, axis=1) 30 | 31 | Y = data_set['y'].to_numpy() 32 | 33 | # length of the training data 34 | m = len(Y) 35 | 36 | # initialize the y_hat vector to 0 37 | y_hat = np.zeros(len(Y)) 38 | 39 | # beta will hold the values of the coefficients, hence it will be the size 40 | # of a row of the X matrix 41 | # initialize beta to random values 42 | beta = np.random.random(len(X[0])) 43 | 44 | # initialize the number of epochs 45 | epochs = 0 46 | 47 | # loop until exit condition is met 48 | while True: 49 | 50 | i = np.random.randint(0, m) 51 | 52 | # print(f'Minibatch: {i}') 53 | x = X[i] 54 | y = Y[i] 55 | 56 | # calculate the hypothesis function for all training data 57 | y_hat = np.dot(beta, x.T) 58 | 59 | # calculate the residuals 60 | residuals = y_hat - y 61 | 62 | # calculate the new value of beta 63 | beta -= (alpha * residuals * x) 64 | 65 | epochs += 1 66 | 67 | # check if the cost function is close enough to 0, if so, break or if the number of 68 | # iterations is greater than the threshold, break 69 | if epochs > (m*max_epochs): 70 | break 71 | 72 | # calculate the cost for the training data and return the beta values and 73 | # the number of iterations and the cost 74 | y_hat = np.dot(beta, X.T) 75 | residuals = y_hat - Y 76 | cost = np.dot(residuals, residuals) / ( 2 * m) 77 | 78 | return beta, cost 79 | 80 | 81 | if __name__ == '__main__': 82 | 83 | from timeit import default_timer as timer 84 | 85 | filename = os.path.join(os.path.dirname(__file__), '..', 'data_generation', 'data_1f.csv') 86 | alpha = 0.0004 87 | max_epochs = 4000 88 | start = timer() 89 | beta, cost = stochastic_gradient_descent(filename, alpha, max_epochs) 90 | end = timer() 91 | print(f'Time: {end - start}, beta: {beta}, cost: {cost}') 92 | -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/linear_regression/stochastic_gradient_descent/stochastic_gd_nf_1.py: -------------------------------------------------------------------------------- 1 | 2 | import os 3 | import matplotlib 4 | matplotlib.rcParams['text.usetex'] = True 5 | import matplotlib.pyplot as plt 6 | import pandas as pd 7 | import sys 8 | import numpy as np 9 | 10 | 11 | def stochastic_gradient_descent(filename:str, alpha:float, max_epochs:int = 5): 12 | ''' 13 | The stochastic gradient descent function takes a dataset, 14 | a learning rate, and a maximum number of epochs. 15 | It returns the beta values and the cost. 
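Each iteration draws a random training row i and applies the per-sample update beta -= alpha * (y_hat_i - y_i) * x_i, so no cost needs to be accumulated during the loop itself.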
16 | ''' 17 | 18 | np.random.seed(42) 19 | 20 | # load the training data 21 | data_set = pd.read_csv(filename, delimiter=',', header=0, index_col=False) 22 | 23 | # training_data = training_data.sample(frac=1).reset_index(drop=True) 24 | 25 | # divide the data into features and labels 26 | X = data_set.drop(['y'], axis=1).to_numpy() 27 | 28 | # add a column of ones to the features matrix to account for the intercept, a0 29 | X = np.insert(X, 0, 1, axis=1) 30 | 31 | Y = data_set['y'].to_numpy() 32 | 33 | # length of the training data 34 | m = len(Y) 35 | 36 | # initialize the y_hat vector to 0 37 | y_hat = np.zeros(len(Y)) 38 | 39 | # beta will hold the values of the coefficients, hence it will be the size 40 | # of a row of the X matrix 41 | # initialize beta to random values 42 | # beta = np.random.random(len(X[0])) 43 | 44 | # beta will hold the values of the coefficients 45 | beta = np.array([5.0, 3.0, 1.0]) 46 | 47 | # initialize the number of epochs 48 | iterations = 0 49 | 50 | # loop until exit condition is met 51 | while True: 52 | 53 | i = np.random.randint(0, m) 54 | 55 | x = X[i] 56 | y = Y[i] 57 | 58 | # calculate the hypothesis function for all training data 59 | y_hat = np.dot(beta, x.T) 60 | 61 | # calculate the residuals 62 | residuals = y_hat - y 63 | 64 | # calculate the new value of beta 65 | beta -= (alpha * residuals * x) 66 | 67 | iterations += 1 68 | 69 | # check if the cost function is close enough to 0, if so, break or if the number of 70 | # iterations is greater than the threshold, break 71 | if iterations > (m*max_epochs): 72 | break 73 | 74 | # calculate the cost for the training data and return the beta values and 75 | # the number of iterations and the cost 76 | y_hat = np.dot(beta, X.T) 77 | residuals = y_hat - Y 78 | cost = np.dot(residuals, residuals) / ( 2 * m) 79 | 80 | return beta, cost 81 | 82 | 83 | if __name__ == '__main__': 84 | 85 | from timeit import default_timer as timer 86 | 87 | filename = os.path.join(os.path.dirname(__file__), '..', 'data_generation', 'data_2f.csv') 88 | alpha = 0.0001 89 | max_epochs = 10 90 | 91 | start = timer() 92 | beta, cost = stochastic_gradient_descent(filename, alpha, max_epochs) 93 | end = timer() 94 | print(f'Time: {end - start}, beta: {beta}, cost: {cost}') 95 | -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/linear_regression/stochastic_gradient_descent/stochastic_gd_nf_2.py: -------------------------------------------------------------------------------- 1 | 2 | import os 3 | import matplotlib 4 | matplotlib.rcParams['text.usetex'] = True 5 | import pandas as pd 6 | import sys 7 | import numpy as np 8 | 9 | 10 | def stochastic_gradient_descent( 11 | filename:str, 12 | alpha:float=0.0023, 13 | epochs_threshold:int=100, 14 | costdifference_threshold:float=0.00001, 15 | plot:bool=False): 16 | ''' 17 | The function takes a training data set, a learning rate, a number of epochs and a cost difference 18 | threshold. 
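The data is first split roughly 80/20 into training and validation rows; every 1000 updates the validation cost is recomputed, and training stops once that cost changes by less than the threshold (a simple early stopping scheme).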
19 | It then calculates the beta values for the training data set and returns the beta values, the number 20 | of iterations and the cost 21 | ''' 22 | 23 | np.random.seed(42) 24 | 25 | # load the training data 26 | data_set = pd.read_csv(filename, delimiter=',', header=0, index_col=False) 27 | 28 | # create train and test sets 29 | mask = np.random.rand(len(data_set)) < 0.8 30 | training_data = data_set[mask] 31 | validation_data = data_set[~mask] 32 | 33 | # divide the data into features and labels 34 | X_train = training_data.drop(['y'], axis=1).to_numpy() 35 | # add a column of ones to the features matrix to account for the intercept, a0 36 | X_train = np.insert(X_train, 0, 1, axis=1) 37 | Y_train = training_data['y'].to_numpy() 38 | 39 | X_validation = validation_data.drop(['y'], axis=1).to_numpy() 40 | X_validation = np.insert(X_validation, 0, 1, axis=1) 41 | Y_validation = validation_data['y'].to_numpy() 42 | 43 | 44 | # length of the training data 45 | m = len(Y_train) 46 | 47 | # initialize the y_hat vector to 0 48 | y_hat = np.zeros(len(Y_train)) 49 | 50 | # beta will hold the values of the coefficients, hence it will be the size 51 | # of a row of the X matrix 52 | # initialize beta to random values 53 | beta = np.random.random(len(X_train[0])) 54 | 55 | # initialize the number of epochs 56 | iterations = 0 57 | previous_validation_cost = sys.float_info.max 58 | 59 | # loop until exit condition is met 60 | while True: 61 | 62 | i = np.random.randint(0, m) 63 | 64 | x = X_train[i] 65 | y = Y_train[i] 66 | 67 | # calculate the hypothesis function for all training data 68 | y_hat = np.dot(beta, x.T) 69 | 70 | # calculate the residuals 71 | residuals = y_hat - y 72 | 73 | # calculate the new value of beta 74 | beta -= (alpha * residuals * x) 75 | 76 | iterations += 1 77 | 78 | if iterations % 1000 == 0: 79 | y_hat_validation = np.dot(beta, X_validation.T) 80 | residuals_validation = y_hat_validation - Y_validation 81 | cost_validation = np.dot( 82 | residuals_validation, residuals_validation) / ( 83 | 2 * len(Y_validation)) 84 | 85 | if abs(previous_validation_cost - cost_validation) < costdifference_threshold: 86 | break 87 | else: 88 | previous_validation_cost = cost_validation 89 | 90 | # uncomment this line to see details 91 | # print(f'Epoch: {count/m} Cost: {cost_validation} beta: {beta}') 92 | 93 | # check if the cost function is close enough to 0, if so, break or if the number of 94 | # iterations is greater than the threshold, break 95 | if (iterations/m) > (epochs_threshold): 96 | break 97 | 98 | # calculate the cost for the training data and return the beta values and 99 | # the number of iterations and the cost 100 | y_hat = np.dot(beta, X_train.T) 101 | residuals = y_hat - Y_train 102 | cost = np.dot(residuals, residuals) / ( 2 * m) 103 | 104 | return beta, iterations/m, cost 105 | 106 | 107 | if __name__ == '__main__': 108 | 109 | from timeit import default_timer as timer 110 | 111 | filename = os.path.join(os.path.dirname(__file__), '..', 'data_generation', 'data_2f.csv') 112 | alpha = 0.00033 113 | epochs_threshold = 100 114 | costdifference_threshold = 0.0004 115 | plot = False 116 | 117 | start = timer() 118 | beta, count, cost = stochastic_gradient_descent(filename, alpha, epochs_threshold, costdifference_threshold, plot) 119 | end = timer() 120 | print(f'Time: {end - start}, beta: {beta}, count: {count}, cost: {cost}') 121 | -------------------------------------------------------------------------------- 
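A quick cross-check for all of the gradient descent variants above is the closed-form least squares solution, which these scripts should approach as they converge. A minimal sketch, not a file in this repository, assuming the data_2f.csv layout used throughout (columns x1, x2, y, generated with a0=12, a1=5, a2=-3 plus Gaussian noise):

import numpy as np
import pandas as pd

# illustrative path; point this at the generated data_2f.csv
data = pd.read_csv('data_2f.csv', delimiter=',', header=0, index_col=False)

# same design matrix as the scripts above: a leading column of ones for the intercept
X = np.insert(data.drop(['y'], axis=1).to_numpy(), 0, 1, axis=1)
Y = data['y'].to_numpy()

# closed-form least squares: beta = argmin ||X.beta - Y||^2
beta, *_ = np.linalg.lstsq(X, Y, rcond=None)
print(beta)  # should land near [12, 5, -3], the coefficients used to generate the data

A run that stops far from this vector usually points to a learning rate or stopping threshold problem rather than to the data.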
/ml_algorithms/src/algorithms/linear_regression/univariate_gd_analysis.py: -------------------------------------------------------------------------------- 1 | '''script to plot the cost surface and the gradient descent points''' 2 | 3 | #pylint: disable = E0401 4 | import numpy as np 5 | import matplotlib.pyplot as plt 6 | import pandas as pd 7 | from matplotlib import cm 8 | 9 | def plot_univariate_gd_analysis( 10 | file:str, 11 | a0_range:tuple, 12 | a1_range:tuple, 13 | gd_points:list, 14 | plot_slices=False): 15 | ''' 16 | plot the costs surface and the gradient descent points 17 | ''' 18 | 19 | # read the data set 20 | data_set = pd.read_csv(file, delimiter=',', index_col=False) 21 | m = len(data_set) 22 | 23 | # plot the costs surface 24 | a0, a1 = np.meshgrid( 25 | np.arange(a0_range[0], a0_range[1], a0_range[2]), 26 | np.arange(a1_range[0], a1_range[1], a1_range[2])) 27 | ii, jj = np.shape(a0) 28 | 29 | 30 | costs = [] 31 | for i in range(ii): 32 | cost_row = [] 33 | for j in range(jj): 34 | y_hat = a0[i,j] + (a1[i,j] * data_set['x']) 35 | y_diff = y_hat - data_set['y'] 36 | y_diff_sq = y_diff ** 2 37 | cost = sum(y_diff_sq) / (2 * m) 38 | cost_row.append(cost) 39 | costs.append(cost_row) 40 | 41 | if plot_slices: 42 | 43 | a0_mincost_idx = np.where(np.round(a0[0,:], 1)==150) 44 | a1_mincost = a1[:, a0_mincost_idx].squeeze() 45 | ncosts = np.array(costs) 46 | costs_mincosts = ncosts[:,a0_mincost_idx[0].squeeze()] 47 | 48 | plt.rcParams['text.usetex'] = True 49 | plt.plot(a1_mincost, costs_mincosts) 50 | plt.xlabel(r'$a_1$') 51 | plt.ylabel(r'$J(150,a_1$)') 52 | 53 | plt.show() 54 | 55 | 56 | a1_mincost_idx = np.where(np.round(a1[:,0], 1)==20) 57 | a0_mincost = a0[a1_mincost_idx, :].squeeze() 58 | ncosts = np.array(costs) 59 | costs_mincosts = ncosts[a1_mincost_idx[0].squeeze(), :] 60 | 61 | plt.rcParams['text.usetex'] = True 62 | plt.plot(a0_mincost, costs_mincosts) 63 | plt.xlabel(r'$a_1$') 64 | plt.ylabel(r'$J(a_0, 20$)') 65 | 66 | plt.show() 67 | 68 | # plot the gradient descent points 69 | xx = [] 70 | yy = [] 71 | zz = [] 72 | for item in gd_points: 73 | xx.append(item[0]) 74 | yy.append(item[1]) 75 | zz.append(item[2]) 76 | 77 | plt.rcParams['text.usetex'] = True 78 | fig = plt.figure() 79 | ax = plt.axes(projection='3d') 80 | # ax.plot_surface( 81 | # a0, 82 | # a1, 83 | # np.array(costs), 84 | # rstride=1, 85 | # cstride=1, 86 | # cmap='cividis', 87 | # edgecolor='none', 88 | # alpha=0.5) 89 | ax.plot_surface( 90 | a0, 91 | a1, 92 | np.array(costs), 93 | rstride=1, 94 | cstride=1, 95 | cmap='viridis', # or 'plasma' 96 | edgecolor='none', 97 | alpha=0.6) 98 | 99 | 100 | 101 | ax.contour(a0, a1, np.array(costs), zdir='z', offset=-0.5, cmap=cm.coolwarm) 102 | ax.plot(xx, yy, zz, 'r.--', alpha=1) 103 | ax.set_xlabel(r'$a_0$') 104 | ax.set_ylabel(r'$a_1$') 105 | ax.set_zlabel(r'$J(a_0, a_1)$') 106 | plt.show() 107 | 108 | if __name__=='__main__': 109 | import os 110 | 111 | # plot_univariate_gd_analysis( 112 | # file=os.path.join(os.path.dirname(__file__), 'data_generation', 'data_1f.csv'), 113 | # a0_range=(125,175,0.2), 114 | # a1_range=(18,22,0.2), 115 | # gd_points= [], 116 | # plot_slices=True) 117 | 118 | 119 | plot_univariate_gd_analysis( 120 | file=os.path.join(os.path.dirname(__file__), 'data_generation', 'data_1f.csv'), 121 | a0_range=(125, 175, 0.1), # finer grid 122 | a1_range=(18, 22, 0.1), # finer grid 123 | gd_points=[], 124 | plot_slices=True 125 | ) 126 | -------------------------------------------------------------------------------- 
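The double loop that univariate_gd_analysis.py uses to fill the cost grid can also be written with NumPy broadcasting, which is considerably faster on fine grids. A sketch under the same assumptions (data_1f.csv with columns x and y; the path is illustrative):

import numpy as np
import pandas as pd

data_set = pd.read_csv('data_1f.csv', delimiter=',', index_col=False)
x = data_set['x'].to_numpy()
y = data_set['y'].to_numpy()
m = len(x)

a0, a1 = np.meshgrid(np.arange(125, 175, 0.5), np.arange(18, 22, 0.5))

# residuals for every (a0, a1) grid point at once: shape (grid_rows, grid_cols, m)
residuals = a0[..., None] + a1[..., None] * x - y
costs = (residuals ** 2).sum(axis=-1) / (2 * m)

The costs array matches the list of lists built row by row in the script and can be passed straight to the same plot_surface call.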
/ml_algorithms/src/algorithms/linear_regression/univariate_linear_regression/univariate_lr.py: -------------------------------------------------------------------------------- 1 | 2 | #pylint: disable = E0401 3 | import pandas as pd 4 | import os 5 | 6 | # import data from csv 7 | full_filename = os.path.join(os.path.dirname(__file__), '..', 'data_generation', 'data_1f.csv') 8 | data_set = pd.read_csv(full_filename) 9 | 10 | data_set.columns=['x', 'y'] 11 | 12 | # add new columns required to solve the problem 13 | data_set['x_sq'] = data_set['x']**2 14 | data_set['xy'] = data_set['x']*data_set['y'] 15 | 16 | 17 | # calculate the sums of the data 18 | sum_x = data_set['x'].sum() 19 | sum_y = data_set['y'].sum() 20 | sum_x_sq = data_set['x_sq'].sum() 21 | sum_xy = data_set['xy'].sum() 22 | 23 | n = len(data_set) 24 | print(f'sum_x: {sum_x}, sum_y: {sum_y}, sum_x_sq: {sum_x_sq}, sum_xy: {sum_xy}, n: {n}') 25 | 26 | # calculate the slope and intercept 27 | a_0 = (sum_x_sq*sum_y - sum_x*sum_xy)/(n*sum_x_sq - sum_x**2) 28 | 29 | a_1 = (n*sum_xy - sum_x*sum_y)/(n*sum_x_sq - sum_x**2) 30 | 31 | 32 | print(f'a_0: {a_0}, a_1: {a_1}') -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/logistic_regression/__pycache__/binaryclassification.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carmelgafa/ml-from-scratch/c51c4f4c88829da26a448dfc6bc48d9ec706df96/ml_algorithms/src/algorithms/logistic_regression/__pycache__/binaryclassification.cpython-311.pyc -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/logistic_regression/__pycache__/lr_utils.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carmelgafa/ml-from-scratch/c51c4f4c88829da26a448dfc6bc48d9ec706df96/ml_algorithms/src/algorithms/logistic_regression/__pycache__/lr_utils.cpython-311.pyc -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/logistic_regression/binaryclassification.py: -------------------------------------------------------------------------------- 1 | 2 | #pylint: disable = E0401 3 | import numpy as np 4 | import matplotlib.pyplot as plt 5 | 6 | 7 | def sigmoid(z): 8 | """ 9 | Compute the sigmoid of z 10 | 11 | Arguments: 12 | z - A scalar or numpy vector 13 | 14 | Returns: 15 | s - the sigmoid of z 16 | """ 17 | s = 1 / (1 + np.exp(-z)) 18 | return s 19 | 20 | def initialize_with_zeros(dim): 21 | """ 22 | Creates a vector for W initialized to 0 and creates b, initialized to 0 23 | 24 | Arguments: 25 | dim - the size of the weight vector, or the number of features to the system 26 | 27 | Returns: 28 | w - Initialized weight vecor of shape(dim,1) 29 | b - initialized bias, scalar 30 | """ 31 | w = np.zeros((dim, 1)) 32 | b = 0 33 | 34 | assert w.shape == (dim, 1) 35 | assert isinstance(b, float) or isinstance(b, int) 36 | 37 | return w, b 38 | 39 | def propagate(w, b, X, Y): 40 | """ 41 | Implement feed forward step, calculate cost function and its gradient 42 | 43 | Arguments: 44 | w - weights, a numpy array. 
In the image case it will be of shape (num_px * num_px * 3, 1)
45 |         b - the scalar bias of the neuron
46 |         X - input data with shape (num_px * num_px * 3, number of examples)
47 |         Y - 'true' label vector of size (1, number of examples)
48 | 
49 |     Returns:
50 |         cost - negative log likelihood cost of logistic regression
51 |         dw - derivative of cost w.r.t. w; same shape as w
52 |         db - derivative of cost w.r.t. b; same shape as b
53 |     """
54 | 
55 |     number_of_examples = X.shape[1]
56 | 
57 |     #forward propagation
58 |     A = sigmoid(np.dot(w.T, X) + b)
59 |     cost = np.sum(-(Y * np.log(A) + ((1-Y) * np.log(1-A))), axis=1)/ number_of_examples
60 | 
61 |     #backward propagation
62 |     dw = np.dot(X, (A-Y).T) / number_of_examples
63 |     db = np.sum(A-Y) / number_of_examples
64 | 
65 |     assert dw.shape == w.shape
66 |     assert db.dtype == float
67 | 
68 |     cost = np.squeeze(cost)
69 | 
70 |     assert cost.shape == ()
71 | 
72 |     grads = {'dw' : dw,
73 |              'db' : db}
74 |     return grads, cost
75 | 
76 | def optimize(w, b, X, Y, num_iterations, learning_rate, print_cost=False):
77 |     """
78 |     optimizes w and b using gradient descent
79 | 
80 |     Arguments:
81 |     w - weights, numpy array of shape (number_of_input_features, 1)
82 |     b - bias, scalar
83 |     X - data numpy array of shape (num_px*num_px*3, number_of_examples)
84 |     Y - 'true' label vector, numpy array of shape (1, number_of_examples)
85 |     num_iterations - number of gradient descent iterations
86 |     learning_rate - gradient descent learning rate
87 |     print_cost - print the loss every 100 steps
88 | 
89 |     Returns:
90 |     params - dictionary containing w and b
91 |     grads - dictionary containing dw and db
92 |     costs - list of all the costs computed
93 |     """
94 |     costs = []
95 | 
96 |     for i in range(num_iterations):
97 | 
98 |         grads, cost = propagate(w, b, X, Y)
99 | 
100 |         dw = grads['dw']
101 |         db = grads['db']
102 | 
103 |         w = w - (learning_rate * dw)
104 |         b = b - (learning_rate * db)
105 | 
106 |         if i%100 == 0:
107 |             costs.append(cost)
108 |             if print_cost:
109 |                 print('print cost after iteration {}: {}'.format(i, cost))
110 | 
111 |     params = {'w': w, 'b': b}
112 |     grads = {'dw': dw, 'db': db}
113 | 
114 |     return params, grads, costs
115 | 
116 | def predict(w, b, X):
117 |     """
118 |     predict label 0 or 1 using learned logistic regression parameters
119 | 
120 |     Arguments:
121 |     w - weights, numpy array of shape (number_of_input_features, 1)
122 |     b - bias, scalar
123 |     X - data numpy array of shape (num_px*num_px*3, number_of_examples)
124 | 
125 |     Returns:
126 |     Y_prediction - numpy array containing predictions for X
127 |     """
128 |     number_of_examples = X.shape[1]
129 | 
130 |     Y_prediction = np.zeros((1, number_of_examples))
131 | 
132 |     w = w.reshape(X.shape[0], 1)
133 | 
134 |     A = sigmoid(np.dot(w.T, X) + b)
135 | 
136 |     for i in range(A.shape[1]):
137 |         if A[0,i] >= 0.5:
138 |             Y_prediction[0,i] = 1
139 |         else:
140 |             Y_prediction[0,i] = 0
141 | 
142 |     return Y_prediction
--------------------------------------------------------------------------------
/ml_algorithms/src/algorithms/logistic_regression/exec.py:
--------------------------------------------------------------------------------
1 | from binaryclassification import *
2 | from lr_utils import *
3 | from PIL import Image # needed for Image.fromarray below
4 | def load_data_test():
5 |     train_set_x_orig, train_set_y, test_set_x_orig, test_set_y, classes = load_dataset()
6 | 
7 |     index = 26
8 |     plt.imshow(train_set_x_orig[index])
9 | 
10 |     # use instead of imshow
11 |     image_data = train_set_x_orig[index, :, :, :]
12 |     img = Image.fromarray(image_data, 'RGB')
13 |     #img.show()
14 | 
15 |     print(classes[train_set_y[:, index]])
16 |     print(classes[np.squeeze(train_set_y[:, index])].decode('utf-8'))
17 | 
18 | def sigmoid_function_test():
19 | 
20 |     # test sigmoid function
21 |     print('sigmoid of [0, 2] is {}'.format(sigmoid(np.array([0, 2]))))
22 | 
23 |     dim = 5
24 |     w, b = initialize_with_zeros(dim)
25 |     print('w = {}'.format(w))
26 |     print('b = {}'.format(b))
27 | 
28 | def feedforward_test():
29 |     w = np.array([[1.], [2.]])
30 |     b = 2.
31 |     X = np.array([[1., 2., -1.],[3., 4., -3.2]])
32 |     Y = np.array([[1, 0, 1]])
33 | 
34 |     grads, cost = propagate(w, b, X, Y)
35 | 
36 |     print('dw= {}'.format(grads['dw']))
37 |     print('db= {}'.format(grads['db']))
38 |     print('cost= {}'.format(cost))
39 | 
40 | def optimization_test():
41 |     w = np.array([[1.], [2.]])
42 |     b = 2.
43 |     X = np.array([[1., 2., -1.],[3., 4., -3.2]])
44 |     Y = np.array([[1, 0, 1]])
45 | 
46 |     params, grads, costs = optimize(w, b, X, Y, num_iterations=100, learning_rate=0.009, print_cost=True)
47 | 
48 |     print('w= {}'.format(params['w']))
49 |     print('b= {}'.format(params['b']))
50 |     print('dw= {}'.format(grads['dw']))
51 |     print('db= {}'.format(grads['db']))
52 | 
53 | def prediction_test():
54 |     w = np.array([[0.1124579],[0.23106775]])
55 |     b = -.3
56 |     X = np.array([[1., -1.1, -3.2],[1.2, 2., 0.1]])
57 |     print('predictions={}'.format(predict(w, b, X)))
58 | 
59 | prediction_test()
60 | #optimization_test()
61 | #feedforward_test()
62 | 
--------------------------------------------------------------------------------
/ml_algorithms/src/algorithms/logistic_regression/lr_utils.py:
--------------------------------------------------------------------------------
1 | 
2 | #pylint: disable = E0401
3 | import numpy as np
4 | import h5py
5 | import os
6 | 
7 | def load_dataset():
8 | 
9 |     script_dir = os.path.dirname(__file__) #<-- absolute dir the script is in
10 |     rel_path = 'train_catvnoncat.h5' # the .h5 files live alongside this script, not in ../datasets
11 |     abs_file_path = os.path.join(script_dir, rel_path)
12 |     train_dataset = h5py.File(abs_file_path, "r")
13 |     train_set_x_orig = np.array(train_dataset["train_set_x"][:]) # your train set features
14 |     train_set_y_orig = np.array(train_dataset["train_set_y"][:]) # your train set labels
15 | 
16 |     rel_path = 'test_catvnoncat.h5'
17 |     abs_file_path = os.path.join(script_dir, rel_path)
18 |     test_dataset = h5py.File(abs_file_path, "r")
19 |     test_set_x_orig = np.array(test_dataset["test_set_x"][:]) # your test set features
20 |     test_set_y_orig = np.array(test_dataset["test_set_y"][:]) # your test set labels
21 | 
22 |     classes = np.array(test_dataset["list_classes"][:]) # the list of classes
23 | 
24 |     train_set_y_orig = train_set_y_orig.reshape((1, train_set_y_orig.shape[0]))
25 |     test_set_y_orig = test_set_y_orig.reshape((1, test_set_y_orig.shape[0]))
26 | 
27 |     return train_set_x_orig, train_set_y_orig, test_set_x_orig, test_set_y_orig, classes
28 | 
29 | 
--------------------------------------------------------------------------------
/ml_algorithms/src/algorithms/logistic_regression/test_catvnoncat.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/carmelgafa/ml-from-scratch/c51c4f4c88829da26a448dfc6bc48d9ec706df96/ml_algorithms/src/algorithms/logistic_regression/test_catvnoncat.h5
--------------------------------------------------------------------------------
/ml_algorithms/src/algorithms/logistic_regression/train_catvnoncat.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/carmelgafa/ml-from-scratch/c51c4f4c88829da26a448dfc6bc48d9ec706df96/ml_algorithms/src/algorithms/logistic_regression/train_catvnoncat.h5 -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/svm/matplotlib_test.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | 4 | fig = plt.figure() 5 | ax = fig.add_subplot(1,1,1) 6 | 7 | colors = {1:'r', -1:'b'} 8 | 9 | 10 | data_dict = {-1:np.array([[1,7],[2,8], [3,8], ]), 11 | 1:np.array([[5,1], [6,-1], [7,3], ])} 12 | 13 | 14 | for i in data_dict: 15 | for x in data_dict[i]: 16 | print(x[0], x[1]) 17 | ax.scatter(x[0], x[1], s=100, color=colors[i]) 18 | 19 | 20 | plt.show() -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/svm/supportvectormachine.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | from matplotlib import style 3 | import numpy as np 4 | style.use('ggplot') 5 | 6 | class SupportVectorMachine: 7 | def __init__(self, visualization=True): 8 | self.visualization = visualization 9 | self.colors = {1:'r', -1:'b'} 10 | if self.visualization: 11 | self.fig = plt.figure() 12 | self.ax = self.fig.add_subplot(1,1,1) 13 | 14 | def fit(self, data): 15 | self.data = data 16 | # { ||w||: [w,b]} 17 | opt_dict = {} 18 | 19 | transforms = [[1,1], 20 | [1,-1], 21 | [-1,1], 22 | [-1,-1]] 23 | 24 | all_data = [] 25 | for yi in self.data: 26 | for featureset in self.data[yi]: 27 | for feature in featureset: 28 | all_data.append(feature) 29 | 30 | self.max_feature_value = max(all_data) 31 | self.min_feature_value = min(all_data) 32 | all_data = None 33 | 34 | step_sizes = [self.max_feature_value * 0.1, 35 | self.max_feature_value * 0.01, 36 | self.max_feature_value * 0.001,] 37 | 38 | # very expensive 39 | b_range_multiple = 2 40 | 41 | # with b we can take bigger steps 42 | b_multiple = 5 43 | latest_optimum = self.max_feature_value * 10 44 | 45 | for step in step_sizes: 46 | w = np.array([latest_optimum, latest_optimum]) 47 | print(w) 48 | # possible since this is a convex problem 49 | optimized = False 50 | while not optimized: 51 | for b in np.arange(-1*(self.max_feature_value*b_range_multiple), 52 | self.max_feature_value*b_range_multiple, 53 | step*b_multiple): 54 | 55 | for transformation in transforms: 56 | w_t = w * transformation 57 | found_option = True 58 | 59 | # will have issues with huge volumes. 
60 | # yi(xi.w+b) >= 1 61 | for i in self.data: 62 | for xi in self.data[i]: 63 | yi = i 64 | if not yi * (np.dot(w_t, xi)+ b) >= 1: 65 | found_option = False 66 | 67 | 68 | if found_option: 69 | opt_dict[np.linalg.norm(w_t)] = [w_t, b] 70 | 71 | if w[0] < 0: 72 | optimized = True 73 | print('Optimized a step') 74 | else: 75 | # not mathematically correct 76 | # w - [step, step] 77 | w = w - step 78 | 79 | norms = sorted([n for n in opt_dict]) 80 | opt_choice = opt_dict[norms[0]] 81 | 82 | self.w = opt_choice[0] 83 | self.b = opt_choice[1] 84 | latest_optimum = opt_choice[0][0]+step*2 85 | 86 | for i in self.data: 87 | for xi in self.data[i]: 88 | yi=i 89 | 90 | def predict(self, features): 91 | # sign (x_i.w + b) 92 | classification = np.sign(np.dot(np.array(features), self.w) + self.b) 93 | 94 | if classification != 0 and self.visualization: 95 | self.ax.scatter(features[0], features[1], s=200, marker='*', c=self.colors[classification]) 96 | 97 | 98 | return classification 99 | 100 | 101 | def visualize(self): 102 | [[self.ax.scatter(x[0],x[1],s=100,color=self.colors[i]) for x in data_dict[i]] for i in data_dict] 103 | 104 | # hyperplane = x.w+b 105 | # v = x.w+b 106 | # psv = 1 107 | # nsv = -1 108 | # dec = 0 109 | def hyperplane(x,w,b,v): 110 | return (-w[0]*x-b+v) / w[1] 111 | 112 | datarange = (self.min_feature_value*0.9,self.max_feature_value*1.1) 113 | hyp_x_min = datarange[0] 114 | hyp_x_max = datarange[1] 115 | 116 | # (w.x+b) = 1 117 | # positive support vector hyperplane 118 | psv1 = hyperplane(hyp_x_min, self.w, self.b, 1) 119 | psv2 = hyperplane(hyp_x_max, self.w, self.b, 1) 120 | self.ax.plot([hyp_x_min,hyp_x_max],[psv1,psv2], 'k') 121 | 122 | # (w.x+b) = -1 123 | # negative support vector hyperplane 124 | nsv1 = hyperplane(hyp_x_min, self.w, self.b, -1) 125 | nsv2 = hyperplane(hyp_x_max, self.w, self.b, -1) 126 | self.ax.plot([hyp_x_min,hyp_x_max],[nsv1,nsv2], 'k') 127 | 128 | # (w.x+b) = 0 129 | # positive support vector hyperplane 130 | db1 = hyperplane(hyp_x_min, self.w, self.b, 0) 131 | db2 = hyperplane(hyp_x_max, self.w, self.b, 0) 132 | self.ax.plot([hyp_x_min,hyp_x_max],[db1,db2], 'y--') 133 | 134 | plt.show() 135 | 136 | 137 | data_dict = {-1:np.array([[1,4], 138 | [2,8],]), 139 | 140 | +1:np.array([ [6,-1], 141 | [7,3.5],])} 142 | 143 | svm = SupportVectorMachine() 144 | svm.fit(data=data_dict) 145 | 146 | svm.predict([1,0.745]) 147 | svm.predict([2,1]) 148 | svm.predict([3,-5]) 149 | 150 | svm.visualize() -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/svm/test_code.py: -------------------------------------------------------------------------------- 1 | a = [1,2,3] 2 | b = [4,5,6] 3 | 4 | 5 | for x,y in zip(a,b): 6 | print(x,y) -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/utils/__pycache__/simple_tree.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carmelgafa/ml-from-scratch/c51c4f4c88829da26a448dfc6bc48d9ec706df96/ml_algorithms/src/algorithms/utils/__pycache__/simple_tree.cpython-39.pyc -------------------------------------------------------------------------------- /ml_algorithms/src/algorithms/utils/tree_exec.py: -------------------------------------------------------------------------------- 1 | ''' 2 | simple example demonstrating the operation of the SimpleTree 3 | 4 | 1. creates a tree with nodes and edges 5 | 2. 
creates another tree with nodes and edges
6 | 3. appends the second tree to the first tree (note: the live code below instead demonstrates rule generation on a RootedDAC)
7 | '''
8 | from nary_tree import NAryTree
9 | from rooted_dac import RootedDAC
10 | 
11 | 
12 | t = RootedDAC()
13 | 
14 | # t.add_node('a1')
15 | # t.add_node('a2')
16 | # t.add_node('a3')
17 | # t.add_node('a4')
18 | # t.add_node('a5')
19 | 
20 | # t.add_edge('a1', 'a2', 'aa')
21 | # t.add_edge('a1', 'a3', 'ab')
22 | # t.add_edge('a2', 'a4', 'ac')
23 | # t.add_edge('a2', 'a5', 'ad')
24 | 
25 | # print(t.generate_rules())
26 | 
27 | 
28 | # example for disjunction in antecedent clause
29 | t.add_node('A')
30 | t.add_node('B')
31 | t.add_node('X_x')
32 | t.add_node('X_y')
33 | 
34 | t.add_edge('A', 'X_x', 'a')
35 | t.add_edge('A', 'X_y', 'b')
36 | t.add_edge('A', 'X_x', 'c')
37 | 
38 | print(t.generate_rules())
--------------------------------------------------------------------------------
/ml_algorithms/src/introduction_to_computation/brute_force/fibonacci.py:
--------------------------------------------------------------------------------
1 | def fib(n):
2 |     if n==0 or n==1:
3 |         return 1
4 |     else:
5 |         return fib(n-1) + fib(n-2)
6 | 
7 | def fast_fib(n, memo={}):
8 | 
9 |     if n==0 or n==1:
10 |         return 1
11 | 
12 |     if n in memo:
13 |         return memo[n]
14 |     else:
15 |         result = fast_fib(n-1, memo) + fast_fib(n-2, memo) # pass the memo down explicitly
16 |         memo[n] = result
17 |         return result
18 | 
19 | print(fast_fib(120))
--------------------------------------------------------------------------------
/ml_algorithms/src/introduction_to_computation/brute_force/knapsack.py:
--------------------------------------------------------------------------------
1 | class Food ():
2 | 
3 |     def __init__(self, n:str, v:int, w:int) -> None:
4 |         self.name = n
5 |         self.value = v
6 |         self.calories = w
7 | 
8 |     def get_value(self) -> int:
9 |         return self.value
10 | 
11 |     def get_cost(self) -> int:
12 |         return self.calories
13 | 
14 |     def density(self) -> float:
15 |         return self.get_value() / self.get_cost()
16 | 
17 |     def __str__(self) -> str:
18 |         return f'{self.name} : <{self.value}, {self.calories}>'
19 | 
20 | def build_menu(names, values, calories):
21 | 
22 |     menu = []
23 |     for i in range(len(values)): # iterates over values, so names beyond len(values) are ignored
24 |         menu.append(Food(names[i], values[i], calories[i]))
25 | 
26 |     return menu
27 | 
28 | def max_val(to_consider, available):
29 | 
30 |     # available is the remaining calorie budget
31 | 
32 |     # base case: nothing left to consider or no budget available
33 |     if to_consider == [] or available == 0:
34 |         result = (0, ())
35 | 
36 |     # does the first element's cost fit within the available budget?
37 |     # if it does not fit, the take (left) branch is not considered
38 |     elif to_consider[0].get_cost() > available:
39 |         result = max_val(to_consider[1:], available)
40 | 
41 |     # consider both branches
42 |     else:
43 |         next_item = to_consider[0]
44 | 
45 |         # left branch - take
46 |         # we took the item, so available is now minus the cost of the taken item
47 |         with_val, with_to_take = max_val(to_consider[1:],
48 |                                          available - next_item.get_cost())
49 | 
50 |         # value of subbranch plus value of item as it was taken
51 |         with_val += next_item.get_value()
52 | 
53 |         # right branch - leave
54 |         without_val, without_to_take = max_val(to_consider[1:],
55 |                                                available)
56 | 
57 |         # choose better branch
58 |         if with_val > without_val:
59 |             result = (with_val, with_to_take + (next_item,))
60 |         else:
61 |             result = (without_val, without_to_take)
62 | 
63 |     return result
64 | 
65 | def test_max_val(foods, max_units):
66 | 
67 |     print(f'use search tree to allocate {max_units} calories')
68 | 
69 |     val, taken = max_val(foods, max_units)
70 | 
71 |     print(f'total value of items taken {val}')
72 | 
73 |     for item in taken:
74 |         print(f'\t{item}')
75 | 
76 | if __name__ == "__main__":
77 |     names = ['wine', 'beer', 'pizza', 'burger', 'fries', 'cola', 'apple', 'donut', 'cake'] # values/calories below have 8 entries, so build_menu drops 'cake'
78 |     values = [89, 90, 95, 100, 90, 79, 50, 10]
79 |     calories = [123, 154, 258, 354, 365, 150, 95, 195]
80 |     foods = build_menu(names, values, calories)
81 | 
82 |     test_max_val(foods, 750)
83 | 
--------------------------------------------------------------------------------
/ml_algorithms/src/introduction_to_computation/confidence_interval/normal_distribution_gen.py:
--------------------------------------------------------------------------------
1 | import random
2 | import matplotlib.pylab as plb
3 | 
4 | 
5 | dist = []
6 | num_samples = 1000000
7 | 
8 | for i in range(num_samples):
9 |     # first parameter mean, second std dev
10 |     dist.append(random.gauss(0, 100))
11 | 
12 | weights = [1/num_samples]*len(dist)
13 | v = plb.hist(dist, bins=100, weights=weights)
14 | 
15 | plb.xlabel('x')
16 | plb.ylabel('Relative Frequency')
17 | 
18 | print(f'fraction within approx 200 of mean = {sum(v[0][30:70])}')
19 | 
20 | plb.show()
--------------------------------------------------------------------------------
/ml_algorithms/src/introduction_to_computation/data_structures/graph.py:
--------------------------------------------------------------------------------
1 | class Node():
2 |     def __init__(self, name) -> None:
3 |         self.name = name
4 |     def get_name(self):
5 |         return self.name
6 |     def __str__(self) -> str:
7 |         return self.name
8 | 
9 | class Edge():
10 |     def __init__(self, src, dest) -> None:
11 |         self.src = src
12 |         self.dest = dest
13 |     def get_source(self):
14 |         return self.src
15 |     def get_destination(self):
16 |         return self.dest
17 |     def __str__(self) -> str:
18 |         return f'{self.src.get_name()} -> {self.dest.get_name()}'
19 | 
20 | class Digraph():
21 |     '''
22 |     edges point in only one direction
23 |     options are:
24 |     1. create adjacency matrix joining src to dest
25 |        fine for a digraph as the matrix can encode direction
26 |        (it need not be symmetric); however,
27 |        with few edges it yields a huge matrix of mostly zeros
28 |     2.
adjacency list for every node have a list of destinations 29 | nodes keys in dict 30 | ''' 31 | def __init__(self) -> None: 32 | self.edges ={} 33 | 34 | def add_node(self, node): 35 | if node in self.edges: 36 | raise ValueError('Duplicate Node') 37 | else: 38 | self.edges[node] = [] 39 | 40 | def add_edge(self, edge:Edge): 41 | src = edge.get_source() 42 | dest = edge.get_destination() 43 | if not(src in self.edges and dest in self.edges): 44 | raise ValueError('Node not in graph') 45 | self.edges[src].append(dest) 46 | 47 | def get_children(self, node): 48 | return self.edges[node] 49 | 50 | def has_node(self, node): 51 | return node in self.edges 52 | 53 | def get_node(self, name): 54 | for n in self.edges: 55 | if n.get_name() == name: 56 | return n 57 | raise NameError(name) 58 | 59 | def __str__(self) -> str: 60 | result = '' 61 | for src in self.edges: 62 | for dest in self.edges[src]: 63 | result = result + f'{src.get_name()} -> {dest.get_name()}\n' 64 | return result[:-1] 65 | 66 | class Graph(Digraph): 67 | def add_edge(self, edge: Edge): 68 | Digraph.add_edge(self,edge) 69 | rev_edge = Edge(edge.get_destination(), edge.get_source()) 70 | Digraph.add_edge(self, rev_edge) 71 | 72 | def build_city_graph(graph_type): 73 | providence = Node('providence') 74 | boston = Node('boston') 75 | new_york = Node('new york') 76 | denver = Node('denver') 77 | phoenix = Node('phoenix') 78 | chicago = Node('chicago') 79 | los_angeles = Node('los angeles') 80 | 81 | edges = [] 82 | edges.append(Edge(providence, boston)) 83 | edges.append(Edge(providence, new_york)) 84 | edges.append(Edge(denver, phoenix)) 85 | edges.append(Edge(denver, new_york)) 86 | edges.append(Edge(new_york, chicago)) 87 | edges.append(Edge(chicago, denver)) 88 | edges.append(Edge(chicago, phoenix)) 89 | edges.append(Edge(boston, providence)) 90 | edges.append(Edge(boston, new_york)) 91 | edges.append(Edge(los_angeles, boston)) 92 | 93 | graph = graph_type() 94 | graph.add_node(providence) 95 | graph.add_node(boston) 96 | graph.add_node(new_york) 97 | graph.add_node(denver) 98 | graph.add_node(phoenix) 99 | graph.add_node(chicago) 100 | graph.add_node(los_angeles) 101 | 102 | for edge in edges: 103 | graph.add_edge(edge) 104 | 105 | return graph 106 | 107 | 108 | if __name__ == "__main__": 109 | graph = build_city_graph(Digraph) 110 | print(graph) -------------------------------------------------------------------------------- /ml_algorithms/src/introduction_to_computation/dice/__pycache__/biased_die.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carmelgafa/ml-from-scratch/c51c4f4c88829da26a448dfc6bc48d9ec706df96/ml_algorithms/src/introduction_to_computation/dice/__pycache__/biased_die.cpython-39.pyc -------------------------------------------------------------------------------- /ml_algorithms/src/introduction_to_computation/dice/__pycache__/dishonest_casino.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carmelgafa/ml-from-scratch/c51c4f4c88829da26a448dfc6bc48d9ec706df96/ml_algorithms/src/introduction_to_computation/dice/__pycache__/dishonest_casino.cpython-39.pyc -------------------------------------------------------------------------------- /ml_algorithms/src/introduction_to_computation/dice/__pycache__/fair_casino.cpython-39.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/carmelgafa/ml-from-scratch/c51c4f4c88829da26a448dfc6bc48d9ec706df96/ml_algorithms/src/introduction_to_computation/dice/__pycache__/fair_casino.cpython-39.pyc -------------------------------------------------------------------------------- /ml_algorithms/src/introduction_to_computation/dice/__pycache__/fair_die.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carmelgafa/ml-from-scratch/c51c4f4c88829da26a448dfc6bc48d9ec706df96/ml_algorithms/src/introduction_to_computation/dice/__pycache__/fair_die.cpython-39.pyc -------------------------------------------------------------------------------- /ml_algorithms/src/introduction_to_computation/dice/__pycache__/loaded_die.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carmelgafa/ml-from-scratch/c51c4f4c88829da26a448dfc6bc48d9ec706df96/ml_algorithms/src/introduction_to_computation/dice/__pycache__/loaded_die.cpython-39.pyc -------------------------------------------------------------------------------- /ml_algorithms/src/introduction_to_computation/dice/dishonest_casino.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | from fair_die import FairDie 3 | from loaded_die import LoadedDie 4 | from numpy import mean 5 | from numpy import arange 6 | import random 7 | import enum 8 | 9 | 10 | 11 | class DiceThrow(enum.Enum): 12 | FAIR = enum.auto() 13 | LOADED = enum.auto() 14 | class DishonestCasino(): 15 | 16 | def __init__(self, p1:float, p2:float) -> None: 17 | self.fair_die = FairDie() 18 | self.biased_die = LoadedDie() 19 | self.p_1 = p1 20 | self.p_2 = p2 21 | 22 | 23 | 24 | def play(self, number_of_tosses:int)->float: 25 | 26 | results = [] 27 | next_toss = DiceThrow.FAIR 28 | prob_first_toss = random.uniform(0,1) 29 | if prob_first_toss > 0.5: 30 | next_toss = DiceThrow.LOADED 31 | 32 | fair_toss_counter = 0 33 | loaded_toss_counter = 0 34 | 35 | 36 | for i in range(number_of_tosses): 37 | 38 | if next_toss == DiceThrow.FAIR: 39 | fair_toss_counter += 1 40 | prob_next_toss = random.uniform(0,1) 41 | if prob_next_toss > self.p_1: 42 | next_toss = DiceThrow.LOADED 43 | else: 44 | loaded_toss_counter += 1 45 | prob_next_toss = random.uniform(0,1) 46 | if prob_next_toss > self.p_2: 47 | next_toss = DiceThrow.FAIR 48 | 49 | # print(fair_toss_counter) 50 | # print(loaded_toss_counter) 51 | 52 | fair_tosses = self.fair_die.roll_multiple(fair_toss_counter) 53 | loaded_tosses = self.biased_die.roll_multiple(loaded_toss_counter) 54 | 55 | mean_all_tosses = (sum(fair_tosses)+sum(loaded_tosses))/(fair_toss_counter+loaded_toss_counter) 56 | 57 | return mean_all_tosses 58 | 59 | 60 | 61 | def play_old(self, number_of_tosses:int)->list: 62 | 63 | results = [] 64 | 65 | next_toss = DiceThrow.FAIR 66 | prob_first_toss = random.uniform(0,1) 67 | if prob_first_toss > 0.5: 68 | next_toss = DiceThrow.LOADED 69 | 70 | for i in range(number_of_tosses): 71 | 72 | if next_toss == DiceThrow.FAIR: 73 | results.append(self.fair_die.roll()) 74 | prob_next_toss = random.uniform(0,1) 75 | if prob_next_toss > self.p_1: 76 | next_toss = DiceThrow.LOADED 77 | else: 78 | results.append(self.biased_die.roll()) 79 | prob_next_toss = random.uniform(0,1) 80 | if prob_next_toss > self.p_2: 81 | next_toss = DiceThrow.FAIR 82 | return results 83 | 84 | def simulate(self, t:int): 85 | '''Simulate Method''' 86 | 
simulation_mean = self.play(t)
87 |         return simulation_mean # play() already returns the mean of the tosses
88 | 
89 | 
90 |     def test(self)->None:
91 | 
92 |         avg_results = []
93 |         number_tosses_per_play = 100
94 |         number_of_plays = 10
95 |         for i in range(0, number_of_plays):
96 |             # append the average of the tosses
97 |             play = self.play(number_tosses_per_play)
98 |             mean_play = mean(play)
99 |             print(mean_play)
100 |             avg_results.append(mean_play)
101 | 
102 |         # possible averages from 1 to 6 in steps of 0.5
103 |         # ie 11 possible outcomes
104 |         avg_frequencies = []
105 |         for i in arange(1,6.5, 0.5):
106 |             avg_frequencies.append(avg_results.count(i) / number_of_plays)
107 | 
108 |         print(avg_frequencies)
109 |         plt.bar(arange(1,6.5, 0.5).tolist(), avg_frequencies, color='g', edgecolor='blue', width=0.5)
110 |         plt.show()
111 | 
112 | if __name__=='__main__':
113 |     casino = DishonestCasino(0.99, 0.1)
114 |     print(casino.play(100000))
--------------------------------------------------------------------------------
/ml_algorithms/src/introduction_to_computation/dice/fair_casino.py:
--------------------------------------------------------------------------------
1 | import matplotlib.pyplot as plt
2 | from fair_die import FairDie
3 | from numpy import mean
4 | from numpy import arange
5 | import numpy as np
6 | 
7 | class FairCasino():
8 | 
9 |     def __init__(self) -> None:
10 |         self.die = FairDie()
11 | 
12 |     def play(self, number_of_tosses:int=2):
13 |         return self.die.roll_multiple(number_of_tosses)
14 | 
15 | 
16 |     def simulate(self, t):
17 |         mean_res = mean(self.play(t))
18 |         print(mean_res)
19 |         return mean_res
20 | 
21 |     def test(self)->None:
22 | 
23 |         avg_results = []
24 | 
25 |         number_of_plays = 5000
26 |         for i in range(0, number_of_plays):
27 |             # append the average of the tosses
28 |             play = self.play()
29 |             mean_play = mean(play)
30 |             avg_results.append(mean_play)
31 | 
32 |         # possible averages from 1 to 6 in steps of 0.5
33 |         # ie 11 possible outcomes
34 |         avg_frequencies = []
35 |         for i in arange(1,6.5, 0.5):
36 |             avg_frequencies.append(avg_results.count(i) / number_of_plays)
37 | 
38 |         print(avg_frequencies)
39 | 
40 |         plt.bar(arange(1,6.5, 0.5), avg_frequencies, color='g', edgecolor='blue', width=0.5)
41 |         plt.show()
42 | 
43 | 
44 | if __name__=='__main__':
45 |     casino = FairCasino()
46 |     casino.test()
47 | 
--------------------------------------------------------------------------------
/ml_algorithms/src/introduction_to_computation/dice/fair_die.py:
--------------------------------------------------------------------------------
1 | import random
2 | import numpy as np
3 | import matplotlib.pyplot as plt
4 | 
5 | class FairDie:
6 | 
7 |     def __init__(self) -> None:
8 |         ...
9 | 
10 |     def roll(self)->int:
11 |         return random.randint(1,6)
12 | 
13 |     def roll_multiple(self, number_of_tosses):
14 |         x = np.random.random((number_of_tosses, 1)).squeeze()
15 |         return np.ceil(x*6) # maps U(0,1) draws to the faces 1..6 (as floats)
16 | 
17 |     def test_die(self)->None:
18 |         outcomes = []
19 | 
20 |         number_of_trials = 5000
21 |         for i in range(number_of_trials):
22 |             outcomes.append(self.roll())
23 | 
24 |         results = []
25 |         for i in np.arange(1,7):
26 |             results.append(outcomes.count(i) / number_of_trials)
27 | 
28 |         plt.bar(np.arange(1,7), results, color='g', edgecolor='blue', width=1)
29 |         plt.show()
30 | 
31 | 
32 | if __name__ == '__main__':
33 |     die = FairDie()
34 |     die.roll_multiple(10)
35 | 
--------------------------------------------------------------------------------
/ml_algorithms/src/introduction_to_computation/dice/loaded_die.py:
--------------------------------------------------------------------------------
1 | '''Loaded die implementation'''
2 | 
3 | import random
4 | import numpy as np
5 | import matplotlib.pyplot as plt
6 | 
7 | class LoadedDie:
8 |     '''A loaded die. probabilities for each number defined in __init__'''
9 | 
10 |     def __init__(self) -> None:
11 |         '''Initializes new die'''
12 |         self.probabilities = np.array([0.5, 0.1, 0.1, 0.1, 0.1, 0.1])
13 |         self.cumul_array = np.cumsum(self.probabilities)
14 | 
15 |     def roll(self)->int:
16 |         '''rolls the die'''
17 |         precision = 3
18 |         random_number = random.randint(0, 10 ** precision) / float(10 ** precision)
19 |         mapped_cumul = self.cumul_array - random_number
20 |         rolled_number = np.where(mapped_cumul > 0, mapped_cumul, np.inf).argmin()
21 |         return rolled_number + 1
22 | 
23 |     def roll_multiple(self, number_of_tosses):
24 |         '''rolls the die multiple times'''
25 |         x = np.random.random((number_of_tosses, 1)).squeeze()
26 |         x = np.ceil(x*10)-4 # ceil(10u)-4 lands in -3..6; the clamp below folds -3..0 into 1, giving P(1)=0.5 and P(2..6)=0.1
27 |         x[x<=0] = 1
28 |         return x
29 | 
30 | 
31 |     def test_die(self)->None:
32 |         '''executes a test for the die'''
33 |         outcomes = []
34 | 
35 |         number_of_trials = 5000
36 |         for i in range(number_of_trials):
37 |             outcomes.append(self.roll())
38 | 
39 |         results = []
40 |         for i in np.arange(1,7):
41 |             results.append(outcomes.count(i) / number_of_trials)
42 | 
43 |         plt.bar(np.arange(1,7), results, color='g', edgecolor='blue', width=1)
44 |         plt.show()
45 | 
46 | if __name__ == '__main__':
47 |     die = LoadedDie()
48 |     print(die.roll_multiple(100000))
49 |     die.test_die()
50 | 
--------------------------------------------------------------------------------
/ml_algorithms/src/introduction_to_computation/dice/simulation_results.txt:
--------------------------------------------------------------------------------
1 | Mean for dishonest casino with p1:0.99 and p2:0.05 is 3.489619392000011
2 | Mean for dishonest casino with p1:0.95 and p2:0.1 is 3.4474341769999897
3 | Mean for dishonest casino with p1:0.9 and p2:0.2 is 3.388901595000009
4 | 
5 | -----------------------------------------------------------------------------
6 | 
7 | Task 3
8 | ------
9 | 
10 | Dishonest simulation with p1 = 0.9 and p2 = 0.2
11 | mean: 3.388901595, variance:3.096003910988601e-05, standard deviation:0.005564174611735869
12 | 
13 | Fair Simulation
14 | mean: 3.4999125920000007, variance:2.8043537135249523e-05, standard deviation:0.005295614896803725
15 | 
16 | Note for next Calculation
17 | -------------------------
18 | 
19 | To use the estimated variance to find the sample size required to obtain an RMSE error of 0.001,
20 | we use the following formula:
21 | 
22 | n = (z * sigma / e)^2
23 | 
24 | Where:
25 | 
26 | n is the sample size
27 | z is the standard normal deviate
(e.g. for a 95% confidence level, z = 1.96) 28 | sigma is the estimated population standard deviation 29 | e is the desired margin of error (e.g. e = 0.001) 30 | 31 | This formula states that the sample size required to achieve a certain level of 32 | precision (e) is proportional to the square of the ratio of the standard deviation 33 | to the margin of error. 34 | 35 | It's important to note that this formula assumes that your estimator has a normal 36 | distribution and that the true variance is known or has been estimated from a sample. 37 | 38 | Also, it's important to know that this is only a rough estimation, 39 | and the sample size required for a specific problem may be influenced 40 | by other factors like the distribution of the data, the model assumptions, 41 | the desired confidence level, etc. 42 | 43 | 44 | 45 | 46 | 47 | ----------------------------------------------------------------------------- 48 | 49 | -------------------------------------------------------------------------------- /ml_algorithms/src/introduction_to_computation/dice/simulations.py: -------------------------------------------------------------------------------- 1 | from dishonest_casino import DishonestCasino 2 | from fair_casino import FairCasino 3 | import csv 4 | import numpy as np 5 | 6 | def fair_casino_simulation(): 7 | casino = FairCasino() 8 | 9 | mean_results = [] 10 | 11 | for _ in range(10000): 12 | mean_results.append(casino.simulate(100000)) 13 | 14 | with open('fair_results.csv', 'w') as f: 15 | write = csv.writer(f) 16 | write.writerow(mean_results) 17 | 18 | mean = sum(mean_results)/len(mean_results) 19 | print('Mean for fair casino: ', mean) 20 | 21 | 22 | def dishonest_casino_simulation_run(p1:float, p2:float): 23 | 24 | mean_results = [] 25 | 26 | casino = DishonestCasino(p1, p2) 27 | 28 | for _ in range(10000): 29 | mean_results.append(casino.simulate(100000)) 30 | 31 | with open(f'dishonest_results-{p1}-{p2}.csv', 'w', encoding='UTF-8') as f: 32 | write = csv.writer(f) 33 | write.writerow(mean_results) 34 | 35 | simulations_mean = sum(mean_results)/len(mean_results) 36 | print(f'Mean for dishonest casino with p1:{p1} and p2:{p2} is {simulations_mean}') 37 | 38 | 39 | def dishonest_casino_simulation(): 40 | 41 | dishonest_casino_simulation_run(0.99, 0.05) 42 | dishonest_casino_simulation_run(0.95, 0.1) 43 | dishonest_casino_simulation_run(0.9, 0.2) 44 | 45 | 46 | def calculate_variance(simulation_data:np.array):#type:ignore 47 | simulation_mean = np.mean(simulation_data) 48 | 49 | variance = np.sum(np.square(simulation_data - simulation_mean))/(np.size(simulation_data)-1) 50 | standard_deviation = np.sqrt(variance) 51 | 52 | print(f'mean: {simulation_mean}, variance:{variance}, standard deviation:{standard_deviation}') 53 | 54 | 55 | def dishonest_trial_variance_calculation(): 56 | print('Dishonest simulation with p1 = 0.9 and p2 = 0.2') 57 | simulation_data = np.genfromtxt('dishonest_results-0.9-0.2.csv', delimiter=',') 58 | calculate_variance(simulation_data) 59 | 60 | 61 | def fair_trial_variance_calculation(): 62 | print('Fair Simulation') 63 | simulation_data = np.genfromtxt('fair_results.csv', delimiter=',') 64 | calculate_variance(simulation_data) 65 | 66 | if __name__ == '__main__': 67 | # fair_casino_simulation() 68 | # dishonest_casino_simulation() 69 | dishonest_trial_variance_calculation() 70 | fair_trial_variance_calculation() 71 | -------------------------------------------------------------------------------- 
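A small companion sketch (not part of the repository) that turns the n = (z * sigma / e)^2 note from simulation_results.txt into code; the sigma values are the standard deviations printed by calculate_variance above, and z = 1.96 assumes a 95% confidence level:

import math

def required_sample_size(sigma: float, e: float, z: float = 1.96) -> int:
    # n = (z * sigma / e)^2, rounded up to a whole number of trials
    return math.ceil((z * sigma / e) ** 2)

# standard deviations estimated in simulation_results.txt
print(required_sample_size(0.005564, 0.001))  # dishonest casino (p1=0.9, p2=0.2): ~119 trials
print(required_sample_size(0.005296, 0.001))  # fair casino: ~108 trials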
/ml_algorithms/src/introduction_to_computation/graph_search/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carmelgafa/ml-from-scratch/c51c4f4c88829da26a448dfc6bc48d9ec706df96/ml_algorithms/src/introduction_to_computation/graph_search/__init__.py -------------------------------------------------------------------------------- /ml_algorithms/src/introduction_to_computation/graph_search/breath_first_search.py: -------------------------------------------------------------------------------- 1 | from graph import Digraph, Node, Edge 2 | 3 | def build_city_graph(graph_type): 4 | providence = Node('providence') 5 | boston = Node('boston') 6 | new_york = Node('new york') 7 | denver = Node('denver') 8 | phoenix = Node('phoenix') 9 | chicago = Node('chicago') 10 | los_angeles = Node('los angeles') 11 | 12 | edges = [] 13 | edges.append(Edge(providence, boston)) 14 | edges.append(Edge(providence, new_york)) 15 | edges.append(Edge(denver, phoenix)) 16 | edges.append(Edge(denver, new_york)) 17 | edges.append(Edge(new_york, chicago)) 18 | edges.append(Edge(chicago, denver)) 19 | edges.append(Edge(chicago, phoenix)) 20 | edges.append(Edge(boston, providence)) 21 | edges.append(Edge(boston, new_york)) 22 | edges.append(Edge(los_angeles, boston)) 23 | 24 | graph = graph_type() 25 | graph.add_node(providence) 26 | graph.add_node(boston) 27 | graph.add_node(new_york) 28 | graph.add_node(denver) 29 | graph.add_node(phoenix) 30 | graph.add_node(chicago) 31 | graph.add_node(los_angeles) 32 | 33 | for edge in edges: 34 | graph.add_edge(edge) 35 | 36 | return graph 37 | 38 | def print_path(path): 39 | names=[] 40 | for loc in path: 41 | names.append(loc.get_name()) 42 | return '->'.join(names) 43 | 44 | 45 | def bfs(graph:Digraph, start, end, to_print=False): 46 | 47 | init_path = [start] 48 | 49 | # queue is a list of paths 50 | path_queue = [init_path] 51 | 52 | while len(path_queue) != 0: 53 | 54 | tmp_path = path_queue.pop(0) 55 | if to_print: 56 | print(f'current bfs path: {print_path(tmp_path)}') 57 | 58 | last_node = tmp_path[-1] 59 | if last_node == end: 60 | return tmp_path 61 | for next_node in graph.get_children(last_node): 62 | if next_node not in tmp_path: 63 | new_path = tmp_path + [next_node] 64 | path_queue.append(new_path) 65 | 66 | return None 67 | 68 | def shortest_path(graph, start, end, to_print=False): 69 | return bfs(graph, start, end, to_print) 70 | 71 | def test_bfs(source, dest): 72 | graph = build_city_graph(Digraph) 73 | sp = shortest_path(graph, graph.get_node(source), graph.get_node(dest), to_print=True) 74 | 75 | if sp != None: 76 | print(f'shortest path from {source} to {dest} is {print_path(sp)}') 77 | else: 78 | print(f'there is no path from {source} to {dest}') 79 | 80 | 81 | if __name__ == "__main__": 82 | graph = build_city_graph(Digraph) 83 | # print(graph) 84 | test_bfs('boston', 'phoenix') -------------------------------------------------------------------------------- /ml_algorithms/src/introduction_to_computation/graph_search/depth_first_search.py: -------------------------------------------------------------------------------- 1 | from graph import Digraph, Node, Edge 2 | 3 | def build_city_graph(graph_type): 4 | providence = Node('providence') 5 | boston = Node('boston') 6 | new_york = Node('new york') 7 | denver = Node('denver') 8 | phoenix = Node('phoenix') 9 | chicago = Node('chicago') 10 | los_angeles = Node('los angeles') 11 | 12 | edges = [] 13 | edges.append(Edge(providence, boston)) 
14 | edges.append(Edge(providence, new_york)) 15 | edges.append(Edge(denver, phoenix)) 16 | edges.append(Edge(denver, new_york)) 17 | edges.append(Edge(new_york, chicago)) 18 | edges.append(Edge(chicago, denver)) 19 | edges.append(Edge(chicago, phoenix)) 20 | edges.append(Edge(boston, providence)) 21 | edges.append(Edge(boston, new_york)) 22 | edges.append(Edge(los_angeles, boston)) 23 | 24 | graph = graph_type() 25 | graph.add_node(providence) 26 | graph.add_node(boston) 27 | graph.add_node(new_york) 28 | graph.add_node(denver) 29 | graph.add_node(phoenix) 30 | graph.add_node(chicago) 31 | graph.add_node(los_angeles) 32 | 33 | for edge in edges: 34 | graph.add_edge(edge) 35 | 36 | return graph 37 | 38 | def print_path(path): 39 | names=[] 40 | for loc in path: 41 | names.append(loc.get_name()) 42 | return '->'.join(names) 43 | 44 | 45 | def dfs(graph:Digraph, start, end, path, shortest, to_print=False): 46 | 47 | path = path + [start 48 | ] 49 | 50 | if to_print: 51 | print(f'Current dfs path: {print_path(path)}') 52 | 53 | if start == end: 54 | return path 55 | 56 | for node in graph.get_children(start): 57 | # no cycles 58 | if node not in path: 59 | if shortest == None or len(path) < len(shortest): 60 | new_path = dfs(graph, node, end, path, shortest, to_print) 61 | if new_path != None: 62 | shortest = new_path 63 | elif to_print: 64 | print(f'{node} already visited') 65 | 66 | return shortest 67 | 68 | def shortest_path(graph, start, end, to_print=False): 69 | return dfs(graph, start, end, [], None, to_print) 70 | 71 | def test_dfs(source, dest): 72 | graph = build_city_graph(Digraph) 73 | sp = shortest_path(graph, graph.get_node(source), graph.get_node(dest), to_print=True) 74 | 75 | if sp != None: 76 | print(f'shortest path from {source} to {dest} is {print_path(sp)}') 77 | else: 78 | print(f'there is no path from {source} to {dest}') 79 | 80 | 81 | if __name__ == "__main__": 82 | graph = build_city_graph(Digraph) 83 | # print(graph) 84 | test_dfs('boston', 'phoenix') -------------------------------------------------------------------------------- /ml_algorithms/src/introduction_to_computation/graph_search/graph.py: -------------------------------------------------------------------------------- 1 | class Node(): 2 | def __init__(self, name) -> None: 3 | self.name = name 4 | def get_name(self): 5 | return self.name 6 | def __str__(self) -> str: 7 | return self.name 8 | 9 | class Edge(): 10 | def __init__(self, src, dest) -> None: 11 | self.src = src 12 | self.dest = dest 13 | def get_source(self): 14 | return self.src 15 | def get_destination(self): 16 | return self.dest 17 | def __str__(self) -> str: 18 | return f'{self.src.get_name()} -> {self.dest.get_name()}' 19 | 20 | class Digraph(): 21 | ''' 22 | edges direction inb only one direction 23 | options are: 24 | 1. create adjacency matrix joining src to dest 25 | ok for digraph as it can handle both directions 26 | not symmetric therefore 27 | if few edges present a huge matrix with mostly 0 28 | 2. 
adjacency list for every node have a list of destinations 29 | nodes keys in dict 30 | ''' 31 | def __init__(self) -> None: 32 | self.edges ={} 33 | 34 | def add_node(self, node): 35 | if node in self.edges: 36 | raise ValueError('Duplicate Node') 37 | else: 38 | self.edges[node] = [] 39 | 40 | def add_edge(self, edge:Edge): 41 | src = edge.get_source() 42 | dest = edge.get_destination() 43 | if not(src in self.edges and dest in self.edges): 44 | raise ValueError('Node not in graph') 45 | self.edges[src].append(dest) 46 | 47 | def get_children(self, node): 48 | return self.edges[node] 49 | 50 | def has_node(self, node): 51 | return node in self.edges 52 | 53 | def get_node(self, name): 54 | for n in self.edges: 55 | if n.get_name() == name: 56 | return n 57 | raise NameError(name) 58 | 59 | def __str__(self) -> str: 60 | result = '' 61 | for src in self.edges: 62 | for dest in self.edges[src]: 63 | result = result + f'{src.get_name()} -> {dest.get_name()}\n' 64 | return result[:-1] 65 | 66 | class Graph(Digraph): 67 | def add_edge(self, edge: Edge): 68 | Digraph.add_edge(self,edge) 69 | rev_edge = Edge(edge.get_destination(), edge.get_source()) 70 | Digraph.add_edge(self, rev_edge) 71 | 72 | -------------------------------------------------------------------------------- /ml_algorithms/src/introduction_to_computation/greedy_algorithm/knapsack.py: -------------------------------------------------------------------------------- 1 | class Food (): 2 | 3 | def __init__(self, n:str, v:int, w:int) -> None: 4 | self.name = n 5 | self.value = v 6 | self.calories = w 7 | 8 | def get_value(self) -> int: 9 | return self.value 10 | 11 | def get_cost(self) -> int: 12 | return self.calories 13 | 14 | def density(self) -> float: 15 | return self.get_value() / self.get_cost() 16 | 17 | def __str__(self) -> str: 18 | return f'{self.name} : <{self.value}, {self.calories}>' 19 | 20 | 21 | def build_menu(names, values, calories): 22 | 23 | menu = [] 24 | for i in range(len(values)): 25 | menu.append(Food(names[i], values[i], calories[i])) 26 | 27 | return menu 28 | 29 | def greedy(items, max_cost:int, key_function): 30 | 31 | # sort according to key_function, ascending order 32 | items_copy = sorted(items, key=key_function, reverse=True) 33 | 34 | result = [] 35 | 36 | total_value, total_cost = 0, 0 37 | 38 | for i in range(len(items_copy)): 39 | if(total_cost + items_copy[i].get_cost()) <= max_cost: 40 | result.append(items_copy[i]) 41 | total_cost += items_copy[i].get_cost() 42 | total_value += items_copy[i].get_value() 43 | 44 | return (result, total_value) 45 | 46 | def test_greedy(items, constraint:int, key_function) -> None: 47 | 48 | taken, val = greedy(items, constraint, key_function) 49 | 50 | print('Total values of items taken = ', val) 51 | for item in taken: 52 | print('\t',item) 53 | 54 | def test_greedy_functions(foods, max_units:int) -> None: 55 | 56 | # greedy using food value 57 | print(f'Use greedy by value to allocate {max_units} calories') 58 | test_greedy(foods, max_units, Food.get_value) 59 | 60 | # greedy using food cost, calories 61 | # we need inverse of calories to start with foods having the 62 | # smallest numbers, hence lambda fn 63 | print(f'Use greedy by cost to allocate {max_units} calories') 64 | test_greedy(foods, max_units, lambda x: 1/Food.get_cost(x)) 65 | 66 | print(f'Use greedy by density to allocate {max_units} calories') 67 | test_greedy(foods, max_units, Food.density) 68 | 69 | 70 | 71 | if __name__ == "__main__": 72 | names = ['wine', 'beer', 'pizza', 'burger', 
'fries', 'cola', 'apple', 'donut', 'cake'] # values/calories below have 8 entries, so build_menu drops 'cake'
73 |     values = [89, 90, 95, 100, 90, 79, 50, 10]
74 |     calories = [123, 154, 258, 354, 365, 150, 95, 195]
75 |     foods = build_menu(names, values, calories)
76 |     test_greedy_functions(foods, 750)
--------------------------------------------------------------------------------
/ml_algorithms/src/introduction_to_computation/monte_carlo_simulation/fair_roulette.py:
--------------------------------------------------------------------------------
1 | import random
2 | 
3 | class FairRoulette():
4 |     def __init__(self):
5 |         self.pockets = []
6 |         for i in range(1, 37):
7 |             self.pockets.append(i)
8 | 
9 |         self.ball = None
10 | 
11 |         self.pocket_odds = len(self.pockets) - 1
12 | 
13 |         random.seed(0)
14 | 
15 |     def spin(self):
16 |         self.ball = random.choice(self.pockets)
17 | 
18 |     def bet_pocket(self, pocket, amount):
19 |         '''
20 |         pocket: pocket placing bet
21 |         amount: sum being bet
22 |         '''
23 |         if str(pocket) == str(self.ball):
24 |             return amount * self.pocket_odds
25 |         else:
26 |             return -amount
27 |     def __str__(self) -> str:
28 |         return 'fair roulette'
29 | 
30 | class EURoulette(FairRoulette):
31 |     def __init__(self):
32 |         super().__init__()
33 |         self.pockets.append('0') # the single zero pocket
34 |     def __str__(self) -> str:
35 |         return 'EU Roulette'
36 | 
37 | class USRoulette(EURoulette):
38 |     def __init__(self):
39 |         super().__init__()
40 |         self.pockets.append('00') # the double zero pocket
41 |     def __str__(self) -> str:
42 |         return 'US Roulette'
43 | 
44 | 
45 | def play_roulette(game, num_spins, pocket, bet):
46 |     '''
47 |     Arguments:
48 |     game: Roulette game being played
49 |     num_spins: number of spins for the simulation
50 |     pocket: pocket placing bet
51 |     bet: amount of bet
52 |     '''
53 |     total_pocket = 0
54 |     for i in range(num_spins):
55 |         game.spin()
56 |         total_pocket += game.bet_pocket(pocket, bet)
57 | 
58 |     print(f'{num_spins} spins of {game}')
59 |     print(f'expected return betting {pocket} = {str(100*total_pocket/num_spins)}%')
60 | 
61 |     return total_pocket/num_spins
62 | 
63 | if __name__ == "__main__":
64 |     game = FairRoulette()
65 |     for num_spins in (100, 1000000):
66 |         for i in range(3):
67 |             # betting 1 dollar on number 2 for num_spins trials
68 |             play_roulette(game, num_spins, 2, 1)
69 | 
70 | 
71 | 
72 | 
--------------------------------------------------------------------------------
/ml_algorithms/src/introduction_to_computation/random_walk/__pycache__/drunk.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/carmelgafa/ml-from-scratch/c51c4f4c88829da26a448dfc6bc48d9ec706df96/ml_algorithms/src/introduction_to_computation/random_walk/__pycache__/drunk.cpython-36.pyc
--------------------------------------------------------------------------------
/ml_algorithms/src/introduction_to_computation/random_walk/__pycache__/drunk.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/carmelgafa/ml-from-scratch/c51c4f4c88829da26a448dfc6bc48d9ec706df96/ml_algorithms/src/introduction_to_computation/random_walk/__pycache__/drunk.cpython-37.pyc
--------------------------------------------------------------------------------
/ml_algorithms/src/introduction_to_computation/random_walk/__pycache__/drunk.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/carmelgafa/ml-from-scratch/c51c4f4c88829da26a448dfc6bc48d9ec706df96/ml_algorithms/src/introduction_to_computation/random_walk/__pycache__/drunk.cpython-38.pyc -------------------------------------------------------------------------------- /ml_algorithms/src/introduction_to_computation/random_walk/__pycache__/field.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carmelgafa/ml-from-scratch/c51c4f4c88829da26a448dfc6bc48d9ec706df96/ml_algorithms/src/introduction_to_computation/random_walk/__pycache__/field.cpython-36.pyc -------------------------------------------------------------------------------- /ml_algorithms/src/introduction_to_computation/random_walk/__pycache__/field.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carmelgafa/ml-from-scratch/c51c4f4c88829da26a448dfc6bc48d9ec706df96/ml_algorithms/src/introduction_to_computation/random_walk/__pycache__/field.cpython-37.pyc -------------------------------------------------------------------------------- /ml_algorithms/src/introduction_to_computation/random_walk/__pycache__/field.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carmelgafa/ml-from-scratch/c51c4f4c88829da26a448dfc6bc48d9ec706df96/ml_algorithms/src/introduction_to_computation/random_walk/__pycache__/field.cpython-38.pyc -------------------------------------------------------------------------------- /ml_algorithms/src/introduction_to_computation/random_walk/__pycache__/location.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carmelgafa/ml-from-scratch/c51c4f4c88829da26a448dfc6bc48d9ec706df96/ml_algorithms/src/introduction_to_computation/random_walk/__pycache__/location.cpython-36.pyc -------------------------------------------------------------------------------- /ml_algorithms/src/introduction_to_computation/random_walk/__pycache__/location.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carmelgafa/ml-from-scratch/c51c4f4c88829da26a448dfc6bc48d9ec706df96/ml_algorithms/src/introduction_to_computation/random_walk/__pycache__/location.cpython-37.pyc -------------------------------------------------------------------------------- /ml_algorithms/src/introduction_to_computation/random_walk/__pycache__/location.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carmelgafa/ml-from-scratch/c51c4f4c88829da26a448dfc6bc48d9ec706df96/ml_algorithms/src/introduction_to_computation/random_walk/__pycache__/location.cpython-38.pyc -------------------------------------------------------------------------------- /ml_algorithms/src/introduction_to_computation/random_walk/drunk.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | class Drunk(): 4 | ''' 5 | base class for drunkard walk algorithms 6 | ''' 7 | def __init__(self, name='Anonymous'): 8 | self._name = name 9 | 10 | def __str__(self): 11 | return self._name 12 | 13 | class UsualDrunk(Drunk): 14 | def take_step(self): 15 | step_choices = [(0,1), (1,0), (0,-1), (-1,0)] 16 | return random.choice(step_choices) 17 | 18 | class BiasedDrunk(Drunk): 19 | ''' 20 | 
implements biased random walk 21 | ''' 22 | def take_step(self): 23 | step_choices = [(0,0.9), (1.1,0), (0,-1), (-1,0)] 24 | return random.choice(step_choices) 25 | 26 | if __name__ == "__main__": 27 | d1=Drunk('Joe') 28 | print(d1) 29 | 30 | d2 = Drunk() 31 | print(d2) 32 | 33 | -------------------------------------------------------------------------------- /ml_algorithms/src/introduction_to_computation/random_walk/field.py: -------------------------------------------------------------------------------- 1 | from drunk import UsualDrunk 2 | from drunk import BiasedDrunk 3 | from location import Location 4 | 5 | class Field(): 6 | def __init__(self): 7 | self._drunks = {} 8 | 9 | def add_drunk(self, drunk, loc): 10 | if drunk in self._drunks: 11 | raise ValueError('duplicate drunk') 12 | else: 13 | self._drunks[drunk] = loc 14 | 15 | def get_location(self, drunk): 16 | if drunk not in self._drunks: 17 | raise ValueError('drunk not in field') 18 | 19 | return self._drunks[drunk] 20 | 21 | def move_drunk(self, drunk): 22 | if drunk not in self._drunks: 23 | raise ValueError('drunk not in field') 24 | 25 | x_dist, y_dist = drunk.take_step() 26 | 27 | self._drunks[drunk] = self._drunks[drunk].move(x_dist, y_dist) 28 | 29 | -------------------------------------------------------------------------------- /ml_algorithms/src/introduction_to_computation/random_walk/location.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | class Location(): 4 | def __init__(self, x:float, y:float): 5 | self._x = x 6 | self._y = y 7 | 8 | def move (self, delta_x:float, delta_y:float): 9 | return Location(self._x + delta_x, self._y + delta_y) 10 | 11 | @property 12 | def x(self): 13 | return self._x 14 | 15 | @property 16 | def y(self): 17 | return self._y 18 | 19 | def dist(self, other): 20 | x_dist = self._x - other.x 21 | y_dist = self._y - other.y 22 | 23 | return (x_dist**2 + y_dist**2)**0.5 24 | 25 | def __str__(self): 26 | return f'<{self._x}, {self._y}>' 27 | 28 | if __name__ == "__main__": 29 | loc = Location(1,1) 30 | print(loc) 31 | dist = loc.dist(Location(0,0)) 32 | print(dist) -------------------------------------------------------------------------------- /ml_algorithms/src/introduction_to_computation/random_walk/simulation.py: -------------------------------------------------------------------------------- 1 | from field import Field 2 | from drunk import UsualDrunk 3 | from drunk import BiasedDrunk 4 | from location import Location 5 | import numpy as np 6 | import matplotlib.pylab as plt 7 | 8 | 9 | def walk(f, d, num_steps): 10 | start = f.get_location(d) 11 | for s in range(num_steps): 12 | f.move_drunk(d) 13 | # print(f.get_location(d)) 14 | return start.dist(f.get_location(d)) 15 | 16 | def sim_walks(num_steps, num_trials, dClass): 17 | drunkard = dClass() 18 | origin = Location(0, 0) 19 | distances = [] 20 | for t in range(num_trials): 21 | f = Field() 22 | f.add_drunk(drunkard, origin) 23 | distances.append(round(walk(f, drunkard, num_steps) ,1)) 24 | 25 | return distances 26 | 27 | def drunk_test(walk_lengths, num_trials,dClass): 28 | for num_steps in walk_lengths: 29 | distances = sim_walks(num_steps, num_trials, dClass) 30 | print(f'{dClass.__name__} random walk of {num_steps} steps') 31 | print(f'Mean = {round(sum(distances)/len(distances), 4)}') 32 | print(f'Max={max(distances)}') 33 | print(f'Min={min(distances)}') 34 | 35 | if __name__ == "__main__": 36 | drunk_test((0,1,2), 100, UsualDrunk) 
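A companion check one could run alongside simulation.py (a sketch, not in the repository; it imports the sim_walks helper and UsualDrunk class from the modules above): for the unbiased UsualDrunk, the mean final distance from the origin should grow roughly like the square root of the number of steps, which is the pattern drunk_test's printed means hint at.

import math
from simulation import sim_walks  # the module above; its __main__ block is guarded
from drunk import UsualDrunk

for n in (10, 100, 1000):
    distances = sim_walks(n, 100, UsualDrunk)
    mean_distance = sum(distances) / len(distances)
    # for an unbiased 2-D lattice walk, the expected distance scales like c * sqrt(n)
    print(f'{n} steps: mean distance {mean_distance:.2f}, sqrt(n) = {math.sqrt(n):.2f}')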
-------------------------------------------------------------------------------- /ml_algorithms/src/introduction_to_computation/random_walk/simulation_analysis.py: -------------------------------------------------------------------------------- 1 | from field import Field 2 | from drunk import UsualDrunk 3 | from drunk import BiasedDrunk 4 | from location import Location 5 | import numpy as np 6 | import matplotlib.pylab as plt 7 | 8 | 9 | def walk(f, d, num_steps): 10 | start = f.get_location(d) 11 | for s in range(num_steps): 12 | f.move_drunk(d) 13 | return f.get_location(d), start.dist(f.get_location(d)) 14 | 15 | def sim_walks(num_steps, num_trials, dClass): 16 | drunkard = dClass() 17 | origin = Location(0, 0) 18 | distances = [] 19 | end_locations = [] 20 | 21 | for t in range(num_trials): 22 | f = Field() 23 | f.add_drunk(drunkard, origin) 24 | end_location, distance = walk(f, drunkard, num_steps) 25 | 26 | distances.append(round(distance,1)) 27 | end_locations.append([end_location.x, end_location.y]) 28 | 29 | return end_locations, distances 30 | 31 | 32 | def drunk_test_dist_analysis(walk_lengths, num_trials): 33 | 34 | mean_dist_x = [] 35 | mean_dist_y = [] 36 | for num_steps in walk_lengths: 37 | _, distances = sim_walks(num_steps, num_trials, UsualDrunk) 38 | mean_dist_y.append(round(sum(distances)/len(distances), 4)) 39 | mean_dist_x.append(num_steps) 40 | 41 | plt.plot(mean_dist_x, mean_dist_y) 42 | 43 | mean_dist_x.clear() 44 | mean_dist_y.clear() 45 | for num_steps in walk_lengths: 46 | _, distances = sim_walks(num_steps, num_trials, BiasedDrunk) 47 | mean_dist_y.append(round(sum(distances)/len(distances), 4)) 48 | mean_dist_x.append(num_steps) 49 | 50 | plt.plot(mean_dist_x, mean_dist_y) 51 | plt.show() 52 | 53 | def drunk_test_end_analysis(walk_lengths, num_trials): 54 | for num_steps in walk_lengths: 55 | end_locations, _ = sim_walks(num_steps, num_trials, UsualDrunk) 56 | end_locations = np.array(end_locations) 57 | plt.scatter(end_locations[:,0], end_locations[:,1]) 58 | 59 | for num_steps in walk_lengths: 60 | end_locations, _ = sim_walks(num_steps, num_trials, BiasedDrunk) 61 | end_locations = np.array(end_locations) 62 | plt.scatter(end_locations[:,0], end_locations[:,1]) 63 | 64 | plt.show() 65 | 66 | 67 | 68 | def drunk_test(walk_lengths, num_trials,dClass): 69 | for num_steps in walk_lengths: 70 | end_locations, distances = sim_walks(num_steps, num_trials, dClass) 71 | end_locations = np.array(end_locations) 72 | print(f'{dClass.__name__} random walk of {num_steps} steps') 73 | print(f'Mean = {round(sum(distances)/len(distances), 4)}') 74 | print(f'Max={max(distances)}') 75 | print(f'Min={min(distances)}') 76 | plt.scatter(end_locations[:,0], end_locations[:,1]) 77 | plt.show() 78 | 79 | if __name__ == "__main__": 80 | # drunk_test((10,100,1000), 1000, BiasedDrunk) 81 | # drunk_test_dist_analysis((10,100,1000, 10000), 100) 82 | # xyz=np.array(np.random.random((100,3))) 83 | # print(xyz) 84 | drunk_test_end_analysis((0,1000), 100) 85 | -------------------------------------------------------------------------------- /ml_algorithms/src/introduction_to_computation/stochastic/approximation.py: -------------------------------------------------------------------------------- 1 | import random 2 | import math 3 | 4 | def same_date_birthday(num_poeple, num_same): 5 | possibility_dates = range(366) 6 | birthdays = [0] * 366 7 | for p in range(num_poeple): 8 | birth_date = random.choice(possibility_dates) 9 | birthdays[birth_date] += 1 10 | return max(birthdays) >= 
num_same
11 | 
12 | def birthday_problem(num_people, num_same, num_trials):
13 |     num_hits = 0
14 |     for t in range(num_trials):
15 |         if same_date_birthday(num_people, num_same):
16 |             num_hits += 1
17 | 
18 |     return num_hits/num_trials
19 | 
20 | 
21 | 
22 | if __name__ == "__main__":
23 |     for num_people in [10,20,40,100]:
24 |         print(f'for {num_people} est prob of shared birthday is {birthday_problem(num_people, 2, 90000)}')
25 | 
26 |     num = math.factorial(366)
27 |     den = (366**num_people)*math.factorial(366-num_people)
28 | 
29 |     print(f'actual prob for {num_people} is {1-(num/den)}')
--------------------------------------------------------------------------------
/ml_algorithms/src/introduction_to_computation/stochastic/random_processes.py:
--------------------------------------------------------------------------------
1 | import random
2 | 
3 | def roll_die():
4 |     # choose from a uniform distribution
5 |     return random.choice([1,2,3,4,5,6])
6 | 
7 | 
8 | def test_roll(n=10):
9 |     result = ''
10 |     for i in range(n):
11 |         result = result + str(roll_die())
12 |     print(result)
13 | 
14 | 
15 | def run_sim(goal, num_trials):
16 |     total = 0
17 | 
18 |     for i in range(num_trials):
19 |         result = ''
20 |         for j in range(len(goal)):
21 |             result += str(roll_die())
22 |         if result == goal:
23 |             total += 1
24 | 
25 |     print(f'actual prob of {goal} = ', round(1/(6**len(goal)), 8))
26 |     est_prob = round(total/num_trials, 8)
27 |     print(f'estimated prob of {goal} = ', est_prob)
28 | 
29 | if __name__ == "__main__":
30 |     # test_roll()
31 |     run_sim('11111', 1000000)
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy
2 | pandas
3 | matplotlib
--------------------------------------------------------------------------------
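A worked form of the exact expression printed at the end of approximation.py above (an aside, not part of the repository): with 366 equally likely birth dates, the probability that at least two of n people share a birthday is

    P(n) = 1 - \frac{366!}{366^{\,n}\,(366 - n)!}

which for n = 10, 20, 40, 100 gives roughly 0.12, 0.41, 0.89, and effectively 1, the values the simulated estimates should approach.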