├── .github └── workflows │ └── latex-builder.yml ├── .gitignore ├── .vscode └── settings.json ├── CHANGELOG.md ├── CODE_OF_CONDUCT.md ├── Dockerfile ├── LICENSE ├── Makefile ├── README.md ├── assets └── 42ai_logo.png ├── build ├── module05.pdf ├── module06.pdf ├── module07.pdf ├── module08.pdf └── module09.pdf ├── module05 ├── Makefile ├── assets │ ├── 42ai_logo.pdf │ ├── Default.png │ ├── Evaluate.png │ ├── Improve.png │ ├── Predict.png │ ├── bad_pred_with_distance.png │ ├── bad_prediction.png │ ├── plot1.png │ ├── plot2.png │ ├── plot3.png │ ├── plotcost1.png │ ├── plotcost2.png │ ├── plotcost3.png │ └── sigmoid.png ├── en.subject.tex ├── exercises │ ├── en.ex02_interlude.tex │ ├── en.ex03_interlude.tex │ ├── en.ex06_interlude.tex │ ├── en.ex08_interlude.tex │ ├── m05ex00.tex │ ├── m05ex01.tex │ ├── m05ex02.tex │ ├── m05ex03.tex │ ├── m05ex04.tex │ ├── m05ex05.tex │ ├── m05ex06.tex │ ├── m05ex07.tex │ ├── m05ex08.tex │ └── m05ex09.tex └── useful_resources.tex ├── module06 ├── Makefile ├── assets │ ├── 42ai_logo.pdf │ ├── Improve.png │ ├── Improve2.png │ ├── ex03_cost_1.png │ ├── ex03_cost_2.png │ ├── ex03_cost_3.png │ ├── ex03_interlude_cost.png │ ├── ex03_interlude_plot.png │ ├── ex04_J_vs_t1.png │ ├── ex04_score_vs_bluepills.png │ ├── ex05_price_vs_Tmeters_part1.png │ ├── ex05_price_vs_Tmeters_part2.png │ ├── ex05_price_vs_age_part1.png │ ├── ex05_price_vs_age_part2.png │ ├── ex05_price_vs_thrust_part1.png │ ├── ex05_price_vs_thrust_part2.png │ ├── ex06_sellprice_ne_lgd_vs_age.png │ ├── ex07_3Dplot_data.png │ ├── ex07_hypo_test_part1.png │ └── ex07_learning_rate.png ├── attachments │ ├── are_blue_pills_magics.csv │ └── are_blue_pills_magics_description.txt ├── en.subject.tex ├── exercises │ ├── en.ex00_interlude.tex │ ├── en.ex01_interlude.tex │ ├── en.ex02_interlude.tex │ ├── en.ex05_interlude.tex │ ├── m06ex00.tex │ ├── m06ex01.tex │ ├── m06ex02.tex │ ├── m06ex03.tex │ ├── m06ex04.tex │ ├── m06ex05.tex │ └── m06ex06.tex └── useful_resources.tex ├── module07 ├── Makefile ├── assets │ ├── 42ai_logo.pdf │ ├── Default.png │ ├── Evaluate.png │ ├── Improve.png │ ├── Predict.png │ ├── bad_pred_with_distance.png │ ├── ex07_price_vs_Tmeters_part1.png │ ├── ex07_price_vs_Tmeters_part2.png │ ├── ex07_price_vs_age_part1.png │ ├── ex07_price_vs_age_part2.png │ ├── ex07_price_vs_thrust_part1.png │ ├── ex07_price_vs_thrust_part2.png │ ├── ex12_data.png │ ├── ex12_plot.png │ ├── overfitt.png │ ├── overfitt_with_dots.png │ └── polynomial_straight_line.png ├── attachments │ ├── are_blue_pills_magics.csv │ ├── are_blue_pills_magics_description.txt │ ├── space_avocado.csv │ ├── spacecraft_data.csv │ └── spacecraft_data_description.txt ├── en.subject.tex ├── exercises │ ├── en.ex00_interlude.tex │ ├── en.ex01_interlude.tex │ ├── en.ex02_interlude.tex │ ├── en.ex03_interlude.tex │ ├── en.ex04_interlude.tex │ ├── en.ex07_interlude.tex │ ├── en.ex08_interlude.tex │ ├── en.ex09_interlude.tex │ ├── m07ex00.tex │ ├── m07ex01.tex │ ├── m07ex02.tex │ ├── m07ex03.tex │ ├── m07ex04.tex │ ├── m07ex05.tex │ ├── m07ex06.tex │ ├── m07ex07.tex │ ├── m07ex08.tex │ ├── m07ex09.tex │ └── m07ex10.tex └── useful_resources.tex ├── module08 ├── Makefile ├── assets │ ├── -log_1-x.png │ ├── -log_x.png │ ├── 42ai_logo.pdf │ ├── Default.png │ ├── Evaluate.png │ ├── Improve.png │ ├── Predict.png │ ├── figure1_3Dplot_dataset.png │ ├── log_loss.png │ └── sigmoid.png ├── attachments │ ├── data.csv │ ├── solar_system_census.csv │ └── solar_system_census_planets.csv ├── en.subject.tex ├── exercises │ ├── en.ex00_interlude.tex │ ├── 
en.ex01_interlude.tex │ ├── en.ex02_interlude.tex │ ├── en.ex03_interlude.tex │ ├── en.ex04_interlude.tex │ ├── en.ex05_interlude.tex │ ├── en.ex08_interlude.tex │ ├── m08ex00.tex │ ├── m08ex01.tex │ ├── m08ex02.tex │ ├── m08ex03.tex │ ├── m08ex04.tex │ ├── m08ex05.tex │ ├── m08ex06.tex │ ├── m08ex07.tex │ ├── m08ex08.tex │ └── m08ex09.tex └── useful_resources.tex ├── module09 ├── Makefile ├── assets │ ├── 42ai_logo.pdf │ ├── Evaluate.png │ └── Improve.png ├── attachments │ ├── solar_system_census.csv │ ├── solar_system_census_planets.csv │ └── space_avocado.csv ├── en.subject.tex ├── exercises │ ├── en.ex01_interlude.tex │ ├── en.ex04_interlude.tex │ ├── en.ex06_interlude.tex │ ├── en.ex08_interlude.tex │ ├── en.ex10_interlude.tex │ ├── m09ex00.tex │ ├── m09ex01.tex │ ├── m09ex02.tex │ ├── m09ex03.tex │ ├── m09ex04.tex │ ├── m09ex05.tex │ ├── m09ex06.tex │ ├── m09ex07.tex │ ├── m09ex08.tex │ ├── m09ex09.tex │ └── m09ex10.tex └── useful_resources.tex ├── resources ├── .gitignore ├── 42ai_bootcamps │ ├── en.acknowledgements.tex │ └── en.instructions.tex └── latex │ ├── 42-crea-en.cls │ ├── 42-crea-fr.cls │ ├── 42-en.cls │ ├── 42-es.cls │ ├── 42-fr.cls │ ├── 42-hy.cls │ ├── 42-it.cls │ ├── 42-ja.cls │ ├── 42-ko.cls │ ├── 42-nl.cls │ ├── 42-pt.cls │ ├── 42-ro.cls │ ├── 42_logo.pdf │ ├── Makefile.LaTeX │ ├── Makefile.template │ ├── atbeginend.sty │ ├── back.pdf │ ├── dark-42-fr.cls │ ├── dark-42_logo.pdf │ ├── dark-back.pdf │ ├── helphint.pdf │ ├── info.pdf │ ├── minted.sty │ ├── multirow.sty │ ├── redefinition-commands.tex │ ├── tocloft.sty │ └── warn.pdf └── version /.github/workflows/latex-builder.yml: -------------------------------------------------------------------------------- 1 | 2 | name: Latex Builder 3 | 4 | on: [push] 5 | 6 | jobs: 7 | make-it: 8 | permissions: 9 | contents: write 10 | runs-on: ubuntu-latest 11 | steps: 12 | - name: Checkout repo 13 | uses: actions/checkout@v4 14 | - name: Install LaTex utilities 15 | run: sudo apt update && sudo apt-get install -y texlive-full 16 | - name: Get version from file 17 | run: echo "::set-output name=version::$(cat version)" 18 | id: version 19 | - name: Build PDFs 20 | run: make 21 | release-it: 22 | permissions: 23 | contents: write 24 | runs-on: ubuntu-latest 25 | if: contains(github.ref, 'master') 26 | needs: [make-it] 27 | steps: 28 | - name: Checkout repository 29 | uses: actions/checkout@v4 30 | - name: Install LaTex utilities 31 | run: sudo apt update && sudo apt-get install -y texlive-full 32 | - name: Get version from file 33 | id: get_version 34 | run: echo "VERSION=version/$(cat version)" >> $GITHUB_ENV 35 | - name: Name release from version 36 | id: get_release 37 | run: echo "RELEASE=release_$(cat version)" >> $GITHUB_ENV 38 | - name: Build PDFs 39 | run: make 40 | - name: Upload PDFs archives 41 | uses: actions/upload-artifact@v4 42 | with: 43 | name: modules.pdf 44 | path: | 45 | build/module05.pdf 46 | build/module06.pdf 47 | build/module07.pdf 48 | build/module08.pdf 49 | build/module09.pdf 50 | - name: Bundle Release 51 | uses: softprops/action-gh-release@v2 52 | with: 53 | files: | 54 | build/module05.pdf 55 | build/module06.pdf 56 | build/module07.pdf 57 | build/module08.pdf 58 | build/module09.pdf 59 | name: ${{env.RELEASE}} 60 | tag_name: ${{env.VERSION}} 61 | make_latest: true 62 | body_path: CHANGELOG.md 63 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | answers/ 2 | 3 | # 
Byte-compiled / optimized / DLL files 4 | __pycache__/ 5 | *.py[cod] 6 | *$py.class 7 | .DS_Store 8 | 9 | # C extensions 10 | *.so 11 | 12 | # Latex compilation files 13 | *.fdb_latexmk 14 | *.fls 15 | *.pdf.version 16 | 17 | # VSCode 18 | .vscode/ 19 | 20 | # Distribution / packaging 21 | .Python 22 | develop-eggs/ 23 | dist/ 24 | downloads/ 25 | eggs/ 26 | .eggs/ 27 | lib/ 28 | lib64/ 29 | parts/ 30 | sdist/ 31 | var/ 32 | wheels/ 33 | *.egg-info/ 34 | .installed.cfg 35 | *.egg 36 | MANIFEST 37 | 38 | # PyInstaller 39 | # Usually these files are written by a python script from a template 40 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 41 | *.manifest 42 | *.spec 43 | 44 | # Installer logs 45 | pip-log.txt 46 | pip-delete-this-directory.txt 47 | 48 | # Unit test / coverage reports 49 | htmlcov/ 50 | .tox/ 51 | .coverage 52 | .coverage.* 53 | .cache 54 | nosetests.xml 55 | coverage.xml 56 | *.cover 57 | .hypothesis/ 58 | .pytest_cache/ 59 | 60 | # Translations 61 | *.mo 62 | *.pot 63 | 64 | # Django stuff: 65 | *.log 66 | local_settings.py 67 | db.sqlite3 68 | 69 | # Flask stuff: 70 | instance/ 71 | .webassets-cache 72 | 73 | # Scrapy stuff: 74 | .scrapy 75 | 76 | # Sphinx documentation 77 | docs/_build/ 78 | 79 | # PyBuilder 80 | target/ 81 | 82 | # Jupyter Notebook 83 | .ipynb_checkpoints 84 | *.ipynb 85 | 86 | # pyenv 87 | .python-version 88 | 89 | # celery beat schedule file 90 | celerybeat-schedule 91 | 92 | # SageMath parsed files 93 | *.sage.py 94 | 95 | # Environments 96 | .env 97 | .venv 98 | env/ 99 | venv/ 100 | ENV/ 101 | env.bak/ 102 | venv.bak/ 103 | .idea/ 104 | 105 | # Spyder project settings 106 | .spyderproject 107 | .spyproject 108 | 109 | # Rope project settings 110 | .ropeproject 111 | 112 | # mkdocs documentation 113 | /site 114 | 115 | # mypy 116 | .mypy_cache/ 117 | .idea/ 118 | day00/.DS_Store 119 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "workbench.colorCustomizations": { 3 | "activityBar.activeBackground": "#fc8099", 4 | "activityBar.activeBorder": "#b4fda2", 5 | "activityBar.background": "#fc8099", 6 | "activityBar.foreground": "#15202b", 7 | "activityBar.inactiveForeground": "#15202b99", 8 | "activityBarBadge.background": "#b4fda2", 9 | "activityBarBadge.foreground": "#15202b", 10 | "sash.hoverBorder": "#fc8099", 11 | "statusBar.background": "#fb4e71", 12 | "statusBar.foreground": "#15202b", 13 | "statusBarItem.hoverBackground": "#fa1c49", 14 | "statusBarItem.remoteBackground": "#fb4e71", 15 | "statusBarItem.remoteForeground": "#15202b", 16 | "titleBar.activeBackground": "#fb4e71", 17 | "titleBar.activeForeground": "#15202b", 18 | "titleBar.inactiveBackground": "#fb4e7199", 19 | "titleBar.inactiveForeground": "#15202b99" 20 | }, 21 | "peacock.color": "#fb4e71" 22 | } -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # 5.1.1 2 | 3 | - Displaced recurring files (acknowledgements and instructions) to a new `/resources/42ai_bootcamps/` folder. 4 | - Added sat forms within the conclusion of each day. 5 | - Updated `instructions` to add a reference to Black package, and a cautionary word about using LLMs. 
6 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as 6 | contributors and maintainers pledge to making participation in our project and 7 | our community a harassment-free experience for everyone, regardless of age, body 8 | size, disability, ethnicity, sex characteristics, gender identity and expression, 9 | level of experience, education, socio-economic status, nationality, personal 10 | appearance, race, religion, or sexual identity and orientation. 11 | 12 | ## Our Standards 13 | 14 | Examples of behavior that contributes to creating a positive environment 15 | include: 16 | 17 | * Using welcoming and inclusive language 18 | * Being respectful of differing viewpoints and experiences 19 | * Gracefully accepting constructive criticism 20 | * Focusing on what is best for the community 21 | * Showing empathy towards other community members 22 | 23 | Examples of unacceptable behavior by participants include: 24 | 25 | * The use of sexualized language or imagery and unwelcome sexual attention or 26 | advances 27 | * Trolling, insulting/derogatory comments, and personal or political attacks 28 | * Public or private harassment 29 | * Publishing others' private information, such as a physical or electronic 30 | address, without explicit permission 31 | * Other conduct which could reasonably be considered inappropriate in a 32 | professional setting 33 | 34 | ## Our Responsibilities 35 | 36 | Project maintainers are responsible for clarifying the standards of acceptable 37 | behavior and are expected to take appropriate and fair corrective action in 38 | response to any instances of unacceptable behavior. 39 | 40 | Project maintainers have the right and responsibility to remove, edit, or 41 | reject comments, commits, code, wiki edits, issues, and other contributions 42 | that are not aligned to this Code of Conduct, or to ban temporarily or 43 | permanently any contributor for other behaviors that they deem inappropriate, 44 | threatening, offensive, or harmful. 45 | 46 | ## Scope 47 | 48 | This Code of Conduct applies both within project spaces and in public spaces 49 | when an individual is representing the project or its community. Examples of 50 | representing a project or community include using an official project e-mail 51 | address, posting via an official social media account, or acting as an appointed 52 | representative at an online or offline event. Representation of a project may be 53 | further defined and clarified by project maintainers. 54 | 55 | ## Enforcement 56 | 57 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 58 | reported by contacting the project team at contact@42ai.fr. All 59 | complaints will be reviewed and investigated and will result in a response that 60 | is deemed necessary and appropriate to the circumstances. The project team is 61 | obligated to maintain confidentiality with regard to the reporter of an incident. 62 | Further details of specific enforcement policies may be posted separately. 63 | 64 | Project maintainers who do not follow or enforce the Code of Conduct in good 65 | faith may face temporary or permanent repercussions as determined by other 66 | members of the project's leadership. 
67 | 68 | ## Attribution 69 | 70 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, 71 | available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html 72 | 73 | [homepage]: https://www.contributor-covenant.org 74 | 75 | For answers to common questions about this code of conduct, see 76 | https://www.contributor-covenant.org/faq 77 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM blang/latex:ubuntu 2 | 3 | RUN pwd 4 | 5 | COPY . /data/bootcamp_machine-learning 6 | 7 | WORKDIR /data/bootcamp_machine-learning 8 | 9 | RUN pwd 10 | 11 | RUN make \ 12 | && ls -la . \ 13 | && ls -la module05 14 | 15 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | DIRECTORIES = module05 \ 2 | module06 \ 3 | module07 \ 4 | module08 \ 5 | module09 6 | 7 | TARGETS_DIRS = $(DIRECTORIES:%=%/en.subject.pdf) 8 | 9 | TARGETS = $(DIRECTORIES:%=%.pdf) 10 | 11 | all: clean dirs 12 | 13 | %.pdf: 14 | @$(MAKE) -C `dirname $@` 15 | @$(MAKE) clean -C `dirname $@` 16 | cp $@ build/`dirname $@`.pdf 17 | 18 | dirs: $(TARGETS_DIRS) 19 | 20 | build_pdfs: 21 | sudo docker run -v "$(shell pwd)/build:/data/bootcamp_machine-learning/build" -i latex_build make 22 | 23 | build_builder: 24 | sudo docker build -t latex_build . 25 | 26 | clean: 27 | rm -rf $(TARGETS) $(TARGETS_DIRS) 28 | 29 | debug: 30 | echo $(TARGETS_DIRS) 31 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |

2 | 42 AI Logo 3 |

4 | 5 |

6 | Bootcamp Machine Learning 7 |

8 |

9 | One week to learn the basics in Machine Learning! :robot: 10 |

11 |
12 | 13 | ### Table of Contents 14 | 15 | - [Download](#download) 16 | - [Curriculum](#curriculum) 17 | - [Module05 - Stepping Into Machine Learning](#module05---stepping-into-machine-learning) 18 | - [Module06 - Univariate Linear Regression](#module06---univariate-linear-regression) 19 | - [Module07 - Multivariate Linear Regression](#module07---multivariate-linear-regression) 20 | - [Module08 - Logistic Regression](#module08---logistic-regression) 21 | - [Module09 - Regularization](#module09---regularization) 22 | - [Acknowledgements](#acknowledgements) 23 | - [Contributors](#contributors) 24 | - [Beta-testers](#beta-testers) 25 |
26 | 
27 | This project is a Machine Learning bootcamp created by [42 AI](http://www.42ai.fr).
28 | 
29 | As the notions covered in this bootcamp can be complex, we very strongly advise students to complete the following bootcamp first:
30 | 
31 | - [Python](https://github.com/42-AI/bootcamp_python)
32 | 
33 | 42 Artificial Intelligence is a student organization of the Paris campus of the school 42. Our purpose is to foster discussion, learning, and interest in the field of artificial intelligence, by organizing various activities such as lectures and workshops.
34 | 
35 | 
36 | ## Download
37 | 
38 | The PDF files of each module can be downloaded from our release page:
39 | [https://github.com/42-AI/bootcamp_machine-learning/releases](https://github.com/42-AI/bootcamp_machine-learning/releases)
40 | 
41 | ## Curriculum
42 | 
43 | ### Module05 - Stepping Into Machine Learning
44 | 
45 | **Get started with some linear algebra and statistics**
46 | 
47 | > Sum, mean, variance, standard deviation, vectors and matrices operations.
48 | > Hypothesis, model, regression, loss function.
49 | 
50 | ### Module06 - Univariate Linear Regression
51 | 
52 | **Implement a method to improve your model's performance: gradient descent, and discover the notion of normalization**
53 | 
54 | > Gradient descent, linear regression, normalization.
55 | 
56 | ### Module07 - Multivariate Linear Regression
57 | 
58 | **Extend linear regression to handle more than one feature, build polynomial models and detect overfitting**
59 | 
60 | > Multivariate linear hypothesis, multivariate linear gradient descent, polynomial models.
61 | > Training and test sets, overfitting.
62 | 
63 | ### Module08 - Logistic Regression
64 | 
65 | **Discover your first classification algorithm: logistic regression!**
66 | 
67 | > Logistic hypothesis, logistic gradient descent, logistic regression, multiclass classification.
68 | > Accuracy, precision, recall, F1-score, confusion matrix.
69 | 
70 | ### Module09 - Regularization
71 | 
72 | **Fight overfitting!**
73 | 
74 | > Regularization, overfitting. Regularized loss function, regularized gradient descent.
75 | > Regularized linear regression. Regularized logistic regression.
76 | 
78 | 79 | ## Acknowledgements 80 | 81 | ### Contributors 82 | 83 | - Amric Trudel (amric@42ai.fr) 84 | - Maxime Choulika (maxime@42ai.fr) 85 | - Pierre Peigné (ppeigne@student.42.fr) 86 | - Matthieu David (mdavid@student.42.fr) 87 | - Benjamin Carlier (bcarlier@student.42.fr) 88 | - Pablo Clement (pclement@student.42.fr) 89 | - Amir Mahla (amahla@42ai.fr) 90 | - Mathieu Perez (mathieu.perez@42ai.fr) 91 | 92 | ### Beta-testers 93 | 94 | - Richard Blanc (riblanc@student.42.fr) 95 | - Solveig Gaydon Ohl (sgaydon-@student.42.fr) 96 | - Quentin Feuillade--Montixi (qfeuilla@student.42.fr) 97 | -------------------------------------------------------------------------------- /assets/42ai_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/assets/42ai_logo.png -------------------------------------------------------------------------------- /build/module05.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/build/module05.pdf -------------------------------------------------------------------------------- /build/module06.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/build/module06.pdf -------------------------------------------------------------------------------- /build/module07.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/build/module07.pdf -------------------------------------------------------------------------------- /build/module08.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/build/module08.pdf -------------------------------------------------------------------------------- /build/module09.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/build/module09.pdf -------------------------------------------------------------------------------- /module05/Makefile: -------------------------------------------------------------------------------- 1 | # List the pdf's to build. 
foo.tex will produce foo.pdf 2 | TARGETS = en.subject.pdf 3 | 4 | # List the files included in the slides 5 | DEPS = exercises/en.ex02_interlude.tex \ 6 | exercises/en.ex03_interlude.tex \ 7 | exercises/en.ex06_interlude.tex \ 8 | exercises/en.ex08_interlude.tex \ 9 | ../resources/42ai_bootcamps/en.instructions.tex \ 10 | ../resources/42ai_bootcamps/en.acknowledgements.tex \ 11 | ../resources/latex/redefinition-commands.tex \ 12 | useful_resources.tex 13 | 14 | # Relative path to the LaTeX documentclass setup files 15 | # Adapt as needed 16 | # RELPATH = $(shell git rev-parse --show-toplevel)/resources/latex/ 17 | # RELPATH for github actions: 18 | RELPATH = $(shell dirname `pwd`)/resources/latex/ 19 | 20 | # You should not touch this either 21 | include $(RELPATH)/Makefile.LaTeX 22 | -------------------------------------------------------------------------------- /module05/assets/42ai_logo.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module05/assets/42ai_logo.pdf -------------------------------------------------------------------------------- /module05/assets/Default.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module05/assets/Default.png -------------------------------------------------------------------------------- /module05/assets/Evaluate.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module05/assets/Evaluate.png -------------------------------------------------------------------------------- /module05/assets/Improve.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module05/assets/Improve.png -------------------------------------------------------------------------------- /module05/assets/Predict.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module05/assets/Predict.png -------------------------------------------------------------------------------- /module05/assets/bad_pred_with_distance.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module05/assets/bad_pred_with_distance.png -------------------------------------------------------------------------------- /module05/assets/bad_prediction.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module05/assets/bad_prediction.png -------------------------------------------------------------------------------- /module05/assets/plot1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module05/assets/plot1.png -------------------------------------------------------------------------------- 
/module05/assets/plot2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module05/assets/plot2.png -------------------------------------------------------------------------------- /module05/assets/plot3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module05/assets/plot3.png -------------------------------------------------------------------------------- /module05/assets/plotcost1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module05/assets/plotcost1.png -------------------------------------------------------------------------------- /module05/assets/plotcost2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module05/assets/plotcost2.png -------------------------------------------------------------------------------- /module05/assets/plotcost3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module05/assets/plotcost3.png -------------------------------------------------------------------------------- /module05/assets/sigmoid.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module05/assets/sigmoid.png -------------------------------------------------------------------------------- /module05/exercises/en.ex03_interlude.tex: -------------------------------------------------------------------------------- 1 | %******************************************************************************% 2 | % % 3 | % Interlude % 4 | % for Machine Learning module % 5 | % % 6 | %******************************************************************************% 7 | 8 | \section*{Interlude - A Simple Linear Algebra Trick} 9 | 10 | As you know, vectors and matrices can be multiplied to perform linear combinations.\\ 11 | Let's do a little linear algebra trick to optimize our calculation and use matrix multiplication. 12 | If we add a column full of $1$'s to our vector of examples $x$, we can create the following matrix: 13 | 14 | $$ 15 | X' = \begin{bmatrix} 1 & x^{(1)} \\ \vdots & \vdots \\ 1 & x^{(m)}\end{bmatrix} 16 | $$ 17 | 18 | We can then rewrite our hypothesis as: 19 | 20 | $$ 21 | \hat{y}^{(i)} = \theta \cdot x'^{(i)} = \begin{bmatrix}\theta_0 \\ \theta_1 \end{bmatrix} \cdot \begin{bmatrix} 1 & x^{(i)} \end{bmatrix} = \theta_0 + \theta_1 x^{(i)} 22 | $$ 23 | 24 | Therefore, the calculation of each $\hat{y}^{(i)}$can be done with only one vector multiplication. 
25 | 26 | But we can even go further, by calculating the whole $\hat{y}$ vector in one operation: 27 | 28 | $$ 29 | \hat{y} = X' \cdot \theta = \begin{bmatrix} 1 & x^{(1)} \\ \vdots & \vdots \\ 1 & x^{(m)}\end{bmatrix}\cdot\begin{bmatrix}\theta_0 \\ \theta_1 \end{bmatrix} = \begin{bmatrix} \theta_0 + \theta_1 x^{(1)} \\ \vdots \\ \theta_0 + \theta_1 x^{(m)} \end{bmatrix} 30 | $$ 31 | 32 | We can now get to the same result as in the previous exercise with just a single multiplication between our brand new $X'$ matrix and the $\theta$ vector! 33 | 34 | \subsection*{A Note on Notation} 35 | In further Interludes, we will use the following convention: 36 | \begin{itemize} 37 | \item Capital letters represent matrices (e.g.: $X$) 38 | \item Lower-case letters represent vectors and scalars (e.g.: $x^{(i)}$, $y$) 39 | \end{itemize} -------------------------------------------------------------------------------- /module05/exercises/en.ex06_interlude.tex: -------------------------------------------------------------------------------- 1 | %******************************************************************************% 2 | % % 3 | % Interlude % 4 | % for Machine Learning module % 5 | % % 6 | %******************************************************************************% 7 | 8 | \section*{Interlude - Evaluate} 9 | 10 | \begin{figure}[h!] 11 | \centering 12 | \includegraphics[scale=0.25]{assets/Evaluate.png} 13 | % \caption{cycle evaluate} 14 | \end{figure} 15 | 16 | \subsection*{Introducing the loss function} 17 | 18 | How good is our model? 19 | It is hard to say just by simply looking at the plots! 20 | We can clearly observe that certain regression lines seem to fit the data better than others, but it would be convenient to find a way to measure it. 21 | 22 | \begin{figure}[h!] 23 | \centering 24 | \includegraphics[scale=0.55]{assets/bad_prediction.png} 25 | \caption{bad prediction} 26 | \end{figure} 27 | 28 | To evaluate our model, we are going to use a \textbf{metric} called \textbf{the loss function} (sometimes called \textbf{cost function}).\\ 29 | \newline 30 | The loss function tells us how bad our model is performing, how much it \textit{costs} us to use it, how much information we \textit{lose} when we use it. 31 | If the model is good, we won't lose that much; if it's terrible instead, we will have a high loss! 32 | 33 | The metric you choose will deeply impact the evaluation (and therefore also the training) of your model. 34 | 35 | A frequent way to evaluate the performance of a regression model is to measure the distance between each predicted value ($\hat{y}^{(i)}$) and the real value it tries to predict (${y}^{(i)}$). The distances are then squared, and averaged to get one single metric, denoted $J$: 36 | 37 | $$ 38 | J(\theta) = \frac{1}{2m}\sum_{i=1}^{m}(\hat{y}^{(i)} - y^{(i)})^2 39 | $$ 40 | 41 | The smaller, the better! 42 | 43 | \begin{figure}[h!] 
44 | \centering 45 | \includegraphics[scale=0.55]{assets/bad_pred_with_distance.png} 46 | \caption{bad prediction with distance} 47 | \end{figure} 48 | -------------------------------------------------------------------------------- /module05/exercises/en.ex08_interlude.tex: -------------------------------------------------------------------------------- 1 | %******************************************************************************% 2 | % % 3 | % Interlude % 4 | % for Machine Learning module % 5 | % % 6 | %******************************************************************************% 7 | 8 | \section*{Interlude - Fifty Shades of Linear Algebra} 9 | 10 | In the last exercise, we implemented the \textbf{loss function} in two subfunctions. 11 | It worked, but it's not very pretty. 12 | What if we could do it all in one step, with linear algebra? 13 | 14 | As we did with the hypothesis, we can use a vectorized equation to improve the calculations of the loss function. 15 | 16 | So now let's take a look at how squaring and averaging can be performed (more or less) in a single matrix multiplication! 17 | 18 | $$ 19 | J(\theta) = \frac{1}{2m}\sum_{i=1}^{m}(\hat{y}^{(i)} - y^{(i)})^2 20 | $$ 21 | $$ 22 | J(\theta) = \frac{1}{2m}\sum_{i=1}^{m}[(\hat{y}^{(i)} - y^{(i)}) (\hat{y}^{(i)} - y^{(i)})] 23 | $$ 24 | 25 | Now, if we apply the definition of the dot product: 26 | 27 | $$ 28 | J(\theta) = \frac{1}{2m}(\hat{y} - y) \cdot(\hat{y}- y) 29 | $$ -------------------------------------------------------------------------------- /module05/exercises/m05ex02.tex: -------------------------------------------------------------------------------- 1 | \chapter{Exercise 02} 2 | \input{exercises/en.ex02_interlude.tex} 3 | \newpage 4 | \extitle{Simple Prediction} 5 | \turnindir{ex02} 6 | \exnumber{02} 7 | \exfiles{prediction.py} 8 | \exforbidden{any functions which performs prediction} 9 | \makeheaderfilesforbidden 10 | 11 | % ================================= % 12 | \section*{Objective} 13 | % --------------------------------- % 14 | Understand and manipulate the notion of hypothesis in machine learning. 15 | 16 | You must implement the following formula as a function: 17 | $$ 18 | \begin{matrix} 19 | \hat{y}^{(i)} = \theta_0 + \theta_1 x^{(i)} & &\text{ for i = 1, ..., m} 20 | \end{matrix} 21 | $$ 22 | 23 | Where: 24 | \begin{itemize} 25 | \item $x$ is a vector of dimension $m$, the vector of examples/features (without the $y$ values) 26 | \item $\hat{y}$ is a vector of dimension m * 1, the vector of predicted values 27 | \item $\theta$ is a vector of dimension 2 * 1, the vector of parameters 28 | \item $y^{(i)}$ is the $i^{th}$ component of vector $y$ 29 | \item $x^{(i)}$ is the $i^{th}$ component of vector $x$ 30 | \end{itemize} 31 | 32 | % ================================= % 33 | \section*{Instructions} 34 | % --------------------------------- % 35 | In the prediction.py file, write the following function as per the instructions given below: 36 | 37 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 38 | def simple_predict(x, theta): 39 | """Computes the vector of prediction y_hat from two non-empty numpy.ndarray. 40 | Args: 41 | x: has to be an numpy.ndarray, a one-dimensional array of size m. 42 | theta: has to be an numpy.ndarray, a one-dimensional array of size 2. 43 | Returns: 44 | y_hat as a numpy.ndarray, a one-dimensional array of size m. 45 | None if x or theta are empty numpy.ndarray. 46 | None if x or theta dimensions are not appropriate. 
47 | Raises: 48 | This function should not raise any Exception. 49 | """ 50 | ... Your code ... 51 | \end{minted} 52 | 53 | % ================================= % 54 | \section*{Examples} 55 | % --------------------------------- % 56 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 57 | import numpy as np 58 | x = np.arange(1,6) 59 | 60 | # Example 1: 61 | theta1 = np.array([5, 0]) 62 | simple_predict(x, theta1) 63 | # Ouput: 64 | array([5., 5., 5., 5., 5.]) 65 | # Do you understand why y_hat contains only 5s here? 66 | 67 | 68 | # Example 2: 69 | theta2 = np.array([0, 1]) 70 | simple_predict(x, theta2) 71 | # Output: 72 | array([1., 2., 3., 4., 5.]) 73 | # Do you understand why y_hat == x here? 74 | 75 | 76 | # Example 3: 77 | theta3 = np.array([5, 3]) 78 | simple_predict(x, theta3) 79 | # Output: 80 | array([ 8., 11., 14., 17., 20.]) 81 | 82 | 83 | # Example 4: 84 | theta4 = np.array([-3, 1]) 85 | simple_predict(x, theta4) 86 | # Output: 87 | array([-2., -1., 0., 1., 2.]) 88 | \end{minted} -------------------------------------------------------------------------------- /module05/exercises/m05ex03.tex: -------------------------------------------------------------------------------- 1 | \chapter{Exercise 03} 2 | \input{exercises/en.ex03_interlude.tex} 3 | \newpage 4 | \extitle{Add Intercept} 5 | \turnindir{ex03} 6 | \exnumber{03} 7 | \exfiles{tools.py} 8 | \exforbidden{None} 9 | \makeheaderfilesforbidden 10 | 11 | % ================================= % 12 | \section*{Objective} 13 | % --------------------------------- % 14 | 15 | Understand and manipulate the notion of hypothesis in machine learning. 16 | \\ 17 | You must implement a function which adds an extra column of $1$'s on the left side of a given vector or matrix. 18 | 19 | % ================================= % 20 | \section*{Instructions} 21 | % --------------------------------- % 22 | In the tools.py file create the following function as per the instructions given below: 23 | 24 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 25 | def add_intercept(x): 26 | """Adds a column of 1's to the non-empty numpy.array x. 27 | Args: 28 | x: has to be a numpy.array. x can be a one-dimensional (m * 1) or two-dimensional (m * n) array. 29 | Returns: 30 | X, a numpy.array of dimension m * (n + 1). 31 | None if x is not a numpy.array. 32 | None if x is an empty numpy.array. 33 | Raises: 34 | This function should not raise any Exception. 35 | """ 36 | ... Your code ... 
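    # Editor's note: a minimal illustrative sketch, not the only valid answer.
    # It assumes numpy is imported as np at the top of tools.py.
    if not isinstance(x, np.ndarray) or x.size == 0:
        return None
    x2d = x.reshape(-1, 1) if x.ndim == 1 else x          # promote a vector to a column
    return np.hstack((np.ones((x2d.shape[0], 1)), x2d))   # prepend the column of 1's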
37 | \end{minted} 38 | 39 | % ================================= % 40 | \section*{Examples} 41 | % --------------------------------- % 42 | 43 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 44 | import numpy as np 45 | 46 | # Example 1: 47 | x = np.arange(1,6) 48 | add_intercept(x) 49 | # Output: 50 | array([[1., 1.], 51 | [1., 2.], 52 | [1., 3.], 53 | [1., 4.], 54 | [1., 5.]]) 55 | 56 | 57 | # Example 2: 58 | y = np.arange(1,10).reshape((3,3)) 59 | add_intercept(y) 60 | # Output: 61 | array([[1., 1., 2., 3.], 62 | [1., 4., 5., 6.], 63 | [1., 7., 8., 9.]]) 64 | \end{minted} 65 | 66 | -------------------------------------------------------------------------------- /module05/exercises/m05ex04.tex: -------------------------------------------------------------------------------- 1 | \chapter{Exercise 04} 2 | \extitle{Prediction} 3 | \turnindir{ex04} 4 | \exnumber{04} 5 | \exfiles{prediction.py} 6 | \exforbidden{None} 7 | \makeheaderfilesforbidden 8 | 9 | 10 | % ================================= % 11 | \section*{Objective} 12 | % --------------------------------- % 13 | Understand and manipulate the notion of hypothesis in machine learning. 14 | 15 | You must implement the following formula as a function: 16 | 17 | $$ 18 | \begin{matrix} 19 | \hat{y}^{(i)} = \theta_0 + \theta_1 x^{(i)} & &\text{ for i = 1, ..., m} 20 | \end{matrix} 21 | $$ 22 | 23 | Where: 24 | \begin{itemize} 25 | \item $\hat{y}^{(i)}$ is the $i^{th}$ component of vector $\hat{y}$ 26 | \item $\hat{y}$ is a vector of dimension $m$, the vector of predicted values 27 | \item $\theta$ is a vector of dimension $2 \times 1$, the vector of parameters 28 | \item $x^{(i)}$ is the $i^{th}$ component of vector $x$ 29 | \item $x$ is a vector of dimension $m$, the vector of examples 30 | \end{itemize} 31 | 32 | But this time you have to do it with the linear algebra trick! 33 | 34 | $$ 35 | \hat{y} = X' \cdot \theta = 36 | \begin{bmatrix} 37 | 1 & x^{(1)} \\ 38 | \vdots & \vdots \\ 39 | 1 & x^{(m)} 40 | \end{bmatrix} 41 | \cdot 42 | \begin{bmatrix} 43 | \theta_0 \\ 44 | \theta_1 45 | \end{bmatrix} 46 | = \begin{bmatrix} 47 | \theta_0 + \theta_1x^{(1)} \\ 48 | \vdots \\ 49 | \theta_0 + \theta_1x^{(m)} 50 | \end{bmatrix} 51 | $$ 52 | 53 | \warn{ 54 | \begin{itemize} 55 | \item the argument $x$ is an $m$ vector 56 | \item $\theta$ is a $2 \times 1$ vector. 57 | \end{itemize} 58 | } 59 | 60 | You have to transform $x$ into $X'$ to fit the dimension of $\theta$! 61 | 62 | 63 | % ================================= % 64 | \section*{Instructions} 65 | % --------------------------------- % 66 | In the prediction.py file create the following function as per the instructions given below: 67 | \newline 68 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 69 | def predict_(x, theta): 70 | """Computes the vector of prediction y_hat from two non-empty numpy.array. 71 | Args: 72 | x: has to be an numpy.array, a one-dimensional array of size m. 73 | theta: has to be an numpy.array, a two-dimensional array of shape 2 * 1. 74 | Returns: 75 | y_hat as a numpy.array, a two-dimensional array of shape m * 1. 76 | None if x and/or theta are not numpy.array. 77 | None if x or theta are empty numpy.array. 78 | None if x or theta dimensions are not appropriate. 79 | Raises: 80 | This function should not raise any Exceptions. 81 | """ 82 | ... Your code ... 
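    # Editor's note: a minimal illustrative sketch of the X'.theta trick, assuming
    # numpy is imported as np; input validation is only partial here.
    if not isinstance(x, np.ndarray) or not isinstance(theta, np.ndarray):
        return None
    if x.size == 0 or theta.shape != (2, 1):
        return None
    X_prime = np.hstack((np.ones((x.shape[0], 1)), x.reshape(-1, 1)))  # add the 1's column
    return X_prime.dot(theta)                                          # (m, 2) . (2, 1) -> (m, 1)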
83 | \end{minted} 84 | 85 | \section*{Examples} 86 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 87 | import numpy as np 88 | x = np.arange(1,6) 89 | 90 | # Example 1: 91 | theta1 = np.array([[5], [0]]) 92 | predict_(x, theta1) 93 | # Ouput: 94 | array([[5.], [5.], [5.], [5.], [5.]]) 95 | # Do you remember why y_hat contains only 5's here? 96 | 97 | # Example 2: 98 | theta2 = np.array([[0], [1]]) 99 | predict_(x, theta2) 100 | # Output: 101 | array([[1.], [2.], [3.], [4.], [5.]]) 102 | # Do you remember why y_hat == x here? 103 | 104 | # Example 3: 105 | theta3 = np.array([[5], [3]]) 106 | predict_(x, theta3) 107 | # Output: 108 | array([[ 8.], [11.], [14.], [17.], [20.]]) 109 | 110 | 111 | # Example 4: 112 | theta4 = np.array([[-3], [1]]) 113 | predict_(x, theta4) 114 | # Output: 115 | array([[-2.], [-1.], [ 0.], [ 1.], [ 2.]]) 116 | \end{minted} 117 | -------------------------------------------------------------------------------- /module05/exercises/m05ex05.tex: -------------------------------------------------------------------------------- 1 | \chapter{Exercise 05} 2 | \extitle{Let’s Make Nice Plots} 3 | \turnindir{ex05} 4 | \exnumber{05} 5 | \exfiles{plot.py} 6 | \exforbidden{None} 7 | \makeheaderfilesforbidden 8 | 9 | \info{ 10 | For your information, the task we are performing here is called \textbf{regression}. 11 | It means that we are trying to predict a continuous numerical attribute for all examples (like a price, for instance). 12 | Later in the bootcamp, you will see that we can predict other things such as categories. 13 | } 14 | 15 | % ================================= % 16 | \section*{Objective} 17 | % --------------------------------- % 18 | You must implement a function to plot the data and the prediction line (or regression line).\\ 19 | \newline 20 | You will plot the data points (with their x and y values), and the prediction line that represents your hypothesis ($h_{\theta}$). 21 | \newpage 22 | % ================================= % 23 | \section*{Instructions} 24 | % --------------------------------- % 25 | In the plot.py file, create the following function as per the instructions given below: 26 | 27 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 28 | def plot(x, y, theta): 29 | """Plot the data and prediction line from three non-empty numpy.array. 30 | Args: 31 | x: has to be an numpy.array, a one-dimensional array of size m. 32 | y: has to be an numpy.array, a one-dimensional array of size m. 33 | theta: has to be an numpy.array, a two-dimensional array of shape 2 * 1. 34 | Returns: 35 | Nothing. 36 | Raises: 37 | This function should not raise any Exceptions. 38 | """ 39 | ... Your code ... 
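    # Editor's note: a rough illustrative sketch, assuming matplotlib.pyplot is
    # imported as plt; styling (colors, labels) is left to you.
    y_hat = theta[0] + theta[1] * x     # the prediction line
    plt.scatter(x, y)                   # the data points
    plt.plot(x, y_hat, color="orange")  # the regression line
    plt.show()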
40 | \end{minted} 41 | 42 | % ================================= % 43 | \section*{Examples} 44 | % --------------------------------- % 45 | 46 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 47 | import numpy as np 48 | x = np.arange(1,6) 49 | y = np.array([3.74013816, 3.61473236, 4.57655287, 4.66793434, 5.95585554]) 50 | 51 | # Example 1: 52 | theta1 = np.array([[4.5],[-0.2]]) 53 | plot(x, y, theta1) 54 | # Output: 55 | \end{minted} 56 | 57 | \begin{figure}[H] 58 | \centering 59 | \includegraphics[scale=0.6]{assets/plot1.png} 60 | \end{figure} 61 | 62 | \newpage 63 | 64 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 65 | # Example 2: 66 | theta2 = np.array([[-1.5],[2]]) 67 | plot(x, y, theta2) 68 | # Output: 69 | \end{minted} 70 | 71 | \begin{figure}[H] 72 | \centering 73 | \includegraphics[scale=0.6]{assets/plot2.png} 74 | \caption{Example 2} 75 | \end{figure} 76 | 77 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 78 | # Example 3: 79 | theta3 = np.array([[3],[0.3]]) 80 | plot(x, y, theta3) 81 | # Output: 82 | \end{minted} 83 | 84 | \begin{figure}[H] 85 | \centering 86 | \includegraphics[scale=0.6]{assets/plot3.png} 87 | \caption{Example 3} 88 | \end{figure} 89 | 90 | -------------------------------------------------------------------------------- /module05/exercises/m05ex07.tex: -------------------------------------------------------------------------------- 1 | \chapter{Exercise 07} 2 | \extitle{Vectorized loss function} 3 | \turnindir{ex07} 4 | \exnumber{07} 5 | \exfiles{vec\_loss.py} 6 | \exforbidden{None} 7 | \makeheaderfilesforbidden 8 | 9 | % ================================= % 10 | \section*{Objective} 11 | % --------------------------------- % 12 | Understand and experiment with the \textbf{loss function} in machine learning. 13 | 14 | You must implement the following formula as a function: 15 | $$ 16 | \begin{matrix} 17 | J(\theta) & = & \frac{1}{2m}(\hat{y} - y) \cdot(\hat{y}- y) 18 | \end{matrix} 19 | $$ 20 | 21 | Where: 22 | \begin{itemize} 23 | \item $\hat{y}$ is a vector of dimension $m$, the vector of predicted values 24 | \item $y$ is a vector of dimension $m$, the vector of expected values 25 | \end{itemize} 26 | 27 | \newpage 28 | 29 | % ================================= % 30 | \section*{Instructions} 31 | % --------------------------------- % 32 | In the \texttt{vec\_loss.py} file, create the following function as per the instructions given below: 33 | 34 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 35 | def loss_(y, y_hat): 36 | """Computes the half mean-squared-error of two non-empty numpy.arrays, without any for loop. 37 | The two arrays must have the same dimensions. 38 | Args: 39 | y: has to be an numpy.array, a one-dimensional array of size m. 40 | y_hat: has to be an numpy.array, a one-dimensional array of size m. 41 | Returns: 42 | The half mean-squared-error of the two vectors as a float. 43 | None if y or y_hat are empty numpy.array. 44 | None if y and y_hat does not share the same dimensions. 45 | Raises: 46 | This function should not raise any Exceptions. 47 | """ 48 | ... Your code ... 
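    # Editor's note: a minimal illustrative sketch of the dot-product formula,
    # assuming numpy is imported as np; validation is only partial here.
    if y.size == 0 or y.shape != y_hat.shape:
        return None
    diff = y_hat - y
    return float(diff.dot(diff) / (2 * y.shape[0]))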
49 | \end{minted} 50 | 51 | 52 | % ================================= % 53 | \section*{Examples} 54 | % --------------------------------- % 55 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 56 | import numpy as np 57 | X = np.array([0, 15, -9, 7, 12, 3, -21]) 58 | Y = np.array([2, 14, -13, 5, 12, 4, -19]) 59 | 60 | # Example 1: 61 | loss_(X, Y) 62 | # Output: 63 | 2.142857142857143 64 | 65 | # Example 2: 66 | loss_(X, X) 67 | # Output: 68 | 0.0 69 | \end{minted} -------------------------------------------------------------------------------- /module05/exercises/m05ex08.tex: -------------------------------------------------------------------------------- 1 | \chapter{Exercise 08} 2 | \input{exercises/en.ex08_interlude.tex} 3 | \newpage 4 | \extitle{Lets Make Nice Plots Again} 5 | \turnindir{ex08} 6 | \exnumber{08} 7 | \exfiles{plot.py} 8 | \exforbidden{None} 9 | \makeheaderfilesforbidden 10 | 11 | 12 | % ================================= % 13 | \section*{Objective} 14 | % --------------------------------- % 15 | You must implement a function which plots the data, the prediction line, and the loss.\\ 16 | \newline 17 | You will plot the $x$ and $y$ coordinates of all data points as well as the prediction line generated by your theta parameters.\\ 18 | \newline 19 | Your function must also display the overall loss ($J$) in the title, and draw small lines marking the distance between each data point and its predicted value. 20 | 21 | % ================================= % 22 | \section*{Instructions} 23 | % --------------------------------- % 24 | In the plot.py file create the following function as per the instructions given below:\\ 25 | \newline 26 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 27 | def plot_with_loss(x, y, theta): 28 | """Plot the data and prediction line from three non-empty numpy.ndarray. 29 | Args: 30 | x: has to be an numpy.ndarray, one-dimensional array of size m. 31 | y: has to be an numpy.ndarray, one-dimensional array of size m. 32 | theta: has to be an numpy.ndarray, one-dimensional array of size 2. 33 | Returns: 34 | Nothing. 35 | Raises: 36 | This function should not raise any Exception. 37 | """ 38 | ... Your code ... 
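    # Editor's note: a rough illustrative sketch, assuming matplotlib.pyplot is
    # imported as plt and numpy as np; the exact title wording is up to you.
    y_hat = theta[0] + theta[1] * x
    j = np.sum((y_hat - y) ** 2) / (2 * x.shape[0])   # overall loss J
    plt.scatter(x, y)
    plt.plot(x, y_hat, color="orange")
    plt.vlines(x, np.minimum(y, y_hat), np.maximum(y, y_hat),
               colors="red", linestyles="dashed")     # distance to each prediction
    plt.title("Cost : {:.6f}".format(j))
    plt.show()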
39 | \end{minted} 40 | 41 | \newpage 42 | 43 | % ================================= % 44 | \section*{Examples} 45 | % --------------------------------- % 46 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 47 | import numpy as np 48 | x = np.arange(1,6) 49 | y = np.array([11.52434424, 10.62589482, 13.14755699, 18.60682298, 14.14329568]) 50 | 51 | # Example 1: 52 | theta1= np.array([18,-1]) 53 | plot_with_loss(x, y, theta1) 54 | # Output: 55 | \end{minted} 56 | 57 | \begin{figure}[H] 58 | \centering 59 | \includegraphics[scale=0.65]{assets/plotcost1.png} 60 | \caption{Example 1} 61 | \end{figure} 62 | 63 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 64 | # Example 2: 65 | theta2 = np.array([14, 0]) 66 | plot_with_loss(x, y, theta2) 67 | # Output: 68 | \end{minted} 69 | 70 | \begin{figure}[H] 71 | \centering 72 | \includegraphics[scale=0.65]{assets/plotcost2.png} 73 | \caption{Example 2} 74 | \end{figure} 75 | 76 | \newpage 77 | 78 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 79 | # Example 3: 80 | theta3 = np.array([12, 0.8]) 81 | plot_with_loss(x, y, theta3) 82 | # Output: 83 | \end{minted} 84 | 85 | \begin{figure}[H] 86 | \centering 87 | \includegraphics[scale=0.65]{assets/plotcost3.png} 88 | \caption{Example 3} 89 | \end{figure} -------------------------------------------------------------------------------- /module05/useful_resources.tex: -------------------------------------------------------------------------------- 1 | %******************************************************************************% 2 | % % 3 | % Section useful resources % 4 | % for ML Modules % 5 | % % 6 | %******************************************************************************% 7 | 8 | 9 | \chapter*{Notions covered and learning resources} 10 | 11 | \section*{What notions will be covered by this module?} 12 | 13 | \begin{itemize} 14 | \item Sum 15 | \item Mean 16 | \item Variance 17 | \item Standard deviation 18 | \item Operations on vectors and matrices 19 | \item Hypothesis 20 | \item Regression 21 | \item Loss function 22 | \end{itemize} 23 | 24 | \section*{Learning resources} 25 | 26 | You are recommended to use the following material: \href{https://www.coursera.org/learn/machine-learning}{Machine Learning MOOC - Stanford}\\ 27 | \newline 28 | This series of videos is available at no cost: simply log in, select "Enroll for Free", and click "Audit" at the bottom of the pop-up window.\\ 29 | \newline 30 | The following sections of the course are particularly relevant to today's exercises: 31 | 32 | \subsection*{Week 1: Introduction to Machine Learning} 33 | 34 | \subsubsection*{Supervised vs. Unsupervised Machine Learning} 35 | \begin{itemize} 36 | \item What is Machine Learning? 
37 | \item Supervised Learning Part 1 38 | \item Supervised Learning Part 2 39 | \item Unsupervised Learning Part 1 40 | \item Unsupervised Learning Part 2 41 | \end{itemize} 42 | 43 | \subsubsection*{Regression Model} 44 | \begin{itemize} 45 | \item Regression Model Part 1 46 | \item Regression Model Part 2 47 | \item Cost Function Formula 48 | \item Cost Function Intuition 49 | \item Visualizing the cost function 50 | \item Visualizing Example 51 | \item \textit{Keep the rest for tommorow ;-)} 52 | \end{itemize} 53 | 54 | \emph{All videos above are available also on this \href{https://youtube.com/playlist?list=PLkDaE6sCZn6FNC6YRfRQc_FbeQrF8BwGI&feature=shared}{Andrew Ng's YouTube playlist} from 3 to 14 includes} 55 | 56 | \subsubsection*{Linear Algebra reminders} 57 | \begin{itemize} 58 | \item \href{https://www.youtube.com/watch?v=XMB__E658fQ}{Matrices and Vectors} 59 | \item \href{https://www.youtube.com/watch?v=k1JGJhUGmBE}{Addition and Scalar Multiplication} 60 | \item \href{https://www.youtube.com/watch?v=VIfykceJoZI}{Matrix - Vector Multiplication} 61 | \item \href{https://www.youtube.com/watch?v=JHZKyt0m1kc}{Matrix - Matrix Multiplication} 62 | \item \href{https://www.youtube.com/watch?v=wqM7O_ZUtCc}{Matrix - Multiplication Properties} 63 | \item \href{https://www.youtube.com/watch?v=IUf8HDyUeY0}{Inverse and Transpose} 64 | \end{itemize} 65 | -------------------------------------------------------------------------------- /module06/Makefile: -------------------------------------------------------------------------------- 1 | # List the pdf's to build. foo.tex will produce foo.pdf 2 | TARGETS = en.subject.pdf 3 | 4 | # List the files included in the slides 5 | DEPS = exercises/en.ex00_interlude.tex \ 6 | exercises/en.ex01_interlude.tex \ 7 | exercises/en.ex02_interlude.tex \ 8 | exercises/en.ex05_interlude.tex \ 9 | ../resources/42ai_bootcamps/en.instructions.tex \ 10 | ../resources/42ai_bootcamps/en.acknowledgements.tex \ 11 | ../resources/latex/redefinition-commands.tex \ 12 | en.subject.tex \ 13 | useful_resources.tex 14 | 15 | # Relative path to the LaTeX documentclass setup files 16 | # Adapt as needed 17 | # RELPATH = $(shell git rev-parse --show-toplevel)/resources/latex/ 18 | # RELPATH for github actions: 19 | RELPATH = $(shell dirname `pwd`)/resources/latex/ 20 | 21 | 22 | # You should not touch this either 23 | include $(RELPATH)/Makefile.LaTeX 24 | -------------------------------------------------------------------------------- /module06/assets/42ai_logo.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module06/assets/42ai_logo.pdf -------------------------------------------------------------------------------- /module06/assets/Improve.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module06/assets/Improve.png -------------------------------------------------------------------------------- /module06/assets/Improve2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module06/assets/Improve2.png -------------------------------------------------------------------------------- /module06/assets/ex03_cost_1.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module06/assets/ex03_cost_1.png -------------------------------------------------------------------------------- /module06/assets/ex03_cost_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module06/assets/ex03_cost_2.png -------------------------------------------------------------------------------- /module06/assets/ex03_cost_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module06/assets/ex03_cost_3.png -------------------------------------------------------------------------------- /module06/assets/ex03_interlude_cost.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module06/assets/ex03_interlude_cost.png -------------------------------------------------------------------------------- /module06/assets/ex03_interlude_plot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module06/assets/ex03_interlude_plot.png -------------------------------------------------------------------------------- /module06/assets/ex04_J_vs_t1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module06/assets/ex04_J_vs_t1.png -------------------------------------------------------------------------------- /module06/assets/ex04_score_vs_bluepills.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module06/assets/ex04_score_vs_bluepills.png -------------------------------------------------------------------------------- /module06/assets/ex05_price_vs_Tmeters_part1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module06/assets/ex05_price_vs_Tmeters_part1.png -------------------------------------------------------------------------------- /module06/assets/ex05_price_vs_Tmeters_part2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module06/assets/ex05_price_vs_Tmeters_part2.png -------------------------------------------------------------------------------- /module06/assets/ex05_price_vs_age_part1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module06/assets/ex05_price_vs_age_part1.png -------------------------------------------------------------------------------- /module06/assets/ex05_price_vs_age_part2.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module06/assets/ex05_price_vs_age_part2.png -------------------------------------------------------------------------------- /module06/assets/ex05_price_vs_thrust_part1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module06/assets/ex05_price_vs_thrust_part1.png -------------------------------------------------------------------------------- /module06/assets/ex05_price_vs_thrust_part2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module06/assets/ex05_price_vs_thrust_part2.png -------------------------------------------------------------------------------- /module06/assets/ex06_sellprice_ne_lgd_vs_age.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module06/assets/ex06_sellprice_ne_lgd_vs_age.png -------------------------------------------------------------------------------- /module06/assets/ex07_3Dplot_data.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module06/assets/ex07_3Dplot_data.png -------------------------------------------------------------------------------- /module06/assets/ex07_hypo_test_part1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module06/assets/ex07_hypo_test_part1.png -------------------------------------------------------------------------------- /module06/assets/ex07_learning_rate.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module06/assets/ex07_learning_rate.png -------------------------------------------------------------------------------- /module06/attachments/are_blue_pills_magics.csv: -------------------------------------------------------------------------------- 1 | "Patient","Micrograms","Score" 2 | 1, 1.17, 78.93 3 | 2, 2.97, 58.20 4 | 3, 3.26, 67.47 5 | 4, 4.69, 37.47 6 | 5, 5.83, 45.65 7 | 6, 6.00, 32.92 8 | 7, 6.41, 29.97 9 | -------------------------------------------------------------------------------- /module06/attachments/are_blue_pills_magics_description.txt: -------------------------------------------------------------------------------- 1 | #Patient: number of the patient. 2 | 3 | #Micrograms: quantity of blue pills patient has taken (in micrograms). 4 | 5 | #Score: Standardized score at the spacecraft driving test. 
6 | -------------------------------------------------------------------------------- /module06/exercises/en.ex02_interlude.tex: -------------------------------------------------------------------------------- 1 | %******************************************************************************% 2 | % % 3 | % Interlude % 4 | % for Machine Learning module % 5 | % % 6 | %******************************************************************************% 7 | 8 | % =============================== % 9 | \section*{Interlude - Gradient Descent} 10 | % ------------------------------- % 11 | 12 | So far we've calculated the \textit{gradient}, 13 | which indicates whether and by how much we should increase or decrease $\theta_0$ and $\theta_1$ in order to reduce the loss.\\ 14 | \newline 15 | What we have to do next is to update the theta parameters accordingly, 16 | step by step, until we reach the minimum. 17 | This iterative process, called \textbf{Gradient Descent}, 18 | will progressively improve the performance of your regression model on the training data.\\ 19 | \newline 20 | The gradient descent \textbf{algorithm} can be summed up to this: 21 | for a certain number of cycles, at each step, 22 | both $\theta$ parameters are slightly moved in the opposite directions than what the gradient indicates.\\ 23 | \newline 24 | The algorithm can be expressed in pseudocode as the following: 25 | $$ 26 | \begin{matrix} 27 | &\text{repeat until convergence:} & \{\\ 28 | & \text{compute } \nabla{(J)} \\ 29 | & \theta_0 := \theta_0 - \alpha \nabla(J)_0 \\ 30 | & \theta_1 := \theta_1 - \alpha \nabla(J)_1\\ 31 | \} \hspace{0.5cm} 32 | \end{matrix} 33 | $$ 34 | A few remarks on this algorithm: 35 | \begin{itemize} 36 | \item If you directly subtracted the gradient from $\theta$, 37 | your steps would be too big and you would quickly overshoot past the minimum. 38 | That's why we use $\alpha$ (alpha), called the \textit{learning rate}. 39 | It's a small float number (usually between 0 and 1) that decreases the magnitude of each update. 40 | \item The pseudocode says "repeat until convergence", 41 | but in your implementation, you will not actually check for convergence at each iteration. 42 | You will instead set a number of cycles that is sufficient for your gradient descent to converge. 43 | \item When training a linear regression model on a new dataset, 44 | you will have to choose appropriate alpha and the number of cycles through trial and error. 45 | \end{itemize} -------------------------------------------------------------------------------- /module06/exercises/en.ex05_interlude.tex: -------------------------------------------------------------------------------- 1 | %******************************************************************************% 2 | % % 3 | % Interlude % 4 | % for Machine Learning module % 5 | % % 6 | %******************************************************************************% 7 | 8 | \section*{Interlude - Normalization} 9 | 10 | The values inside the $x$ vector can vary quite a lot in magnitude, 11 | depending on the type of data you are working with.\\ 12 | \\ 13 | For example, if your dataset contains distances between planets in km, the numbers will be huge. 14 | On the other hand, if you are working with planet masses expressed as a fraction of the solar system's total mass, the numbers will be very small (between 0 and 1). 15 | Both cases may slow down convergence in Gradient Descent (or even sometimes prevent convergence at all). 
16 | To avoid that kind of situation, \textbf{normalization} is a very effective way to proceed.\\ 17 | \\ 18 | The idea behind this technique is quite straightforward: \textbf{scaling the data}.\\ 19 | \\ 20 | With normalization, you can transform your $x$ vector into a new $x'$ vector whose values range between $[-1, 1]$ more or less. Doing this allows you to see much more easily how a training example compares to the other ones: 21 | \begin{itemize} 22 | \item If an $x'$ value is close to $1$, you know it's among the largest in the dataset 23 | \item If an $x'$ value is close to $0$, you know it's close to the median 24 | \item If an $x'$ value is close to $-1$, you know it's among the smallest 25 | \end{itemize} 26 | \noindent{So with the upcoming normalization techniques, you'll be able to map your data to two different value ranges: $[0, 1]$ or $[-1, 1]$. Your algorithm will like it and thank you for it.} 27 | -------------------------------------------------------------------------------- /module06/exercises/m06ex00.tex: -------------------------------------------------------------------------------- 1 | \chapter{Exercise 00} 2 | \input{exercises/en.ex00_interlude.tex} 3 | \newpage 4 | \extitle{Linear Gradient - Iterative Version} 5 | \turnindir{ex00} 6 | \exnumber{00} 7 | \exfiles{gradient.py} 8 | \exforbidden{None} 9 | \makeheaderfilesforbidden 10 | 11 | % ================================== % 12 | \section*{Objective} 13 | % ---------------------------------- % 14 | Understand and manipulate the notion of gradient and gradient descent in machine learning.\\ 15 | \newline 16 | You must write a function that computes the \textbf{\textit{gradient}} of the loss function. 17 | It must compute a partial derivative with respect to each theta parameter separately, and return the vector gradient.\\ 18 | \newline 19 | The partial derivatives can be calculated with the following formulas: 20 | $$ 21 | \nabla(J)_0 = \frac{1}{m}\sum_{i=1}^{m}(h_{\theta}(x^{(i)}) - y^{(i)}) 22 | $$ 23 | 24 | $$ 25 | \nabla(J)_1 = \frac{1}{m}\sum_{i=1}^{m}(h_{\theta}(x^{(i)}) - y^{(i)})x^{(i)} 26 | $$ 27 | Where: 28 | \begin{itemize} 29 | \item $\nabla(J)$ is the gradient vector of size $2 \times 1$, (this strange symbol : $\nabla$ is called nabla) 30 | \item $x$ is a vector of dimension $m$ 31 | \item $y$ is a vector of dimension $m$ 32 | \item $x^{(i)}$ is the i$^\text{th}$ component of vector $x$ 33 | \item $y^{(i)}$ is the i$^\text{th}$ component of vector $y$ 34 | \item $\nabla(J)_j$ is the j$^\text{th}$ component of $\nabla(J)$ 35 | \item $h_{\theta}(x^{(i)})$ corresponds to the model's prediction of $y^{(i)}$ 36 | \end{itemize} 37 | 38 | % ================================== % 39 | \section*{Hypothesis Notation} 40 | % ---------------------------------- % 41 | $h_{\theta}(x^{(i)})$ is the same as what we previously noted $\hat{y}^{(i)}$. 42 | The two notations are equivalent. 43 | They represent the model's prediction (or estimation) of the ${y}^{(i)}$ value. 44 | If you follow Andrew Ng's course material on Coursera, you will see him using the former notation. 
45 | \newline 46 | As a reminder: 47 | $h_{\theta}(x^{(i)}) = \theta_0 + \theta_1x^{(i)}$ 48 | 49 | % ================================== % 50 | \section*{Instructions} 51 | % ---------------------------------- % 52 | 53 | In the \texttt{gradient.py} file create the following function as per the instructions given below: 54 | \newline 55 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 56 | def simple_gradient(x, y, theta): 57 | """Computes a gradient vector from three non-empty numpy.arrays, with a for-loop. 58 | The three arrays must have compatible shapes. 59 | Args: 60 | x: has to be an numpy.array, a vector of shape m * 1. 61 | y: has to be an numpy.array, a vector of shape m * 1. 62 | theta: has to be an numpy.array, a 2 * 1 vector. 63 | Return: 64 | The gradient as a numpy.array, a vector of shape 2 * 1. 65 | None if x, y, or theta are empty numpy.array. 66 | None if x, y and theta do not have compatible shapes. 67 | None if x, y or theta is not of the expected type. 68 | Raises: 69 | This function should not raise any Exception. 70 | """ 71 | ... Your code ... 72 | \end{minted} 73 | 74 | % ================================== % 75 | \section*{Examples} 76 | % ---------------------------------- % 77 | 78 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 79 | import numpy as np 80 | x = np.array([12.4956442, 21.5007972, 31.5527382, 48.9145838, 57.5088733]).reshape((-1, 1)) 81 | y = np.array([37.4013816, 36.1473236, 45.7655287, 46.6793434, 59.5585554]).reshape((-1, 1)) 82 | 83 | # Example 0: 84 | theta1 = np.array([2, 0.7]).reshape((-1, 1)) 85 | simple_gradient(x, y, theta1) 86 | # Output: 87 | array([[-19.0342574], [-586.66875564]]) 88 | 89 | # Example 1: 90 | theta2 = np.array([1, -0.4]).reshape((-1, 1)) 91 | simple_gradient(x, y, theta2) 92 | # Output: 93 | array([[-57.86823748], [-2230.12297889]]) 94 | \end{minted} -------------------------------------------------------------------------------- /module06/exercises/m06ex01.tex: -------------------------------------------------------------------------------- 1 | \chapter{Exercise 01} 2 | \input{exercises/en.ex01_interlude.tex} 3 | \newpage 4 | \extitle{Linear Gradient - Vectorized Version} 5 | \turnindir{ex01} 6 | \exnumber{01} 7 | \exfiles{vec\_gradient.py} 8 | \exforbidden{None} 9 | \makeheaderfilesforbidden 10 | 11 | % ================================= % 12 | \section*{Objective} 13 | % --------------------------------- % 14 | Understand and experiment with the notions of \textbf{gradient} and \textbf{gradient descent} in machine learning.\\ 15 | \newline 16 | You must implement the following formula as a function: 17 | 18 | $$ 19 | \nabla(J) = \frac{1}{m} {X'}^T(X'\theta - y) 20 | $$ 21 | Where: 22 | \begin{itemize} 23 | \item $\nabla(J)$ is a vector of dimension $2 \times 1$ 24 | \item $X'$ is a \textbf{matrix} of dimensions $(m \times 2)$ 25 | \item ${X'}^T$ is the transpose of $X'$. Its dimensions are $(2 \times m)$ 26 | \item $y$ is a vector of dimension $m$ 27 | \item $\theta$ is a vector of dimension $2 \times 1$ 28 | \end{itemize} 29 | Be careful: 30 | \begin{itemize} 31 | \item the $x$ you will get as an input is an $m$ vector, 32 | \item $\theta$ is a $2 \times 1$ vector. You have to transform $x$ to fit the dimension of $\theta$! 
33 | \end{itemize} 34 | \newpage 35 | % ================================= % 36 | \section*{Instructions} 37 | % --------------------------------- % 38 | In the \texttt{vec\_gradient.py} file, create the following function as per the instructions given below: 39 | \newline 40 | \par 41 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 42 | def simple_gradient(x, y, theta): 43 | """Computes a gradient vector from three non-empty numpy.arrays, without any for loop. 44 | The three arrays must have compatible shapes. 45 | Args: 46 | x: has to be a numpy.array, a vector of shape m * 1. 47 | y: has to be a numpy.array, a vector of shape m * 1. 48 | theta: has to be a numpy.array, a 2 * 1 vector. 49 | Return: 50 | The gradient as a numpy.ndarray, a vector of dimension 2 * 1. 51 | None if x, y, or theta is an empty numpy.ndarray. 52 | None if x, y and theta do not have compatible dimensions. 53 | Raises: 54 | This function should not raise any Exception. 55 | """ 56 | ... Your code ... 57 | \end{minted} 58 | 59 | % ================================= % 60 | \section*{Examples} 61 | % --------------------------------- % 62 | 63 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 64 | import numpy as np 65 | x = np.array([12.4956442, 21.5007972, 31.5527382, 48.9145838, 57.5088733]).reshape((-1, 1)) 66 | y = np.array([37.4013816, 36.1473236, 45.7655287, 46.6793434, 59.5585554]).reshape((-1, 1)) 67 | 68 | # Example 0: 69 | theta1 = np.array([2, 0.7]).reshape((-1, 1)) 70 | simple_gradient(x, y, theta1) 71 | # Output: 72 | array([[-19.0342...], [-586.6687...]]) 73 | 74 | # Example 1: 75 | theta2 = np.array([1, -0.4]).reshape((-1, 1)) 76 | simple_gradient(x, y, theta2) 77 | # Output: 78 | array([[-57.8682...], [-2230.1229...]]) 79 | \end{minted} -------------------------------------------------------------------------------- /module06/exercises/m06ex02.tex: -------------------------------------------------------------------------------- 1 | \chapter{Exercise 02} 2 | \input{exercises/en.ex02_interlude.tex} 3 | \newpage 4 | \extitle{Gradient Descent} 5 | \turnindir{ex02} 6 | \exnumber{02} 7 | \exfiles{fit.py} 8 | \exforbidden{any function that calculates derivatives for you} 9 | \makeheaderfilesforbidden 10 | 11 | % ================================= % 12 | \section*{Objective} 13 | % --------------------------------- % 14 | Understand and experiment with the notions of \textbf{gradient} and \textbf{gradient descent} in machine learning.\\ 15 | \newline 16 | Be able to explain what it means to \textbf{\textit{fit}} a Machine Learning model to a dataset.\ 17 | Implement a function that performs \textbf{Linear Gradient Descent} (LGD). 18 | 19 | 20 | % ================================= % 21 | \section*{Instructions} 22 | % --------------------------------- % 23 | In this exercise, you will implement linear gradient descent to fit your model to the dataset.\\ 24 | \newline 25 | The pseudocode for the algorithm is the following: 26 | $$ 27 | \begin{matrix} 28 | &\text{repeat until convergence:} & \{ \\ 29 | & \text{compute } \nabla{(J)} \\ 30 | & \theta_0 := \theta_0 - \alpha \nabla(J)_0 \\ 31 | & \theta_1 := \theta_1 - \alpha \nabla(J)_1\\ 32 | \} 33 | \end{matrix} 34 | $$ 35 | 36 | Where: 37 | \begin{itemize} 38 | \item $\alpha$ (alpha) is the \textit{learning rate}.
It's a small float number (usually between 0 and 1), 39 | \item For now, "repeat until convergence" will mean to simply repeat for max\_iter (a number that you will choose wisely). 40 | \end{itemize} 41 | You are expected to write a function named \texttt{fit\_} as per the instructions below:\\ 42 | \newline 43 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 44 | def fit_(x, y, theta, alpha, max_iter): 45 | """ 46 | Description: 47 | Fits the model to the training dataset contained in x and y. 48 | Args: 49 | x: has to be a numpy.ndarray, a vector of dimension m * 1: (number of training examples, 1). 50 | y: has to be a numpy.ndarray, a vector of dimension m * 1: (number of training examples, 1). 51 | theta: has to be a numpy.ndarray, a vector of dimension 2 * 1. 52 | alpha: has to be a float, the learning rate 53 | max_iter: has to be an int, the number of iterations done during the gradient descent 54 | Returns: 55 | new_theta: numpy.ndarray, a vector of dimension 2 * 1. 56 | None if there is a matching dimension problem. 57 | Raises: 58 | This function should not raise any Exception. 59 | """ 60 | ... your code here ... 61 | \end{minted} 62 | By now, you should already have written a function to calculate the linear gradient.\\ 63 | 64 | % ================================= % 65 | \section*{Examples} 66 | % --------------------------------- % 67 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 68 | import numpy as np 69 | x = np.array([[12.4956442], [21.5007972], [31.5527382], [48.9145838], [57.5088733]]) 70 | y = np.array([[37.4013816], [36.1473236], [45.7655287], [46.6793434], [59.5585554]]) 71 | theta = np.array([1, 1]).reshape((-1, 1)) 72 | 73 | # Example 0: 74 | theta1 = fit_(x, y, theta, alpha=5e-8, max_iter=1500000) 75 | theta1 76 | # Output: 77 | array([[1.40709365], 78 | [1.1150909 ]]) 79 | 80 | # Example 1: 81 | predict(x, theta1) 82 | # Output: 83 | array([[15.3408728 ], 84 | [25.38243697], 85 | [36.59126492], 86 | [55.95130097], 87 | [65.53471499]]) 88 | \end{minted} 89 | 90 | \info{ 91 | \begin{itemize} 92 | \item You can create more training data by generating an $x$ array with random values and computing the corresponding $y$ vector as a linear expression of $x$. You can then fit a model on this artificial data and find out if it comes out with the same $\theta$ coefficients that you first used. 93 | \item It is possible that $\theta_0$ and $\theta_1$ become "nan". In that case, it means you probably used a learning rate that is too large. 94 | \end{itemize} 95 | } -------------------------------------------------------------------------------- /module06/exercises/m06ex03.tex: -------------------------------------------------------------------------------- 1 | \chapter{Exercise 03} 2 | \extitle{Linear Regression with Class} 3 | \turnindir{ex03} 4 | \exnumber{03} 5 | \exfiles{my\_linear\_regression.py} 6 | \exforbidden{any functions from sklearn} 7 | \makeheaderfilesforbidden 8 | 9 | % ================================= % 10 | \section*{Objective} 11 | % --------------------------------- % 12 | Write a class that contains all the necessary methods to perform a linear regression.
13 | % ================================= % 14 | \section*{Instructions} 15 | % --------------------------------- % 16 | In this exercise, you will not learn anything new but don't worry, it's for your own good.\\ 17 | \newline 18 | You are expected to write your own \texttt{MyLinearRegression} class which looks similar to the class available in Scikit-learn: 19 | \texttt{sklearn.linear\_model.LinearRegression}\\ 20 | \newline 21 | \par 22 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 23 | class MyLinearRegression(): 24 | """ 25 | Description: 26 | My personnal linear regression class to fit like a boss. 27 | """ 28 | def __init__(self, thetas, alpha=0.001, max_iter=1000): 29 | self.alpha = alpha 30 | self.max_iter = max_iter 31 | self.thetas = thetas 32 | 33 | #... other methods ... 34 | \end{minted} 35 | \newpage 36 | You will add the following methods: 37 | \begin{itemize} 38 | \item \texttt{fit\_(self, x, y)}, 39 | \item \texttt{predict\_(self, x)}, 40 | \item \texttt{loss\_elem\_(self, y, y\_hat)}, 41 | \item \texttt{loss\_(self, y, y\_hat)}. 42 | \end{itemize} 43 | You have already implemented these functions, you just need a few adjustments so that they all work well within your \texttt{MyLinearRegression} class. 44 | 45 | % ================================= % 46 | \section*{Examples} 47 | % --------------------------------- % 48 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 49 | import numpy as np 50 | from my_linear_regression import MyLinearRegression as MyLR 51 | x = np.array([[12.4956442], [21.5007972], [31.5527382], [48.9145838], [57.5088733]]) 52 | y = np.array([[37.4013816], [36.1473236], [45.7655287], [46.6793434], [59.5585554]]) 53 | 54 | lr1 = MyLR(np.array([[2], [0.7]])) 55 | 56 | # Example 0.0: 57 | y_hat = lr1.predict_(x) 58 | # Output: 59 | array([[10.74695094], 60 | [17.05055804], 61 | [24.08691674], 62 | [36.24020866], 63 | [42.25621131]]) 64 | 65 | # Example 0.1: 66 | lr1.loss_elem_(y, y_hat) 67 | # Output: 68 | array([[710.45867381], 69 | [364.68645485], 70 | [469.96221651], 71 | [108.97553412], 72 | [299.37111101]]) 73 | 74 | # Example 0.2: 75 | lr1.loss_(y, y_hat) 76 | # Output: 77 | 195.34539903032385 78 | 79 | # Example 1.0: 80 | lr2 = MyLR(np.array([[1], [1]]), 5e-8, 1500000) 81 | lr2.fit_(x, y) 82 | lr2.thetas 83 | # Output: 84 | array([[1.40709365], 85 | [1.1150909 ]]) 86 | 87 | # Example 1.1: 88 | y_hat = lr2.predict_(x) 89 | # Output: 90 | array([[15.3408728 ], 91 | [25.38243697], 92 | [36.59126492], 93 | [55.95130097], 94 | [65.53471499]]) 95 | 96 | # Example 1.2: 97 | lr2.loss_elem_(y, y_hat) 98 | # Output: 99 | array([[486.66604863], 100 | [115.88278416], 101 | [ 84.16711596], 102 | [ 85.96919719], 103 | [ 35.71448348]]) 104 | 105 | # Example 1.3: 106 | lr2.loss_(y, y_hat) 107 | # Output: 108 | 80.83996294128525 109 | \end{minted} -------------------------------------------------------------------------------- /module06/exercises/m06ex05.tex: -------------------------------------------------------------------------------- 1 | \chapter{Exercise 05} 2 | \input{exercises/en.ex05_interlude.tex} 3 | \newpage 4 | \extitle{Normalization I: Z-score Standardization} 5 | \turnindir{ex05} 6 | \exnumber{05} 7 | \exfiles{z\_score.py} 8 | \exforbidden{None} 9 | \makeheaderfilesforbidden 10 | 11 | 12 | 13 | % ================================= % 14 | \section*{Objective} 15 | % --------------------------------- % 16 | Introduction to standardization/normalization 
methods.\\ 17 | \\ 18 | You must implement the following formula as a function: 19 | $$ 20 | \begin{matrix} 21 | x'^{(i)} = \frac{x^{(i)} - \frac{1}{m} \sum_{i = 1}^{m} x^{(i)}}{\sqrt{\frac{1}{m - 1} \sum_{i = 1}^{m} (x^{(i)} - \frac{1}{m} \sum_{i = 1}^{m} x^{(i)})^{2}}} & &\text{ for $i$ in $1, ..., m$} 22 | \end{matrix} 23 | $$ 24 | Where: 25 | \begin{itemize} 26 | \item $x$ is a vector of dimension $m$ 27 | \item $x^{(i)}$ is the i$^\text{th}$ component of the $x$ vector 28 | \item $x'$ is the normalized version of the $x$ vector 29 | \end{itemize} 30 | \noindent{The equation is much easier to understand in the following form:} 31 | $$ 32 | \begin{matrix} 33 | x'^{(i)} = \frac{x^{(i)} - \mu}{\sigma} & &\text{ for $i$ in $1, ..., m$} 34 | \end{matrix} 35 | $$ 36 | This should remind you of something from \textbf{TinyStatistician}... doesn't it?! 37 | \\ 38 | Ok, let's do a quick recap! 39 | \begin{itemize} 40 | \item $\mu$ is the mean of $x$ 41 | \item $\sigma$ is the standard deviation of $x$ 42 | \end{itemize} 43 | 44 | % ================================= % 45 | \section*{Instructions} 46 | % --------------------------------- % 47 | \noindent{In the \texttt{zscore.py} file, write the \texttt{zscore} function as per the instructions given below:} 48 | \\ 49 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 50 | def zscore(x): 51 | """Computes the normalized version of a non-empty numpy.ndarray using the z-score standardization. 52 | Args: 53 | x: has to be a numpy.ndarray, a vector. 54 | Returns: 55 | x' as a numpy.ndarray. 56 | None if x is an empty numpy.ndarray or not a numpy.ndarray. 57 | Raises: 58 | This function shouldn't raise any Exception. 59 | """ 60 | ... Your code ... 61 | \end{minted} 62 | 63 | 64 | % ================================= % 65 | \section*{Examples} 66 | % --------------------------------- % 67 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 68 | # Example 1: 69 | X = np.array([0, 15, -9, 7, 12, 3, -21]) 70 | zscore(X) 71 | # Output: 72 | array([-0.08620324, 1.2068453 , -0.86203236, 0.51721942, 0.94823559, 73 | 0.17240647, -1.89647119]) 74 | 75 | # Example 2: 76 | Y = np.array([2, 14, -13, 5, 12, 4, -19]).reshape((-1, 1)) 77 | zscore(Y) 78 | # Output: 79 | array([ 0.11267619, 1.16432067, -1.20187941, 0.37558731, 0.98904659, 80 | 0.28795027, -1.72770165]) 81 | \end{minted} 82 | -------------------------------------------------------------------------------- /module06/exercises/m06ex06.tex: -------------------------------------------------------------------------------- 1 | \chapter{Exercise 06} 2 | \extitle{Normalization II: Min-max Standardization} 3 | \turnindir{ex06} 4 | \exnumber{06} 5 | \exfiles{minmax.py} 6 | \exforbidden{None} 7 | \makeheaderfilesforbidden 8 | 9 | 10 | % ================================= % 11 | \section*{Objective} 12 | % --------------------------------- % 13 | Introduction to standardization/normalization methods.
14 | Implement another normalization method.\\ 15 | \\ 16 | You must implement the following formula as a function: 17 | 18 | $$ 19 | \begin{matrix} 20 | x'^{(i)} = \frac{x^{(i)} - min(x)}{max(x) - min(x)} & & \text{ for $i = 1, ..., m$} 21 | \end{matrix} 22 | $$ 23 | Where: 24 | \begin{itemize} 25 | \item $x$ is a vector of dimension $m$ 26 | \item $x^{(i)}$ is the i$^\text{th}$ component of vector $x$ 27 | \item $min(x)$ is the minimum value found among the components of vector $x$ 28 | \item $max(x)$ is the maximum value found among the components of vector $x$ 29 | \end{itemize} 30 | You will notice that this min-max standardization doesn't scale the values to the $[-1,1]$ range. 31 | What do you think the final range will be? 32 | \newpage 33 | % ================================= % 34 | \section*{Instructions} 35 | % --------------------------------- % 36 | In the \texttt{minmax.py} file, create the \texttt{minmax} function as per the instructions given below:\\ 37 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 38 | def minmax(x): 39 | """Computes the normalized version of a non-empty numpy.ndarray using the min-max standardization. 40 | Args: 41 | x: has to be a numpy.ndarray, a vector. 42 | Returns: 43 | x' as a numpy.ndarray. 44 | None if x is an empty numpy.ndarray or not a numpy.ndarray. 45 | Raises: 46 | This function shouldn't raise any Exception. 47 | """ 48 | ... Your code ... 49 | \end{minted} 50 | 51 | % ================================= % 52 | \section*{Examples} 53 | % --------------------------------- % 54 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 55 | # Example 1: 56 | X = np.array([0, 15, -9, 7, 12, 3, -21]).reshape((-1, 1)) 57 | minmax(X) 58 | # Output: 59 | array([0.58333333, 1. , 0.33333333, 0.77777778, 0.91666667, 60 | 0.66666667, 0. ]) 61 | 62 | # Example 2: 63 | Y = np.array([2, 14, -13, 5, 12, 4, -19]).reshape((-1, 1)) 64 | minmax(Y) 65 | # Output: 66 | array([0.63636364, 1. , 0.18181818, 0.72727273, 0.93939394, 67 | 0.6969697 , 0.
]) 68 | \end{minted} -------------------------------------------------------------------------------- /module06/useful_resources.tex: -------------------------------------------------------------------------------- 1 | %******************************************************************************% 2 | % % 3 | % Section useful resources % 4 | % for ML Modules % 5 | % % 6 | %******************************************************************************% 7 | 8 | 9 | 10 | \chapter*{Notions covered and learning resources} 11 | 12 | \section*{What notions will be covered by this module?} 13 | 14 | \begin{itemize} 15 | \item Linear regression 16 | \item Gradient descent 17 | \item Learning rate 18 | \item Normalization 19 | \end{itemize} 20 | 21 | \section*{Learning resources} 22 | 23 | You are recommended to use the following material: \href{https://www.coursera.org/learn/machine-learning}{Machine Learning MOOC - Stanford}\\ 24 | \newline 25 | This series of videos is available at no cost: simply log in, select "Enroll for Free", and click "Audit" at the bottom of the pop-up window.\\ 26 | \newline 27 | The following sections of the course are particularly relevant to today's exercises: 28 | 29 | \subsection*{Week 1: Introduction to Machine Learning} 30 | 31 | \subsubsection*{Train the model with Gradient Descent} 32 | \begin{itemize} 33 | \item Gradient descent 34 | \item Implementing gradient descent 35 | \item Gradient descent intuition 36 | \item Learning rate 37 | \item Gradient descent for linear regression 38 | \item Running gradient descent 39 | \end{itemize} 40 | 41 | \subsection*{Week 2: Regression with multiple input variables} 42 | 43 | \subsubsection*{Multiple linear Regression} 44 | \begin{itemize} 45 | \item Multiple features 46 | \item Vectorization part1 (optional) 47 | \item Vectorization part2 (optional) 48 | \end{itemize} 49 | 50 | \subsubsection*{Gradient descent in practice} 51 | \begin{itemize} 52 | \item Feature scaling part 1 53 | \item Feature scaling part 2 54 | \end{itemize} 55 | \emph{All videos mentioned above are also available on this \href{https://youtube.com/playlist?list=PLkDaE6sCZn6FNC6YRfRQc_FbeQrF8BwGI&feature=shared}{Andrew Ng's YouTube playlist} from 15 to 21 included, plus 25 and 26} 56 | 57 | 58 | -------------------------------------------------------------------------------- /module07/Makefile: -------------------------------------------------------------------------------- 1 | # List the pdf's to build.
foo.tex will produce foo.pdf 2 | TARGETS = en.subject.pdf 3 | 4 | # List the files included in the slides 5 | DEPS = exercises/en.ex00_interlude.tex \ 6 | exercises/en.ex01_interlude.tex \ 7 | exercises/en.ex02_interlude.tex \ 8 | exercises/en.ex03_interlude.tex \ 9 | exercises/en.ex04_interlude.tex \ 10 | exercises/en.ex07_interlude.tex \ 11 | exercises/en.ex08_interlude.tex \ 12 | exercises/en.ex09_interlude.tex \ 13 | ../resources/latex/redefinition-commands.tex \ 14 | ../resources/42ai_bootcamps/en.instructions.tex \ 15 | ../resources/42ai_bootcamps/en.acknowledgements.tex \ 16 | useful_resources.tex 17 | 18 | # Relative path to the LaTeX documentclass setup files 19 | # Adapt as needed 20 | # RELPATH = $(shell git rev-parse --show-toplevel)/resources/latex/ 21 | # RELPATH for github actions: 22 | RELPATH = $(shell dirname `pwd`)/resources/latex/ 23 | 24 | # You should not touch this either 25 | include $(RELPATH)/Makefile.LaTeX 26 | -------------------------------------------------------------------------------- /module07/assets/42ai_logo.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module07/assets/42ai_logo.pdf -------------------------------------------------------------------------------- /module07/assets/Default.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module07/assets/Default.png -------------------------------------------------------------------------------- /module07/assets/Evaluate.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module07/assets/Evaluate.png -------------------------------------------------------------------------------- /module07/assets/Improve.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module07/assets/Improve.png -------------------------------------------------------------------------------- /module07/assets/Predict.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module07/assets/Predict.png -------------------------------------------------------------------------------- /module07/assets/bad_pred_with_distance.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module07/assets/bad_pred_with_distance.png -------------------------------------------------------------------------------- /module07/assets/ex07_price_vs_Tmeters_part1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module07/assets/ex07_price_vs_Tmeters_part1.png -------------------------------------------------------------------------------- /module07/assets/ex07_price_vs_Tmeters_part2.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module07/assets/ex07_price_vs_Tmeters_part2.png -------------------------------------------------------------------------------- /module07/assets/ex07_price_vs_age_part1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module07/assets/ex07_price_vs_age_part1.png -------------------------------------------------------------------------------- /module07/assets/ex07_price_vs_age_part2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module07/assets/ex07_price_vs_age_part2.png -------------------------------------------------------------------------------- /module07/assets/ex07_price_vs_thrust_part1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module07/assets/ex07_price_vs_thrust_part1.png -------------------------------------------------------------------------------- /module07/assets/ex07_price_vs_thrust_part2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module07/assets/ex07_price_vs_thrust_part2.png -------------------------------------------------------------------------------- /module07/assets/ex12_data.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module07/assets/ex12_data.png -------------------------------------------------------------------------------- /module07/assets/ex12_plot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module07/assets/ex12_plot.png -------------------------------------------------------------------------------- /module07/assets/overfitt.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module07/assets/overfitt.png -------------------------------------------------------------------------------- /module07/assets/overfitt_with_dots.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module07/assets/overfitt_with_dots.png -------------------------------------------------------------------------------- /module07/assets/polynomial_straight_line.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module07/assets/polynomial_straight_line.png -------------------------------------------------------------------------------- /module07/attachments/are_blue_pills_magics.csv: -------------------------------------------------------------------------------- 1 | "Patient","Micrograms","Score" 2 | 
1, 1.17, 78.93 3 | 2, 2.97, 58.20 4 | 3, 3.26, 67.47 5 | 4, 4.69, 37.47 6 | 5, 5.83, 45.65 7 | 6, 6.00, 32.92 8 | 7, 6.41, 29.97 9 | -------------------------------------------------------------------------------- /module07/attachments/are_blue_pills_magics_description.txt: -------------------------------------------------------------------------------- 1 | #Patient: number of the patient. 2 | 3 | #Micrograms: quantity of blue pills patient has taken (in micrograms). 4 | 5 | #Score: Standardized score at the spacecraft driving test. 6 | -------------------------------------------------------------------------------- /module07/attachments/spacecraft_data_description.txt: -------------------------------------------------------------------------------- 1 | #Age: Age of the spacecraft. 2 | 3 | #Thrust_powern: Power of engines in 10 km/s. 4 | 5 | #Terameters: Distance that the spacecraft has travelled in terameters. 6 | 7 | #Sell_price: This is the prices at which the custommer bought the spacecraft (in kiloeuros). 8 | -------------------------------------------------------------------------------- /module07/exercises/en.ex01_interlude.tex: -------------------------------------------------------------------------------- 1 | %******************************************************************************% 2 | % % 3 | % Interlude % 4 | % for Machine Learning module % 5 | % % 6 | %******************************************************************************% 7 | 8 | % =============================== % 9 | \section*{Interlude - Even More Linear Algebra Tricks!} 10 | % ******************************* % 11 | 12 | As you already did before with the univariate hypothesis, 13 | the multivariate hypothesis can be vectorized as well.\\ 14 | \newline 15 | If you add a column of $1$'s as the first column of the $X$ matrix, you get what we'll call the $X'$ matrix. 16 | Then, you can calculate $\hat{y}$ by multiplying $X'$ and $\theta$. 17 | 18 | $$ 19 | X' \cdot \theta = 20 | \begin{bmatrix} 21 | 1 & x_{1}^{(1)} & \dots & x_{n}^{(1)}\\ 22 | \vdots & \vdots & \ddots & \vdots\\ 23 | 1 & x_{1}^{(m)} & \dots & x_{n}^{(m)}\end{bmatrix} 24 | \cdot 25 | \begin{bmatrix} 26 | \theta_0 \\ 27 | \theta_1 \\ 28 | \vdots \\ 29 | \theta_n 30 | \end{bmatrix} 31 | = 32 | \begin{bmatrix} 33 | \theta_0 + \theta_{1} x_{1}^{(1)} + \dots + \theta_{n} x_{n}^{(1)}\\ 34 | \vdots \\ 35 | \theta_0 + \theta_{1} x_{1}^{(m)} + \dots + \theta_{n} x_{n}^{(m)} 36 | \end{bmatrix} 37 | = 38 | \begin{bmatrix} 39 | \hat{y}^{(1)} \\ 40 | \vdots \\ 41 | \hat{y}^{(m)} 42 | \end{bmatrix} 43 | = 44 | \hat{y} 45 | $$ 46 | Another way of understanding this algebra trick is to pretend that each training 47 | example has an artificial $x_0$ feature that is always equal to $1$.\\ 48 | \newline 49 | This simplifies the equations as now, each $x_j$ feature has its 50 | corresponding $\theta_j$ parameter in the multiplication. 
51 | 52 | $$ 53 | \theta_0x_0^{(i)} + \theta_{1} x_{1}^{(i)} + \dots + \theta_{n} x_{n}^{(i)} = \theta \cdot x'^{(i)} 54 | $$ -------------------------------------------------------------------------------- /module07/exercises/en.ex02_interlude.tex: -------------------------------------------------------------------------------- 1 | %******************************************************************************% 2 | % % 3 | % Interlude % 4 | % for Machine Learning module % 5 | % % 6 | %******************************************************************************% 7 | 8 | % =============================== % 9 | \section*{Interlude - Evaluate} 10 | % ------------------------------- % 11 | 12 | \begin{figure}[!h] 13 | \centering 14 | \includegraphics[scale=0.2]{assets/Evaluate.png} 15 | %\caption{The Learning Cycle: Evaluate} 16 | \end{figure} 17 | 18 | % =============================== % 19 | \section*{Back to the Loss Function} 20 | % ------------------------------- % 21 | How is our model doing?\\ 22 | To evaluate our model, remember that we have already used a \textbf{metric} called the \textbf{loss function} (also known as \textbf{cost function}). 23 | The loss function is basically just a measure of how wrong the model is, in all of its predictions.\\ 24 | \newline 25 | Two modules ago, we defined the loss function as the average of the squared distances between each prediction and its expected value (distances represented by the dotted lines in the figure below): 26 | 27 | \begin{figure}[!h] 28 | \centering 29 | \includegraphics[scale=0.5]{assets/bad_pred_with_distance.png} 30 | \caption{Distances between predicted and expected values} 31 | \end{figure} 32 | \newpage 33 | \noindent{The formula was the following:} 34 | 35 | $$ 36 | J(\theta) = \frac{1}{2m}\sum_{i=1}^{m}(\hat{y}^{(i)} - y^{(i)})^2 37 | $$ 38 | \\ 39 | And its vectorized form: 40 | 41 | $$ 42 | \begin{matrix} 43 | J(\theta) = \frac{1}{2m}(\hat{y} - y)\cdot(\hat{y}- y) 44 | \end{matrix} 45 | $$ 46 | \\ 47 | \textit{So, now that we moved to multivariate linear regression, what does it change?}\\ 48 | \newline 49 | You may have noticed that variables such as $x_j$ and $\theta_j$ are not in the equation. 50 | Indeed, the loss function only uses the predictions ($\hat{y}$) and the expected values ($y$), 51 | so the inner workings of the model do not have an impact on its evaluation metric.\\ 52 | \\ 53 | This means we can use the exact same loss function as we did before! 54 | -------------------------------------------------------------------------------- /module07/exercises/en.ex03_interlude.tex: -------------------------------------------------------------------------------- 1 | %******************************************************************************% 2 | % % 3 | % Interlude % 4 | % for Machine Learning module % 5 | % % 6 | %******************************************************************************% 7 | 8 | % =============================== % 9 | \section*{Interlude - Improve with the Gradient} 10 | % ******************************* % 11 | 12 | \begin{figure}[!h] 13 | \centering 14 | \includegraphics[scale=0.2]{assets/Improve.png} 15 | %\caption{The Learning Cycle: Improve} 16 | \end{figure} 17 | 18 | % =============================== % 19 | \section*{Multivariate Gradient} 20 | % ******************************* % 21 | From our multivariate linear hypothesis we can derive our multivariate gradient. 
22 | It looks a lot like the one we saw during the previous module, but instead of having just two components, the gradient now has as many as there are parameters. 23 | This means that now we need to calculate $\nabla(J)_0,\nabla(J)_1,\dots,\nabla(J)_n$.\\ 24 | \newline 25 | If we take the univariate equations we used during the previous module and replace the formula for $\nabla(J)_1$ by a more general $\nabla(J)_j$, we get the following: 26 | 27 | $$ 28 | \begin{matrix} 29 | \nabla(J)_0 & = &\frac{1}{m}\sum_{i=1}^{m}(h_{\theta}(x^{(i)}) - y^{(i)}) & \\ 30 | \nabla(J)_j & = &\frac{1}{m}\sum_{i=1}^{m}(h_{\theta}(x^{(i)}) - y^{(i)})x_{j}^{(i)} & \text{ for j = 1, ..., n} 31 | \end{matrix} 32 | $$ 33 | Where: 34 | \begin{itemize} 35 | \item $\nabla(J)$ is a vector of dimension $(n + 1)$, the gradient vector 36 | \item $\nabla(J)_j$ is the j$^\text{th}$ component of $\nabla(J)$, the partial derivative of $J$ with respect to $\theta_j$ 37 | \item $y$ is a vector of dimension $m$, the vector of expected values 38 | \item $y^{(i)}$ is a scalar, the i$^\text{th}$ component of vector $y$ 39 | \item $x^{(i)}$ is the feature vector of the i$^\text{th}$ example 40 | \item $x^{(i)}_j$ is a scalar, the j$^\text{th}$ feature value of the i$^\text{th}$ example 41 | \item $h_{\theta}(x^{(i)})$ is a scalar, the model's estimation of $y^{(i)}$. (It can also be denoted $\hat{y}^{(i)}$) 42 | \end{itemize} 43 | 44 | % =============================== % 45 | \section*{Vectorized Form} 46 | % ******************************* % 47 | As usual, we can use some linear algebra magic to get a more compact (and computationally efficient) formula. 48 | First we can use our convention that each training example has an extra $x_0 = 1$ feature, and replace the gradient formulas above by one single equation that is valid for all $j$ components: 49 | 50 | $$ 51 | \begin{matrix} 52 | \nabla(J)_j & = &\frac{1}{m}\sum_{i=1}^{m}(h_{\theta}(x^{(i)}) - y^{(i)})x_{j}^{(i)} & \text{ for j = 0, ..., n} 53 | \end{matrix} 54 | $$ 55 | And this generic equation can then be rewritten in a vectorized form: 56 | 57 | $$ 58 | \nabla(J) = \frac{1}{m} {X'}^T(X'\theta - y) 59 | $$ 60 | Where: 61 | \begin{itemize} 62 | \item $\nabla(J)$ is the gradient vector of dimension $(n + 1)$ 63 | \item $X'$ is a matrix of dimensions $(m \times (n + 1))$, the design matrix onto which a column of $1$'s was added as the first column 64 | \item ${X'}^T$ means the matrix has been transposed 65 | \item $\theta$ is a vector of dimension $(n + 1)$: the parameter vector 66 | \item $y$ is a vector of dimension $m$: the vector of expected values 67 | \end{itemize} 68 | The vectorized equation can output the entire gradient vector all at once, in one calculation!\\ 69 | \newline 70 | So if you understand the linear algebra operations, you can forget about the equations we presented at the top of the page and simply use the vectorized one. 
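To make this formula concrete, here is a minimal NumPy sketch of the vectorized gradient (an illustration only, not the reference solution you are asked to write in the exercises; the function name \texttt{gradient\_} and the input shapes are assumptions):
\begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python}
import numpy as np

def gradient_(x, y, theta):
    # Assumed shapes: x is (m, n), y is (m, 1), theta is (n + 1, 1).
    m = x.shape[0]
    # Build X' by prepending a column of ones (the artificial x_0 feature).
    x_prime = np.hstack((np.ones((m, 1)), x))
    # nabla(J) = (1 / m) * X'^T (X' theta - y)
    return x_prime.T @ (x_prime @ theta - y) / m
\end{minted}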
71 | -------------------------------------------------------------------------------- /module07/exercises/en.ex04_interlude.tex: -------------------------------------------------------------------------------- 1 | %******************************************************************************% 2 | % % 3 | % Interlude % 4 | % for Machine Learning module % 5 | % % 6 | %******************************************************************************% 7 | 8 | % =============================== % 9 | \section*{Interlude - Gradient Descent} 10 | % ******************************* % 11 | 12 | Now comes the fun part: \textbf{gradient descent}!\\ 13 | \newline 14 | The algorithm is not that different from the one used in univariate 15 | linear regression. As you might have guessed, what will change is 16 | that the $j$ index needs to run from $0$ to $n$ instead of $0$ 17 | to $1$. So all you need is a more generic algorithm, which can be 18 | expressed in pseudocode as the following: 19 | 20 | $$ 21 | \begin{matrix} 22 | &\text{repeat until convergence:} & \{\\ 23 | & \text{compute } \nabla{(J)} \\ 24 | & \theta_j := \theta_j - \alpha \nabla(J)_j \\ 25 | & \text{(simultaneously update } \theta_j \text{ for j = 0, 1, ..., n)}\\ 26 | \} \hspace{0.5cm} 27 | \end{matrix} 28 | $$ 29 | If we take the univariate equations we used during the previous module and replace the formula for $\nabla(J)_1$ by a more general $\nabla(J)_j$, we get the following:\\ 30 | \newline 31 | $$ 32 | \begin{matrix} 33 | \nabla(J)_0 & = &\frac{1}{m}\sum_{i=1}^{m}(h_{\theta}(x^{(i)}) - y^{(i)}) & \\ 34 | \nabla(J)_j & = &\frac{1}{m}\sum_{i=1}^{m}(h_{\theta}(x^{(i)}) - y^{(i)})x_{j}^{(i)} & \text{ for j = 1, ..., n} 35 | \end{matrix} 36 | $$ 37 | \\ 38 | If you started to like vectorized forms, you might have noticed that the $\theta_j$ notation is actually redundant here, since all components of $\theta$ need to be updated simultaneously. $\theta$ is a vector, and so is $\nabla{(J)}$: they both have dimension $(n+1)$. So all we need to do is this: 39 | 40 | 41 | $$ 42 | \begin{matrix} 43 | &\text{repeat until convergence:} & \{\\ 44 | & \text{compute } \nabla{(J)} \\ 45 | & \theta := \theta - \alpha \nabla(J)\\ \} \hspace{0.5cm} 46 | \end{matrix} 47 | $$ 48 | Where: 49 | \begin{itemize} 50 | \item $\theta$ is the entire parameter vector 51 | \item $\alpha$ (alpha) is the learning rate (a small number, usually between 0 and 1) 52 | \item $\nabla{(J)}$ is the entire gradient vector 53 | \end{itemize} 54 | 55 | % =============================== % 56 | \section*{Note: Do you still wonder why there is a subtraction in the equation?} 57 | % ******************************* % 58 | By definition, the gradient indicates the direction towards which we 59 | should adjust the $\theta$ parameters if we wanted to increase the loss. 60 | But since our optimization objective is to minimize the loss, 61 | we move $\theta$ in the opposite direction of the gradient 62 | (hence the name gradient descent).
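To see the update rule in action, here is a minimal sketch of the descent loop in NumPy (an illustration only, not the reference solution; the function name, the fixed number of iterations and the input shapes are assumptions):
\begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python}
import numpy as np

def fit_(x, y, theta, alpha, max_iter):
    # Assumed shapes: x is (m, n), y is (m, 1), theta is (n + 1, 1);
    # alpha is a small float, max_iter an int.
    m = x.shape[0]
    # X': the design matrix with a leading column of ones.
    x_prime = np.hstack((np.ones((m, 1)), x))
    for _ in range(max_iter):
        # Gradient of the loss, then one update of the whole theta vector.
        gradient = x_prime.T @ (x_prime @ theta - y) / m
        theta = theta - alpha * gradient
    return theta
\end{minted}
Note how the whole $\theta$ vector is updated in a single vectorized operation: this is exactly the \textit{simultaneous update} mentioned above.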
-------------------------------------------------------------------------------- /module07/exercises/en.ex07_interlude.tex: -------------------------------------------------------------------------------- 1 | %******************************************************************************% 2 | % % 3 | % Interlude % 4 | % for Machine Learning module % 5 | % % 6 | %******************************************************************************% 7 | 8 | % =============================================== % 9 | \section*{Interlude - Introducing Polynomial Models} 10 | % ----------------------------------------------- % 11 | 12 | You probably noticed that the method we use is called \textit{linear regression} for a reason: 13 | the model generates all of its predictions on a straight line. 14 | However, we often encounter features that do not have a linear relationship with the predicted variable, 15 | like in the figure below: 16 | 17 | \begin{figure}[!h] 18 | \centering 19 | \includegraphics[scale=0.6]{assets/polynomial_straight_line.png} 20 | \caption{Non-linear relationship} 21 | \end{figure} 22 | In that case, we are stuck with a straight line that cannot fit the data points properly.\\ 23 | \newline 24 | In this example, what if we could express $y$ not only as a function of $x$, but also of $x^2$, and maybe even $x^3$ and $x^4$? 25 | We could make a hypothesis that draws a nice \textbf{curve} that would better fit the data. 26 | That's where polynomial features can help! 27 | 28 | % =============================================== % 29 | \section*{Interlude - Polynomial features} 30 | % ----------------------------------------------- % 31 | First we get to do some \textit{feature engineering}. 32 | We create new features by raising our initial $x$ feature to the power of 2, and then 3, 4... as far as we want to go. 33 | For each new feature we need to create a new column in the dataset. 34 | 35 | % =============================================== % 36 | \section*{Interlude - Polynomial Hypothesis} 37 | % ----------------------------------------------- % 38 | Now that we have created our new features, we can combine them in a linear hypothesis that looks just the same as what we're used to: 39 | 40 | $$ 41 | \hat{y} = \theta_0 + \theta_1 x + \theta_2 x^{2} + \dots + \theta_n x^{n} 42 | $$ 43 | It's a little strange because we are building a linear combination, not with different features but with different powers of the same feature. 44 | This is a first way of introducing non-linearity into a regression model! -------------------------------------------------------------------------------- /module07/exercises/en.ex08_interlude.tex: -------------------------------------------------------------------------------- 1 | %******************************************************************************% 2 | % % 3 | % Interlude % 4 | % for Machine Learning module % 5 | % % 6 | %******************************************************************************% 7 | 8 | % ============================================== % 9 | \section*{Interlude - Plotting Curves With Matplotlib} 10 | % ---------------------------------------------- % 11 | 12 | We asked you to plot straight lines in \texttt{module05}. 13 | Now that you are working with polynomial models, the hypothesis functions are no longer straight lines, but \textbf{curves}.\\ 14 | \newline 15 | Plotting curves is a bit more tricky, because if you do not have enough data points, you will get an ugly broken line instead of a smooth curve.
16 | Here's a way to do it.\\ 17 | \newline 18 | Let's begin with a simple dataset: 19 | 20 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 21 | import numpy as np 22 | import matplotlib.pyplot as plt 23 | 24 | x = np.arange(1,11).reshape(-1,1) 25 | y = np.array([[ 1.39270298], 26 | [ 3.88237651], 27 | [ 4.37726357], 28 | [ 4.63389049], 29 | [ 7.79814439], 30 | [ 6.41717461], 31 | [ 8.63429886], 32 | [ 8.19939795], 33 | [10.37567392], 34 | [10.68238222]]) 35 | 36 | plt.scatter(x,y) 37 | plt.show() 38 | \end{minted} 39 | 40 | \begin{figure}[!h] 41 | \centering 42 | \includegraphics[scale=0.6]{assets/ex12_data.png} 43 | \caption{Scatter plot of a dataset} 44 | \end{figure} 45 | \newpage 46 | \noindent{Now, we build a polynomial model of degree 3 and plot its hypothesis function $h_{\theta}(x)$.} 47 | \\ 48 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 49 | from polynomial_model import add_polynomial_features 50 | from mylinearregression import MyLinearRegression as MyLR 51 | 52 | # Build the model: 53 | x_ = add_polynomial_features(x, 3) 54 | my_lr = MyLR(np.ones(4).reshape(-1,1)) 55 | my_lr.fit_(x_, y) 56 | # Plot: 57 | ## To get a smooth curve, we need a lot of data points 58 | continuous_x = np.arange(1,10.01, 0.01).reshape(-1,1) 59 | x_ = add_polynomial_features(continuous_x, 3) 60 | y_hat = my_lr.predict_(x_) 61 | 62 | plt.scatter(x,y) 63 | plt.plot(continuous_x, y_hat, color='orange') 64 | plt.show() 65 | \end{minted} 66 | 67 | \begin{figure}[!h] 68 | \centering 69 | \includegraphics[scale=0.6]{assets/ex12_plot.png} 70 | \caption{Scatter plot of a dataset, and on top, a plot of the polynomial hypothesis function} 71 | \end{figure} 72 | -------------------------------------------------------------------------------- /module07/exercises/en.ex09_interlude.tex: -------------------------------------------------------------------------------- 1 | %******************************************************************************% 2 | % % 3 | % Interlude % 4 | % for Machine Learning module % 5 | % % 6 | %******************************************************************************% 7 | 8 | % ============================================== % 9 | \section*{Interlude - Lost in Overfitting} 10 | % ---------------------------------------------- % 11 | 12 | The two previous exercises have led you, dear reader, into very dangerous territory: the realm of \textbf{overfitting}.\\ 13 | You did not see it coming but now, you are in a bad situation...\\ 14 | \\ 15 | By increasing the polynomial degree of your model, you increased its \textbf{complexity}. 16 | Is it wrong? 17 | Not always. 18 | Some models are indeed very complex because the relationships they represent are very complex as well.\\ 19 | \\ 20 | But, if you look at the plots for the previous exercise's \textit{best model}, you should feel that something is wrong...\\ 21 | \\ 22 | % ============================================== % 23 | \section*{Interlude - Something is rotten in the state of our model...} 24 | % ---------------------------------------------- % 25 | Take a look at the following plot. 26 | 27 | \begin{figure}[!h] 28 | \centering 29 | \includegraphics[scale=0.6]{assets/overfitt.png} 30 | \caption{Overfitting hypothesis} 31 | \end{figure} 32 | 33 | You can see that the prediction line fits each data point perfectly, but completely misses out on capturing the relationship between $x$ and $y$ properly.
34 | And now, if we add some brand new data points to the dataset, we see that the predictions on those new examples are way off. 35 | 36 | \begin{figure}[!h] 37 | \centering 38 | \includegraphics[scale=0.6]{assets/overfitt_with_dots.png} 39 | \caption{Generalization errors resulting from overfitting} 40 | \end{figure} 41 | This situation is called overfitting, because the model is doing an excessively good job at fitting the data. 42 | It is literally bending over backward to account for the data's mini details. 43 | But most of the data's irregularities are just noise, and they should in fact be ignored. 44 | So because the model overfits, it can't generalize to new data. 45 | 46 | % ============================================== % 47 | \section*{Interlude - The training set, the test set, and the happy data scientist} 48 | % ---------------------------------------------- % 49 | To be able to detect overfitting, \textbf{you should always evaluate your model on new data}.\\ 50 | \\ 51 | New data means, data that your model hasn't seen during training.\\ 52 | \\ 53 | It's the only way to make sure your model isn't \textit{recalling}. 54 | To do so, now and forever, you must always divide your dataset in (at least) two parts: one for the training, and one for the evaluation of your model. -------------------------------------------------------------------------------- /module07/exercises/m07ex00.tex: -------------------------------------------------------------------------------- 1 | \chapter{Exercise 00} 2 | \input{exercises/en.ex00_interlude.tex} 3 | \newpage 4 | \extitle{Multivariate Hypothesis - Iterative Version} 5 | \turnindir{ex00} 6 | \exnumber{00} 7 | \exfiles{prediction.py} 8 | \exforbidden{None} 9 | \makeheaderfilesforbidden 10 | 11 | % ================================== % 12 | \section*{Objective} 13 | % ---------------------------------- % 14 | Manipulate the hypothesis to make a prediction.\\ 15 | \newline 16 | You must implement the following formula as a function: 17 | 18 | $$ 19 | \begin{matrix} 20 | \hat{y}^{(i)} = \theta_0 + \theta_1 x_{1}^{(i)} + \dots + \theta_n x_{n}^{(i)} && & \text{ for i = 1, ..., m} 21 | \end{matrix} 22 | $$ 23 | Where: 24 | \begin{itemize} 25 | \item $\hat{y}$ is a vector of dimension $m$: the vector of predicted values 26 | \item $\hat{y}^{(i)}$ is the i$^\text{th}$ component of the $\hat{y}$ vector: the predicted value for the i$^\text{th}$ example 27 | \item $\theta$ is a vector of dimension $(n + 1)$: the parameter vector 28 | \item $\theta_j$ is the j$^\text{th}$ component of the parameter vector 29 | \item $X$ is a matrix of dimensions $(m \times n)$: the design matrix 30 | \item $x^{(i)}$ is the i$^\text{th}$ row of the $X$ matrix: the feature vector of the i$^\text{th}$ example 31 | \item $x_{j}$ is the j$^\text{th}$ column of the $X$ matrix 32 | \item $x_j^{(i)}$ is the element at the intersection of the i$^\text{th}$ row and the j$^\text{th}$ column of the $X$ matrix: the j$^\text{th}$ feature of the i$^\text{th}$ example 33 | \end{itemize} 34 | \newpage 35 | % ================================== % 36 | \section*{Instructions} 37 | % ---------------------------------- % 38 | 39 | In the \texttt{prediction.py} file, create the following function as per the instructions given below:\\ 40 | \newline 41 | \par 42 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 43 | def simple_predict(x, theta): 44 | """Computes the prediction vector y_hat from two non-empty numpy.array. 
45 | Args: 46 | x: has to be an numpy.array, a matrix of dimension m * n. 47 | theta: has to be an numpy.array, a vector of dimension (n + 1) * 1. 48 | Return: 49 | y_hat as a numpy.array, a vector of dimension m * 1. 50 | None if x or theta are empty numpy.array. 51 | None if x or theta dimensions are not matching. 52 | None if x or theta is not of expected type. 53 | Raises: 54 | This function should not raise any Exception. 55 | """ 56 | ... Your code ... 57 | \end{minted} 58 | 59 | % ================================== % 60 | \section*{Examples} 61 | % ---------------------------------- % 62 | 63 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 64 | import numpy as np 65 | x = np.arange(1,13).reshape((4,-1)) 66 | 67 | # Example 1: 68 | theta1 = np.array([5, 0, 0, 0]).reshape((-1, 1)) 69 | simple_predict(x, theta1) 70 | # Ouput: 71 | array([[5.], [5.], [5.], [5.]]) 72 | # Do you understand why y_hat contains only 5's here? 73 | 74 | 75 | # Example 2: 76 | theta2 = np.array([0, 1, 0, 0]).reshape((-1, 1)) 77 | simple_predict(x, theta2) 78 | # Output: 79 | array([[ 1.], [ 4.], [ 7.], [10.]]) 80 | # Do you understand why y_hat == x[:,0] here? 81 | 82 | 83 | # Example 3: 84 | theta3 = np.array([-1.5, 0.6, 2.3, 1.98]).reshape((-1, 1)) 85 | simple_predict(x, theta3) 86 | # Output: 87 | array([[ 9.64], [24.28], [38.92], [53.56]]) 88 | 89 | 90 | # Example 4: 91 | theta4 = np.array([-3, 1, 2, 3.5]).reshape((-1, 1)) 92 | simple_predict(x, theta4) 93 | # Output: 94 | array([[12.5], [32. ], [51.5], [71. ]]) 95 | \end{minted} -------------------------------------------------------------------------------- /module07/exercises/m07ex02.tex: -------------------------------------------------------------------------------- 1 | \chapter{Exercise 02} 2 | \input{exercises/en.ex02_interlude.tex} 3 | \newpage 4 | \extitle{Vectorized Loss Function} 5 | \turnindir{ex02} 6 | \exnumber{02} 7 | \exfiles{loss.py} 8 | \exforbidden{None} 9 | \makeheaderfilesforbidden 10 | 11 | % ================================= % 12 | \section*{Objective} 13 | % --------------------------------- % 14 | Understand and manipulate loss function for multivariate linear regression.\\ 15 | \newline 16 | You must implement the following formula as a function: 17 | 18 | $$ 19 | \begin{matrix} 20 | J(\theta) & = & \frac{1}{2m}(\hat{y} - y) \cdot(\hat{y}- y) 21 | \end{matrix} 22 | $$ 23 | Where: 24 | \begin{itemize} 25 | \item $\hat{y}$ is a vector of dimension $m$, the vector of predicted values 26 | \item $y$ is a vector of dimension $m$, the vector of expected values 27 | \end{itemize} 28 | % ================================= % 29 | \section*{Instructions} 30 | % --------------------------------- % 31 | In the \texttt{loss.py} file create the following function as per the instructions given below:\\ 32 | \newline 33 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 34 | def loss_(y, y_hat): 35 | """Computes the mean squared error of two non-empty numpy.array, without any for loop. 36 | The two arrays must have the same dimensions. 37 | Args: 38 | y: has to be an numpy.array, a vector. 39 | y_hat: has to be an numpy.array, a vector. 40 | Return: 41 | The mean squared error of the two vectors as a float. 42 | None if y or y_hat are empty numpy.array. 43 | None if y and y_hat does not share the same dimensions. 44 | None if y or y_hat is not of expected type. 45 | Raises: 46 | This function should not raise any Exception. 47 | """ 48 | ... Your code ... 
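    # Purely illustrative sketch (one possible vectorized approach, not the required
    # solution): a direct transcription of the J(theta) formula given above, assuming
    # numpy is imported as np and both inputs are m * 1 column vectors.
    #   diff = y_hat - y
    #   return float(diff.T.dot(diff) / (2 * y.shape[0]))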
49 | \end{minted} 50 | \newpage 51 | % ================================= % 52 | \section*{Examples} 53 | % --------------------------------- % 54 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 55 | import numpy as np 56 | X = np.array([0, 15, -9, 7, 12, 3, -21]).reshape((-1, 1)) 57 | Y = np.array([2, 14, -13, 5, 12, 4, -19]).reshape((-1, 1)) 58 | 59 | # Example 1: 60 | loss_(X, Y) 61 | # Output: 62 | 2.142857142857143 63 | 64 | # Example 2: 65 | loss_(X, X) 66 | # Output: 67 | 0.0 68 | \end{minted} -------------------------------------------------------------------------------- /module07/exercises/m07ex03.tex: -------------------------------------------------------------------------------- 1 | \chapter{Exercise 03} 2 | \extitle{Multivariate Linear Gradient} 3 | \input{exercises/en.ex03_interlude.tex} 4 | \newpage 5 | \turnindir{ex03} 6 | \exnumber{03} 7 | \exfiles{gradient.py} 8 | \exforbidden{None} 9 | \makeheaderfilesforbidden 10 | 11 | % ================================= % 12 | \section*{Objective} 13 | % --------------------------------- % 14 | Understand and manipulate concept of gradient in the case of multivariate formulation.\\ 15 | \newline 16 | You must implement the following formula as a function: 17 | 18 | $$ 19 | \nabla(J) = \frac{1}{m} {X'}^T(X'\theta - y) 20 | $$ 21 | Where: 22 | \begin{itemize} 23 | \item $\nabla(J)$ is a vector of dimension $(n + 1)$, the gradient vector 24 | \item $X$ is a matrix of dimensions $(m \times n)$, the design matrix 25 | \item $X'$ is a matrix of dimensions $(m \times (n + 1))$, the design matrix onto which a column of $1$'s was added as a first column 26 | \item $\theta$ is a vector of dimension $(n + 1)$, the parameter vector 27 | \item $y$ is a vector of dimension $m$, the vector of expected values 28 | \end{itemize} 29 | 30 | % ================================= % 31 | \section*{Instructions} 32 | % --------------------------------- % 33 | In the \texttt{gradient.py} file, create the following function as per the instructions given below:\\ 34 | \newline 35 | \par 36 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 37 | def gradient(x, y, theta): 38 | """Computes a gradient vector from three non-empty numpy.array, without any for-loop. 39 | The three arrays must have the compatible dimensions. 40 | Args: 41 | x: has to be an numpy.array, a matrix of dimension m * n. 42 | y: has to be an numpy.array, a vector of dimension m * 1. 43 | theta: has to be an numpy.array, a vector (n +1) * 1. 44 | Return: 45 | The gradient as a numpy.array, a vector of dimensions n * 1, 46 | containg the result of the formula for all j. 47 | None if x, y, or theta are empty numpy.array. 48 | None if x, y and theta do not have compatible dimensions. 49 | None if x, y or theta is not of expected type. 50 | Raises: 51 | This function should not raise any Exception. 52 | """ 53 | ... Your code ... 
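    # Purely illustrative sketch, transcribing the gradient formula given above
    # (X' is x with a column of ones prepended), assuming numpy is imported as np:
    #   x_prime = np.hstack((np.ones((x.shape[0], 1)), x))
    #   return x_prime.T.dot(x_prime.dot(theta) - y) / x.shape[0]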
54 | \end{minted} 55 | 56 | % ================================= % 57 | \section*{Examples} 58 | % --------------------------------- % 59 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 60 | import numpy as np 61 | x = np.array([ 62 | [ -6, -7, -9], 63 | [ 13, -2, 14], 64 | [ -7, 14, -1], 65 | [ -8, -4, 6], 66 | [ -5, -9, 6], 67 | [ 1, -5, 11], 68 | [ 9, -11, 8]]) 69 | y = np.array([2, 14, -13, 5, 12, 4, -19]).reshape((-1, 1)) 70 | theta1 = np.array([0, 3, 0.5, -6]).reshape((-1, 1)) 71 | 72 | # Example : 73 | gradient(x, y, theta1) 74 | # Output: 75 | array([[ -33.71428571], [ -37.35714286], [183.14285714], [-393.]]) 76 | 77 | 78 | # Example : 79 | theta2 = np.array([0, 0, 0, 0]).reshape((-1, 1)) 80 | gradient(x, y, theta2) 81 | # Output: 82 | array([[ -0.71428571], [ 0.85714286], [23.28571429], [-26.42857143]]) 83 | \end{minted} -------------------------------------------------------------------------------- /module07/exercises/m07ex05.tex: -------------------------------------------------------------------------------- 1 | \chapter{Exercise 05} 2 | \extitle{Multivariate Linear Regression with Class} 3 | \turnindir{ex05} 4 | \exnumber{05} 5 | \exfiles{mylinearregression.py} 6 | \exforbidden{sklearn} 7 | \makeheaderfilesforbidden 8 | 9 | % ================================= % 10 | \section*{Objective} 11 | % --------------------------------- % 12 | Upgrade your Linear Regression class so it can handle multivariate hypothesis. 13 | 14 | % ================================= % 15 | \section*{Instructions} 16 | % --------------------------------- % 17 | You are expected to upgrade your own \texttt{MyLinearRegression} class from \textbf{Module01}.\\ 18 | \newline 19 | You will upgrade (at least) the following methods to support multivariate linear regression: 20 | \begin{itemize} 21 | \item \texttt{predict\_(self, x)}, 22 | \item \texttt{fit\_(self, x, y)}. 23 | \end{itemize} 24 | Depending on how you implement your methods, you might need to update other methods. 25 | 26 | % ================================= % 27 | \section*{Examples} 28 | % --------------------------------- % 29 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 30 | import numpy as np 31 | from mylinearregression import MyLinearRegression as MyLR 32 | X = np.array([[1., 1., 2., 3.], [5., 8., 13., 21.], [34., 55., 89., 144.]]) 33 | Y = np.array([[23.], [48.], [218.]]) 34 | mylr = MyLR([[1.], [1.], [1.], [1.], [1]]) 35 | 36 | 37 | # Example 0: 38 | y_hat = mylr.predict_(X) 39 | # Output: 40 | array([[8.], [48.], [323.]]) 41 | 42 | 43 | # Example 1: 44 | mylr.loss_elem_(Y, y_hat) 45 | # Output: 46 | array([[225.], [0.], [11025.]]) 47 | 48 | 49 | # Example 2: 50 | mylr.loss_(Y, y_hat) 51 | # Output: 52 | 1875.0 53 | 54 | 55 | # Example 3: 56 | mylr.alpha = 1.6e-4 57 | mylr.max_iter = 200000 58 | mylr.fit_(X, Y) 59 | mylr.thetas 60 | # Output: 61 | array([[18.188..], [2.767..], [-0.374..], [1.392..], [0.017..]]) 62 | 63 | 64 | # Example 4: 65 | y_hat = mylr.predict_(X) 66 | # Output: 67 | array([[23.417..], [47.489..], [218.065...]]) 68 | 69 | 70 | # Example 5: 71 | mylr.loss_elem_(Y, y_hat) 72 | # Output: 73 | array([[0.174..], [0.260..], [0.004..]]) 74 | 75 | 76 | # Example 6: 77 | mylr.loss_(Y, y_hat) 78 | # Output: 79 | 0.0732.. 
80 | \end{minted} -------------------------------------------------------------------------------- /module07/exercises/m07ex07.tex: -------------------------------------------------------------------------------- 1 | \chapter{Exercise 07} 2 | \extitle{Polynomial models} 3 | \input{exercises/en.ex07_interlude.tex} 4 | \newpage 5 | \turnindir{ex07} 6 | \exnumber{07} 7 | \exfiles{polynomial\_model.py} 8 | \exforbidden{sklearn} 9 | \makeheaderfilesforbidden 10 | 11 | 12 | % ================================= % 13 | \section*{Objective} 14 | % --------------------------------- % 15 | Broaden your comprehension of the concept of hypothesis.\\ 16 | \newline 17 | Create a function that takes a vector $x$ of dimension $m$ and an integer $n$ as input, and returns a matrix of dimensions $(m \times n)$. 18 | Each column of the matrix contains $x$ raised to the power of $j$, for $j = 1, 2, ..., n$: 19 | 20 | $$ 21 | \begin{matrix} 22 | x &|& x^2 &|& x^3 &|& \ldots &|& x^n 23 | \end{matrix} 24 | $$ 25 | Such a matrix is called a \textbf{Vandermonde matrix}. 26 | 27 | % ================================= % 28 | \section*{Instructions} 29 | % --------------------------------- % 30 | In the \texttt{polynomial\_model.py} file, create the following function as per the instructions given below:\\ 31 | \\ 32 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 33 | def add_polynomial_features(x, power): 34 | """Add polynomial features to vector x by raising its values up to the power given in argument. 35 | Args: 36 | x: has to be an numpy.array, a vector of dimension m * 1. 37 | power: has to be an int, the power up to which the components of vector x are going to be raised. 38 | Return: 39 | The matrix of polynomial features as a numpy.array, of dimension m * n, 40 | containing the polynomial feature values for all training examples. 41 | None if x is an empty numpy.array. 42 | None if x or power is not of expected type. 43 | Raises: 44 | This function should not raise any Exception. 45 | """ 46 | ... Your code ... 47 | \end{minted} 48 | 49 | % ================================= % 50 | \section*{Examples} 51 | % --------------------------------- % 52 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 53 | import numpy as np 54 | x = np.arange(1,6).reshape(-1, 1) 55 | 56 | 57 | # Example 0: 58 | add_polynomial_features(x, 3) 59 | # Output: 60 | array([[ 1, 1, 1], 61 | [ 2, 4, 8], 62 | [ 3, 9, 27], 63 | [ 4, 16, 64], 64 | [ 5, 25, 125]]) 65 | 66 | 67 | # Example 1: 68 | add_polynomial_features(x, 6) 69 | # Output: 70 | array([[ 1, 1, 1, 1, 1, 1], 71 | [ 2, 4, 8, 16, 32, 64], 72 | [ 3, 9, 27, 81, 243, 729], 73 | [ 4, 16, 64, 256, 1024, 4096], 74 | [ 5, 25, 125, 625, 3125, 15625]]) 75 | \end{minted} -------------------------------------------------------------------------------- /module07/exercises/m07ex08.tex: -------------------------------------------------------------------------------- 1 | \chapter{Exercise 08} 2 | \extitle{Let's Train Polynomial Models!} 3 | \input{exercises/en.ex08_interlude.tex} 4 | \newpage 5 | \turnindir{ex08} 6 | \exnumber{08} 7 | \exfiles{polynomial\_train.py} 8 | \exforbidden{sklearn} 9 | \makeheaderfilesforbidden 10 | 11 | 12 | % ================================= % 13 | \section*{Objective} 14 | % --------------------------------- % 15 | Manipulation of polynomial hypothesis.\\ 16 | \newline 17 | It's training time! 
Let's train some polynomial models, and see if those with higher polynomial degree perform better!\\ 18 | \\ 19 | Write a program which: 20 | \begin{itemize} 21 | \item Reads and loads \texttt{are\_blue\_pills\_magics.csv} dataset 22 | \item Trains \textbf{six} separate Linear Regression models with polynomial hypothesis with degrees ranging from 1 to 6 23 | \item Evaluates and prints evaluation score (MSE) of each of the six models 24 | \item Plots a bar plot showing the MSE score of the models in function of the polynomial degree of the hypothesis 25 | \item Plots the 6 models and the data points on the same figure 26 | Use lineplot style for the models and scatterplot for the data points 27 | Add more prediction points to have smooth curves for the models 28 | \end{itemize} 29 | You will use \texttt{Micrograms} as feature and \texttt{Score} as target.\\ 30 | \\ 31 | The implementation of the method \texttt{fit\_} based on the simple gradient descent lacks of efficiency and sturdiness, 32 | which will lead to the impossibility of converging for polynomial models with high degree or with features having several orders of magnitude of difference. 33 | See the starting values below for some thetas to help you get acceptable parameters values for your models.\\ 34 | \\ 35 | \hint{ 36 | According to evaluation score only, what is the best hypothesis (or model) between the trained models? 37 | According to the last plot, why is it not true? 38 | Which phenomenon do you observe here? 39 | } 40 | 41 | \newpage 42 | % ================================= % 43 | \subsection*{Starting points} 44 | % --------------------------------- % 45 | You will not be able to get acceptable parameters for models 4, 5 and 6. 46 | Thus you can start the fit process for those models with:\\ 47 | \newline 48 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 49 | theta4 = np.array([[-20],[ 160],[ -80],[ 10],[ -1]]).reshape(-1,1) 50 | theta5 = np.array([[1140],[ -1850],[ 1110],[ -305],[ 40],[ -2]]).reshape(-1,1) 51 | theta6 = np.array([[9110],[ -18015],[ 13400],[ -4935],[ 966],[ -96.4],[ 3.86]]).reshape(-1,1) 52 | \end{minted} 53 | 54 | % ================================= % 55 | \subsection*{Teminology Note} 56 | % --------------------------------- % 57 | The \textbf{degree} of a polynomial expression is its highest exponent. 58 | E.g.: The polynomial degree of $5x^3 - x^6 + 2 x^2$ is $6$.\\ 59 | \\ 60 | In this equation, you don't see any terms with $x$, $x^4$ and $x^5$,but we can still say they exist. It's just that their coefficient is $0$. 61 | This means that a polynomial linear regression model can lower the impact of any term by bringing its corresponding $\theta_j$ closer to $0$. 62 | 63 | % ================================= % 64 | \subsection*{Remark} 65 | % --------------------------------- % 66 | When you are evaluated, it will be wise to run your program at the beginning of the evaluation as it can take several minutes to train the models. 
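% ================================= %
\subsection*{A possible skeleton}
% --------------------------------- %
If you wonder how to organise \texttt{polynomial\_train.py}, here is a minimal, purely illustrative sketch (not the expected solution).
It assumes the \texttt{add\_polynomial\_features} function from ex07 and the \texttt{MyLinearRegression} class from ex05;
the \texttt{alpha} and \texttt{max\_iter} values are placeholders you will have to tune, and the starting thetas given above should replace \texttt{np.ones} for degrees 4 to 6.\\
\newline
\begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python}
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from polynomial_model import add_polynomial_features
from mylinearregression import MyLinearRegression as MyLR

data = pd.read_csv("are_blue_pills_magics.csv")
x = data[["Micrograms"]].values
y = data[["Score"]].values

mse_scores = []
for degree in range(1, 7):
    x_poly = add_polynomial_features(x, degree)
    model = MyLR(np.ones((degree + 1, 1)))
    model.alpha = 1e-5        # placeholder value: tune it for each degree
    model.max_iter = 100000   # placeholder value: tune it for each degree
    model.fit_(x_poly, y)
    y_hat = model.predict_(x_poly)
    mse_scores.append(float(np.mean((y_hat - y) ** 2)))
    print(f"Degree {degree}: MSE = {mse_scores[-1]}")

plt.bar(range(1, 7), mse_scores)
plt.xlabel("Polynomial degree")
plt.ylabel("MSE")
plt.show()
\end{minted}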
67 | -------------------------------------------------------------------------------- /module07/exercises/m07ex10.tex: -------------------------------------------------------------------------------- 1 | \chapter{Exercise 10} 2 | \extitle{Machine Learning for Grown-ups: Trantor guacamole business} 3 | \turnindir{ex10} 4 | \exnumber{10} 5 | \exfiles{space\_avocado.py, benchmark\_train.py, models.[csv/yml/pickle]} 6 | \exforbidden{sklearn} 7 | \makeheaderfilesforbidden 8 | 9 | % ================================= % 10 | \section*{Objective} 11 | % --------------------------------- % 12 | Let's do Machine Learning for "real"! 13 | 14 | % ================================= % 15 | \section*{Introduction} 16 | % --------------------------------- % 17 | The dataset is made of 5 columns: 18 | \begin{itemize} 19 | \item \textbf{index}: not relevant 20 | \item \textbf{weight}: the avocado weight order (in tons) 21 | \item \textbf{prod\_distance}: distance from where the avocado ordered is produced (in Mkm) 22 | \item \textbf{time\_delivery}: time between the order and the receipt (in days) 23 | \item \textbf{target}: price of the order (in trantorian unit) 24 | \end{itemize} 25 | It contains the data of all the avocado purchases made by Trantor administration (guacamole is a serious business there). 26 | \newpage 27 | % ================================= % 28 | \section*{Instructions} 29 | % --------------------------------- % 30 | You have to explore different models and select the best you find.\\ 31 | \newline 32 | To do this: 33 | \begin{itemize} 34 | \item Split your \texttt{space\_avocado.csv} dataset into a training and a test set. 35 | \item Use your \texttt{polynomial\_features} method on your training set. 36 | \item Consider several Linear Regression models with polynomial hypothesis with a maximum degree of 4. 37 | \item Evaluate your models on the test set. 38 | \end{itemize} 39 | 40 | According to your model evaluations, what is the best hypothesis you can get? 41 | \begin{itemize} 42 | \item Plot the evaluation curve which help you to select the best model (evaluation metrics vs models). 43 | \item Plot the true price and the predicted price obtained via your best model (3D representation or 3 scatterplots). 44 | \end{itemize} 45 | 46 | The training of all your models can take a long time.\\ 47 | \\ 48 | Thus you need to train only the best one during the correction.\\ 49 | \\ 50 | But, you should return in \texttt{benchmark\_train.py} the program which performs the training of all the models and save the parameters of the different models into a file.\\ 51 | \\ 52 | In \texttt{models.[csv/yml/pickle]} one must find the parameters of all the models you have explored and trained.\\ 53 | \\ 54 | In \texttt{space\_avocado.py} train the model based on the best hypothesis you find and load the other models from \texttt{models.[csv/yml/pickle]}.\\ 55 | \newline 56 | Then evaluate and plot the different graphics as asked before. 
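\\
\newline
How you store the parameters of the models you explored is up to you.
Below is a purely illustrative sketch assuming you pick the \texttt{pickle} option; the model names, theta shapes and scores are placeholders, not the expected content of your file.\\
\newline
\begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python}
import pickle
import numpy as np

# Placeholder entries standing in for your trained thetas and evaluation scores.
models = {
    "weight_deg2_dist_deg1_time_deg1": {"thetas": np.zeros((5, 1)), "mse": 0.0},
    "weight_deg4_dist_deg4_time_deg4": {"thetas": np.zeros((13, 1)), "mse": 0.0},
}

# benchmark_train.py side: save every model you explored.
with open("models.pickle", "wb") as f:
    pickle.dump(models, f)

# space_avocado.py side: load them back to compare and plot.
with open("models.pickle", "rb") as f:
    saved_models = pickle.load(f)
print(saved_models.keys())
\end{minted}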
-------------------------------------------------------------------------------- /module07/useful_resources.tex: -------------------------------------------------------------------------------- 1 | %******************************************************************************% 2 | % % 3 | % Section usefull ressources % 4 | % for ML Modules % 5 | % % 6 | %******************************************************************************% 7 | 8 | 9 | \chapter*{Notions covered and learning resources} 10 | 11 | \section*{What notions will be covered by this module?} 12 | 13 | \begin{itemize} 14 | \item Multivariate linear hypothesis 15 | \item Multivariate linear gradient descent 16 | \item Polynomial models 17 | \item Training set, test set, overfitting 18 | \end{itemize} 19 | 20 | \section*{Useful Resources} 21 | 22 | You are recommended to use the following material: \href{https://www.coursera.org/learn/machine-learning}{Machine Learning MOOC - Stanford}\\ 23 | \newline 24 | This series of videos is available at no cost: simply log in, select "Enroll for Free", and click "Audit" at the bottom of the pop-up window.\\ 25 | \newline 26 | The following sections of the course are particularly relevant to today's exercises: 27 | 28 | \subsection*{Week 2: Regression with multiple input variables} 29 | 30 | \subsubsection*{Multiple linear regression} 31 | \begin{itemize} 32 | \item Multiple features 33 | \item Gradient descent for multiple linear regression 34 | \end{itemize} 35 | 36 | \subsubsection*{Gradient descent in practice} 37 | \begin{itemize} 38 | \item Feature scaling part 1 39 | \item Feature scaling part 2 40 | \item Checking gradient descent for convergence 41 | \item Choosing the learning rate 42 | \item Feature engineering 43 | \item Polynomial regression 44 | \end{itemize} 45 | \emph{All videos above are available also on this \href{https://youtube.com/playlist?list=PLkDaE6sCZn6FNC6YRfRQc_FbeQrF8BwGI&feature=shared}{Andrew Ng's YouTube playlist}, videos 21 and from 24 to 30} -------------------------------------------------------------------------------- /module08/Makefile: -------------------------------------------------------------------------------- 1 | # List the pdf's to build. 
foo.tex will produce foo.pdf 2 | TARGETS = en.subject.pdf 3 | 4 | # List the files included in the slides 5 | DEPS = exercises/en.ex00_interlude.tex \ 6 | exercises/en.ex01_interlude.tex \ 7 | exercises/en.ex02_interlude.tex \ 8 | exercises/en.ex03_interlude.tex \ 9 | exercises/en.ex04_interlude.tex \ 10 | exercises/en.ex05_interlude.tex \ 11 | exercises/en.ex08_interlude.tex \ 12 | ../resources/latex/redefinition-commands.tex \ 13 | ../resources/42ai_bootcamps/en.instructions.tex \ 14 | ../resources/42ai_bootcamps/en.acknowledgements.tex \ 15 | useful_resources.tex 16 | 17 | # Relative path to the LaTeX documentclass setup files 18 | # Adapt as needed 19 | # RELPATH = $(shell git rev-parse --show-toplevel)/resources/latex/ 20 | # RELPATH for github actions: 21 | RELPATH = $(shell dirname `pwd`)/resources/latex/ 22 | 23 | # You should not touch this either 24 | include $(RELPATH)/Makefile.LaTeX 25 | -------------------------------------------------------------------------------- /module08/assets/-log_1-x.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module08/assets/-log_1-x.png -------------------------------------------------------------------------------- /module08/assets/-log_x.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module08/assets/-log_x.png -------------------------------------------------------------------------------- /module08/assets/42ai_logo.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module08/assets/42ai_logo.pdf -------------------------------------------------------------------------------- /module08/assets/Default.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module08/assets/Default.png -------------------------------------------------------------------------------- /module08/assets/Evaluate.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module08/assets/Evaluate.png -------------------------------------------------------------------------------- /module08/assets/Improve.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module08/assets/Improve.png -------------------------------------------------------------------------------- /module08/assets/Predict.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module08/assets/Predict.png -------------------------------------------------------------------------------- /module08/assets/figure1_3Dplot_dataset.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module08/assets/figure1_3Dplot_dataset.png -------------------------------------------------------------------------------- /module08/assets/log_loss.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module08/assets/log_loss.png -------------------------------------------------------------------------------- /module08/assets/sigmoid.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module08/assets/sigmoid.png -------------------------------------------------------------------------------- /module08/exercises/en.ex01_interlude.tex: -------------------------------------------------------------------------------- 1 | %******************************************************************************% 2 | % % 3 | % Interlude % 4 | % for Machine Learning module % 5 | % % 6 | %******************************************************************************% 7 | 8 | % =============================== % 9 | \section*{Interlude} 10 | % =============================== % 11 | \subsection*{Predict II : Hypothesis} 12 | % ------------------------------- % 13 | 14 | We hope your curiosity led you to plot your sigmoid function. 15 | If you didn't, well here is what it looks like: 16 | 17 | \begin{figure}[!h] 18 | \centering 19 | \includegraphics[scale=0.55]{assets/sigmoid.png} 20 | \caption{Sigmoid} 21 | \end{figure} 22 | As you can see, \textbf{the sigmoid's output values range from $0$ to $1$}.\\ 23 | \\ 24 | You can input real numbers as big as you want (positive or negative), the output 25 | will always land within this range. 26 | This will be very helpful and convenient for the next part. 27 | 28 | \newpage 29 | 30 | % =============================== % 31 | \subsection*{Logistic Hypothesis} 32 | % ------------------------------- % 33 | 34 | Now you've written your sigmoid function, let's take a look at \textbf{the logistic regression 35 | hypothesis}. 
36 | 37 | $$ 38 | \begin{matrix} 39 | \hat{y}^{(i)} & = & h_\theta(x^{(i)}) & = & \text{sigmoid}(\theta \cdot x'^{(i)}) 40 | & = &\cfrac{1} {1 + e^{-\theta \cdot x'^{(i)}}} & &\text{ for i = 1, \dots, m} 41 | \end{matrix} 42 | $$ 43 | \textbf{This is simply the sigmoid function applied on top 44 | of the linear regression hypothesis!!}\\ 45 | \\ 46 | It can be vectorized as: 47 | \\ 48 | $$ 49 | \begin{matrix} 50 | \hat{y} & = & h_\theta(X) & = & \text{sigmoid}(X'\theta) & = &\cfrac{1} {1 + e^{-X'\theta}} 51 | \end{matrix} 52 | $$ 53 | As we said before: the \textbf{sigmoid function} is just a way 54 | to \textbf{map the result of a linear equation onto a $[0,1]$ value range}.\\ 55 | \\ 56 | This transformation allows us to interpret the result 57 | as a \textbf{probability that an individual or observation belongs to of a given class}.\\ -------------------------------------------------------------------------------- /module08/exercises/en.ex04_interlude.tex: -------------------------------------------------------------------------------- 1 | %******************************************************************************% 2 | % % 3 | % Interlude % 4 | % for Machine Learning module % 5 | % % 6 | %******************************************************************************% 7 | 8 | % =============================================== % 9 | \section*{Interlude} 10 | % =============================================== % 11 | \subsection*{Improve} 12 | % ----------------------------------------------- % 13 | 14 | \begin{figure}[!h] 15 | \centering 16 | \includegraphics[scale=0.25]{assets/Improve.png} 17 | %\caption{The Learning Cycle: Improve} 18 | \end{figure} 19 | \noindent{Now we want to improve the algorithm's 20 | performance, or in other words, reduce the loss of its predictions.}\\ 21 | \\ 22 | This brings us (again) to calculating the gradient, which will tell us by 23 | how much and in which direction the theta parameters belonging to the model should be adjusted. 
24 | 25 | \newpage 26 | % =============================================== % 27 | \subsection*{The logistic gradient} 28 | % ----------------------------------------------- % 29 | If you remember, to calculate the gradient, we start with the loss function and we derive it 30 | with respect to each of the theta parameters.\\ 31 | \\ 32 | If you know multivariate calculus already, you can try it for yourself, otherwise we've got you covered:\\ 33 | 34 | $$ 35 | \begin{matrix} 36 | \nabla(J)_0 & = &\cfrac{1}{m}\sum_{i=1}^{m}(h_{\theta}(x^{(i)}) - y^{(i)}) & \\ 37 | \nabla(J)_j & = &\cfrac{1}{m}\sum_{i=1}^{m}(h_{\theta}(x^{(i)}) - y^{(i)})x_{j}^{(i)} & \text{ for j = 1, ..., n} 38 | \end{matrix} 39 | $$ 40 | Where: 41 | \begin{itemize} 42 | \item $\nabla(J)$ is a vector of dimension $(n + 1)$, the gradient vector 43 | \item $\nabla(J)_j$ is the j$^\text{th}$ component of $\nabla(J)$, 44 | the partial derivative of $J$ with respect to $\theta_j$ 45 | \item $y$ is a vector of dimension $m$, the vector of expected values 46 | \item $y^{(i)}$ is a scalar, the i$^\text{th}$ component of vector $y$ 47 | \item $x^{(i)}$ is the feature vector of the i$^\text{th}$ example 48 | \item $x^{(i)}_j$ is a scalar, the j$^\text{th}$ feature value of the i$^\text{th}$ example 49 | \item $h_{\theta}(x^{(i)})$ is a scalar, the model's estimation of $y^{(i)}$\\ 50 | \end{itemize} 51 | This formula should be very familiar to you, as it's the same one you used to calculate the linear regression gradient!\\ 52 | \\ 53 | The only difference is that $h_{\theta}(x^{(i)})$ corresponds to \textbf{the logistic regression hypothesis instead of the linear regression hypothesis}.\\ 54 | \\ 55 | In other words:\\ 56 | $$ 57 | h_{\theta}(x^{(i)}) = \text{sigmoid}( \theta \cdot x'^{(i)}) = \cfrac{1} {1 + e^{-\theta \cdot x'^{(i)}}} 58 | $$ 59 | \\ 60 | Instead of: 61 | \\ 62 | $$ 63 | \cancel{h_{\theta}(x^{(i)}) = \theta \cdot x'^{(i)}} 64 | $$ 65 | -------------------------------------------------------------------------------- /module08/exercises/en.ex05_interlude.tex: -------------------------------------------------------------------------------- 1 | %******************************************************************************% 2 | % % 3 | % Interlude % 4 | % for Machine Learning module % 5 | % % 6 | %******************************************************************************% 7 | 8 | % ============================================== % 9 | \section*{Interlude} 10 | % ============================================== % 11 | \subsection*{Vectorized Logistic Gradient} 12 | % ---------------------------------------------- % 13 | 14 | Given the previous logistic gradient formula, it's quite easy to produce a vectorized version of it. 
15 | Actually, you almost already implemented it on module02!\\ 16 | \\ 17 | As with the previous exercise, \textbf{the only thing you have to change is your hypothesis} 18 | in order to calculate your logistic gradient.\\ 19 | 20 | $$ 21 | \begin{matrix} 22 | \nabla(J)_0 & = &\cfrac{1}{m}\sum_{i=1}^{m}(h_{\theta}(x^{(i)}) - y^{(i)}) & \\ 23 | \nabla(J)_j & = &\cfrac{1}{m}\sum_{i=1}^{m}(h_{\theta}(x^{(i)}) - y^{(i)})x_{j}^{(i)} & \text{ for j = 1, ..., n} 24 | \end{matrix} 25 | $$ 26 | 27 | % ============================================== % 28 | \subsection*{Vectorized Version} 29 | % ---------------------------------------------- % 30 | 31 | Can be vectorized the same way you did before: 32 | 33 | $$ 34 | \nabla(J) = \cfrac{1}{m} X'^T(h_\theta(X) - y) 35 | $$ 36 | -------------------------------------------------------------------------------- /module08/exercises/m08ex00.tex: -------------------------------------------------------------------------------- 1 | \chapter{Exercise 00} 2 | \input{exercises/en.ex00_interlude.tex} 3 | \newpage 4 | \extitle{Sigmoid} 5 | \turnindir{ex00} 6 | \exnumber{00} 7 | \exfiles{sigmoid.py} 8 | \exforbidden{None} 9 | \makeheaderfilesforbidden 10 | 11 | % ================================== % 12 | \section*{Objective} 13 | % ---------------------------------- % 14 | Introduction to the hypothesis in the context of logistic regression.\\ 15 | \\ 16 | You must implement the sigmoid function, given by the following formula: 17 | 18 | $$ 19 | \text{sigmoid}(x) = \cfrac{1} {1 + e^{-x}} 20 | $$ 21 | Where: 22 | \begin{itemize} 23 | \item $x$ is a scalar or a vector, 24 | \item $e$ is the contracted form for the exponential function. It is also a mathematical constant, named Euler's number. 25 | \end{itemize} 26 | This function is also known as \textbf{Standard logistic sigmoid function}. 27 | This explains the name \textit{logistic regression}.\\ 28 | \\ 29 | The sigmoid function transforms an input into a probability value, i.e. a value between 0 and 1. 30 | This probability value will then be used to classify the inputs. 31 | \\ 32 | % ================================== % 33 | \section*{Instructions} 34 | % ---------------------------------- % 35 | In the \texttt{sigmoid.py} file, write the following function as per the instructions below:\\ 36 | \\ 37 | \par 38 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 39 | def sigmoid_(x): 40 | """ 41 | Compute the sigmoid of a vector. 42 | Args: 43 | x: has to be a numpy.ndarray of shape (m, 1). 44 | Returns: 45 | The sigmoid value as a numpy.ndarray of shape (m, 1). 46 | None if x is an empty numpy.ndarray. 47 | Raises: 48 | This function should not raise any Exception. 49 | """ 50 | ... Your code ... 
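    # Purely illustrative sketch -- a direct transcription of the formula above,
    # assuming numpy is imported as np:
    #   return 1 / (1 + np.exp(-x))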
51 | \end{minted} 52 | 53 | % ================================== % 54 | \section*{Examples} 55 | % ---------------------------------- % 56 | 57 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 58 | # Example 1: 59 | x = np.array([[-4]]) 60 | sigmoid_(x) 61 | # Output: 62 | array([[0.01798620996209156]]) 63 | 64 | # Example 2: 65 | x = np.array([[2]]) 66 | sigmoid_(x) 67 | # Output: 68 | array([[0.8807970779778823]]) 69 | 70 | # Example 3: 71 | x = np.array([[-4], [2], [0]]) 72 | sigmoid_(x) 73 | # Output: 74 | array([[0.01798620996209156], [0.8807970779778823], [0.5]]) 75 | \end{minted} 76 | 77 | 78 | \info{ 79 | Our sigmoid formula is a special case of the logistic function below, with $L = 1$, $k = 1$ and $x_0 = 0$: 80 | $$ 81 | f(x) = \cfrac{L}{1 + e^{-k(x-x_0)}} 82 | $$ 83 | } -------------------------------------------------------------------------------- /module08/exercises/m08ex01.tex: -------------------------------------------------------------------------------- 1 | \chapter{Exercise 01} 2 | \input{exercises/en.ex01_interlude.tex} 3 | \newpage 4 | \extitle{Logistic Hypothesis} 5 | \turnindir{ex01} 6 | \exnumber{01} 7 | \exfiles{log\_pred.py} 8 | \exforbidden{None} 9 | \makeheaderfilesforbidden 10 | 11 | % ================================= % 12 | \section*{Objective} 13 | % --------------------------------- % 14 | Introduction to the hypothesis notion in the context of logistic regression.\\ 15 | \\ 16 | You must implement the following formula as a function:\\ 17 | 18 | $$ 19 | \begin{matrix} 20 | \hat{y} & = & \text{sigmoid}(X' \cdot \theta) & = & \cfrac{1} {1 + e^{-X' \cdot \theta}} 21 | \end{matrix} 22 | $$ 23 | Where: 24 | \begin{itemize} 25 | \item $X$ is a matrix of dimensions $(m \times n)$, the design matrix 26 | \item $X'$ is a matrix of dimensions $(m \times (n + 1))$, 27 | the design matrix onto which a column of $1$'s is added as a first column 28 | \item $\hat{y}$ is a vector of dimension $m$, the vector of predicted values 29 | \item $\theta$ is a vector of dimension $(n + 1)$, the vector of parameters 30 | \end{itemize} 31 | Be careful: 32 | \begin{itemize} 33 | \item the $x$ your function will get as an input corresponds to $X$, the $(m \times n)$ matrix. 34 | Not $X'$. 35 | \item $\theta$ is a vector of dimension $(n + 1)$ 36 | \end{itemize} 37 | \newpage 38 | % ================================= % 39 | \section*{Instructions} 40 | % --------------------------------- % 41 | In the \texttt{log\_pred.py} file, write the following function as per the instructions below:\\ 42 | \par 43 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 44 | def logistic_predict_(x, theta): 45 | """Computes the vector of prediction y_hat from two non-empty numpy.ndarray. 46 | Args: 47 | x: has to be an numpy.ndarray, a vector of dimension m * n. 48 | theta: has to be an numpy.ndarray, a vector of dimension (n + 1) * 1. 49 | Returns: 50 | y_hat as a numpy.ndarray, a vector of dimension m * 1. 51 | None if x or theta are empty numpy.ndarray. 52 | None if x or theta dimensions are not appropriate. 53 | Raises: 54 | This function should not raise any Exception. 55 | """ 56 | ... Your code ... 
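    # Purely illustrative sketch, assuming the sigmoid_ function from ex00 is
    # importable and numpy is imported as np:
    #   x_prime = np.hstack((np.ones((x.shape[0], 1)), x))
    #   return sigmoid_(x_prime.dot(theta))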
57 | \end{minted} 58 | 59 | % ================================= % 60 | \section*{Examples} 61 | % --------------------------------- % 62 | 63 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 64 | # Example 1 65 | x = np.array([4]).reshape((-1, 1)) 66 | theta = np.array([[2], [0.5]]) 67 | logistic_predict_(x, theta) 68 | # Output: 69 | array([[0.98201379]]) 70 | 71 | # Example 1 72 | x2 = np.array([[4], [7.16], [3.2], [9.37], [0.56]]) 73 | theta2 = np.array([[2], [0.5]]) 74 | logistic_predict_(x2, theta2) 75 | # Output: 76 | array([[0.98201379], 77 | [0.99624161], 78 | [0.97340301], 79 | [0.99875204], 80 | [0.90720705]]) 81 | 82 | # Example 3 83 | x3 = np.array([[0, 2, 3, 4], [2, 4, 5, 5], [1, 3, 2, 7]]) 84 | theta3 = np.array([[-2.4], [-1.5], [0.3], [-1.4], [0.7]]) 85 | logistic_predict_(x3, theta3) 86 | # Output: 87 | array([[0.03916572], 88 | [0.00045262], 89 | [0.2890505 ]]) 90 | \end{minted} -------------------------------------------------------------------------------- /module08/exercises/m08ex02.tex: -------------------------------------------------------------------------------- 1 | \chapter{Exercise 02} 2 | \input{exercises/en.ex02_interlude.tex} 3 | \newpage 4 | \extitle{Logistic Loss Function} 5 | \turnindir{ex02} 6 | \exnumber{02} 7 | \exfiles{log\_loss.py} 8 | \exforbidden{None} 9 | \makeheaderfilesforbidden 10 | 11 | % ================================= % 12 | \section*{Objective} 13 | % --------------------------------- % 14 | Understanding and manipulation of the loss function in the context of logistic regression.\\ 15 | \\ 16 | You must implement the following formula as a function: 17 | 18 | $$ 19 | J( \theta) = -\cfrac{1} {m} \lbrack \sum_{i = 1}^{m} y^{(i)}\log(\hat{y}^{(i)})) + (1 - y^{(i)})\log(1 - \hat{y}^{(i)})\rbrack 20 | $$ 21 | Where: 22 | \begin{itemize} 23 | \item $\hat{y}$ is a vector of dimension $m$, the vector of predicted values 24 | \item $\hat{y}^{(i)}$ is the $i^{th}$ component of the $\hat{y}$ vector 25 | \item $y$ is a vector of dimension $m$, the vector of expected values 26 | \item $y^{(i)}$ is the $i^{th}$ component of the $y$ vector 27 | \end{itemize} 28 | 29 | % ================================= % 30 | \section*{Instructions} 31 | % --------------------------------- % 32 | In the \texttt{log\_loss.py} file, write the following function as per the instructions below: 33 | \\ 34 | \par 35 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 36 | def log_loss_(y, y_hat, eps=1e-15): 37 | """ 38 | Computes the logistic loss value. 39 | Args: 40 | y: has to be an numpy.ndarray, a vector of shape m * 1. 41 | y_hat: has to be an numpy.ndarray, a vector of shape m * 1. 42 | eps: has to be a float, epsilon (default=1e-15) 43 | Returns: 44 | The logistic loss value as a float. 45 | None on any error. 46 | Raises: 47 | This function should not raise any Exception. 48 | """ 49 | ... Your code ... 50 | \end{minted} 51 | 52 | \hint{ 53 | The logarithmic function isn't defined in $0$. 54 | This means that if $y^{(i)} = 0$ you will get an error when you try to compute $log(y^{(i)})$. 55 | The purpose of the \texttt{eps} argument is to avoid $log(0)$ errors. 56 | It is a very small residual value we add to \texttt{y}, also referred to as `epsilon`. 
57 | } 58 | 59 | % ================================= % 60 | \section*{Examples} 61 | % --------------------------------- % 62 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 63 | # Example 1: 64 | y1 = np.array([1]).reshape((-1, 1)) 65 | x1 = np.array([4]).reshape((-1, 1)) 66 | theta1 = np.array([[2], [0.5]]) 67 | y_hat1 = logistic_predict_(x1, theta1) 68 | log_loss_(y1, y_hat1) 69 | # Output: 70 | 0.01814992791780973 71 | 72 | # Example 2: 73 | y2 = np.array([[1], [0], [1], [0], [1]]) 74 | x2 = np.array([[4], [7.16], [3.2], [9.37], [0.56]]) 75 | theta2 = np.array([[2], [0.5]]) 76 | y_hat2 = logistic_predict_(x2, theta2) 77 | log_loss_(y2, y_hat2) 78 | # Output: 79 | 2.4825011602474483 80 | 81 | # Example 3: 82 | y3 = np.array([[0], [1], [1]]) 83 | x3 = np.array([[0, 2, 3, 4], [2, 4, 5, 5], [1, 3, 2, 7]]) 84 | theta3 = np.array([[-2.4], [-1.5], [0.3], [-1.4], [0.7]]) 85 | y_hat3 = logistic_predict_(x3, theta3) 86 | log_loss_(y3, y_hat3) 87 | # Output: 88 | 2.9938533108607053 89 | \end{minted} 90 | 91 | \info{ 92 | This function is called \textbf{Cross-Entropy loss}, or \textbf{logistic loss}. 93 | For more information you can look at \href{https://en.wikipedia.org/wiki/Cross_entropy\#Cross-entropy\_error\_function\_and\_logistic\_regression}{this section} 94 | of the Cross entropy Wikipedia article. 95 | } -------------------------------------------------------------------------------- /module08/exercises/m08ex03.tex: -------------------------------------------------------------------------------- 1 | \chapter{Exercise 03} 2 | \extitle{Vectorized Logistic Loss Function} 3 | \input{exercises/en.ex03_interlude.tex} 4 | \newpage 5 | \turnindir{ex03} 6 | \exnumber{03} 7 | \exfiles{vec\_log\_loss.py} 8 | \exforbidden{any function that calculates the derivatives for you} 9 | \makeheaderfilesforbidden 10 | 11 | % ================================= % 12 | \section*{Objective} 13 | % --------------------------------- % 14 | Understanding and manipulation of loss function in the context of logistic regression.\\ 15 | \\ 16 | You must implement the following formula as a function: 17 | 18 | $$ 19 | J( \theta) = -\cfrac{1} {m} \lbrack y \cdot \log(\hat{y}) + (\vec{1} - y) \cdot \log(\vec{1} - \hat{y})\rbrack 20 | $$ 21 | \\ 22 | Where: 23 | \begin{itemize} 24 | \item $\hat{y}$ is a vector of dimension $m$, the vector of predicted values 25 | \item $y$ is a vector of dimension $m$, the vector of expected values 26 | \item $\vec{1}$ is a vector of dimension $m$, a vector full of 1's 27 | \end{itemize} 28 | 29 | 30 | % ================================= % 31 | \section*{Instructions} 32 | % --------------------------------- % 33 | In the \texttt{vec\_log\_loss.py} file, write the following function as per the instructions below:\\ 34 | \\ 35 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 36 | def vec_log_loss_(y, y_hat, eps=1e-15): 37 | """ 38 | Computes the logistic loss value. 39 | Args: 40 | y: has to be an numpy.ndarray, a vector of shape m * 1. 41 | y_hat: has to be an numpy.ndarray, a vector of shape m * 1. 42 | eps: epsilon (default=1e-15) 43 | Returns: 44 | The logistic loss value as a float. 45 | None on any error. 46 | Raises: 47 | This function should not raise any Exception. 48 | """ 49 | \end{minted} 50 | 51 | \hint{ 52 | The purpose of epsilon (eps) is to avoid $log(0)$ errors, it is a very small residual value we add to y. 
53 | } 54 | 55 | % ================================= % 56 | \section*{Examples} 57 | % --------------------------------- % 58 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 59 | # Example 1: 60 | y1 = np.array([1]).reshape((-1, 1)) 61 | x1 = np.array([4]).reshape((-1, 1)) 62 | theta1 = np.array([[2], [0.5]]) 63 | y_hat1 = logistic_predict_(x1, theta1) 64 | vec_log_loss_(y1, y_hat1) 65 | # Output: 66 | 0.018149927917808714 67 | 68 | # Example 2: 69 | y2 = np.array([[1], [0], [1], [0], [1]]) 70 | x2 = np.array([[4], [7.16], [3.2], [9.37], [0.56]]) 71 | theta2 = np.array([[2], [0.5]]) 72 | y_hat2 = logistic_predict_(x2, theta2) 73 | vec_log_loss_(y2, y_hat2) 74 | # Output: 75 | 2.4825011602472347 76 | 77 | # Example 3: 78 | y3 = np.array([[0], [1], [1]]) 79 | x3 = np.array([[0, 2, 3, 4], [2, 4, 5, 5], [1, 3, 2, 7]]) 80 | theta3 = np.array([[-2.4], [-1.5], [0.3], [-1.4], [0.7]]) 81 | y_hat3 = logistic_predict_(x3, theta3) 82 | vec_log_loss_(y3, y_hat3) 83 | # Output: 84 | 2.993853310859968 85 | \end{minted} -------------------------------------------------------------------------------- /module08/exercises/m08ex04.tex: -------------------------------------------------------------------------------- 1 | \chapter{Exercise 04} 2 | \extitle{Logistic Gradient} 3 | \input{exercises/en.ex04_interlude.tex} 4 | \newpage 5 | \turnindir{ex04} 6 | \exnumber{04} 7 | \exfiles{log\_gradient.py} 8 | \exforbidden{any function that performs derivatives for you} 9 | \makeheaderfilesforbidden 10 | 11 | 12 | % ================================= % 13 | \section*{Objective} 14 | % --------------------------------- % 15 | Understand and manipulate the concept of gradient in the context of logistic formulation.\\ 16 | \\ 17 | You must implement the following formula as a function: 18 | 19 | $$ 20 | \begin{matrix} 21 | \nabla(J)_0 & = &\cfrac{1}{m}\sum_{i=1}^{m}(h_{\theta}(x^{(i)}) - y^{(i)}) & \\ 22 | \nabla(J)_j & = &\cfrac{1}{m}\sum_{i=1}^{m}(h_{\theta}(x^{(i)}) - y^{(i)})x_{j}^{(i)} & \text{ for j = 1, ..., n} 23 | \end{matrix} 24 | $$ 25 | \\ 26 | Where: 27 | \begin{itemize} 28 | \item $\nabla(J)$ is a vector of dimension $(n + 1)$, the gradient vector 29 | \item $\nabla(J)_j$ is the j$^\text{th}$ component of $\nabla(J)$, the partial derivative of $J$ with respect to $\theta_j$ 30 | \item $y$ is a vector of dimension $m$, the vector of expected values 31 | \item $y^{(i)}$ is a scalar, the i$^\text{th}$ component of vector $y$ 32 | \item $x^{(i)}$ is the feature vector of the i$^\text{th}$ example 33 | \item $x^{(i)}_j$ is a scalar, the j$^\text{th}$ feature value of the i$^\text{th}$ example 34 | \item $h_{\theta}(x^{(i)})$ is a scalar, the model's estimation of $y^{(i)}$ 35 | \end{itemize} 36 | \bigskip 37 | \noindent{Remember that with logistic regression, the hypothesis is slightly different:}\\ 38 | $$ 39 | h_{\theta}(x^{(i)}) = sigmoid( \theta \cdot x'^{(i)}) 40 | $$ 41 | \newpage 42 | 43 | % ================================= % 44 | \section*{Instructions} 45 | % --------------------------------- % 46 | In the \texttt{log\_gradient.py} file, write the following function as per the instructions below:\\ 47 | 48 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 49 | def log_gradient(x, y, theta): 50 | """Computes a gradient vector from three non-empty numpy.ndarray, with a for-loop. The three arrays must have compatible dimensions. 51 | Args: 52 | x: has to be an numpy.ndarray, a matrix of shape m * n. 
53 | y: has to be an numpy.ndarray, a vector of shape m * 1. 54 | theta: has to be an numpy.ndarray, a vector of shape (n + 1) * 1. 55 | Returns: 56 | The gradient as a numpy.ndarray, a vector of shape n * 1, containing the result of the formula for all j. 57 | None if x, y, or theta are empty numpy.ndarray. 58 | None if x, y and theta do not have compatible dimensions. 59 | Raises: 60 | This function should not raise any Exception. 61 | """ 62 | ... Your code ... 63 | \end{minted} 64 | 65 | % ================================= % 66 | \section*{Examples} 67 | % ================================= % 68 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 69 | # Example 1: 70 | y1 = np.array([1]).reshape((-1, 1)) 71 | x1 = np.array([4]).reshape((-1, 1)) 72 | theta1 = np.array([[2], [0.5]]) 73 | 74 | log_gradient(x1, y1, theta1) 75 | # Output: 76 | array([[-0.01798621], 77 | [-0.07194484]]) 78 | 79 | # Example 2: 80 | y2 = np.array([[1], [0], [1], [0], [1]]) 81 | x2 = np.array([[4], [7.16], [3.2], [9.37], [0.56]]) 82 | theta2 = np.array([[2], [0.5]]) 83 | 84 | log_gradient(x2, y2, theta2) 85 | # Output: 86 | array([[0.3715235 ], 87 | [3.25647547]]) 88 | 89 | # Example 3: 90 | y3 = np.array([[0], [1], [1]]) 91 | x3 = np.array([[0, 2, 3, 4], [2, 4, 5, 5], [1, 3, 2, 7]]) 92 | theta3 = np.array([[-2.4], [-1.5], [0.3], [-1.4], [0.7]]) 93 | 94 | log_gradient(x3, y3, theta3) 95 | # Output: 96 | array([[-0.55711039], 97 | [-0.90334809], 98 | [-2.01756886], 99 | [-2.10071291], 100 | [-3.27257351]]) 101 | \end{minted} -------------------------------------------------------------------------------- /module08/exercises/m08ex05.tex: -------------------------------------------------------------------------------- 1 | \chapter{Exercise 05} 2 | \extitle{Vectorized Logistic Gradient} 3 | \input{exercises/en.ex05_interlude.tex} 4 | \newpage 5 | \turnindir{ex05} 6 | \exnumber{05} 7 | \exfiles{vec\_log\_gradient.py} 8 | \exforbidden{any function that performs derivatives for you} 9 | \makeheaderfilesforbidden 10 | 11 | % ================================= % 12 | \section*{Objective} 13 | % --------------------------------- % 14 | Understand and manipulate the gradient in the context of logistic formulation.\\ 15 | \\ 16 | You must implement the following formula as a function: 17 | 18 | $$ 19 | \nabla(J) = \cfrac{1}{m} X'^T(h_\theta(X) - y) 20 | $$ 21 | \\ 22 | Where: 23 | \begin{itemize} 24 | \item $\nabla(J)$ is the gradient vector of dimension $(n + 1)$ 25 | \item $X'$ is a matrix of dimensions $(m \times (n + 1))$, the design matrix onto which a column of ones was added as the first column 26 | \item $X'^T$ means the matrix has been transposed 27 | \item $h_\theta(X)$ is a vector of dimension $m$, the vector of predicted values 28 | \item $y$ is a vector of dimension $m$, the vector of expected values 29 | \end{itemize} 30 | 31 | 32 | % ================================= % 33 | \section*{Instructions} 34 | % --------------------------------- % 35 | In the \texttt{vec\_log\_gradient.py} file, write the following function as per the instructions given below: 36 | \\ 37 | \par 38 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 39 | def vec_log_gradient(x, y, theta): 40 | """Computes a gradient vector from three non-empty numpy.ndarray, without any for-loop. The three arrays must have compatible shapes. 41 | Args: 42 | x: has to be an numpy.ndarray, a matrix of shape m * n. 
43 | y: has to be an numpy.ndarray, a vector of shape m * 1. 44 | theta: has to be an numpy.ndarray, a vector (n +1) * 1. 45 | Returns: 46 | The gradient as a numpy.ndarray, a vector of shape n * 1, containg the result of the formula for all j. 47 | None if x, y, or theta are empty numpy.ndarray. 48 | None if x, y and theta do not have compatible shapes. 49 | Raises: 50 | This function should not raise any Exception. 51 | """ 52 | ... Your code ... 53 | \end{minted} 54 | 55 | 56 | % ================================= % 57 | \section*{Examples} 58 | % --------------------------------- % 59 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 60 | # Example 1: 61 | y1 = np.array([1]).reshape((-1, 1)) 62 | x1 = np.array([4]).reshape((-1, 1)) 63 | theta1 = np.array([[2], [0.5]]) 64 | 65 | vec_log_gradient(x1, y1, theta1) 66 | # Output: 67 | array([[-0.01798621], 68 | [-0.07194484]]) 69 | 70 | # Example 2: 71 | y2 = np.array([[1], [0], [1], [0], [1]]) 72 | x2 = np.array([[4], [7.16], [3.2], [9.37], [0.56]]) 73 | theta2 = np.array([[2], [0.5]]) 74 | 75 | vec_log_gradient(x2, y2, theta2) 76 | # Output: 77 | array([[0.3715235 ], 78 | [3.25647547]]) 79 | 80 | # Example 3: 81 | y3 = np.array([[0], [1], [1]]) 82 | x3 = np.array([[0, 2, 3, 4], [2, 4, 5, 5], [1, 3, 2, 7]]) 83 | theta3 = np.array([[-2.4], [-1.5], [0.3], [-1.4], [0.7]]) 84 | 85 | vec_log_gradient(x3, y3, theta3) 86 | # Output: 87 | array([[-0.55711039], 88 | [-0.90334809], 89 | [-2.01756886], 90 | [-2.10071291], 91 | [-3.27257351]]) 92 | \end{minted} -------------------------------------------------------------------------------- /module08/exercises/m08ex06.tex: -------------------------------------------------------------------------------- 1 | \chapter{Exercise 06} 2 | \extitle{Logistic Regression} 3 | %\input{exercises/en.ex06_interlude.tex} 4 | %\newpage 5 | \turnindir{ex06} 6 | \exnumber{06} 7 | \exfiles{my\_logistic\_regression.py} 8 | \exforbidden{sklearn} 9 | \makeheaderfilesforbidden 10 | 11 | % ================================= % 12 | \section*{Objective} 13 | % --------------------------------- % 14 | The time to use everything you built so far has (finally) come!\\ 15 | \\ 16 | Demonstrate your knowledge by implementing a logistic regression classifier using 17 | the gradient descent algorithm.\\ 18 | \\ 19 | You must have seen the power of \texttt{numpy} for vectorized operations. 20 | Well let's make something more concrete with that.\\ 21 | \\ 22 | You may have taken a look at Scikit-Learn's implementation of logistic regression 23 | and noticed that the \textbf{sklearn.linear\_model.LogisticRegression} class 24 | offers a lot of options.\\ 25 | \\ 26 | The goal of this exercise is to make a simplified but nonetheless useful and powerful 27 | version, with fewer options.\\ 28 | \newpage 29 | % ================================= % 30 | \section*{Instructions} 31 | % --------------------------------- % 32 | In the \texttt{my\_logistic\_regression.py} file, write a \texttt{MyLogisticRegression} 33 | class as in the instructions given below:\\ 34 | 35 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 36 | class MyLogisticRegression(): 37 | """ 38 | Description: 39 | My personnal logistic regression to classify things. 40 | """ 41 | def __init__(self, theta, alpha=0.001, max_iter=1000): 42 | self.alpha = alpha 43 | self.max_iter = max_iter 44 | self.theta = theta 45 | ... Your code here ... 46 | 47 | ... other methods ... 
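    # As a purely illustrative sketch, fit_ could be a plain gradient descent loop
    # reusing the vectorized logistic gradient from ex05 (adapted to the class):
    #   for _ in range(self.max_iter):
    #       self.theta = self.theta - self.alpha * vec_log_gradient(x, y, self.theta)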
48 | \end{minted} 49 | \\ 50 | You will add at least the following methods: 51 | \begin{itemize} 52 | \item \texttt{predict\_(self, x)} 53 | \item \texttt{loss\_elem\_(self, y, yhat)} 54 | \item \texttt{loss\_(self, y, yhat)} 55 | \item \texttt{fit\_(self, x, y)} 56 | \end{itemize} 57 | \hint{You have already written these functions; you will just need a 58 | few adjustments in order for them to work well within your \textbf{MyLogisticRegression} class.} 59 | 60 | % ================================= % 61 | \subsection*{Examples} 62 | % --------------------------------- % 63 | 64 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 65 | import numpy as np 66 | from my_logistic_regression import MyLogisticRegression as MyLR 67 | X = np.array([[1., 1., 2., 3.], [5., 8., 13., 21.], [3., 5., 9., 14.]]) 68 | Y = np.array([[1], [0], [1]]) 69 | thetas = np.array([[2], [0.5], [7.1], [-4.3], [2.09]]) 70 | mylr = MyLR(thetas) 71 | 72 | # Example 0: 73 | mylr.predict_(X) 74 | # Output: 75 | array([[0.99930437], 76 | [1. ], 77 | [1. ]]) 78 | 79 | # Example 1: 80 | mylr.loss_(X,Y) 81 | # Output: 82 | 11.513157421577002 83 | 84 | # Example 2: 85 | mylr.fit_(X, Y) 86 | mylr.theta 87 | # Output: 88 | array([[ 2.11826435] 89 | [ 0.10154334] 90 | [ 6.43942899] 91 | [-5.10817488] 92 | [ 0.6212541 ]]) 93 | 94 | # Example 3: 95 | mylr.predict_(X) 96 | # Output: 97 | array([[0.57606717] 98 | [0.68599807] 99 | [0.06562156]]) 100 | 101 | # Example 4: 102 | mylr.loss_(X,Y) 103 | # Output: 104 | 1.4779126923052268 105 | \end{minted} -------------------------------------------------------------------------------- /module08/useful_resources.tex: -------------------------------------------------------------------------------- 1 | %******************************************************************************% 2 | % % 3 | % Section useful resources % 4 | % for ML Modules % 5 | % % 6 | %******************************************************************************% 7 | 8 | 9 | \chapter*{Notions and resources} 10 | 11 | \section*{Notions of the module} 12 | \begin{itemize} 13 | \item Logistic regression 14 | \item Logistic hypothesis 15 | \item Logistic gradient descent 16 | \item Multiclass classification 17 | \item Accuracy, precision, recall, F1-score 18 | \item Confusion matrix 19 | \end{itemize} 20 | 21 | \section*{Useful Resources} 22 | 23 | You are recommended to use the following material: \href{https://www.coursera.org/learn/machine-learning}{Machine Learning MOOC - Stanford}\\ 24 | \newline 25 | This series of videos is available at no cost: simply log in, select "Enroll for Free", and click "Audit" at the bottom of the pop-up window.\\ 26 | \newline 27 | The following sections of the course are particularly relevant to today's exercises: 28 | 29 | \subsection*{Week 3: Classification} 30 | 31 | \subsubsection*{Classification with logistic regression} 32 | \begin{itemize} 33 | \item Motivations 34 | \item Logistic regression 35 | \item Decision boundary 36 | \end{itemize} 37 | 38 | \subsubsection*{Cost function for logistic regression} 39 | \begin{itemize} 40 | \item Cost function for logistic regression 41 | \item Simplified Cost Function for Logistic Regression 42 | \end{itemize} 43 | 44 | \subsubsection*{Gradient descent for logistic regression} 45 | \begin{itemize} 46 | \item Gradient Descent Implementation 47 | \end{itemize} 48 | \noindent{\emph{All videos above are also available on this 
\href{https://youtube.com/playlist?list=PLkDaE6sCZn6FNC6YRfRQc_FbeQrF8BwGI&feature=shared}{Andrew Ng's YouTube playlist}, videos from 31 to 36.}} 49 | -------------------------------------------------------------------------------- /module09/Makefile: -------------------------------------------------------------------------------- 1 | # List the pdf's to build. foo.tex will produce foo.pdf 2 | TARGETS = en.subject.pdf 3 | 4 | # List the files included in the slides 5 | DEPS = exercises/en.ex01_interlude.tex \ 6 | exercises/en.ex04_interlude.tex \ 7 | exercises/en.ex06_interlude.tex \ 8 | exercises/en.ex08_interlude.tex \ 9 | exercises/en.ex10_interlude.tex \ 10 | ../resources/42ai_bootcamps/en.instructions.tex \ 11 | ../resources/42ai_bootcamps/en.acknowledgements.tex \ 12 | useful_resources.tex 13 | 14 | # Relative path to the LaTeX documentclass setup files 15 | # Adapt as needed 16 | # RELPATH = $(shell git rev-parse --show-toplevel)/resources/latex/ 17 | # RELPATH for github actions: 18 | RELPATH = $(shell dirname `pwd`)/resources/latex/ 19 | 20 | # You should not touch this either 21 | include $(RELPATH)/Makefile.LaTeX 22 | -------------------------------------------------------------------------------- /module09/assets/42ai_logo.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module09/assets/42ai_logo.pdf -------------------------------------------------------------------------------- /module09/assets/Evaluate.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module09/assets/Evaluate.png -------------------------------------------------------------------------------- /module09/assets/Improve.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module09/assets/Improve.png -------------------------------------------------------------------------------- /module09/attachments/solar_system_census_planets.csv: -------------------------------------------------------------------------------- 1 | ,Origin 2 | 0,1.0 3 | 1,2.0 4 | 2,3.0 5 | 3,3.0 6 | 4,0.0 7 | 5,3.0 8 | 6,2.0 9 | 7,1.0 10 | 8,1.0 11 | 9,2.0 12 | 10,1.0 13 | 11,2.0 14 | 12,3.0 15 | 13,0.0 16 | 14,3.0 17 | 15,2.0 18 | 16,3.0 19 | 17,0.0 20 | 18,0.0 21 | 19,2.0 22 | 20,1.0 23 | 21,3.0 24 | 22,1.0 25 | 23,3.0 26 | 24,0.0 27 | 25,0.0 28 | 26,0.0 29 | 27,2.0 30 | 28,1.0 31 | 29,3.0 32 | 30,1.0 33 | 31,2.0 34 | 32,2.0 35 | 33,2.0 36 | 34,1.0 37 | 35,0.0 38 | 36,0.0 39 | 37,0.0 40 | 38,0.0 41 | 39,1.0 42 | 40,1.0 43 | 41,1.0 44 | 42,0.0 45 | 43,3.0 46 | 44,2.0 47 | 45,2.0 48 | 46,2.0 49 | 47,3.0 50 | 48,1.0 51 | 49,1.0 52 | 50,3.0 53 | 51,0.0 54 | 52,0.0 55 | 53,1.0 56 | 54,3.0 57 | 55,3.0 58 | 56,3.0 59 | 57,1.0 60 | 58,1.0 61 | 59,0.0 62 | 60,1.0 63 | 61,0.0 64 | 62,1.0 65 | 63,2.0 66 | 64,2.0 67 | 65,3.0 68 | 66,1.0 69 | 67,2.0 70 | 68,2.0 71 | 69,2.0 72 | 70,0.0 73 | 71,2.0 74 | 72,2.0 75 | 73,3.0 76 | 74,2.0 77 | 75,0.0 78 | 76,1.0 79 | 77,1.0 80 | 78,3.0 81 | 79,1.0 82 | 80,2.0 83 | 81,2.0 84 | 82,2.0 85 | 83,3.0 86 | 84,3.0 87 | 85,2.0 88 | 86,3.0 89 | 87,0.0 90 | 88,2.0 91 | 89,1.0 92 | 90,3.0 93 | 91,1.0 94 | 92,3.0 95 | 93,3.0 96 | 94,0.0 97 | 95,1.0 98 | 96,0.0 99 | 97,0.0 100 | 98,0.0 101 | 99,3.0 102 | 
100,3.0 103 | 101,0.0 104 | 102,3.0 105 | 103,3.0 106 | 104,2.0 107 | 105,1.0 108 | 106,1.0 109 | 107,1.0 110 | 108,0.0 111 | 109,0.0 112 | 110,0.0 113 | 111,0.0 114 | 112,1.0 115 | 113,2.0 116 | 114,3.0 117 | 115,0.0 118 | 116,2.0 119 | 117,0.0 120 | 118,2.0 121 | 119,3.0 122 | -------------------------------------------------------------------------------- /module09/exercises/en.ex04_interlude.tex: -------------------------------------------------------------------------------- 1 | %******************************************************************************% 2 | % % 3 | % Interlude % 4 | % for Machine Learning module % 5 | % % 6 | %******************************************************************************% 7 | 8 | % =============================================== % 9 | \section*{Interlude - Regularized Gradient} 10 | % =============================================== % 11 | \begin{figure}[!h] 12 | \centering 13 | \includegraphics[scale=0.25]{assets/Improve.png} 14 | %\caption{The Learning Cycle: Improve} 15 | \end{figure} 16 | \noindent{To derive the gradient of the regularized loss function, $\nabla(J)$ 17 | you have to change a bit the formula of the unregularized gradient.}\\ 18 | \\ 19 | Given the fact that we are not penalizing $\theta_0$, the formula will remain 20 | the same as before for this parameter. For the other parameters ($\theta_1, \dots, \theta_n$), 21 | we must add the partial derivative of the regularization term: $\lambda \theta_j$.\\ 22 | \\ 23 | Therefore, we get: 24 | $$ 25 | \nabla(J)_0 = \frac{1}{m}\sum_{i=1}^{m}(h_\theta(x^{(i)}) - y^{(i)}) 26 | $$ 27 | $$ 28 | \nabla(J)_j = \frac{1}{m}\left(\sum_{i=1}^{m}(h_\theta(x^{(i)}) - y^{(i)})x_j^{(i)} + \lambda \theta_j\right) \text{ for j = 1, ..., n} 29 | $$ 30 | \\ 31 | Where: 32 | \begin{itemize} 33 | \item $\nabla(J)_j$ is the j$^\text{th}$ component of the gradient vector $\nabla(J)$ 34 | \item $m$ is the number of training examples used 35 | \item $h_\theta(x^{(i)})$ is the model's prediction for the i$^\text{th}$ training example 36 | \item $x^{(i)}$ is the feature vector of the i$^\text{th}$ training example 37 | \item $y^{(i)}$ is the expected target value for the i$^\text{th}$ example 38 | \item $\lambda$ is a constant, the regularization hyperparameter 39 | \item $\theta_j$ is the j$^\text{th}$ parameter of the $\theta$ vector 40 | \end{itemize} 41 | \bigskip 42 | Which can be vectorized as: 43 | $$ 44 | \nabla(J) = \frac{1}{m} [X'^T(h_\theta(X) - y) + \lambda \theta'] 45 | $$ 46 | \\ 47 | Where: 48 | \begin{itemize} 49 | \item $\nabla(J)$ is a vector of dimension $(n + 1)$, the gradient vector 50 | \item $m$ is the number of training examples used 51 | \item $X$ is a matrix of dimension $(m \times n)$, the design matrix 52 | \item $X'$ is a matrix of dimension $(m \times (n + 1))$, the design matrix onto 53 | which a column of ones is added as a first column 54 | \item $y$ is a vector of dimension $m$, the vector of expected values 55 | \item $h_\theta(X)$ is a vector of dimension $m$, the vector of predicted values 56 | \item $\lambda$ is a constant 57 | \item $\theta$ is a vector of dimension $(n + 1)$, the parameter vector 58 | \item $\theta'$ is a vector of dimension $(n + 1)$, constructed using the following rules: 59 | \end{itemize} 60 | 61 | $$ 62 | \begin{matrix} 63 | \theta'_0 & = 0 \\ 64 | \theta'_j & = \theta_j & \text{ for } j = 1, \dots, n\\ 65 | \end{matrix} 66 | $$ 67 | 68 | % =============================================== % 69 | \subsection*{Linear Gradient vs Logistic Gradient} 70 | 
% ----------------------------------------------- % 71 | As before, we draw your attention to the only difference between the linear regression's 72 | and the logistic regression's gradient equations: \textbf{the hypothesis function} $h_\theta(X)$. 73 | \begin{itemize} 74 | \item In the linear regression: $h_\theta(X) = X'\theta$ 75 | \item In the logistic regression: $h_\theta(X) = \text{sigmoid}(X'\theta)$ 76 | \end{itemize} 77 | -------------------------------------------------------------------------------- /module09/exercises/en.ex06_interlude.tex: -------------------------------------------------------------------------------- 1 | %******************************************************************************% 2 | % % 3 | % Interlude % 4 | % for Machine Learning module % 5 | % % 6 | %******************************************************************************% 7 | 8 | % ============================================== % 9 | \section*{Interlude - Next Level: Ridge Regression} 10 | % ============================================== % 11 | 12 | Until now we have only talked about L$_2$ regularization and its implications for the 13 | calculation of the loss function and gradient for both the linear and the logistic regression.\\ 14 | \\ 15 | Now it's time to use the proper terminology:\\ 16 | \\ 17 | \emph{When we apply L$_2$ regularization to a linear regression model, the new model is 18 | called a \textbf{Ridge Regression} model. 19 | Besides that brand-new name, Ridge regression is nothing more than 20 | linear regression regularized with L$_2$.}\\ 21 | \\ 22 | We suggest you watch this \href{https://www.youtube.com/watch?v=Q81RR3yKn30}{very nice 23 | explanation of Ridge Regularization}.\\ 24 | \\ 25 | By the way, this YouTube channel, \texttt{\textit{StatQuest}}, is very helpful for 26 | understanding the gist of a lot of machine learning concepts.\\ 27 | You will not waste your time watching its statistics and machine learning playlists!
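\bigskip
\noindent{To make the idea a bit more concrete, here is a minimal \texttt{numpy} sketch of the ridge loss described in the previous interludes. It is purely illustrative: the function name, its signature and the sample values below are our own choices for this example, not part of any exercise.}
\begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python}
import numpy as np

def ridge_loss(y, y_hat, theta, lambda_):
    # Illustrative sketch of J = (1 / 2m) * [(y_hat - y).(y_hat - y) + lambda * (theta'.theta')]
    m = y.shape[0]
    theta_prime = theta.astype(float).copy()
    theta_prime[0] = 0.0                           # theta_0 is never penalized
    residual = y_hat - y
    penalty = lambda_ * np.sum(theta_prime ** 2)
    return float((np.sum(residual ** 2) + penalty) / (2 * m))

y = np.array([[2.], [14.], [-13.]])
y_hat = np.array([[3.], [13.], [-11.5]])
theta = np.array([[1.], [2.5], [1.5]])
ridge_loss(y, y_hat, theta, 0.5)                   # linear regression loss plus the L2 penalty
\end{minted}
\noindent{Setting $\theta'_0$ to zero reproduces the convention used throughout this module: the bias term is not penalized, and with $\lambda = 0$ the formula falls back to the plain linear regression loss.}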
28 | -------------------------------------------------------------------------------- /module09/exercises/en.ex08_interlude.tex: -------------------------------------------------------------------------------- 1 | %******************************************************************************% 2 | % % 3 | % Interlude % 4 | % for Machine Learning module % 5 | % % 6 | %******************************************************************************% 7 | 8 | % ============================================== % 9 | \section*{Interlude} 10 | % ============================================== % 11 | \subsection*{Regularized Logistic Regression is still Logistic Regression} 12 | % ---------------------------------------------- % 13 | As opposed to linear regression, \textbf{regularized logistic regression is still 14 | called logistic regression}.\\ 15 | \\ 16 | Working without regularization parameters can simply be regarded as a special 17 | case where $\lambda = 0$.\\ 18 | 19 | If $\lambda = 0$: 20 | \begin{eqnarray*} 21 | \nabla(J) & = & \frac{1}{m} [X'^T(h_\theta(X) - y) + \lambda \theta'] \\ 22 | & = & \frac{1}{m} [X'^T(h_\theta(X) - y) + 0 \cdot \theta'] \\ 23 | & = & \frac{1}{m} [X'^T(h_\theta(X) - y)] 24 | \end{eqnarray*} -------------------------------------------------------------------------------- /module09/exercises/en.ex10_interlude.tex: -------------------------------------------------------------------------------- 1 | %******************************************************************************% 2 | % % 3 | % Interlude % 4 | % for Machine Learning module % 5 | % % 6 | %******************************************************************************% 7 | 8 | % ============================================== % 9 | \section*{One Last Word - It's Just a Beginning...} 10 | % ============================================== % 11 | \subsection*{Congratulations!!} 12 | % ---------------------------------------------- % 13 | You have finished this bootcamp and you can be proud of yourself! 14 | We hope you liked it and that the material was understandable. 15 | 16 | We tried our best to make it as accessible as possible to anyone, even to someone with little mathematical background. It was quite a challenge, and we hope we succeeded in that difficult mission. 17 | 18 | Equipped with your brand-new knowledge, you are now able to tackle more challenging algorithms such as \texttt{\textbf{ensemble methods (random forest, gradient boosting)}}, \texttt{\textbf{support vector machines}} or even \texttt{\textbf{artificial neural networks}}!! 19 | 20 | And because we know that \texttt{\textbf{a lot of you had neural networks in mind}} when you started this journey into machine learning, let's talk a bit more about why you are now able to dive deep into them... fearlessly! 21 | 22 | \texttt{\textbf{Neural networks}} are based on the same blocks you should now be familiar with. 23 | Essentially: 24 | \begin{itemize} 25 | \item matrix and vector operations, 26 | \item gradient descent, 27 | \item regularization, 28 | \item the sigmoid (as an activation function, even if it is a bit outdated now) 29 | \end{itemize} 30 | 31 | Let's see what you can do now. 
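\bigskip
\noindent{As a purely illustrative teaser (nothing to submit here, and the helper names below are our own), the small sketch that follows strings those familiar blocks together: one ``neuron'' is nothing more than the logistic hypothesis you implemented in this bootcamp, $\text{sigmoid}(X'\theta)$, and a layer is simply several such units side by side.}
\begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python}
import numpy as np

def sigmoid(z):
    # The activation function you already know from the logistic hypothesis
    return 1 / (1 + np.exp(-z))

def layer(x_prime, thetas):
    # One "layer": each column of thetas is the parameter vector of one logistic unit
    return sigmoid(x_prime @ thetas)

x_prime = np.array([[1., 0.5, -1.2],
                    [1., 2.0,  0.3]])   # design matrix with its column of ones
thetas = np.array([[0.1, -0.4],
                   [0.8,  0.2],
                   [-0.5, 0.7]])        # two units, i.e. two theta vectors side by side
layer(x_prime, thetas)                  # one activation per example and per unit, shape (2, 2)
\end{minted}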
32 | 33 | % ============================================== % 34 | \subsection*{To go further} 35 | % ---------------------------------------------- % 36 | To keep learning Machine Learning, here are several options you should consider: 37 | \begin{itemize} 38 | \item To complete the entire \href{https://www.coursera.org/learn/machine-learning/home/}{Stanford's Machine Learning MOOC}. 39 | It is a great resource, a \textbf{classic} for those who want to study machine learning. 40 | This bootcamp closely followed the architecture of its first three weeks. 41 | This course is definitely worth your time! 42 | Also, someone did great work converting all the Octave assignments into \href{https://github.com/dibgerge/ml-coursera-python-assignments}{Python notebooks}. 43 | \item To take the \href{https://course.fast.ai/}{fast.ai Deep Learning MOOC}. 44 | It's a great way to learn Deep Learning following a top-down approach. 45 | \end{itemize} -------------------------------------------------------------------------------- /module09/exercises/m09ex00.tex: -------------------------------------------------------------------------------- 1 | \chapter{Exercise 00} 2 | \extitle{Polynomial models II} 3 | %\input{exercises/en.ex00_interlude.tex} 4 | %\newpage 5 | \turnindir{ex00} 6 | \exnumber{00} 7 | \exfiles{polynomial\_model\_extended.py} 8 | \exforbidden{sklearn} 9 | \makeheaderfilesforbidden 10 | 11 | % ================================== % 12 | \section*{Objective} 13 | % ---------------------------------- % 14 | Create a function that takes a matrix $X$ of dimensions $(m \times n)$ and an integer $p$ 15 | as input, and returns a matrix of dimension $(m \times (n \cdot p))$.\\ 16 | \\ 17 | For each column $x_j$ of the matrix $X$, the new matrix contains 18 | $x_j$ raised to the power of $k$, for $k = 1, 2, ..., p$: 19 | 20 | $$ 21 | x_1 \mid \ldots \mid x_n \mid x_1^2 \mid \ldots \mid x_n^2 \mid \ldots \mid x_1^p \mid \ldots \mid x_n^p 22 | $$ 23 | \newpage 24 | % ================================== % 25 | \section*{Instructions} 26 | % ---------------------------------- % 27 | In the \texttt{polynomial\_model\_extended.py} file, write the following function 28 | as per the instructions given below:\\ 29 | \\ 30 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 31 | def add_polynomial_features(x, power): 32 | """Add polynomial features to matrix x by raising its columns to every power in the range 33 | of 1 up to the power given in argument. 34 | Args: 35 | x: has to be a numpy.ndarray, a matrix of shape m * n. 36 | power: has to be an int, the power up to which the columns of matrix x are going 37 | to be raised. 38 | Returns: 39 | The matrix of polynomial features as a numpy.ndarray, of shape m * (n * power), 40 | containing the polynomial feature values for all 41 | training examples. 42 | None if x is an empty numpy.ndarray. 43 | Raises: 44 | This function should not raise any Exception. 45 | """ 46 | ... Your code ... 
47 | \end{minted} 48 | 49 | 50 | % ================================== % 51 | \section*{Examples} 52 | % ---------------------------------- % 53 | 54 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 55 | import numpy as np 56 | x = np.arange(1,11).reshape(5, 2) 57 | 58 | # Example 1: 59 | add_polynomial_features(x, 3) 60 | # Output: 61 | array([[ 1, 2, 1, 4, 1, 8], 62 | [ 3, 4, 9, 16, 27, 64], 63 | [ 5, 6, 25, 36, 125, 216], 64 | [ 7, 8, 49, 64, 343, 512], 65 | [ 9, 10, 81, 100, 729, 1000]]) 66 | 67 | # Example 2: 68 | add_polynomial_features(x, 4) 69 | # Output: 70 | array([[ 1, 2, 1, 4, 1, 8, 1, 16], 71 | [ 3, 4, 9, 16, 27, 64, 81, 256], 72 | [ 5, 6, 25, 36, 125, 216, 625, 1296], 73 | [ 7, 8, 49, 64, 343, 512, 2401, 4096], 74 | [ 9, 10, 81, 100, 729, 1000, 6561, 10000]]) 75 | \end{minted} -------------------------------------------------------------------------------- /module09/exercises/m09ex01.tex: -------------------------------------------------------------------------------- 1 | \chapter{Exercise 01} 2 | \extitle{L2 Regularization} 3 | \input{exercises/en.ex01_interlude.tex} 4 | \newpage 5 | \turnindir{ex01} 6 | \exnumber{01} 7 | \exfiles{l2\_reg.py} 8 | \exforbidden{sklearn} 9 | \makeheaderfilesforbidden 10 | 11 | % ================================= % 12 | \section*{Objective} 13 | % --------------------------------- % 14 | You must implement the following formulas as functions: 15 | 16 | % ================================= % 17 | \subsection*{Iterative} 18 | % --------------------------------- % 19 | $$ 20 | L_2(\theta)^2 = \sum_{j = 1}^n \theta_j^2 21 | $$ 22 | \\ 23 | Where: 24 | \begin{itemize} 25 | \item $\theta$ is a vector of dimension $(n + 1)$. 26 | \end{itemize} 27 | 28 | % ================================= % 29 | \subsection*{Vectorized} 30 | % --------------------------------- % 31 | $$ 32 | L_2(\theta)^2 = \theta' \cdot \theta' 33 | $$ 34 | \\ 35 | Where: 36 | \begin{itemize} 37 | \item $\theta'$ is a vector of dimension $(n + 1)$, constructed using the following rules: 38 | \end{itemize} 39 | 40 | $$ 41 | \begin{matrix} 42 | \theta'_0 & = 0 \\ 43 | \theta'_j & = \theta_j & \text{ for } j = 1, \dots, n\\ 44 | \end{matrix} 45 | $$ 46 | \newpage 47 | % ================================= % 48 | \section*{Instructions} 49 | % --------------------------------- % 50 | In the \texttt{l2\_reg.py} file, write the following functions as per the instructions given below:\\ 51 | \\ 52 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 53 | def iterative_l2(theta): 54 | """Computes the L2 regularization of a non-empty numpy.ndarray, with a for-loop. 55 | Args: 56 | theta: has to be a numpy.ndarray, a vector of shape n * 1. 57 | Returns: 58 | The L2 regularization as a float. 59 | None if theta in an empty numpy.ndarray. 60 | Raises: 61 | This function should not raise any Exception. 62 | """ 63 | ... Your code ... 64 | 65 | def l2(theta): 66 | """Computes the L2 regularization of a non-empty numpy.ndarray, without any for-loop. 67 | Args: 68 | theta: has to be a numpy.ndarray, a vector of shape n * 1. 69 | Returns: 70 | The L2 regularization as a float. 71 | None if theta in an empty numpy.ndarray. 72 | Raises: 73 | This function should not raise any Exception. 74 | """ 75 | ... Your code ... 
76 | \end{minted} 77 | 78 | % ================================= % 79 | \section*{Examples} 80 | % --------------------------------- % 81 | 82 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 83 | x = np.array([2, 14, -13, 5, 12, 4, -19]).reshape((-1, 1)) 84 | 85 | # Example 1: 86 | iterative_l2(x) 87 | # Output: 88 | 911.0 89 | 90 | # Example 2: 91 | l2(x) 92 | # Output: 93 | 911.0 94 | 95 | y = np.array([3,0.5,-6]).reshape((-1, 1)) 96 | # Example 3: 97 | iterative_l2(y) 98 | # Output: 99 | 36.25 100 | 101 | # Example 4: 102 | l2(y) 103 | # Output: 104 | 36.25 105 | \end{minted} -------------------------------------------------------------------------------- /module09/exercises/m09ex02.tex: -------------------------------------------------------------------------------- 1 | \chapter{Exercise 02} 2 | \extitle{Regularized Linear Loss Function} 3 | \turnindir{ex02} 4 | \exnumber{02} 5 | \exfiles{linear\_loss\_reg.py} 6 | \exforbidden{sklearn} 7 | \makeheaderfilesforbidden 8 | 9 | % ================================= % 10 | \section*{Objective} 11 | % --------------------------------- % 12 | You must implement the following formula as a function: 13 | 14 | $$ 15 | J(\theta) = \frac{1}{2m}[(\hat{y} - y)\cdot(\hat{y} - y) + \lambda (\theta' \cdot \theta')] 16 | $$ 17 | \\ 18 | Where: 19 | \begin{itemize} 20 | \item $y$ is a vector of dimension $m$, the expected values 21 | \item $\hat{y}$ is a vector of dimension $m$, the predicted values 22 | \item $\lambda$ is a constant, the regularization hyperparameter 23 | \item $\theta'$ is a vector of dimension $n$, constructed using the following rules: 24 | \end{itemize} 25 | 26 | $$ 27 | \begin{matrix} 28 | \theta'_0 & = 0 \\ 29 | \theta'_j & = \theta_j & \text{ for } j = 1, \dots, n\\ 30 | \end{matrix} 31 | $$ 32 | \newpage 33 | % ================================= % 34 | \section*{Instructions} 35 | % --------------------------------- % 36 | In the \texttt{linear\_loss\_reg.py} file, write the following function 37 | as per the instructions given below:\\ 38 | \\ 39 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 40 | def reg_loss_(y, y_hat, theta, lambda_): 41 | """Computes the regularized loss of a linear regression model from two non-empty numpy.array, 42 | without any for loop. The two arrays must have the same dimensions. 43 | Args: 44 | y: has to be an numpy.ndarray, a vector of shape m * 1. 45 | y_hat: has to be an numpy.ndarray, a vector of shape m * 1. 46 | theta: has to be a numpy.ndarray, a vector of shape n * 1. 47 | lambda_: has to be a float. 48 | Returns: 49 | The regularized loss as a float. 50 | None if y, y_hat, or theta are empty numpy.ndarray. 51 | None if y and y_hat do not share the same shapes. 52 | Raises: 53 | This function should not raise any Exception. 54 | """ 55 | ... Your code ... 
56 | \end{minted} 57 | 58 | \hint{such a situation could be a good use case for decorators...} 59 | 60 | % ================================= % 61 | \section*{Examples} 62 | % --------------------------------- % 63 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 64 | y = np.array([2, 14, -13, 5, 12, 4, -19]).reshape((-1, 1)) 65 | y_hat = np.array([3, 13, -11.5, 5, 11, 5, -20]).reshape((-1, 1)) 66 | theta = np.array([1, 2.5, 1.5, -0.9]).reshape((-1, 1)) 67 | 68 | # Example : 69 | reg_loss_(y, y_hat, theta, .5) 70 | # Output: 71 | 0.8503571428571429 72 | 73 | # Example : 74 | reg_loss_(y, y_hat, theta, .05) 75 | # Output: 76 | 0.5511071428571429 77 | 78 | # Example : 79 | reg_loss_(y, y_hat, theta, .9) 80 | # Output: 81 | 1.116357142857143 82 | \end{minted} -------------------------------------------------------------------------------- /module09/exercises/m09ex03.tex: -------------------------------------------------------------------------------- 1 | \chapter{Exercise 03} 2 | \extitle{Regularized Logistic Loss Function} 3 | %\input{exercises/en.ex03_interlude.tex} 4 | %\newpage 5 | \turnindir{ex03} 6 | \exnumber{03} 7 | \exfiles{logistic\_loss\_reg.py} 8 | \exforbidden{sklearn} 9 | \makeheaderfilesforbidden 10 | 11 | % ================================= % 12 | \section*{Objective} 13 | % --------------------------------- % 14 | You must implement the following formula as a function: 15 | 16 | $$ 17 | J( \theta) = -\frac{1} {m} \lbrack y \cdot \log(\hat{y}) + (\vec{1} - y) \cdot \log(\vec{1} - \hat{y})\rbrack + \frac{\lambda}{2m} (\theta' \cdot \theta') 18 | $$ 19 | \\ 20 | Where: 21 | \begin{itemize} 22 | \item $\hat{y}$ is a vector of dimension $m$, the vector of predicted values 23 | \item $y$ is a vector of dimension $m$, the vector of expected values 24 | \item $\vec{1}$ is a vector of dimension $m$, a vector full of ones 25 | \item $\lambda$ is a constant, the regularization hyperparameter 26 | \item $\theta'$ is a vector of dimension $n$, constructed using the following rules: 27 | \end{itemize} 28 | $$ 29 | \begin{matrix} 30 | \theta'_0 & = 0 \\ 31 | \theta'_j & = \theta_j & \text{ for } j = 1, \dots, n\\ 32 | \end{matrix} 33 | $$ 34 | \newpage 35 | % ================================= % 36 | \section*{Instructions} 37 | % --------------------------------- % 38 | In the \texttt{logistic\_loss\_reg.py} file, write the following function as 39 | per the instructions given below:\\ 40 | \\ 41 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 42 | def reg_log_loss_(y, y_hat, theta, lambda_): 43 | """Computes the regularized loss of a logistic regression model from two non-empty numpy.ndarray, 44 | without any for loop. The two arrays must have the same shapes. 45 | Args: 46 | y: has to be an numpy.ndarray, a vector of shape m * 1. 47 | y_hat: has to be an numpy.ndarray, a vector of shape m * 1. 48 | theta: has to be a numpy.ndarray, a vector of shape n * 1. 49 | lambda_: has to be a float. 50 | Returns: 51 | The regularized loss as a float. 52 | None if y, y_hat, or theta is empty numpy.ndarray. 53 | None if y and y_hat do not share the same shapes. 54 | Raises: 55 | This function should not raise any Exception. 56 | """ 57 | ... Your code ... 
58 | \end{minted} 59 | 60 | \hint{Here again, seems to be a good use case for decorators ...} 61 | 62 | % ================================= % 63 | \section*{Examples} 64 | % --------------------------------- % 65 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 66 | y = np.array([1, 1, 0, 0, 1, 1, 0]).reshape((-1, 1)) 67 | y_hat = np.array([.9, .79, .12, .04, .89, .93, .01]).reshape((-1, 1)) 68 | theta = np.array([1, 2.5, 1.5, -0.9]).reshape((-1, 1)) 69 | 70 | # Example : 71 | reg_log_loss_(y, y_hat, theta, .5) 72 | # Output: 73 | 0.43377043716475955 74 | 75 | # Example : 76 | reg_log_loss_(y, y_hat, theta, .05) 77 | # Output: 78 | 0.13452043716475953 79 | 80 | # Example : 81 | reg_log_loss_(y, y_hat, theta, .9) 82 | # Output: 83 | 0.6997704371647596 84 | \end{minted} -------------------------------------------------------------------------------- /module09/exercises/m09ex06.tex: -------------------------------------------------------------------------------- 1 | \chapter{Exercise 06} 2 | \extitle{Ridge Regression} 3 | \input{exercises/en.ex06_interlude.tex} 4 | \newpage 5 | \turnindir{ex06} 6 | \exnumber{06} 7 | \exfiles{ridge.py} 8 | \exforbidden{sklearn} 9 | \makeheaderfilesforbidden 10 | 11 | % ================================= % 12 | \section*{Objective} 13 | % --------------------------------- % 14 | Now it's time to implement your \texttt{MyRidge} class, similar to 15 | the class of the same name in \texttt{sklearn.linear\_model}.\\ 16 | 17 | % ================================= % 18 | \section*{Instructions} 19 | % --------------------------------- % 20 | In the \texttt{ridge.py} file, create the following class as per the instructions given below:\\ 21 | \\ 22 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 23 | class MyRidge(ParentClass): 24 | """ 25 | Description: 26 | My personnal ridge regression class to fit like a boss. 27 | """ 28 | def __init__(self, thetas, alpha=0.001, max_iter=1000, lambda_=0.5): 29 | self.alpha = alpha 30 | self.max_iter = max_iter 31 | self.thetas = thetas 32 | self.lambda_ = lambda_ 33 | ... Your code here ... 34 | 35 | ... other methods ... 
36 | \end{minted} 37 | \\ 38 | Your \texttt{MyRidge} class will have at least the following methods: 39 | \begin{itemize} 40 | \item \texttt{\_\_init\_\_}, special method, similar to the one you 41 | wrote in \texttt{MyLinearRegression} (module06) 42 | \item \texttt{get\_params\_}, which gets the parameters of the estimator 43 | \item \texttt{set\_params\_}, which sets the parameters of the estimator 44 | \item \texttt{loss\_}, which returns the loss between 2 vectors (numpy arrays) 45 | \item \texttt{loss\_elem\_}, which returns a vector corresponding to the squared 46 | difference between 2 vectors (numpy arrays) 47 | \item \texttt{predict\_}, which generates predictions using a linear model 48 | \item \texttt{gradient\_}, which calculates the vectorized regularized gradient 49 | \item \texttt{fit\_}, which fits a Ridge regression model to a training dataset 50 | \end{itemize} 51 | 52 | \hint{You should consider inheritance from \texttt{MyLinearRegression}.} 53 | \noindent{If \texttt{MyRidge} inherits from \texttt{MyLinearRegression}, you may not 54 | need to reimplement the \texttt{predict\_} method.}\\ 55 | \\ 56 | The difference between \texttt{MyRidge}'s implementations of \texttt{loss\_elem\_}, \texttt{loss\_}, \texttt{gradient\_} and 57 | \texttt{fit\_} and the ones in your \texttt{MyLinearRegression} class 58 | (implemented in module06) is the use of a regularization term.\\ 59 | \hint{ 60 | Again, this is a good use case for decorators... 61 | } -------------------------------------------------------------------------------- /module09/exercises/m09ex07.tex: -------------------------------------------------------------------------------- 1 | \chapter{Exercise 07} 2 | \extitle{Practicing Ridge Regression} 3 | %\input{exercises/en.ex07_interlude.tex} 4 | %\newpage 5 | \turnindir{ex07} 6 | \exnumber{07} 7 | \exfiles{space\_avocado.py, benchmark\_train.py, models.[csv/yml/pickle]} 8 | \exforbidden{sklearn} 9 | \makeheaderfilesforbidden 10 | 11 | 12 | % ================================= % 13 | \section*{Objective} 14 | % --------------------------------- % 15 | It's training time! 16 | Let's practice our brand-new Ridge Regression with a polynomial model. 17 | 18 | % ================================= % 19 | \section*{Introduction} 20 | % --------------------------------- % 21 | You have already used the dataset \texttt{space\_avocado.csv}. 22 | The dataset is made of 5 columns: 23 | \begin{itemize} 24 | \item \textbf{index}: not relevant 25 | \item \textbf{weight}: the weight of the avocado order (in tons) 26 | \item \textbf{prod\_distance}: distance from where the ordered avocado is produced (in Mkms) 27 | \item \textbf{time\_delivery}: time between the order and the receipt (in days) 28 | \item \textbf{target}: price of the order (in trantorian units) 29 | \end{itemize} 30 | It contains the data of all the avocado purchases made by the Trantor administration 31 | (guacamole is a serious business there).\\ 32 | 33 | % ================================= % 34 | \section*{Instructions} 35 | % --------------------------------- % 36 | You have to explore different models and select the best you find.
37 | To do this:\\ 38 | \begin{itemize} 39 | \item Split your \texttt{space\_avocado.csv} dataset into a training set, a cross-validation set and a test set 40 | \item Use your \texttt{add\_polynomial\_features} function on your training set 41 | \item Consider several Ridge Regression models with polynomial hypotheses up to a maximum degree of $4$ 42 | \item For each hypothesis, consider a regularization factor ranging from $0$ to $1$ with a step of $0.2$ 43 | \item Evaluate your models on the cross-validation set 44 | \item Evaluate the best model on the test set 45 | \end{itemize} 46 | \info{According to your model evaluations, what is the best hypothesis you can get?} 47 | \begin{itemize} 48 | \item Plot the evaluation curve, which will help you select the best model 49 | (evaluation metrics vs models + $\lambda$ factor). 50 | \item Plot the true price and the predicted price obtained via your best model 51 | with the different $\lambda$ values (meaning the dataset + the 5 predicted curves). 52 | \end{itemize} 53 | 54 | \hint{The training of all your models can take a long time.\newline 55 | Therefore, you only need to train the best one during the correction.} 56 | \noindent{Nevertheless, you should provide, in \texttt{benchmark\_train.py}, the program which performs the training 57 | of all the models and saves the parameters of the different models into a file.}\\ 58 | \\ 59 | In \texttt{models.[csv/yml/pickle]} one must find the parameters of all the models 60 | you have explored and trained.\\ 61 | \\ 62 | In \texttt{space\_avocado.py}, train the model based on the best hypothesis you find 63 | and load the other models from \texttt{models.[csv/yml/pickle]}. 64 | Then evaluate the best model on the test set and plot the different graphics as asked before. 65 | -------------------------------------------------------------------------------- /module09/exercises/m09ex08.tex: -------------------------------------------------------------------------------- 1 | \chapter{Exercise 08} 2 | \extitle{Regularized Logistic Regression} 3 | \input{exercises/en.ex08_interlude.tex} 4 | \newpage 5 | \turnindir{ex08} 6 | \exnumber{08} 7 | \exfiles{my\_logistic\_regression.py} 8 | \exforbidden{sklearn} 9 | \makeheaderfilesforbidden 10 | 11 | % ================================= % 12 | \section*{Objective} 13 | % --------------------------------- % 14 | In the last exercise, you implemented a regularized version 15 | of the linear regression algorithm, called Ridge regression.\\ 16 | \\ 17 | Now it's time to update your logistic regression classifier as well!\\ 18 | \\ 19 | In the \texttt{scikit-learn} library, the logistic regression implementation 20 | offers a few regularization techniques, which can be selected using 21 | the parameter \texttt{penalty} (L$_2$ is the default).\\ 22 | The goal of this exercise is to update your old \texttt{MyLogisticRegression} class to 23 | take that into account.\\ 24 | 25 | % ================================= % 26 | \section*{Instructions} 27 | % --------------------------------- % 28 | In the \texttt{my\_logistic\_regression.py} file, update your \texttt{MyLogisticRegression} 29 | class according to the following instructions:\\ 30 | \\ 31 | 32 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 33 | class MyLogisticRegression(): 34 | """ 35 | Description: 36 | My personal logistic regression to classify things. 37 | """ 38 | supported_penalities = ['l2'] #We consider l2 penalities only. 
One may want to implement other penalities 39 | 40 | def __init__(self, theta, alpha=0.001, max_iter=1000, penality='l2', lambda_=1.0): 41 | # Check on type, data type, value ... if necessary 42 | self.alpha = alpha 43 | self.max_iter = max_iter 44 | self.theta = theta 45 | self.penality = penality 46 | self.lambda_ = lambda_ if penality in self.supported_penalities else 0 47 | #... Your code ... 48 | 49 | ... other methods ... 50 | \end{minted} 51 | \begin{itemize} 52 | \item \textbf{add} a \texttt{penalty} parameter which can take the following values:\texttt{'l2'}, \texttt{'none'} (default value is \texttt{'l2'}). 53 | \end{itemize} 54 | \begin{itemize} 55 | \item \textbf{update} the \texttt{fit\_(self, x, y)} method: 56 | \begin{itemize} 57 | \item \texttt{if penality == 'l2'}: use a \textbf{regularized version} of the gradient descent. 58 | \item \texttt{if penality = 'none'}: use the \textbf{unregularized version} of the gradient descent from \texttt{module03}. 59 | \end{itemize} 60 | \end{itemize} 61 | 62 | % ================================= % 63 | \section*{Examples} 64 | % --------------------------------- % 65 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 66 | from my_logistic_regression import MyLogisticRegression as mylogr 67 | 68 | theta = np.array([[-2.4], [-1.5], [0.3], [-1.4], [0.7]]) 69 | 70 | # Example 1: 71 | model1 = mylogr(theta, lambda_=5.0) 72 | 73 | model1.penality 74 | # Output 75 | 'l2' 76 | 77 | model1.lambda_ 78 | # Output 79 | 5.0 80 | 81 | # Example 2: 82 | model2 = mylogr(theta, penality=None) 83 | 84 | model2.penality 85 | # Output 86 | None 87 | 88 | model2.lambda_ 89 | # Output 90 | 0.0 91 | 92 | # Example 3: 93 | model3 = mylogr(theta, penality=None, lambda_=2.0) 94 | 95 | model3.penality 96 | # Output 97 | None 98 | 99 | model3.lambda_ 100 | # Output 101 | 0.0 102 | 103 | \end{minted} 104 | 105 | \hint{ 106 | this is also a great use case for decorators... 107 | } 108 | -------------------------------------------------------------------------------- /module09/exercises/m09ex09.tex: -------------------------------------------------------------------------------- 1 | \chapter{Exercise 09} 2 | \extitle{Practicing Regularized Logistic Regression} 3 | %\input{exercises/en.ex09_interlude.tex} 4 | %\newpage 5 | \turnindir{ex09} 6 | \exnumber{09} 7 | \exfiles{solar\_system\_census.py, benchmark\_train.py, models.[csv/yml/pickle]} 8 | \exforbidden{sklearn} 9 | \makeheaderfilesforbidden 10 | 11 | % ================================= % 12 | \section*{Objective} 13 | % --------------------------------- % 14 | It's training time! 15 | Let's practice our updated Logistic Regression with polynomial models.\\ 16 | % ================================= % 17 | \section*{Introduction} 18 | % --------------------------------- % 19 | You have already used the dataset \texttt{solar\_system\_census.csv} 20 | and \texttt{solar\_system\_census\_planets.csv}.\\ 21 | \begin{itemize} 22 | \item The dataset is divided in two files which can be found in the 23 | \texttt{resources} folder: \texttt{solar\_system\_census.csv} and \texttt{solar\_system\_census\_planets.csv} 24 | \item The first file contains biometric information such as the height, weight, and bone density 25 | of several Solar System citizens 26 | \item The second file contains the homeland of each citizen, indicated by its 27 | Space Zipcode representation (i.e. one number for each planet... 
:)) 28 | \end{itemize} 29 | As you should know, Solar citizens come from four registered areas (zipcodes):\\ 30 | 31 | \begin{itemize} 32 | \item The flying cities of Venus ($0$) 33 | \item United Nations of Earth ($1$) 34 | \item Mars Republic ($2$) 35 | \item The Asteroids' Belt colonies ($3$) 36 | \end{itemize} 37 | 38 | % ================================= % 39 | \section*{Instructions} 40 | % --------------------------------- % 41 | % ================================= % 42 | \subsection*{Split the Data} 43 | % --------------------------------- % 44 | 45 | Take your \texttt{solar\_system\_census.csv} dataset and split it into a 46 | \textbf{training set}, a \textbf{cross-validation set} 47 | and a \textbf{test set}. 48 | 49 | % ================================= % 50 | \subsection*{Training and benchmark} 51 | % --------------------------------- % 52 | One part of your submission will be located in the \texttt{benchmark\_train.py} and 53 | \texttt{models.[csv/yml/pickle]} files. 54 | You have to: 55 | \begin{itemize} 56 | \item Train different regularized logistic regression models with a polynomial hypothesis of \textbf{degree 3}. 57 | The models will be trained with different $\lambda$ values, ranging from $0$ to $1$. 58 | Use the one-vs-all method. 59 | \item Evaluate the \textbf{f1 score} of each of the models on the cross-validation set. 60 | You can use the \texttt{f1\_score\_} function that you wrote in \texttt{module08}. 61 | \item Save the different models into \texttt{models.[csv/yml/pickle]}. 62 | \end{itemize} 63 | 64 | % ================================= % 65 | \subsection*{Solar system census program} 66 | % --------------------------------- % 67 | The second and last part of your submission is in \texttt{solar\_system\_census.py}. You have to: 68 | \begin{itemize} 69 | \item Load the different models from \texttt{models.[csv/yml/pickle]} and train from scratch 70 | only the best one on a training set. 71 | \item Visualize the performance of the different models with a bar plot showing the score of 72 | the models given their $\lambda$ value. 73 | \item Print the \textbf{f1 score} of all the models calculated on the test set. 74 | \item Visualize the target values and the predicted values of the best model on the same scatterplot. 75 | Make some effort to produce a readable figure. 
76 | \end{itemize} 77 | 78 | \info{For the second script \texttt{solar\_system\_census.py}, only a train and test set are necessary 79 | as one is simply looking at the performance.} 80 | -------------------------------------------------------------------------------- /module09/exercises/m09ex10.tex: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module09/exercises/m09ex10.tex -------------------------------------------------------------------------------- /module09/useful_resources.tex: -------------------------------------------------------------------------------- 1 | %******************************************************************************% 2 | % % 3 | % Section useful resources % 4 | % for ML Modules % 5 | % % 6 | %******************************************************************************% 7 | 8 | 9 | \chapter*{Notions and resources} 10 | 11 | \section*{Notions of the module} 12 | \begin{itemize} 13 | \item Regularization 14 | \item Overfitting 15 | \item Regularized loss function 16 | \item Regularized gradient descent 17 | \item Regularized linear regression 18 | \item Regularized logistic regression 19 | \end{itemize} 20 | 21 | \section*{Useful Resources} 22 | 23 | You are recommended to use the following material: \href{https://www.coursera.org/learn/machine-learning}{Machine Learning MOOC - Stanford}\\ 24 | \newline 25 | This series of videos is available at no cost: simply log in, select "Enroll for Free", and click "Audit" at the bottom of the pop-up window.\\ 26 | \newline 27 | The following sections of the course are particularly relevant to today's exercises: 28 | 29 | \subsection*{Week 3: Classification} 30 | 31 | \subsubsection*{Classification with logistic regression (already seen in module08)} 32 | \begin{itemize} 33 | \item Motivations 34 | \item Logistic regression 35 | \item Decision boundary 36 | \end{itemize} 37 | 38 | \subsubsection*{Cost function for logistic regression (already seen in module08)} 39 | \begin{itemize} 40 | \item Cost function for logistic regression 41 | \item Simplified Cost Function for Logistic Regression 42 | \end{itemize} 43 | 44 | \subsubsection*{Gradient descent for logistic regression (already seen in module08)} 45 | \begin{itemize} 46 | \item Gradient Descent Implementation 47 | \end{itemize} 48 | 49 | \subsubsection*{The problem of overfitting (New!!!)} 50 | \begin{itemize} 51 | \item The problem of overfitting 52 | \item Addressing overfitting 53 | \item Cost function with regularization 54 | \item Regularized linear regression 55 | \item Regularized logistic regression 56 | \end{itemize} 57 | 58 | \noindent{\emph{All videos above are also available on this 59 | \href{https://youtube.com/playlist?list=PLkDaE6sCZn6FNC6YRfRQc_FbeQrF8BwGI&feature=shared} 60 | {Andrew Ng's YouTube playlist}, videos 31 to 36 (already seen in module08) and 37 to 41 (new!!!).}} -------------------------------------------------------------------------------- /resources/.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | .aux 3 | .log 4 | .out 5 | .pyg 6 | .pyc 7 | .toc 8 | .o 9 | *~ 10 | #*# 11 | -------------------------------------------------------------------------------- /resources/42ai_bootcamps/en.acknowledgements.tex: -------------------------------------------------------------------------------- 1 | \section*{Contact} 2 | % 
--------------------------------- % 3 | You can contact 42AI by email: \href{mailto:contact@42ai.fr}{contact@42ai.fr}\\ 4 | \newline 5 | Thank you for attending 42AI's Machine Learning Bootcamp! 6 | 7 | % ================================= % 8 | \section*{Acknowledgements} 9 | % --------------------------------- % 10 | The Python \& ML bootcamps are the result of a collective effort. We would like to thank:\\ 11 | \begin{itemize} 12 | \item Maxime Choulika (cmaxime), 13 | \item Pierre Peigné (ppeigne), 14 | \item Matthieu David (mdavid), 15 | \item Quentin Feuillade--Montixi (qfeuilla, quentin@42ai.fr) 16 | \item Mathieu Perez (maperez, mathieu.perez@42ai.fr) 17 | \end{itemize} 18 | who supervised the creation and enhancement of the present transcription.\\ 19 | \begin{itemize} 20 | \item Louis Develle (ldevelle, louis@42ai.fr) 21 | \item Owen Roberts (oroberts) 22 | \item Augustin Lopez (aulopez) 23 | \item Luc Lenotre (llenotre) 24 | \item Amric Trudel (amric@42ai.fr) 25 | \item Benjamin Carlier (bcarlier@student.42.fr) 26 | \item Pablo Clement (pclement@student.42.fr) 27 | \item Amir Mahla (amahla, amahla@42ai.fr) 28 | \end{itemize} 29 | for your investment in the creation and development of these modules.\\ 30 | \begin{itemize} 31 | \item All prior participants who took a moment to provide their feedback and helped us improve these bootcamps! 32 | \end{itemize} 33 | 34 | \vfill 35 | \doclicenseThis 36 | -------------------------------------------------------------------------------- /resources/42ai_bootcamps/en.instructions.tex: -------------------------------------------------------------------------------- 1 | %******************************************************************************% 2 | % % 3 | % Common Instructions % 4 | % for Python Projects % 5 | % % 6 | %******************************************************************************% 7 | 8 | \chapter{Common Instructions} 9 | \begin{itemize} 10 | \item The recommended version of Python to use is 3.7. You can 11 | check your Python version with the following command: \texttt{python -V} 12 | 13 | \item The norm: during this bootcamp, it is recommended to follow the 14 | \href{https://www.python.org/dev/peps/pep-0008/}{PEP 8 standards}, though it is not mandatory. 15 | You can install \href{https://pypi.org/project/pycodestyle}{pycodestyle} or 16 | \href{https://black.readthedocs.io/en/stable/}{Black}, which are convenient 17 | packages to check your code. 18 | 19 | \item The function \texttt{eval} is never allowed. 20 | 21 | \item The exercises are ordered from the easiest to the hardest. 22 | 23 | \item Your exercises are going to be evaluated by someone else, 24 | so make sure that your variable names and function names are appropriate and civil. 25 | 26 | \item Your manual is the internet. 27 | 28 | \item If you're planning on using an AI assistant such as an LLM, make sure it helps 29 | you \textbf{learn and practice}, rather than handing you ready-made solutions! Own your tool, don't let it own you. 30 | 31 | \item If you are a student from 42, you can access our Discord server 32 | on the \href{https://discord.com/channels/887850395697807362/887850396314398720}{42 student's associations portal} and ask your 33 | questions to your peers in the dedicated Bootcamp channel. 34 | 35 | \item You can learn more about 42 Artificial Intelligence by visiting \href{https://42-ai.github.io}{our website}. 
36 | 37 | \item If you find any issue or mistake in the subject please create an issue on 38 | \href{https://github.com/42-AI/bootcamp_machine-learning/issues}{42AI repository on Github}. 39 | 40 | \item We encourage you to create test programs for your 41 | project even though this work \textbf{won't have to be 42 | submitted and won't be graded}. It will give you a chance 43 | to easily test your work and your peers’ work. You will find 44 | those tests especially useful during your defence. Indeed, 45 | during defence, you are free to use your tests and/or the 46 | tests of the peer you are evaluating. 47 | 48 | \end{itemize} -------------------------------------------------------------------------------- /resources/latex/42_logo.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/resources/latex/42_logo.pdf -------------------------------------------------------------------------------- /resources/latex/Makefile.LaTeX: -------------------------------------------------------------------------------- 1 | #******************************************************************************# 2 | # # 3 | # Makefile.LaTeX for LaTeX # 4 | # Created on : Mon Oct 5 17:01:29 2015 # 5 | # Make by : Uriel "Korfuri" Corfa # 6 | # Made by : David "Thor" GIRON # 7 | # # 8 | #******************************************************************************# 9 | 10 | RESSOURCE_FILES = $(RELPATH)/Makefile.LaTeX \ 11 | $(RELPATH)/atbeginend.sty \ 12 | $(RELPATH)/tocloft.sty \ 13 | $(RELPATH)/minted.sty \ 14 | $(RELPATH)/multirow.sty \ 15 | $(RELPATH)/42-fr.cls \ 16 | $(RELPATH)/back.pdf \ 17 | $(RELPATH)/42_logo.pdf \ 18 | $(RELPATH)/info.pdf \ 19 | $(RELPATH)/helphint.pdf \ 20 | $(RELPATH)/warn.pdf 21 | 22 | ALL_DEPENDENCIES = $(RESSOURCE_FILES) $(DEPS) 23 | 24 | export TEXINPUTS := ..:.:$(RELPATH):${TEXINPUTS}: 25 | 26 | all: $(TARGETS) 27 | 28 | clean: 29 | rm -f *.aux *.ent *.log *.out *.toc *.nav *.snm *.vrb *.pyg *.fdb_latexmk *.fls 30 | 31 | distclean: clean 32 | rm -f $(TARGETS) 33 | 34 | re: distclean all 35 | 36 | %.pdf: %.tex Makefile $(ALL_DEPENDENCIES) 37 | pdflatex -shell-escape -halt-on-error $< 38 | @if [ $$? = 0 ]; then\ 39 | pdflatex -shell-escape -halt-on-error $<;\ 40 | echo "Build OK";\ 41 | else\ 42 | echo "Build Failed";\ 43 | exit 1;\ 44 | fi 45 | 46 | view: all 47 | (type xpdf > /dev/null 2>&1 && xpdf ${TARGETS}) ||\ 48 | (type evince > /dev/null 2>&1 && evince ${TARGETS}) 49 | 50 | 51 | #******************************************************************************# 52 | -------------------------------------------------------------------------------- /resources/latex/Makefile.template: -------------------------------------------------------------------------------- 1 | # List the pdf's to build. 
foo.tex will produce foo.pdf 2 | TARGETS = foo.pdf 3 | 4 | # List the files included in the slides 5 | DEPS = somePicture.png someSound.flac someOtherPicture.png 6 | 7 | # Relative path to the LaTeX documentclass setup files 8 | # Adapt as needed 9 | RELPATH = $(shell git rev-parse --show-toplevel)/templates/latex/ 10 | 11 | # You should not touch this either 12 | include $(RELPATH)/Makefile.LaTeX 13 | -------------------------------------------------------------------------------- /resources/latex/atbeginend.sty: -------------------------------------------------------------------------------- 1 | % atbeginend.sty 2 | % 3 | % From : http://www.eng.cam.ac.uk/help/tpl/textprocessing/atbeginend.sty 4 | % 5 | % defines 6 | % \BeforeBegin{environment}{code-to-execute} 7 | % \BeforeEnd {environment}{code-to-execute} 8 | % \AfterBegin {environment}{code-to-execute} 9 | % \AfterEnd {environment}{code-to-execute} 10 | % 11 | % Save \begin and \end to \BeginEnvironment and \EndEnvironment 12 | \let\BeginEnvironment=\begin 13 | \let\EndEnvironment=\end 14 | 15 | \def\IfUnDef#1{\expandafter\ifx\csname#1\endcsname\relax} 16 | 17 | % Null command needed to for \nothing{something}=.nothing. 18 | \def\NullCom#1{} 19 | 20 | \def\begin#1{% 21 | % 22 | % if defined \BeforeBeg for this environment, execute it 23 | \IfUnDef{BeforeBeg#1}\else\csname BeforeBeg#1\endcsname\fi% 24 | % 25 | % 26 | % 27 | \IfUnDef{AfterBeg#1}% This is done to skip the command for environments 28 | % which can take arguments, like multicols; YOU MUST NOT 29 | % USE \AfterBegin{...}{...} for such environments! 30 | \let\SaveBegEng=\BeginEnvironment% 31 | \else% 32 | % Start this environment 33 | \BeginEnvironment{#1}% 34 | % and execute code after \begin{environment} 35 | \csname AfterBeg#1\endcsname% 36 | % 37 | \let\SaveBegEng=\NullCom% 38 | \fi% 39 | \SaveBegEng{#1}% 40 | } 41 | 42 | 43 | \def\end#1{% 44 | % 45 | % execute code before \end{environment} 46 | \IfUnDef{BeforeEnd#1}\else\csname BeforeEnd#1\endcsname\fi% 47 | % 48 | % close this environment 49 | \EndEnvironment{#1}% 50 | % 51 | % and execute code after \begin{environment} 52 | \IfUnDef{AfterEnd#1}\else\csname AfterEnd#1\endcsname\fi% 53 | } 54 | 55 | 56 | %% Now, define commands 57 | % \BeforeBegin{environment}{code-to-execute} 58 | % \BeforeEnd {environment}{code-to-execute} 59 | % \AfterBegin {environment}{code-to-execute} 60 | % \AfterEnd {environment}{code-to-execute} 61 | 62 | \def\BeforeBegin#1#2{\expandafter\gdef\csname BeforeBeg#1\endcsname 63 | {#2}} 64 | \def\BeforeEnd #1#2{\expandafter\gdef\csname BeforeEnd#1\endcsname 65 | {#2}} 66 | \def\AfterBegin #1#2{\expandafter\gdef\csname AfterBeg#1\endcsname {#2}} 67 | \def\AfterEnd #1#2{\expandafter\gdef\csname AfterEnd#1\endcsname{#2}} 68 | -------------------------------------------------------------------------------- /resources/latex/back.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/resources/latex/back.pdf -------------------------------------------------------------------------------- /resources/latex/dark-42_logo.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/resources/latex/dark-42_logo.pdf -------------------------------------------------------------------------------- /resources/latex/dark-back.pdf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/resources/latex/dark-back.pdf -------------------------------------------------------------------------------- /resources/latex/helphint.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/resources/latex/helphint.pdf -------------------------------------------------------------------------------- /resources/latex/info.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/resources/latex/info.pdf -------------------------------------------------------------------------------- /resources/latex/redefinition-commands.tex: -------------------------------------------------------------------------------- 1 | 2 | \newcommand{\ailogo}[1]{\def \@ailogo {#1}}\ailogo{assets/42ai_logo.pdf} 3 | 4 | %% Redefine \maketitle 5 | \makeatletter 6 | \def \maketitle { 7 | \begin{titlepage} 8 | \begin{center} 9 | %\begin{figure}[t] 10 | %\includegraphics[height=8cm]{\@ailogo} 11 | \includegraphics[height=8cm]{assets/42ai_logo.pdf} 12 | %\end{figure} 13 | \vskip 5em 14 | {\huge \@title} 15 | \vskip 2em 16 | {\LARGE \@subtitle} 17 | \vskip 4em 18 | \end{center} 19 | %\begin{center} 20 | %\@author 21 | %\end{center} 22 | %\vskip 5em 23 | \vfill 24 | \begin{center} 25 | \emph{\summarytitle : \@summary} 26 | \end{center} 27 | \vspace{2cm} 28 | %\vskip 5em 29 | %\doclicenseThis 30 | \end{titlepage} 31 | } 32 | \makeatother 33 | 34 | \makeatletter 35 | \def \makeheaderfilesforbidden 36 | { 37 | \noindent 38 | \begin{tabularx}{\textwidth}{|X X X X|} 39 | \hline 40 | \multicolumn{1}{|>{\raggedright}m{1cm}|} 41 | {\vskip 2mm \includegraphics[height=1cm]{assets/42ai_logo.pdf}} & 42 | \multicolumn{2}{>{\centering}m{12cm}}{\small Exercise : \@exnumber } & 43 | \multicolumn{1}{ >{\raggedleft}p{1.5cm}|} 44 | %% {\scriptsize points : \@exscore} \\ \hline 45 | {} \\ \hline 46 | 47 | \multicolumn{4}{|>{\centering}m{15cm}|} 48 | {\small \@extitle} \\ \hline 49 | 50 | \multicolumn{4}{|>{\raggedright}m{15cm}|} 51 | {\small Turn-in directory : \ttfamily 52 | $ex\@exnumber/$ } 53 | \\ \hline 54 | \multicolumn{4}{|>{\raggedright}m{15cm}|} 55 | {\small Files to turn in : \ttfamily \@exfiles } 56 | \\ \hline 57 | 58 | \multicolumn{4}{|>{\raggedright}m{15cm}|} 59 | {\small Forbidden functions : \ttfamily \@exforbidden } 60 | \\ \hline 61 | 62 | %% \multicolumn{4}{|>{\raggedright}m{15cm}|} 63 | %% {\small Remarks : \ttfamily \@exnotes } 64 | %% \\ \hline 65 | \end{tabularx} 66 | %% \exnotes 67 | \exrules 68 | \exmake 69 | \exauthorize{None} 70 | \exforbidden{None} 71 | \extitle{} 72 | \exnumber{} 73 | } 74 | \makeatother 75 | 76 | %% Syntactic highlights 77 | \makeatletter 78 | \newenvironment{pythoncode}{% 79 | \VerbatimEnvironment 80 | \usemintedstyle{emacs} 81 | \minted@resetoptions 82 | \setkeys{minted@opt}{bgcolor=black,formatcom=\color{lightgrey},fontsize=\scriptsize} 83 | \begin{figure}[ht!] 
84 | \centering 85 | \begin{minipage}{16cm} 86 | \begin{VerbatimOut}{\jobname.pyg}} 87 | {%[ 88 | \end{VerbatimOut} 89 | \minted@pygmentize{c} 90 | \DeleteFile{\jobname.pyg} 91 | \end{minipage} 92 | \end{figure}} 93 | \makeatother -------------------------------------------------------------------------------- /resources/latex/warn.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/resources/latex/warn.pdf -------------------------------------------------------------------------------- /version: -------------------------------------------------------------------------------- 1 | 5.1.1 --------------------------------------------------------------------------------