├── .github └── workflows │ └── latex-builder.yml ├── .gitignore ├── .vscode └── settings.json ├── CHANGELOG.md ├── CODE_OF_CONDUCT.md ├── Dockerfile ├── LICENSE ├── Makefile ├── README.md ├── assets └── 42ai_logo.png ├── build ├── module05.pdf ├── module06.pdf ├── module07.pdf ├── module08.pdf └── module09.pdf ├── module05 ├── Makefile ├── assets │ ├── 42ai_logo.pdf │ ├── Default.png │ ├── Evaluate.png │ ├── Improve.png │ ├── Predict.png │ ├── bad_pred_with_distance.png │ ├── bad_prediction.png │ ├── plot1.png │ ├── plot2.png │ ├── plot3.png │ ├── plotcost1.png │ ├── plotcost2.png │ ├── plotcost3.png │ └── sigmoid.png ├── en.subject.tex ├── exercises │ ├── en.ex02_interlude.tex │ ├── en.ex03_interlude.tex │ ├── en.ex06_interlude.tex │ ├── en.ex08_interlude.tex │ ├── m05ex00.tex │ ├── m05ex01.tex │ ├── m05ex02.tex │ ├── m05ex03.tex │ ├── m05ex04.tex │ ├── m05ex05.tex │ ├── m05ex06.tex │ ├── m05ex07.tex │ ├── m05ex08.tex │ └── m05ex09.tex └── useful_resources.tex ├── module06 ├── Makefile ├── assets │ ├── 42ai_logo.pdf │ ├── Improve.png │ ├── Improve2.png │ ├── ex03_cost_1.png │ ├── ex03_cost_2.png │ ├── ex03_cost_3.png │ ├── ex03_interlude_cost.png │ ├── ex03_interlude_plot.png │ ├── ex04_J_vs_t1.png │ ├── ex04_score_vs_bluepills.png │ ├── ex05_price_vs_Tmeters_part1.png │ ├── ex05_price_vs_Tmeters_part2.png │ ├── ex05_price_vs_age_part1.png │ ├── ex05_price_vs_age_part2.png │ ├── ex05_price_vs_thrust_part1.png │ ├── ex05_price_vs_thrust_part2.png │ ├── ex06_sellprice_ne_lgd_vs_age.png │ ├── ex07_3Dplot_data.png │ ├── ex07_hypo_test_part1.png │ └── ex07_learning_rate.png ├── attachments │ ├── are_blue_pills_magics.csv │ └── are_blue_pills_magics_description.txt ├── en.subject.tex ├── exercises │ ├── en.ex00_interlude.tex │ ├── en.ex01_interlude.tex │ ├── en.ex02_interlude.tex │ ├── en.ex05_interlude.tex │ ├── m06ex00.tex │ ├── m06ex01.tex │ ├── m06ex02.tex │ ├── m06ex03.tex │ ├── m06ex04.tex │ ├── m06ex05.tex │ └── m06ex06.tex └── useful_resources.tex ├── module07 ├── Makefile ├── assets │ ├── 42ai_logo.pdf │ ├── Default.png │ ├── Evaluate.png │ ├── Improve.png │ ├── Predict.png │ ├── bad_pred_with_distance.png │ ├── ex07_price_vs_Tmeters_part1.png │ ├── ex07_price_vs_Tmeters_part2.png │ ├── ex07_price_vs_age_part1.png │ ├── ex07_price_vs_age_part2.png │ ├── ex07_price_vs_thrust_part1.png │ ├── ex07_price_vs_thrust_part2.png │ ├── ex12_data.png │ ├── ex12_plot.png │ ├── overfitt.png │ ├── overfitt_with_dots.png │ └── polynomial_straight_line.png ├── attachments │ ├── are_blue_pills_magics.csv │ ├── are_blue_pills_magics_description.txt │ ├── space_avocado.csv │ ├── spacecraft_data.csv │ └── spacecraft_data_description.txt ├── en.subject.tex ├── exercises │ ├── en.ex00_interlude.tex │ ├── en.ex01_interlude.tex │ ├── en.ex02_interlude.tex │ ├── en.ex03_interlude.tex │ ├── en.ex04_interlude.tex │ ├── en.ex07_interlude.tex │ ├── en.ex08_interlude.tex │ ├── en.ex09_interlude.tex │ ├── m07ex00.tex │ ├── m07ex01.tex │ ├── m07ex02.tex │ ├── m07ex03.tex │ ├── m07ex04.tex │ ├── m07ex05.tex │ ├── m07ex06.tex │ ├── m07ex07.tex │ ├── m07ex08.tex │ ├── m07ex09.tex │ └── m07ex10.tex └── useful_resources.tex ├── module08 ├── Makefile ├── assets │ ├── -log_1-x.png │ ├── -log_x.png │ ├── 42ai_logo.pdf │ ├── Default.png │ ├── Evaluate.png │ ├── Improve.png │ ├── Predict.png │ ├── figure1_3Dplot_dataset.png │ ├── log_loss.png │ └── sigmoid.png ├── attachments │ ├── data.csv │ ├── solar_system_census.csv │ └── solar_system_census_planets.csv ├── en.subject.tex ├── exercises │ ├── en.ex00_interlude.tex │ ├── 
en.ex01_interlude.tex │ ├── en.ex02_interlude.tex │ ├── en.ex03_interlude.tex │ ├── en.ex04_interlude.tex │ ├── en.ex05_interlude.tex │ ├── en.ex08_interlude.tex │ ├── m08ex00.tex │ ├── m08ex01.tex │ ├── m08ex02.tex │ ├── m08ex03.tex │ ├── m08ex04.tex │ ├── m08ex05.tex │ ├── m08ex06.tex │ ├── m08ex07.tex │ ├── m08ex08.tex │ └── m08ex09.tex └── useful_resources.tex ├── module09 ├── Makefile ├── assets │ ├── 42ai_logo.pdf │ ├── Evaluate.png │ └── Improve.png ├── attachments │ ├── solar_system_census.csv │ ├── solar_system_census_planets.csv │ └── space_avocado.csv ├── en.subject.tex ├── exercises │ ├── en.ex01_interlude.tex │ ├── en.ex04_interlude.tex │ ├── en.ex06_interlude.tex │ ├── en.ex08_interlude.tex │ ├── en.ex10_interlude.tex │ ├── m09ex00.tex │ ├── m09ex01.tex │ ├── m09ex02.tex │ ├── m09ex03.tex │ ├── m09ex04.tex │ ├── m09ex05.tex │ ├── m09ex06.tex │ ├── m09ex07.tex │ ├── m09ex08.tex │ ├── m09ex09.tex │ └── m09ex10.tex └── useful_resources.tex ├── resources ├── .gitignore ├── 42ai_bootcamps │ ├── en.acknowledgements.tex │ └── en.instructions.tex └── latex │ ├── 42-crea-en.cls │ ├── 42-crea-fr.cls │ ├── 42-en.cls │ ├── 42-es.cls │ ├── 42-fr.cls │ ├── 42-hy.cls │ ├── 42-it.cls │ ├── 42-ja.cls │ ├── 42-ko.cls │ ├── 42-nl.cls │ ├── 42-pt.cls │ ├── 42-ro.cls │ ├── 42_logo.pdf │ ├── Makefile.LaTeX │ ├── Makefile.template │ ├── atbeginend.sty │ ├── back.pdf │ ├── dark-42-fr.cls │ ├── dark-42_logo.pdf │ ├── dark-back.pdf │ ├── helphint.pdf │ ├── info.pdf │ ├── minted.sty │ ├── multirow.sty │ ├── redefinition-commands.tex │ ├── tocloft.sty │ └── warn.pdf └── version /.github/workflows/latex-builder.yml: -------------------------------------------------------------------------------- 1 | 2 | name: Latex Builder 3 | 4 | on: [push] 5 | 6 | jobs: 7 | make-it: 8 | permissions: 9 | contents: write 10 | runs-on: ubuntu-latest 11 | steps: 12 | - name: Checkout repo 13 | uses: actions/checkout@v4 14 | - name: Install LaTex utilities 15 | run: sudo apt update && sudo apt-get install -y texlive-full 16 | - name: Get version from file 17 | run: echo "::set-output name=version::$(cat version)" 18 | id: version 19 | - name: Build PDFs 20 | run: make 21 | release-it: 22 | permissions: 23 | contents: write 24 | runs-on: ubuntu-latest 25 | if: contains(github.ref, 'master') 26 | needs: [make-it] 27 | steps: 28 | - name: Checkout repository 29 | uses: actions/checkout@v4 30 | - name: Install LaTex utilities 31 | run: sudo apt update && sudo apt-get install -y texlive-full 32 | - name: Get version from file 33 | id: get_version 34 | run: echo "VERSION=version/$(cat version)" >> $GITHUB_ENV 35 | - name: Name release from version 36 | id: get_release 37 | run: echo "RELEASE=release_$(cat version)" >> $GITHUB_ENV 38 | - name: Build PDFs 39 | run: make 40 | - name: Upload PDFs archives 41 | uses: actions/upload-artifact@v4 42 | with: 43 | name: modules.pdf 44 | path: | 45 | build/module05.pdf 46 | build/module06.pdf 47 | build/module07.pdf 48 | build/module08.pdf 49 | build/module09.pdf 50 | - name: Bundle Release 51 | uses: softprops/action-gh-release@v2 52 | with: 53 | files: | 54 | build/module05.pdf 55 | build/module06.pdf 56 | build/module07.pdf 57 | build/module08.pdf 58 | build/module09.pdf 59 | name: ${{env.RELEASE}} 60 | tag_name: ${{env.VERSION}} 61 | make_latest: true 62 | body_path: CHANGELOG.md 63 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | answers/ 2 | 3 | # 
Byte-compiled / optimized / DLL files 4 | __pycache__/ 5 | *.py[cod] 6 | *$py.class 7 | .DS_Store 8 | 9 | # C extensions 10 | *.so 11 | 12 | # Latex compilation files 13 | *.fdb_latexmk 14 | *.fls 15 | *.pdf.version 16 | 17 | # VSCode 18 | .vscode/ 19 | 20 | # Distribution / packaging 21 | .Python 22 | develop-eggs/ 23 | dist/ 24 | downloads/ 25 | eggs/ 26 | .eggs/ 27 | lib/ 28 | lib64/ 29 | parts/ 30 | sdist/ 31 | var/ 32 | wheels/ 33 | *.egg-info/ 34 | .installed.cfg 35 | *.egg 36 | MANIFEST 37 | 38 | # PyInstaller 39 | # Usually these files are written by a python script from a template 40 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 41 | *.manifest 42 | *.spec 43 | 44 | # Installer logs 45 | pip-log.txt 46 | pip-delete-this-directory.txt 47 | 48 | # Unit test / coverage reports 49 | htmlcov/ 50 | .tox/ 51 | .coverage 52 | .coverage.* 53 | .cache 54 | nosetests.xml 55 | coverage.xml 56 | *.cover 57 | .hypothesis/ 58 | .pytest_cache/ 59 | 60 | # Translations 61 | *.mo 62 | *.pot 63 | 64 | # Django stuff: 65 | *.log 66 | local_settings.py 67 | db.sqlite3 68 | 69 | # Flask stuff: 70 | instance/ 71 | .webassets-cache 72 | 73 | # Scrapy stuff: 74 | .scrapy 75 | 76 | # Sphinx documentation 77 | docs/_build/ 78 | 79 | # PyBuilder 80 | target/ 81 | 82 | # Jupyter Notebook 83 | .ipynb_checkpoints 84 | *.ipynb 85 | 86 | # pyenv 87 | .python-version 88 | 89 | # celery beat schedule file 90 | celerybeat-schedule 91 | 92 | # SageMath parsed files 93 | *.sage.py 94 | 95 | # Environments 96 | .env 97 | .venv 98 | env/ 99 | venv/ 100 | ENV/ 101 | env.bak/ 102 | venv.bak/ 103 | .idea/ 104 | 105 | # Spyder project settings 106 | .spyderproject 107 | .spyproject 108 | 109 | # Rope project settings 110 | .ropeproject 111 | 112 | # mkdocs documentation 113 | /site 114 | 115 | # mypy 116 | .mypy_cache/ 117 | .idea/ 118 | day00/.DS_Store 119 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "workbench.colorCustomizations": { 3 | "activityBar.activeBackground": "#fc8099", 4 | "activityBar.activeBorder": "#b4fda2", 5 | "activityBar.background": "#fc8099", 6 | "activityBar.foreground": "#15202b", 7 | "activityBar.inactiveForeground": "#15202b99", 8 | "activityBarBadge.background": "#b4fda2", 9 | "activityBarBadge.foreground": "#15202b", 10 | "sash.hoverBorder": "#fc8099", 11 | "statusBar.background": "#fb4e71", 12 | "statusBar.foreground": "#15202b", 13 | "statusBarItem.hoverBackground": "#fa1c49", 14 | "statusBarItem.remoteBackground": "#fb4e71", 15 | "statusBarItem.remoteForeground": "#15202b", 16 | "titleBar.activeBackground": "#fb4e71", 17 | "titleBar.activeForeground": "#15202b", 18 | "titleBar.inactiveBackground": "#fb4e7199", 19 | "titleBar.inactiveForeground": "#15202b99" 20 | }, 21 | "peacock.color": "#fb4e71" 22 | } -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # 5.1.1 2 | 3 | - Displaced recurring files (acknowledgements and instructions) to a new `/resources/42ai_bootcamps/` folder. 4 | - Added sat forms within the conclusion of each day. 5 | - Updated `instructions` to add a reference to Black package, and a cautionary word about using LLMs. 
6 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as 6 | contributors and maintainers pledge to making participation in our project and 7 | our community a harassment-free experience for everyone, regardless of age, body 8 | size, disability, ethnicity, sex characteristics, gender identity and expression, 9 | level of experience, education, socio-economic status, nationality, personal 10 | appearance, race, religion, or sexual identity and orientation. 11 | 12 | ## Our Standards 13 | 14 | Examples of behavior that contributes to creating a positive environment 15 | include: 16 | 17 | * Using welcoming and inclusive language 18 | * Being respectful of differing viewpoints and experiences 19 | * Gracefully accepting constructive criticism 20 | * Focusing on what is best for the community 21 | * Showing empathy towards other community members 22 | 23 | Examples of unacceptable behavior by participants include: 24 | 25 | * The use of sexualized language or imagery and unwelcome sexual attention or 26 | advances 27 | * Trolling, insulting/derogatory comments, and personal or political attacks 28 | * Public or private harassment 29 | * Publishing others' private information, such as a physical or electronic 30 | address, without explicit permission 31 | * Other conduct which could reasonably be considered inappropriate in a 32 | professional setting 33 | 34 | ## Our Responsibilities 35 | 36 | Project maintainers are responsible for clarifying the standards of acceptable 37 | behavior and are expected to take appropriate and fair corrective action in 38 | response to any instances of unacceptable behavior. 39 | 40 | Project maintainers have the right and responsibility to remove, edit, or 41 | reject comments, commits, code, wiki edits, issues, and other contributions 42 | that are not aligned to this Code of Conduct, or to ban temporarily or 43 | permanently any contributor for other behaviors that they deem inappropriate, 44 | threatening, offensive, or harmful. 45 | 46 | ## Scope 47 | 48 | This Code of Conduct applies both within project spaces and in public spaces 49 | when an individual is representing the project or its community. Examples of 50 | representing a project or community include using an official project e-mail 51 | address, posting via an official social media account, or acting as an appointed 52 | representative at an online or offline event. Representation of a project may be 53 | further defined and clarified by project maintainers. 54 | 55 | ## Enforcement 56 | 57 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 58 | reported by contacting the project team at contact@42ai.fr. All 59 | complaints will be reviewed and investigated and will result in a response that 60 | is deemed necessary and appropriate to the circumstances. The project team is 61 | obligated to maintain confidentiality with regard to the reporter of an incident. 62 | Further details of specific enforcement policies may be posted separately. 63 | 64 | Project maintainers who do not follow or enforce the Code of Conduct in good 65 | faith may face temporary or permanent repercussions as determined by other 66 | members of the project's leadership. 
67 | 68 | ## Attribution 69 | 70 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, 71 | available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html 72 | 73 | [homepage]: https://www.contributor-covenant.org 74 | 75 | For answers to common questions about this code of conduct, see 76 | https://www.contributor-covenant.org/faq 77 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM blang/latex:ubuntu 2 | 3 | RUN pwd 4 | 5 | COPY . /data/bootcamp_machine-learning 6 | 7 | WORKDIR /data/bootcamp_machine-learning 8 | 9 | RUN pwd 10 | 11 | RUN make \ 12 | && ls -la . \ 13 | && ls -la module05 14 | 15 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | DIRECTORIES = module05 \ 2 | module06 \ 3 | module07 \ 4 | module08 \ 5 | module09 6 | 7 | TARGETS_DIRS = $(DIRECTORIES:%=%/en.subject.pdf) 8 | 9 | TARGETS = $(DIRECTORIES:%=%.pdf) 10 | 11 | all: clean dirs 12 | 13 | %.pdf: 14 | @$(MAKE) -C `dirname $@` 15 | @$(MAKE) clean -C `dirname $@` 16 | cp $@ build/`dirname $@`.pdf 17 | 18 | dirs: $(TARGETS_DIRS) 19 | 20 | build_pdfs: 21 | sudo docker run -v "$(shell pwd)/build:/data/bootcamp_machine-learning/build" -i latex_build make 22 | 23 | build_builder: 24 | sudo docker build -t latex_build . 25 | 26 | clean: 27 | rm -rf $(TARGETS) $(TARGETS_DIRS) 28 | 29 | debug: 30 | echo $(TARGETS_DIRS) 31 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |

2 | 42 AI Logo 3 |

4 | 5 |

6 | Bootcamp Machine Learning 7 |

8 |

9 | One week to learn the basics in Machine Learning! :robot: 10 |

11 |
12 | 13 | ### Table of Contents 14 | 15 | - [Download](#download) 16 | - [Curriculum](#curriculum) 17 | - [Module05 - Stepping Into Machine Learning](#module05---stepping-into-machine-learning) 18 | - [Module06 - Univariate Linear Regression](#module06---univariate-linear-regression) 19 | - [Module07 - Multivariate Linear Regression](#module07---multivariate-linear-regression) 20 | - [Module08 - Logistic Regression](#module08---logistic-regression) 21 | - [Module09 - Regularization](#module09---regularization) 22 | - [Acknowledgements](#acknowledgements) 23 | - [Contributors](#contributors) 24 | - [Beta-testers](#beta-testers) 25 |
26 | 
27 | This project is a Machine Learning bootcamp created by [42 AI](http://www.42ai.fr).
28 | 
29 | As the notions covered in this bootcamp can be complex, we very strongly advise students to complete the following bootcamp first:
30 | 
31 | - [Python](https://github.com/42-AI/bootcamp_python)
32 | 
33 | 42 Artificial Intelligence is a student organization of the Paris campus of the school 42. Our purpose is to foster discussion, learning, and interest in the field of artificial intelligence, by organizing various activities such as lectures and workshops.
34 | 
35 | 
36 | ## Download
37 | 
38 | The PDF files of each module can be downloaded from our release page:
39 | [https://github.com/42-AI/bootcamp_machine-learning/releases](https://github.com/42-AI/bootcamp_machine-learning/releases)
40 | 
41 | ## Curriculum
42 | 
43 | ### Module05 - Stepping Into Machine Learning
44 | 
45 | **Get started with some linear algebra and statistics**
46 | 
47 | > Sum, mean, variance, standard deviation, vectors and matrices operations.
48 | > Hypothesis, model, regression, loss function.
49 | 
50 | ### Module06 - Univariate Linear Regression
51 | 
52 | **Implement a method to improve your model's performance: gradient descent, and discover the notion of normalization**
53 | 
54 | > Gradient descent, linear regression, normalization.
55 | 
56 | ### Module07 - Multivariate Linear Regression
57 | 
58 | **Extend linear regression to handle more than one feature, build polynomial models and detect overfitting**
59 | 
60 | > Multivariate linear hypothesis, multivariate linear gradient descent, polynomial models.
61 | > Training and test sets, overfitting.
62 | 
63 | ### Module08 - Logistic Regression
64 | 
65 | **Discover your first classification algorithm: logistic regression!**
66 | 
67 | > Logistic hypothesis, logistic gradient descent, logistic regression, multiclass classification.
68 | > Accuracy, precision, recall, F1-score, confusion matrix.
69 | 
70 | ### Module09 - Regularization
71 | 
72 | **Fight overfitting!**
73 | 
74 | > Regularization, overfitting. Regularized loss function, regularized gradient descent.
75 | > Regularized linear regression. Regularized logistic regression.
76 | 
78 | 79 | ## Acknowledgements 80 | 81 | ### Contributors 82 | 83 | - Amric Trudel (amric@42ai.fr) 84 | - Maxime Choulika (maxime@42ai.fr) 85 | - Pierre Peigné (ppeigne@student.42.fr) 86 | - Matthieu David (mdavid@student.42.fr) 87 | - Benjamin Carlier (bcarlier@student.42.fr) 88 | - Pablo Clement (pclement@student.42.fr) 89 | - Amir Mahla (amahla@42ai.fr) 90 | - Mathieu Perez (mathieu.perez@42ai.fr) 91 | 92 | ### Beta-testers 93 | 94 | - Richard Blanc (riblanc@student.42.fr) 95 | - Solveig Gaydon Ohl (sgaydon-@student.42.fr) 96 | - Quentin Feuillade--Montixi (qfeuilla@student.42.fr) 97 | -------------------------------------------------------------------------------- /assets/42ai_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/assets/42ai_logo.png -------------------------------------------------------------------------------- /build/module05.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/build/module05.pdf -------------------------------------------------------------------------------- /build/module06.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/build/module06.pdf -------------------------------------------------------------------------------- /build/module07.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/build/module07.pdf -------------------------------------------------------------------------------- /build/module08.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/build/module08.pdf -------------------------------------------------------------------------------- /build/module09.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/build/module09.pdf -------------------------------------------------------------------------------- /module05/Makefile: -------------------------------------------------------------------------------- 1 | # List the pdf's to build. 
foo.tex will produce foo.pdf 2 | TARGETS = en.subject.pdf 3 | 4 | # List the files included in the slides 5 | DEPS = exercises/en.ex02_interlude.tex \ 6 | exercises/en.ex03_interlude.tex \ 7 | exercises/en.ex06_interlude.tex \ 8 | exercises/en.ex08_interlude.tex \ 9 | ../resources/42ai_bootcamps/en.instructions.tex \ 10 | ../resources/42ai_bootcamps/en.acknowledgements.tex \ 11 | ../resources/latex/redefinition-commands.tex \ 12 | useful_resources.tex 13 | 14 | # Relative path to the LaTeX documentclass setup files 15 | # Adapt as needed 16 | # RELPATH = $(shell git rev-parse --show-toplevel)/resources/latex/ 17 | # RELPATH for github actions: 18 | RELPATH = $(shell dirname `pwd`)/resources/latex/ 19 | 20 | # You should not touch this either 21 | include $(RELPATH)/Makefile.LaTeX 22 | -------------------------------------------------------------------------------- /module05/assets/42ai_logo.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module05/assets/42ai_logo.pdf -------------------------------------------------------------------------------- /module05/assets/Default.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module05/assets/Default.png -------------------------------------------------------------------------------- /module05/assets/Evaluate.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module05/assets/Evaluate.png -------------------------------------------------------------------------------- /module05/assets/Improve.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module05/assets/Improve.png -------------------------------------------------------------------------------- /module05/assets/Predict.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module05/assets/Predict.png -------------------------------------------------------------------------------- /module05/assets/bad_pred_with_distance.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module05/assets/bad_pred_with_distance.png -------------------------------------------------------------------------------- /module05/assets/bad_prediction.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module05/assets/bad_prediction.png -------------------------------------------------------------------------------- /module05/assets/plot1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module05/assets/plot1.png -------------------------------------------------------------------------------- 
/module05/assets/plot2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module05/assets/plot2.png -------------------------------------------------------------------------------- /module05/assets/plot3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module05/assets/plot3.png -------------------------------------------------------------------------------- /module05/assets/plotcost1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module05/assets/plotcost1.png -------------------------------------------------------------------------------- /module05/assets/plotcost2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module05/assets/plotcost2.png -------------------------------------------------------------------------------- /module05/assets/plotcost3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module05/assets/plotcost3.png -------------------------------------------------------------------------------- /module05/assets/sigmoid.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module05/assets/sigmoid.png -------------------------------------------------------------------------------- /module05/exercises/en.ex03_interlude.tex: -------------------------------------------------------------------------------- 1 | %******************************************************************************% 2 | % % 3 | % Interlude % 4 | % for Machine Learning module % 5 | % % 6 | %******************************************************************************% 7 | 8 | \section*{Interlude - A Simple Linear Algebra Trick} 9 | 10 | As you know, vectors and matrices can be multiplied to perform linear combinations.\\ 11 | Let's do a little linear algebra trick to optimize our calculation and use matrix multiplication. 12 | If we add a column full of $1$'s to our vector of examples $x$, we can create the following matrix: 13 | 14 | $$ 15 | X' = \begin{bmatrix} 1 & x^{(1)} \\ \vdots & \vdots \\ 1 & x^{(m)}\end{bmatrix} 16 | $$ 17 | 18 | We can then rewrite our hypothesis as: 19 | 20 | $$ 21 | \hat{y}^{(i)} = \theta \cdot x'^{(i)} = \begin{bmatrix}\theta_0 \\ \theta_1 \end{bmatrix} \cdot \begin{bmatrix} 1 & x^{(i)} \end{bmatrix} = \theta_0 + \theta_1 x^{(i)} 22 | $$ 23 | 24 | Therefore, the calculation of each $\hat{y}^{(i)}$can be done with only one vector multiplication. 
25 | 26 | But we can even go further, by calculating the whole $\hat{y}$ vector in one operation: 27 | 28 | $$ 29 | \hat{y} = X' \cdot \theta = \begin{bmatrix} 1 & x^{(1)} \\ \vdots & \vdots \\ 1 & x^{(m)}\end{bmatrix}\cdot\begin{bmatrix}\theta_0 \\ \theta_1 \end{bmatrix} = \begin{bmatrix} \theta_0 + \theta_1 x^{(1)} \\ \vdots \\ \theta_0 + \theta_1 x^{(m)} \end{bmatrix} 30 | $$ 31 | 32 | We can now get to the same result as in the previous exercise with just a single multiplication between our brand new $X'$ matrix and the $\theta$ vector! 33 | 34 | \subsection*{A Note on Notation} 35 | In further Interludes, we will use the following convention: 36 | \begin{itemize} 37 | \item Capital letters represent matrices (e.g.: $X$) 38 | \item Lower-case letters represent vectors and scalars (e.g.: $x^{(i)}$, $y$) 39 | \end{itemize} -------------------------------------------------------------------------------- /module05/exercises/en.ex06_interlude.tex: -------------------------------------------------------------------------------- 1 | %******************************************************************************% 2 | % % 3 | % Interlude % 4 | % for Machine Learning module % 5 | % % 6 | %******************************************************************************% 7 | 8 | \section*{Interlude - Evaluate} 9 | 10 | \begin{figure}[h!] 11 | \centering 12 | \includegraphics[scale=0.25]{assets/Evaluate.png} 13 | % \caption{cycle evaluate} 14 | \end{figure} 15 | 16 | \subsection*{Introducing the loss function} 17 | 18 | How good is our model? 19 | It is hard to say just by simply looking at the plots! 20 | We can clearly observe that certain regression lines seem to fit the data better than others, but it would be convenient to find a way to measure it. 21 | 22 | \begin{figure}[h!] 23 | \centering 24 | \includegraphics[scale=0.55]{assets/bad_prediction.png} 25 | \caption{bad prediction} 26 | \end{figure} 27 | 28 | To evaluate our model, we are going to use a \textbf{metric} called \textbf{the loss function} (sometimes called \textbf{cost function}).\\ 29 | \newline 30 | The loss function tells us how bad our model is performing, how much it \textit{costs} us to use it, how much information we \textit{lose} when we use it. 31 | If the model is good, we won't lose that much; if it's terrible instead, we will have a high loss! 32 | 33 | The metric you choose will deeply impact the evaluation (and therefore also the training) of your model. 34 | 35 | A frequent way to evaluate the performance of a regression model is to measure the distance between each predicted value ($\hat{y}^{(i)}$) and the real value it tries to predict (${y}^{(i)}$). The distances are then squared, and averaged to get one single metric, denoted $J$: 36 | 37 | $$ 38 | J(\theta) = \frac{1}{2m}\sum_{i=1}^{m}(\hat{y}^{(i)} - y^{(i)})^2 39 | $$ 40 | 41 | The smaller, the better! 42 | 43 | \begin{figure}[h!] 
44 | \centering 45 | \includegraphics[scale=0.55]{assets/bad_pred_with_distance.png} 46 | \caption{bad prediction with distance} 47 | \end{figure} 48 | -------------------------------------------------------------------------------- /module05/exercises/en.ex08_interlude.tex: -------------------------------------------------------------------------------- 1 | %******************************************************************************% 2 | % % 3 | % Interlude % 4 | % for Machine Learning module % 5 | % % 6 | %******************************************************************************% 7 | 8 | \section*{Interlude - Fifty Shades of Linear Algebra} 9 | 10 | In the last exercise, we implemented the \textbf{loss function} in two subfunctions. 11 | It worked, but it's not very pretty. 12 | What if we could do it all in one step, with linear algebra? 13 | 14 | As we did with the hypothesis, we can use a vectorized equation to improve the calculations of the loss function. 15 | 16 | So now let's take a look at how squaring and averaging can be performed (more or less) in a single matrix multiplication! 17 | 18 | $$ 19 | J(\theta) = \frac{1}{2m}\sum_{i=1}^{m}(\hat{y}^{(i)} - y^{(i)})^2 20 | $$ 21 | $$ 22 | J(\theta) = \frac{1}{2m}\sum_{i=1}^{m}[(\hat{y}^{(i)} - y^{(i)}) (\hat{y}^{(i)} - y^{(i)})] 23 | $$ 24 | 25 | Now, if we apply the definition of the dot product: 26 | 27 | $$ 28 | J(\theta) = \frac{1}{2m}(\hat{y} - y) \cdot(\hat{y}- y) 29 | $$ -------------------------------------------------------------------------------- /module05/exercises/m05ex02.tex: -------------------------------------------------------------------------------- 1 | \chapter{Exercise 02} 2 | \input{exercises/en.ex02_interlude.tex} 3 | \newpage 4 | \extitle{Simple Prediction} 5 | \turnindir{ex02} 6 | \exnumber{02} 7 | \exfiles{prediction.py} 8 | \exforbidden{any functions which performs prediction} 9 | \makeheaderfilesforbidden 10 | 11 | % ================================= % 12 | \section*{Objective} 13 | % --------------------------------- % 14 | Understand and manipulate the notion of hypothesis in machine learning. 15 | 16 | You must implement the following formula as a function: 17 | $$ 18 | \begin{matrix} 19 | \hat{y}^{(i)} = \theta_0 + \theta_1 x^{(i)} & &\text{ for i = 1, ..., m} 20 | \end{matrix} 21 | $$ 22 | 23 | Where: 24 | \begin{itemize} 25 | \item $x$ is a vector of dimension $m$, the vector of examples/features (without the $y$ values) 26 | \item $\hat{y}$ is a vector of dimension m * 1, the vector of predicted values 27 | \item $\theta$ is a vector of dimension 2 * 1, the vector of parameters 28 | \item $y^{(i)}$ is the $i^{th}$ component of vector $y$ 29 | \item $x^{(i)}$ is the $i^{th}$ component of vector $x$ 30 | \end{itemize} 31 | 32 | % ================================= % 33 | \section*{Instructions} 34 | % --------------------------------- % 35 | In the prediction.py file, write the following function as per the instructions given below: 36 | 37 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 38 | def simple_predict(x, theta): 39 | """Computes the vector of prediction y_hat from two non-empty numpy.ndarray. 40 | Args: 41 | x: has to be an numpy.ndarray, a one-dimensional array of size m. 42 | theta: has to be an numpy.ndarray, a one-dimensional array of size 2. 43 | Returns: 44 | y_hat as a numpy.ndarray, a one-dimensional array of size m. 45 | None if x or theta are empty numpy.ndarray. 46 | None if x or theta dimensions are not appropriate. 
47 | Raises: 48 | This function should not raise any Exception. 49 | """ 50 | ... Your code ... 51 | \end{minted} 52 | 53 | % ================================= % 54 | \section*{Examples} 55 | % --------------------------------- % 56 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 57 | import numpy as np 58 | x = np.arange(1,6) 59 | 60 | # Example 1: 61 | theta1 = np.array([5, 0]) 62 | simple_predict(x, theta1) 63 | # Ouput: 64 | array([5., 5., 5., 5., 5.]) 65 | # Do you understand why y_hat contains only 5s here? 66 | 67 | 68 | # Example 2: 69 | theta2 = np.array([0, 1]) 70 | simple_predict(x, theta2) 71 | # Output: 72 | array([1., 2., 3., 4., 5.]) 73 | # Do you understand why y_hat == x here? 74 | 75 | 76 | # Example 3: 77 | theta3 = np.array([5, 3]) 78 | simple_predict(x, theta3) 79 | # Output: 80 | array([ 8., 11., 14., 17., 20.]) 81 | 82 | 83 | # Example 4: 84 | theta4 = np.array([-3, 1]) 85 | simple_predict(x, theta4) 86 | # Output: 87 | array([-2., -1., 0., 1., 2.]) 88 | \end{minted} -------------------------------------------------------------------------------- /module05/exercises/m05ex03.tex: -------------------------------------------------------------------------------- 1 | \chapter{Exercise 03} 2 | \input{exercises/en.ex03_interlude.tex} 3 | \newpage 4 | \extitle{Add Intercept} 5 | \turnindir{ex03} 6 | \exnumber{03} 7 | \exfiles{tools.py} 8 | \exforbidden{None} 9 | \makeheaderfilesforbidden 10 | 11 | % ================================= % 12 | \section*{Objective} 13 | % --------------------------------- % 14 | 15 | Understand and manipulate the notion of hypothesis in machine learning. 16 | \\ 17 | You must implement a function which adds an extra column of $1$'s on the left side of a given vector or matrix. 18 | 19 | % ================================= % 20 | \section*{Instructions} 21 | % --------------------------------- % 22 | In the tools.py file create the following function as per the instructions given below: 23 | 24 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 25 | def add_intercept(x): 26 | """Adds a column of 1's to the non-empty numpy.array x. 27 | Args: 28 | x: has to be a numpy.array. x can be a one-dimensional (m * 1) or two-dimensional (m * n) array. 29 | Returns: 30 | X, a numpy.array of dimension m * (n + 1). 31 | None if x is not a numpy.array. 32 | None if x is an empty numpy.array. 33 | Raises: 34 | This function should not raise any Exception. 35 | """ 36 | ... Your code ... 
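    # Editor's note: a minimal illustrative sketch, not the only valid answer.
    # It assumes numpy is imported as np at the top of tools.py.
    if not isinstance(x, np.ndarray) or x.size == 0:
        return None
    x2d = x.reshape(-1, 1) if x.ndim == 1 else x          # promote a vector to a column
    return np.hstack((np.ones((x2d.shape[0], 1)), x2d))   # prepend the column of 1's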
37 | \end{minted} 38 | 39 | % ================================= % 40 | \section*{Examples} 41 | % --------------------------------- % 42 | 43 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 44 | import numpy as np 45 | 46 | # Example 1: 47 | x = np.arange(1,6) 48 | add_intercept(x) 49 | # Output: 50 | array([[1., 1.], 51 | [1., 2.], 52 | [1., 3.], 53 | [1., 4.], 54 | [1., 5.]]) 55 | 56 | 57 | # Example 2: 58 | y = np.arange(1,10).reshape((3,3)) 59 | add_intercept(y) 60 | # Output: 61 | array([[1., 1., 2., 3.], 62 | [1., 4., 5., 6.], 63 | [1., 7., 8., 9.]]) 64 | \end{minted} 65 | 66 | -------------------------------------------------------------------------------- /module05/exercises/m05ex04.tex: -------------------------------------------------------------------------------- 1 | \chapter{Exercise 04} 2 | \extitle{Prediction} 3 | \turnindir{ex04} 4 | \exnumber{04} 5 | \exfiles{prediction.py} 6 | \exforbidden{None} 7 | \makeheaderfilesforbidden 8 | 9 | 10 | % ================================= % 11 | \section*{Objective} 12 | % --------------------------------- % 13 | Understand and manipulate the notion of hypothesis in machine learning. 14 | 15 | You must implement the following formula as a function: 16 | 17 | $$ 18 | \begin{matrix} 19 | \hat{y}^{(i)} = \theta_0 + \theta_1 x^{(i)} & &\text{ for i = 1, ..., m} 20 | \end{matrix} 21 | $$ 22 | 23 | Where: 24 | \begin{itemize} 25 | \item $\hat{y}^{(i)}$ is the $i^{th}$ component of vector $\hat{y}$ 26 | \item $\hat{y}$ is a vector of dimension $m$, the vector of predicted values 27 | \item $\theta$ is a vector of dimension $2 \times 1$, the vector of parameters 28 | \item $x^{(i)}$ is the $i^{th}$ component of vector $x$ 29 | \item $x$ is a vector of dimension $m$, the vector of examples 30 | \end{itemize} 31 | 32 | But this time you have to do it with the linear algebra trick! 33 | 34 | $$ 35 | \hat{y} = X' \cdot \theta = 36 | \begin{bmatrix} 37 | 1 & x^{(1)} \\ 38 | \vdots & \vdots \\ 39 | 1 & x^{(m)} 40 | \end{bmatrix} 41 | \cdot 42 | \begin{bmatrix} 43 | \theta_0 \\ 44 | \theta_1 45 | \end{bmatrix} 46 | = \begin{bmatrix} 47 | \theta_0 + \theta_1x^{(1)} \\ 48 | \vdots \\ 49 | \theta_0 + \theta_1x^{(m)} 50 | \end{bmatrix} 51 | $$ 52 | 53 | \warn{ 54 | \begin{itemize} 55 | \item the argument $x$ is an $m$ vector 56 | \item $\theta$ is a $2 \times 1$ vector. 57 | \end{itemize} 58 | } 59 | 60 | You have to transform $x$ into $X'$ to fit the dimension of $\theta$! 61 | 62 | 63 | % ================================= % 64 | \section*{Instructions} 65 | % --------------------------------- % 66 | In the prediction.py file create the following function as per the instructions given below: 67 | \newline 68 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 69 | def predict_(x, theta): 70 | """Computes the vector of prediction y_hat from two non-empty numpy.array. 71 | Args: 72 | x: has to be an numpy.array, a one-dimensional array of size m. 73 | theta: has to be an numpy.array, a two-dimensional array of shape 2 * 1. 74 | Returns: 75 | y_hat as a numpy.array, a two-dimensional array of shape m * 1. 76 | None if x and/or theta are not numpy.array. 77 | None if x or theta are empty numpy.array. 78 | None if x or theta dimensions are not appropriate. 79 | Raises: 80 | This function should not raise any Exceptions. 81 | """ 82 | ... Your code ... 
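    # Editor's note: a minimal illustrative sketch of the X'.theta trick, assuming
    # numpy is imported as np; input validation is only partial here.
    if not isinstance(x, np.ndarray) or not isinstance(theta, np.ndarray):
        return None
    if x.size == 0 or theta.shape != (2, 1):
        return None
    X_prime = np.hstack((np.ones((x.shape[0], 1)), x.reshape(-1, 1)))  # add the 1's column
    return X_prime.dot(theta)                                          # (m, 2) . (2, 1) -> (m, 1)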
83 | \end{minted} 84 | 85 | \section*{Examples} 86 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 87 | import numpy as np 88 | x = np.arange(1,6) 89 | 90 | # Example 1: 91 | theta1 = np.array([[5], [0]]) 92 | predict_(x, theta1) 93 | # Ouput: 94 | array([[5.], [5.], [5.], [5.], [5.]]) 95 | # Do you remember why y_hat contains only 5's here? 96 | 97 | # Example 2: 98 | theta2 = np.array([[0], [1]]) 99 | predict_(x, theta2) 100 | # Output: 101 | array([[1.], [2.], [3.], [4.], [5.]]) 102 | # Do you remember why y_hat == x here? 103 | 104 | # Example 3: 105 | theta3 = np.array([[5], [3]]) 106 | predict_(x, theta3) 107 | # Output: 108 | array([[ 8.], [11.], [14.], [17.], [20.]]) 109 | 110 | 111 | # Example 4: 112 | theta4 = np.array([[-3], [1]]) 113 | predict_(x, theta4) 114 | # Output: 115 | array([[-2.], [-1.], [ 0.], [ 1.], [ 2.]]) 116 | \end{minted} 117 | -------------------------------------------------------------------------------- /module05/exercises/m05ex05.tex: -------------------------------------------------------------------------------- 1 | \chapter{Exercise 05} 2 | \extitle{Let’s Make Nice Plots} 3 | \turnindir{ex05} 4 | \exnumber{05} 5 | \exfiles{plot.py} 6 | \exforbidden{None} 7 | \makeheaderfilesforbidden 8 | 9 | \info{ 10 | For your information, the task we are performing here is called \textbf{regression}. 11 | It means that we are trying to predict a continuous numerical attribute for all examples (like a price, for instance). 12 | Later in the bootcamp, you will see that we can predict other things such as categories. 13 | } 14 | 15 | % ================================= % 16 | \section*{Objective} 17 | % --------------------------------- % 18 | You must implement a function to plot the data and the prediction line (or regression line).\\ 19 | \newline 20 | You will plot the data points (with their x and y values), and the prediction line that represents your hypothesis ($h_{\theta}$). 21 | \newpage 22 | % ================================= % 23 | \section*{Instructions} 24 | % --------------------------------- % 25 | In the plot.py file, create the following function as per the instructions given below: 26 | 27 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 28 | def plot(x, y, theta): 29 | """Plot the data and prediction line from three non-empty numpy.array. 30 | Args: 31 | x: has to be an numpy.array, a one-dimensional array of size m. 32 | y: has to be an numpy.array, a one-dimensional array of size m. 33 | theta: has to be an numpy.array, a two-dimensional array of shape 2 * 1. 34 | Returns: 35 | Nothing. 36 | Raises: 37 | This function should not raise any Exceptions. 38 | """ 39 | ... Your code ... 
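    # Editor's note: a rough illustrative sketch, assuming matplotlib.pyplot is
    # imported as plt; styling (colors, labels) is left to you.
    y_hat = theta[0] + theta[1] * x     # the prediction line
    plt.scatter(x, y)                   # the data points
    plt.plot(x, y_hat, color="orange")  # the regression line
    plt.show()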
40 | \end{minted} 41 | 42 | % ================================= % 43 | \section*{Examples} 44 | % --------------------------------- % 45 | 46 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 47 | import numpy as np 48 | x = np.arange(1,6) 49 | y = np.array([3.74013816, 3.61473236, 4.57655287, 4.66793434, 5.95585554]) 50 | 51 | # Example 1: 52 | theta1 = np.array([[4.5],[-0.2]]) 53 | plot(x, y, theta1) 54 | # Output: 55 | \end{minted} 56 | 57 | \begin{figure}[H] 58 | \centering 59 | \includegraphics[scale=0.6]{assets/plot1.png} 60 | \end{figure} 61 | 62 | \newpage 63 | 64 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 65 | # Example 2: 66 | theta2 = np.array([[-1.5],[2]]) 67 | plot(x, y, theta2) 68 | # Output: 69 | \end{minted} 70 | 71 | \begin{figure}[H] 72 | \centering 73 | \includegraphics[scale=0.6]{assets/plot2.png} 74 | \caption{Example 2} 75 | \end{figure} 76 | 77 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 78 | # Example 3: 79 | theta3 = np.array([[3],[0.3]]) 80 | plot(x, y, theta3) 81 | # Output: 82 | \end{minted} 83 | 84 | \begin{figure}[H] 85 | \centering 86 | \includegraphics[scale=0.6]{assets/plot3.png} 87 | \caption{Example 3} 88 | \end{figure} 89 | 90 | -------------------------------------------------------------------------------- /module05/exercises/m05ex07.tex: -------------------------------------------------------------------------------- 1 | \chapter{Exercise 07} 2 | \extitle{Vectorized loss function} 3 | \turnindir{ex07} 4 | \exnumber{07} 5 | \exfiles{vec\_loss.py} 6 | \exforbidden{None} 7 | \makeheaderfilesforbidden 8 | 9 | % ================================= % 10 | \section*{Objective} 11 | % --------------------------------- % 12 | Understand and experiment with the \textbf{loss function} in machine learning. 13 | 14 | You must implement the following formula as a function: 15 | $$ 16 | \begin{matrix} 17 | J(\theta) & = & \frac{1}{2m}(\hat{y} - y) \cdot(\hat{y}- y) 18 | \end{matrix} 19 | $$ 20 | 21 | Where: 22 | \begin{itemize} 23 | \item $\hat{y}$ is a vector of dimension $m$, the vector of predicted values 24 | \item $y$ is a vector of dimension $m$, the vector of expected values 25 | \end{itemize} 26 | 27 | \newpage 28 | 29 | % ================================= % 30 | \section*{Instructions} 31 | % --------------------------------- % 32 | In the \texttt{vec\_loss.py} file, create the following function as per the instructions given below: 33 | 34 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 35 | def loss_(y, y_hat): 36 | """Computes the half mean-squared-error of two non-empty numpy.arrays, without any for loop. 37 | The two arrays must have the same dimensions. 38 | Args: 39 | y: has to be an numpy.array, a one-dimensional array of size m. 40 | y_hat: has to be an numpy.array, a one-dimensional array of size m. 41 | Returns: 42 | The half mean-squared-error of the two vectors as a float. 43 | None if y or y_hat are empty numpy.array. 44 | None if y and y_hat does not share the same dimensions. 45 | Raises: 46 | This function should not raise any Exceptions. 47 | """ 48 | ... Your code ... 
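    # Editor's note: a minimal illustrative sketch of the dot-product formula,
    # assuming numpy is imported as np; validation is only partial here.
    if y.size == 0 or y.shape != y_hat.shape:
        return None
    diff = y_hat - y
    return float(diff.dot(diff) / (2 * y.shape[0]))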
49 | \end{minted} 50 | 51 | 52 | % ================================= % 53 | \section*{Examples} 54 | % --------------------------------- % 55 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 56 | import numpy as np 57 | X = np.array([0, 15, -9, 7, 12, 3, -21]) 58 | Y = np.array([2, 14, -13, 5, 12, 4, -19]) 59 | 60 | # Example 1: 61 | loss_(X, Y) 62 | # Output: 63 | 2.142857142857143 64 | 65 | # Example 2: 66 | loss_(X, X) 67 | # Output: 68 | 0.0 69 | \end{minted} -------------------------------------------------------------------------------- /module05/exercises/m05ex08.tex: -------------------------------------------------------------------------------- 1 | \chapter{Exercise 08} 2 | \input{exercises/en.ex08_interlude.tex} 3 | \newpage 4 | \extitle{Lets Make Nice Plots Again} 5 | \turnindir{ex08} 6 | \exnumber{08} 7 | \exfiles{plot.py} 8 | \exforbidden{None} 9 | \makeheaderfilesforbidden 10 | 11 | 12 | % ================================= % 13 | \section*{Objective} 14 | % --------------------------------- % 15 | You must implement a function which plots the data, the prediction line, and the loss.\\ 16 | \newline 17 | You will plot the $x$ and $y$ coordinates of all data points as well as the prediction line generated by your theta parameters.\\ 18 | \newline 19 | Your function must also display the overall loss ($J$) in the title, and draw small lines marking the distance between each data point and its predicted value. 20 | 21 | % ================================= % 22 | \section*{Instructions} 23 | % --------------------------------- % 24 | In the plot.py file create the following function as per the instructions given below:\\ 25 | \newline 26 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 27 | def plot_with_loss(x, y, theta): 28 | """Plot the data and prediction line from three non-empty numpy.ndarray. 29 | Args: 30 | x: has to be an numpy.ndarray, one-dimensional array of size m. 31 | y: has to be an numpy.ndarray, one-dimensional array of size m. 32 | theta: has to be an numpy.ndarray, one-dimensional array of size 2. 33 | Returns: 34 | Nothing. 35 | Raises: 36 | This function should not raise any Exception. 37 | """ 38 | ... Your code ... 
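    # Editor's note: a rough illustrative sketch, assuming matplotlib.pyplot is
    # imported as plt and numpy as np; the exact title wording is up to you.
    y_hat = theta[0] + theta[1] * x
    j = np.sum((y_hat - y) ** 2) / (2 * x.shape[0])   # overall loss J
    plt.scatter(x, y)
    plt.plot(x, y_hat, color="orange")
    plt.vlines(x, np.minimum(y, y_hat), np.maximum(y, y_hat),
               colors="red", linestyles="dashed")     # distance to each prediction
    plt.title("Cost : {:.6f}".format(j))
    plt.show()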
39 | \end{minted} 40 | 41 | \newpage 42 | 43 | % ================================= % 44 | \section*{Examples} 45 | % --------------------------------- % 46 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 47 | import numpy as np 48 | x = np.arange(1,6) 49 | y = np.array([11.52434424, 10.62589482, 13.14755699, 18.60682298, 14.14329568]) 50 | 51 | # Example 1: 52 | theta1= np.array([18,-1]) 53 | plot_with_loss(x, y, theta1) 54 | # Output: 55 | \end{minted} 56 | 57 | \begin{figure}[H] 58 | \centering 59 | \includegraphics[scale=0.65]{assets/plotcost1.png} 60 | \caption{Example 1} 61 | \end{figure} 62 | 63 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 64 | # Example 2: 65 | theta2 = np.array([14, 0]) 66 | plot_with_loss(x, y, theta2) 67 | # Output: 68 | \end{minted} 69 | 70 | \begin{figure}[H] 71 | \centering 72 | \includegraphics[scale=0.65]{assets/plotcost2.png} 73 | \caption{Example 2} 74 | \end{figure} 75 | 76 | \newpage 77 | 78 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 79 | # Example 3: 80 | theta3 = np.array([12, 0.8]) 81 | plot_with_loss(x, y, theta3) 82 | # Output: 83 | \end{minted} 84 | 85 | \begin{figure}[H] 86 | \centering 87 | \includegraphics[scale=0.65]{assets/plotcost3.png} 88 | \caption{Example 3} 89 | \end{figure} -------------------------------------------------------------------------------- /module05/useful_resources.tex: -------------------------------------------------------------------------------- 1 | %******************************************************************************% 2 | % % 3 | % Section useful resources % 4 | % for ML Modules % 5 | % % 6 | %******************************************************************************% 7 | 8 | 9 | \chapter*{Notions covered and learning resources} 10 | 11 | \section*{What notions will be covered by this module?} 12 | 13 | \begin{itemize} 14 | \item Sum 15 | \item Mean 16 | \item Variance 17 | \item Standard deviation 18 | \item Operations on vectors and matrices 19 | \item Hypothesis 20 | \item Regression 21 | \item Loss function 22 | \end{itemize} 23 | 24 | \section*{Learning resources} 25 | 26 | You are recommended to use the following material: \href{https://www.coursera.org/learn/machine-learning}{Machine Learning MOOC - Stanford}\\ 27 | \newline 28 | This series of videos is available at no cost: simply log in, select "Enroll for Free", and click "Audit" at the bottom of the pop-up window.\\ 29 | \newline 30 | The following sections of the course are particularly relevant to today's exercises: 31 | 32 | \subsection*{Week 1: Introduction to Machine Learning} 33 | 34 | \subsubsection*{Supervised vs. Unsupervised Machine Learning} 35 | \begin{itemize} 36 | \item What is Machine Learning? 
37 | \item Supervised Learning Part 1 38 | \item Supervised Learning Part 2 39 | \item Unsupervised Learning Part 1 40 | \item Unsupervised Learning Part 2 41 | \end{itemize} 42 | 43 | \subsubsection*{Regression Model} 44 | \begin{itemize} 45 | \item Regression Model Part 1 46 | \item Regression Model Part 2 47 | \item Cost Function Formula 48 | \item Cost Function Intuition 49 | \item Visualizing the cost function 50 | \item Visualizing Example 51 | \item \textit{Keep the rest for tommorow ;-)} 52 | \end{itemize} 53 | 54 | \emph{All videos above are available also on this \href{https://youtube.com/playlist?list=PLkDaE6sCZn6FNC6YRfRQc_FbeQrF8BwGI&feature=shared}{Andrew Ng's YouTube playlist} from 3 to 14 includes} 55 | 56 | \subsubsection*{Linear Algebra reminders} 57 | \begin{itemize} 58 | \item \href{https://www.youtube.com/watch?v=XMB__E658fQ}{Matrices and Vectors} 59 | \item \href{https://www.youtube.com/watch?v=k1JGJhUGmBE}{Addition and Scalar Multiplication} 60 | \item \href{https://www.youtube.com/watch?v=VIfykceJoZI}{Matrix - Vector Multiplication} 61 | \item \href{https://www.youtube.com/watch?v=JHZKyt0m1kc}{Matrix - Matrix Multiplication} 62 | \item \href{https://www.youtube.com/watch?v=wqM7O_ZUtCc}{Matrix - Multiplication Properties} 63 | \item \href{https://www.youtube.com/watch?v=IUf8HDyUeY0}{Inverse and Transpose} 64 | \end{itemize} 65 | -------------------------------------------------------------------------------- /module06/Makefile: -------------------------------------------------------------------------------- 1 | # List the pdf's to build. foo.tex will produce foo.pdf 2 | TARGETS = en.subject.pdf 3 | 4 | # List the files included in the slides 5 | DEPS = exercises/en.ex00_interlude.tex \ 6 | exercises/en.ex01_interlude.tex \ 7 | exercises/en.ex02_interlude.tex \ 8 | exercises/en.ex05_interlude.tex \ 9 | ../resources/42ai_bootcamps/en.instructions.tex \ 10 | ../resources/42ai_bootcamps/en.acknowledgements.tex \ 11 | ../resources/latex/redefinition-commands.tex \ 12 | en.subject.tex \ 13 | useful_resources.tex 14 | 15 | # Relative path to the LaTeX documentclass setup files 16 | # Adapt as needed 17 | # RELPATH = $(shell git rev-parse --show-toplevel)/resources/latex/ 18 | # RELPATH for github actions: 19 | RELPATH = $(shell dirname `pwd`)/resources/latex/ 20 | 21 | 22 | # You should not touch this either 23 | include $(RELPATH)/Makefile.LaTeX 24 | -------------------------------------------------------------------------------- /module06/assets/42ai_logo.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module06/assets/42ai_logo.pdf -------------------------------------------------------------------------------- /module06/assets/Improve.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module06/assets/Improve.png -------------------------------------------------------------------------------- /module06/assets/Improve2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module06/assets/Improve2.png -------------------------------------------------------------------------------- /module06/assets/ex03_cost_1.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module06/assets/ex03_cost_1.png -------------------------------------------------------------------------------- /module06/assets/ex03_cost_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module06/assets/ex03_cost_2.png -------------------------------------------------------------------------------- /module06/assets/ex03_cost_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module06/assets/ex03_cost_3.png -------------------------------------------------------------------------------- /module06/assets/ex03_interlude_cost.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module06/assets/ex03_interlude_cost.png -------------------------------------------------------------------------------- /module06/assets/ex03_interlude_plot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module06/assets/ex03_interlude_plot.png -------------------------------------------------------------------------------- /module06/assets/ex04_J_vs_t1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module06/assets/ex04_J_vs_t1.png -------------------------------------------------------------------------------- /module06/assets/ex04_score_vs_bluepills.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module06/assets/ex04_score_vs_bluepills.png -------------------------------------------------------------------------------- /module06/assets/ex05_price_vs_Tmeters_part1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module06/assets/ex05_price_vs_Tmeters_part1.png -------------------------------------------------------------------------------- /module06/assets/ex05_price_vs_Tmeters_part2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module06/assets/ex05_price_vs_Tmeters_part2.png -------------------------------------------------------------------------------- /module06/assets/ex05_price_vs_age_part1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module06/assets/ex05_price_vs_age_part1.png -------------------------------------------------------------------------------- /module06/assets/ex05_price_vs_age_part2.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module06/assets/ex05_price_vs_age_part2.png -------------------------------------------------------------------------------- /module06/assets/ex05_price_vs_thrust_part1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module06/assets/ex05_price_vs_thrust_part1.png -------------------------------------------------------------------------------- /module06/assets/ex05_price_vs_thrust_part2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module06/assets/ex05_price_vs_thrust_part2.png -------------------------------------------------------------------------------- /module06/assets/ex06_sellprice_ne_lgd_vs_age.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module06/assets/ex06_sellprice_ne_lgd_vs_age.png -------------------------------------------------------------------------------- /module06/assets/ex07_3Dplot_data.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module06/assets/ex07_3Dplot_data.png -------------------------------------------------------------------------------- /module06/assets/ex07_hypo_test_part1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module06/assets/ex07_hypo_test_part1.png -------------------------------------------------------------------------------- /module06/assets/ex07_learning_rate.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module06/assets/ex07_learning_rate.png -------------------------------------------------------------------------------- /module06/attachments/are_blue_pills_magics.csv: -------------------------------------------------------------------------------- 1 | "Patient","Micrograms","Score" 2 | 1, 1.17, 78.93 3 | 2, 2.97, 58.20 4 | 3, 3.26, 67.47 5 | 4, 4.69, 37.47 6 | 5, 5.83, 45.65 7 | 6, 6.00, 32.92 8 | 7, 6.41, 29.97 9 | -------------------------------------------------------------------------------- /module06/attachments/are_blue_pills_magics_description.txt: -------------------------------------------------------------------------------- 1 | #Patient: number of the patient. 2 | 3 | #Micrograms: quantity of blue pills patient has taken (in micrograms). 4 | 5 | #Score: Standardized score at the spacecraft driving test. 
6 | -------------------------------------------------------------------------------- /module06/exercises/en.ex02_interlude.tex: -------------------------------------------------------------------------------- 1 | %******************************************************************************% 2 | % % 3 | % Interlude % 4 | % for Machine Learning module % 5 | % % 6 | %******************************************************************************% 7 | 8 | % =============================== % 9 | \section*{Interlude - Gradient Descent} 10 | % ------------------------------- % 11 | 12 | So far we've calculated the \textit{gradient}, 13 | which indicates whether and by how much we should increase or decrease $\theta_0$ and $\theta_1$ in order to reduce the loss.\\ 14 | \newline 15 | What we have to do next is to update the theta parameters accordingly, 16 | step by step, until we reach the minimum. 17 | This iterative process, called \textbf{Gradient Descent}, 18 | will progressively improve the performance of your regression model on the training data.\\ 19 | \newline 20 | The gradient descent \textbf{algorithm} can be summed up to this: 21 | for a certain number of cycles, at each step, 22 | both $\theta$ parameters are slightly moved in the opposite directions than what the gradient indicates.\\ 23 | \newline 24 | The algorithm can be expressed in pseudocode as the following: 25 | $$ 26 | \begin{matrix} 27 | &\text{repeat until convergence:} & \{\\ 28 | & \text{compute } \nabla{(J)} \\ 29 | & \theta_0 := \theta_0 - \alpha \nabla(J)_0 \\ 30 | & \theta_1 := \theta_1 - \alpha \nabla(J)_1\\ 31 | \} \hspace{0.5cm} 32 | \end{matrix} 33 | $$ 34 | A few remarks on this algorithm: 35 | \begin{itemize} 36 | \item If you directly subtracted the gradient from $\theta$, 37 | your steps would be too big and you would quickly overshoot past the minimum. 38 | That's why we use $\alpha$ (alpha), called the \textit{learning rate}. 39 | It's a small float number (usually between 0 and 1) that decreases the magnitude of each update. 40 | \item The pseudocode says "repeat until convergence", 41 | but in your implementation, you will not actually check for convergence at each iteration. 42 | You will instead set a number of cycles that is sufficient for your gradient descent to converge. 43 | \item When training a linear regression model on a new dataset, 44 | you will have to choose appropriate alpha and the number of cycles through trial and error. 45 | \end{itemize} -------------------------------------------------------------------------------- /module06/exercises/en.ex05_interlude.tex: -------------------------------------------------------------------------------- 1 | %******************************************************************************% 2 | % % 3 | % Interlude % 4 | % for Machine Learning module % 5 | % % 6 | %******************************************************************************% 7 | 8 | \section*{Interlude - Normalization} 9 | 10 | The values inside the $x$ vector can vary quite a lot in magnitude, 11 | depending on the type of data you are working with.\\ 12 | \\ 13 | For example, if your dataset contains distances between planets in km, the numbers will be huge. 14 | On the other hand, if you are working with planet masses expressed as a fraction of the solar system's total mass, the numbers will be very small (between 0 and 1). 15 | Both cases may slow down convergence in Gradient Descent (or even sometimes prevent convergence at all). 
16 | To avoid that kind of situation, \textbf{normalization} is a very effective way to proceed.\\ 17 | \\ 18 | The idea behind this technique is quite straightforward: \textbf{scaling the data}.\\ 19 | \\ 20 | With normalization, you can transform your $x$ vector into a new $x'$ vector whose values range between $[-1, 1]$ more or less. Doing this allows you to see much more easily how a training example compares to the other ones: 21 | \begin{itemize} 22 | \item If an $x'$ value is close to $1$, you know it's among the largest in the dataset 23 | \item If an $x'$ value is close to $0$, you know it's close to the median 24 | \item If an $x'$ value is close to $-1$, you know it's among the smallest 25 | \end{itemize} 26 | \noindent{So with the upcoming normalization techniques, you'll be able to map your data to two different value ranges: $[0, 1]$ or $[-1, 1]$. Your algorithm will like it and thank you for it.} 27 | -------------------------------------------------------------------------------- /module06/exercises/m06ex00.tex: -------------------------------------------------------------------------------- 1 | \chapter{Exercise 00} 2 | \input{exercises/en.ex00_interlude.tex} 3 | \newpage 4 | \extitle{Linear Gradient - Iterative Version} 5 | \turnindir{ex00} 6 | \exnumber{00} 7 | \exfiles{gradient.py} 8 | \exforbidden{None} 9 | \makeheaderfilesforbidden 10 | 11 | % ================================== % 12 | \section*{Objective} 13 | % ---------------------------------- % 14 | Understand and manipulate the notion of gradient and gradient descent in machine learning.\\ 15 | \newline 16 | You must write a function that computes the \textbf{\textit{gradient}} of the loss function. 17 | It must compute a partial derivative with respect to each theta parameter separately, and return the vector gradient.\\ 18 | \newline 19 | The partial derivatives can be calculated with the following formulas: 20 | $$ 21 | \nabla(J)_0 = \frac{1}{m}\sum_{i=1}^{m}(h_{\theta}(x^{(i)}) - y^{(i)}) 22 | $$ 23 | 24 | $$ 25 | \nabla(J)_1 = \frac{1}{m}\sum_{i=1}^{m}(h_{\theta}(x^{(i)}) - y^{(i)})x^{(i)} 26 | $$ 27 | Where: 28 | \begin{itemize} 29 | \item $\nabla(J)$ is the gradient vector of size $2 \times 1$, (this strange symbol : $\nabla$ is called nabla) 30 | \item $x$ is a vector of dimension $m$ 31 | \item $y$ is a vector of dimension $m$ 32 | \item $x^{(i)}$ is the i$^\text{th}$ component of vector $x$ 33 | \item $y^{(i)}$ is the i$^\text{th}$ component of vector $y$ 34 | \item $\nabla(J)_j$ is the j$^\text{th}$ component of $\nabla(J)$ 35 | \item $h_{\theta}(x^{(i)})$ corresponds to the model's prediction of $y^{(i)}$ 36 | \end{itemize} 37 | 38 | % ================================== % 39 | \section*{Hypothesis Notation} 40 | % ---------------------------------- % 41 | $h_{\theta}(x^{(i)})$ is the same as what we previously noted $\hat{y}^{(i)}$. 42 | The two notations are equivalent. 43 | They represent the model's prediction (or estimation) of the ${y}^{(i)}$ value. 44 | If you follow Andrew Ng's course material on Coursera, you will see him using the former notation. 
45 | \newline 46 | As a reminder: 47 | $h_{\theta}(x^{(i)}) = \theta_0 + \theta_1x^{(i)}$ 48 | 49 | % ================================== % 50 | \section*{Instructions} 51 | % ---------------------------------- % 52 | 53 | In the \texttt{gradient.py} file create the following function as per the instructions given below: 54 | \newline 55 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 56 | def simple_gradient(x, y, theta): 57 | """Computes a gradient vector from three non-empty numpy.arrays, with a for-loop. 58 | The three arrays must have compatible shapes. 59 | Args: 60 | x: has to be an numpy.array, a vector of shape m * 1. 61 | y: has to be an numpy.array, a vector of shape m * 1. 62 | theta: has to be an numpy.array, a 2 * 1 vector. 63 | Return: 64 | The gradient as a numpy.array, a vector of shape 2 * 1. 65 | None if x, y, or theta are empty numpy.array. 66 | None if x, y and theta do not have compatible shapes. 67 | None if x, y or theta is not of the expected type. 68 | Raises: 69 | This function should not raise any Exception. 70 | """ 71 | ... Your code ... 72 | \end{minted} 73 | 74 | % ================================== % 75 | \section*{Examples} 76 | % ---------------------------------- % 77 | 78 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 79 | import numpy as np 80 | x = np.array([12.4956442, 21.5007972, 31.5527382, 48.9145838, 57.5088733]).reshape((-1, 1)) 81 | y = np.array([37.4013816, 36.1473236, 45.7655287, 46.6793434, 59.5585554]).reshape((-1, 1)) 82 | 83 | # Example 0: 84 | theta1 = np.array([2, 0.7]).reshape((-1, 1)) 85 | simple_gradient(x, y, theta1) 86 | # Output: 87 | array([[-19.0342574], [-586.66875564]]) 88 | 89 | # Example 1: 90 | theta2 = np.array([1, -0.4]).reshape((-1, 1)) 91 | simple_gradient(x, y, theta2) 92 | # Output: 93 | array([[-57.86823748], [-2230.12297889]]) 94 | \end{minted} -------------------------------------------------------------------------------- /module06/exercises/m06ex01.tex: -------------------------------------------------------------------------------- 1 | \chapter{Exercise 01} 2 | \input{exercises/en.ex01_interlude.tex} 3 | \newpage 4 | \extitle{Linear Gradient - Vectorized Version} 5 | \turnindir{ex01} 6 | \exnumber{01} 7 | \exfiles{vec\_gradient.py} 8 | \exforbidden{None} 9 | \makeheaderfilesforbidden 10 | 11 | % ================================= % 12 | \section*{Objective} 13 | % --------------------------------- % 14 | Understand and experiment with the notions of \textbf{gradient} and \textbf{gradient descent} in machine learning.\\ 15 | \newline 16 | You must implement the following formula as a function: 17 | 18 | $$ 19 | \nabla(J) = \frac{1}{m} {X'}^T(X'\theta - y) 20 | $$ 21 | Where: 22 | \begin{itemize} 23 | \item $\nabla(J)$ is a vector of dimension $2 \times 1$ 24 | \item $X'$ is a \textbf{matrix} of dimensions $(m \times 2)$ 25 | \item ${X'}^T$ is the transpose of $X'$. Its dimensions are $(2 \times m)$ 26 | \item $y$ is a vector of dimension $m$ 27 | \item $\theta$ is a vector of dimension $2 \times 1$ 28 | \end{itemize} 29 | Be careful: 30 | \begin{itemize} 31 | \item the $x$ you will get as an input is an $m$ vector, 32 | \item $\theta$ is a $2 \times 1$ vector. You have to transform $x$ to fit the dimension of $\theta$! 
33 | \end{itemize} 34 | \newpage 35 | % ================================= % 36 | \section*{Instructions} 37 | % --------------------------------- % 38 | In the \texttt{vec\_gradient.py} file, create the following function as per the instructions given below: 39 | \newline 40 | \par 41 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 42 | def simple_gradient(x, y, theta): 43 | """Computes a gradient vector from three non-empty numpy.arrays, without any for loop. 44 | The three arrays must have compatible shapes. 45 | Args: 46 | x: has to be a numpy.array, a vector of shape m * 1. 47 | y: has to be a numpy.array, a vector of shape m * 1. 48 | theta: has to be a numpy.array, a 2 * 1 vector. 49 | Return: 50 | The gradient as a numpy.ndarray, a vector of dimension 2 * 1. 51 | None if x, y, or theta is an empty numpy.ndarray. 52 | None if x, y and theta do not have compatible dimensions. 53 | Raises: 54 | This function should not raise any Exception. 55 | """ 56 | ... Your code ... 57 | \end{minted} 58 | 59 | % ================================= % 60 | \section*{Examples} 61 | % --------------------------------- % 62 | 63 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 64 | import numpy as np 65 | x = np.array([12.4956442, 21.5007972, 31.5527382, 48.9145838, 57.5088733]).reshape((-1, 1)) 66 | y = np.array([37.4013816, 36.1473236, 45.7655287, 46.6793434, 59.5585554]).reshape((-1, 1)) 67 | 68 | # Example 0: 69 | theta1 = np.array([2, 0.7]).reshape((-1, 1)) 70 | simple_gradient(x, y, theta1) 71 | # Output: 72 | array([[-19.0342...], [-586.6687...]]) 73 | 74 | # Example 1: 75 | theta2 = np.array([1, -0.4]).reshape((-1, 1)) 76 | simple_gradient(x, y, theta2) 77 | # Output: 78 | array([[-57.8682...], [-2230.1229...]]) 79 | \end{minted} -------------------------------------------------------------------------------- /module06/exercises/m06ex02.tex: -------------------------------------------------------------------------------- 1 | \chapter{Exercise 02} 2 | \input{exercises/en.ex02_interlude.tex} 3 | \newpage 4 | \extitle{Gradient Descent} 5 | \turnindir{ex02} 6 | \exnumber{02} 7 | \exfiles{fit.py} 8 | \exforbidden{any function that calculates derivatives for you} 9 | \makeheaderfilesforbidden 10 | 11 | % ================================= % 12 | \section*{Objective} 13 | % --------------------------------- % 14 | Understand and experiment with the notions of \textbf{gradient} and \textbf{gradient descent} in machine learning.\\ 15 | \newline 16 | Be able to explain what it means to \textbf{\textit{fit}} a Machine Learning model to a dataset.\ 17 | Implement a function that performs \textbf{Linear Gradient Descent} (LGD). 18 | 19 | 20 | % ================================= % 21 | \section*{Instructions} 22 | % --------------------------------- % 23 | In this exercise, you will implement linear gradient descent to fit your model to the dataset.\\ 24 | \newline 25 | The pseudocode for the algorithm is the following: 26 | $$ 27 | \begin{matrix} 28 | &\text{repeat until convergence:} & \{ \\ 29 | & \text{compute } \nabla{(J)} \\ 30 | & \theta_0 := \theta_0 - \alpha \nabla(J)_0 \\ 31 | & \theta_1 := \theta_1 - \alpha \nabla(J)_1\\ 32 | \} 33 | \end{matrix} 34 | $$ 35 | 36 | Where: 37 | \begin{itemize} 38 | \item $\alpha$ (alpha) is the \textit{learning rate}.
It's a small float number (usually between 0 and 1), 39 | \item For now, "repeat until convergence" will mean to simply repeat for max\_iter (a number that you will choose wisely). 40 | \end{itemize} 41 | You are expected to write a function named \texttt{fit\_} as per the instructions below:\\ 42 | \newline 43 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 44 | def fit_(x, y, theta, alpha, max_iter): 45 | """ 46 | Description: 47 | Fits the model to the training dataset contained in x and y. 48 | Args: 49 | x: has to be a numpy.ndarray, a vector of dimension m * 1: (number of training examples, 1). 50 | y: has to be a numpy.ndarray, a vector of dimension m * 1: (number of training examples, 1). 51 | theta: has to be a numpy.ndarray, a vector of dimension 2 * 1. 52 | alpha: has to be a float, the learning rate 53 | max_iter: has to be an int, the number of iterations done during the gradient descent 54 | Returns: 55 | new_theta: numpy.ndarray, a vector of dimension 2 * 1. 56 | None if there is a matching dimension problem. 57 | Raises: 58 | This function should not raise any Exception. 59 | """ 60 | ... your code here ... 61 | \end{minted} 62 | By now, you should already have written a function to calculate the linear gradient.\\ 63 | 64 | % ================================= % 65 | \section*{Examples} 66 | % --------------------------------- % 67 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 68 | import numpy as np 69 | x = np.array([[12.4956442], [21.5007972], [31.5527382], [48.9145838], [57.5088733]]) 70 | y = np.array([[37.4013816], [36.1473236], [45.7655287], [46.6793434], [59.5585554]]) 71 | theta = np.array([1, 1]).reshape((-1, 1)) 72 | 73 | # Example 0: 74 | theta1 = fit_(x, y, theta, alpha=5e-8, max_iter=1500000) 75 | theta1 76 | # Output: 77 | array([[1.40709365], 78 | [1.1150909 ]]) 79 | 80 | # Example 1: 81 | predict(x, theta1) 82 | # Output: 83 | array([[15.3408728 ], 84 | [25.38243697], 85 | [36.59126492], 86 | [55.95130097], 87 | [65.53471499]]) 88 | \end{minted} 89 | 90 | \info{ 91 | \begin{itemize} 92 | \item You can create more training data by generating an $x$ array with random values and computing the corresponding $y$ vector as a linear expression of $x$. You can then fit a model on this artificial data and find out if it comes out with the same $\theta$ coefficients that you first used. 93 | \item It is possible that $\theta_0$ and $\theta_1$ become "nan". In that case, it means you probably used a learning rate that is too large. 94 | \end{itemize} 95 | } -------------------------------------------------------------------------------- /module06/exercises/m06ex03.tex: -------------------------------------------------------------------------------- 1 | \chapter{Exercise 03} 2 | \extitle{Linear Regression with Class} 3 | \turnindir{ex03} 4 | \exnumber{03} 5 | \exfiles{my\_linear\_regression.py} 6 | \exforbidden{any functions from sklearn} 7 | \makeheaderfilesforbidden 8 | 9 | % ================================= % 10 | \section*{Objective} 11 | % --------------------------------- % 12 | Write a class that contains all the necessary methods to perform a linear regression.
13 | % ================================= % 14 | \section*{Instructions} 15 | % --------------------------------- % 16 | In this exercise, you will not learn anything new but don't worry, it's for your own good.\\ 17 | \newline 18 | You are expected to write your own \texttt{MyLinearRegression} class which looks similar to the class available in Scikit-learn: 19 | \texttt{sklearn.linear\_model.LinearRegression}\\ 20 | \newline 21 | \par 22 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 23 | class MyLinearRegression(): 24 | """ 25 | Description: 26 | My personnal linear regression class to fit like a boss. 27 | """ 28 | def __init__(self, thetas, alpha=0.001, max_iter=1000): 29 | self.alpha = alpha 30 | self.max_iter = max_iter 31 | self.thetas = thetas 32 | 33 | #... other methods ... 34 | \end{minted} 35 | \newpage 36 | You will add the following methods: 37 | \begin{itemize} 38 | \item \texttt{fit\_(self, x, y)}, 39 | \item \texttt{predict\_(self, x)}, 40 | \item \texttt{loss\_elem\_(self, y, y\_hat)}, 41 | \item \texttt{loss\_(self, y, y\_hat)}. 42 | \end{itemize} 43 | You have already implemented these functions, you just need a few adjustments so that they all work well within your \texttt{MyLinearRegression} class. 44 | 45 | % ================================= % 46 | \section*{Examples} 47 | % --------------------------------- % 48 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 49 | import numpy as np 50 | from my_linear_regression import MyLinearRegression as MyLR 51 | x = np.array([[12.4956442], [21.5007972], [31.5527382], [48.9145838], [57.5088733]]) 52 | y = np.array([[37.4013816], [36.1473236], [45.7655287], [46.6793434], [59.5585554]]) 53 | 54 | lr1 = MyLR(np.array([[2], [0.7]])) 55 | 56 | # Example 0.0: 57 | y_hat = lr1.predict_(x) 58 | # Output: 59 | array([[10.74695094], 60 | [17.05055804], 61 | [24.08691674], 62 | [36.24020866], 63 | [42.25621131]]) 64 | 65 | # Example 0.1: 66 | lr1.loss_elem_(y, y_hat) 67 | # Output: 68 | array([[710.45867381], 69 | [364.68645485], 70 | [469.96221651], 71 | [108.97553412], 72 | [299.37111101]]) 73 | 74 | # Example 0.2: 75 | lr1.loss_(y, y_hat) 76 | # Output: 77 | 195.34539903032385 78 | 79 | # Example 1.0: 80 | lr2 = MyLR(np.array([[1], [1]]), 5e-8, 1500000) 81 | lr2.fit_(x, y) 82 | lr2.thetas 83 | # Output: 84 | array([[1.40709365], 85 | [1.1150909 ]]) 86 | 87 | # Example 1.1: 88 | y_hat = lr2.predict_(x) 89 | # Output: 90 | array([[15.3408728 ], 91 | [25.38243697], 92 | [36.59126492], 93 | [55.95130097], 94 | [65.53471499]]) 95 | 96 | # Example 1.2: 97 | lr2.loss_elem_(y, y_hat) 98 | # Output: 99 | array([[486.66604863], 100 | [115.88278416], 101 | [ 84.16711596], 102 | [ 85.96919719], 103 | [ 35.71448348]]) 104 | 105 | # Example 1.3: 106 | lr2.loss_(y, y_hat) 107 | # Output: 108 | 80.83996294128525 109 | \end{minted} -------------------------------------------------------------------------------- /module06/exercises/m06ex05.tex: -------------------------------------------------------------------------------- 1 | \chapter{Exercise 05} 2 | \input{exercises/en.ex05_interlude.tex} 3 | \newpage 4 | \extitle{Normalization I: Z-score Standardization} 5 | \turnindir{ex05} 6 | \exnumber{05} 7 | \exfiles{z\_score.py} 8 | \exforbidden{None} 9 | \makeheaderfilesforbidden 10 | 11 | 12 | 13 | % ================================= % 14 | \section*{Objective} 15 | % --------------------------------- % 16 | Introduction to standardization/normalization 
methods.\\ 17 | \\ 18 | You must implement the following formula as a function: 19 | $$ 20 | \begin{matrix} 21 | x'^{(i)} = \frac{x^{(i)} - \frac{1}{m} \sum_{i = 1}^{m} x^{(i)}}{\sqrt{\frac{1}{m - 1} \sum_{i = 1}^{m} (x^{(i)} - \frac{1}{m} \sum_{i = 1}^{m} x^{(i)})^{2}}} & &\text{ for $i$ in $1, ..., m$} 22 | \end{matrix} 23 | $$ 24 | Where: 25 | \begin{itemize} 26 | \item $x$ is a vector of dimension $m$ 27 | \item $x^{(i)}$ is the i$^\text{th}$ component of the $x$ vector 28 | \item $x'$ is the normalized version of the $x$ vector 29 | \end{itemize} 30 | \noindent{The equation is much easier to understand in the following form:} 31 | $$ 32 | \begin{matrix} 33 | x'^{(i)} = \frac{x^{(i)} - \mu}{\sigma} & &\text{ for $i$ in $1, ..., m$} 34 | \end{matrix} 35 | $$ 36 | This should remind you of something from \textbf{TinyStatistician}... doesn't it?! 37 | \\ 38 | Ok, let's do a quick recap! 39 | \begin{itemize} 40 | \item $\mu$ is the mean of $x$ 41 | \item $\sigma$ is the standard deviation of $x$ 42 | \end{itemize} 43 | 44 | % ================================= % 45 | \section*{Instructions} 46 | % --------------------------------- % 47 | \noindent{In the \texttt{zscore.py} file, write the \texttt{zscore} function as per the instructions given below:} 48 | \\ 49 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 50 | def zscore(x): 51 | """Computes the normalized version of a non-empty numpy.ndarray using the z-score standardization. 52 | Args: 53 | x: has to be a numpy.ndarray, a vector. 54 | Returns: 55 | x' as a numpy.ndarray. 56 | None if x is an empty numpy.ndarray or not a numpy.ndarray. 57 | Raises: 58 | This function shouldn't raise any Exception. 59 | """ 60 | ... Your code ... 61 | \end{minted} 62 | 63 | 64 | % ================================= % 65 | \section*{Examples} 66 | % --------------------------------- % 67 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 68 | # Example 1: 69 | X = np.array([0, 15, -9, 7, 12, 3, -21]) 70 | zscore(X) 71 | # Output: 72 | array([-0.08620324, 1.2068453 , -0.86203236, 0.51721942, 0.94823559, 73 | 0.17240647, -1.89647119]) 74 | 75 | # Example 2: 76 | Y = np.array([2, 14, -13, 5, 12, 4, -19]).reshape((-1, 1)) 77 | zscore(Y) 78 | # Output: 79 | array([ 0.11267619, 1.16432067, -1.20187941, 0.37558731, 0.98904659, 80 | 0.28795027, -1.72770165]) 81 | \end{minted} 82 | -------------------------------------------------------------------------------- /module06/exercises/m06ex06.tex: -------------------------------------------------------------------------------- 1 | \chapter{Exercise 06} 2 | \extitle{Normalization II: Min-max Standardization} 3 | \turnindir{ex06} 4 | \exnumber{06} 5 | \exfiles{minmax.py} 6 | \exforbidden{None} 7 | \makeheaderfilesforbidden 8 | 9 | 10 | % ================================= % 11 | \section*{Objective} 12 | % --------------------------------- % 13 | Introduction to standardization/normalization methods.
14 | Implement another normalization method.\\ 15 | \\ 16 | You must implement the following formula as a function: 17 | 18 | $$ 19 | \begin{matrix} 20 | x'^{(i)} = \frac{x^{(i)} - min(x)}{max(x) - min(x)} & & \text{ for $i = 1, ..., m$} 21 | \end{matrix} 22 | $$ 23 | Where: 24 | \begin{itemize} 25 | \item $x$ is a vector of dimension $m$ 26 | \item $x^{(i)}$ is the i$^\text{th}$ component of vector $x$ 27 | \item $min(x)$ is the minimum value found among the components of vector $x$ 28 | \item $max(x)$ is the maximum value found among the components of vector $x$ 29 | \end{itemize} 30 | You will notice that this min-max standardization doesn't scale the values to the $[-1,1]$ range. 31 | What do you think the final range will be? 32 | \newpage 33 | % ================================= % 34 | \section*{Instructions} 35 | % --------------------------------- % 36 | In the \texttt{minmax.py} file, create the \texttt{minmax} function as per the instructions given below:\\ 37 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 38 | def minmax(x): 39 | """Computes the normalized version of a non-empty numpy.ndarray using the min-max standardization. 40 | Args: 41 | x: has to be a numpy.ndarray, a vector. 42 | Returns: 43 | x' as a numpy.ndarray. 44 | None if x is an empty numpy.ndarray or not a numpy.ndarray. 45 | Raises: 46 | This function shouldn't raise any Exception. 47 | """ 48 | ... Your code ... 49 | \end{minted} 50 | 51 | % ================================= % 52 | \section*{Examples} 53 | % --------------------------------- % 54 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 55 | # Example 1: 56 | X = np.array([0, 15, -9, 7, 12, 3, -21]).reshape((-1, 1)) 57 | minmax(X) 58 | # Output: 59 | array([0.58333333, 1. , 0.33333333, 0.77777778, 0.91666667, 60 | 0.66666667, 0. ]) 61 | 62 | # Example 2: 63 | Y = np.array([2, 14, -13, 5, 12, 4, -19]).reshape((-1, 1)) 64 | minmax(Y) 65 | # Output: 66 | array([0.63636364, 1. , 0.18181818, 0.72727273, 0.93939394, 67 | 0.6969697 , 0.
]) 68 | \end{minted} -------------------------------------------------------------------------------- /module06/useful_resources.tex: -------------------------------------------------------------------------------- 1 | %******************************************************************************% 2 | % % 3 | % Section useful resources % 4 | % for ML Modules % 5 | % % 6 | %******************************************************************************% 7 | 8 | 9 | 10 | \chapter*{Notions covered and learning resources} 11 | 12 | \section*{What notions will be covered by this module?} 13 | 14 | \begin{itemize} 15 | \item Linear regression 16 | \item Gradient descent 17 | \item Learning rate 18 | \item Normalization 19 | \end{itemize} 20 | 21 | \section*{Learning resources} 22 | 23 | You are recommended to use the following material: \href{https://www.coursera.org/learn/machine-learning}{Machine Learning MOOC - Stanford}\\ 24 | \newline 25 | This series of videos is available at no cost: simply log in, select "Enroll for Free", and click "Audit" at the bottom of the pop-up window.\\ 26 | \newline 27 | The following sections of the course are particularly relevant to today's exercises: 28 | 29 | \subsection*{Week 1: Introduction to Machine Learning} 30 | 31 | \subsubsection*{Train the model with Gradient Descent} 32 | \begin{itemize} 33 | \item Gradient descent 34 | \item Implementing gradient descent 35 | \item Gradient descent intuition 36 | \item Learning rate 37 | \item Gradient descent for linear regression 38 | \item Running gradient descent 39 | \end{itemize} 40 | 41 | \subsection*{Week 2: Regression with multiple input variables} 42 | 43 | \subsubsection*{Multiple linear Regression} 44 | \begin{itemize} 45 | \item Multiple features 46 | \item Vectorization part1 (optional) 47 | \item Vectorization part2 (optional) 48 | \end{itemize} 49 | 50 | \subsubsection*{Gradient descent in practice} 51 | \begin{itemize} 52 | \item Feature scaling part 1 53 | \item Feature scaling part 2 54 | \end{itemize} 55 | \emph{All videos mentioned above are also available on this \href{https://youtube.com/playlist?list=PLkDaE6sCZn6FNC6YRfRQc_FbeQrF8BwGI&feature=shared}{Andrew Ng's YouTube playlist} from 15 to 21 included, plus 25 and 26} 56 | 57 | 58 | -------------------------------------------------------------------------------- /module07/Makefile: -------------------------------------------------------------------------------- 1 | # List the pdf's to build.
foo.tex will produce foo.pdf 2 | TARGETS = en.subject.pdf 3 | 4 | # List the files included in the slides 5 | DEPS = exercises/en.ex00_interlude.tex \ 6 | exercises/en.ex01_interlude.tex \ 7 | exercises/en.ex02_interlude.tex \ 8 | exercises/en.ex03_interlude.tex \ 9 | exercises/en.ex04_interlude.tex \ 10 | exercises/en.ex07_interlude.tex \ 11 | exercises/en.ex08_interlude.tex \ 12 | exercises/en.ex09_interlude.tex \ 13 | ../resources/latex/redefinition-commands.tex \ 14 | ../resources/42ai_bootcamps/en.instructions.tex \ 15 | ../resources/42ai_bootcamps/en.acknowledgements.tex \ 16 | useful_resources.tex 17 | 18 | # Relative path to the LaTeX documentclass setup files 19 | # Adapt as needed 20 | # RELPATH = $(shell git rev-parse --show-toplevel)/resources/latex/ 21 | # RELPATH for github actions: 22 | RELPATH = $(shell dirname `pwd`)/resources/latex/ 23 | 24 | # You should not touch this either 25 | include $(RELPATH)/Makefile.LaTeX 26 | -------------------------------------------------------------------------------- /module07/assets/42ai_logo.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module07/assets/42ai_logo.pdf -------------------------------------------------------------------------------- /module07/assets/Default.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module07/assets/Default.png -------------------------------------------------------------------------------- /module07/assets/Evaluate.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module07/assets/Evaluate.png -------------------------------------------------------------------------------- /module07/assets/Improve.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module07/assets/Improve.png -------------------------------------------------------------------------------- /module07/assets/Predict.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module07/assets/Predict.png -------------------------------------------------------------------------------- /module07/assets/bad_pred_with_distance.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module07/assets/bad_pred_with_distance.png -------------------------------------------------------------------------------- /module07/assets/ex07_price_vs_Tmeters_part1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module07/assets/ex07_price_vs_Tmeters_part1.png -------------------------------------------------------------------------------- /module07/assets/ex07_price_vs_Tmeters_part2.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module07/assets/ex07_price_vs_Tmeters_part2.png -------------------------------------------------------------------------------- /module07/assets/ex07_price_vs_age_part1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module07/assets/ex07_price_vs_age_part1.png -------------------------------------------------------------------------------- /module07/assets/ex07_price_vs_age_part2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module07/assets/ex07_price_vs_age_part2.png -------------------------------------------------------------------------------- /module07/assets/ex07_price_vs_thrust_part1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module07/assets/ex07_price_vs_thrust_part1.png -------------------------------------------------------------------------------- /module07/assets/ex07_price_vs_thrust_part2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module07/assets/ex07_price_vs_thrust_part2.png -------------------------------------------------------------------------------- /module07/assets/ex12_data.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module07/assets/ex12_data.png -------------------------------------------------------------------------------- /module07/assets/ex12_plot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module07/assets/ex12_plot.png -------------------------------------------------------------------------------- /module07/assets/overfitt.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module07/assets/overfitt.png -------------------------------------------------------------------------------- /module07/assets/overfitt_with_dots.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module07/assets/overfitt_with_dots.png -------------------------------------------------------------------------------- /module07/assets/polynomial_straight_line.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module07/assets/polynomial_straight_line.png -------------------------------------------------------------------------------- /module07/attachments/are_blue_pills_magics.csv: -------------------------------------------------------------------------------- 1 | "Patient","Micrograms","Score" 2 | 
1, 1.17, 78.93 3 | 2, 2.97, 58.20 4 | 3, 3.26, 67.47 5 | 4, 4.69, 37.47 6 | 5, 5.83, 45.65 7 | 6, 6.00, 32.92 8 | 7, 6.41, 29.97 9 | -------------------------------------------------------------------------------- /module07/attachments/are_blue_pills_magics_description.txt: -------------------------------------------------------------------------------- 1 | #Patient: number of the patient. 2 | 3 | #Micrograms: quantity of blue pills patient has taken (in micrograms). 4 | 5 | #Score: Standardized score at the spacecraft driving test. 6 | -------------------------------------------------------------------------------- /module07/attachments/spacecraft_data_description.txt: -------------------------------------------------------------------------------- 1 | #Age: Age of the spacecraft. 2 | 3 | #Thrust_powern: Power of engines in 10 km/s. 4 | 5 | #Terameters: Distance that the spacecraft has travelled in terameters. 6 | 7 | #Sell_price: This is the prices at which the custommer bought the spacecraft (in kiloeuros). 8 | -------------------------------------------------------------------------------- /module07/exercises/en.ex01_interlude.tex: -------------------------------------------------------------------------------- 1 | %******************************************************************************% 2 | % % 3 | % Interlude % 4 | % for Machine Learning module % 5 | % % 6 | %******************************************************************************% 7 | 8 | % =============================== % 9 | \section*{Interlude - Even More Linear Algebra Tricks!} 10 | % ******************************* % 11 | 12 | As you already did before with the univariate hypothesis, 13 | the multivariate hypothesis can be vectorized as well.\\ 14 | \newline 15 | If you add a column of $1$'s as the first column of the $X$ matrix, you get what we'll call the $X'$ matrix. 16 | Then, you can calculate $\hat{y}$ by multiplying $X'$ and $\theta$. 17 | 18 | $$ 19 | X' \cdot \theta = 20 | \begin{bmatrix} 21 | 1 & x_{1}^{(1)} & \dots & x_{n}^{(1)}\\ 22 | \vdots & \vdots & \ddots & \vdots\\ 23 | 1 & x_{1}^{(m)} & \dots & x_{n}^{(m)}\end{bmatrix} 24 | \cdot 25 | \begin{bmatrix} 26 | \theta_0 \\ 27 | \theta_1 \\ 28 | \vdots \\ 29 | \theta_n 30 | \end{bmatrix} 31 | = 32 | \begin{bmatrix} 33 | \theta_0 + \theta_{1} x_{1}^{(1)} + \dots + \theta_{n} x_{n}^{(1)}\\ 34 | \vdots \\ 35 | \theta_0 + \theta_{1} x_{1}^{(m)} + \dots + \theta_{n} x_{n}^{(m)} 36 | \end{bmatrix} 37 | = 38 | \begin{bmatrix} 39 | \hat{y}^{(1)} \\ 40 | \vdots \\ 41 | \hat{y}^{(m)} 42 | \end{bmatrix} 43 | = 44 | \hat{y} 45 | $$ 46 | Another way of understanding this algebra trick is to pretend that each training 47 | example has an artificial $x_0$ feature that is always equal to $1$.\\ 48 | \newline 49 | This simplifies the equations as now, each $x_j$ feature has its 50 | corresponding $\theta_j$ parameter in the multiplication. 
51 | 52 | $$ 53 | \theta_0x_0^{(i)} + \theta_{1} x_{1}^{(i)} + \dots + \theta_{n} x_{n}^{(i)} = \theta \cdot x'^{(i)} 54 | $$ -------------------------------------------------------------------------------- /module07/exercises/en.ex02_interlude.tex: -------------------------------------------------------------------------------- 1 | %******************************************************************************% 2 | % % 3 | % Interlude % 4 | % for Machine Learning module % 5 | % % 6 | %******************************************************************************% 7 | 8 | % =============================== % 9 | \section*{Interlude - Evaluate} 10 | % ------------------------------- % 11 | 12 | \begin{figure}[!h] 13 | \centering 14 | \includegraphics[scale=0.2]{assets/Evaluate.png} 15 | %\caption{The Learning Cycle: Evaluate} 16 | \end{figure} 17 | 18 | % =============================== % 19 | \section*{Back to the Loss Function} 20 | % ------------------------------- % 21 | How is our model doing?\\ 22 | To evaluate our model, remember that we have already used a \textbf{metric} called the \textbf{loss function} (also known as \textbf{cost function}). 23 | The loss function is basically just a measure of how wrong the model is, in all of its predictions.\\ 24 | \newline 25 | Two modules ago, we defined the loss function as the average of the squared distances between each prediction and its expected value (distances represented by the dotted lines in the figure below): 26 | 27 | \begin{figure}[!h] 28 | \centering 29 | \includegraphics[scale=0.5]{assets/bad_pred_with_distance.png} 30 | \caption{Distances between predicted and expected values} 31 | \end{figure} 32 | \newpage 33 | \noindent{The formula was the following:} 34 | 35 | $$ 36 | J(\theta) = \frac{1}{2m}\sum_{i=1}^{m}(\hat{y}^{(i)} - y^{(i)})^2 37 | $$ 38 | \\ 39 | And its vectorized form: 40 | 41 | $$ 42 | \begin{matrix} 43 | J(\theta) = \frac{1}{2m}(\hat{y} - y)\cdot(\hat{y}- y) 44 | \end{matrix} 45 | $$ 46 | \\ 47 | \textit{So, now that we moved to multivariate linear regression, what does it change?}\\ 48 | \newline 49 | You may have noticed that variables such as $x_j$ and $\theta_j$ are not in the equation. 50 | Indeed, the loss function only uses the predictions ($\hat{y}$) and the expected values ($y$), 51 | so the inner workings of the model do not have an impact on its evaluation metric.\\ 52 | \\ 53 | This means we can use the exact same loss function as we did before! 54 | -------------------------------------------------------------------------------- /module07/exercises/en.ex03_interlude.tex: -------------------------------------------------------------------------------- 1 | %******************************************************************************% 2 | % % 3 | % Interlude % 4 | % for Machine Learning module % 5 | % % 6 | %******************************************************************************% 7 | 8 | % =============================== % 9 | \section*{Interlude - Improve with the Gradient} 10 | % ******************************* % 11 | 12 | \begin{figure}[!h] 13 | \centering 14 | \includegraphics[scale=0.2]{assets/Improve.png} 15 | %\caption{The Learning Cycle: Improve} 16 | \end{figure} 17 | 18 | % =============================== % 19 | \section*{Multivariate Gradient} 20 | % ******************************* % 21 | From our multivariate linear hypothesis we can derive our multivariate gradient. 
22 | It looks a lot like the one we saw during the previous module, but instead of having just two components, the gradient now has as many as there are parameters. 23 | This means that now we need to calculate $\nabla(J)_0,\nabla(J)_1,\dots,\nabla(J)_n$.\\ 24 | \newline 25 | If we take the univariate equations we used during the previous module and replace the formula for $\nabla(J)_1$ by a more general $\nabla(J)_j$, we get the following: 26 | 27 | $$ 28 | \begin{matrix} 29 | \nabla(J)_0 & = &\frac{1}{m}\sum_{i=1}^{m}(h_{\theta}(x^{(i)}) - y^{(i)}) & \\ 30 | \nabla(J)_j & = &\frac{1}{m}\sum_{i=1}^{m}(h_{\theta}(x^{(i)}) - y^{(i)})x_{j}^{(i)} & \text{ for j = 1, ..., n} 31 | \end{matrix} 32 | $$ 33 | Where: 34 | \begin{itemize} 35 | \item $\nabla(J)$ is a vector of dimension $(n + 1)$, the gradient vector 36 | \item $\nabla(J)_j$ is the j$^\text{th}$ component of $\nabla(J)$, the partial derivative of $J$ with respect to $\theta_j$ 37 | \item $y$ is a vector of dimension $m$, the vector of expected values 38 | \item $y^{(i)}$ is a scalar, the i$^\text{th}$ component of vector $y$ 39 | \item $x^{(i)}$ is the feature vector of the i$^\text{th}$ example 40 | \item $x^{(i)}_j$ is a scalar, the j$^\text{th}$ feature value of the i$^\text{th}$ example 41 | \item $h_{\theta}(x^{(i)})$ is a scalar, the model's estimation of $y^{(i)}$. (It can also be denoted $\hat{y}^{(i)}$) 42 | \end{itemize} 43 | 44 | % =============================== % 45 | \section*{Vectorized Form} 46 | % ******************************* % 47 | As usual, we can use some linear algebra magic to get a more compact (and computationally efficient) formula. 48 | First we can use our convention that each training example has an extra $x_0 = 1$ feature, and replace the gradient formulas above by one single equation that is valid for all $j$ components: 49 | 50 | $$ 51 | \begin{matrix} 52 | \nabla(J)_j & = &\frac{1}{m}\sum_{i=1}^{m}(h_{\theta}(x^{(i)}) - y^{(i)})x_{j}^{(i)} & \text{ for j = 0, ..., n} 53 | \end{matrix} 54 | $$ 55 | And this generic equation can then be rewritten in a vectorized form: 56 | 57 | $$ 58 | \nabla(J) = \frac{1}{m} {X'}^T(X'\theta - y) 59 | $$ 60 | Where: 61 | \begin{itemize} 62 | \item $\nabla(J)$ is the gradient vector of dimension $(n + 1)$ 63 | \item $X'$ is a matrix of dimensions $(m \times (n + 1))$, the design matrix onto which a column of $1$'s was added as the first column 64 | \item ${X'}^T$ means the matrix has been transposed 65 | \item $\theta$ is a vector of dimension $(n + 1)$: the parameter vector 66 | \item $y$ is a vector of dimension $m$: the vector of expected values 67 | \end{itemize} 68 | The vectorized equation can output the entire gradient vector all at once, in one calculation!\\ 69 | \newline 70 | So if you understand the linear algebra operations, you can forget about the equations we presented at the top of the page and simply use the vectorized one. 
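To make this formula concrete, here is a minimal NumPy sketch of the vectorized gradient (an illustration only, not the reference solution you are asked to write in the exercises; the function name \texttt{gradient\_} and the input shapes are assumptions):
\begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python}
import numpy as np

def gradient_(x, y, theta):
    # Assumed shapes: x is (m, n), y is (m, 1), theta is (n + 1, 1).
    m = x.shape[0]
    # Build X' by prepending a column of ones (the artificial x_0 feature).
    x_prime = np.hstack((np.ones((m, 1)), x))
    # nabla(J) = (1 / m) * X'^T (X' theta - y)
    return x_prime.T @ (x_prime @ theta - y) / m
\end{minted}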
71 | -------------------------------------------------------------------------------- /module07/exercises/en.ex04_interlude.tex: -------------------------------------------------------------------------------- 1 | %******************************************************************************% 2 | % % 3 | % Interlude % 4 | % for Machine Learning module % 5 | % % 6 | %******************************************************************************% 7 | 8 | % =============================== % 9 | \section*{Interlude - Gradient Descent} 10 | % ******************************* % 11 | 12 | Now comes the fun part: \textbf{gradient descent}!\\ 13 | \newline 14 | The algorithm is not that different from the one used in univariate 15 | linear regression. As you might have guessed, what will change is 16 | that the $j$ index needs to run from $0$ to $n$ instead of $0$ 17 | to $1$. So all you need is a more generic algorithm, which can be 18 | expressed in pseudocode as the following: 19 | 20 | $$ 21 | \begin{matrix} 22 | &\text{repeat until convergence:} & \{\\ 23 | & \text{compute } \nabla{(J)} \\ 24 | & \theta_j := \theta_j - \alpha \nabla(J)_j \\ 25 | & \text{(simultaneously update } \theta_j \text{ for j = 0, 1, ..., n)}\\ 26 | \} \hspace{0.5cm} 27 | \end{matrix} 28 | $$ 29 | If we take the univariate equations we used during the previous module and replace the formula for $\nabla(J)_1$ by a more general $\nabla(J)_j$, we get the following:\\ 30 | \newline 31 | $$ 32 | \begin{matrix} 33 | \nabla(J)_0 & = &\frac{1}{m}\sum_{i=1}^{m}(h_{\theta}(x^{(i)}) - y^{(i)}) & \\ 34 | \nabla(J)_j & = &\frac{1}{m}\sum_{i=1}^{m}(h_{\theta}(x^{(i)}) - y^{(i)})x_{j}^{(i)} & \text{ for j = 1, ..., n} 35 | \end{matrix} 36 | $$ 37 | \\ 38 | If you started to like vectorized forms, you might have noticed that the $\theta_j$ notation is actually redundant here, since all components of $\theta$ need to be updated simultaneously. $\theta$ is a vector, and so is $\nabla{(J)}$: they both have dimension $(n+1)$. So all we need to do is this: 39 | 40 | 41 | $$ 42 | \begin{matrix} 43 | &\text{repeat until convergence:} & \{\\ 44 | & \text{compute } \nabla{(J)} \\ 45 | & \theta := \theta - \alpha \nabla(J)\\ \} \hspace{0.5cm} 46 | \end{matrix} 47 | $$ 48 | Where: 49 | \begin{itemize} 50 | \item $\theta$ is the entire parameter vector 51 | \item $\alpha$ (alpha) is the learning rate (a small number, usually between 0 and 1) 52 | \item $\nabla{(J)}$ is the entire gradient vector 53 | \end{itemize} 54 | 55 | % =============================== % 56 | \section*{Note: Do you still wonder why there is a subtraction in the equation?} 57 | % ******************************* % 58 | By definition, the gradient indicates the direction towards which we 59 | should adjust the $\theta$ parameters if we wanted to increase the loss. 60 | But since our optimization objective is to minimize the loss, 61 | we move $\theta$ in the opposite direction of the gradient 62 | (hence the name gradient descent).
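To see the update rule in action, here is a minimal sketch of the descent loop in NumPy (an illustration only, not the reference solution; the function name, the fixed number of iterations and the input shapes are assumptions):
\begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python}
import numpy as np

def fit_(x, y, theta, alpha, max_iter):
    # Assumed shapes: x is (m, n), y is (m, 1), theta is (n + 1, 1);
    # alpha is a small float, max_iter an int.
    m = x.shape[0]
    # X': the design matrix with a leading column of ones.
    x_prime = np.hstack((np.ones((m, 1)), x))
    for _ in range(max_iter):
        # Gradient of the loss, then one update of the whole theta vector.
        gradient = x_prime.T @ (x_prime @ theta - y) / m
        theta = theta - alpha * gradient
    return theta
\end{minted}
Note how the whole $\theta$ vector is updated in a single vectorized operation: this is exactly the \textit{simultaneous update} mentioned above.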
-------------------------------------------------------------------------------- /module07/exercises/en.ex07_interlude.tex: -------------------------------------------------------------------------------- 1 | %******************************************************************************% 2 | % % 3 | % Interlude % 4 | % for Machine Learning module % 5 | % % 6 | %******************************************************************************% 7 | 8 | % =============================================== % 9 | \section*{Interlude - Introducing Polynomial Models} 10 | % ----------------------------------------------- % 11 | 12 | You probably noticed that the method we use is called \textit{linear regression} for a reason: 13 | the model generates all of its predictions on a straight line. 14 | However, we often encounter features that do not have a linear relationship with the predicted variable, 15 | like in the figure below: 16 | 17 | \begin{figure}[!h] 18 | \centering 19 | \includegraphics[scale=0.6]{assets/polynomial_straight_line.png} 20 | \caption{Non-linear relationship} 21 | \end{figure} 22 | In that case, we are stuck with a straight line that cannot fit the data points properly.\\ 23 | \newline 24 | In this example, what if we could express $y$ not only as a function of $x$, but also of $x^2$, and maybe even $x^3$ and $x^4$? 25 | We could make a hypothesis that draws a nice \textbf{curve} that would better fit the data. 26 | That's where polynomial features can help! 27 | 28 | % =============================================== % 29 | \section*{Interlude - Polynomial features} 30 | % ----------------------------------------------- % 31 | First we get to do some \textit{feature engineering}. 32 | We create new features by raising our initial $x$ feature to the power of 2, and then 3, 4... as far as we want to go. 33 | For each new feature we need to create a new column in the dataset. 34 | 35 | % =============================================== % 36 | \section*{Interlude - Polynomial Hypothesis} 37 | % ----------------------------------------------- % 38 | Now that we have created our new features, we can combine them in a linear hypothesis that looks just the same as what we're used to: 39 | 40 | $$ 41 | \hat{y} = \theta_0 + \theta_1 x + \theta_2 x^{2} + \dots + \theta_n x^{n} 42 | $$ 43 | It's a little strange because we are building a linear combination, not with different features but with different powers of the same feature. 44 | This is a first way of introducing non-linearity into a regression model! -------------------------------------------------------------------------------- /module07/exercises/en.ex08_interlude.tex: -------------------------------------------------------------------------------- 1 | %******************************************************************************% 2 | % % 3 | % Interlude % 4 | % for Machine Learning module % 5 | % % 6 | %******************************************************************************% 7 | 8 | % ============================================== % 9 | \section*{Interlude - Plotting Curves With Matplotlib} 10 | % ---------------------------------------------- % 11 | 12 | We asked you to plot straight lines in \texttt{module05}. 13 | Now that you are working with polynomial models, the hypothesis functions are no longer straight lines, but \textbf{curves}.\\ 14 | \newline 15 | Plotting curves is a bit more tricky, because if you do not have enough data points, you will get an ugly broken line instead of a smooth curve.
16 | Here's a way to do it.\\ 17 | \newline 18 | Let's begin with a simple dataset: 19 | 20 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 21 | import numpy as np 22 | import matplotlib.pyplot as plt 23 | 24 | x = np.arange(1,11).reshape(-1,1) 25 | y = np.array([[ 1.39270298], 26 | [ 3.88237651], 27 | [ 4.37726357], 28 | [ 4.63389049], 29 | [ 7.79814439], 30 | [ 6.41717461], 31 | [ 8.63429886], 32 | [ 8.19939795], 33 | [10.37567392], 34 | [10.68238222]]) 35 | 36 | plt.scatter(x,y) 37 | plt.show() 38 | \end{minted} 39 | 40 | \begin{figure}[!h] 41 | \centering 42 | \includegraphics[scale=0.6]{assets/ex12_data.png} 43 | \caption{Scatter plot of a dataset} 44 | \end{figure} 45 | \newpage 46 | \noindent{Now, we build a polynomial model of degree 3 and plot its hypothesis function $h_{\theta}(x)$.} 47 | \\ 48 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 49 | from polynomial_model import add_polynomial_features 50 | from mylinearregression import MyLinearRegression as MyLR 51 | 52 | # Build the model: 53 | x_ = add_polynomial_features(x, 3) 54 | my_lr = MyLR(np.ones(4).reshape(-1,1)) 55 | my_lr.fit_(x_, y) 56 | # Plot: 57 | ## To get a smooth curve, we need a lot of data points 58 | continuous_x = np.arange(1,10.01, 0.01).reshape(-1,1) 59 | x_ = add_polynomial_features(continuous_x, 3) 60 | y_hat = my_lr.predict_(x_) 61 | 62 | plt.scatter(x,y) 63 | plt.plot(continuous_x, y_hat, color='orange') 64 | plt.show() 65 | \end{minted} 66 | 67 | \begin{figure}[!h] 68 | \centering 69 | \includegraphics[scale=0.6]{assets/ex12_plot.png} 70 | \caption{Scatter plot of a dataset, and on top, a plot of the polynomial hypothesis function} 71 | \end{figure} 72 | -------------------------------------------------------------------------------- /module07/exercises/en.ex09_interlude.tex: -------------------------------------------------------------------------------- 1 | %******************************************************************************% 2 | % % 3 | % Interlude % 4 | % for Machine Learning module % 5 | % % 6 | %******************************************************************************% 7 | 8 | % ============================================== % 9 | \section*{Interlude - Lost in Overfitting} 10 | % ---------------------------------------------- % 11 | 12 | The two previous exercises have led you, dear reader, into very dangerous territory: the realm of \textbf{overfitting}.\\ 13 | You did not see it coming but now, you are in a bad situation...\\ 14 | \\ 15 | By increasing the polynomial degree of your model, you increased its \textbf{complexity}. 16 | Is it wrong? 17 | Not always. 18 | Some models are indeed very complex because the relationships they represent are very complex as well.\\ 19 | \\ 20 | But, if you look at the plots for the previous exercise's \textit{best model}, you should feel that something is wrong...\\ 21 | \\ 22 | % ============================================== % 23 | \section*{Interlude - Something is rotten in the state of our model...} 24 | % ---------------------------------------------- % 25 | Take a look at the following plot. 26 | 27 | \begin{figure}[!h] 28 | \centering 29 | \includegraphics[scale=0.6]{assets/overfitt.png} 30 | \caption{Overfitting hypothesis} 31 | \end{figure} 32 | 33 | You can see that the prediction line fits each data point perfectly, but completely misses out on capturing the relationship between $x$ and $y$ properly.
34 | And now, if we add some brand new data points to the dataset, we see that the predictions on those new examples are way off. 35 | 36 | \begin{figure}[!h] 37 | \centering 38 | \includegraphics[scale=0.6]{assets/overfitt_with_dots.png} 39 | \caption{Generalization errors resulting from overfitting} 40 | \end{figure} 41 | This situation is called overfitting, because the model is doing an excessively good job at fitting the data. 42 | It is literally bending over backward to account for the data's mini details. 43 | But most of the data's irregularities are just noise, and they should in fact be ignored. 44 | So because the model overfits, it can't generalize to new data. 45 | 46 | % ============================================== % 47 | \section*{Interlude - The training set, the test set, and the happy data scientist} 48 | % ---------------------------------------------- % 49 | To be able to detect overfitting, \textbf{you should always evaluate your model on new data}.\\ 50 | \\ 51 | New data means, data that your model hasn't seen during training.\\ 52 | \\ 53 | It's the only way to make sure your model isn't \textit{recalling}. 54 | To do so, now and forever, you must always divide your dataset in (at least) two parts: one for the training, and one for the evaluation of your model. -------------------------------------------------------------------------------- /module07/exercises/m07ex00.tex: -------------------------------------------------------------------------------- 1 | \chapter{Exercise 00} 2 | \input{exercises/en.ex00_interlude.tex} 3 | \newpage 4 | \extitle{Multivariate Hypothesis - Iterative Version} 5 | \turnindir{ex00} 6 | \exnumber{00} 7 | \exfiles{prediction.py} 8 | \exforbidden{None} 9 | \makeheaderfilesforbidden 10 | 11 | % ================================== % 12 | \section*{Objective} 13 | % ---------------------------------- % 14 | Manipulate the hypothesis to make a prediction.\\ 15 | \newline 16 | You must implement the following formula as a function: 17 | 18 | $$ 19 | \begin{matrix} 20 | \hat{y}^{(i)} = \theta_0 + \theta_1 x_{1}^{(i)} + \dots + \theta_n x_{n}^{(i)} && & \text{ for i = 1, ..., m} 21 | \end{matrix} 22 | $$ 23 | Where: 24 | \begin{itemize} 25 | \item $\hat{y}$ is a vector of dimension $m$: the vector of predicted values 26 | \item $\hat{y}^{(i)}$ is the i$^\text{th}$ component of the $\hat{y}$ vector: the predicted value for the i$^\text{th}$ example 27 | \item $\theta$ is a vector of dimension $(n + 1)$: the parameter vector 28 | \item $\theta_j$ is the j$^\text{th}$ component of the parameter vector 29 | \item $X$ is a matrix of dimensions $(m \times n)$: the design matrix 30 | \item $x^{(i)}$ is the i$^\text{th}$ row of the $X$ matrix: the feature vector of the i$^\text{th}$ example 31 | \item $x_{j}$ is the j$^\text{th}$ column of the $X$ matrix 32 | \item $x_j^{(i)}$ is the element at the intersection of the i$^\text{th}$ row and the j$^\text{th}$ column of the $X$ matrix: the j$^\text{th}$ feature of the i$^\text{th}$ example 33 | \end{itemize} 34 | \newpage 35 | % ================================== % 36 | \section*{Instructions} 37 | % ---------------------------------- % 38 | 39 | In the \texttt{prediction.py} file, create the following function as per the instructions given below:\\ 40 | \newline 41 | \par 42 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 43 | def simple_predict(x, theta): 44 | """Computes the prediction vector y_hat from two non-empty numpy.array. 
45 | Args: 46 | x: has to be an numpy.array, a matrix of dimension m * n. 47 | theta: has to be an numpy.array, a vector of dimension (n + 1) * 1. 48 | Return: 49 | y_hat as a numpy.array, a vector of dimension m * 1. 50 | None if x or theta are empty numpy.array. 51 | None if x or theta dimensions are not matching. 52 | None if x or theta is not of expected type. 53 | Raises: 54 | This function should not raise any Exception. 55 | """ 56 | ... Your code ... 57 | \end{minted} 58 | 59 | % ================================== % 60 | \section*{Examples} 61 | % ---------------------------------- % 62 | 63 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 64 | import numpy as np 65 | x = np.arange(1,13).reshape((4,-1)) 66 | 67 | # Example 1: 68 | theta1 = np.array([5, 0, 0, 0]).reshape((-1, 1)) 69 | simple_predict(x, theta1) 70 | # Ouput: 71 | array([[5.], [5.], [5.], [5.]]) 72 | # Do you understand why y_hat contains only 5's here? 73 | 74 | 75 | # Example 2: 76 | theta2 = np.array([0, 1, 0, 0]).reshape((-1, 1)) 77 | simple_predict(x, theta2) 78 | # Output: 79 | array([[ 1.], [ 4.], [ 7.], [10.]]) 80 | # Do you understand why y_hat == x[:,0] here? 81 | 82 | 83 | # Example 3: 84 | theta3 = np.array([-1.5, 0.6, 2.3, 1.98]).reshape((-1, 1)) 85 | simple_predict(x, theta3) 86 | # Output: 87 | array([[ 9.64], [24.28], [38.92], [53.56]]) 88 | 89 | 90 | # Example 4: 91 | theta4 = np.array([-3, 1, 2, 3.5]).reshape((-1, 1)) 92 | simple_predict(x, theta4) 93 | # Output: 94 | array([[12.5], [32. ], [51.5], [71. ]]) 95 | \end{minted} -------------------------------------------------------------------------------- /module07/exercises/m07ex02.tex: -------------------------------------------------------------------------------- 1 | \chapter{Exercise 02} 2 | \input{exercises/en.ex02_interlude.tex} 3 | \newpage 4 | \extitle{Vectorized Loss Function} 5 | \turnindir{ex02} 6 | \exnumber{02} 7 | \exfiles{loss.py} 8 | \exforbidden{None} 9 | \makeheaderfilesforbidden 10 | 11 | % ================================= % 12 | \section*{Objective} 13 | % --------------------------------- % 14 | Understand and manipulate loss function for multivariate linear regression.\\ 15 | \newline 16 | You must implement the following formula as a function: 17 | 18 | $$ 19 | \begin{matrix} 20 | J(\theta) & = & \frac{1}{2m}(\hat{y} - y) \cdot(\hat{y}- y) 21 | \end{matrix} 22 | $$ 23 | Where: 24 | \begin{itemize} 25 | \item $\hat{y}$ is a vector of dimension $m$, the vector of predicted values 26 | \item $y$ is a vector of dimension $m$, the vector of expected values 27 | \end{itemize} 28 | % ================================= % 29 | \section*{Instructions} 30 | % --------------------------------- % 31 | In the \texttt{loss.py} file create the following function as per the instructions given below:\\ 32 | \newline 33 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 34 | def loss_(y, y_hat): 35 | """Computes the mean squared error of two non-empty numpy.array, without any for loop. 36 | The two arrays must have the same dimensions. 37 | Args: 38 | y: has to be an numpy.array, a vector. 39 | y_hat: has to be an numpy.array, a vector. 40 | Return: 41 | The mean squared error of the two vectors as a float. 42 | None if y or y_hat are empty numpy.array. 43 | None if y and y_hat does not share the same dimensions. 44 | None if y or y_hat is not of expected type. 45 | Raises: 46 | This function should not raise any Exception. 47 | """ 48 | ... Your code ... 
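    # Purely illustrative sketch (one possible vectorized approach, not the required
    # solution): a direct transcription of the J(theta) formula given above, assuming
    # numpy is imported as np and both inputs are m * 1 column vectors.
    #   diff = y_hat - y
    #   return float(diff.T.dot(diff) / (2 * y.shape[0]))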
49 | \end{minted} 50 | \newpage 51 | % ================================= % 52 | \section*{Examples} 53 | % --------------------------------- % 54 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 55 | import numpy as np 56 | X = np.array([0, 15, -9, 7, 12, 3, -21]).reshape((-1, 1)) 57 | Y = np.array([2, 14, -13, 5, 12, 4, -19]).reshape((-1, 1)) 58 | 59 | # Example 1: 60 | loss_(X, Y) 61 | # Output: 62 | 2.142857142857143 63 | 64 | # Example 2: 65 | loss_(X, X) 66 | # Output: 67 | 0.0 68 | \end{minted} -------------------------------------------------------------------------------- /module07/exercises/m07ex03.tex: -------------------------------------------------------------------------------- 1 | \chapter{Exercise 03} 2 | \extitle{Multivariate Linear Gradient} 3 | \input{exercises/en.ex03_interlude.tex} 4 | \newpage 5 | \turnindir{ex03} 6 | \exnumber{03} 7 | \exfiles{gradient.py} 8 | \exforbidden{None} 9 | \makeheaderfilesforbidden 10 | 11 | % ================================= % 12 | \section*{Objective} 13 | % --------------------------------- % 14 | Understand and manipulate concept of gradient in the case of multivariate formulation.\\ 15 | \newline 16 | You must implement the following formula as a function: 17 | 18 | $$ 19 | \nabla(J) = \frac{1}{m} {X'}^T(X'\theta - y) 20 | $$ 21 | Where: 22 | \begin{itemize} 23 | \item $\nabla(J)$ is a vector of dimension $(n + 1)$, the gradient vector 24 | \item $X$ is a matrix of dimensions $(m \times n)$, the design matrix 25 | \item $X'$ is a matrix of dimensions $(m \times (n + 1))$, the design matrix onto which a column of $1$'s was added as a first column 26 | \item $\theta$ is a vector of dimension $(n + 1)$, the parameter vector 27 | \item $y$ is a vector of dimension $m$, the vector of expected values 28 | \end{itemize} 29 | 30 | % ================================= % 31 | \section*{Instructions} 32 | % --------------------------------- % 33 | In the \texttt{gradient.py} file, create the following function as per the instructions given below:\\ 34 | \newline 35 | \par 36 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 37 | def gradient(x, y, theta): 38 | """Computes a gradient vector from three non-empty numpy.array, without any for-loop. 39 | The three arrays must have the compatible dimensions. 40 | Args: 41 | x: has to be an numpy.array, a matrix of dimension m * n. 42 | y: has to be an numpy.array, a vector of dimension m * 1. 43 | theta: has to be an numpy.array, a vector (n +1) * 1. 44 | Return: 45 | The gradient as a numpy.array, a vector of dimensions n * 1, 46 | containg the result of the formula for all j. 47 | None if x, y, or theta are empty numpy.array. 48 | None if x, y and theta do not have compatible dimensions. 49 | None if x, y or theta is not of expected type. 50 | Raises: 51 | This function should not raise any Exception. 52 | """ 53 | ... Your code ... 
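    # Purely illustrative sketch, transcribing the gradient formula given above
    # (X' is x with a column of ones prepended), assuming numpy is imported as np:
    #   x_prime = np.hstack((np.ones((x.shape[0], 1)), x))
    #   return x_prime.T.dot(x_prime.dot(theta) - y) / x.shape[0]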
54 | \end{minted} 55 | 56 | % ================================= % 57 | \section*{Examples} 58 | % --------------------------------- % 59 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 60 | import numpy as np 61 | x = np.array([ 62 | [ -6, -7, -9], 63 | [ 13, -2, 14], 64 | [ -7, 14, -1], 65 | [ -8, -4, 6], 66 | [ -5, -9, 6], 67 | [ 1, -5, 11], 68 | [ 9, -11, 8]]) 69 | y = np.array([2, 14, -13, 5, 12, 4, -19]).reshape((-1, 1)) 70 | theta1 = np.array([0, 3, 0.5, -6]).reshape((-1, 1)) 71 | 72 | # Example : 73 | gradient(x, y, theta1) 74 | # Output: 75 | array([[ -33.71428571], [ -37.35714286], [183.14285714], [-393.]]) 76 | 77 | 78 | # Example : 79 | theta2 = np.array([0, 0, 0, 0]).reshape((-1, 1)) 80 | gradient(x, y, theta2) 81 | # Output: 82 | array([[ -0.71428571], [ 0.85714286], [23.28571429], [-26.42857143]]) 83 | \end{minted} -------------------------------------------------------------------------------- /module07/exercises/m07ex05.tex: -------------------------------------------------------------------------------- 1 | \chapter{Exercise 05} 2 | \extitle{Multivariate Linear Regression with Class} 3 | \turnindir{ex05} 4 | \exnumber{05} 5 | \exfiles{mylinearregression.py} 6 | \exforbidden{sklearn} 7 | \makeheaderfilesforbidden 8 | 9 | % ================================= % 10 | \section*{Objective} 11 | % --------------------------------- % 12 | Upgrade your Linear Regression class so it can handle multivariate hypothesis. 13 | 14 | % ================================= % 15 | \section*{Instructions} 16 | % --------------------------------- % 17 | You are expected to upgrade your own \texttt{MyLinearRegression} class from \textbf{Module01}.\\ 18 | \newline 19 | You will upgrade (at least) the following methods to support multivariate linear regression: 20 | \begin{itemize} 21 | \item \texttt{predict\_(self, x)}, 22 | \item \texttt{fit\_(self, x, y)}. 23 | \end{itemize} 24 | Depending on how you implement your methods, you might need to update other methods. 25 | 26 | % ================================= % 27 | \section*{Examples} 28 | % --------------------------------- % 29 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 30 | import numpy as np 31 | from mylinearregression import MyLinearRegression as MyLR 32 | X = np.array([[1., 1., 2., 3.], [5., 8., 13., 21.], [34., 55., 89., 144.]]) 33 | Y = np.array([[23.], [48.], [218.]]) 34 | mylr = MyLR([[1.], [1.], [1.], [1.], [1]]) 35 | 36 | 37 | # Example 0: 38 | y_hat = mylr.predict_(X) 39 | # Output: 40 | array([[8.], [48.], [323.]]) 41 | 42 | 43 | # Example 1: 44 | mylr.loss_elem_(Y, y_hat) 45 | # Output: 46 | array([[225.], [0.], [11025.]]) 47 | 48 | 49 | # Example 2: 50 | mylr.loss_(Y, y_hat) 51 | # Output: 52 | 1875.0 53 | 54 | 55 | # Example 3: 56 | mylr.alpha = 1.6e-4 57 | mylr.max_iter = 200000 58 | mylr.fit_(X, Y) 59 | mylr.thetas 60 | # Output: 61 | array([[18.188..], [2.767..], [-0.374..], [1.392..], [0.017..]]) 62 | 63 | 64 | # Example 4: 65 | y_hat = mylr.predict_(X) 66 | # Output: 67 | array([[23.417..], [47.489..], [218.065...]]) 68 | 69 | 70 | # Example 5: 71 | mylr.loss_elem_(Y, y_hat) 72 | # Output: 73 | array([[0.174..], [0.260..], [0.004..]]) 74 | 75 | 76 | # Example 6: 77 | mylr.loss_(Y, y_hat) 78 | # Output: 79 | 0.0732.. 
80 | \end{minted} -------------------------------------------------------------------------------- /module07/exercises/m07ex07.tex: -------------------------------------------------------------------------------- 1 | \chapter{Exercise 07} 2 | \extitle{Polynomial models} 3 | \input{exercises/en.ex07_interlude.tex} 4 | \newpage 5 | \turnindir{ex07} 6 | \exnumber{07} 7 | \exfiles{polynomial\_model.py} 8 | \exforbidden{sklearn} 9 | \makeheaderfilesforbidden 10 | 11 | 12 | % ================================= % 13 | \section*{Objective} 14 | % --------------------------------- % 15 | Broaden your comprehension of the concept of hypothesis.\\ 16 | \newline 17 | Create a function that takes a vector $x$ of dimension $m$ and an integer $n$ as input, and returns a matrix of dimensions $(m \times n)$. 18 | Each column of the matrix contains $x$ raised to the power of $j$, for $j = 1, 2, ..., n$: 19 | 20 | $$ 21 | \begin{matrix} 22 | x &|& x^2 &|& x^3 &|& \ldots &|& x^n 23 | \end{matrix} 24 | $$ 25 | Such a matrix is called a \textbf{Vandermonde matrix}. 26 | 27 | % ================================= % 28 | \section*{Instructions} 29 | % --------------------------------- % 30 | In the \texttt{polynomial\_model.py} file, create the following function as per the instructions given below:\\ 31 | \\ 32 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 33 | def add_polynomial_features(x, power): 34 | """Add polynomial features to vector x by raising its values up to the power given in argument. 35 | Args: 36 | x: has to be an numpy.array, a vector of dimension m * 1. 37 | power: has to be an int, the power up to which the components of vector x are going to be raised. 38 | Return: 39 | The matrix of polynomial features as a numpy.array, of dimension m * n, 40 | containing the polynomial feature values for all training examples. 41 | None if x is an empty numpy.array. 42 | None if x or power is not of expected type. 43 | Raises: 44 | This function should not raise any Exception. 45 | """ 46 | ... Your code ... 47 | \end{minted} 48 | 49 | % ================================= % 50 | \section*{Examples} 51 | % --------------------------------- % 52 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 53 | import numpy as np 54 | x = np.arange(1,6).reshape(-1, 1) 55 | 56 | 57 | # Example 0: 58 | add_polynomial_features(x, 3) 59 | # Output: 60 | array([[ 1, 1, 1], 61 | [ 2, 4, 8], 62 | [ 3, 9, 27], 63 | [ 4, 16, 64], 64 | [ 5, 25, 125]]) 65 | 66 | 67 | # Example 1: 68 | add_polynomial_features(x, 6) 69 | # Output: 70 | array([[ 1, 1, 1, 1, 1, 1], 71 | [ 2, 4, 8, 16, 32, 64], 72 | [ 3, 9, 27, 81, 243, 729], 73 | [ 4, 16, 64, 256, 1024, 4096], 74 | [ 5, 25, 125, 625, 3125, 15625]]) 75 | \end{minted} -------------------------------------------------------------------------------- /module07/exercises/m07ex08.tex: -------------------------------------------------------------------------------- 1 | \chapter{Exercise 08} 2 | \extitle{Let's Train Polynomial Models!} 3 | \input{exercises/en.ex08_interlude.tex} 4 | \newpage 5 | \turnindir{ex08} 6 | \exnumber{08} 7 | \exfiles{polynomial\_train.py} 8 | \exforbidden{sklearn} 9 | \makeheaderfilesforbidden 10 | 11 | 12 | % ================================= % 13 | \section*{Objective} 14 | % --------------------------------- % 15 | Manipulation of polynomial hypothesis.\\ 16 | \newline 17 | It's training time! 
Let's train some polynomial models, and see if those with higher polynomial degree perform better!\\ 18 | \\ 19 | Write a program which: 20 | \begin{itemize} 21 | \item Reads and loads \texttt{are\_blue\_pills\_magics.csv} dataset 22 | \item Trains \textbf{six} separate Linear Regression models with polynomial hypothesis with degrees ranging from 1 to 6 23 | \item Evaluates and prints evaluation score (MSE) of each of the six models 24 | \item Plots a bar plot showing the MSE score of the models in function of the polynomial degree of the hypothesis 25 | \item Plots the 6 models and the data points on the same figure 26 | Use lineplot style for the models and scatterplot for the data points 27 | Add more prediction points to have smooth curves for the models 28 | \end{itemize} 29 | You will use \texttt{Micrograms} as feature and \texttt{Score} as target.\\ 30 | \\ 31 | The implementation of the method \texttt{fit\_} based on the simple gradient descent lacks of efficiency and sturdiness, 32 | which will lead to the impossibility of converging for polynomial models with high degree or with features having several orders of magnitude of difference. 33 | See the starting values below for some thetas to help you get acceptable parameters values for your models.\\ 34 | \\ 35 | \hint{ 36 | According to evaluation score only, what is the best hypothesis (or model) between the trained models? 37 | According to the last plot, why is it not true? 38 | Which phenomenon do you observe here? 39 | } 40 | 41 | \newpage 42 | % ================================= % 43 | \subsection*{Starting points} 44 | % --------------------------------- % 45 | You will not be able to get acceptable parameters for models 4, 5 and 6. 46 | Thus you can start the fit process for those models with:\\ 47 | \newline 48 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 49 | theta4 = np.array([[-20],[ 160],[ -80],[ 10],[ -1]]).reshape(-1,1) 50 | theta5 = np.array([[1140],[ -1850],[ 1110],[ -305],[ 40],[ -2]]).reshape(-1,1) 51 | theta6 = np.array([[9110],[ -18015],[ 13400],[ -4935],[ 966],[ -96.4],[ 3.86]]).reshape(-1,1) 52 | \end{minted} 53 | 54 | % ================================= % 55 | \subsection*{Teminology Note} 56 | % --------------------------------- % 57 | The \textbf{degree} of a polynomial expression is its highest exponent. 58 | E.g.: The polynomial degree of $5x^3 - x^6 + 2 x^2$ is $6$.\\ 59 | \\ 60 | In this equation, you don't see any terms with $x$, $x^4$ and $x^5$,but we can still say they exist. It's just that their coefficient is $0$. 61 | This means that a polynomial linear regression model can lower the impact of any term by bringing its corresponding $\theta_j$ closer to $0$. 62 | 63 | % ================================= % 64 | \subsection*{Remark} 65 | % --------------------------------- % 66 | When you are evaluated, it will be wise to run your program at the beginning of the evaluation as it can take several minutes to train the models. 
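% ================================= %
\subsection*{A possible skeleton}
% --------------------------------- %
If you wonder how to organise \texttt{polynomial\_train.py}, here is a minimal, purely illustrative sketch (not the expected solution).
It assumes the \texttt{add\_polynomial\_features} function from ex07 and the \texttt{MyLinearRegression} class from ex05;
the \texttt{alpha} and \texttt{max\_iter} values are placeholders you will have to tune, and the starting thetas given above should replace \texttt{np.ones} for degrees 4 to 6.\\
\newline
\begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python}
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from polynomial_model import add_polynomial_features
from mylinearregression import MyLinearRegression as MyLR

data = pd.read_csv("are_blue_pills_magics.csv")
x = data[["Micrograms"]].values
y = data[["Score"]].values

mse_scores = []
for degree in range(1, 7):
    x_poly = add_polynomial_features(x, degree)
    model = MyLR(np.ones((degree + 1, 1)))
    model.alpha = 1e-5        # placeholder value: tune it for each degree
    model.max_iter = 100000   # placeholder value: tune it for each degree
    model.fit_(x_poly, y)
    y_hat = model.predict_(x_poly)
    mse_scores.append(float(np.mean((y_hat - y) ** 2)))
    print(f"Degree {degree}: MSE = {mse_scores[-1]}")

plt.bar(range(1, 7), mse_scores)
plt.xlabel("Polynomial degree")
plt.ylabel("MSE")
plt.show()
\end{minted}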
67 | -------------------------------------------------------------------------------- /module07/exercises/m07ex10.tex: -------------------------------------------------------------------------------- 1 | \chapter{Exercise 10} 2 | \extitle{Machine Learning for Grown-ups: Trantor guacamole business} 3 | \turnindir{ex10} 4 | \exnumber{10} 5 | \exfiles{space\_avocado.py, benchmark\_train.py, models.[csv/yml/pickle]} 6 | \exforbidden{sklearn} 7 | \makeheaderfilesforbidden 8 | 9 | % ================================= % 10 | \section*{Objective} 11 | % --------------------------------- % 12 | Let's do Machine Learning for "real"! 13 | 14 | % ================================= % 15 | \section*{Introduction} 16 | % --------------------------------- % 17 | The dataset is made of 5 columns: 18 | \begin{itemize} 19 | \item \textbf{index}: not relevant 20 | \item \textbf{weight}: the avocado weight order (in tons) 21 | \item \textbf{prod\_distance}: distance from where the avocado ordered is produced (in Mkm) 22 | \item \textbf{time\_delivery}: time between the order and the receipt (in days) 23 | \item \textbf{target}: price of the order (in trantorian unit) 24 | \end{itemize} 25 | It contains the data of all the avocado purchases made by Trantor administration (guacamole is a serious business there). 26 | \newpage 27 | % ================================= % 28 | \section*{Instructions} 29 | % --------------------------------- % 30 | You have to explore different models and select the best you find.\\ 31 | \newline 32 | To do this: 33 | \begin{itemize} 34 | \item Split your \texttt{space\_avocado.csv} dataset into a training and a test set. 35 | \item Use your \texttt{polynomial\_features} method on your training set. 36 | \item Consider several Linear Regression models with polynomial hypothesis with a maximum degree of 4. 37 | \item Evaluate your models on the test set. 38 | \end{itemize} 39 | 40 | According to your model evaluations, what is the best hypothesis you can get? 41 | \begin{itemize} 42 | \item Plot the evaluation curve which help you to select the best model (evaluation metrics vs models). 43 | \item Plot the true price and the predicted price obtained via your best model (3D representation or 3 scatterplots). 44 | \end{itemize} 45 | 46 | The training of all your models can take a long time.\\ 47 | \\ 48 | Thus you need to train only the best one during the correction.\\ 49 | \\ 50 | But, you should return in \texttt{benchmark\_train.py} the program which performs the training of all the models and save the parameters of the different models into a file.\\ 51 | \\ 52 | In \texttt{models.[csv/yml/pickle]} one must find the parameters of all the models you have explored and trained.\\ 53 | \\ 54 | In \texttt{space\_avocado.py} train the model based on the best hypothesis you find and load the other models from \texttt{models.[csv/yml/pickle]}.\\ 55 | \newline 56 | Then evaluate and plot the different graphics as asked before. 
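\\
\newline
How you store the parameters of the models you explored is up to you.
Below is a purely illustrative sketch assuming you pick the \texttt{pickle} option; the model names, theta shapes and scores are placeholders, not the expected content of your file.\\
\newline
\begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python}
import pickle
import numpy as np

# Placeholder entries standing in for your trained thetas and evaluation scores.
models = {
    "weight_deg2_dist_deg1_time_deg1": {"thetas": np.zeros((5, 1)), "mse": 0.0},
    "weight_deg4_dist_deg4_time_deg4": {"thetas": np.zeros((13, 1)), "mse": 0.0},
}

# benchmark_train.py side: save every model you explored.
with open("models.pickle", "wb") as f:
    pickle.dump(models, f)

# space_avocado.py side: load them back to compare and plot.
with open("models.pickle", "rb") as f:
    saved_models = pickle.load(f)
print(saved_models.keys())
\end{minted}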
-------------------------------------------------------------------------------- /module07/useful_resources.tex: -------------------------------------------------------------------------------- 1 | %******************************************************************************% 2 | % % 3 | % Section usefull ressources % 4 | % for ML Modules % 5 | % % 6 | %******************************************************************************% 7 | 8 | 9 | \chapter*{Notions covered and learning resources} 10 | 11 | \section*{What notions will be covered by this module?} 12 | 13 | \begin{itemize} 14 | \item Multivariate linear hypothesis 15 | \item Multivariate linear gradient descent 16 | \item Polynomial models 17 | \item Training set, test set, overfitting 18 | \end{itemize} 19 | 20 | \section*{Useful Resources} 21 | 22 | You are recommended to use the following material: \href{https://www.coursera.org/learn/machine-learning}{Machine Learning MOOC - Stanford}\\ 23 | \newline 24 | This series of videos is available at no cost: simply log in, select "Enroll for Free", and click "Audit" at the bottom of the pop-up window.\\ 25 | \newline 26 | The following sections of the course are particularly relevant to today's exercises: 27 | 28 | \subsection*{Week 2: Regression with multiple input variables} 29 | 30 | \subsubsection*{Multiple linear regression} 31 | \begin{itemize} 32 | \item Multiple features 33 | \item Gradient descent for multiple linear regression 34 | \end{itemize} 35 | 36 | \subsubsection*{Gradient descent in practice} 37 | \begin{itemize} 38 | \item Feature scaling part 1 39 | \item Feature scaling part 2 40 | \item Checking gradient descent for convergence 41 | \item Choosing the learning rate 42 | \item Feature engineering 43 | \item Polynomial regression 44 | \end{itemize} 45 | \emph{All videos above are available also on this \href{https://youtube.com/playlist?list=PLkDaE6sCZn6FNC6YRfRQc_FbeQrF8BwGI&feature=shared}{Andrew Ng's YouTube playlist}, videos 21 and from 24 to 30} -------------------------------------------------------------------------------- /module08/Makefile: -------------------------------------------------------------------------------- 1 | # List the pdf's to build. 
foo.tex will produce foo.pdf 2 | TARGETS = en.subject.pdf 3 | 4 | # List the files included in the slides 5 | DEPS = exercises/en.ex00_interlude.tex \ 6 | exercises/en.ex01_interlude.tex \ 7 | exercises/en.ex02_interlude.tex \ 8 | exercises/en.ex03_interlude.tex \ 9 | exercises/en.ex04_interlude.tex \ 10 | exercises/en.ex05_interlude.tex \ 11 | exercises/en.ex08_interlude.tex \ 12 | ../resources/latex/redefinition-commands.tex \ 13 | ../resources/42ai_bootcamps/en.instructions.tex \ 14 | ../resources/42ai_bootcamps/en.acknowledgements.tex \ 15 | useful_resources.tex 16 | 17 | # Relative path to the LaTeX documentclass setup files 18 | # Adapt as needed 19 | # RELPATH = $(shell git rev-parse --show-toplevel)/resources/latex/ 20 | # RELPATH for github actions: 21 | RELPATH = $(shell dirname `pwd`)/resources/latex/ 22 | 23 | # You should not touch this either 24 | include $(RELPATH)/Makefile.LaTeX 25 | -------------------------------------------------------------------------------- /module08/assets/-log_1-x.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module08/assets/-log_1-x.png -------------------------------------------------------------------------------- /module08/assets/-log_x.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module08/assets/-log_x.png -------------------------------------------------------------------------------- /module08/assets/42ai_logo.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module08/assets/42ai_logo.pdf -------------------------------------------------------------------------------- /module08/assets/Default.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module08/assets/Default.png -------------------------------------------------------------------------------- /module08/assets/Evaluate.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module08/assets/Evaluate.png -------------------------------------------------------------------------------- /module08/assets/Improve.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module08/assets/Improve.png -------------------------------------------------------------------------------- /module08/assets/Predict.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module08/assets/Predict.png -------------------------------------------------------------------------------- /module08/assets/figure1_3Dplot_dataset.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module08/assets/figure1_3Dplot_dataset.png -------------------------------------------------------------------------------- /module08/assets/log_loss.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module08/assets/log_loss.png -------------------------------------------------------------------------------- /module08/assets/sigmoid.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module08/assets/sigmoid.png -------------------------------------------------------------------------------- /module08/exercises/en.ex01_interlude.tex: -------------------------------------------------------------------------------- 1 | %******************************************************************************% 2 | % % 3 | % Interlude % 4 | % for Machine Learning module % 5 | % % 6 | %******************************************************************************% 7 | 8 | % =============================== % 9 | \section*{Interlude} 10 | % =============================== % 11 | \subsection*{Predict II : Hypothesis} 12 | % ------------------------------- % 13 | 14 | We hope your curiosity led you to plot your sigmoid function. 15 | If you didn't, well here is what it looks like: 16 | 17 | \begin{figure}[!h] 18 | \centering 19 | \includegraphics[scale=0.55]{assets/sigmoid.png} 20 | \caption{Sigmoid} 21 | \end{figure} 22 | As you can see, \textbf{the sigmoid's output values range from $0$ to $1$}.\\ 23 | \\ 24 | You can input real numbers as big as you want (positive or negative), the output 25 | will always land within this range. 26 | This will be very helpful and convenient for the next part. 27 | 28 | \newpage 29 | 30 | % =============================== % 31 | \subsection*{Logistic Hypothesis} 32 | % ------------------------------- % 33 | 34 | Now you've written your sigmoid function, let's take a look at \textbf{the logistic regression 35 | hypothesis}. 
36 | 37 | $$ 38 | \begin{matrix} 39 | \hat{y}^{(i)} & = & h_\theta(x^{(i)}) & = & \text{sigmoid}(\theta \cdot x'^{(i)}) 40 | & = &\cfrac{1} {1 + e^{-\theta \cdot x'^{(i)}}} & &\text{ for i = 1, \dots, m} 41 | \end{matrix} 42 | $$ 43 | \textbf{This is simply the sigmoid function applied on top 44 | of the linear regression hypothesis!!}\\ 45 | \\ 46 | It can be vectorized as: 47 | \\ 48 | $$ 49 | \begin{matrix} 50 | \hat{y} & = & h_\theta(X) & = & \text{sigmoid}(X'\theta) & = &\cfrac{1} {1 + e^{-X'\theta}} 51 | \end{matrix} 52 | $$ 53 | As we said before: the \textbf{sigmoid function} is just a way 54 | to \textbf{map the result of a linear equation onto a $[0,1]$ value range}.\\ 55 | \\ 56 | This transformation allows us to interpret the result 57 | as a \textbf{probability that an individual or observation belongs to of a given class}.\\ -------------------------------------------------------------------------------- /module08/exercises/en.ex04_interlude.tex: -------------------------------------------------------------------------------- 1 | %******************************************************************************% 2 | % % 3 | % Interlude % 4 | % for Machine Learning module % 5 | % % 6 | %******************************************************************************% 7 | 8 | % =============================================== % 9 | \section*{Interlude} 10 | % =============================================== % 11 | \subsection*{Improve} 12 | % ----------------------------------------------- % 13 | 14 | \begin{figure}[!h] 15 | \centering 16 | \includegraphics[scale=0.25]{assets/Improve.png} 17 | %\caption{The Learning Cycle: Improve} 18 | \end{figure} 19 | \noindent{Now we want to improve the algorithm's 20 | performance, or in other words, reduce the loss of its predictions.}\\ 21 | \\ 22 | This brings us (again) to calculating the gradient, which will tell us by 23 | how much and in which direction the theta parameters belonging to the model should be adjusted. 
24 | 25 | \newpage 26 | % =============================================== % 27 | \subsection*{The logistic gradient} 28 | % ----------------------------------------------- % 29 | If you remember, to calculate the gradient, we start with the loss function and we derive it 30 | with respect to each of the theta parameters.\\ 31 | \\ 32 | If you know multivariate calculus already, you can try it for yourself, otherwise we've got you covered:\\ 33 | 34 | $$ 35 | \begin{matrix} 36 | \nabla(J)_0 & = &\cfrac{1}{m}\sum_{i=1}^{m}(h_{\theta}(x^{(i)}) - y^{(i)}) & \\ 37 | \nabla(J)_j & = &\cfrac{1}{m}\sum_{i=1}^{m}(h_{\theta}(x^{(i)}) - y^{(i)})x_{j}^{(i)} & \text{ for j = 1, ..., n} 38 | \end{matrix} 39 | $$ 40 | Where: 41 | \begin{itemize} 42 | \item $\nabla(J)$ is a vector of dimension $(n + 1)$, the gradient vector 43 | \item $\nabla(J)_j$ is the j$^\text{th}$ component of $\nabla(J)$, 44 | the partial derivative of $J$ with respect to $\theta_j$ 45 | \item $y$ is a vector of dimension $m$, the vector of expected values 46 | \item $y^{(i)}$ is a scalar, the i$^\text{th}$ component of vector $y$ 47 | \item $x^{(i)}$ is the feature vector of the i$^\text{th}$ example 48 | \item $x^{(i)}_j$ is a scalar, the j$^\text{th}$ feature value of the i$^\text{th}$ example 49 | \item $h_{\theta}(x^{(i)})$ is a scalar, the model's estimation of $y^{(i)}$\\ 50 | \end{itemize} 51 | This formula should be very familiar to you, as it's the same one you used to calculate the linear regression gradient!\\ 52 | \\ 53 | The only difference is that $h_{\theta}(x^{(i)})$ corresponds to \textbf{the logistic regression hypothesis instead of the linear regression hypothesis}.\\ 54 | \\ 55 | In other words:\\ 56 | $$ 57 | h_{\theta}(x^{(i)}) = \text{sigmoid}( \theta \cdot x'^{(i)}) = \cfrac{1} {1 + e^{-\theta \cdot x'^{(i)}}} 58 | $$ 59 | \\ 60 | Instead of: 61 | \\ 62 | $$ 63 | \cancel{h_{\theta}(x^{(i)}) = \theta \cdot x'^{(i)}} 64 | $$ 65 | -------------------------------------------------------------------------------- /module08/exercises/en.ex05_interlude.tex: -------------------------------------------------------------------------------- 1 | %******************************************************************************% 2 | % % 3 | % Interlude % 4 | % for Machine Learning module % 5 | % % 6 | %******************************************************************************% 7 | 8 | % ============================================== % 9 | \section*{Interlude} 10 | % ============================================== % 11 | \subsection*{Vectorized Logistic Gradient} 12 | % ---------------------------------------------- % 13 | 14 | Given the previous logistic gradient formula, it's quite easy to produce a vectorized version of it. 
15 | Actually, you almost already implemented it on module02!\\ 16 | \\ 17 | As with the previous exercise, \textbf{the only thing you have to change is your hypothesis} 18 | in order to calculate your logistic gradient.\\ 19 | 20 | $$ 21 | \begin{matrix} 22 | \nabla(J)_0 & = &\cfrac{1}{m}\sum_{i=1}^{m}(h_{\theta}(x^{(i)}) - y^{(i)}) & \\ 23 | \nabla(J)_j & = &\cfrac{1}{m}\sum_{i=1}^{m}(h_{\theta}(x^{(i)}) - y^{(i)})x_{j}^{(i)} & \text{ for j = 1, ..., n} 24 | \end{matrix} 25 | $$ 26 | 27 | % ============================================== % 28 | \subsection*{Vectorized Version} 29 | % ---------------------------------------------- % 30 | 31 | Can be vectorized the same way you did before: 32 | 33 | $$ 34 | \nabla(J) = \cfrac{1}{m} X'^T(h_\theta(X) - y) 35 | $$ 36 | -------------------------------------------------------------------------------- /module08/exercises/m08ex00.tex: -------------------------------------------------------------------------------- 1 | \chapter{Exercise 00} 2 | \input{exercises/en.ex00_interlude.tex} 3 | \newpage 4 | \extitle{Sigmoid} 5 | \turnindir{ex00} 6 | \exnumber{00} 7 | \exfiles{sigmoid.py} 8 | \exforbidden{None} 9 | \makeheaderfilesforbidden 10 | 11 | % ================================== % 12 | \section*{Objective} 13 | % ---------------------------------- % 14 | Introduction to the hypothesis in the context of logistic regression.\\ 15 | \\ 16 | You must implement the sigmoid function, given by the following formula: 17 | 18 | $$ 19 | \text{sigmoid}(x) = \cfrac{1} {1 + e^{-x}} 20 | $$ 21 | Where: 22 | \begin{itemize} 23 | \item $x$ is a scalar or a vector, 24 | \item $e$ is the contracted form for the exponential function. It is also a mathematical constant, named Euler's number. 25 | \end{itemize} 26 | This function is also known as \textbf{Standard logistic sigmoid function}. 27 | This explains the name \textit{logistic regression}.\\ 28 | \\ 29 | The sigmoid function transforms an input into a probability value, i.e. a value between 0 and 1. 30 | This probability value will then be used to classify the inputs. 31 | \\ 32 | % ================================== % 33 | \section*{Instructions} 34 | % ---------------------------------- % 35 | In the \texttt{sigmoid.py} file, write the following function as per the instructions below:\\ 36 | \\ 37 | \par 38 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 39 | def sigmoid_(x): 40 | """ 41 | Compute the sigmoid of a vector. 42 | Args: 43 | x: has to be a numpy.ndarray of shape (m, 1). 44 | Returns: 45 | The sigmoid value as a numpy.ndarray of shape (m, 1). 46 | None if x is an empty numpy.ndarray. 47 | Raises: 48 | This function should not raise any Exception. 49 | """ 50 | ... Your code ... 
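    # Purely illustrative sketch -- a direct transcription of the formula above,
    # assuming numpy is imported as np:
    #   return 1 / (1 + np.exp(-x))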
51 | \end{minted} 52 | 53 | % ================================== % 54 | \section*{Examples} 55 | % ---------------------------------- % 56 | 57 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 58 | # Example 1: 59 | x = np.array([[-4]]) 60 | sigmoid_(x) 61 | # Output: 62 | array([[0.01798620996209156]]) 63 | 64 | # Example 2: 65 | x = np.array([[2]]) 66 | sigmoid_(x) 67 | # Output: 68 | array([[0.8807970779778823]]) 69 | 70 | # Example 3: 71 | x = np.array([[-4], [2], [0]]) 72 | sigmoid_(x) 73 | # Output: 74 | array([[0.01798620996209156], [0.8807970779778823], [0.5]]) 75 | \end{minted} 76 | 77 | 78 | \info{ 79 | Our sigmoid formula is a special case of the logistic function below, with $L = 1$, $k = 1$ and $x_0 = 0$: 80 | $$ 81 | f(x) = \cfrac{L}{1 + e^{-k(x-x_0)}} 82 | $$ 83 | } -------------------------------------------------------------------------------- /module08/exercises/m08ex01.tex: -------------------------------------------------------------------------------- 1 | \chapter{Exercise 01} 2 | \input{exercises/en.ex01_interlude.tex} 3 | \newpage 4 | \extitle{Logistic Hypothesis} 5 | \turnindir{ex01} 6 | \exnumber{01} 7 | \exfiles{log\_pred.py} 8 | \exforbidden{None} 9 | \makeheaderfilesforbidden 10 | 11 | % ================================= % 12 | \section*{Objective} 13 | % --------------------------------- % 14 | Introduction to the hypothesis notion in the context of logistic regression.\\ 15 | \\ 16 | You must implement the following formula as a function:\\ 17 | 18 | $$ 19 | \begin{matrix} 20 | \hat{y} & = & \text{sigmoid}(X' \cdot \theta) & = & \cfrac{1} {1 + e^{-X' \cdot \theta}} 21 | \end{matrix} 22 | $$ 23 | Where: 24 | \begin{itemize} 25 | \item $X$ is a matrix of dimensions $(m \times n)$, the design matrix 26 | \item $X'$ is a matrix of dimensions $(m \times (n + 1))$, 27 | the design matrix onto which a column of $1$'s is added as a first column 28 | \item $\hat{y}$ is a vector of dimension $m$, the vector of predicted values 29 | \item $\theta$ is a vector of dimension $(n + 1)$, the vector of parameters 30 | \end{itemize} 31 | Be careful: 32 | \begin{itemize} 33 | \item the $x$ your function will get as an input corresponds to $X$, the $(m \times n)$ matrix. 34 | Not $X'$. 35 | \item $\theta$ is a vector of dimension $(n + 1)$ 36 | \end{itemize} 37 | \newpage 38 | % ================================= % 39 | \section*{Instructions} 40 | % --------------------------------- % 41 | In the \texttt{log\_pred.py} file, write the following function as per the instructions below:\\ 42 | \par 43 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 44 | def logistic_predict_(x, theta): 45 | """Computes the vector of prediction y_hat from two non-empty numpy.ndarray. 46 | Args: 47 | x: has to be an numpy.ndarray, a vector of dimension m * n. 48 | theta: has to be an numpy.ndarray, a vector of dimension (n + 1) * 1. 49 | Returns: 50 | y_hat as a numpy.ndarray, a vector of dimension m * 1. 51 | None if x or theta are empty numpy.ndarray. 52 | None if x or theta dimensions are not appropriate. 53 | Raises: 54 | This function should not raise any Exception. 55 | """ 56 | ... Your code ... 
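    # Purely illustrative sketch, assuming the sigmoid_ function from ex00 is
    # importable and numpy is imported as np:
    #   x_prime = np.hstack((np.ones((x.shape[0], 1)), x))
    #   return sigmoid_(x_prime.dot(theta))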
57 | \end{minted} 58 | 59 | % ================================= % 60 | \section*{Examples} 61 | % --------------------------------- % 62 | 63 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 64 | # Example 1 65 | x = np.array([4]).reshape((-1, 1)) 66 | theta = np.array([[2], [0.5]]) 67 | logistic_predict_(x, theta) 68 | # Output: 69 | array([[0.98201379]]) 70 | 71 | # Example 1 72 | x2 = np.array([[4], [7.16], [3.2], [9.37], [0.56]]) 73 | theta2 = np.array([[2], [0.5]]) 74 | logistic_predict_(x2, theta2) 75 | # Output: 76 | array([[0.98201379], 77 | [0.99624161], 78 | [0.97340301], 79 | [0.99875204], 80 | [0.90720705]]) 81 | 82 | # Example 3 83 | x3 = np.array([[0, 2, 3, 4], [2, 4, 5, 5], [1, 3, 2, 7]]) 84 | theta3 = np.array([[-2.4], [-1.5], [0.3], [-1.4], [0.7]]) 85 | logistic_predict_(x3, theta3) 86 | # Output: 87 | array([[0.03916572], 88 | [0.00045262], 89 | [0.2890505 ]]) 90 | \end{minted} -------------------------------------------------------------------------------- /module08/exercises/m08ex02.tex: -------------------------------------------------------------------------------- 1 | \chapter{Exercise 02} 2 | \input{exercises/en.ex02_interlude.tex} 3 | \newpage 4 | \extitle{Logistic Loss Function} 5 | \turnindir{ex02} 6 | \exnumber{02} 7 | \exfiles{log\_loss.py} 8 | \exforbidden{None} 9 | \makeheaderfilesforbidden 10 | 11 | % ================================= % 12 | \section*{Objective} 13 | % --------------------------------- % 14 | Understanding and manipulation of the loss function in the context of logistic regression.\\ 15 | \\ 16 | You must implement the following formula as a function: 17 | 18 | $$ 19 | J( \theta) = -\cfrac{1} {m} \lbrack \sum_{i = 1}^{m} y^{(i)}\log(\hat{y}^{(i)})) + (1 - y^{(i)})\log(1 - \hat{y}^{(i)})\rbrack 20 | $$ 21 | Where: 22 | \begin{itemize} 23 | \item $\hat{y}$ is a vector of dimension $m$, the vector of predicted values 24 | \item $\hat{y}^{(i)}$ is the $i^{th}$ component of the $\hat{y}$ vector 25 | \item $y$ is a vector of dimension $m$, the vector of expected values 26 | \item $y^{(i)}$ is the $i^{th}$ component of the $y$ vector 27 | \end{itemize} 28 | 29 | % ================================= % 30 | \section*{Instructions} 31 | % --------------------------------- % 32 | In the \texttt{log\_loss.py} file, write the following function as per the instructions below: 33 | \\ 34 | \par 35 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 36 | def log_loss_(y, y_hat, eps=1e-15): 37 | """ 38 | Computes the logistic loss value. 39 | Args: 40 | y: has to be an numpy.ndarray, a vector of shape m * 1. 41 | y_hat: has to be an numpy.ndarray, a vector of shape m * 1. 42 | eps: has to be a float, epsilon (default=1e-15) 43 | Returns: 44 | The logistic loss value as a float. 45 | None on any error. 46 | Raises: 47 | This function should not raise any Exception. 48 | """ 49 | ... Your code ... 50 | \end{minted} 51 | 52 | \hint{ 53 | The logarithmic function isn't defined in $0$. 54 | This means that if $y^{(i)} = 0$ you will get an error when you try to compute $log(y^{(i)})$. 55 | The purpose of the \texttt{eps} argument is to avoid $log(0)$ errors. 56 | It is a very small residual value we add to \texttt{y}, also referred to as `epsilon`. 
57 | } 58 | 59 | % ================================= % 60 | \section*{Examples} 61 | % --------------------------------- % 62 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 63 | # Example 1: 64 | y1 = np.array([1]).reshape((-1, 1)) 65 | x1 = np.array([4]).reshape((-1, 1)) 66 | theta1 = np.array([[2], [0.5]]) 67 | y_hat1 = logistic_predict_(x1, theta1) 68 | log_loss_(y1, y_hat1) 69 | # Output: 70 | 0.01814992791780973 71 | 72 | # Example 2: 73 | y2 = np.array([[1], [0], [1], [0], [1]]) 74 | x2 = np.array([[4], [7.16], [3.2], [9.37], [0.56]]) 75 | theta2 = np.array([[2], [0.5]]) 76 | y_hat2 = logistic_predict_(x2, theta2) 77 | log_loss_(y2, y_hat2) 78 | # Output: 79 | 2.4825011602474483 80 | 81 | # Example 3: 82 | y3 = np.array([[0], [1], [1]]) 83 | x3 = np.array([[0, 2, 3, 4], [2, 4, 5, 5], [1, 3, 2, 7]]) 84 | theta3 = np.array([[-2.4], [-1.5], [0.3], [-1.4], [0.7]]) 85 | y_hat3 = logistic_predict_(x3, theta3) 86 | log_loss_(y3, y_hat3) 87 | # Output: 88 | 2.9938533108607053 89 | \end{minted} 90 | 91 | \info{ 92 | This function is called \textbf{Cross-Entropy loss}, or \textbf{logistic loss}. 93 | For more information you can look at \href{https://en.wikipedia.org/wiki/Cross_entropy\#Cross-entropy\_error\_function\_and\_logistic\_regression}{this section} 94 | of the Cross entropy Wikipedia article. 95 | } -------------------------------------------------------------------------------- /module08/exercises/m08ex03.tex: -------------------------------------------------------------------------------- 1 | \chapter{Exercise 03} 2 | \extitle{Vectorized Logistic Loss Function} 3 | \input{exercises/en.ex03_interlude.tex} 4 | \newpage 5 | \turnindir{ex03} 6 | \exnumber{03} 7 | \exfiles{vec\_log\_loss.py} 8 | \exforbidden{any function that calculates the derivatives for you} 9 | \makeheaderfilesforbidden 10 | 11 | % ================================= % 12 | \section*{Objective} 13 | % --------------------------------- % 14 | Understanding and manipulation of loss function in the context of logistic regression.\\ 15 | \\ 16 | You must implement the following formula as a function: 17 | 18 | $$ 19 | J( \theta) = -\cfrac{1} {m} \lbrack y \cdot \log(\hat{y}) + (\vec{1} - y) \cdot \log(\vec{1} - \hat{y})\rbrack 20 | $$ 21 | \\ 22 | Where: 23 | \begin{itemize} 24 | \item $\hat{y}$ is a vector of dimension $m$, the vector of predicted values 25 | \item $y$ is a vector of dimension $m$, the vector of expected values 26 | \item $\vec{1}$ is a vector of dimension $m$, a vector full of 1's 27 | \end{itemize} 28 | 29 | 30 | % ================================= % 31 | \section*{Instructions} 32 | % --------------------------------- % 33 | In the \texttt{vec\_log\_loss.py} file, write the following function as per the instructions below:\\ 34 | \\ 35 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 36 | def vec_log_loss_(y, y_hat, eps=1e-15): 37 | """ 38 | Computes the logistic loss value. 39 | Args: 40 | y: has to be an numpy.ndarray, a vector of shape m * 1. 41 | y_hat: has to be an numpy.ndarray, a vector of shape m * 1. 42 | eps: epsilon (default=1e-15) 43 | Returns: 44 | The logistic loss value as a float. 45 | None on any error. 46 | Raises: 47 | This function should not raise any Exception. 48 | """ 49 | \end{minted} 50 | 51 | \hint{ 52 | The purpose of epsilon (eps) is to avoid $log(0)$ errors, it is a very small residual value we add to y. 
53 | } 54 | 55 | % ================================= % 56 | \section*{Examples} 57 | % --------------------------------- % 58 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 59 | # Example 1: 60 | y1 = np.array([1]).reshape((-1, 1)) 61 | x1 = np.array([4]).reshape((-1, 1)) 62 | theta1 = np.array([[2], [0.5]]) 63 | y_hat1 = logistic_predict_(x1, theta1) 64 | vec_log_loss_(y1, y_hat1) 65 | # Output: 66 | 0.018149927917808714 67 | 68 | # Example 2: 69 | y2 = np.array([[1], [0], [1], [0], [1]]) 70 | x2 = np.array([[4], [7.16], [3.2], [9.37], [0.56]]) 71 | theta2 = np.array([[2], [0.5]]) 72 | y_hat2 = logistic_predict_(x2, theta2) 73 | vec_log_loss_(y2, y_hat2) 74 | # Output: 75 | 2.4825011602472347 76 | 77 | # Example 3: 78 | y3 = np.array([[0], [1], [1]]) 79 | x3 = np.array([[0, 2, 3, 4], [2, 4, 5, 5], [1, 3, 2, 7]]) 80 | theta3 = np.array([[-2.4], [-1.5], [0.3], [-1.4], [0.7]]) 81 | y_hat3 = logistic_predict_(x3, theta3) 82 | vec_log_loss_(y3, y_hat3) 83 | # Output: 84 | 2.993853310859968 85 | \end{minted} -------------------------------------------------------------------------------- /module08/exercises/m08ex04.tex: -------------------------------------------------------------------------------- 1 | \chapter{Exercise 04} 2 | \extitle{Logistic Gradient} 3 | \input{exercises/en.ex04_interlude.tex} 4 | \newpage 5 | \turnindir{ex04} 6 | \exnumber{04} 7 | \exfiles{log\_gradient.py} 8 | \exforbidden{any function that performs derivatives for you} 9 | \makeheaderfilesforbidden 10 | 11 | 12 | % ================================= % 13 | \section*{Objective} 14 | % --------------------------------- % 15 | Understand and manipulate the concept of gradient in the context of logistic formulation.\\ 16 | \\ 17 | You must implement the following formula as a function: 18 | 19 | $$ 20 | \begin{matrix} 21 | \nabla(J)_0 & = &\cfrac{1}{m}\sum_{i=1}^{m}(h_{\theta}(x^{(i)}) - y^{(i)}) & \\ 22 | \nabla(J)_j & = &\cfrac{1}{m}\sum_{i=1}^{m}(h_{\theta}(x^{(i)}) - y^{(i)})x_{j}^{(i)} & \text{ for j = 1, ..., n} 23 | \end{matrix} 24 | $$ 25 | \\ 26 | Where: 27 | \begin{itemize} 28 | \item $\nabla(J)$ is a vector of dimension $(n + 1)$, the gradient vector 29 | \item $\nabla(J)_j$ is the j$^\text{th}$ component of $\nabla(J)$, the partial derivative of $J$ with respect to $\theta_j$ 30 | \item $y$ is a vector of dimension $m$, the vector of expected values 31 | \item $y^{(i)}$ is a scalar, the i$^\text{th}$ component of vector $y$ 32 | \item $x^{(i)}$ is the feature vector of the i$^\text{th}$ example 33 | \item $x^{(i)}_j$ is a scalar, the j$^\text{th}$ feature value of the i$^\text{th}$ example 34 | \item $h_{\theta}(x^{(i)})$ is a scalar, the model's estimation of $y^{(i)}$ 35 | \end{itemize} 36 | \bigskip 37 | \noindent{Remember that with logistic regression, the hypothesis is slightly different:}\\ 38 | $$ 39 | h_{\theta}(x^{(i)}) = sigmoid( \theta \cdot x'^{(i)}) 40 | $$ 41 | \newpage 42 | 43 | % ================================= % 44 | \section*{Instructions} 45 | % --------------------------------- % 46 | In the \texttt{log\_gradient.py} file, write the following function as per the instructions below:\\ 47 | 48 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 49 | def log_gradient(x, y, theta): 50 | """Computes a gradient vector from three non-empty numpy.ndarray, with a for-loop. The three arrays must have compatible dimensions. 51 | Args: 52 | x: has to be an numpy.ndarray, a matrix of shape m * n. 
53 | y: has to be an numpy.ndarray, a vector of shape m * 1. 54 | theta: has to be an numpy.ndarray, a vector of shape (n + 1) * 1. 55 | Returns: 56 | The gradient as a numpy.ndarray, a vector of shape n * 1, containing the result of the formula for all j. 57 | None if x, y, or theta are empty numpy.ndarray. 58 | None if x, y and theta do not have compatible dimensions. 59 | Raises: 60 | This function should not raise any Exception. 61 | """ 62 | ... Your code ... 63 | \end{minted} 64 | 65 | % ================================= % 66 | \section*{Examples} 67 | % ================================= % 68 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 69 | # Example 1: 70 | y1 = np.array([1]).reshape((-1, 1)) 71 | x1 = np.array([4]).reshape((-1, 1)) 72 | theta1 = np.array([[2], [0.5]]) 73 | 74 | log_gradient(x1, y1, theta1) 75 | # Output: 76 | array([[-0.01798621], 77 | [-0.07194484]]) 78 | 79 | # Example 2: 80 | y2 = np.array([[1], [0], [1], [0], [1]]) 81 | x2 = np.array([[4], [7.16], [3.2], [9.37], [0.56]]) 82 | theta2 = np.array([[2], [0.5]]) 83 | 84 | log_gradient(x2, y2, theta2) 85 | # Output: 86 | array([[0.3715235 ], 87 | [3.25647547]]) 88 | 89 | # Example 3: 90 | y3 = np.array([[0], [1], [1]]) 91 | x3 = np.array([[0, 2, 3, 4], [2, 4, 5, 5], [1, 3, 2, 7]]) 92 | theta3 = np.array([[-2.4], [-1.5], [0.3], [-1.4], [0.7]]) 93 | 94 | log_gradient(x3, y3, theta3) 95 | # Output: 96 | array([[-0.55711039], 97 | [-0.90334809], 98 | [-2.01756886], 99 | [-2.10071291], 100 | [-3.27257351]]) 101 | \end{minted} -------------------------------------------------------------------------------- /module08/exercises/m08ex05.tex: -------------------------------------------------------------------------------- 1 | \chapter{Exercise 05} 2 | \extitle{Vectorized Logistic Gradient} 3 | \input{exercises/en.ex05_interlude.tex} 4 | \newpage 5 | \turnindir{ex05} 6 | \exnumber{05} 7 | \exfiles{vec\_log\_gradient.py} 8 | \exforbidden{any function that performs derivatives for you} 9 | \makeheaderfilesforbidden 10 | 11 | % ================================= % 12 | \section*{Objective} 13 | % --------------------------------- % 14 | Understand and manipulate the gradient in the context of logistic formulation.\\ 15 | \\ 16 | You must implement the following formula as a function: 17 | 18 | $$ 19 | \nabla(J) = \cfrac{1}{m} X'^T(h_\theta(X) - y) 20 | $$ 21 | \\ 22 | Where: 23 | \begin{itemize} 24 | \item $\nabla(J)$ is the gradient vector of dimension $(n + 1)$ 25 | \item $X'$ is a matrix of dimensions $(m \times (n + 1))$, the design matrix onto which a column of ones was added as the first column 26 | \item $X'^T$ means the matrix has been transposed 27 | \item $h_\theta(X)$ is a vector of dimension $m$, the vector of predicted values 28 | \item $y$ is a vector of dimension $m$, the vector of expected values 29 | \end{itemize} 30 | 31 | 32 | % ================================= % 33 | \section*{Instructions} 34 | % --------------------------------- % 35 | In the \texttt{vec\_log\_gradient.py} file, write the following function as per the instructions given below: 36 | \\ 37 | \par 38 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 39 | def vec_log_gradient(x, y, theta): 40 | """Computes a gradient vector from three non-empty numpy.ndarray, without any for-loop. The three arrays must have compatible shapes. 41 | Args: 42 | x: has to be an numpy.ndarray, a matrix of shape m * n. 
43 | y: has to be an numpy.ndarray, a vector of shape m * 1. 44 | theta: has to be an numpy.ndarray, a vector (n +1) * 1. 45 | Returns: 46 | The gradient as a numpy.ndarray, a vector of shape n * 1, containg the result of the formula for all j. 47 | None if x, y, or theta are empty numpy.ndarray. 48 | None if x, y and theta do not have compatible shapes. 49 | Raises: 50 | This function should not raise any Exception. 51 | """ 52 | ... Your code ... 53 | \end{minted} 54 | 55 | 56 | % ================================= % 57 | \section*{Examples} 58 | % --------------------------------- % 59 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 60 | # Example 1: 61 | y1 = np.array([1]).reshape((-1, 1)) 62 | x1 = np.array([4]).reshape((-1, 1)) 63 | theta1 = np.array([[2], [0.5]]) 64 | 65 | vec_log_gradient(x1, y1, theta1) 66 | # Output: 67 | array([[-0.01798621], 68 | [-0.07194484]]) 69 | 70 | # Example 2: 71 | y2 = np.array([[1], [0], [1], [0], [1]]) 72 | x2 = np.array([[4], [7.16], [3.2], [9.37], [0.56]]) 73 | theta2 = np.array([[2], [0.5]]) 74 | 75 | vec_log_gradient(x2, y2, theta2) 76 | # Output: 77 | array([[0.3715235 ], 78 | [3.25647547]]) 79 | 80 | # Example 3: 81 | y3 = np.array([[0], [1], [1]]) 82 | x3 = np.array([[0, 2, 3, 4], [2, 4, 5, 5], [1, 3, 2, 7]]) 83 | theta3 = np.array([[-2.4], [-1.5], [0.3], [-1.4], [0.7]]) 84 | 85 | vec_log_gradient(x3, y3, theta3) 86 | # Output: 87 | array([[-0.55711039], 88 | [-0.90334809], 89 | [-2.01756886], 90 | [-2.10071291], 91 | [-3.27257351]]) 92 | \end{minted} -------------------------------------------------------------------------------- /module08/exercises/m08ex06.tex: -------------------------------------------------------------------------------- 1 | \chapter{Exercise 06} 2 | \extitle{Logistic Regression} 3 | %\input{exercises/en.ex06_interlude.tex} 4 | %\newpage 5 | \turnindir{ex06} 6 | \exnumber{06} 7 | \exfiles{my\_logistic\_regression.py} 8 | \exforbidden{sklearn} 9 | \makeheaderfilesforbidden 10 | 11 | % ================================= % 12 | \section*{Objective} 13 | % --------------------------------- % 14 | The time to use everything you built so far has (finally) come!\\ 15 | \\ 16 | Demonstrate your knowledge by implementing a logistic regression classifier using 17 | the gradient descent algorithm.\\ 18 | \\ 19 | You must have seen the power of \texttt{numpy} for vectorized operations. 20 | Well let's make something more concrete with that.\\ 21 | \\ 22 | You may have taken a look at Scikit-Learn's implementation of logistic regression 23 | and noticed that the \textbf{sklearn.linear\_model.LogisticRegression} class 24 | offers a lot of options.\\ 25 | \\ 26 | The goal of this exercise is to make a simplified but nonetheless useful and powerful 27 | version, with fewer options.\\ 28 | \newpage 29 | % ================================= % 30 | \section*{Instructions} 31 | % --------------------------------- % 32 | In the \texttt{my\_logistic\_regression.py} file, write a \texttt{MyLogisticRegression} 33 | class as in the instructions given below:\\ 34 | 35 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 36 | class MyLogisticRegression(): 37 | """ 38 | Description: 39 | My personnal logistic regression to classify things. 40 | """ 41 | def __init__(self, theta, alpha=0.001, max_iter=1000): 42 | self.alpha = alpha 43 | self.max_iter = max_iter 44 | self.theta = theta 45 | ... Your code here ... 46 | 47 | ... other methods ... 
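    # As a purely illustrative sketch, fit_ could be a plain gradient descent loop
    # reusing the vectorized logistic gradient from ex05 (adapted to the class):
    #   for _ in range(self.max_iter):
    #       self.theta = self.theta - self.alpha * vec_log_gradient(x, y, self.theta)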
48 | \end{minted} 49 | \\ 50 | You will add at least the following methods: 51 | \begin{itemize} 52 | \item \texttt{predict\_(self, x)} 53 | \item \texttt{loss\_elem\_(self, y, yhat)} 54 | \item \texttt{loss\_(self, y, yhat)} 55 | \item \texttt{fit\_(self, x, y)} 56 | \end{itemize} 57 | \hint{You have already written these functions; you will just need a 58 | few adjustments in order for them to work well within your \textbf{MyLogisticRegression} class.} 59 | 60 | % ================================= % 61 | \subsection*{Examples} 62 | % --------------------------------- % 63 | 64 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 65 | import numpy as np 66 | from my_logistic_regression import MyLogisticRegression as MyLR 67 | X = np.array([[1., 1., 2., 3.], [5., 8., 13., 21.], [3., 5., 9., 14.]]) 68 | Y = np.array([[1], [0], [1]]) 69 | thetas = np.array([[2], [0.5], [7.1], [-4.3], [2.09]]) 70 | mylr = MyLR(thetas) 71 | 72 | # Example 0: 73 | mylr.predict_(X) 74 | # Output: 75 | array([[0.99930437], 76 | [1. ], 77 | [1. ]]) 78 | 79 | # Example 1: 80 | mylr.loss_(X,Y) 81 | # Output: 82 | 11.513157421577002 83 | 84 | # Example 2: 85 | mylr.fit_(X, Y) 86 | mylr.theta 87 | # Output: 88 | array([[ 2.11826435] 89 | [ 0.10154334] 90 | [ 6.43942899] 91 | [-5.10817488] 92 | [ 0.6212541 ]]) 93 | 94 | # Example 3: 95 | mylr.predict_(X) 96 | # Output: 97 | array([[0.57606717] 98 | [0.68599807] 99 | [0.06562156]]) 100 | 101 | # Example 4: 102 | mylr.loss_(X,Y) 103 | # Output: 104 | 1.4779126923052268 105 | \end{minted} -------------------------------------------------------------------------------- /module08/useful_resources.tex: -------------------------------------------------------------------------------- 1 | %******************************************************************************% 2 | % % 3 | % Section useful resources % 4 | % for ML Modules % 5 | % % 6 | %******************************************************************************% 7 | 8 | 9 | \chapter*{Notions and resources} 10 | 11 | \section*{Notions of the module} 12 | \begin{itemize} 13 | \item Logistic regression 14 | \item Logistic hypothesis 15 | \item Logistic gradient descent 16 | \item Multiclass classification 17 | \item Accuracy, precision, recall, F1-score 18 | \item Confusion matrix 19 | \end{itemize} 20 | 21 | \section*{Useful Resources} 22 | 23 | You are recommended to use the following material: \href{https://www.coursera.org/learn/machine-learning}{Machine Learning MOOC - Stanford}\\ 24 | \newline 25 | This series of videos is available at no cost: simply log in, select "Enroll for Free", and click "Audit" at the bottom of the pop-up window.\\ 26 | \newline 27 | The following sections of the course are particularly relevant to today's exercises: 28 | 29 | \subsection*{Week 3: Classification} 30 | 31 | \subsubsection*{Classification with logistic regression} 32 | \begin{itemize} 33 | \item Motivations 34 | \item Logistic regression 35 | \item Decision boundary 36 | \end{itemize} 37 | 38 | \subsubsection*{Cost function for logistic regression} 39 | \begin{itemize} 40 | \item Cost function for logistic regression 41 | \item Simplified Cost Function for Logistic Regression 42 | \end{itemize} 43 | 44 | \subsubsection*{Gradient descent for logistic regression} 45 | \begin{itemize} 46 | \item Gradient Descent Implementation 47 | \end{itemize} 48 | \noindent{\emph{All videos above are also available on this 
\href{https://youtube.com/playlist?list=PLkDaE6sCZn6FNC6YRfRQc_FbeQrF8BwGI&feature=shared}{Andrew Ng's YouTube playlist}, videos from 31 to 36.}} 49 | -------------------------------------------------------------------------------- /module09/Makefile: -------------------------------------------------------------------------------- 1 | # List the pdf's to build. foo.tex will produce foo.pdf 2 | TARGETS = en.subject.pdf 3 | 4 | # List the files included in the slides 5 | DEPS = exercises/en.ex01_interlude.tex \ 6 | exercises/en.ex04_interlude.tex \ 7 | exercises/en.ex06_interlude.tex \ 8 | exercises/en.ex08_interlude.tex \ 9 | exercises/en.ex10_interlude.tex \ 10 | ../resources/42ai_bootcamps/en.instructions.tex \ 11 | ../resources/42ai_bootcamps/en.acknowledgements.tex \ 12 | useful_resources.tex 13 | 14 | # Relative path to the LaTeX documentclass setup files 15 | # Adapt as needed 16 | # RELPATH = $(shell git rev-parse --show-toplevel)/resources/latex/ 17 | # RELPATH for github actions: 18 | RELPATH = $(shell dirname `pwd`)/resources/latex/ 19 | 20 | # You should not touch this either 21 | include $(RELPATH)/Makefile.LaTeX 22 | -------------------------------------------------------------------------------- /module09/assets/42ai_logo.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module09/assets/42ai_logo.pdf -------------------------------------------------------------------------------- /module09/assets/Evaluate.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module09/assets/Evaluate.png -------------------------------------------------------------------------------- /module09/assets/Improve.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module09/assets/Improve.png -------------------------------------------------------------------------------- /module09/attachments/solar_system_census_planets.csv: -------------------------------------------------------------------------------- 1 | ,Origin 2 | 0,1.0 3 | 1,2.0 4 | 2,3.0 5 | 3,3.0 6 | 4,0.0 7 | 5,3.0 8 | 6,2.0 9 | 7,1.0 10 | 8,1.0 11 | 9,2.0 12 | 10,1.0 13 | 11,2.0 14 | 12,3.0 15 | 13,0.0 16 | 14,3.0 17 | 15,2.0 18 | 16,3.0 19 | 17,0.0 20 | 18,0.0 21 | 19,2.0 22 | 20,1.0 23 | 21,3.0 24 | 22,1.0 25 | 23,3.0 26 | 24,0.0 27 | 25,0.0 28 | 26,0.0 29 | 27,2.0 30 | 28,1.0 31 | 29,3.0 32 | 30,1.0 33 | 31,2.0 34 | 32,2.0 35 | 33,2.0 36 | 34,1.0 37 | 35,0.0 38 | 36,0.0 39 | 37,0.0 40 | 38,0.0 41 | 39,1.0 42 | 40,1.0 43 | 41,1.0 44 | 42,0.0 45 | 43,3.0 46 | 44,2.0 47 | 45,2.0 48 | 46,2.0 49 | 47,3.0 50 | 48,1.0 51 | 49,1.0 52 | 50,3.0 53 | 51,0.0 54 | 52,0.0 55 | 53,1.0 56 | 54,3.0 57 | 55,3.0 58 | 56,3.0 59 | 57,1.0 60 | 58,1.0 61 | 59,0.0 62 | 60,1.0 63 | 61,0.0 64 | 62,1.0 65 | 63,2.0 66 | 64,2.0 67 | 65,3.0 68 | 66,1.0 69 | 67,2.0 70 | 68,2.0 71 | 69,2.0 72 | 70,0.0 73 | 71,2.0 74 | 72,2.0 75 | 73,3.0 76 | 74,2.0 77 | 75,0.0 78 | 76,1.0 79 | 77,1.0 80 | 78,3.0 81 | 79,1.0 82 | 80,2.0 83 | 81,2.0 84 | 82,2.0 85 | 83,3.0 86 | 84,3.0 87 | 85,2.0 88 | 86,3.0 89 | 87,0.0 90 | 88,2.0 91 | 89,1.0 92 | 90,3.0 93 | 91,1.0 94 | 92,3.0 95 | 93,3.0 96 | 94,0.0 97 | 95,1.0 98 | 96,0.0 99 | 97,0.0 100 | 98,0.0 101 | 99,3.0 102 | 
100,3.0 103 | 101,0.0 104 | 102,3.0 105 | 103,3.0 106 | 104,2.0 107 | 105,1.0 108 | 106,1.0 109 | 107,1.0 110 | 108,0.0 111 | 109,0.0 112 | 110,0.0 113 | 111,0.0 114 | 112,1.0 115 | 113,2.0 116 | 114,3.0 117 | 115,0.0 118 | 116,2.0 119 | 117,0.0 120 | 118,2.0 121 | 119,3.0 122 | -------------------------------------------------------------------------------- /module09/exercises/en.ex04_interlude.tex: -------------------------------------------------------------------------------- 1 | %******************************************************************************% 2 | % % 3 | % Interlude % 4 | % for Machine Learning module % 5 | % % 6 | %******************************************************************************% 7 | 8 | % =============================================== % 9 | \section*{Interlude - Regularized Gradient} 10 | % =============================================== % 11 | \begin{figure}[!h] 12 | \centering 13 | \includegraphics[scale=0.25]{assets/Improve.png} 14 | %\caption{The Learning Cycle: Improve} 15 | \end{figure} 16 | \noindent{To derive the gradient of the regularized loss function, $\nabla(J)$ 17 | you have to change a bit the formula of the unregularized gradient.}\\ 18 | \\ 19 | Given the fact that we are not penalizing $\theta_0$, the formula will remain 20 | the same as before for this parameter. For the other parameters ($\theta_1, \dots, \theta_n$), 21 | we must add the partial derivative of the regularization term: $\lambda \theta_j$.\\ 22 | \\ 23 | Therefore, we get: 24 | $$ 25 | \nabla(J)_0 = \frac{1}{m}\sum_{i=1}^{m}(h_\theta(x^{(i)}) - y^{(i)}) 26 | $$ 27 | $$ 28 | \nabla(J)_j = \frac{1}{m}\left(\sum_{i=1}^{m}(h_\theta(x^{(i)}) - y^{(i)})x_j^{(i)} + \lambda \theta_j\right) \text{ for j = 1, ..., n} 29 | $$ 30 | \\ 31 | Where: 32 | \begin{itemize} 33 | \item $\nabla(J)_j$ is the j$^\text{th}$ component of the gradient vector $\nabla(J)$ 34 | \item $m$ is the number of training examples used 35 | \item $h_\theta(x^{(i)})$ is the model's prediction for the i$^\text{th}$ training example 36 | \item $x^{(i)}$ is the feature vector of the i$^\text{th}$ training example 37 | \item $y^{(i)}$ is the expected target value for the i$^\text{th}$ example 38 | \item $\lambda$ is a constant, the regularization hyperparameter 39 | \item $\theta_j$ is the j$^\text{th}$ parameter of the $\theta$ vector 40 | \end{itemize} 41 | \bigskip 42 | Which can be vectorized as: 43 | $$ 44 | \nabla(J) = \frac{1}{m} [X'^T(h_\theta(X) - y) + \lambda \theta'] 45 | $$ 46 | \\ 47 | Where: 48 | \begin{itemize} 49 | \item $\nabla(J)$ is a vector of dimension $(n + 1)$, the gradient vector 50 | \item $m$ is the number of training examples used 51 | \item $X$ is a matrix of dimension $(m \times n)$, the design matrix 52 | \item $X'$ is a matrix of dimension $(m \times (n + 1))$, the design matrix onto 53 | which a column of ones is added as a first column 54 | \item $y$ is a vector of dimension $m$, the vector of expected values 55 | \item $h_\theta(X)$ is a vector of dimension $m$, the vector of predicted values 56 | \item $\lambda$ is a constant 57 | \item $\theta$ is a vector of dimension $(n + 1)$, the parameter vector 58 | \item $\theta'$ is a vector of dimension $(n + 1)$, constructed using the following rules: 59 | \end{itemize} 60 | 61 | $$ 62 | \begin{matrix} 63 | \theta'_0 & = 0 \\ 64 | \theta'_j & = \theta_j & \text{ for } j = 1, \dots, n\\ 65 | \end{matrix} 66 | $$ 67 | 68 | % =============================================== % 69 | \subsection*{Linear Gradient vs Logistic Gradient} 70 | 
% ----------------------------------------------- % 71 | As before, we draw your attention to the only difference between the linear regression's 72 | and the logistic regression's gradient equations: \textbf{the hypothesis function} $h_\theta(X)$. 73 | \begin{itemize} 74 | \item In the linear regression: $h_\theta(X) = X'\theta$ 75 | \item In the logistic regression: $h_\theta(X) = \text{sigmoid}(X'\theta)$ 76 | \end{itemize} 77 | -------------------------------------------------------------------------------- /module09/exercises/en.ex06_interlude.tex: -------------------------------------------------------------------------------- 1 | %******************************************************************************% 2 | % % 3 | % Interlude % 4 | % for Machine Learning module % 5 | % % 6 | %******************************************************************************% 7 | 8 | % ============================================== % 9 | \section*{Interlude - Next Level: Ridge Regression} 10 | % ============================================== % 11 | 12 | Until now we have only talked about L$_2$ regularization and its implications for the 13 | calculation of the loss function and gradient for both the linear and the logistic regression.\\ 14 | \\ 15 | Now it's time to use the proper terminology:\\ 16 | \\ 17 | \emph{When we apply L$_2$ regularization to a linear regression model, the new model is 18 | called a \textbf{Ridge Regression} model. 19 | Besides that brand-new name, Ridge regression is nothing more than 20 | linear regression regularized with L$_2$.}\\ 21 | \\ 22 | We suggest you watch this \href{https://www.youtube.com/watch?v=Q81RR3yKn30}{very nice 23 | explanation of Ridge Regularization}.\\ 24 | \\ 25 | By the way, this YouTube channel, \texttt{\textit{StatQuest}}, is very helpful for 26 | understanding the gist of a lot of machine learning concepts.\\ 27 | You will not waste your time watching its statistics and machine learning playlists!
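\bigskip
\noindent{To make the idea a bit more concrete, here is a minimal \texttt{numpy} sketch of the ridge loss described in the previous interludes. It is purely illustrative: the function name, its signature and the sample values below are our own choices for this example, not part of any exercise.}
\begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python}
import numpy as np

def ridge_loss(y, y_hat, theta, lambda_):
    # Illustrative sketch of J = (1 / 2m) * [(y_hat - y).(y_hat - y) + lambda * (theta'.theta')]
    m = y.shape[0]
    theta_prime = theta.astype(float).copy()
    theta_prime[0] = 0.0                           # theta_0 is never penalized
    residual = y_hat - y
    penalty = lambda_ * np.sum(theta_prime ** 2)
    return float((np.sum(residual ** 2) + penalty) / (2 * m))

y = np.array([[2.], [14.], [-13.]])
y_hat = np.array([[3.], [13.], [-11.5]])
theta = np.array([[1.], [2.5], [1.5]])
ridge_loss(y, y_hat, theta, 0.5)                   # linear regression loss plus the L2 penalty
\end{minted}
\noindent{Setting $\theta'_0$ to zero reproduces the convention used throughout this module: the bias term is not penalized, and with $\lambda = 0$ the formula falls back to the plain linear regression loss.}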
28 | -------------------------------------------------------------------------------- /module09/exercises/en.ex08_interlude.tex: -------------------------------------------------------------------------------- 1 | %******************************************************************************% 2 | % % 3 | % Interlude % 4 | % for Machine Learning module % 5 | % % 6 | %******************************************************************************% 7 | 8 | % ============================================== % 9 | \section*{Interlude} 10 | % ============================================== % 11 | \subsection*{Regularized Logistic Regression is still Logistic Regression} 12 | % ---------------------------------------------- % 13 | As opposed to linear regression, \textbf{regularized logistic regression is still 14 | called logistic regression}.\\ 15 | \\ 16 | Working without regularization parameters can simply be regarded as a special 17 | case where $\lambda = 0$.\\ 18 | 19 | If $\lambda = 0$: 20 | \begin{eqnarray*} 21 | \nabla(J) & = & \frac{1}{m} [X'^T(h_\theta(X) - y) + \lambda \theta'] \\ 22 | & = & \frac{1}{m} [X'^T(h_\theta(X) - y) + 0 \cdot \theta'] \\ 23 | & = & \frac{1}{m} [X'^T(h_\theta(X) - y)] 24 | \end{eqnarray*} -------------------------------------------------------------------------------- /module09/exercises/en.ex10_interlude.tex: -------------------------------------------------------------------------------- 1 | %******************************************************************************% 2 | % % 3 | % Interlude % 4 | % for Machine Learning module % 5 | % % 6 | %******************************************************************************% 7 | 8 | % ============================================== % 9 | \section*{One Last Word - It's Just a Beginning...} 10 | % ============================================== % 11 | \subsection*{Congratulations!!} 12 | % ---------------------------------------------- % 13 | You have finished this bootcamp and you can be proud of yourself! 14 | We hope you liked it and that the material was understandable. 15 | 16 | We tried our best to make it as accessible as possible to anyone, even to someone with little mathematical background. It was quite a challenge, and we hope we succeeded in that difficult mission. 17 | 18 | Equipped with your brand-new knowledge, you are now able to tackle more challenging algorithms such as \texttt{\textbf{ensemble methods (random forest, gradient boosting)}}, \texttt{\textbf{support vector machines}} or even \texttt{\textbf{artificial neural networks}}!! 19 | 20 | And because we know that \texttt{\textbf{a lot of you had neural networks in mind}} when you started this journey into machine learning, let's talk a bit more about why you are now able to dive deep into them... fearlessly! 21 | 22 | \texttt{\textbf{Neural networks}} are based on the same blocks you should now be familiar with. 23 | Essentially: 24 | \begin{itemize} 25 | \item matrix and vector operations, 26 | \item gradient descent, 27 | \item regularization, 28 | \item the sigmoid (as an activation function, even if it is a bit outdated now) 29 | \end{itemize} 30 | 31 | Let's see what you can do now. 
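\bigskip
\noindent{As a purely illustrative teaser (nothing to submit here, and the helper names below are our own), the small sketch that follows strings those familiar blocks together: one ``neuron'' is nothing more than the logistic hypothesis you implemented in this bootcamp, $\text{sigmoid}(X'\theta)$, and a layer is simply several such units side by side.}
\begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python}
import numpy as np

def sigmoid(z):
    # The activation function you already know from the logistic hypothesis
    return 1 / (1 + np.exp(-z))

def layer(x_prime, thetas):
    # One "layer": each column of thetas is the parameter vector of one logistic unit
    return sigmoid(x_prime @ thetas)

x_prime = np.array([[1., 0.5, -1.2],
                    [1., 2.0,  0.3]])   # design matrix with its column of ones
thetas = np.array([[0.1, -0.4],
                   [0.8,  0.2],
                   [-0.5, 0.7]])        # two units, i.e. two theta vectors side by side
layer(x_prime, thetas)                  # one activation per example and per unit, shape (2, 2)
\end{minted}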
32 | 33 | % ============================================== % 34 | \subsection*{To go further} 35 | % ---------------------------------------------- % 36 | To keep learning Machine Learning, here are several options you should consider: 37 | \begin{itemize} 38 | \item To complete the entire \href{https://www.coursera.org/learn/machine-learning/home/}{Stanford's Machine Learning MOOC}. 39 | It is a great resource, a \textbf{classic} for those who want to study machine learning. 40 | This bootcamp closely followed the architecture of its first three weeks. 41 | This course is definitely worth your time! 42 | Also, someone did great work converting all the Octave assignments into \href{https://github.com/dibgerge/ml-coursera-python-assignments}{Python notebooks}. 43 | \item To take the \href{https://course.fast.ai/}{fast.ai Deep Learning MOOC}. 44 | It's a great way to learn Deep Learning following a top-down approach. 45 | \end{itemize} -------------------------------------------------------------------------------- /module09/exercises/m09ex00.tex: -------------------------------------------------------------------------------- 1 | \chapter{Exercise 00} 2 | \extitle{Polynomial models II} 3 | %\input{exercises/en.ex00_interlude.tex} 4 | %\newpage 5 | \turnindir{ex00} 6 | \exnumber{00} 7 | \exfiles{polynomial\_model\_extended.py} 8 | \exforbidden{sklearn} 9 | \makeheaderfilesforbidden 10 | 11 | % ================================== % 12 | \section*{Objective} 13 | % ---------------------------------- % 14 | Create a function that takes a matrix $X$ of dimensions $(m \times n)$ and an integer $p$ 15 | as input, and returns a matrix of dimension $(m \times (n \cdot p))$.\\ 16 | \\ 17 | For each column $x_j$ of the matrix $X$, the new matrix contains 18 | $x_j$ raised to the power of $k$, for $k = 1, 2, ..., p$: 19 | 20 | $$ 21 | x_1 \mid \ldots \mid x_n \mid x_1^2 \mid \ldots \mid x_n^2 \mid \ldots \mid x_1^p \mid \ldots \mid x_n^p 22 | $$ 23 | \newpage 24 | % ================================== % 25 | \section*{Instructions} 26 | % ---------------------------------- % 27 | In the \texttt{polynomial\_model\_extended.py} file, write the following function 28 | as per the instructions given below:\\ 29 | \\ 30 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 31 | def add_polynomial_features(x, power): 32 | """Add polynomial features to matrix x by raising its columns to every power in the range 33 | of 1 up to the power given in argument. 34 | Args: 35 | x: has to be a numpy.ndarray, a matrix of shape m * n. 36 | power: has to be an int, the power up to which the columns of matrix x are going 37 | to be raised. 38 | Returns: 39 | The matrix of polynomial features as a numpy.ndarray, of shape m * (n * power), 40 | containing the polynomial feature values for all 41 | training examples. 42 | None if x is an empty numpy.ndarray. 43 | Raises: 44 | This function should not raise any Exception. 45 | """ 46 | ... Your code ... 
47 | \end{minted} 48 | 49 | 50 | % ================================== % 51 | \section*{Examples} 52 | % ---------------------------------- % 53 | 54 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 55 | import numpy as np 56 | x = np.arange(1,11).reshape(5, 2) 57 | 58 | # Example 1: 59 | add_polynomial_features(x, 3) 60 | # Output: 61 | array([[ 1, 2, 1, 4, 1, 8], 62 | [ 3, 4, 9, 16, 27, 64], 63 | [ 5, 6, 25, 36, 125, 216], 64 | [ 7, 8, 49, 64, 343, 512], 65 | [ 9, 10, 81, 100, 729, 1000]]) 66 | 67 | # Example 2: 68 | add_polynomial_features(x, 4) 69 | # Output: 70 | array([[ 1, 2, 1, 4, 1, 8, 1, 16], 71 | [ 3, 4, 9, 16, 27, 64, 81, 256], 72 | [ 5, 6, 25, 36, 125, 216, 625, 1296], 73 | [ 7, 8, 49, 64, 343, 512, 2401, 4096], 74 | [ 9, 10, 81, 100, 729, 1000, 6561, 10000]]) 75 | \end{minted} -------------------------------------------------------------------------------- /module09/exercises/m09ex01.tex: -------------------------------------------------------------------------------- 1 | \chapter{Exercise 01} 2 | \extitle{L2 Regularization} 3 | \input{exercises/en.ex01_interlude.tex} 4 | \newpage 5 | \turnindir{ex01} 6 | \exnumber{01} 7 | \exfiles{l2\_reg.py} 8 | \exforbidden{sklearn} 9 | \makeheaderfilesforbidden 10 | 11 | % ================================= % 12 | \section*{Objective} 13 | % --------------------------------- % 14 | You must implement the following formulas as functions: 15 | 16 | % ================================= % 17 | \subsection*{Iterative} 18 | % --------------------------------- % 19 | $$ 20 | L_2(\theta)^2 = \sum_{j = 1}^n \theta_j^2 21 | $$ 22 | \\ 23 | Where: 24 | \begin{itemize} 25 | \item $\theta$ is a vector of dimension $(n + 1)$. 26 | \end{itemize} 27 | 28 | % ================================= % 29 | \subsection*{Vectorized} 30 | % --------------------------------- % 31 | $$ 32 | L_2(\theta)^2 = \theta' \cdot \theta' 33 | $$ 34 | \\ 35 | Where: 36 | \begin{itemize} 37 | \item $\theta'$ is a vector of dimension $(n + 1)$, constructed using the following rules: 38 | \end{itemize} 39 | 40 | $$ 41 | \begin{matrix} 42 | \theta'_0 & = 0 \\ 43 | \theta'_j & = \theta_j & \text{ for } j = 1, \dots, n\\ 44 | \end{matrix} 45 | $$ 46 | \newpage 47 | % ================================= % 48 | \section*{Instructions} 49 | % --------------------------------- % 50 | In the \texttt{l2\_reg.py} file, write the following functions as per the instructions given below:\\ 51 | \\ 52 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 53 | def iterative_l2(theta): 54 | """Computes the L2 regularization of a non-empty numpy.ndarray, with a for-loop. 55 | Args: 56 | theta: has to be a numpy.ndarray, a vector of shape n * 1. 57 | Returns: 58 | The L2 regularization as a float. 59 | None if theta in an empty numpy.ndarray. 60 | Raises: 61 | This function should not raise any Exception. 62 | """ 63 | ... Your code ... 64 | 65 | def l2(theta): 66 | """Computes the L2 regularization of a non-empty numpy.ndarray, without any for-loop. 67 | Args: 68 | theta: has to be a numpy.ndarray, a vector of shape n * 1. 69 | Returns: 70 | The L2 regularization as a float. 71 | None if theta in an empty numpy.ndarray. 72 | Raises: 73 | This function should not raise any Exception. 74 | """ 75 | ... Your code ... 
76 | \end{minted} 77 | 78 | % ================================= % 79 | \section*{Examples} 80 | % --------------------------------- % 81 | 82 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 83 | x = np.array([2, 14, -13, 5, 12, 4, -19]).reshape((-1, 1)) 84 | 85 | # Example 1: 86 | iterative_l2(x) 87 | # Output: 88 | 911.0 89 | 90 | # Example 2: 91 | l2(x) 92 | # Output: 93 | 911.0 94 | 95 | y = np.array([3,0.5,-6]).reshape((-1, 1)) 96 | # Example 3: 97 | iterative_l2(y) 98 | # Output: 99 | 36.25 100 | 101 | # Example 4: 102 | l2(y) 103 | # Output: 104 | 36.25 105 | \end{minted} -------------------------------------------------------------------------------- /module09/exercises/m09ex02.tex: -------------------------------------------------------------------------------- 1 | \chapter{Exercise 02} 2 | \extitle{Regularized Linear Loss Function} 3 | \turnindir{ex02} 4 | \exnumber{02} 5 | \exfiles{linear\_loss\_reg.py} 6 | \exforbidden{sklearn} 7 | \makeheaderfilesforbidden 8 | 9 | % ================================= % 10 | \section*{Objective} 11 | % --------------------------------- % 12 | You must implement the following formula as a function: 13 | 14 | $$ 15 | J(\theta) = \frac{1}{2m}[(\hat{y} - y)\cdot(\hat{y} - y) + \lambda (\theta' \cdot \theta')] 16 | $$ 17 | \\ 18 | Where: 19 | \begin{itemize} 20 | \item $y$ is a vector of dimension $m$, the expected values 21 | \item $\hat{y}$ is a vector of dimension $m$, the predicted values 22 | \item $\lambda$ is a constant, the regularization hyperparameter 23 | \item $\theta'$ is a vector of dimension $n$, constructed using the following rules: 24 | \end{itemize} 25 | 26 | $$ 27 | \begin{matrix} 28 | \theta'_0 & = 0 \\ 29 | \theta'_j & = \theta_j & \text{ for } j = 1, \dots, n\\ 30 | \end{matrix} 31 | $$ 32 | \newpage 33 | % ================================= % 34 | \section*{Instructions} 35 | % --------------------------------- % 36 | In the \texttt{linear\_loss\_reg.py} file, write the following function 37 | as per the instructions given below:\\ 38 | \\ 39 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 40 | def reg_loss_(y, y_hat, theta, lambda_): 41 | """Computes the regularized loss of a linear regression model from two non-empty numpy.array, 42 | without any for loop. The two arrays must have the same dimensions. 43 | Args: 44 | y: has to be an numpy.ndarray, a vector of shape m * 1. 45 | y_hat: has to be an numpy.ndarray, a vector of shape m * 1. 46 | theta: has to be a numpy.ndarray, a vector of shape n * 1. 47 | lambda_: has to be a float. 48 | Returns: 49 | The regularized loss as a float. 50 | None if y, y_hat, or theta are empty numpy.ndarray. 51 | None if y and y_hat do not share the same shapes. 52 | Raises: 53 | This function should not raise any Exception. 54 | """ 55 | ... Your code ... 
56 | \end{minted} 57 | 58 | \hint{such a situation could be a good use case for decorators...} 59 | 60 | % ================================= % 61 | \section*{Examples} 62 | % --------------------------------- % 63 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 64 | y = np.array([2, 14, -13, 5, 12, 4, -19]).reshape((-1, 1)) 65 | y_hat = np.array([3, 13, -11.5, 5, 11, 5, -20]).reshape((-1, 1)) 66 | theta = np.array([1, 2.5, 1.5, -0.9]).reshape((-1, 1)) 67 | 68 | # Example : 69 | reg_loss_(y, y_hat, theta, .5) 70 | # Output: 71 | 0.8503571428571429 72 | 73 | # Example : 74 | reg_loss_(y, y_hat, theta, .05) 75 | # Output: 76 | 0.5511071428571429 77 | 78 | # Example : 79 | reg_loss_(y, y_hat, theta, .9) 80 | # Output: 81 | 1.116357142857143 82 | \end{minted} -------------------------------------------------------------------------------- /module09/exercises/m09ex03.tex: -------------------------------------------------------------------------------- 1 | \chapter{Exercise 03} 2 | \extitle{Regularized Logistic Loss Function} 3 | %\input{exercises/en.ex03_interlude.tex} 4 | %\newpage 5 | \turnindir{ex03} 6 | \exnumber{03} 7 | \exfiles{logistic\_loss\_reg.py} 8 | \exforbidden{sklearn} 9 | \makeheaderfilesforbidden 10 | 11 | % ================================= % 12 | \section*{Objective} 13 | % --------------------------------- % 14 | You must implement the following formula as a function: 15 | 16 | $$ 17 | J( \theta) = -\frac{1} {m} \lbrack y \cdot \log(\hat{y}) + (\vec{1} - y) \cdot \log(\vec{1} - \hat{y})\rbrack + \frac{\lambda}{2m} (\theta' \cdot \theta') 18 | $$ 19 | \\ 20 | Where: 21 | \begin{itemize} 22 | \item $\hat{y}$ is a vector of dimension $m$, the vector of predicted values 23 | \item $y$ is a vector of dimension $m$, the vector of expected values 24 | \item $\vec{1}$ is a vector of dimension $m$, a vector full of ones 25 | \item $\lambda$ is a constant, the regularization hyperparameter 26 | \item $\theta'$ is a vector of dimension $n$, constructed using the following rules: 27 | \end{itemize} 28 | $$ 29 | \begin{matrix} 30 | \theta'_0 & = 0 \\ 31 | \theta'_j & = \theta_j & \text{ for } j = 1, \dots, n\\ 32 | \end{matrix} 33 | $$ 34 | \newpage 35 | % ================================= % 36 | \section*{Instructions} 37 | % --------------------------------- % 38 | In the \texttt{logistic\_loss\_reg.py} file, write the following function as 39 | per the instructions given below:\\ 40 | \\ 41 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 42 | def reg_log_loss_(y, y_hat, theta, lambda_): 43 | """Computes the regularized loss of a logistic regression model from two non-empty numpy.ndarray, 44 | without any for loop. The two arrays must have the same shapes. 45 | Args: 46 | y: has to be an numpy.ndarray, a vector of shape m * 1. 47 | y_hat: has to be an numpy.ndarray, a vector of shape m * 1. 48 | theta: has to be a numpy.ndarray, a vector of shape n * 1. 49 | lambda_: has to be a float. 50 | Returns: 51 | The regularized loss as a float. 52 | None if y, y_hat, or theta is empty numpy.ndarray. 53 | None if y and y_hat do not share the same shapes. 54 | Raises: 55 | This function should not raise any Exception. 56 | """ 57 | ... Your code ... 
58 | \end{minted} 59 | 60 | \hint{Here again, seems to be a good use case for decorators ...} 61 | 62 | % ================================= % 63 | \section*{Examples} 64 | % --------------------------------- % 65 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 66 | y = np.array([1, 1, 0, 0, 1, 1, 0]).reshape((-1, 1)) 67 | y_hat = np.array([.9, .79, .12, .04, .89, .93, .01]).reshape((-1, 1)) 68 | theta = np.array([1, 2.5, 1.5, -0.9]).reshape((-1, 1)) 69 | 70 | # Example : 71 | reg_log_loss_(y, y_hat, theta, .5) 72 | # Output: 73 | 0.43377043716475955 74 | 75 | # Example : 76 | reg_log_loss_(y, y_hat, theta, .05) 77 | # Output: 78 | 0.13452043716475953 79 | 80 | # Example : 81 | reg_log_loss_(y, y_hat, theta, .9) 82 | # Output: 83 | 0.6997704371647596 84 | \end{minted} -------------------------------------------------------------------------------- /module09/exercises/m09ex06.tex: -------------------------------------------------------------------------------- 1 | \chapter{Exercise 06} 2 | \extitle{Ridge Regression} 3 | \input{exercises/en.ex06_interlude.tex} 4 | \newpage 5 | \turnindir{ex06} 6 | \exnumber{06} 7 | \exfiles{ridge.py} 8 | \exforbidden{sklearn} 9 | \makeheaderfilesforbidden 10 | 11 | % ================================= % 12 | \section*{Objective} 13 | % --------------------------------- % 14 | Now it's time to implement your \texttt{MyRidge} class, similar to 15 | the class of the same name in \texttt{sklearn.linear\_model}.\\ 16 | 17 | % ================================= % 18 | \section*{Instructions} 19 | % --------------------------------- % 20 | In the \texttt{ridge.py} file, create the following class as per the instructions given below:\\ 21 | \\ 22 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 23 | class MyRidge(ParentClass): 24 | """ 25 | Description: 26 | My personnal ridge regression class to fit like a boss. 27 | """ 28 | def __init__(self, thetas, alpha=0.001, max_iter=1000, lambda_=0.5): 29 | self.alpha = alpha 30 | self.max_iter = max_iter 31 | self.thetas = thetas 32 | self.lambda_ = lambda_ 33 | ... Your code here ... 34 | 35 | ... other methods ... 
36 | \end{minted} 37 | \\ 38 | Your \texttt{MyRidge} class will have at least the following methods: 39 | \begin{itemize} 40 | \item \texttt{\_\_init\_\_}, special method, similar to the one you 41 | wrote in \texttt{MyLinearRegression} (module06) 42 | \item \texttt{get\_params\_}, which gets the parameters of the estimator 43 | \item \texttt{set\_params\_}, which sets the parameters of the estimator 44 | \item \texttt{loss\_}, which returns the loss between 2 vectors (numpy arrays) 45 | \item \texttt{loss\_elem\_}, which returns a vector corresponding to the squared 46 | difference between 2 vectors (numpy arrays) 47 | \item \texttt{predict\_}, which generates predictions using a linear model 48 | \item \texttt{gradient\_}, which calculates the vectorized regularized gradient 49 | \item \texttt{fit\_}, which fits a Ridge regression model to a training dataset 50 | \end{itemize} 51 | 52 | \hint{You should consider inheritance from \texttt{MyLinearRegression}.} 53 | \noindent{If \texttt{MyRidge} inherits from \texttt{MyLinearRegression}, you may not 54 | need to reimplement the \texttt{predict\_} method.}\\ 55 | \\ 56 | The difference between \texttt{MyRidge}'s implementations of \texttt{loss\_elem\_}, \texttt{loss\_}, \texttt{gradient\_} and 57 | \texttt{fit\_} and the ones in your \texttt{MyLinearRegression} class 58 | (implemented in module06) is the use of a regularization term.\\ 59 | \hint{ 60 | Again, this is a good use case for decorators... 61 | } -------------------------------------------------------------------------------- /module09/exercises/m09ex07.tex: -------------------------------------------------------------------------------- 1 | \chapter{Exercise 07} 2 | \extitle{Practicing Ridge Regression} 3 | %\input{exercises/en.ex07_interlude.tex} 4 | %\newpage 5 | \turnindir{ex07} 6 | \exnumber{07} 7 | \exfiles{space\_avocado.py, benchmark\_train.py, models.[csv/yml/pickle]} 8 | \exforbidden{sklearn} 9 | \makeheaderfilesforbidden 10 | 11 | 12 | % ================================= % 13 | \section*{Objective} 14 | % --------------------------------- % 15 | It's training time! 16 | Let's practice our brand-new Ridge Regression with a polynomial model. 17 | 18 | % ================================= % 19 | \section*{Introduction} 20 | % --------------------------------- % 21 | You have already used the dataset \texttt{space\_avocado.csv}. 22 | The dataset is made of 5 columns: 23 | \begin{itemize} 24 | \item \textbf{index}: not relevant 25 | \item \textbf{weight}: the weight of the avocado order (in tons) 26 | \item \textbf{prod\_distance}: distance from where the ordered avocado is produced (in Mkms) 27 | \item \textbf{time\_delivery}: time between the order and the receipt (in days) 28 | \item \textbf{target}: price of the order (in trantorian units) 29 | \end{itemize} 30 | It contains the data of all the avocado purchases made by the Trantor administration 31 | (guacamole is a serious business there).\\ 32 | 33 | % ================================= % 34 | \section*{Instructions} 35 | % --------------------------------- % 36 | You have to explore different models and select the best you find.
37 | To do this:\\ 38 | \begin{itemize} 39 | \item Split your \texttt{space\_avocado.csv} dataset into a training set, a cross-validation set and a test set 40 | \item Use your \texttt{add\_polynomial\_features} function on your training set 41 | \item Consider several Ridge Regression models with polynomial hypotheses up to a maximum degree of $4$ 42 | \item For each hypothesis, consider a regularization factor ranging from $0$ to $1$ with a step of $0.2$ 43 | \item Evaluate your models on the cross-validation set 44 | \item Evaluate the best model on the test set 45 | \end{itemize} 46 | \info{According to your model evaluations, what is the best hypothesis you can get?} 47 | \begin{itemize} 48 | \item Plot the evaluation curve, which will help you select the best model 49 | (evaluation metrics vs models + $\lambda$ factor). 50 | \item Plot the true price and the predicted price obtained via your best model 51 | with the different $\lambda$ values (meaning the dataset + the 5 predicted curves). 52 | \end{itemize} 53 | 54 | \hint{The training of all your models can take a long time.\newline 55 | Therefore, you only need to train the best one during the correction.} 56 | \noindent{Nevertheless, you should provide, in \texttt{benchmark\_train.py}, the program which performs the training 57 | of all the models and saves the parameters of the different models into a file.}\\ 58 | \\ 59 | In \texttt{models.[csv/yml/pickle]} one must find the parameters of all the models 60 | you have explored and trained.\\ 61 | \\ 62 | In \texttt{space\_avocado.py}, train the model based on the best hypothesis you find 63 | and load the other models from \texttt{models.[csv/yml/pickle]}. 64 | Then evaluate the best model on the test set and plot the different graphics as asked before. 65 | -------------------------------------------------------------------------------- /module09/exercises/m09ex08.tex: -------------------------------------------------------------------------------- 1 | \chapter{Exercise 08} 2 | \extitle{Regularized Logistic Regression} 3 | \input{exercises/en.ex08_interlude.tex} 4 | \newpage 5 | \turnindir{ex08} 6 | \exnumber{08} 7 | \exfiles{my\_logistic\_regression.py} 8 | \exforbidden{sklearn} 9 | \makeheaderfilesforbidden 10 | 11 | % ================================= % 12 | \section*{Objective} 13 | % --------------------------------- % 14 | In the last exercise, you implemented a regularized version 15 | of the linear regression algorithm, called Ridge regression.\\ 16 | \\ 17 | Now it's time to update your logistic regression classifier as well!\\ 18 | \\ 19 | In the \texttt{scikit-learn} library, the logistic regression implementation 20 | offers a few regularization techniques, which can be selected using 21 | the parameter \texttt{penalty} (L$_2$ is the default).\\ 22 | The goal of this exercise is to update your old \texttt{MyLogisticRegression} class to 23 | take that into account.\\ 24 | 25 | % ================================= % 26 | \section*{Instructions} 27 | % --------------------------------- % 28 | In the \texttt{my\_logistic\_regression.py} file, update your \texttt{MyLogisticRegression} 29 | class according to the following instructions:\\ 30 | \\ 31 | 32 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 33 | class MyLogisticRegression(): 34 | """ 35 | Description: 36 | My personal logistic regression to classify things. 37 | """ 38 | supported_penalities = ['l2'] #We consider l2 penalities only. 
One may want to implement other penalities 39 | 40 | def __init__(self, theta, alpha=0.001, max_iter=1000, penality='l2', lambda_=1.0): 41 | # Check on type, data type, value ... if necessary 42 | self.alpha = alpha 43 | self.max_iter = max_iter 44 | self.theta = theta 45 | self.penality = penality 46 | self.lambda_ = lambda_ if penality in self.supported_penalities else 0 47 | #... Your code ... 48 | 49 | ... other methods ... 50 | \end{minted} 51 | \begin{itemize} 52 | \item \textbf{add} a \texttt{penalty} parameter which can take the following values:\texttt{'l2'}, \texttt{'none'} (default value is \texttt{'l2'}). 53 | \end{itemize} 54 | \begin{itemize} 55 | \item \textbf{update} the \texttt{fit\_(self, x, y)} method: 56 | \begin{itemize} 57 | \item \texttt{if penality == 'l2'}: use a \textbf{regularized version} of the gradient descent. 58 | \item \texttt{if penality = 'none'}: use the \textbf{unregularized version} of the gradient descent from \texttt{module03}. 59 | \end{itemize} 60 | \end{itemize} 61 | 62 | % ================================= % 63 | \section*{Examples} 64 | % --------------------------------- % 65 | \begin{minted}[bgcolor=darcula-back,formatcom=\color{lightgrey},fontsize=\scriptsize]{python} 66 | from my_logistic_regression import MyLogisticRegression as mylogr 67 | 68 | theta = np.array([[-2.4], [-1.5], [0.3], [-1.4], [0.7]]) 69 | 70 | # Example 1: 71 | model1 = mylogr(theta, lambda_=5.0) 72 | 73 | model1.penality 74 | # Output 75 | 'l2' 76 | 77 | model1.lambda_ 78 | # Output 79 | 5.0 80 | 81 | # Example 2: 82 | model2 = mylogr(theta, penality=None) 83 | 84 | model2.penality 85 | # Output 86 | None 87 | 88 | model2.lambda_ 89 | # Output 90 | 0.0 91 | 92 | # Example 3: 93 | model3 = mylogr(theta, penality=None, lambda_=2.0) 94 | 95 | model3.penality 96 | # Output 97 | None 98 | 99 | model3.lambda_ 100 | # Output 101 | 0.0 102 | 103 | \end{minted} 104 | 105 | \hint{ 106 | this is also a great use case for decorators... 107 | } 108 | -------------------------------------------------------------------------------- /module09/exercises/m09ex09.tex: -------------------------------------------------------------------------------- 1 | \chapter{Exercise 09} 2 | \extitle{Practicing Regularized Logistic Regression} 3 | %\input{exercises/en.ex09_interlude.tex} 4 | %\newpage 5 | \turnindir{ex09} 6 | \exnumber{09} 7 | \exfiles{solar\_system\_census.py, benchmark\_train.py, models.[csv/yml/pickle]} 8 | \exforbidden{sklearn} 9 | \makeheaderfilesforbidden 10 | 11 | % ================================= % 12 | \section*{Objective} 13 | % --------------------------------- % 14 | It's training time! 15 | Let's practice our updated Logistic Regression with polynomial models.\\ 16 | % ================================= % 17 | \section*{Introduction} 18 | % --------------------------------- % 19 | You have already used the dataset \texttt{solar\_system\_census.csv} 20 | and \texttt{solar\_system\_census\_planets.csv}.\\ 21 | \begin{itemize} 22 | \item The dataset is divided in two files which can be found in the 23 | \texttt{resources} folder: \texttt{solar\_system\_census.csv} and \texttt{solar\_system\_census\_planets.csv} 24 | \item The first file contains biometric information such as the height, weight, and bone density 25 | of several Solar System citizens 26 | \item The second file contains the homeland of each citizen, indicated by its 27 | Space Zipcode representation (i.e. one number for each planet... 
:)) 28 | \end{itemize} 29 | As you should know, Solar citizens come from four registered areas (zipcodes):\\ 30 | 31 | \begin{itemize} 32 | \item The flying cities of Venus ($0$) 33 | \item United Nations of Earth ($1$) 34 | \item Mars Republic ($2$) 35 | \item The Asteroids' Belt colonies ($3$) 36 | \end{itemize} 37 | 38 | % ================================= % 39 | \section*{Instructions} 40 | % --------------------------------- % 41 | % ================================= % 42 | \subsection*{Split the Data} 43 | % --------------------------------- % 44 | 45 | Take your \texttt{solar\_system\_census.csv} dataset and split it into a 46 | \textbf{training set}, a \textbf{cross-validation set} 47 | and a \textbf{test set}. 48 | 49 | % ================================= % 50 | \subsection*{Training and benchmark} 51 | % --------------------------------- % 52 | One part of your submission will be located in the \texttt{benchmark\_train.py} and 53 | \texttt{models.[csv/yml/pickle]} files. 54 | You have to: 55 | \begin{itemize} 56 | \item Train different regularized logistic regression models with a polynomial hypothesis of \textbf{degree 3}. 57 | The models will be trained with different $\lambda$ values, ranging from $0$ to $1$. 58 | Use the one-vs-all method. 59 | \item Evaluate the \textbf{f1 score} of each of the models on the cross-validation set. 60 | You can use the \texttt{f1\_score\_} function that you wrote in \texttt{module08}. 61 | \item Save the different models into \texttt{models.[csv/yml/pickle]}. 62 | \end{itemize} 63 | 64 | % ================================= % 65 | \subsection*{Solar system census program} 66 | % --------------------------------- % 67 | The second and last part of your submission is in \texttt{solar\_system\_census.py}. You have to: 68 | \begin{itemize} 69 | \item Load the different models from \texttt{models.[csv/yml/pickle]} and train from scratch 70 | only the best one on a training set. 71 | \item Visualize the performance of the different models with a bar plot showing the score of 72 | the models given their $\lambda$ value. 73 | \item Print the \textbf{f1 score} of all the models calculated on the test set. 74 | \item Visualize the target values and the predicted values of the best model on the same scatterplot. 75 | Make some effort to produce a readable figure. 
76 | \end{itemize} 77 | 78 | \info{For the second script \texttt{solar\_system\_census.py}, only a train and test set are necessary 79 | as one is simply looking at the performance.} 80 | -------------------------------------------------------------------------------- /module09/exercises/m09ex10.tex: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/module09/exercises/m09ex10.tex -------------------------------------------------------------------------------- /module09/useful_resources.tex: -------------------------------------------------------------------------------- 1 | %******************************************************************************% 2 | % % 3 | % Section useful resources % 4 | % for ML Modules % 5 | % % 6 | %******************************************************************************% 7 | 8 | 9 | \chapter*{Notions and resources} 10 | 11 | \section*{Notions of the module} 12 | \begin{itemize} 13 | \item Regularization 14 | \item Overfitting 15 | \item Regularized loss function 16 | \item Regularized gradient descent 17 | \item Regularized linear regression 18 | \item Regularized logistic regression 19 | \end{itemize} 20 | 21 | \section*{Useful Resources} 22 | 23 | You are recommended to use the following material: \href{https://www.coursera.org/learn/machine-learning}{Machine Learning MOOC - Stanford}\\ 24 | \newline 25 | This series of videos is available at no cost: simply log in, select "Enroll for Free", and click "Audit" at the bottom of the pop-up window.\\ 26 | \newline 27 | The following sections of the course are particularly relevant to today's exercises: 28 | 29 | \subsection*{Week 3: Classification} 30 | 31 | \subsubsection*{Classification with logistic regression (already seen in module08)} 32 | \begin{itemize} 33 | \item Motivations 34 | \item Logistic regression 35 | \item Decision boundary 36 | \end{itemize} 37 | 38 | \subsubsection*{Cost function for logistic regression (already seen in module08)} 39 | \begin{itemize} 40 | \item Cost function for logistic regression 41 | \item Simplified Cost Function for Logistic Regression 42 | \end{itemize} 43 | 44 | \subsubsection*{Gradient descent for logistic regression (already seen in module08)} 45 | \begin{itemize} 46 | \item Gradient Descent Implementation 47 | \end{itemize} 48 | 49 | \subsubsection*{The problem of overfitting (New!!!)} 50 | \begin{itemize} 51 | \item The problem of overfitting 52 | \item Addressing overfitting 53 | \item Cost function with regularization 54 | \item Regularized linear regression 55 | \item Regularized logistic regression 56 | \end{itemize} 57 | 58 | \noindent{\emph{All videos above are also available on this 59 | \href{https://youtube.com/playlist?list=PLkDaE6sCZn6FNC6YRfRQc_FbeQrF8BwGI&feature=shared} 60 | {Andrew Ng's YouTube playlist}, videos 31 to 36 (already seen in module08) and 37 to 41 (new!!!).}} -------------------------------------------------------------------------------- /resources/.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | .aux 3 | .log 4 | .out 5 | .pyg 6 | .pyc 7 | .toc 8 | .o 9 | *~ 10 | #*# 11 | -------------------------------------------------------------------------------- /resources/42ai_bootcamps/en.acknowledgements.tex: -------------------------------------------------------------------------------- 1 | \section*{Contact} 2 | % 
--------------------------------- % 3 | You can contact 42AI by email: \href{mailto:contact@42ai.fr}{contact@42ai.fr}\\ 4 | \newline 5 | Thank you for attending 42AI's Machine Learning Bootcamp! 6 | 7 | % ================================= % 8 | \section*{Acknowledgements} 9 | % --------------------------------- % 10 | The Python \& ML bootcamps are the result of a collective effort. We would like to thank:\\ 11 | \begin{itemize} 12 | \item Maxime Choulika (cmaxime), 13 | \item Pierre Peigné (ppeigne), 14 | \item Matthieu David (mdavid), 15 | \item Quentin Feuillade--Montixi (qfeuilla, quentin@42ai.fr) 16 | \item Mathieu Perez (maperez, mathieu.perez@42ai.fr) 17 | \end{itemize} 18 | who supervised the creation and enhancement of the present transcription.\\ 19 | \begin{itemize} 20 | \item Louis Develle (ldevelle, louis@42ai.fr) 21 | \item Owen Roberts (oroberts) 22 | \item Augustin Lopez (aulopez) 23 | \item Luc Lenotre (llenotre) 24 | \item Amric Trudel (amric@42ai.fr) 25 | \item Benjamin Carlier (bcarlier@student.42.fr) 26 | \item Pablo Clement (pclement@student.42.fr) 27 | \item Amir Mahla (amahla, amahla@42ai.fr) 28 | \end{itemize} 29 | for your investment in the creation and development of these modules.\\ 30 | \begin{itemize} 31 | \item All prior participants who took a moment to provide their feedback and helped us improve these bootcamps! 32 | \end{itemize} 33 | 34 | \vfill 35 | \doclicenseThis 36 | -------------------------------------------------------------------------------- /resources/42ai_bootcamps/en.instructions.tex: -------------------------------------------------------------------------------- 1 | %******************************************************************************% 2 | % % 3 | % Common Instructions % 4 | % for Python Projects % 5 | % % 6 | %******************************************************************************% 7 | 8 | \chapter{Common Instructions} 9 | \begin{itemize} 10 | \item The recommended version of Python to use is 3.7. You can 11 | check your Python version with the following command: \texttt{python -V} 12 | 13 | \item The norm: during this bootcamp, it is recommended to follow the 14 | \href{https://www.python.org/dev/peps/pep-0008/}{PEP 8 standards}, though it is not mandatory. 15 | You can install \href{https://pypi.org/project/pycodestyle}{pycodestyle} or 16 | \href{https://black.readthedocs.io/en/stable/}{Black}, which are convenient 17 | packages to check your code. 18 | 19 | \item The function \texttt{eval} is never allowed. 20 | 21 | \item The exercises are ordered from the easiest to the hardest. 22 | 23 | \item Your exercises are going to be evaluated by someone else, 24 | so make sure that your variable names and function names are appropriate and civil. 25 | 26 | \item Your manual is the internet. 27 | 28 | \item If you're planning on using an AI assistant such as an LLM, make sure it helps 29 | you \textbf{learn and practice}, rather than handing you ready-made solutions! Own your tool, don't let it own you. 30 | 31 | \item If you are a student from 42, you can access our Discord server 32 | on the \href{https://discord.com/channels/887850395697807362/887850396314398720}{42 student's associations portal} and ask your 33 | questions to your peers in the dedicated Bootcamp channel. 34 | 35 | \item You can learn more about 42 Artificial Intelligence by visiting \href{https://42-ai.github.io}{our website}. 
36 | 37 | \item If you find any issue or mistake in the subject please create an issue on 38 | \href{https://github.com/42-AI/bootcamp_machine-learning/issues}{42AI repository on Github}. 39 | 40 | \item We encourage you to create test programs for your 41 | project even though this work \textbf{won't have to be 42 | submitted and won't be graded}. It will give you a chance 43 | to easily test your work and your peers’ work. You will find 44 | those tests especially useful during your defence. Indeed, 45 | during defence, you are free to use your tests and/or the 46 | tests of the peer you are evaluating. 47 | 48 | \end{itemize} -------------------------------------------------------------------------------- /resources/latex/42_logo.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/resources/latex/42_logo.pdf -------------------------------------------------------------------------------- /resources/latex/Makefile.LaTeX: -------------------------------------------------------------------------------- 1 | #******************************************************************************# 2 | # # 3 | # Makefile.LaTeX for LaTeX # 4 | # Created on : Mon Oct 5 17:01:29 2015 # 5 | # Make by : Uriel "Korfuri" Corfa # 6 | # Made by : David "Thor" GIRON # 7 | # # 8 | #******************************************************************************# 9 | 10 | RESSOURCE_FILES = $(RELPATH)/Makefile.LaTeX \ 11 | $(RELPATH)/atbeginend.sty \ 12 | $(RELPATH)/tocloft.sty \ 13 | $(RELPATH)/minted.sty \ 14 | $(RELPATH)/multirow.sty \ 15 | $(RELPATH)/42-fr.cls \ 16 | $(RELPATH)/back.pdf \ 17 | $(RELPATH)/42_logo.pdf \ 18 | $(RELPATH)/info.pdf \ 19 | $(RELPATH)/helphint.pdf \ 20 | $(RELPATH)/warn.pdf 21 | 22 | ALL_DEPENDENCIES = $(RESSOURCE_FILES) $(DEPS) 23 | 24 | export TEXINPUTS := ..:.:$(RELPATH):${TEXINPUTS}: 25 | 26 | all: $(TARGETS) 27 | 28 | clean: 29 | rm -f *.aux *.ent *.log *.out *.toc *.nav *.snm *.vrb *.pyg *.fdb_latexmk *.fls 30 | 31 | distclean: clean 32 | rm -f $(TARGETS) 33 | 34 | re: distclean all 35 | 36 | %.pdf: %.tex Makefile $(ALL_DEPENDENCIES) 37 | pdflatex -shell-escape -halt-on-error $< 38 | @if [ $$? = 0 ]; then\ 39 | pdflatex -shell-escape -halt-on-error $<;\ 40 | echo "Build OK";\ 41 | else\ 42 | echo "Build Failed";\ 43 | exit 1;\ 44 | fi 45 | 46 | view: all 47 | (type xpdf > /dev/null 2>&1 && xpdf ${TARGETS}) ||\ 48 | (type evince > /dev/null 2>&1 && evince ${TARGETS}) 49 | 50 | 51 | #******************************************************************************# 52 | -------------------------------------------------------------------------------- /resources/latex/Makefile.template: -------------------------------------------------------------------------------- 1 | # List the pdf's to build. 
foo.tex will produce foo.pdf 2 | TARGETS = foo.pdf 3 | 4 | # List the files included in the slides 5 | DEPS = somePicture.png someSound.flac someOtherPicture.png 6 | 7 | # Relative path to the LaTeX documentclass setup files 8 | # Adapt as needed 9 | RELPATH = $(shell git rev-parse --show-toplevel)/templates/latex/ 10 | 11 | # You should not touch this either 12 | include $(RELPATH)/Makefile.LaTeX 13 | -------------------------------------------------------------------------------- /resources/latex/atbeginend.sty: -------------------------------------------------------------------------------- 1 | % atbeginend.sty 2 | % 3 | % From : http://www.eng.cam.ac.uk/help/tpl/textprocessing/atbeginend.sty 4 | % 5 | % defines 6 | % \BeforeBegin{environment}{code-to-execute} 7 | % \BeforeEnd {environment}{code-to-execute} 8 | % \AfterBegin {environment}{code-to-execute} 9 | % \AfterEnd {environment}{code-to-execute} 10 | % 11 | % Save \begin and \end to \BeginEnvironment and \EndEnvironment 12 | \let\BeginEnvironment=\begin 13 | \let\EndEnvironment=\end 14 | 15 | \def\IfUnDef#1{\expandafter\ifx\csname#1\endcsname\relax} 16 | 17 | % Null command needed to for \nothing{something}=.nothing. 18 | \def\NullCom#1{} 19 | 20 | \def\begin#1{% 21 | % 22 | % if defined \BeforeBeg for this environment, execute it 23 | \IfUnDef{BeforeBeg#1}\else\csname BeforeBeg#1\endcsname\fi% 24 | % 25 | % 26 | % 27 | \IfUnDef{AfterBeg#1}% This is done to skip the command for environments 28 | % which can take arguments, like multicols; YOU MUST NOT 29 | % USE \AfterBegin{...}{...} for such environments! 30 | \let\SaveBegEng=\BeginEnvironment% 31 | \else% 32 | % Start this environment 33 | \BeginEnvironment{#1}% 34 | % and execute code after \begin{environment} 35 | \csname AfterBeg#1\endcsname% 36 | % 37 | \let\SaveBegEng=\NullCom% 38 | \fi% 39 | \SaveBegEng{#1}% 40 | } 41 | 42 | 43 | \def\end#1{% 44 | % 45 | % execute code before \end{environment} 46 | \IfUnDef{BeforeEnd#1}\else\csname BeforeEnd#1\endcsname\fi% 47 | % 48 | % close this environment 49 | \EndEnvironment{#1}% 50 | % 51 | % and execute code after \begin{environment} 52 | \IfUnDef{AfterEnd#1}\else\csname AfterEnd#1\endcsname\fi% 53 | } 54 | 55 | 56 | %% Now, define commands 57 | % \BeforeBegin{environment}{code-to-execute} 58 | % \BeforeEnd {environment}{code-to-execute} 59 | % \AfterBegin {environment}{code-to-execute} 60 | % \AfterEnd {environment}{code-to-execute} 61 | 62 | \def\BeforeBegin#1#2{\expandafter\gdef\csname BeforeBeg#1\endcsname 63 | {#2}} 64 | \def\BeforeEnd #1#2{\expandafter\gdef\csname BeforeEnd#1\endcsname 65 | {#2}} 66 | \def\AfterBegin #1#2{\expandafter\gdef\csname AfterBeg#1\endcsname {#2}} 67 | \def\AfterEnd #1#2{\expandafter\gdef\csname AfterEnd#1\endcsname{#2}} 68 | -------------------------------------------------------------------------------- /resources/latex/back.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/resources/latex/back.pdf -------------------------------------------------------------------------------- /resources/latex/dark-42_logo.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/resources/latex/dark-42_logo.pdf -------------------------------------------------------------------------------- /resources/latex/dark-back.pdf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/resources/latex/dark-back.pdf -------------------------------------------------------------------------------- /resources/latex/helphint.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/resources/latex/helphint.pdf -------------------------------------------------------------------------------- /resources/latex/info.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/resources/latex/info.pdf -------------------------------------------------------------------------------- /resources/latex/redefinition-commands.tex: -------------------------------------------------------------------------------- 1 | 2 | \newcommand{\ailogo}[1]{\def \@ailogo {#1}}\ailogo{assets/42ai_logo.pdf} 3 | 4 | %% Redefine \maketitle 5 | \makeatletter 6 | \def \maketitle { 7 | \begin{titlepage} 8 | \begin{center} 9 | %\begin{figure}[t] 10 | %\includegraphics[height=8cm]{\@ailogo} 11 | \includegraphics[height=8cm]{assets/42ai_logo.pdf} 12 | %\end{figure} 13 | \vskip 5em 14 | {\huge \@title} 15 | \vskip 2em 16 | {\LARGE \@subtitle} 17 | \vskip 4em 18 | \end{center} 19 | %\begin{center} 20 | %\@author 21 | %\end{center} 22 | %\vskip 5em 23 | \vfill 24 | \begin{center} 25 | \emph{\summarytitle : \@summary} 26 | \end{center} 27 | \vspace{2cm} 28 | %\vskip 5em 29 | %\doclicenseThis 30 | \end{titlepage} 31 | } 32 | \makeatother 33 | 34 | \makeatletter 35 | \def \makeheaderfilesforbidden 36 | { 37 | \noindent 38 | \begin{tabularx}{\textwidth}{|X X X X|} 39 | \hline 40 | \multicolumn{1}{|>{\raggedright}m{1cm}|} 41 | {\vskip 2mm \includegraphics[height=1cm]{assets/42ai_logo.pdf}} & 42 | \multicolumn{2}{>{\centering}m{12cm}}{\small Exercise : \@exnumber } & 43 | \multicolumn{1}{ >{\raggedleft}p{1.5cm}|} 44 | %% {\scriptsize points : \@exscore} \\ \hline 45 | {} \\ \hline 46 | 47 | \multicolumn{4}{|>{\centering}m{15cm}|} 48 | {\small \@extitle} \\ \hline 49 | 50 | \multicolumn{4}{|>{\raggedright}m{15cm}|} 51 | {\small Turn-in directory : \ttfamily 52 | $ex\@exnumber/$ } 53 | \\ \hline 54 | \multicolumn{4}{|>{\raggedright}m{15cm}|} 55 | {\small Files to turn in : \ttfamily \@exfiles } 56 | \\ \hline 57 | 58 | \multicolumn{4}{|>{\raggedright}m{15cm}|} 59 | {\small Forbidden functions : \ttfamily \@exforbidden } 60 | \\ \hline 61 | 62 | %% \multicolumn{4}{|>{\raggedright}m{15cm}|} 63 | %% {\small Remarks : \ttfamily \@exnotes } 64 | %% \\ \hline 65 | \end{tabularx} 66 | %% \exnotes 67 | \exrules 68 | \exmake 69 | \exauthorize{None} 70 | \exforbidden{None} 71 | \extitle{} 72 | \exnumber{} 73 | } 74 | \makeatother 75 | 76 | %% Syntactic highlights 77 | \makeatletter 78 | \newenvironment{pythoncode}{% 79 | \VerbatimEnvironment 80 | \usemintedstyle{emacs} 81 | \minted@resetoptions 82 | \setkeys{minted@opt}{bgcolor=black,formatcom=\color{lightgrey},fontsize=\scriptsize} 83 | \begin{figure}[ht!] 
84 | \centering 85 | \begin{minipage}{16cm} 86 | \begin{VerbatimOut}{\jobname.pyg}} 87 | {%[ 88 | \end{VerbatimOut} 89 | \minted@pygmentize{c} 90 | \DeleteFile{\jobname.pyg} 91 | \end{minipage} 92 | \end{figure}} 93 | \makeatother -------------------------------------------------------------------------------- /resources/latex/warn.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/42-AI/bootcamp_machine-learning/d5cb433e9acab87f90f9f91450cc05acc4a6b16c/resources/latex/warn.pdf -------------------------------------------------------------------------------- /version: -------------------------------------------------------------------------------- 1 | 5.1.1 --------------------------------------------------------------------------------